Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/ValveSoftware/openvr.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'src/vrcommon/strtools_public.cpp')
-rw-r--r--src/vrcommon/strtools_public.cpp194
1 files changed, 109 insertions, 85 deletions
diff --git a/src/vrcommon/strtools_public.cpp b/src/vrcommon/strtools_public.cpp
index 61cf324..661325a 100644
--- a/src/vrcommon/strtools_public.cpp
+++ b/src/vrcommon/strtools_public.cpp
@@ -4,7 +4,11 @@
#include <stdio.h>
#include <stdlib.h>
#include <sstream>
+#include <codecvt>
#include <iostream>
+#include <functional>
+#include <locale>
+#include <codecvt>
//-----------------------------------------------------------------------------
// Purpose:
@@ -49,96 +53,36 @@ bool StringHasSuffixCaseSensitive( const std::string &sString, const std::string
//-----------------------------------------------------------------------------
// Purpose:
//-----------------------------------------------------------------------------
+
std::string UTF16to8(const wchar_t * in)
{
- std::string out;
- unsigned int codepoint = 0;
- for ( ; in && *in != 0; ++in )
+ try
{
- if (*in >= 0xd800 && *in <= 0xdbff)
- codepoint = ((*in - 0xd800) << 10) + 0x10000;
- else
- {
- if (*in >= 0xdc00 && *in <= 0xdfff)
- codepoint |= *in - 0xdc00;
- else
- codepoint = *in;
+ typedef std::codecvt_utf8< wchar_t > convert_type;
+ std::wstring_convert< convert_type, wchar_t > converter;
- if (codepoint <= 0x7f)
- out.append(1, static_cast<char>(codepoint));
- else if (codepoint <= 0x7ff)
- {
- out.append(1, static_cast<char>(0xc0 | ((codepoint >> 6) & 0x1f)));
- out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
- }
- else if (codepoint <= 0xffff)
- {
- out.append(1, static_cast<char>(0xe0 | ((codepoint >> 12) & 0x0f)));
- out.append(1, static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
- out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
- }
- else
- {
- out.append(1, static_cast<char>(0xf0 | ((codepoint >> 18) & 0x07)));
- out.append(1, static_cast<char>(0x80 | ((codepoint >> 12) & 0x3f)));
- out.append(1, static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
- out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
- }
- codepoint = 0;
- }
+ return converter.to_bytes( in );
+ }
+ catch ( ... )
+ {
+ return std::string();
}
- return out;
}
+
std::wstring UTF8to16(const char * in)
{
- std::wstring out;
- unsigned int codepoint = 0;
- int following = 0;
- for ( ; in && *in != 0; ++in )
+ try
{
- unsigned char ch = *in;
- if (ch <= 0x7f)
- {
- codepoint = ch;
- following = 0;
- }
- else if (ch <= 0xbf)
- {
- if (following > 0)
- {
- codepoint = (codepoint << 6) | (ch & 0x3f);
- --following;
- }
- }
- else if (ch <= 0xdf)
- {
- codepoint = ch & 0x1f;
- following = 1;
- }
- else if (ch <= 0xef)
- {
- codepoint = ch & 0x0f;
- following = 2;
- }
- else
- {
- codepoint = ch & 0x07;
- following = 3;
- }
- if (following == 0)
- {
- if (codepoint > 0xffff)
- {
- out.append(1, static_cast<wchar_t>(0xd800 + (codepoint >> 10)));
- out.append(1, static_cast<wchar_t>(0xdc00 + (codepoint & 0x03ff)));
- }
- else
- out.append(1, static_cast<wchar_t>(codepoint));
- codepoint = 0;
- }
+ typedef std::codecvt_utf8< wchar_t > convert_type;
+ std::wstring_convert< convert_type, wchar_t > converter;
+
+ return converter.from_bytes( in );
+ }
+ catch ( ... )
+ {
+ return std::wstring();
}
- return out;
}
@@ -245,11 +189,30 @@ int iHexCharToInt( char cValue )
return -1;
}
+
+//-----------------------------------------------------------------------------
+// Purpose: These define the set of characters to filter for components (which
+// need all the escaping we can muster) vs. paths (which don't want
+// / and : escaped so we don't break less compliant URL handling code.
+//-----------------------------------------------------------------------------
+static bool CharNeedsEscape_Component( const char c )
+{
+ return (!(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z') && !(c >= '0' && c <= '9')
+ && c != '-' && c != '_' && c != '.');
+}
+static bool CharNeedsEscape_FullPath( const char c )
+{
+ return (!(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z') && !(c >= '0' && c <= '9')
+ && c != '-' && c != '_' && c != '.' && c != '/' && c != ':' );
+}
+
+
//-----------------------------------------------------------------------------
// Purpose: Internal implementation of encode, works in the strict RFC manner, or
// with spaces turned to + like HTML form encoding.
//-----------------------------------------------------------------------------
-void V_URLEncodeInternal( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen, bool bUsePlusForSpace )
+void V_URLEncodeInternal( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen,
+ bool bUsePlusForSpace, std::function< bool(const char)> fnNeedsEscape )
{
//AssertMsg( nDestLen > 3*nSourceLen, "Target buffer for V_URLEncode should be 3x source length, plus one for terminating null\n" );
@@ -267,9 +230,7 @@ void V_URLEncodeInternal( char *pchDest, int nDestLen, const char *pchSource, in
// We allow only a-z, A-Z, 0-9, period, underscore, and hyphen to pass through unescaped.
// These are the characters allowed by both the original RFC 1738 and the latest RFC 3986.
// Current specs also allow '~', but that is forbidden under original RFC 1738.
- if ( !( pchSource[i] >= 'a' && pchSource[i] <= 'z' ) && !( pchSource[i] >= 'A' && pchSource[i] <= 'Z' ) && !(pchSource[i] >= '0' && pchSource[i] <= '9' )
- && pchSource[i] != '-' && pchSource[i] != '_' && pchSource[i] != '.'
- )
+ if ( fnNeedsEscape( pchSource[i] ) )
{
if ( bUsePlusForSpace && pchSource[i] == ' ' )
{
@@ -397,15 +358,19 @@ size_t V_URLDecodeInternal( char *pchDecodeDest, int nDecodeDestLen, const char
//-----------------------------------------------------------------------------
void V_URLEncode( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen )
{
- return V_URLEncodeInternal( pchDest, nDestLen, pchSource, nSourceLen, true );
+ return V_URLEncodeInternal( pchDest, nDestLen, pchSource, nSourceLen, true, CharNeedsEscape_Component );
}
void V_URLEncodeNoPlusForSpace( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen )
{
- return V_URLEncodeInternal( pchDest, nDestLen, pchSource, nSourceLen, false );
+ return V_URLEncodeInternal( pchDest, nDestLen, pchSource, nSourceLen, false, CharNeedsEscape_Component );
}
+void V_URLEncodeFullPath( char *pchDest, int nDestLen, const char *pchSource, int nSourceLen )
+{
+ return V_URLEncodeInternal( pchDest, nDestLen, pchSource, nSourceLen, false, CharNeedsEscape_FullPath );
+}
//-----------------------------------------------------------------------------
// Purpose: Decodes a string (or binary data) from URL encoding format, see rfc1738 section 2.2.
@@ -458,3 +423,62 @@ std::vector<std::string> TokenizeString( const std::string & sString, char cToke
return vecStrings;
}
+//-----------------------------------------------------------------------------
+// Purpose: Repairs a should-be-UTF-8 string to a for-sure-is-UTF-8 string, plus return boolean if we subbed in '?' somewhere
+//-----------------------------------------------------------------------------
+bool RepairUTF8( const char *pbegin, const char *pend, std::string & sOutputUtf8 )
+{
+ typedef std::codecvt_utf8<char32_t> facet_type;
+ facet_type myfacet;
+
+ std::mbstate_t mystate = std::mbstate_t();
+
+ sOutputUtf8.clear();
+ sOutputUtf8.reserve( pend - pbegin );
+ bool bSqueakyClean = true;
+
+ const char *pmid = pbegin;
+
+ while ( pmid != pend )
+ {
+ char32_t out = 0xdeadbeef, *pout;
+ pbegin = pmid;
+ switch ( myfacet.in( mystate, pbegin, pend, pmid, &out, &out + 1, pout ) )
+ {
+ case facet_type::ok:
+ // could convert back, but no need
+ for ( const char *p = pbegin; p != pmid; ++p )
+ {
+ sOutputUtf8 += *p;
+ }
+ break;
+
+ case facet_type::noconv:
+ // unexpected! always converting type
+ bSqueakyClean = false;
+ break;
+
+ case facet_type::partial:
+ sOutputUtf8 += '?';
+ pmid++; // partial consumes 0, so make progress
+ bSqueakyClean = false;
+ break;
+
+ case facet_type::error:
+ sOutputUtf8 += '?';
+ // error consumes some bytes
+ bSqueakyClean = false;
+ break;
+ }
+ }
+
+ return bSqueakyClean;
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Repairs a should-be-UTF-8 string to a for-sure-is-UTF-8 string, plus return boolean if we subbed in '?' somewhere
+//-----------------------------------------------------------------------------
+bool RepairUTF8( const std::string & sInputUtf8, std::string & sOutputUtf8 )
+{
+ return RepairUTF8( sInputUtf8.data(), sInputUtf8.data() + sInputUtf8.size(), sOutputUtf8 );
+}