diff options
author | Milo Yip <miloyip@gmail.com> | 2019-12-20 12:03:21 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-12-20 12:03:21 +0300 |
commit | 4842934001dcc3aceb8b358faececade768885c5 (patch) | |
tree | 975f48999393b376de76359d4a3d50f32158d997 | |
parent | be9f30b664259129f8f361b5f84f01053167564c (diff) | |
parent | 596d590e00174b221e9fd8591a02178c3b143d7a (diff) |
Merge pull request #1613 from piratf/Issue1604_ASCIIValidation Issue1604_ASCIIValidation
Check before really encoding code points by default.
-rw-r--r-- | include/rapidjson/encodings.h | 99 | ||||
-rw-r--r-- | include/rapidjson/fwd.h | 2 | ||||
-rw-r--r-- | include/rapidjson/reader.h | 5 | ||||
-rw-r--r-- | test/unittest/documenttest.cpp | 5 | ||||
-rw-r--r-- | test/unittest/encodingstest.cpp | 34 | ||||
-rw-r--r-- | test/unittest/fwdtest.cpp | 2 |
6 files changed, 137 insertions, 10 deletions
diff --git a/include/rapidjson/encodings.h b/include/rapidjson/encodings.h index 0b244679..b7e0516e 100644 --- a/include/rapidjson/encodings.h +++ b/include/rapidjson/encodings.h @@ -99,6 +99,11 @@ struct UTF8 { enum { supportUnicode = 1 }; template<typename OutputStream> + static bool ValidateCodePoint(OutputStream&, unsigned codepoint) { + return codepoint <= 0x10FFFF; + } + + template<typename OutputStream> static void Encode(OutputStream& os, unsigned codepoint) { if (codepoint <= 0x7F) os.Put(static_cast<Ch>(codepoint & 0xFF)); @@ -273,6 +278,16 @@ struct UTF16 { enum { supportUnicode = 1 }; template<typename OutputStream> + static bool ValidateCodePoint(OutputStream&, unsigned codepoint) { + if (codepoint <= 0xFFFF) { + return (codepoint < 0xD800 || codepoint > 0xDFFF); + } + else { + return codepoint <= 0x10FFFF; + } + } + + template<typename OutputStream> static void Encode(OutputStream& os, unsigned codepoint) { RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); if (codepoint <= 0xFFFF) { @@ -422,6 +437,11 @@ struct UTF32 { enum { supportUnicode = 1 }; template<typename OutputStream> + static bool ValidateCodePoint(OutputStream&, unsigned codepoint) { + return codepoint <= 0x10FFFF; + } + + template<typename OutputStream> static void Encode(OutputStream& os, unsigned codepoint) { RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); @@ -545,6 +565,11 @@ struct ASCII { enum { supportUnicode = 0 }; template<typename OutputStream> + static bool ValidateCodePoint(OutputStream&, unsigned codepoint) { + return codepoint <= 0x7F; + } + + template<typename OutputStream> static void Encode(OutputStream& os, unsigned codepoint) { RAPIDJSON_ASSERT(codepoint <= 0x7F); os.Put(static_cast<Ch>(codepoint & 0xFF)); @@ -620,6 +645,13 @@ struct AutoUTF { #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x template<typename OutputStream> + static 
RAPIDJSON_FORCEINLINE bool ValidateCodePoint(OutputStream& os, unsigned codepoint) { + typedef bool (*ValidateCodePointFunc)(OutputStream&, unsigned); + static const ValidateCodePointFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(ValidateCodePoint) }; + return (*f[os.GetType()])(os, codepoint); + } + + template<typename OutputStream> static RAPIDJSON_FORCEINLINE void Encode(OutputStream& os, unsigned codepoint) { typedef void (*EncodeFunc)(OutputStream&, unsigned); static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) }; @@ -651,10 +683,69 @@ struct AutoUTF { }; /////////////////////////////////////////////////////////////////////////////// +// ValidatableEncoder + +/*! Wrapper for TEncoding::Encode, with an optional validation feature. + Since the feature is optional, this function will be implemented by + template partial specialization, to avoid the overhead of a runtime check. + + \tparam CodePointValidation Whether to validate the code point before encoding. +*/ +// By default, this encoder will validate the code point and generate a parse error. +// Users can switch the check feature off by setting 'CodePointValidation' to 'false'. 
+template<bool CodePointValidation = true> +class ValidatableEncoder { +public: + template<typename TEncoding, typename OutputStream> + static bool Encode(OutputStream &os, unsigned codepoint); + + template<typename TEncoding, typename OutputStream> + static bool EncodeUnsafe(OutputStream &os, unsigned codepoint); +}; + +template<bool CodePointValidation> +template<typename TEncoding, typename OutputStream> +bool +ValidatableEncoder<CodePointValidation>::Encode(OutputStream &os, unsigned codepoint) { + if (!TEncoding::ValidateCodePoint(os, codepoint)) { + return false; + } + TEncoding::Encode(os, codepoint); + return true; +} + +template<bool CodePointValidation> +template<typename TEncoding, typename OutputStream> +bool +ValidatableEncoder<CodePointValidation>::EncodeUnsafe(OutputStream &os, unsigned codepoint) { + if (!TEncoding::ValidateCodePoint(os, codepoint)) { + return false; + } + TEncoding::EncodeUnsafe(os, codepoint); + return true; +} + +template<> +template<typename TEncoding, typename OutputStream> +bool +ValidatableEncoder<false>::Encode(OutputStream &os, unsigned codepoint) { + TEncoding::Encode(os, codepoint); + return true; +} + +template<> +template<typename TEncoding, typename OutputStream> +bool +ValidatableEncoder<false>::EncodeUnsafe(OutputStream &os, unsigned codepoint) { + TEncoding::EncodeUnsafe(os, codepoint); + return true; +} + +/////////////////////////////////////////////////////////////////////////////// // Transcoder //! Encoding conversion. -template<typename SourceEncoding, typename TargetEncoding> +template<typename SourceEncoding, typename TargetEncoding, bool CodePointValidation = true> struct Transcoder { //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream. 
template<typename InputStream, typename OutputStream> @@ -662,8 +753,7 @@ struct Transcoder { unsigned codepoint; if (!SourceEncoding::Decode(is, &codepoint)) return false; - TargetEncoding::Encode(os, codepoint); - return true; + return ValidatableEncoder<CodePointValidation>::template Encode<TargetEncoding>(os, codepoint); } template<typename InputStream, typename OutputStream> @@ -671,8 +761,7 @@ struct Transcoder { unsigned codepoint; if (!SourceEncoding::Decode(is, &codepoint)) return false; - TargetEncoding::EncodeUnsafe(os, codepoint); - return true; + return ValidatableEncoder<CodePointValidation>::template EncodeUnsafe<TargetEncoding>(os, codepoint); } //! Validate one Unicode codepoint from an encoded stream. diff --git a/include/rapidjson/fwd.h b/include/rapidjson/fwd.h index b74a2b81..cdb58947 100644 --- a/include/rapidjson/fwd.h +++ b/include/rapidjson/fwd.h @@ -31,7 +31,7 @@ template<typename CharType> struct UTF32LE; template<typename CharType> struct ASCII; template<typename CharType> struct AutoUTF; -template<typename SourceEncoding, typename TargetEncoding> +template<typename SourceEncoding, typename TargetEncoding, bool CodePointValidation> struct Transcoder; // allocators.h diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 13d27c29..ce8bf0db 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -1028,7 +1028,10 @@ private: RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000; } - TEncoding::Encode(os, codepoint); + if (!ValidatableEncoder<static_cast<bool>(parseFlags & kParseValidateEncodingFlag)>::template Encode<TEncoding>(os, codepoint)) + { + RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, escapeOffset); + } } else RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset); diff --git a/test/unittest/documenttest.cpp b/test/unittest/documenttest.cpp index 2b0f269f..038ddb85 
100644 --- a/test/unittest/documenttest.cpp +++ b/test/unittest/documenttest.cpp @@ -631,6 +631,11 @@ TEST(Document, Issue1604_ASCIIValidation) { EXPECT_EQ(kParseErrorStringInvalidEncoding, d.GetParseError()); } +TEST(DocumentDeathTest, Issue1604_ASCIIValidation) { + GenericDocument<ASCII<>> d_no_check; + ASSERT_THROW((d_no_check.Parse("\"\\u1234\"")), AssertException); +} + // This test does not properly use parsing, just for testing. // It must call ClearStack() explicitly to prevent memory leak. // But here we cannot as ClearStack() is private. diff --git a/test/unittest/encodingstest.cpp b/test/unittest/encodingstest.cpp index 82cf7776..86866fec 100644 --- a/test/unittest/encodingstest.cpp +++ b/test/unittest/encodingstest.cpp @@ -14,8 +14,6 @@ #include "unittest.h" #include "rapidjson/filereadstream.h" -#include "rapidjson/filewritestream.h" -#include "rapidjson/encodedstream.h" #include "rapidjson/stringbuffer.h" using namespace rapidjson; @@ -332,6 +330,12 @@ TEST(EncodingsTest, UTF8) { } } } + + // Validate code point before encoding + EXPECT_FALSE(ValidatableEncoder<>::Encode<UTF8<> >(os, 0xFFFFFFFF)); + EXPECT_FALSE(ValidatableEncoder<>::EncodeUnsafe<UTF8<> >(os, 0xFFFFFFFF)); + EXPECT_THROW(ValidatableEncoder<false>::Encode<UTF8<> >(os, 0xFFFFFFFF), AssertException); + EXPECT_THROW(ValidatableEncoder<false>::EncodeUnsafe<UTF8<> >(os, 0xFFFFFFFF), AssertException); } TEST(EncodingsTest, UTF16) { @@ -392,6 +396,20 @@ TEST(EncodingsTest, UTF16) { } } } + + // Validate code point before encoding + EXPECT_FALSE(ValidatableEncoder<>::Encode<UTF16<> >(os, 0xFFFFFFFF)); + EXPECT_FALSE(ValidatableEncoder<>::EncodeUnsafe<UTF16<> >(os, 0xFFFFFFFF)); + EXPECT_FALSE(ValidatableEncoder<>::Encode<UTF16<> >(os, 0xD800)); + EXPECT_FALSE(ValidatableEncoder<>::EncodeUnsafe<UTF16<> >(os, 0xD800)); + EXPECT_FALSE(ValidatableEncoder<>::Encode<UTF16<> >(os, 0xDFFF)); + EXPECT_FALSE(ValidatableEncoder<>::EncodeUnsafe<UTF16<> >(os, 0xDFFF)); + 
EXPECT_THROW(ValidatableEncoder<false>::Encode<UTF16<> >(os, 0xFFFFFFFF), AssertException); + EXPECT_THROW(ValidatableEncoder<false>::EncodeUnsafe<UTF16<> >(os, 0xFFFFFFFF), AssertException); + EXPECT_THROW(ValidatableEncoder<false>::Encode<UTF16<> >(os, 0xD800), AssertException); + EXPECT_THROW(ValidatableEncoder<false>::EncodeUnsafe<UTF16<> >(os, 0xD800), AssertException); + EXPECT_THROW(ValidatableEncoder<false>::Encode<UTF16<> >(os, 0xDFFF), AssertException); + EXPECT_THROW(ValidatableEncoder<false>::EncodeUnsafe<UTF16<> >(os, 0xDFFF), AssertException); } TEST(EncodingsTest, UTF32) { @@ -423,6 +441,12 @@ TEST(EncodingsTest, UTF32) { } } } + + // Validate code point before encoding + EXPECT_FALSE(ValidatableEncoder<>::Encode<UTF32<> >(os, 0xFFFFFFFF)); + EXPECT_FALSE(ValidatableEncoder<>::EncodeUnsafe<UTF32<> >(os, 0xFFFFFFFF)); + EXPECT_THROW(ValidatableEncoder<false>::Encode<UTF32<> >(os, 0xFFFFFFFF), AssertException); + EXPECT_THROW(ValidatableEncoder<false>::EncodeUnsafe<UTF32<> >(os, 0xFFFFFFFF), AssertException); } TEST(EncodingsTest, ASCII) { @@ -448,4 +472,10 @@ TEST(EncodingsTest, ASCII) { EXPECT_EQ(0, StrCmp(encodedStr, os2.GetString())); } } + + // Validate code point before encoding + EXPECT_FALSE(ValidatableEncoder<>::Encode<ASCII<> >(os, 0x0080)); + EXPECT_FALSE(ValidatableEncoder<>::EncodeUnsafe<ASCII<> >(os, 0x0080)); + EXPECT_THROW(ValidatableEncoder<false>::Encode<ASCII<> >(os, 0x0080), AssertException); + EXPECT_THROW(ValidatableEncoder<false>::EncodeUnsafe<ASCII<> >(os, 0x0080), AssertException); } diff --git a/test/unittest/fwdtest.cpp b/test/unittest/fwdtest.cpp index 1936d977..353dba06 100644 --- a/test/unittest/fwdtest.cpp +++ b/test/unittest/fwdtest.cpp @@ -39,7 +39,7 @@ struct Foo { UTF32LE<unsigned>* utf32le; ASCII<char>* ascii; AutoUTF<unsigned>* autoutf; - Transcoder<UTF8<char>, UTF8<char> >* transcoder; + Transcoder<UTF8<char>, UTF8<char>, true>* transcoder; // allocators.h CrtAllocator* crtallocator; |