Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/miloyip/rapidjson.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMilo Yip <miloyip@gmail.com>2019-12-20 12:03:21 +0300
committerGitHub <noreply@github.com>2019-12-20 12:03:21 +0300
commit4842934001dcc3aceb8b358faececade768885c5 (patch)
tree975f48999393b376de76359d4a3d50f32158d997
parentbe9f30b664259129f8f361b5f84f01053167564c (diff)
parent596d590e00174b221e9fd8591a02178c3b143d7a (diff)
Merge pull request #1613 from piratf/Issue1604_ASCIIValidationIssue1604_ASCIIValidation
Check before really encoding code points by default.
-rw-r--r--include/rapidjson/encodings.h99
-rw-r--r--include/rapidjson/fwd.h2
-rw-r--r--include/rapidjson/reader.h5
-rw-r--r--test/unittest/documenttest.cpp5
-rw-r--r--test/unittest/encodingstest.cpp34
-rw-r--r--test/unittest/fwdtest.cpp2
6 files changed, 137 insertions, 10 deletions
diff --git a/include/rapidjson/encodings.h b/include/rapidjson/encodings.h
index 0b244679..b7e0516e 100644
--- a/include/rapidjson/encodings.h
+++ b/include/rapidjson/encodings.h
@@ -99,6 +99,11 @@ struct UTF8 {
enum { supportUnicode = 1 };
template<typename OutputStream>
+ static bool ValidateCodePoint(OutputStream&, unsigned codepoint) {
+ return codepoint <= 0x10FFFF;
+ }
+
+ template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) {
if (codepoint <= 0x7F)
os.Put(static_cast<Ch>(codepoint & 0xFF));
@@ -273,6 +278,16 @@ struct UTF16 {
enum { supportUnicode = 1 };
template<typename OutputStream>
+ static bool ValidateCodePoint(OutputStream&, unsigned codepoint) {
+ if (codepoint <= 0xFFFF) {
+ return (codepoint < 0xD800 || codepoint > 0xDFFF);
+ }
+ else {
+ return codepoint <= 0x10FFFF;
+ }
+ }
+
+ template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) {
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
if (codepoint <= 0xFFFF) {
@@ -422,6 +437,11 @@ struct UTF32 {
enum { supportUnicode = 1 };
template<typename OutputStream>
+ static bool ValidateCodePoint(OutputStream&, unsigned codepoint) {
+ return codepoint <= 0x10FFFF;
+ }
+
+ template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) {
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
@@ -545,6 +565,11 @@ struct ASCII {
enum { supportUnicode = 0 };
template<typename OutputStream>
+ static bool ValidateCodePoint(OutputStream&, unsigned codepoint) {
+ return codepoint <= 0x7F;
+ }
+
+ template<typename OutputStream>
static void Encode(OutputStream& os, unsigned codepoint) {
RAPIDJSON_ASSERT(codepoint <= 0x7F);
os.Put(static_cast<Ch>(codepoint & 0xFF));
@@ -620,6 +645,13 @@ struct AutoUTF {
#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
template<typename OutputStream>
+ static RAPIDJSON_FORCEINLINE bool ValidateCodePoint(OutputStream& os, unsigned codepoint) {
+ typedef bool (*ValidateCodePointFunc)(OutputStream&, unsigned);
+ static const ValidateCodePointFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(ValidateCodePoint) };
+ return (*f[os.GetType()])(os, codepoint);
+ }
+
+ template<typename OutputStream>
static RAPIDJSON_FORCEINLINE void Encode(OutputStream& os, unsigned codepoint) {
typedef void (*EncodeFunc)(OutputStream&, unsigned);
static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) };
@@ -651,10 +683,69 @@ struct AutoUTF {
};
///////////////////////////////////////////////////////////////////////////////
+// ValidatableEncoder
+
+/*! Wrapper for TEncoding::Encode with optional code point validation.
+ Because validation is optional, it is selected at compile time through
+ template specialization, which avoids the overhead of a runtime check.
+
+ \tparam CodePointValidation Whether to validate the code point before encoding.
+*/
+// By default, this encoder validates the code point and returns false if it is invalid.
+// Users can switch the check off by setting 'CodePointValidation' to 'false'.
+template<bool CodePointValidation = true>
+class ValidatableEncoder {
+public:
+ template<typename TEncoding, typename OutputStream>
+ static bool Encode(OutputStream &os, unsigned codepoint);
+
+ template<typename TEncoding, typename OutputStream>
+ static bool EncodeUnsafe(OutputStream &os, unsigned codepoint);
+};
+
+template<bool CodePointValidation>
+template<typename TEncoding, typename OutputStream>
+bool
+ValidatableEncoder<CodePointValidation>::Encode(OutputStream &os, unsigned codepoint) {
+ if (!TEncoding::ValidateCodePoint(os, codepoint)) {
+ return false;
+ }
+ TEncoding::Encode(os, codepoint);
+ return true;
+}
+
+template<bool CodePointValidation>
+template<typename TEncoding, typename OutputStream>
+bool
+ValidatableEncoder<CodePointValidation>::EncodeUnsafe(OutputStream &os, unsigned codepoint) {
+ if (!TEncoding::ValidateCodePoint(os, codepoint)) {
+ return false;
+ }
+ TEncoding::EncodeUnsafe(os, codepoint);
+ return true;
+}
+
+template<>
+template<typename TEncoding, typename OutputStream>
+bool
+ValidatableEncoder<false>::Encode(OutputStream &os, unsigned codepoint) {
+ TEncoding::Encode(os, codepoint);
+ return true;
+}
+
+template<>
+template<typename TEncoding, typename OutputStream>
+bool
+ValidatableEncoder<false>::EncodeUnsafe(OutputStream &os, unsigned codepoint) {
+ TEncoding::EncodeUnsafe(os, codepoint);
+ return true;
+}
+
+///////////////////////////////////////////////////////////////////////////////
// Transcoder
//! Encoding conversion.
-template<typename SourceEncoding, typename TargetEncoding>
+template<typename SourceEncoding, typename TargetEncoding, bool CodePointValidation = true>
struct Transcoder {
//! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream.
template<typename InputStream, typename OutputStream>
@@ -662,8 +753,7 @@ struct Transcoder {
unsigned codepoint;
if (!SourceEncoding::Decode(is, &codepoint))
return false;
- TargetEncoding::Encode(os, codepoint);
- return true;
+ return ValidatableEncoder<CodePointValidation>::template Encode<TargetEncoding>(os, codepoint);
}
template<typename InputStream, typename OutputStream>
@@ -671,8 +761,7 @@ struct Transcoder {
unsigned codepoint;
if (!SourceEncoding::Decode(is, &codepoint))
return false;
- TargetEncoding::EncodeUnsafe(os, codepoint);
- return true;
+ return ValidatableEncoder<CodePointValidation>::template EncodeUnsafe<TargetEncoding>(os, codepoint);
}
//! Validate one Unicode codepoint from an encoded stream.
diff --git a/include/rapidjson/fwd.h b/include/rapidjson/fwd.h
index b74a2b81..cdb58947 100644
--- a/include/rapidjson/fwd.h
+++ b/include/rapidjson/fwd.h
@@ -31,7 +31,7 @@ template<typename CharType> struct UTF32LE;
template<typename CharType> struct ASCII;
template<typename CharType> struct AutoUTF;
-template<typename SourceEncoding, typename TargetEncoding>
+template<typename SourceEncoding, typename TargetEncoding, bool CodePointValidation>
struct Transcoder;
// allocators.h
diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h
index 13d27c29..ce8bf0db 100644
--- a/include/rapidjson/reader.h
+++ b/include/rapidjson/reader.h
@@ -1028,7 +1028,10 @@ private:
RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
}
- TEncoding::Encode(os, codepoint);
+ if (!ValidatableEncoder<static_cast<bool>(parseFlags & kParseValidateEncodingFlag)>::template Encode<TEncoding>(os, codepoint))
+ {
+ RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, escapeOffset);
+ }
}
else
RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset);
diff --git a/test/unittest/documenttest.cpp b/test/unittest/documenttest.cpp
index 2b0f269f..038ddb85 100644
--- a/test/unittest/documenttest.cpp
+++ b/test/unittest/documenttest.cpp
@@ -631,6 +631,11 @@ TEST(Document, Issue1604_ASCIIValidation) {
EXPECT_EQ(kParseErrorStringInvalidEncoding, d.GetParseError());
}
+TEST(DocumentDeathTest, Issue1604_ASCIIValidation) {
+ GenericDocument<ASCII<>> d_no_check;
+ ASSERT_THROW((d_no_check.Parse("\"\\u1234\"")), AssertException);
+}
+
// This test does not properly use parsing, just for testing.
// It must call ClearStack() explicitly to prevent memory leak.
// But here we cannot as ClearStack() is private.
diff --git a/test/unittest/encodingstest.cpp b/test/unittest/encodingstest.cpp
index 82cf7776..86866fec 100644
--- a/test/unittest/encodingstest.cpp
+++ b/test/unittest/encodingstest.cpp
@@ -14,8 +14,6 @@
#include "unittest.h"
#include "rapidjson/filereadstream.h"
-#include "rapidjson/filewritestream.h"
-#include "rapidjson/encodedstream.h"
#include "rapidjson/stringbuffer.h"
using namespace rapidjson;
@@ -332,6 +330,12 @@ TEST(EncodingsTest, UTF8) {
}
}
}
+
+ // Validate code point before encoding
+ EXPECT_FALSE(ValidatableEncoder<>::Encode<UTF8<> >(os, 0xFFFFFFFF));
+ EXPECT_FALSE(ValidatableEncoder<>::EncodeUnsafe<UTF8<> >(os, 0xFFFFFFFF));
+ EXPECT_THROW(ValidatableEncoder<false>::Encode<UTF8<> >(os, 0xFFFFFFFF), AssertException);
+ EXPECT_THROW(ValidatableEncoder<false>::EncodeUnsafe<UTF8<> >(os, 0xFFFFFFFF), AssertException);
}
TEST(EncodingsTest, UTF16) {
@@ -392,6 +396,20 @@ TEST(EncodingsTest, UTF16) {
}
}
}
+
+ // Validate code point before encoding
+ EXPECT_FALSE(ValidatableEncoder<>::Encode<UTF16<> >(os, 0xFFFFFFFF));
+ EXPECT_FALSE(ValidatableEncoder<>::EncodeUnsafe<UTF16<> >(os, 0xFFFFFFFF));
+ EXPECT_FALSE(ValidatableEncoder<>::Encode<UTF16<> >(os, 0xD800));
+ EXPECT_FALSE(ValidatableEncoder<>::EncodeUnsafe<UTF16<> >(os, 0xD800));
+ EXPECT_FALSE(ValidatableEncoder<>::Encode<UTF16<> >(os, 0xDFFF));
+ EXPECT_FALSE(ValidatableEncoder<>::EncodeUnsafe<UTF16<> >(os, 0xDFFF));
+ EXPECT_THROW(ValidatableEncoder<false>::Encode<UTF16<> >(os, 0xFFFFFFFF), AssertException);
+ EXPECT_THROW(ValidatableEncoder<false>::EncodeUnsafe<UTF16<> >(os, 0xFFFFFFFF), AssertException);
+ EXPECT_THROW(ValidatableEncoder<false>::Encode<UTF16<> >(os, 0xD800), AssertException);
+ EXPECT_THROW(ValidatableEncoder<false>::EncodeUnsafe<UTF16<> >(os, 0xD800), AssertException);
+ EXPECT_THROW(ValidatableEncoder<false>::Encode<UTF16<> >(os, 0xDFFF), AssertException);
+ EXPECT_THROW(ValidatableEncoder<false>::EncodeUnsafe<UTF16<> >(os, 0xDFFF), AssertException);
}
TEST(EncodingsTest, UTF32) {
@@ -423,6 +441,12 @@ TEST(EncodingsTest, UTF32) {
}
}
}
+
+ // Validate code point before encoding
+ EXPECT_FALSE(ValidatableEncoder<>::Encode<UTF32<> >(os, 0xFFFFFFFF));
+ EXPECT_FALSE(ValidatableEncoder<>::EncodeUnsafe<UTF32<> >(os, 0xFFFFFFFF));
+ EXPECT_THROW(ValidatableEncoder<false>::Encode<UTF32<> >(os, 0xFFFFFFFF), AssertException);
+ EXPECT_THROW(ValidatableEncoder<false>::EncodeUnsafe<UTF32<> >(os, 0xFFFFFFFF), AssertException);
}
TEST(EncodingsTest, ASCII) {
@@ -448,4 +472,10 @@ TEST(EncodingsTest, ASCII) {
EXPECT_EQ(0, StrCmp(encodedStr, os2.GetString()));
}
}
+
+ // Validate code point before encoding
+ EXPECT_FALSE(ValidatableEncoder<>::Encode<ASCII<> >(os, 0x0080));
+ EXPECT_FALSE(ValidatableEncoder<>::EncodeUnsafe<ASCII<> >(os, 0x0080));
+ EXPECT_THROW(ValidatableEncoder<false>::Encode<ASCII<> >(os, 0x0080), AssertException);
+ EXPECT_THROW(ValidatableEncoder<false>::EncodeUnsafe<ASCII<> >(os, 0x0080), AssertException);
}
diff --git a/test/unittest/fwdtest.cpp b/test/unittest/fwdtest.cpp
index 1936d977..353dba06 100644
--- a/test/unittest/fwdtest.cpp
+++ b/test/unittest/fwdtest.cpp
@@ -39,7 +39,7 @@ struct Foo {
UTF32LE<unsigned>* utf32le;
ASCII<char>* ascii;
AutoUTF<unsigned>* autoutf;
- Transcoder<UTF8<char>, UTF8<char> >* transcoder;
+ Transcoder<UTF8<char>, UTF8<char>, true>* transcoder;
// allocators.h
CrtAllocator* crtallocator;