Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/miloyip/rapidjson.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Milo Yip <miloyip@gmail.com> 2020-07-13 06:10:39 +0300
committer GitHub <noreply@github.com> 2020-07-13 06:10:39 +0300
commit f56928de85d56add3ca6ae7cf7f119a42ee1585b (patch)
tree bbb2ba52416882d0d7487f228006462036edf25d
parent 88bd956d66d348f478bceebfdadb8e26c6844695 (diff)
parent 6694c996b9e5a5e44d9f7cea1d619cae86384981 (diff)
Merge pull request #1744 from lklein53/improve-surrogate-handling
Improve surrogate handling (#1738)
-rw-r--r-- include/rapidjson/reader.h 24
-rw-r--r-- test/unittest/readertest.cpp 3
2 files changed, 19 insertions, 8 deletions
diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h
index 0f85032a..30e45e1f 100644
--- a/include/rapidjson/reader.h
+++ b/include/rapidjson/reader.h
@@ -1023,15 +1023,23 @@ private:
is.Take();
unsigned codepoint = ParseHex4(is, escapeOffset);
RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
- if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) {
- // Handle UTF-16 surrogate pair
- if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
- RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
- unsigned codepoint2 = ParseHex4(is, escapeOffset);
- RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
- if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
+ if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
+ // high surrogate, check if followed by valid low surrogate
+ if (RAPIDJSON_LIKELY(codepoint <= 0xDBFF)) {
+ // Handle UTF-16 surrogate pair
+ if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
+ RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
+ unsigned codepoint2 = ParseHex4(is, escapeOffset);
+ RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+ if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
+ RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
+ codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
+ }
+ // single low surrogate
+ else
+ {
RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
- codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
+ }
}
TEncoding::Encode(os, codepoint);
}
diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp
index 2795766c..2a4a6263 100644
--- a/test/unittest/readertest.cpp
+++ b/test/unittest/readertest.cpp
@@ -944,6 +944,9 @@ TEST(Reader, ParseString_Error) {
TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\uD800X\"]", 2u, 8u);
TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\uD800\\uFFFF\"]", 2u, 14u);
+ // Single low surrogate pair in string is invalid.
+ TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\udc4d\"]", 2u, 8u);
+
// Missing a closing quotation mark in string.
TEST_STRING_ERROR(kParseErrorStringMissQuotationMark, "[\"Test]", 7u, 7u);