diff options
author | Milo Yip <miloyip@gmail.com> | 2020-07-13 06:10:39 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-07-13 06:10:39 +0300 |
commit | f56928de85d56add3ca6ae7cf7f119a42ee1585b (patch) | |
tree | bbb2ba52416882d0d7487f228006462036edf25d | |
parent | 88bd956d66d348f478bceebfdadb8e26c6844695 (diff) | |
parent | 6694c996b9e5a5e44d9f7cea1d619cae86384981 (diff) |
Merge pull request #1744 from lklein53/improve-surrogate-handling
Improve surrogate handling (#1738)
-rw-r--r-- | include/rapidjson/reader.h | 24 | ||||
-rw-r--r-- | test/unittest/readertest.cpp | 3 |
2 files changed, 19 insertions, 8 deletions
diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h
index 0f85032a..30e45e1f 100644
--- a/include/rapidjson/reader.h
+++ b/include/rapidjson/reader.h
@@ -1023,15 +1023,23 @@ private:
 is.Take();
 unsigned codepoint = ParseHex4(is, escapeOffset);
 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
- if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) {
- // Handle UTF-16 surrogate pair
- if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
- RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
- unsigned codepoint2 = ParseHex4(is, escapeOffset);
- RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
- if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
+ if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
+ // high surrogate, check if followed by valid low surrogate
+ if (RAPIDJSON_LIKELY(codepoint <= 0xDBFF)) {
+ // Handle UTF-16 surrogate pair
+ if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
+ RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
+ unsigned codepoint2 = ParseHex4(is, escapeOffset);
+ RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+ if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
+ RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
+ codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
+ }
+ // single low surrogate
+ else
+ {
 RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
- codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
+ }
 }
 TEncoding::Encode(os, codepoint);
 }
diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp
index 2795766c..2a4a6263 100644
--- a/test/unittest/readertest.cpp
+++ b/test/unittest/readertest.cpp
@@ -944,6 +944,9 @@ TEST(Reader, ParseString_Error) {
 TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\uD800X\"]", 2u, 8u);
 TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\uD800\\uFFFF\"]", 2u, 14u);
 
+ // Single low surrogate pair in string is invalid.
+ TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\udc4d\"]", 2u, 8u);
+
 // Missing a closing quotation mark in string.
 TEST_STRING_ERROR(kParseErrorStringMissQuotationMark, "[\"Test]", 7u, 7u);
 