diff options
author | Anna Henningsen <anna@addaleax.net> | 2018-09-05 15:24:45 +0300 |
---|---|---|
committer | Anna Henningsen <anna@addaleax.net> | 2018-09-24 23:19:06 +0300 |
commit | 06f6ac179c674bd5d2b26505327f64eaceeb7644 (patch) | |
tree | 02b02c7d001bf98f0a1f7280cc959355acaa7ec4 /src/string_decoder.cc | |
parent | ab6ddc063469efd55c8b9f51a46b0f9d99369914 (diff) |
string_decoder: fix number of replacement chars
Fixes: https://github.com/nodejs/node/issues/22626
PR-URL: https://github.com/nodejs/node/pull/22709
Reviewed-By: Ruben Bridgewater <ruben@bridgewater.de>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Diffstat (limited to 'src/string_decoder.cc')
-rw-r--r-- | src/string_decoder.cc | 13 |
1 file changed, 7 insertions, 6 deletions
diff --git a/src/string_decoder.cc b/src/string_decoder.cc index fa8201faff2..cc38cd927a0 100644 --- a/src/string_decoder.cc +++ b/src/string_decoder.cc @@ -71,16 +71,17 @@ MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate, kIncompleteCharactersEnd); if (Encoding() == UTF8) { // For UTF-8, we need special treatment to align with the V8 decoder: - // If an incomplete character is found at a chunk boundary, we turn - // that character into a single invalid one. + // If an incomplete character is found at a chunk boundary, we use + // its remainder and pass it to V8 as-is. for (size_t i = 0; i < nread && i < MissingBytes(); ++i) { if ((data[i] & 0xC0) != 0x80) { // This byte is not a continuation byte even though it should have - // been one. - // Act as if there was a 1-byte incomplete character, which does - // not make sense but works here because we know it's invalid. + // been one. We stop decoding of the incomplete character at this + // point (but still use the rest of the incomplete bytes from this + // chunk) and assume that the new, unexpected byte starts a new one. state_[kMissingBytes] = 0; - state_[kBufferedBytes] = 1; + memcpy(IncompleteCharacterBuffer() + BufferedBytes(), data, i); + state_[kBufferedBytes] += i; data += i; nread -= i; break; |