From 2b1f57e08b26879e4fa99670df078b77c848d9e0 Mon Sep 17 00:00:00 2001 From: Eirik Tsarpalis Date: Fri, 11 Nov 2022 17:53:56 +0000 Subject: Ensure ReadBufferState resets any BOM offsets every time the buffer is advanced. (#78221) * Ensure the async reader state resets the BOM offset in every AdvanceBuffer() call. * Add BOM insertion to async serialization stress testing * Update src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/JsonSerializerWrapper.Reflection.cs --- .../Text/Json/Serialization/ReadBufferState.cs | 4 +- .../Serialization/ContinuationTests.cs | 25 +++++ .../JsonSerializerWrapper.Reflection.cs | 120 ++++++++++++++++++--- 3 files changed, 134 insertions(+), 15 deletions(-) diff --git a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/ReadBufferState.cs b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/ReadBufferState.cs index efbc842b368..c84c00a3524 100644 --- a/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/ReadBufferState.cs +++ b/src/libraries/System.Text.Json/src/System/Text/Json/Serialization/ReadBufferState.cs @@ -122,7 +122,6 @@ namespace System.Text.Json.Serialization // Copy the unprocessed data to the new buffer while shifting the processed bytes. Buffer.BlockCopy(oldBuffer, _offset + bytesConsumed, newBuffer, 0, _count); _buffer = newBuffer; - _offset = 0; _maxCount = _count; // Clear and return the old buffer @@ -133,9 +132,10 @@ namespace System.Text.Json.Serialization { // Shift the processed bytes to the beginning of buffer to make more room. Buffer.BlockCopy(_buffer, _offset + bytesConsumed, _buffer, 0, _count); - _offset = 0; } } + + _offset = 0; } private void ProcessReadBytes() diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/ContinuationTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/ContinuationTests.cs index 8496254aaed..b08e8b758e9 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/ContinuationTests.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/ContinuationTests.cs @@ -233,6 +233,31 @@ namespace System.Text.Json.Serialization.Tests Assert.Equal(expectedFailure.Column, ex.BytePositionInLine); } + [Fact] + public static async Task BomHandlingRegressionTest() + { + byte[] utf8Bom = Encoding.UTF8.GetPreamble(); + byte[] json = """{ "Value" : "Hello" }"""u8.ToArray(); + + using var stream = new MemoryStream(); + stream.Write(utf8Bom, 0, utf8Bom.Length); + stream.Write(json, 0, json.Length); + stream.Position = 0; + + var options = new JsonSerializerOptions + { + DefaultBufferSize = 32 + }; + + Test result = await JsonSerializer.DeserializeAsync(stream, options); + Assert.Equal("Hello", result.Value); + } + + private class Test + { + public string Value { get; set; } + } + private class Chunk : ReadOnlySequenceSegment { public Chunk(string json, int firstSegmentLength) diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/JsonSerializerWrapper.Reflection.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/JsonSerializerWrapper.Reflection.cs index 24636171cfc..6296522dd91 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/JsonSerializerWrapper.Reflection.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/JsonSerializerWrapper.Reflection.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Diagnostics; using System.IO; using System.Runtime.CompilerServices; using System.Text.Json.Nodes; @@ -15,9 +16,9 @@ namespace System.Text.Json.Serialization.Tests public static JsonSerializerWrapper SpanSerializer { get; } = new SpanSerializerWrapper(); public static JsonSerializerWrapper StringSerializer { get; } = new StringSerializerWrapper(); public static StreamingJsonSerializerWrapper AsyncStreamSerializer { get; } = new AsyncStreamSerializerWrapper(); - public static StreamingJsonSerializerWrapper AsyncStreamSerializerWithSmallBuffer { get; } = new AsyncStreamSerializerWrapper(forceSmallBufferInOptions: true); + public static StreamingJsonSerializerWrapper AsyncStreamSerializerWithSmallBuffer { get; } = new AsyncStreamSerializerWrapper(forceSmallBufferInOptions: true, forceBomInsertions: true); public static StreamingJsonSerializerWrapper SyncStreamSerializer { get; } = new SyncStreamSerializerWrapper(); - public static StreamingJsonSerializerWrapper SyncStreamSerializerWithSmallBuffer { get; } = new SyncStreamSerializerWrapper(forceSmallBufferInOptions: true); + public static StreamingJsonSerializerWrapper SyncStreamSerializerWithSmallBuffer { get; } = new SyncStreamSerializerWrapper(forceSmallBufferInOptions: true, forceBomInsertions: true); public static JsonSerializerWrapper ReaderWriterSerializer { get; } = new ReaderWriterSerializerWrapper(); public static JsonSerializerWrapper DocumentSerializer { get; } = new DocumentSerializerWrapper(); public static JsonSerializerWrapper ElementSerializer { get; } = new ElementSerializerWrapper(); @@ -120,17 +121,22 @@ namespace System.Text.Json.Serialization.Tests private class AsyncStreamSerializerWrapper : StreamingJsonSerializerWrapper { private readonly bool _forceSmallBufferInOptions; + private readonly bool _forceBomInsertions; public override bool IsAsyncSerializer => true; - public AsyncStreamSerializerWrapper(bool forceSmallBufferInOptions = false) + public AsyncStreamSerializerWrapper(bool forceSmallBufferInOptions = false, bool forceBomInsertions = false) { _forceSmallBufferInOptions = forceSmallBufferInOptions; + _forceBomInsertions = forceBomInsertions; } private JsonSerializerOptions? ResolveOptionsInstance(JsonSerializerOptions? options) => _forceSmallBufferInOptions ? JsonSerializerOptionsSmallBufferMapper.ResolveOptionsInstanceWithSmallBuffer(options) : options; + private Stream ResolveReadStream(Stream stream) + => stream is not null && _forceBomInsertions ? new Utf8BomInsertingStream(stream) : stream; + public override Task SerializeWrapper(Stream utf8Json, T value, JsonSerializerOptions options = null) { return JsonSerializer.SerializeAsync(utf8Json, value, ResolveOptionsInstance(options)); @@ -153,38 +159,43 @@ namespace System.Text.Json.Serialization.Tests public override async Task DeserializeWrapper(Stream utf8Json, JsonSerializerOptions options = null) { - return await JsonSerializer.DeserializeAsync(utf8Json, ResolveOptionsInstance(options)); + return await JsonSerializer.DeserializeAsync(ResolveReadStream(utf8Json), ResolveOptionsInstance(options)); } public override async Task DeserializeWrapper(Stream utf8Json, Type returnType, JsonSerializerOptions options = null) { - return await JsonSerializer.DeserializeAsync(utf8Json, returnType, ResolveOptionsInstance(options)); + return await JsonSerializer.DeserializeAsync(ResolveReadStream(utf8Json), returnType, ResolveOptionsInstance(options)); } public override async Task DeserializeWrapper(Stream utf8Json, JsonTypeInfo jsonTypeInfo) { - return await JsonSerializer.DeserializeAsync(utf8Json, jsonTypeInfo); + return await JsonSerializer.DeserializeAsync(ResolveReadStream(utf8Json), jsonTypeInfo); } public override async Task DeserializeWrapper(Stream utf8Json, Type returnType, JsonSerializerContext context) { - return await JsonSerializer.DeserializeAsync(utf8Json, returnType, context); + return await JsonSerializer.DeserializeAsync(ResolveReadStream(utf8Json), returnType, context); } } private class SyncStreamSerializerWrapper : StreamingJsonSerializerWrapper { private readonly bool _forceSmallBufferInOptions; + private readonly bool _forceBomInsertions; + + public override bool IsAsyncSerializer => false; - public SyncStreamSerializerWrapper(bool forceSmallBufferInOptions = false) + public SyncStreamSerializerWrapper(bool forceSmallBufferInOptions = false, bool forceBomInsertions = false) { _forceSmallBufferInOptions = forceSmallBufferInOptions; + _forceBomInsertions = forceBomInsertions; } private JsonSerializerOptions? ResolveOptionsInstance(JsonSerializerOptions? options) => _forceSmallBufferInOptions ? JsonSerializerOptionsSmallBufferMapper.ResolveOptionsInstanceWithSmallBuffer(options) : options; - public override bool IsAsyncSerializer => false; + private Stream ResolveReadStream(Stream stream) + => stream is not null && _forceBomInsertions ? new Utf8BomInsertingStream(stream) : stream; public override Task SerializeWrapper(Stream utf8Json, T value, JsonSerializerOptions options = null) { @@ -212,25 +223,25 @@ namespace System.Text.Json.Serialization.Tests public override Task DeserializeWrapper(Stream utf8Json, JsonSerializerOptions options = null) { - T result = JsonSerializer.Deserialize(utf8Json, ResolveOptionsInstance(options)); + T result = JsonSerializer.Deserialize(ResolveReadStream(utf8Json), ResolveOptionsInstance(options)); return Task.FromResult(result); } public override Task DeserializeWrapper(Stream utf8Json, Type returnType, JsonSerializerOptions options = null) { - object result = JsonSerializer.Deserialize(utf8Json, returnType, ResolveOptionsInstance(options)); + object result = JsonSerializer.Deserialize(ResolveReadStream(utf8Json), returnType, ResolveOptionsInstance(options)); return Task.FromResult(result); } public override Task DeserializeWrapper(Stream utf8Json, JsonTypeInfo jsonTypeInfo) { - T result = JsonSerializer.Deserialize(utf8Json, jsonTypeInfo); + T result = JsonSerializer.Deserialize(ResolveReadStream(utf8Json), jsonTypeInfo); return Task.FromResult(result); } public override Task DeserializeWrapper(Stream utf8Json, Type returnType, JsonSerializerContext context) { - object result = JsonSerializer.Deserialize(utf8Json, returnType, context); + object result = JsonSerializer.Deserialize(ResolveReadStream(utf8Json), returnType, context); return Task.FromResult(result); } } @@ -653,5 +664,88 @@ namespace System.Text.Json.Serialization.Tests return smallBufferCopy; } } + + private sealed class Utf8BomInsertingStream : Stream + { + private const int Utf8BomLength = 3; + private readonly static byte[] s_utf8Bom = Encoding.UTF8.GetPreamble(); + + private readonly Stream _source; + private byte[]? _prefixBytes; + private int _prefixBytesOffset = 0; + private int _prefixBytesCount = 0; + + public Utf8BomInsertingStream(Stream source) + { + Debug.Assert(source.CanRead); + _source = source; + } + + public override bool CanRead => _source.CanRead; + public override bool CanSeek => false; + public override bool CanWrite => false; + + public override int Read(byte[] buffer, int offset, int count) + { + if (_prefixBytes is null) + { + // This is the first read operation; read the first 3 bytes + // from the source to determine if it already includes a BOM. + // Only insert a BOM if it's missing from the source stream. + + _prefixBytes = new byte[2 * Utf8BomLength]; + int bytesRead = ReadExactlyFromSource(_prefixBytes, Utf8BomLength, Utf8BomLength); + + if (_prefixBytes.AsSpan(Utf8BomLength).SequenceEqual(s_utf8Bom)) + { + _prefixBytesOffset = Utf8BomLength; + _prefixBytesCount = Utf8BomLength; + } + else + { + s_utf8Bom.CopyTo(_prefixBytes, 0); + _prefixBytesOffset = 0; + _prefixBytesCount = Utf8BomLength + bytesRead; + } + } + + int prefixBytesToWrite = Math.Min(_prefixBytesCount, count); + if (prefixBytesToWrite > 0) + { + _prefixBytes.AsSpan(_prefixBytesOffset, prefixBytesToWrite).CopyTo(buffer.AsSpan(offset, count)); + _prefixBytesOffset += prefixBytesToWrite; + _prefixBytesCount -= prefixBytesToWrite; + offset += prefixBytesToWrite; + count -= prefixBytesToWrite; + } + + return prefixBytesToWrite + _source.Read(buffer, offset, count); + } + + private int ReadExactlyFromSource(byte[] buffer, int offset, int count) + { + int totalRead = 0; + + while (totalRead < count) + { + int read = _source.Read(buffer, offset + totalRead, count - totalRead); + if (read == 0) + { + break; + } + + totalRead += read; + } + + return totalRead; + } + + public override long Length => throw new NotSupportedException(); + public override long Position { get => throw new NotSupportedException(); set => throw new NotSupportedException(); } + public override void Flush() => throw new NotSupportedException(); + public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException(); + public override void SetLength(long value) => throw new NotSupportedException(); + public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException(); + } } } -- cgit v1.2.3