Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/dotnet/runtime.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Toub <stoub@microsoft.com>2022-11-12 01:59:41 +0300
committerGitHub <noreply@github.com>2022-11-12 01:59:41 +0300
commit3e94fdc791206bd83f247dd87156bdbc14d516c0 (patch)
treeec3ce97c64f35bdf8e4e86a1c30dd491f9a545f4
parent484938a4692a47d019ec1b1abbe947d2d0a4c523 (diff)
React to CheckForOverflowUnderflow in regex source generator (#78228)
* React to CheckForOverflowUnderflow in regex source generator The regex source generator uses code patterns that might have arithmetic overflows, e.g. a bounds check with `(uint)index < span.Length`. These are intentional, and they're benign... unless the project/compilation has opted-in to overflow/underflow checking (CheckForOverflowUnderflow). In that case, the code for many patterns can start throwing false positive overflow exceptions, making the source generator unusable. This commit causes the generator to look at the CheckOverflow setting in the compilation options, and if it's set, to emit `unchecked { ... }` around all the relevant code. * Address PR feedback
-rw-r--r--src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs90
-rw-r--r--src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs41
-rw-r--r--src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorHelper.netcoreapp.cs13
-rw-r--r--src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs27
4 files changed, 105 insertions, 66 deletions
diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
index 9e08b3ee1d2..c54d27966b4 100644
--- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
+++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
@@ -172,8 +172,27 @@ namespace System.Text.RegularExpressions.Generator
}
/// <summary>Emits the code for the RunnerFactory. This is the actual logic for the regular expression.</summary>
- private static void EmitRegexDerivedTypeRunnerFactory(IndentedTextWriter writer, RegexMethod rm, Dictionary<string, string[]> requiredHelpers)
+ private static void EmitRegexDerivedTypeRunnerFactory(IndentedTextWriter writer, RegexMethod rm, Dictionary<string, string[]> requiredHelpers, bool checkOverflow)
{
+ void EnterCheckOverflow()
+ {
+ if (checkOverflow)
+ {
+ writer.WriteLine($"unchecked");
+ writer.WriteLine($"{{");
+ writer.Indent++;
+ }
+ }
+
+ void ExitCheckOverflow()
+ {
+ if (checkOverflow)
+ {
+ writer.Indent--;
+ writer.WriteLine($"}}");
+ }
+ }
+
writer.WriteLine($"/// <summary>Provides a factory for creating <see cref=\"RegexRunner\"/> instances to be used by methods on <see cref=\"Regex\"/>.</summary>");
writer.WriteLine($"private sealed class RunnerFactory : RegexRunnerFactory");
writer.WriteLine($"{{");
@@ -208,7 +227,9 @@ namespace System.Text.RegularExpressions.Generator
writer.WriteLine($" protected override void Scan(ReadOnlySpan<char> inputSpan)");
writer.WriteLine($" {{");
writer.Indent += 3;
+ EnterCheckOverflow();
(bool needsTryFind, bool needsTryMatch) = EmitScan(writer, rm);
+ ExitCheckOverflow();
writer.Indent -= 3;
writer.WriteLine($" }}");
if (needsTryFind)
@@ -220,7 +241,9 @@ namespace System.Text.RegularExpressions.Generator
writer.WriteLine($" private bool TryFindNextPossibleStartingPosition(ReadOnlySpan<char> inputSpan)");
writer.WriteLine($" {{");
writer.Indent += 3;
+ EnterCheckOverflow();
EmitTryFindNextPossibleStartingPosition(writer, rm, requiredHelpers);
+ ExitCheckOverflow();
writer.Indent -= 3;
writer.WriteLine($" }}");
}
@@ -233,7 +256,9 @@ namespace System.Text.RegularExpressions.Generator
writer.WriteLine($" private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan)");
writer.WriteLine($" {{");
writer.Indent += 3;
- EmitTryMatchAtCurrentPosition(writer, rm, requiredHelpers);
+ EnterCheckOverflow();
+ EmitTryMatchAtCurrentPosition(writer, rm, requiredHelpers, checkOverflow);
+ ExitCheckOverflow();
writer.Indent -= 3;
writer.WriteLine($" }}");
}
@@ -288,23 +313,24 @@ namespace System.Text.RegularExpressions.Generator
}
/// <summary>Adds the IsBoundary helper to the required helpers collection.</summary>
- private static void AddIsBoundaryHelper(Dictionary<string, string[]> requiredHelpers)
+ private static void AddIsBoundaryHelper(Dictionary<string, string[]> requiredHelpers, bool checkOverflow)
{
const string IsBoundary = nameof(IsBoundary);
if (!requiredHelpers.ContainsKey(IsBoundary))
{
+ string uncheckedKeyword = checkOverflow ? "unchecked" : "";
requiredHelpers.Add(IsBoundary, new string[]
{
- "/// <summary>Determines whether the specified index is a boundary.</summary>",
- "[MethodImpl(MethodImplOptions.AggressiveInlining)]",
- "internal static bool IsBoundary(ReadOnlySpan<char> inputSpan, int index)",
- "{",
- " int indexMinus1 = index - 1;",
- " return ((uint)indexMinus1 < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[indexMinus1])) !=",
- " ((uint)index < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[index]));",
- "",
- " static bool IsBoundaryWordChar(char ch) => IsWordChar(ch) || (ch == '\\u200C' | ch == '\\u200D');",
- "}",
+ $"/// <summary>Determines whether the specified index is a boundary.</summary>",
+ $"[MethodImpl(MethodImplOptions.AggressiveInlining)]",
+ $"internal static bool IsBoundary(ReadOnlySpan<char> inputSpan, int index)",
+ $"{{",
+ $" int indexMinus1 = index - 1;",
+ $" return {uncheckedKeyword}((uint)indexMinus1 < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[indexMinus1])) !=",
+ $" {uncheckedKeyword}((uint)index < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[index]));",
+ $"",
+ $" static bool IsBoundaryWordChar(char ch) => IsWordChar(ch) || (ch == '\\u200C' | ch == '\\u200D');",
+ $"}}",
});
AddIsWordCharHelper(requiredHelpers);
@@ -312,27 +338,27 @@ namespace System.Text.RegularExpressions.Generator
}
/// <summary>Adds the IsECMABoundary helper to the required helpers collection.</summary>
- private static void AddIsECMABoundaryHelper(Dictionary<string, string[]> requiredHelpers)
+ private static void AddIsECMABoundaryHelper(Dictionary<string, string[]> requiredHelpers, bool checkOverflow)
{
const string IsECMABoundary = nameof(IsECMABoundary);
if (!requiredHelpers.ContainsKey(IsECMABoundary))
{
+ string uncheckedKeyword = checkOverflow ? "unchecked" : "";
requiredHelpers.Add(IsECMABoundary, new string[]
{
- "/// <summary>Determines whether the specified index is a boundary (ECMAScript).</summary>",
- "[MethodImpl(MethodImplOptions.AggressiveInlining)]",
- "internal static bool IsECMABoundary(ReadOnlySpan<char> inputSpan, int index)",
- "{",
- " int indexMinus1 = index - 1;",
- " return ((uint)indexMinus1 < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[indexMinus1])) !=",
- " ((uint)index < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[index]));",
- "",
- " static bool IsECMAWordChar(char ch) =>",
- " ((((uint)ch - 'A') & ~0x20) < 26) || // ASCII letter",
- " (((uint)ch - '0') < 10) || // digit",
- " ch == '_' || // underscore",
- " ch == '\\u0130'; // latin capital letter I with dot above",
- "}",
+ $"/// <summary>Determines whether the specified index is a boundary (ECMAScript).</summary>",
+ $"[MethodImpl(MethodImplOptions.AggressiveInlining)]",
+ $"internal static bool IsECMABoundary(ReadOnlySpan<char> inputSpan, int index)",
+ $"{{",
+ $" int indexMinus1 = index - 1;",
+ $" return {uncheckedKeyword}((uint)indexMinus1 < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[indexMinus1])) !=",
+ $" {uncheckedKeyword}((uint)index < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[index]));",
+ $"",
+ $" static bool IsECMAWordChar(char ch) =>",
+ $" char.IsAsciiLetterOrDigit(ch) ||",
+ $" ch == '_' ||",
+ $" ch == '\\u0130'; // latin capital letter I with dot above",
+ $"}}",
});
}
}
@@ -426,7 +452,7 @@ namespace System.Text.RegularExpressions.Generator
/// <summary>Emits the body of the TryFindNextPossibleStartingPosition.</summary>
private static void EmitTryFindNextPossibleStartingPosition(IndentedTextWriter writer, RegexMethod rm, Dictionary<string, string[]> requiredHelpers)
{
- RegexOptions options = (RegexOptions)rm.Options;
+ RegexOptions options = rm.Options;
RegexTree regexTree = rm.Tree;
bool rtl = (options & RegexOptions.RightToLeft) != 0;
@@ -1034,7 +1060,7 @@ namespace System.Text.RegularExpressions.Generator
}
/// <summary>Emits the body of the TryMatchAtCurrentPosition.</summary>
- private static void EmitTryMatchAtCurrentPosition(IndentedTextWriter writer, RegexMethod rm, Dictionary<string, string[]> requiredHelpers)
+ private static void EmitTryMatchAtCurrentPosition(IndentedTextWriter writer, RegexMethod rm, Dictionary<string, string[]> requiredHelpers, bool checkOverflow)
{
// In .NET Framework and up through .NET Core 3.1, the code generated for RegexOptions.Compiled was effectively an unrolled
// version of what RegexInterpreter would process. The RegexNode tree would be turned into a series of opcodes via
@@ -2677,14 +2703,14 @@ namespace System.Text.RegularExpressions.Generator
call = node.Kind is RegexNodeKind.Boundary ?
$"!{HelpersTypeName}.IsBoundary" :
$"{HelpersTypeName}.IsBoundary";
- AddIsBoundaryHelper(requiredHelpers);
+ AddIsBoundaryHelper(requiredHelpers, checkOverflow);
}
else
{
call = node.Kind is RegexNodeKind.ECMABoundary ?
$"!{HelpersTypeName}.IsECMABoundary" :
$"{HelpersTypeName}.IsECMABoundary";
- AddIsECMABoundaryHelper(requiredHelpers);
+ AddIsECMABoundaryHelper(requiredHelpers, checkOverflow);
}
using (EmitBlock(writer, $"if ({call}(inputSpan, pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}))"))
diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs
index 15645d72fd7..ed506320a1a 100644
--- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs
+++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs
@@ -36,13 +36,24 @@ namespace System.Text.RegularExpressions.Generator
"#pragma warning disable CS0219 // Variable assigned but never used",
};
+ private record struct CompilationData(bool AllowUnsafe, bool CheckOverflow);
+
public void Initialize(IncrementalGeneratorInitializationContext context)
{
+ // To avoid invalidating every regex's output when anything from the compilation changes,
+ // we extract from it the only things we care about.
+ IncrementalValueProvider<CompilationData> compilationDataProvider =
+ context.CompilationProvider
+ .Select((x, _) =>
+ x.Options is CSharpCompilationOptions options ?
+ new CompilationData(options.AllowUnsafe, options.CheckOverflow) :
+ default);
+
// Produces one entry per generated regex. This may be:
// - Diagnostic in the case of a failure that should end the compilation
// - (RegexMethod regexMethod, string runnerFactoryImplementation, Dictionary<string, string[]> requiredHelpers) in the case of valid regex
// - (RegexMethod regexMethod, string reason, Diagnostic diagnostic) in the case of a limited-support regex
- IncrementalValueProvider<ImmutableArray<object>> codeOrDiagnostics =
+ IncrementalValueProvider<ImmutableArray<object>> results =
context.SyntaxProvider
// Find all MethodDeclarationSyntax nodes attributed with GeneratedRegex and gather the required information.
@@ -52,9 +63,13 @@ namespace System.Text.RegularExpressions.Generator
GetSemanticTargetForGeneration)
.Where(static m => m is not null)
+ // Incorporate the compilation data, as it impacts code generation.
+ .Combine(compilationDataProvider)
+
// Generate the RunnerFactory for each regex, if possible. This is where the bulk of the implementation occurs.
- .Select((state, _) =>
+ .Select((methodOrDiagnosticAndCompilationData, _) =>
{
+ object? state = methodOrDiagnosticAndCompilationData.Left;
if (state is not RegexMethod regexMethod)
{
Debug.Assert(state is Diagnostic);
@@ -74,26 +89,16 @@ namespace System.Text.RegularExpressions.Generator
var writer = new IndentedTextWriter(sw);
writer.Indent += 2;
writer.WriteLine();
- EmitRegexDerivedTypeRunnerFactory(writer, regexMethod, requiredHelpers);
+ EmitRegexDerivedTypeRunnerFactory(writer, regexMethod, requiredHelpers, methodOrDiagnosticAndCompilationData.Right.CheckOverflow);
writer.Indent -= 2;
- return (regexMethod, sw.ToString(), requiredHelpers);
+ return (regexMethod, sw.ToString(), requiredHelpers, methodOrDiagnosticAndCompilationData.Right);
})
.Collect();
- // To avoid invalidating every regex's output when anything from the compilation changes,
- // we extract from it the only things we care about: whether unsafe code is allowed,
- // and a name based on the assembly's name, and only that information is then fed into
- // RegisterSourceOutput along with all of the cached generated data from each regex.
- IncrementalValueProvider<(bool AllowUnsafe, string? AssemblyName)> compilationDataProvider =
- context.CompilationProvider
- .Select((x, _) => (x.Options is CSharpCompilationOptions { AllowUnsafe: true }, x.AssemblyName));
-
// When there something to output, take all the generated strings and concatenate them to output,
// and raise all of the created diagnostics.
- context.RegisterSourceOutput(codeOrDiagnostics.Combine(compilationDataProvider), static (context, compilationDataAndResults) =>
+ context.RegisterSourceOutput(results, static (context, results) =>
{
- ImmutableArray<object> results = compilationDataAndResults.Left;
-
// Report any top-level diagnostics.
bool allFailures = true;
foreach (object result in results)
@@ -150,7 +155,7 @@ namespace System.Text.RegularExpressions.Generator
context.ReportDiagnostic(limitedSupportResult.Item3);
regexMethod = limitedSupportResult.Item1;
}
- else if (result is ValueTuple<RegexMethod, string, Dictionary<string, string[]>> regexImpl)
+ else if (result is ValueTuple<RegexMethod, string, Dictionary<string, string[]>, CompilationData> regexImpl)
{
foreach (KeyValuePair<string, string[]> helper in regexImpl.Item3)
{
@@ -214,11 +219,11 @@ namespace System.Text.RegularExpressions.Generator
writer.WriteLine();
}
}
- else if (result is ValueTuple<RegexMethod, string, Dictionary<string, string[]>> regexImpl)
+ else if (result is ValueTuple<RegexMethod, string, Dictionary<string, string[]>, CompilationData> regexImpl)
{
if (!regexImpl.Item1.IsDuplicate)
{
- EmitRegexDerivedImplementation(writer, regexImpl.Item1, regexImpl.Item2, compilationDataAndResults.Right.AllowUnsafe);
+ EmitRegexDerivedImplementation(writer, regexImpl.Item1, regexImpl.Item2, regexImpl.Item4.AllowUnsafe);
writer.WriteLine();
}
}
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorHelper.netcoreapp.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorHelper.netcoreapp.cs
index 04e9683bdd2..340ed7fbea5 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorHelper.netcoreapp.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorHelper.netcoreapp.cs
@@ -68,13 +68,13 @@ namespace System.Text.RegularExpressions.Tests
}
private static async Task<(Compilation, GeneratorDriverRunResult)> RunGeneratorCore(
- string code, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
+ string code, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, bool checkOverflow = true, CancellationToken cancellationToken = default)
{
var proj = new AdhocWorkspace()
.AddSolution(SolutionInfo.Create(SolutionId.CreateNewId(), VersionStamp.Create()))
.AddProject("RegexGeneratorTest", "RegexGeneratorTest.dll", "C#")
.WithMetadataReferences(additionalRefs is not null ? References.Concat(additionalRefs) : References)
- .WithCompilationOptions(new CSharpCompilationOptions(OutputKind.DynamicallyLinkedLibrary, allowUnsafe: allowUnsafe)
+ .WithCompilationOptions(new CSharpCompilationOptions(OutputKind.DynamicallyLinkedLibrary, allowUnsafe: allowUnsafe, checkOverflow: checkOverflow)
.WithNullableContextOptions(NullableContextOptions.Enable))
.WithParseOptions(new CSharpParseOptions(langVersion))
.AddDocument("RegexGenerator.g.cs", SourceText.From(code, Encoding.UTF8)).Project;
@@ -91,9 +91,9 @@ namespace System.Text.RegularExpressions.Tests
}
internal static async Task<IReadOnlyList<Diagnostic>> RunGenerator(
- string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
+ string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, bool checkOverflow = true, CancellationToken cancellationToken = default)
{
- (Compilation comp, GeneratorDriverRunResult generatorResults) = await RunGeneratorCore(code, langVersion, additionalRefs, allowUnsafe, cancellationToken);
+ (Compilation comp, GeneratorDriverRunResult generatorResults) = await RunGeneratorCore(code, langVersion, additionalRefs, allowUnsafe, checkOverflow, cancellationToken);
if (!compile)
{
return generatorResults.Diagnostics;
@@ -114,9 +114,9 @@ namespace System.Text.RegularExpressions.Tests
}
internal static async Task<string> GenerateSourceText(
- string code, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
+ string code, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, bool checkOverflow = true, CancellationToken cancellationToken = default)
{
- (Compilation comp, GeneratorDriverRunResult generatorResults) = await RunGeneratorCore(code, langVersion, additionalRefs, allowUnsafe, cancellationToken);
+ (Compilation comp, GeneratorDriverRunResult generatorResults) = await RunGeneratorCore(code, langVersion, additionalRefs, allowUnsafe, checkOverflow, cancellationToken);
string generatedSource = string.Concat(generatorResults.GeneratedTrees.Select(t => t.ToString()));
if (generatorResults.Diagnostics.Length != 0)
@@ -197,6 +197,7 @@ namespace System.Text.RegularExpressions.Tests
.WithCompilationOptions(
new CSharpCompilationOptions(
OutputKind.DynamicallyLinkedLibrary,
+ checkOverflow: true,
warningLevel: 9999, // docs recommend using "9999" to catch all warnings now and in the future
specificDiagnosticOptions: ImmutableDictionary<string, ReportDiagnostic>.Empty.Add("SYSLIB1044", ReportDiagnostic.Hidden)) // regex with limited support
.WithNullableContextOptions(NullableContextOptions.Enable))
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs
index 642bba87ad0..fa86ca59e60 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs
@@ -6,6 +6,7 @@ using Microsoft.CodeAnalysis.CSharp;
using Microsoft.DotNet.RemoteExecutor;
using System.Collections.Generic;
using System.Diagnostics;
+using System.Linq;
using System.Globalization;
using System.Threading.Tasks;
using Xunit;
@@ -438,22 +439,28 @@ namespace System.Text.RegularExpressions.Tests
", compile: true));
}
+ public static IEnumerable<object[]> Valid_ClassWithNamespace_ConfigurationOptions_MemberData() =>
+ from pattern in new[] { "", "ab", "ab*c|de*?f|(ghi){1,3}", "\\\\w\\\\W\\\\b\\\\B\\\\d\\\\D\\\\s\\\\S" }
+ from options in new[] { RegexOptions.None, RegexOptions.IgnoreCase, RegexOptions.ECMAScript, RegexOptions.RightToLeft }
+ from allowUnsafe in new[] { false, true }
+ from checkOverflow in new[] { false, true }
+ select new object[] { pattern, options, allowUnsafe, checkOverflow };
+
[Theory]
- [InlineData(false)]
- [InlineData(true)]
- public async Task Valid_ClassWithNamespace(bool allowUnsafe)
+ [MemberData(nameof(Valid_ClassWithNamespace_ConfigurationOptions_MemberData))]
+ public async Task Valid_ClassWithNamespace_ConfigurationOptions(string pattern, RegexOptions options, bool allowUnsafe, bool checkOverflow)
{
- Assert.Empty(await RegexGeneratorHelper.RunGenerator(@"
+ Assert.Empty(await RegexGeneratorHelper.RunGenerator($@"
using System.Text.RegularExpressions;
namespace A
- {
+ {{
partial class C
- {
- [GeneratedRegex(""ab"")]
+ {{
+ [GeneratedRegex(""{pattern}"", RegexOptions.{options})]
private static partial Regex Valid();
- }
- }
- ", compile: true, allowUnsafe: allowUnsafe));
+ }}
+ }}
+ ", compile: true, allowUnsafe: allowUnsafe, checkOverflow: checkOverflow));
}
[Fact]