diff options
author | github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> | 2022-09-09 07:34:21 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-09 07:34:21 +0300 |
commit | d18ff874010ec56449c830ee8120c82702e71568 (patch) | |
tree | 3d6fec66b124ba0fa1a6d4d67a6de603d777377c | |
parent | acf0dd20784a102670e459f1a869c1b4a34a388f (diff) |
[release/7.0] Fix RegexOptions.NonBacktracking matching end anchors at timeout check boundaries (#75308)
* Test for exposing timeout check bug
* Fix timeout check bug
* Improve naming
* Simplify test
* Make test search range higher
* Simplify test for timeout check bug
Co-authored-by: Olli Saarikivi <olsaarik@microsoft.com>
2 files changed, 24 insertions, 8 deletions
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs index 70390343c34..89650f3e27a 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs @@ -448,13 +448,13 @@ namespace System.Text.RegularExpressions.Symbolic // still check the timeout now and again to provide some semblance of the behavior a developer experiences with // the backtracking engines. We can, however, choose a large number here, since it's not actually needed for security. const int CharsPerTimeoutCheck = 1_000; - ReadOnlySpan<char> inputForInnerLoop = _checkTimeout && input.Length - pos > CharsPerTimeoutCheck ? - input.Slice(0, pos + CharsPerTimeoutCheck) : - input; + int innerLoopLength = _checkTimeout && input.Length - pos > CharsPerTimeoutCheck ? + pos + CharsPerTimeoutCheck : + input.Length; bool done = currentState.NfaState is not null ? - FindEndPositionDeltas<NfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) : - FindEndPositionDeltas<DfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate); + FindEndPositionDeltas<NfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(input, innerLoopLength, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) : + FindEndPositionDeltas<DfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(input, innerLoopLength, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate); // If the inner loop indicates that the search finished (for example due to reaching a deadend state) or // there is no more input available, then the whole search is done. @@ -466,7 +466,7 @@ namespace System.Text.RegularExpressions.Symbolic // The search did not finish, so we either failed to transition (which should only happen if we were in DFA mode and // need to switch over to NFA mode) or ran out of input in the inner loop. Check if the inner loop still had more // input available. - if (pos < inputForInnerLoop.Length) + if (pos < innerLoopLength) { // Because there was still more input available, a failure to transition in DFA mode must be the cause // of the early exit. Upgrade to NFA mode. @@ -505,7 +505,7 @@ namespace System.Text.RegularExpressions.Symbolic /// 0 if iteration completed because we reached an initial state. /// A negative value if iteration completed because we ran out of input or we failed to transition. /// </returns> - private bool FindEndPositionDeltas<TStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(ReadOnlySpan<char> input, RegexRunnerMode mode, + private bool FindEndPositionDeltas<TStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(ReadOnlySpan<char> input, int length, RegexRunnerMode mode, ref int posRef, ref CurrentState state, ref int endPosRef, ref int endStateIdRef, ref int initialStatePosRef, ref int initialStatePosCandidateRef) where TStateHandler : struct, IStateHandler where TInputReader : struct, IInputReader @@ -561,7 +561,7 @@ namespace System.Text.RegularExpressions.Symbolic } // If there is more input available try to transition with the next character. - if (!IsMintermId(positionId) || !TStateHandler.TryTakeTransition(this, ref state, positionId)) + if (pos >= length || !TStateHandler.TryTakeTransition(this, ref state, positionId)) { return false; } diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index 9f73f9f1c30..b5b27236a56 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -1278,6 +1278,22 @@ namespace System.Text.RegularExpressions.Tests Assert.InRange(sw.Elapsed.TotalSeconds, 0, 10); // arbitrary upper bound that should be well above what's needed with a 1ms timeout } + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNetCore))] + public void NonBacktracking_NoEndAnchorMatchAtTimeoutCheck() + { + // This constant must be at least as large as the one in the implementation that sets the maximum number + // of innermost loop iterations between timeout checks. + const int CharsToTriggerTimeoutCheck = 10000; + // Check that it is indeed large enough to trigger timeouts. If this fails the constant above needs to be larger. + Assert.Throws<RegexMatchTimeoutException>(() => new Regex("a*", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromTicks(1)) + .Match(new string('a', CharsToTriggerTimeoutCheck))); + + // The actual test: ^a*$ shouldn't match in a string ending in 'b' + Regex testPattern = new Regex("^a*$", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromHours(1)); + string input = string.Concat(new string('a', CharsToTriggerTimeoutCheck), 'b'); + Assert.False(testPattern.IsMatch(input)); + } + public static IEnumerable<object[]> Match_Advanced_TestData() { foreach (RegexEngine engine in RegexHelpers.AvailableEngines) |