1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
# frozen_string_literal: true
module Gitlab
# An untrusted regular expression is any regexp containing patterns sourced
# from user input.
#
# Ruby's built-in regular expression library allows patterns which complete in
# exponential time, permitting denial-of-service attacks.
#
# Not all regular expression features are available in untrusted regexes, and
# there is a strict limit on total execution time. See the RE2 documentation
# at https://github.com/google/re2/wiki/Syntax for more details.
#
# This class doesn't change any instance variables, which allows it to be frozen
# and setup in constants.
class UntrustedRegexp
require_dependency 're2'
# recreate Ruby's \R metacharacter
# https://ruby-doc.org/3.2.2/Regexp.html#class-Regexp-label-Character+Classes
BACKSLASH_R = '(\n|\v|\f|\r|\x{0085}|\x{2028}|\x{2029}|\r\n)'
delegate :===, :source, to: :regexp
def initialize(pattern, multiline: false)
if multiline
pattern = "(?m)#{pattern}"
end
@regexp = RE2::Regexp.new(pattern, log_errors: false)
@scan_regexp = initialize_scan_regexp
raise RegexpError, regexp.error unless regexp.ok?
end
def replace_all(text, rewrite)
RE2.GlobalReplace(text, regexp, rewrite)
end
# There is no built-in replace with block support (like `gsub`). We can accomplish
# the same thing by parsing and rebuilding the string with the substitutions.
def replace_gsub(text)
new_text = +''
remainder = text
matched = match(remainder)
until matched.nil? || matched.to_a.compact.empty?
partitioned = remainder.partition(matched.to_s)
new_text << partitioned.first
remainder = partitioned.last
new_text << yield(matched)
matched = match(remainder)
end
new_text << remainder
end
def scan(text)
matches = scan_regexp.scan(text).to_a
matches.map!(&:first) if regexp.number_of_capturing_groups == 0
matches
end
def match(text)
scan_regexp.match(text)
end
def match?(text)
text.present? && scan(text).present?
end
def replace(text, rewrite)
RE2.Replace(text, regexp, rewrite)
end
# #scan returns an array of the groups captured, rather than MatchData.
# Use this to give the capture group name and grab the proper value
def extract_named_group(name, match)
return unless match
match_position = regexp.named_capturing_groups[name.to_s]
raise RegexpError, "Invalid named capture group: #{name}" unless match_position
match[match_position - 1]
end
def ==(other)
self.source == other.source
end
# Handles regular expressions with the preferred RE2 library where possible
# via UntustedRegex. Falls back to Ruby's built-in regular expression library
# when the syntax would be invalid in RE2.
#
# One difference between these is `(?m)` multi-line mode. Ruby regex enables
# this by default, but also handles `^` and `$` differently.
# See: https://www.regular-expressions.info/modifiers.html
def self.with_fallback(pattern, multiline: false)
UntrustedRegexp.new(pattern, multiline: multiline)
rescue RegexpError
raise if Feature.enabled?(:disable_unsafe_regexp)
if Feature.enabled?(:ci_unsafe_regexp_logger, type: :ops)
Gitlab::AppJsonLogger.info(
class: self.name,
regexp: pattern.to_s,
fabricated: 'unsafe ruby regexp'
)
end
Regexp.new(pattern)
end
private
attr_reader :regexp, :scan_regexp
# RE2 scan operates differently to Ruby scan when there are no capture
# groups, so work around it
def initialize_scan_regexp
if regexp.number_of_capturing_groups == 0
RE2::Regexp.new('(' + regexp.source + ')')
else
regexp
end
end
end
end
|