From ba60d4f6e4f3a6d3cb56c9320f475bee8f0b38da Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Thu, 22 Jun 2017 15:33:17 +0000 Subject: Merge branch '24570-use-re2-for-user-supplied-regexp-9-3' into 'security-9-3' 24570 use re2 for user supplied regexp 9 3 See merge request !2129 --- lib/gitlab/untrusted_regexp.rb | 53 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 lib/gitlab/untrusted_regexp.rb (limited to 'lib/gitlab/untrusted_regexp.rb') diff --git a/lib/gitlab/untrusted_regexp.rb b/lib/gitlab/untrusted_regexp.rb new file mode 100644 index 00000000000..8b43f0053d6 --- /dev/null +++ b/lib/gitlab/untrusted_regexp.rb @@ -0,0 +1,53 @@ +module Gitlab + # An untrusted regular expression is any regexp containing patterns sourced + # from user input. + # + # Ruby's built-in regular expression library allows patterns which complete in + # exponential time, permitting denial-of-service attacks. + # + # Not all regular expression features are available in untrusted regexes, and + # there is a strict limit on total execution time. See the RE2 documentation + # at https://github.com/google/re2/wiki/Syntax for more details. + class UntrustedRegexp + delegate :===, to: :regexp + + def initialize(pattern) + @regexp = RE2::Regexp.new(pattern, log_errors: false) + + raise RegexpError.new(regexp.error) unless regexp.ok? + end + + def replace_all(text, rewrite) + RE2.GlobalReplace(text, regexp, rewrite) + end + + def scan(text) + scan_regexp.scan(text).map do |match| + if regexp.number_of_capturing_groups == 0 + match.first + else + match + end + end + end + + def replace(text, rewrite) + RE2.Replace(text, regexp, rewrite) + end + + private + + attr_reader :regexp + + # RE2 scan operates differently to Ruby scan when there are no capture + # groups, so work around it + def scan_regexp + @scan_regexp ||= + if regexp.number_of_capturing_groups == 0 + RE2::Regexp.new('(' + regexp.source + ')') + else + regexp + end + end + end +end -- cgit v1.2.3