diff options
Diffstat (limited to 'lib/gitlab/robots_txt/parser.rb')
-rw-r--r-- | lib/gitlab/robots_txt/parser.rb | 37 |
1 files changed, 37 insertions, 0 deletions
# frozen_string_literal: true

module Gitlab
  module RobotsTxt
    # Minimal robots.txt parser.
    #
    # It extracts the `Disallow:` rules from the given content and answers
    # whether a path is covered by at least one of them.
    #
    # This parser is deliberately basic: it only knows about `Disallow:` lines
    # and simply ignores all other lines. Order of precedence, `Allow:`,
    # `User-agent:` grouping, etc. are ignored for now.
    class Parser
      # Directive name at the start of a line. Directive names are matched
      # case-insensitively, and whitespace is tolerated before the name and
      # around the colon.
      DISALLOW_DIRECTIVE = /\A\s*disallow\s*:/i.freeze

      # Everything from `#` to the end of the line is a comment.
      INLINE_COMMENT = /#.*/.freeze

      private_constant :DISALLOW_DIRECTIVE, :INLINE_COMMENT

      # @return [Array<Regexp>] one compiled pattern per non-empty `Disallow:` rule
      attr_reader :disallow_rules

      # @param content [String, nil] raw robots.txt content; nil is treated as
      #   an empty file (no rules)
      def initialize(content)
        # `scrub` replaces invalid byte sequences so that a stray binary byte
        # in an unrelated line cannot make the regexp matching below raise.
        @raw_content = content.to_s.scrub

        @disallow_rules = parse_raw_content!
      end

      # @param path [String, nil] request path to check
      # @return [Boolean] true when any `Disallow:` rule matches the beginning
      #   of the path; false for a nil path
      def disallowed?(path)
        disallow_rules.any? { |rule| rule.match?(path) }
      end

      private

      # Builds one Regexp per `Disallow:` line.
      #
      # * `*` in a rule matches any run of characters; every other character
      #   is matched literally.
      # * A rule matches when the path starts with it (anchored with `\A`, so
      #   an embedded newline in the path cannot start a new match).
      # * An empty `Disallow:` value means "nothing is disallowed" and is
      #   skipped; compiling it would yield a pattern matching every path.
      #
      # @return [Array<Regexp>]
      def parse_raw_content!
        @raw_content.each_line.each_with_object([]) do |line, rules|
          directive = line.match(DISALLOW_DIRECTIVE)
          next unless directive

          value = directive.post_match.sub(INLINE_COMMENT, '').strip
          next if value.empty?

          pattern = Regexp.escape(value).gsub('\*', '.*')
          rules << Regexp.new("\\A#{pattern}")
        end
      end
    end
  end
end