diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2020-11-19 11:27:35 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2020-11-19 11:27:35 +0300 |
commit | 7e9c479f7de77702622631cff2628a9c8dcbc627 (patch) | |
tree | c8f718a08e110ad7e1894510980d2155a6549197 /lib/gitlab/robots_txt | |
parent | e852b0ae16db4052c1c567d9efa4facc81146e88 (diff) |
Add latest changes from gitlab-org/gitlab@13-6-stable-ee (tag: v13.6.0-rc42)
Diffstat (limited to 'lib/gitlab/robots_txt')
-rw-r--r-- | lib/gitlab/robots_txt/parser.rb | 60 |
1 files changed, 47 insertions, 13 deletions
diff --git a/lib/gitlab/robots_txt/parser.rb b/lib/gitlab/robots_txt/parser.rb index b9a3837e468..604d2f9b35b 100644 --- a/lib/gitlab/robots_txt/parser.rb +++ b/lib/gitlab/robots_txt/parser.rb @@ -3,34 +3,68 @@ module Gitlab module RobotsTxt class Parser - attr_reader :disallow_rules + DISALLOW_REGEX = /^disallow: /i.freeze + ALLOW_REGEX = /^allow: /i.freeze + + attr_reader :disallow_rules, :allow_rules def initialize(content) @raw_content = content - @disallow_rules = parse_raw_content! + @disallow_rules, @allow_rules = parse_raw_content! end def disallowed?(path) + return false if allow_rules.any? { |rule| path =~ rule } + disallow_rules.any? { |rule| path =~ rule } end private - # This parser is very basic as it only knows about `Disallow:` lines, - # and simply ignores all other lines. + # This parser is very basic as it only knows about `Disallow:` + # and `Allow:` lines, and simply ignores all other lines. # - # Order of predecence, 'Allow:`, etc are ignored for now. + # Patterns ending in `$`, and `*` for 0 or more characters are recognized. + # + # It is case insensitive and `Allow` rules takes precedence + # over `Disallow`. def parse_raw_content! 
- @raw_content.each_line.map do |line| - if line.start_with?('Disallow:') - value = line.sub('Disallow:', '').strip - value = Regexp.escape(value).gsub('\*', '.*') - Regexp.new("^#{value}") - else - nil + disallowed = [] + allowed = [] + + @raw_content.each_line.each do |line| + if disallow_rule?(line) + disallowed << get_disallow_pattern(line) + elsif allow_rule?(line) + allowed << get_allow_pattern(line) end - end.compact + end + + [disallowed, allowed] + end + + def disallow_rule?(line) + line =~ DISALLOW_REGEX + end + + def get_disallow_pattern(line) + get_pattern(line, DISALLOW_REGEX) + end + + def allow_rule?(line) + line =~ ALLOW_REGEX + end + + def get_allow_pattern(line) + get_pattern(line, ALLOW_REGEX) + end + + def get_pattern(line, rule_regex) + value = line.sub(rule_regex, '').strip + value = Regexp.escape(value).gsub('\*', '.*') + value = value.sub(/\\\$$/, '$') + Regexp.new("^#{value}") end end end |