Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/robots_txt/parser.rb')
-rw-r--r-- lib/gitlab/robots_txt/parser.rb 60
1 file changed, 47 insertions, 13 deletions
diff --git a/lib/gitlab/robots_txt/parser.rb b/lib/gitlab/robots_txt/parser.rb
index b9a3837e468..604d2f9b35b 100644
--- a/lib/gitlab/robots_txt/parser.rb
+++ b/lib/gitlab/robots_txt/parser.rb
@@ -3,34 +3,68 @@
module Gitlab
module RobotsTxt
class Parser
- attr_reader :disallow_rules
+ DISALLOW_REGEX = /^disallow: /i.freeze
+ ALLOW_REGEX = /^allow: /i.freeze
+
+ attr_reader :disallow_rules, :allow_rules
def initialize(content)
@raw_content = content
- @disallow_rules = parse_raw_content!
+ @disallow_rules, @allow_rules = parse_raw_content!
end
def disallowed?(path)
+ return false if allow_rules.any? { |rule| path =~ rule }
+
disallow_rules.any? { |rule| path =~ rule }
end
private
- # This parser is very basic as it only knows about `Disallow:` lines,
- # and simply ignores all other lines.
+ # This parser is very basic as it only knows about `Disallow:`
+ # and `Allow:` lines, and simply ignores all other lines.
#
- # Order of predecence, 'Allow:`, etc are ignored for now.
+ # Patterns ending in `$`, and `*` for 0 or more characters are recognized.
+ #
+ # It is case insensitive and `Allow` rules take precedence
+ # over `Disallow`.
def parse_raw_content!
- @raw_content.each_line.map do |line|
- if line.start_with?('Disallow:')
- value = line.sub('Disallow:', '').strip
- value = Regexp.escape(value).gsub('\*', '.*')
- Regexp.new("^#{value}")
- else
- nil
+ disallowed = []
+ allowed = []
+
+ @raw_content.each_line.each do |line|
+ if disallow_rule?(line)
+ disallowed << get_disallow_pattern(line)
+ elsif allow_rule?(line)
+ allowed << get_allow_pattern(line)
end
- end.compact
+ end
+
+ [disallowed, allowed]
+ end
+
+ def disallow_rule?(line)
+ line =~ DISALLOW_REGEX
+ end
+
+ def get_disallow_pattern(line)
+ get_pattern(line, DISALLOW_REGEX)
+ end
+
+ def allow_rule?(line)
+ line =~ ALLOW_REGEX
+ end
+
+ def get_allow_pattern(line)
+ get_pattern(line, ALLOW_REGEX)
+ end
+
+ def get_pattern(line, rule_regex)
+ value = line.sub(rule_regex, '').strip
+ value = Regexp.escape(value).gsub('\*', '.*')
+ value = value.sub(/\\\$$/, '$')
+ Regexp.new("^#{value}")
end
end
end