Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/robots_txt/parser.rb')
-rw-r--r-- lib/gitlab/robots_txt/parser.rb 37
1 files changed, 37 insertions, 0 deletions
diff --git a/lib/gitlab/robots_txt/parser.rb b/lib/gitlab/robots_txt/parser.rb
new file mode 100644
index 00000000000..b9a3837e468
--- /dev/null
+++ b/lib/gitlab/robots_txt/parser.rb
@@ -0,0 +1,37 @@
+# frozen_string_literal: true
+
module Gitlab
  module RobotsTxt
    # Minimal robots.txt parser that answers whether a path is disallowed.
    #
    # Only `Disallow:` lines are understood; every other line (for example
    # `User-agent:` or `Allow:`) is ignored, so each rule is applied
    # regardless of which crawler it was written for.
    class Parser
      DISALLOW_PREFIX = 'Disallow:'

      # @return [Array<Regexp>] one start-anchored pattern per non-empty
      #   `Disallow:` rule, in file order
      attr_reader :disallow_rules

      # @param content [String, nil] raw robots.txt body; nil is treated as
      #   an empty file
      def initialize(content)
        @raw_content = content

        @disallow_rules = parse_raw_content!
      end

      # @param path [String] path to check against the parsed rules
      # @return [Boolean] true when at least one `Disallow:` rule matches the
      #   beginning of +path+
      def disallowed?(path)
        # `match?` skips building MatchData that nobody reads.
        disallow_rules.any? { |rule| rule.match?(path) }
      end

      private

      # This parser is very basic as it only knows about `Disallow:` lines,
      # and simply ignores all other lines.
      #
      # Order of precedence, `Allow:`, etc. are ignored for now.
      #
      # @return [Array<Regexp>]
      def parse_raw_content!
        @raw_content.to_s.each_line.map { |line| disallow_rule_for(line) }.compact
      end

      # Builds the pattern for a single robots.txt line.
      #
      # @param line [String]
      # @return [Regexp, nil] nil when the line is not a usable `Disallow:` rule
      def disallow_rule_for(line)
        return unless line.start_with?(DISALLOW_PREFIX)

        value = line.sub(DISALLOW_PREFIX, '').strip
        # An empty `Disallow:` means "nothing is disallowed"; compiling it
        # would yield a pattern that matches every path.
        return if value.empty?

        # Escape everything, then turn the robots.txt `*` wildcard back into
        # "any run of characters".
        pattern = Regexp.escape(value).gsub('\*', '.*')

        # \A anchors at the start of the string; ^ would also match after any
        # newline embedded in the path.
        Regexp.new("\\A#{pattern}")
      end
    end
  end
end