Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-docs.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Grzegorz Bizon <grzesiek.bizon@gmail.com> 2019-02-04 16:09:20 +0300
committer Grzegorz Bizon <grzesiek.bizon@gmail.com> 2019-02-05 19:36:48 +0300
commit 69606cd957fdc69b41eedb8a957b2d65ff72b254 (patch)
tree 7feb9050ea8d7599ca9558d7e527f8dc82753921 /lib/checks
parent 91bd72255e13525bdcee92f74bc5eed5f35e3dce (diff)
Use Nokogiri SAX parsing to reduce problems with memory footprint
Diffstat (limited to 'lib/checks')
-rw-r--r-- lib/checks/anchors.rb 70
1 files changed, 58 insertions, 12 deletions
diff --git a/lib/checks/anchors.rb b/lib/checks/anchors.rb
index 7c56eff4..9d6121d1 100644
--- a/lib/checks/anchors.rb
+++ b/lib/checks/anchors.rb
@@ -10,11 +10,63 @@ module Gitlab
end
end
+ class Element
+ def initialize(name, attributes)
+ @name = name
+ @attributes = attributes
+ end
+
+ def link?
+ @name == 'a' && !href.to_s.empty?
+ end
+
+ def has_id?
+ !id.to_s.empty?
+ end
+
+ def href
+ @href ||= attribute('href')
+ end
+
+ def id
+ @id ||= attribute('id')
+ end
+
+ private
+
+ def attribute(name)
+ @attributes.find { |attr| attr.first == name }&.last
+ end
+ end
+
+ class Document < Nokogiri::XML::SAX::Document
+ def initialize(page)
+ @page = page
+ end
+
+ def start_element(name, attributes = [])
+ Gitlab::Docs::Element.new(name, attributes).tap do |element|
+ @page.hrefs << element.href if element.link?
+ @page.ids << element.id if element.has_id?
+ end
+ end
+ end
+
class Page
attr_reader :file
+ attr_accessor :hrefs, :ids
def initialize(file)
@file = file
+
+ @hrefs = []
+ @ids = []
+
+ return unless exists?
+
+ Nokogiri::HTML::SAX::Parser
+ .new(Gitlab::Docs::Document.new(self))
+ .parse(File.read(file))
end
def exists?
@@ -31,20 +83,14 @@ module Gitlab
@content ||= File.read(@file)
end
- def document
- raise if content.to_s.empty?
-
- @doc ||= Nokogiri::HTML(content)
- end
-
def links
- @links ||= document.css(:a).map do |link|
+ @links ||= @hrefs.map do |link|
Gitlab::Docs::Link.new(link, self)
end
end
def has_anchor?(name)
- document.at_css(%Q{[id="#{name}"]})
+ @ids.include?(name)
end
def self.build(path)
@@ -60,8 +106,7 @@ module Gitlab
attr_reader :link, :href, :page
def initialize(link, page)
- @link = link
- @href = link[:href]
+ @href = link
@page = page
end
@@ -72,7 +117,7 @@ module Gitlab
def anchor_name
raise ArguentError unless to_anchor?
- @href.to_s.partition('#').last
+ @href.to_s.partition('#').last.downcase
end
def internal_anchor?
@@ -148,8 +193,9 @@ Nanoc::Check.define(:internal_anchors) do
- source file `#{link.source_file}`
- destination `#{link.destination_file}`
ERROR
-
end
end
end
+
+ add_issue "#{issues.count} offenses found!"
end