diff options
author | Grzegorz Bizon <grzesiek.bizon@gmail.com> | 2019-02-04 16:09:20 +0300 |
---|---|---|
committer | Grzegorz Bizon <grzesiek.bizon@gmail.com> | 2019-02-05 19:36:48 +0300 |
commit | 69606cd957fdc69b41eedb8a957b2d65ff72b254 (patch) | |
tree | 7feb9050ea8d7599ca9558d7e527f8dc82753921 /lib/checks | |
parent | 91bd72255e13525bdcee92f74bc5eed5f35e3dce (diff) |
Use Nokogiri SAX parsing to reduce problems with memory footprint
Diffstat (limited to 'lib/checks')
-rw-r--r-- | lib/checks/anchors.rb | 70 |
1 file changed, 58 insertions, 12 deletions
diff --git a/lib/checks/anchors.rb b/lib/checks/anchors.rb index 7c56eff4..9d6121d1 100644 --- a/lib/checks/anchors.rb +++ b/lib/checks/anchors.rb @@ -10,11 +10,63 @@ module Gitlab end end + class Element + def initialize(name, attributes) + @name = name + @attributes = attributes + end + + def link? + @name == 'a' && !href.to_s.empty? + end + + def has_id? + !id.to_s.empty? + end + + def href + @href ||= attribute('href') + end + + def id + @id ||= attribute('id') + end + + private + + def attribute(name) + @attributes.find { |attr| attr.first == name }&.last + end + end + + class Document < Nokogiri::XML::SAX::Document + def initialize(page) + @page = page + end + + def start_element(name, attributes = []) + Gitlab::Docs::Element.new(name, attributes).tap do |element| + @page.hrefs << element.href if element.link? + @page.ids << element.id if element.has_id? + end + end + end + class Page attr_reader :file + attr_accessor :hrefs, :ids def initialize(file) @file = file + + @hrefs = [] + @ids = [] + + return unless exists? + + Nokogiri::HTML::SAX::Parser + .new(Gitlab::Docs::Document.new(self)) + .parse(File.read(file)) end def exists? @@ -31,20 +83,14 @@ module Gitlab @content ||= File.read(@file) end - def document - raise if content.to_s.empty? - - @doc ||= Nokogiri::HTML(content) - end - def links - @links ||= document.css(:a).map do |link| + @links ||= @hrefs.map do |link| Gitlab::Docs::Link.new(link, self) end end def has_anchor?(name) - document.at_css(%Q{[id="#{name}"]}) + @ids.include?(name) end def self.build(path) @@ -60,8 +106,7 @@ module Gitlab attr_reader :link, :href, :page def initialize(link, page) - @link = link - @href = link[:href] + @href = link @page = page end @@ -72,7 +117,7 @@ module Gitlab def anchor_name raise ArguentError unless to_anchor? - @href.to_s.partition('#').last + @href.to_s.partition('#').last.downcase end def internal_anchor? 
@@ -148,8 +193,9 @@ Nanoc::Check.define(:internal_anchors) do - source file `#{link.source_file}` - destination `#{link.destination_file}` ERROR - end end end + + add_issue "#{issues.count} offenses found!" end |