From d2aa46b71e7060524ed47d9a1b9fbb7c6ec0127b Mon Sep 17 00:00:00 2001 From: Sean McGivern Date: Tue, 18 Oct 2016 20:49:51 +0000 Subject: Merge branch '22782-external-link-filter-with-non-lowercase-scheme' into 'master' Add Nofollow for uppercased external url protocols Closes #22782 See merge request !6820 --- CHANGELOG.md | 1 + lib/banzai/filter/external_link_filter.rb | 34 +++++++++++++++++++--- .../lib/banzai/filter/external_link_filter_spec.rb | 34 ++++++++++++++++++++++ 3 files changed, 65 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c09957ba8ed..a816e30a9c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -119,6 +119,7 @@ Please view this file on the master branch, on stable branches it's out of date. - Retouch environments list and deployments list - Add multiple command support for all label related slash commands !6780 (barthc) - Add Container Registry on/off status to Admin Area !6638 (the-undefined) + - Add Nofollow for uppercased scheme in external urls !6820 (the-undefined) - Allow empty merge requests !6384 (Artem Sidorenko) - Grouped pipeline dropdown is a scrollable container - Cleanup Ci::ApplicationController. !6757 (Takuya Noguchi) diff --git a/lib/banzai/filter/external_link_filter.rb b/lib/banzai/filter/external_link_filter.rb index 0a29c547a4d..2f19b59e725 100644 --- a/lib/banzai/filter/external_link_filter.rb +++ b/lib/banzai/filter/external_link_filter.rb @@ -3,10 +3,17 @@ module Banzai # HTML Filter to modify the attributes of external links class ExternalLinkFilter < HTML::Pipeline::Filter def call - # Skip non-HTTP(S) links and internal links - doc.xpath("descendant-or-self::a[starts-with(@href, 'http') and not(starts-with(@href, '#{internal_url}'))]").each do |node| - node.set_attribute('rel', 'nofollow noreferrer') - node.set_attribute('target', '_blank') + links.each do |node| + href = href_to_lowercase_scheme(node["href"].to_s) + + unless node["href"].to_s == href + node.set_attribute('href', href) + end + + if href =~ /\Ahttp(s)?:\/\// && external_url?(href) + node.set_attribute('rel', 'nofollow noreferrer') + node.set_attribute('target', '_blank') + end end doc @@ -14,6 +21,25 @@ module Banzai private + def links + query = 'descendant-or-self::a[@href and not(@href = "")]' + doc.xpath(query) + end + + def href_to_lowercase_scheme(href) + scheme_match = href.match(/\A(\w+):\/\//) + + if scheme_match + scheme_match.to_s.downcase + scheme_match.post_match + else + href + end + end + + def external_url?(url) + !url.start_with?(internal_url) + end + def internal_url @internal_url ||= Gitlab.config.gitlab.url end diff --git a/spec/lib/banzai/filter/external_link_filter_spec.rb b/spec/lib/banzai/filter/external_link_filter_spec.rb index 695a5bc6fd4..167397c736b 100644 --- a/spec/lib/banzai/filter/external_link_filter_spec.rb +++ b/spec/lib/banzai/filter/external_link_filter_spec.rb @@ -46,4 +46,38 @@ describe Banzai::Filter::ExternalLinkFilter, lib: true do expect(doc.at_css('a')['rel']).to include 'noreferrer' end end + + context 'for non-lowercase scheme links' do + let(:doc_with_http) { filter %q(

Google

) } + let(:doc_with_https) { filter %q(

Google

) } + + it 'adds rel="nofollow" to external links' do + expect(doc_with_http.at_css('a')).to have_attribute('rel') + expect(doc_with_https.at_css('a')).to have_attribute('rel') + + expect(doc_with_http.at_css('a')['rel']).to include 'nofollow' + expect(doc_with_https.at_css('a')['rel']).to include 'nofollow' + end + + it 'adds rel="noreferrer" to external links' do + expect(doc_with_http.at_css('a')).to have_attribute('rel') + expect(doc_with_https.at_css('a')).to have_attribute('rel') + + expect(doc_with_http.at_css('a')['rel']).to include 'noreferrer' + expect(doc_with_https.at_css('a')['rel']).to include 'noreferrer' + end + + it 'skips internal links' do + internal_link = Gitlab.config.gitlab.url + "/sign_in" + url = internal_link.gsub(/\Ahttp/, 'HtTp') + act = %Q(Login) + exp = %Q(Login) + expect(filter(act).to_html).to eq(exp) + end + + it 'skips relative links' do + exp = act = %q(Relative URL) + expect(filter(act).to_html).to eq(exp) + end + end end -- cgit v1.2.3