From 4cf3aae124b5e05e0c1642ee983a1e4e779bea27 Mon Sep 17 00:00:00 2001 From: Brett Walker Date: Mon, 14 Jan 2019 16:57:54 -0600 Subject: Show tooltip for malicious looking links Such as those with IDN homographs or embedded right-to-left (RTLO) characters. Autolinked hrefs should be escaped --- lib/banzai/filter/autolink_filter.rb | 13 +++-- lib/banzai/filter/external_link_filter.rb | 85 ++++++++++++++++++++++++++++--- lib/banzai/pipeline/email_pipeline.rb | 3 +- 3 files changed, 89 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb index deda4b1872e..f3061bad4ff 100644 --- a/lib/banzai/filter/autolink_filter.rb +++ b/lib/banzai/filter/autolink_filter.rb @@ -8,6 +8,10 @@ module Banzai # # Based on HTML::Pipeline::AutolinkFilter # + # Note that our CommonMark parser, `commonmarker` (using the autolink extension) + # handles standard autolinking, like http/https. We detect additional + # schemes (smb, rdar, etc). + # # Context options: # :autolink - Boolean, skips all processing done by this filter when false # :link_attr - Hash of attributes for the generated links @@ -107,10 +111,13 @@ module Banzai end end - # match has come from node.to_html above, so we know it's encoded - # correctly. + # Since this came from a Text node, make sure the new href is encoded. + # `commonmarker` percent encodes the domains of links it handles, so + # do the same (instead of using `normalized_encode`). 
+ href_safe = Addressable::URI.encode(match).html_safe + html_safe_match = match.html_safe - options = link_options.merge(href: html_safe_match) + options = link_options.merge(href: href_safe) content_tag(:a, html_safe_match, options) + dropped end diff --git a/lib/banzai/filter/external_link_filter.rb b/lib/banzai/filter/external_link_filter.rb index 4f60b6f84c6..61ee3eac216 100644 --- a/lib/banzai/filter/external_link_filter.rb +++ b/lib/banzai/filter/external_link_filter.rb @@ -4,17 +4,29 @@ module Banzai module Filter # HTML Filter to modify the attributes of external links class ExternalLinkFilter < HTML::Pipeline::Filter - SCHEMES = ['http', 'https', nil].freeze + SCHEMES = ['http', 'https', nil].freeze + RTLO = "\u202E".freeze + ENCODED_RTLO = '%E2%80%AE'.freeze def call links.each do |node| - uri = uri(node['href'].to_s) - - node.set_attribute('href', uri.to_s) if uri + # URI.parse does stricter checking on the url than Addressable, + # such as on `mailto:` links. Since we've been using it, do an + # initial parse for validity and then use Addressable + # for IDN support, etc + uri = uri_strict(node['href'].to_s) + if uri + node.set_attribute('href', uri.to_s) + addressable_uri = addressable_uri(node['href']) + else + addressable_uri = nil + end - if SCHEMES.include?(uri&.scheme) && !internal_url?(uri) - node.set_attribute('rel', 'nofollow noreferrer noopener') - node.set_attribute('target', '_blank') + unless internal_url?(addressable_uri) + punycode_autolink_node!(addressable_uri, node) + sanitize_link_text!(node) + add_malicious_tooltip!(addressable_uri, node) + add_nofollow!(addressable_uri, node) end end @@ -23,12 +35,18 @@ module Banzai private - def uri(href) + def uri_strict(href) URI.parse(href) rescue URI::Error nil end + def addressable_uri(href) + Addressable::URI.parse(href) + rescue Addressable::URI::InvalidURIError + nil + end + def links query = 'descendant-or-self::a[@href and not(@href = "")]' doc.xpath(query) @@ -45,6 +63,57 @@ module 
Banzai def internal_url @internal_url ||= URI.parse(Gitlab.config.gitlab.url) end + + # Only replace an autolink with an IDN with its punycode + # version if we need emailable links. Otherwise let it + # be shown normally and the tooltips will show the + # punycode version. + def punycode_autolink_node!(uri, node) + return unless uri + return unless context[:emailable_links] + + unencoded_uri_str = Addressable::URI.unencode(node['href']) + + if unencoded_uri_str == node.content && idn?(uri) + node.content = uri.normalize + end + end + + # escape any right-to-left (RTLO) characters in link text + def sanitize_link_text!(node) + node.inner_html = node.inner_html.gsub(RTLO, ENCODED_RTLO) + end + + # If the domain is an international domain name (IDN), + # let's expose with a tooltip in case it's intended + # to be malicious. This is particularly useful for links + # where the link text is not the same as the actual link. + # We will continue to show the unicode version of the domain + # in autolinked link text, which could contain emojis, etc. 
+ # + # Also show the tooltip if the url contains the RTLO character, + # as this is an indicator of a malicious link + def add_malicious_tooltip!(uri, node) + if idn?(uri) || has_encoded_rtlo?(uri) + node.add_class('has-tooltip') + node.set_attribute('title', uri.normalize) + end + end + + def add_nofollow!(uri, node) + if SCHEMES.include?(uri&.scheme) + node.set_attribute('rel', 'nofollow noreferrer noopener') + node.set_attribute('target', '_blank') + end + end + + def idn?(uri) + uri&.normalized_host&.start_with?('xn--') + end + + def has_encoded_rtlo?(uri) + uri&.to_s&.include?(ENCODED_RTLO) + end end end end diff --git a/lib/banzai/pipeline/email_pipeline.rb b/lib/banzai/pipeline/email_pipeline.rb index 2c08581ce0d..fc51063c06c 100644 --- a/lib/banzai/pipeline/email_pipeline.rb +++ b/lib/banzai/pipeline/email_pipeline.rb @@ -11,7 +11,8 @@ module Banzai def self.transform_context(context) super(context).merge( - only_path: false + only_path: false, + emailable_links: true ) end end -- cgit v1.2.3