No HTML-only email please

author: Douwe Maan <douwe@gitlab.com> 2015-08-22 02:09:55 +0300
committer: Douwe Maan <douwe@gitlab.com> 2015-08-22 02:09:55 +0300
commit: 15fc7bd6139f0b429c05c055b4cfab561c926e08 (patch)
tree: 5ae7287c076fe74fbab3140b22dcacdd2f96c460 /lib/gitlab
parent: 3abb356dd215235765f89c78c98655dc62688b77 (diff)
2 files changed, 6 insertions, 153 deletions
diff --git a/lib/gitlab/email/html_cleaner.rb b/lib/gitlab/email/html_cleaner.rb
deleted file mode 100644
index e1ae9eee56c..00000000000
--- a/lib/gitlab/email/html_cleaner.rb
+++ /dev/null
@@ -1,135 +0,0 @@
-# Taken mostly from Discourse's Email::HtmlCleaner
-module Gitlab
-  module Email
-    # HtmlCleaner cleans up the extremely dirty HTML that many email clients
-    # generate by stripping out any excess divs or spans, removing styling in
-    # the process (which also makes the html more suitable to be parsed as
-    # Markdown).
-    class HtmlCleaner
-      # Elements to hoist all children out of
-      HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td)
-      # Node types to always delete
-      HTML_DELETE_ELEMENT_TYPES = [
-        Nokogiri::XML::Node::DTD_NODE,
-        Nokogiri::XML::Node::COMMENT_NODE,
-      ]
-
-      # Private variables:
-      #   @doc - nokogiri document
-      #   @out - same as @doc, but only if trimming has occured
-      def initialize(html)
-        if html.is_a?(String)
-          @doc = Nokogiri::HTML(html)
-        else
-          @doc = html
-        end
-      end
-
-      class << self
-        # HtmlCleaner.trim(inp, opts={})
-        #
-        # Arguments:
-        #   inp - Either a HTML string or a Nokogiri document.
-        # Options:
-        #   :return => :doc, :string
-        #     Specify the desired return type.
-        #     Defaults to the type of the input.
-        #     A value of :string is equivalent to calling get_document_text()
-        #     on the returned document.
-        def trim(inp, opts={})
-          cleaner = HtmlCleaner.new(inp)
-
-          opts[:return] ||= (inp.is_a?(String) ? :string : :doc)
-
-          if opts[:return] == :string
-            cleaner.output_html
-          else
-            cleaner.output_document
-          end
-        end
-
-        # HtmlCleaner.get_document_text(doc)
-        #
-        # Get the body portion of the document, including html, as a string.
-        def get_document_text(doc)
-          body = doc.xpath('//body')
-          if body
-            body.inner_html
-          else
-            doc.inner_html
-          end
-        end
-      end
-
-      def output_document
-        @out ||= begin
-          doc = @doc
-          trim_process_node doc
-          add_newlines doc
-          doc
-        end
-      end
-
-      def output_html
-        HtmlCleaner.get_document_text(output_document)
-      end
-
-      private
-
-      def add_newlines(doc)
-        # Replace <br> tags with a markdown \n
-        doc.xpath('//br').each do |br|
-          br.replace(new_linebreak_node doc, 2)
-        end
-        # Surround <p> tags with newlines, to help with line-wise postprocessing
-        # and ensure markdown paragraphs
-        doc.xpath('//p').each do |p|
-          p.before(new_linebreak_node doc)
-          p.after(new_linebreak_node doc, 2)
-        end
-      end
-
-      def new_linebreak_node(doc, count=1)
-        Nokogiri::XML::Text.new("\n" * count, doc)
-      end
-
-      def trim_process_node(node)
-        if should_hoist?(node)
-          hoisted = trim_hoist_element node
-          hoisted.each { |child| trim_process_node child }
-        elsif should_delete?(node)
-          node.remove
-        else
-          if children = node.children
-            children.each { |child| trim_process_node child }
-          end
-        end
-
-        node
-      end
-
-      def trim_hoist_element(element)
-        hoisted = []
-        element.children.each do |child|
-          element.before(child)
-          hoisted << child
-        end
-        element.remove
-        hoisted
-      end
-
-      def should_hoist?(node)
-        return false unless node.element?
-        HTML_HOIST_ELEMENTS.include? node.name
-      end
-
-      def should_delete?(node)
-        return true if HTML_DELETE_ELEMENT_TYPES.include? node.type
-        return true if node.element? && node.name == 'head'
-        return true if node.text? && node.text.strip.blank?
-
-        false
-      end
-    end
-  end
-end
diff --git a/lib/gitlab/email/reply_parser.rb b/lib/gitlab/email/reply_parser.rb
index 6e768e46a71..6ed36b51f12 100644
--- a/lib/gitlab/email/reply_parser.rb
+++ b/lib/gitlab/email/reply_parser.rb
@@ -23,31 +23,19 @@ module Gitlab
       private
 
       def select_body(message)
-        html = nil
-        text = nil
-
-        if message.multipart?
-          html = fix_charset(message.html_part)
-          text = fix_charset(message.text_part)
-        elsif message.content_type =~ /text\/html/
-          html = fix_charset(message)
-        end
+        text    = message.text_part if message.multipart?
+        text  ||= message           if message.content_type !~ /text\/html/
 
-        # prefer plain text
-        return text if text
+        return "" unless text
 
-        if html
-          body = HtmlCleaner.new(html).output_html
-        else
-          body = fix_charset(message)
-        end
+        text = fix_charset(text)
 
         # Certain trigger phrases that means we didn't parse correctly
-        if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
+        if text =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
           return ""
         end
 
-        body
+        text
       end
 
       # Force encoding to UTF-8 on a Mail::Message or Mail::Part
author	Douwe Maan <douwe@gitlab.com>	2015-08-22 02:09:55 +0300
committer	Douwe Maan <douwe@gitlab.com>	2015-08-22 02:09:55 +0300
commit	15fc7bd6139f0b429c05c055b4cfab561c926e08 (patch)
tree	5ae7287c076fe74fbab3140b22dcacdd2f96c460 /lib/gitlab
parent	3abb356dd215235765f89c78c98655dc62688b77 (diff)