From 15fc7bd6139f0b429c05c055b4cfab561c926e08 Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Fri, 21 Aug 2015 16:09:55 -0700 Subject: No HTML-only email please --- lib/gitlab/email/html_cleaner.rb | 135 --------------------------------------- lib/gitlab/email/reply_parser.rb | 24 ++----- 2 files changed, 6 insertions(+), 153 deletions(-) delete mode 100644 lib/gitlab/email/html_cleaner.rb (limited to 'lib/gitlab') diff --git a/lib/gitlab/email/html_cleaner.rb b/lib/gitlab/email/html_cleaner.rb deleted file mode 100644 index e1ae9eee56c..00000000000 --- a/lib/gitlab/email/html_cleaner.rb +++ /dev/null @@ -1,135 +0,0 @@ -# Taken mostly from Discourse's Email::HtmlCleaner -module Gitlab - module Email - # HtmlCleaner cleans up the extremely dirty HTML that many email clients - # generate by stripping out any excess divs or spans, removing styling in - # the process (which also makes the html more suitable to be parsed as - # Markdown). - class HtmlCleaner - # Elements to hoist all children out of - HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td) - # Node types to always delete - HTML_DELETE_ELEMENT_TYPES = [ - Nokogiri::XML::Node::DTD_NODE, - Nokogiri::XML::Node::COMMENT_NODE, - ] - - # Private variables: - # @doc - nokogiri document - # @out - same as @doc, but only if trimming has occured - def initialize(html) - if html.is_a?(String) - @doc = Nokogiri::HTML(html) - else - @doc = html - end - end - - class << self - # HtmlCleaner.trim(inp, opts={}) - # - # Arguments: - # inp - Either a HTML string or a Nokogiri document. - # Options: - # :return => :doc, :string - # Specify the desired return type. - # Defaults to the type of the input. - # A value of :string is equivalent to calling get_document_text() - # on the returned document. - def trim(inp, opts={}) - cleaner = HtmlCleaner.new(inp) - - opts[:return] ||= (inp.is_a?(String) ? 
:string : :doc) - - if opts[:return] == :string - cleaner.output_html - else - cleaner.output_document - end - end - - # HtmlCleaner.get_document_text(doc) - # - # Get the body portion of the document, including html, as a string. - def get_document_text(doc) - body = doc.xpath('//body') - if body - body.inner_html - else - doc.inner_html - end - end - end - - def output_document - @out ||= begin - doc = @doc - trim_process_node doc - add_newlines doc - doc - end - end - - def output_html - HtmlCleaner.get_document_text(output_document) - end - - private - - def add_newlines(doc) - # Replace
<br> tags with a markdown \n - doc.xpath('//br').each do |br| - br.replace(new_linebreak_node doc, 2) - end - # Surround <p>
tags with newlines, to help with line-wise postprocessing - # and ensure markdown paragraphs - doc.xpath('//p').each do |p| - p.before(new_linebreak_node doc) - p.after(new_linebreak_node doc, 2) - end - end - - def new_linebreak_node(doc, count=1) - Nokogiri::XML::Text.new("\n" * count, doc) - end - - def trim_process_node(node) - if should_hoist?(node) - hoisted = trim_hoist_element node - hoisted.each { |child| trim_process_node child } - elsif should_delete?(node) - node.remove - else - if children = node.children - children.each { |child| trim_process_node child } - end - end - - node - end - - def trim_hoist_element(element) - hoisted = [] - element.children.each do |child| - element.before(child) - hoisted << child - end - element.remove - hoisted - end - - def should_hoist?(node) - return false unless node.element? - HTML_HOIST_ELEMENTS.include? node.name - end - - def should_delete?(node) - return true if HTML_DELETE_ELEMENT_TYPES.include? node.type - return true if node.element? && node.name == 'head' - return true if node.text? && node.text.strip.blank? - - false - end - end - end -end diff --git a/lib/gitlab/email/reply_parser.rb b/lib/gitlab/email/reply_parser.rb index 6e768e46a71..6ed36b51f12 100644 --- a/lib/gitlab/email/reply_parser.rb +++ b/lib/gitlab/email/reply_parser.rb @@ -23,31 +23,19 @@ module Gitlab private def select_body(message) - html = nil - text = nil - - if message.multipart? - html = fix_charset(message.html_part) - text = fix_charset(message.text_part) - elsif message.content_type =~ /text\/html/ - html = fix_charset(message) - end + text = message.text_part if message.multipart? 
+ text ||= message if message.content_type !~ /text\/html/ - # prefer plain text - return text if text + return "" unless text - if html - body = HtmlCleaner.new(html).output_html - else - body = fix_charset(message) - end + text = fix_charset(text) # Certain trigger phrases that means we didn't parse correctly - if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/ + if text =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/ return "" end - body + text end # Force encoding to UTF-8 on a Mail::Message or Mail::Part -- cgit v1.2.3