Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Douwe Maan <douwe@gitlab.com>, 2015-08-22 02:09:55 +0300
committer: Douwe Maan <douwe@gitlab.com>, 2015-08-22 02:09:55 +0300
commit: 15fc7bd6139f0b429c05c055b4cfab561c926e08 (patch)
tree: 5ae7287c076fe74fbab3140b22dcacdd2f96c460 /lib/gitlab
parent: 3abb356dd215235765f89c78c98655dc62688b77 (diff)
No HTML-only email please
Diffstat (limited to 'lib/gitlab')
-rw-r--r-- lib/gitlab/email/html_cleaner.rb | 135
-rw-r--r-- lib/gitlab/email/reply_parser.rb | 24
2 files changed, 6 insertions, 153 deletions
diff --git a/lib/gitlab/email/html_cleaner.rb b/lib/gitlab/email/html_cleaner.rb
deleted file mode 100644
index e1ae9eee56c..00000000000
--- a/lib/gitlab/email/html_cleaner.rb
+++ /dev/null
@@ -1,135 +0,0 @@
-# Taken mostly from Discourse's Email::HtmlCleaner
-module Gitlab
- module Email
- # HtmlCleaner cleans up the extremely dirty HTML that many email clients
- # generate by stripping out any excess divs or spans, removing styling in
- # the process (which also makes the html more suitable to be parsed as
- # Markdown).
- class HtmlCleaner
- # Elements to hoist all children out of
- HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td)
- # Node types to always delete
- HTML_DELETE_ELEMENT_TYPES = [
- Nokogiri::XML::Node::DTD_NODE,
- Nokogiri::XML::Node::COMMENT_NODE,
- ]
-
- # Private variables:
- # @doc - nokogiri document
- # @out - same as @doc, but only if trimming has occured
- def initialize(html)
- if html.is_a?(String)
- @doc = Nokogiri::HTML(html)
- else
- @doc = html
- end
- end
-
- class << self
- # HtmlCleaner.trim(inp, opts={})
- #
- # Arguments:
- # inp - Either a HTML string or a Nokogiri document.
- # Options:
- # :return => :doc, :string
- # Specify the desired return type.
- # Defaults to the type of the input.
- # A value of :string is equivalent to calling get_document_text()
- # on the returned document.
- def trim(inp, opts={})
- cleaner = HtmlCleaner.new(inp)
-
- opts[:return] ||= (inp.is_a?(String) ? :string : :doc)
-
- if opts[:return] == :string
- cleaner.output_html
- else
- cleaner.output_document
- end
- end
-
- # HtmlCleaner.get_document_text(doc)
- #
- # Get the body portion of the document, including html, as a string.
- def get_document_text(doc)
- body = doc.xpath('//body')
- if body
- body.inner_html
- else
- doc.inner_html
- end
- end
- end
-
- def output_document
- @out ||= begin
- doc = @doc
- trim_process_node doc
- add_newlines doc
- doc
- end
- end
-
- def output_html
- HtmlCleaner.get_document_text(output_document)
- end
-
- private
-
- def add_newlines(doc)
- # Replace <br> tags with a markdown \n
- doc.xpath('//br').each do |br|
- br.replace(new_linebreak_node doc, 2)
- end
- # Surround <p> tags with newlines, to help with line-wise postprocessing
- # and ensure markdown paragraphs
- doc.xpath('//p').each do |p|
- p.before(new_linebreak_node doc)
- p.after(new_linebreak_node doc, 2)
- end
- end
-
- def new_linebreak_node(doc, count=1)
- Nokogiri::XML::Text.new("\n" * count, doc)
- end
-
- def trim_process_node(node)
- if should_hoist?(node)
- hoisted = trim_hoist_element node
- hoisted.each { |child| trim_process_node child }
- elsif should_delete?(node)
- node.remove
- else
- if children = node.children
- children.each { |child| trim_process_node child }
- end
- end
-
- node
- end
-
- def trim_hoist_element(element)
- hoisted = []
- element.children.each do |child|
- element.before(child)
- hoisted << child
- end
- element.remove
- hoisted
- end
-
- def should_hoist?(node)
- return false unless node.element?
- HTML_HOIST_ELEMENTS.include? node.name
- end
-
- def should_delete?(node)
- return true if HTML_DELETE_ELEMENT_TYPES.include? node.type
- return true if node.element? && node.name == 'head'
- return true if node.text? && node.text.strip.blank?
-
- false
- end
- end
- end
-end
diff --git a/lib/gitlab/email/reply_parser.rb b/lib/gitlab/email/reply_parser.rb
index 6e768e46a71..6ed36b51f12 100644
--- a/lib/gitlab/email/reply_parser.rb
+++ b/lib/gitlab/email/reply_parser.rb
@@ -23,31 +23,19 @@ module Gitlab
private
def select_body(message)
- html = nil
- text = nil
-
- if message.multipart?
- html = fix_charset(message.html_part)
- text = fix_charset(message.text_part)
- elsif message.content_type =~ /text\/html/
- html = fix_charset(message)
- end
+ text = message.text_part if message.multipart?
+ text ||= message if message.content_type !~ /text\/html/
- # prefer plain text
- return text if text
+ return "" unless text
- if html
- body = HtmlCleaner.new(html).output_html
- else
- body = fix_charset(message)
- end
+ text = fix_charset(text)
# Certain trigger phrases that means we didn't parse correctly
- if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
+ if text =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
return ""
end
- body
+ text
end
# Force encoding to UTF-8 on a Mail::Message or Mail::Part