Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Douwe Maan <douwe@gitlab.com> 2015-08-22 02:09:55 +0300
committer Douwe Maan <douwe@gitlab.com> 2015-08-22 02:09:55 +0300
commit 15fc7bd6139f0b429c05c055b4cfab561c926e08 (patch)
tree 5ae7287c076fe74fbab3140b22dcacdd2f96c460
parent 3abb356dd215235765f89c78c98655dc62688b77 (diff)
No HTML-only email please
-rw-r--r-- lib/gitlab/email/html_cleaner.rb 135
-rw-r--r-- lib/gitlab/email/reply_parser.rb 24
-rw-r--r-- spec/fixtures/emails/dutch.eml 2
-rw-r--r-- spec/fixtures/emails/html_only.eml 93
-rw-r--r-- spec/fixtures/emails/plaintext_only.eml 42
-rw-r--r-- spec/lib/gitlab/email/reply_parser_spec.rb 19
6 files changed, 65 insertions, 250 deletions
diff --git a/lib/gitlab/email/html_cleaner.rb b/lib/gitlab/email/html_cleaner.rb
deleted file mode 100644
index e1ae9eee56c..00000000000
--- a/lib/gitlab/email/html_cleaner.rb
+++ /dev/null
@@ -1,135 +0,0 @@
-# Taken mostly from Discourse's Email::HtmlCleaner
-module Gitlab
- module Email
- # HtmlCleaner cleans up the extremely dirty HTML that many email clients
- # generate by stripping out any excess divs or spans, removing styling in
- # the process (which also makes the html more suitable to be parsed as
- # Markdown).
- class HtmlCleaner
- # Elements to hoist all children out of
- HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td)
- # Node types to always delete
- HTML_DELETE_ELEMENT_TYPES = [
- Nokogiri::XML::Node::DTD_NODE,
- Nokogiri::XML::Node::COMMENT_NODE,
- ]
-
- # Private variables:
- # @doc - nokogiri document
- # @out - same as @doc, but only if trimming has occured
- def initialize(html)
- if html.is_a?(String)
- @doc = Nokogiri::HTML(html)
- else
- @doc = html
- end
- end
-
- class << self
- # HtmlCleaner.trim(inp, opts={})
- #
- # Arguments:
- # inp - Either a HTML string or a Nokogiri document.
- # Options:
- # :return => :doc, :string
- # Specify the desired return type.
- # Defaults to the type of the input.
- # A value of :string is equivalent to calling get_document_text()
- # on the returned document.
- def trim(inp, opts={})
- cleaner = HtmlCleaner.new(inp)
-
- opts[:return] ||= (inp.is_a?(String) ? :string : :doc)
-
- if opts[:return] == :string
- cleaner.output_html
- else
- cleaner.output_document
- end
- end
-
- # HtmlCleaner.get_document_text(doc)
- #
- # Get the body portion of the document, including html, as a string.
- def get_document_text(doc)
- body = doc.xpath('//body')
- if body
- body.inner_html
- else
- doc.inner_html
- end
- end
- end
-
- def output_document
- @out ||= begin
- doc = @doc
- trim_process_node doc
- add_newlines doc
- doc
- end
- end
-
- def output_html
- HtmlCleaner.get_document_text(output_document)
- end
-
- private
-
- def add_newlines(doc)
- # Replace <br> tags with a markdown \n
- doc.xpath('//br').each do |br|
- br.replace(new_linebreak_node doc, 2)
- end
- # Surround <p> tags with newlines, to help with line-wise postprocessing
- # and ensure markdown paragraphs
- doc.xpath('//p').each do |p|
- p.before(new_linebreak_node doc)
- p.after(new_linebreak_node doc, 2)
- end
- end
-
- def new_linebreak_node(doc, count=1)
- Nokogiri::XML::Text.new("\n" * count, doc)
- end
-
- def trim_process_node(node)
- if should_hoist?(node)
- hoisted = trim_hoist_element node
- hoisted.each { |child| trim_process_node child }
- elsif should_delete?(node)
- node.remove
- else
- if children = node.children
- children.each { |child| trim_process_node child }
- end
- end
-
- node
- end
-
- def trim_hoist_element(element)
- hoisted = []
- element.children.each do |child|
- element.before(child)
- hoisted << child
- end
- element.remove
- hoisted
- end
-
- def should_hoist?(node)
- return false unless node.element?
- HTML_HOIST_ELEMENTS.include? node.name
- end
-
- def should_delete?(node)
- return true if HTML_DELETE_ELEMENT_TYPES.include? node.type
- return true if node.element? && node.name == 'head'
- return true if node.text? && node.text.strip.blank?
-
- false
- end
- end
- end
-end
diff --git a/lib/gitlab/email/reply_parser.rb b/lib/gitlab/email/reply_parser.rb
index 6e768e46a71..6ed36b51f12 100644
--- a/lib/gitlab/email/reply_parser.rb
+++ b/lib/gitlab/email/reply_parser.rb
@@ -23,31 +23,19 @@ module Gitlab
private
def select_body(message)
- html = nil
- text = nil
-
- if message.multipart?
- html = fix_charset(message.html_part)
- text = fix_charset(message.text_part)
- elsif message.content_type =~ /text\/html/
- html = fix_charset(message)
- end
+ text = message.text_part if message.multipart?
+ text ||= message if message.content_type !~ /text\/html/
- # prefer plain text
- return text if text
+ return "" unless text
- if html
- body = HtmlCleaner.new(html).output_html
- else
- body = fix_charset(message)
- end
+ text = fix_charset(text)
# Certain trigger phrases that means we didn't parse correctly
- if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
+ if text =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/
return ""
end
- body
+ text
end
# Force encoding to UTF-8 on a Mail::Message or Mail::Part
diff --git a/spec/fixtures/emails/dutch.eml b/spec/fixtures/emails/dutch.eml
index 7be08dc4938..3142bf30c3b 100644
--- a/spec/fixtures/emails/dutch.eml
+++ b/spec/fixtures/emails/dutch.eml
@@ -17,4 +17,4 @@ Dit is een antwoord in het Nederlands.
Op 18 juli 2013 10:23 schreef Sander Datema het volgende:
-Dit is de originele post.
\ No newline at end of file
+Dit is de originele post.
diff --git a/spec/fixtures/emails/html_only.eml b/spec/fixtures/emails/html_only.eml
deleted file mode 100644
index 561b8db2c79..00000000000
--- a/spec/fixtures/emails/html_only.eml
+++ /dev/null
@@ -1,93 +0,0 @@
-
-Delivered-To: walter@breakingbad.com
-Received: by 10.64.13.41 with SMTP id m9csp29769iec;
- Thu, 20 Jun 2013 08:53:22 -0700 (PDT)
-X-Received: by 10.252.23.9 with SMTP id p9mr4055675lag.4.1371743601980;
- Thu, 20 Jun 2013 08:53:21 -0700 (PDT)
-Received: from mail-la0-x229.google.com (mail-la0-x229.google.com [2a00:1450:4010:c03::229])
- by mx.google.com with ESMTPS id u4si430203lae.48.2013.06.20.08.53.20
- for <walter@breakingbad.com>
- (version=TLSv1 cipher=ECDHE-RSA-RC4-SHA bits=128/128);
- Thu, 20 Jun 2013 08:53:21 -0700 (PDT)
-X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
- d=google.com; s=20120113;
- h=x-forwarded-to:x-forwarded-for:delivered-to:x-return-path
- :content-type:mime-version:content-transfer-encoding:x-mailer
- :message-id:date:subject:from:in-reply-to:to:resent-date:resent-from
- :resent-to:resent-subject:resent-message-id:resent-user-agent
- :x-scanned-by:x-gm-message-state;
- bh=9O67r74ofh9WkEaKTRB/frQ3MKOtQlbCac2mz0/MiyY=;
- b=YVAo2/JDMP53RxDmqDEKNcEMtggtfaVyq2DoseZ6vBAfB7G6NtHC9ZEkRs4oGhk6LU
- fnyAPe0wnz5d9WINoMAuuTRIhplLxzcqysduSnAJAQ2qqR7mFBnlj9wJeVEKltNwmUME
- nPwxsf8go20VBzrZCtECPedcLi60wbl32NCXVn0qwt2LvKiy6ktSS5Xgb4zY8i4dfXAP
- 6Y5gu32boooWIb9DkH1TJkn3C0RrEugNlw/DUnXrnkFefgxWF3pt/zcoW/wYRyikOdx+
- smBClgR9my6QmsS2KsQrMvWJZUva7fddTiZ6FC22e4hW+8Wha0RaZOZu5O7hjg6G4/1g
- IEyg==
-X-Received: by 10.112.55.9 with SMTP id n9mr5916187lbp.5.1371743600857;
- Thu, 20 Jun 2013 08:53:20 -0700 (PDT)
-X-Forwarded-To: walter@breakingbad.com
-X-Forwarded-For: walter@breakingbad.com
-Delivered-To: walter@breakingbad.com
-Content-Type: text/html; charset="us-ascii"
-MIME-Version: 1.0
-Content-Transfer-Encoding: quoted-printable
-X-Mailer: BlackBerry Email (10.1.0.1720)
-Message-ID: <20130619231548.6307981.74194.2379@breakingbad.com>
-Date: Wed, 19 Jun 2013 19:15:48 -0400
-Subject: Re: [Discourse Meta] [PM] re: Regarding your post in "Site
- Customization not working"
-From: aaron@breakingbad.com
-In-Reply-To: <51c238655a394_5f4e3ce6690667bd@tiefighter2.mail>
-To: reply+20c1b0a8bd1a63c0163cc7e7641ca06b@appmail.adventuretime.ooo
-ReSent-Date: Thu, 20 Jun 2013 11:53:08 -0400 (EDT)
-ReSent-From: Aaron <aaron@breakingbad.com>
-ReSent-Subject: Re: [Discourse Meta] [PM] re: Regarding your post in "Site
- Customization not working"
-X-Gm-Message-State: ALoCoQl1BtN83rAX7At808XAPv1yCqUK3Du2IvK7eCyY3jsI77u4e5cak28307pYYHAo1JlO/Eu9
-
-<html><head></head><body data-blackberry-caret-color=3D"#00a8df" style=3D"b=
-ackground-color: rgb(255, 255, 255); line-height: initial;"><div id=3D"BB10=
-_response_div" style=3D"width: 100%; font-size: initial; font-family: Calib=
-ri, 'Slate Pro', sans-serif; color: rgb(31, 73, 125); text-align: initial; =
-background-color: rgb(255, 255, 255);">The EC2 instance - I've seen that th=
-ere tends to be odd and unrecommended settings on the Bitnami installs that=
- I've checked out.</div> =
- =
- <div id=3D"response_div_spacer" style=3D"width: 100%; font-size: ini=
-tial; font-family: Calibri, 'Slate Pro', sans-serif; color: rgb(31, 73, 125=
-); text-align: initial; background-color: rgb(255, 255, 255);"><br style=3D=
-"display:initial"></div> =
- =
- <div id=3D"_signaturePlaceholder" style=3D"font-size: initial; font-=
-family: Calibri, 'Slate Pro', sans-serif; color: rgb(31, 73, 125); text-ali=
-gn: initial; background-color: rgb(255, 255, 255);"></div> =
- =
- =
- <table width=3D"100%" style=3D"background-color:white;bord=
-er-spacing:0px;"> <tbody><tr><td id=3D"_persistentHeaderContainer" colspan=
-=3D"2" style=3D"font-size: initial; text-align: initial; background-color: =
-rgb(255, 255, 255);"> <div id=
-=3D"_persistentHeader" style=3D"border-style: solid none none; border-top-c=
-olor: rgb(181, 196, 223); border-top-width: 1pt; padding: 3pt 0in 0in; font=
--family: Tahoma, 'BB Alpha Sans', 'Slate Pro'; font-size: 10pt;"> <div><b>=
-From: </b>Grizzly B via Discourse Meta</div><div><b>Sent: </b>Wednesday, J=
-une 19, 2013 19:02</div><div><b>To: </b>aaron@breakingbad.com</div><div><b>=
-Reply To: </b>Grizzly B via Discourse Meta</div><div><b>Subject: </b>[Disc=
-ourse Meta] [PM] re: Regarding your post in "Site Customization<br> not wor=
-king"</div></div></td></tr></tbody></table><div id=3D"_persistentHeaderEnd"=
- style=3D"border-style: solid none none; border-top-color: rgb(186, 188, 20=
-9); border-top-width: 1pt; font-size: initial; text-align: initial; backgro=
-und-color: rgb(255, 255, 255);"></div><br><div id=3D"_originalContent" styl=
-e=3D""><p>Grizzly B just sent you a private message</p>
-
-<hr><p>Log in to our EC2 instance -or- log into a new Digital Ocean instanc=
-e?</p>
-
-<hr><p>Please visit this link to respond: <a href=3D"http://meta.discourse.=
-org/t/regarding-your-post-in-site-customization-not-working/7641/5">http://=
-meta.discourse.org/t/regarding-your-post-in-site-customization-not-working/=
-7641/5</a></p>
-
-<p>To unsubscribe from these emails, visit your <a href=3D"http://meta.disc=
-ourse.org/user_preferences">user preferences</a>.</p>
-<br><!--end of _originalContent --></div></body></html>
diff --git a/spec/fixtures/emails/plaintext_only.eml b/spec/fixtures/emails/plaintext_only.eml
new file mode 100644
index 00000000000..1bfaec771dc
--- /dev/null
+++ b/spec/fixtures/emails/plaintext_only.eml
@@ -0,0 +1,42 @@
+Delivered-To: reply@discourse.org
+Return-Path: <walter.white@googlemail.com>
+MIME-Version: 1.0
+From: <walter.white@googlemail.com>
+To:
+ =?utf-8?Q?Discourse_Meta?=
+ <reply@discourse.org>
+Subject:
+ =?utf-8?Q?Re:_[Discourse_Meta]_[Lounge]_Testing_default_email_replies?=
+Importance: Normal
+Date: Fri, 28 Nov 2014 21:29:10 +0000
+In-Reply-To: <topic/22638/86406@meta.discourse.org>
+References:
+ <topic/22638@meta.discourse.org>,<topic/22638/86406@meta.discourse.org>
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+IyMjIHJlcGx5IGZyb20gZGVmYXVsdCBtYWlsIGNsaWVudCBpbiBXaW5kb3dzIDguMSBNZXRybw0K
+DQoNClRoZSBxdWljayBicm93biBmb3gganVtcHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWlj
+ayBicm93biBmb3gganVtcHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWljayBicm93biBmb3gg
+anVtcHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWljayBicm93biBmb3gganVtcHMgb3ZlciB0
+aGUgbGF6eSBkb2cuIFRoZSBxdWljayBicm93biBmb3gganVtcHMgb3ZlciB0aGUgbGF6eSBkb2cu
+IFRoZSBxdWljayBicm93biBmb3gganVtcHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWljayBi
+cm93biBmb3gganVtcHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWljayBicm93biBmb3gganVt
+cHMgb3ZlciB0aGUgbGF6eSBkb2cuIFRoZSBxdWljayBicm93biBmb3gganVtcHMgb3ZlciB0aGUg
+bGF6eSBkb2cuDQoNCg0KVGhpcyBpcyBhICoqYm9sZCoqIHdvcmQgaW4gTWFya2Rvd24NCg0KDQpU
+aGlzIGlzIGEgbGluayBodHRwOi8vZXhhbXBsZS5jb20NCiANCg0KDQoNCg0KDQpGcm9tOiBBcnBp
+dCBKYWxhbg0KU2VudDog4oCORnJpZGF54oCOLCDigI5Ob3ZlbWJlcuKAjiDigI4yOOKAjiwg4oCO
+MjAxNCDigI4xMuKAjjrigI4zNeKAjiDigI5QTQ0KVG86IGplZmYgYXR3b29kDQoNCg0KDQoNCg0K
+DQogdGVjaEFQSg0KTm92ZW1iZXIgMjggDQoNClRlc3QgcmVwbHkuDQoNCkZpcnN0IHBhcmFncmFw
+aC4NCg0KU2Vjb25kIHBhcmFncmFwaC4NCg0KDQoNClRvIHJlc3BvbmQsIHJlcGx5IHRvIHRoaXMg
+ZW1haWwgb3IgdmlzaXQgaHR0cHM6Ly9tZXRhLmRpc2NvdXJzZS5vcmcvdC90ZXN0aW5nLWRlZmF1
+bHQtZW1haWwtcmVwbGllcy8yMjYzOC8zIGluIHlvdXIgYnJvd3Nlci4NCg0KDQoNClByZXZpb3Vz
+IFJlcGxpZXMNCg0KIGNvZGluZ2hvcnJvcg0KTm92ZW1iZXIgMjggDQoNCldlJ3JlIHRlc3Rpbmcg
+dGhlIGxhdGVzdCBHaXRIdWIgZW1haWwgcHJvY2Vzc2luZyBsaWJyYXJ5IHdoaWNoIHdlIGFyZSBp
+bnRlZ3JhdGluZyBub3cuDQoNCmh0dHBzOi8vZ2l0aHViLmNvbS9naXRodWIvZW1haWxfcmVwbHlf
+cGFyc2VyDQoNCkdvIGFoZWFkIGFuZCByZXBseSB0byB0aGlzIHRvcGljIGFuZCBJJ2xsIHJlcGx5
+IGZyb20gdmFyaW91cyBlbWFpbCBjbGllbnRzIGZvciB0ZXN0aW5nLg0KDQoNCg0KDQoNClRvIHJl
+c3BvbmQsIHJlcGx5IHRvIHRoaXMgZW1haWwgb3IgdmlzaXQgaHR0cHM6Ly9tZXRhLmRpc2NvdXJz
+ZS5vcmcvdC90ZXN0aW5nLWRlZmF1bHQtZW1haWwtcmVwbGllcy8yMjYzOC8zIGluIHlvdXIgYnJv
+d3Nlci4NCg0KDQpUbyB1bnN1YnNjcmliZSBmcm9tIHRoZXNlIGVtYWlscywgdmlzaXQgeW91ciB1
+c2VyIHByZWZlcmVuY2VzLg==
diff --git a/spec/lib/gitlab/email/reply_parser_spec.rb b/spec/lib/gitlab/email/reply_parser_spec.rb
index a94c92ad53c..7cae1da8050 100644
--- a/spec/lib/gitlab/email/reply_parser_spec.rb
+++ b/spec/lib/gitlab/email/reply_parser_spec.rb
@@ -19,9 +19,22 @@ describe Gitlab::Email::ReplyParser do
expect(test_parse_body(fixture_file("emails/no_content_reply.eml"))).to eq("")
end
- it "can parse the html section" do
- expect(test_parse_body(fixture_file("emails/html_only.eml"))).to eq("The EC2 instance - I've seen that there tends to be odd and " +
- "unrecommended settings on the Bitnami installs that I've checked out.")
+ it "properly renders plaintext-only email" do
+ expect(test_parse_body(fixture_file("emails/plaintext_only.eml"))).
+ to eq(
+ <<-BODY.strip_heredoc.chomp
+ ### reply from default mail client in Windows 8.1 Metro
+
+
+ The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
+
+
+ This is a **bold** word in Markdown
+
+
+ This is a link http://example.com
+ BODY
+ )
end
it "supports a Dutch reply" do