Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDouwe Maan <douwe@gitlab.com>2015-08-20 21:05:06 +0300
committerDouwe Maan <douwe@gitlab.com>2015-08-20 21:05:06 +0300
commite9972efc2f3d730e989907585dd1438c517a0bba (patch)
tree7a38f9638cc50813d16d55f9276db98dd7cb041c /lib/gitlab/email/reply_parser.rb
parent3ff9d5c64cef8bf8daed5e253e388545987fb945 (diff)
Extract ReplyParser and AttachmentUploader from Receiver.
Diffstat (limited to 'lib/gitlab/email/reply_parser.rb')
-rw-r--r--lib/gitlab/email/reply_parser.rb91
1 files changed, 91 insertions, 0 deletions
diff --git a/lib/gitlab/email/reply_parser.rb b/lib/gitlab/email/reply_parser.rb
new file mode 100644
index 00000000000..6ceb755968c
--- /dev/null
+++ b/lib/gitlab/email/reply_parser.rb
@@ -0,0 +1,91 @@
# Inspired in great part by Discourse's Email::Receiver
module Gitlab
  module Email
    # Extracts the text a user wrote in an email reply, dropping the quoted
    # original message and the header block mail clients put in front of it.
    #
    # Usage: Gitlab::Email::ReplyParser.new(message).execute
    class ReplyParser
      # Labels of the header lines that introduce quoted text in a reply.
      # Spelled out as an explicit array on purpose: inside %w(), "Reply To"
      # is split into "Reply" and a second "To", which both hides the real
      # "Reply To:" header and inflates the same-line label count.
      REPLYING_HEADER_LABELS = ["From", "Sent", "To", "Subject", "Reply To", "Cc", "Bcc", "Date"].freeze

      # Matches any of the labels above immediately followed by a colon.
      REPLYING_HEADER_REGEX = Regexp.union(REPLYING_HEADER_LABELS.map { |label| "#{label}:" })

      attr_accessor :message

      # message - the mail object to parse. It must respond to multipart?,
      #           content_type, html_part, text_part, charset and body
      #           (presumably a Mail::Message; confirm against the caller).
      def initialize(message)
        @message = message
      end

      # Returns the reply text of the message as a UTF-8 string.
      #
      # The body is trimmed with the local heuristics first and then handed to
      # EmailReplyParser.parse_reply (defined outside this file).
      def execute
        body = select_body(message)

        # Remember the encoding of the selected body so it can be re-applied
        # to whatever string the external parser hands back.
        encoding = body.encoding

        body = discourse_email_trimmer(body)
        body = EmailReplyParser.parse_reply(body)

        body.force_encoding(encoding).encode("UTF-8")
      end

      private

      # Picks the body to parse. Plain text is preferred; otherwise the HTML
      # part is run through HtmlCleaner (defined outside this file), and as a
      # last resort the raw message body is used.
      #
      # Always returns a String; "" means no usable body was found.
      def select_body(message)
        html = nil
        text = nil

        if message.multipart?
          html = fix_charset(message.html_part)
          text = fix_charset(message.text_part)
        elsif message.content_type =~ %r{text/html}
          html = fix_charset(message)
        end

        # prefer plain text
        return text if text

        body = html ? HtmlCleaner.new(html).output_html : fix_charset(message)

        # fix_charset returns nil when decoding fails. Report an empty body
        # instead of letting the caller crash on nil.
        return "" if body.nil?

        # Certain trigger phrases that means we didn't parse correctly
        return "" if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/

        body
      end

      # Force encoding to UTF-8 on a Mail::Message or Mail::Part
      #
      # Returns the body as a String, or nil when the object is nil or the
      # body cannot be decoded with the declared charset.
      def fix_charset(object)
        return nil if object.nil?

        if object.charset
          # "utf8" (any case) is rewritten to "UTF-8" before it is used as an
          # encoding name.
          charset = object.charset.gsub(/utf8/i, "UTF-8")
          object.body.decoded.force_encoding(charset).encode("UTF-8").to_s
        else
          object.body.to_s
        end
      rescue StandardError
        # Best effort: an unknown charset or undecodable body yields nil.
        nil
      end

      # Returns the part of body that precedes the first line recognised as
      # the start of quoted content, with surrounding whitespace stripped.
      #
      # If the very first line is already a quote marker the result is "".
      def discourse_email_trimmer(body)
        lines = body.scrub.lines.to_a

        # Index of the first quote-marker line; everything before it is kept.
        cutoff = lines.each_index.find { |idx| quoted_reply_start?(lines, idx) } || lines.size

        lines.first(cutoff).join.strip
      end

      # True when the line at idx looks like the beginning of quoted content.
      def quoted_reply_start?(lines, idx)
        line = lines[idx]

        # This one might be controversial but so many reply lines have years,
        # times and end with a colon. Let's try it and see how well it works.
        return true if line =~ /\d{4}/ && line =~ /\d:\d\d/ && line =~ /\:$/
        return true if line =~ /On \w+ \d+,? \d+,?.*wrote:/

        # Headers on subsequent lines: this line and the next two must each
        # contain a "Label:" header (a missing line never matches).
        return true if (0..2).all? { |off| lines[idx + off] =~ REPLYING_HEADER_REGEX }

        # Headers on the same line
        REPLYING_HEADER_LABELS.count { |label| line.include?(label) } >= 3
      end
    end
  end
end