diff options
author | Douwe Maan <douwe@gitlab.com> | 2015-08-19 03:02:26 +0300 |
---|---|---|
committer | Douwe Maan <douwe@gitlab.com> | 2015-08-19 03:02:26 +0300 |
commit | 8906cabae7a6be44cafcedcaf27352614fcc462b (patch) | |
tree | 6ff7220890a5d8e22d16af0daa9689eddd0d4767 /lib | |
parent | a428838dc00dd95f5dad0e433ad2eb73df2eff0f (diff) |
Changes and stuff.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/gitlab/email_html_cleaner.rb | 133 | ||||
-rw-r--r-- | lib/gitlab/email_receiver.rb | 144 | ||||
-rw-r--r-- | lib/gitlab/reply_by_email.rb | 47 |
3 files changed, 305 insertions, 19 deletions
diff --git a/lib/gitlab/email_html_cleaner.rb b/lib/gitlab/email_html_cleaner.rb new file mode 100644 index 00000000000..6d7a17fe87c --- /dev/null +++ b/lib/gitlab/email_html_cleaner.rb @@ -0,0 +1,133 @@ +# Taken mostly from Discourse's Email::HtmlCleaner +module Gitlab + # HtmlCleaner cleans up the extremely dirty HTML that many email clients + # generate by stripping out any excess divs or spans, removing styling in + # the process (which also makes the html more suitable to be parsed as + # Markdown). + class EmailHtmlCleaner + # Elements to hoist all children out of + HTML_HOIST_ELEMENTS = %w(div span font table tbody th tr td) + # Node types to always delete + HTML_DELETE_ELEMENT_TYPES = [ + Nokogiri::XML::Node::DTD_NODE, + Nokogiri::XML::Node::COMMENT_NODE, + ] + + # Private variables: + # @doc - nokogiri document + # @out - same as @doc, but only if trimming has occured + def initialize(html) + if html.is_a?(String) + @doc = Nokogiri::HTML(html) + else + @doc = html + end + end + + class << self + # EmailHtmlCleaner.trim(inp, opts={}) + # + # Arguments: + # inp - Either a HTML string or a Nokogiri document. + # Options: + # :return => :doc, :string + # Specify the desired return type. + # Defaults to the type of the input. + # A value of :string is equivalent to calling get_document_text() + # on the returned document. + def trim(inp, opts={}) + cleaner = EmailHtmlCleaner.new(inp) + + opts[:return] ||= (inp.is_a?(String) ? :string : :doc) + + if opts[:return] == :string + cleaner.output_html + else + cleaner.output_document + end + end + + # EmailHtmlCleaner.get_document_text(doc) + # + # Get the body portion of the document, including html, as a string. + def get_document_text(doc) + body = doc.xpath('//body') + if body + body.inner_html + else + doc.inner_html + end + end + end + + def output_document + @out ||= begin + doc = @doc + trim_process_node doc + add_newlines doc + doc + end + end + + def output_html + EmailHtmlCleaner.get_document_text(output_document) + end + + private + + def add_newlines(doc) + # Replace <br> tags with a markdown \n + doc.xpath('//br').each do |br| + br.replace(new_linebreak_node doc, 2) + end + # Surround <p> tags with newlines, to help with line-wise postprocessing + # and ensure markdown paragraphs + doc.xpath('//p').each do |p| + p.before(new_linebreak_node doc) + p.after(new_linebreak_node doc, 2) + end + end + + def new_linebreak_node(doc, count=1) + Nokogiri::XML::Text.new("\n" * count, doc) + end + + def trim_process_node(node) + if should_hoist?(node) + hoisted = trim_hoist_element node + hoisted.each { |child| trim_process_node child } + elsif should_delete?(node) + node.remove + else + if children = node.children + children.each { |child| trim_process_node child } + end + end + + node + end + + def trim_hoist_element(element) + hoisted = [] + element.children.each do |child| + element.before(child) + hoisted << child + end + element.remove + hoisted + end + + def should_hoist?(node) + return false unless node.element? + HTML_HOIST_ELEMENTS.include? node.name + end + + def should_delete?(node) + return true if HTML_DELETE_ELEMENT_TYPES.include? node.type + return true if node.element? && node.name == 'head' + return true if node.text? && node.text.strip.blank? + + false + end + end +end diff --git a/lib/gitlab/email_receiver.rb b/lib/gitlab/email_receiver.rb index a9f67bb5da0..18a06d2ee8c 100644 --- a/lib/gitlab/email_receiver.rb +++ b/lib/gitlab/email_receiver.rb @@ -1,44 +1,69 @@ +# Inspired in great part by Discourse's Email::Receiver module Gitlab class EmailReceiver + class ProcessingError < StandardError; end + class EmailUnparsableError < ProcessingError; end + class EmptyEmailError < ProcessingError; end + class UserNotFoundError < ProcessingError; end + class UserNotAuthorizedLevelError < ProcessingError; end + class NoteableNotFoundError < ProcessingError; end + class AutoGeneratedEmailError < ProcessingError; end + class SentNotificationNotFound < ProcessingError; end + class InvalidNote < ProcessingError; end + def initialize(raw) @raw = raw end def message @message ||= Mail::Message.new(@raw) + rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError => e + raise EmailUnparsableError, e end def process - return unless message && sent_notification + raise EmptyEmailError if @raw.blank? + + raise AutoGeneratedEmailError if message.header.to_s =~ /auto-(generated|replied)/ + + raise SentNotificationNotFound unless sent_notification + + author = sent_notification.recipient + + raise UserNotFoundError unless author + + project = sent_notification.project + + raise UserNotAuthorizedLevelError unless author.can?(:create_note, project) + + raise NoteableNotFoundError unless sent_notification.noteable + + body = parse_body(message) - Notes::CreateService.new( - sent_notification.project, - sent_notification.recipient, - note: message.text_part.to_s, + note = Notes::CreateService.new( + project, + author, + note: body, noteable_type: sent_notification.noteable_type, noteable_id: sent_notification.noteable_id, commit_id: sent_notification.commit_id ).execute + + unless note.persisted? + raise InvalidNote, note.errors.full_messages.join("\n") + end end private def reply_key - address = Gitlab.config.reply_by_email.address - return nil unless address - - regex = Regexp.escape(address) - regex = regex.gsub(Regexp.escape('%{reply_key}'), "(.*)") - regex = Regexp.new(regex) - - address = message.to.find { |address| address =~ regex } - return nil unless address + reply_key = nil + message.to.each do |address| + reply_key = Gitlab::ReplyByEmail.reply_key_from_address(address) + break if reply_key + end - match = address.match(regex) - - return nil unless match && match[1].present? - - match[1] + reply_key end def sent_notification @@ -46,5 +71,86 @@ module Gitlab SentNotification.for(reply_key) end + + def parse_body(message) + body = select_body(message) + + encoding = body.encoding + raise EmptyEmailError if body.strip.blank? + + body = discourse_email_trimmer(body) + raise EmptyEmailError if body.strip.blank? + + body = EmailReplyParser.parse_reply(body) + raise EmptyEmailError if body.strip.blank? + + body.force_encoding(encoding).encode("UTF-8") + end + + def select_body(message) + html = nil + text = nil + + if message.multipart? + html = fix_charset(message.html_part) + text = fix_charset(message.text_part) + elsif message.content_type =~ /text\/html/ + html = fix_charset(message) + end + + # prefer plain text + return text if text + + if html + body = EmailHtmlCleaner.new(html).output_html + else + body = fix_charset(message) + end + + # Certain trigger phrases that means we didn't parse correctly + if body =~ /(Content\-Type\:|multipart\/alternative|text\/plain)/ + raise EmptyEmailError + end + + body + end + + # Force encoding to UTF-8 on a Mail::Message or Mail::Part + def fix_charset(object) + return nil if object.nil? + + if object.charset + object.body.decoded.force_encoding(object.charset.gsub(/utf8/i, "UTF-8")).encode("UTF-8").to_s + else + object.body.to_s + end + rescue + nil + end + + REPLYING_HEADER_LABELS = %w(From Sent To Subject Reply To Cc Bcc Date) + REPLYING_HEADER_REGEX = Regexp.union(REPLYING_HEADER_LABELS.map { |label| "#{label}:" }) + + def discourse_email_trimmer(body) + lines = body.scrub.lines.to_a + range_end = 0 + + lines.each_with_index do |l, idx| + break if l =~ /\A\s*\-{3,80}\s*\z/ || + # This one might be controversial but so many reply lines have years, times and end with a colon. + # Let's try it and see how well it works. + (l =~ /\d{4}/ && l =~ /\d:\d\d/ && l =~ /\:$/) || + (l =~ /On \w+ \d+,? \d+,?.*wrote:/) + + # Headers on subsequent lines + break if (0..2).all? { |off| lines[idx+off] =~ REPLYING_HEADER_REGEX } + # Headers on the same line + break if REPLYING_HEADER_LABELS.count { |label| l.include?(label) } >= 3 + + range_end = idx + end + + lines[0..range_end].join.strip + end end end diff --git a/lib/gitlab/reply_by_email.rb b/lib/gitlab/reply_by_email.rb new file mode 100644 index 00000000000..b6157de3610 --- /dev/null +++ b/lib/gitlab/reply_by_email.rb @@ -0,0 +1,47 @@ +module Gitlab + module ReplyByEmail + class << self + def enabled? + config.enabled && + config.address && + config.address.include?("%{reply_key}") + end + + def reply_key + return nil unless enabled? + + SecureRandom.hex(16) + end + + def reply_address(reply_key) + config.address.gsub('%{reply_key}', reply_key) + end + + def reply_key_from_address(address) + return unless address_regex + + match = address.match(address_regex) + return unless match + + match[1] + end + + private + + def config + Gitlab.config.reply_by_email + end + + def address_regex + @address_regex ||= begin + wildcard_address = config.address + return nil unless wildcard_address + + regex = Regexp.escape(wildcard_address) + regex = regex.gsub(Regexp.escape('%{reply_key}'), "(.+)") + Regexp.new(regex).freeze + end + end + end + end +end |