Welcome to mirror list, hosted at ThFree Co, Russian Federation.

html_parser.rb « email « gitlab « lib - gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 27ba5d2a3143000d82b5b5428451b2b2f15bb536 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# frozen_string_literal: true

module Gitlab
  module Email
    # Turns the HTML body of an email into plain text after stripping
    # <blockquote> and <table> elements and unwrapping anchors that carry
    # no href attribute.
    class HTMLParser
      class << self
        # Builds a parser for +raw_body+ and returns its filtered plain text.
        #
        # @param raw_body the HTML source handed to Nokogiri::HTML.parse
        # @return whatever Html2Text.convert yields for the filtered markup
        def parse_reply(raw_body)
          parser = new(raw_body)
          parser.filtered_text
        end
      end

      attr_reader :raw_body

      # @param raw_body the HTML source to be parsed lazily by #document
      def initialize(raw_body)
        @raw_body = raw_body
      end

      # Parsed representation of #raw_body; parsed on first use and reused
      # afterwards, so mutations made by #filter_replies! stick.
      def document
        return @document if @document

        @document = Nokogiri::HTML.parse(raw_body)
      end

      # Mutates #document in place: drops every <blockquote>, then every
      # <table>, then unwraps anchors without an href.
      #
      # @return the result of iterating the href-less anchor node set
      def filter_replies!
        # Order matters only in that it mirrors one removal pass per query.
        ['//blockquote', '//table'].each do |query|
          document.xpath(query).each(&:remove)
        end

        # Outlook sometimes inserts <a> tags that have no href. Html2Text
        # can render those with stray square brackets, so each such anchor
        # is swapped for its own children before conversion.
        document.xpath('//a[not(@href)]').each { |anchor| anchor.replace(anchor.children) }
      end

      # Inner HTML of the document once #filter_replies! has run; computed
      # on first call and cached.
      def filtered_html
        return @filtered_html if @filtered_html

        filter_replies!
        @filtered_html = document.inner_html
      end

      # Plain-text rendering of #filtered_html via Html2Text; cached.
      def filtered_text
        return @filtered_text if @filtered_text

        @filtered_text = Html2Text.convert(filtered_html)
      end
    end
  end
end