Welcome to mirror list, hosted at ThFree Co, Russian Federation.

syntax_highlight_filter.rb « filter « banzai « lib - gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 766715d9e391d96ffd3c2a4efbcc15870b2f7f1b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# frozen_string_literal: true

require 'rouge/plugins/common_mark'
require 'asciidoctor/extensions/asciidoctor_kroki/version'
require 'asciidoctor/extensions/asciidoctor_kroki/extension'

# Generated HTML is transformed back to GFM by app/assets/javascripts/behaviors/markdown/nodes/code_block.js
module Banzai
  module Filter
    # HTML Filter to highlight fenced code blocks
    #
    class SyntaxHighlightFilter < HTML::Pipeline::Filter
      include OutputSafety

      LANG_PARAMS_DELIMITER = ':'
      LANG_PARAMS_ATTR = 'data-lang-params'
      CSS_CLASSES = 'code highlight js-syntax-highlight'

      CSS   = 'pre:not([data-kroki-style]) > code:only-child'
      XPATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS).freeze

      def call
        doc.xpath(XPATH).each do |node|
          highlight_node(node)
        end

        doc
      end

      def highlight_node(node)
        return if node.parent&.parent.nil?

        lang, lang_params = parse_lang_params(node)
        retried = false

        if use_rouge?(lang)
          lexer = lexer_for(lang)
          language = lexer.tag
        else
          lexer = Rouge::Lexers::PlainText.new
          language = lang
        end

        begin
          code = Rouge::Formatters::HTMLGitlab.format(lex(lexer, node.text), tag: language)
        rescue StandardError
          # Gracefully handle syntax highlighter bugs/errors to ensure users can
          # still access an issue/comment/etc. First, retry with the plain text
          # filter. If that fails, then just skip this entirely, but that would
          # be a pretty bad upstream bug.
          return if retried

          language = nil
          lexer = Rouge::Lexers::PlainText.new
          retried = true

          retry
        end

        # maintain existing attributes already added. e.g math and mermaid nodes
        node.children = code
        pre_node = node.parent

        # ensure there are no extra children, such as a text node that might
        # show up from an XSS attack
        pre_node.children = node

        pre_node[:lang] = language
        pre_node.add_class(CSS_CLASSES)
        pre_node.add_class("language-#{language}") if language
        pre_node.set_attribute('data-canonical-lang', escape_once(lang)) if lang != language
        pre_node.set_attribute(LANG_PARAMS_ATTR, escape_once(lang_params)) if lang_params.present?
        pre_node.set_attribute('v-pre', 'true')
        pre_node.remove_attribute('data-meta')

        highlighted = %(<div class="gl-relative markdown-code-block js-markdown-code">#{pre_node.to_html}<copy-code></copy-code></div>)

        # Extracted to a method to measure it
        replace_pre_element(pre_node, highlighted)
      end

      private

      def parse_lang_params(node)
        node = node.parent

        # Commonmarker's FULL_INFO_STRING render option works with the space delimiter.
        # But the current behavior of GitLab's markdown renderer is different - it grabs everything as the single
        # line, including language and its options. To keep backward compatibility, we have to parse the old format and
        # merge with the new one.
        #
        # Behaviors before separating language and its parameters:
        # Old ones:
        # "```ruby with options```" -> '<pre><code lang="ruby with options">'.
        # "```ruby:with:options```" -> '<pre><code lang="ruby:with:options">'.
        #
        # New ones:
        # "```ruby with options```" -> '<pre><code lang="ruby" data-meta="with options">'.
        # "```ruby:with:options```" -> '<pre><code lang="ruby:with:options">'.

        language = node.attr('lang')

        return unless language

        language, language_params = language.split(LANG_PARAMS_DELIMITER, 2)
        language_params = [node.attr('data-meta'), language_params].compact.join(' ')

        [language, language_params]
      end

      # Separate method so it can be instrumented.
      def lex(lexer, code)
        lexer.lex(code)
      end

      def lexer_for(language)
        (Rouge::Lexer.find(language) || Rouge::Lexers::PlainText).new
      end

      # Replace the `pre` element with the entire highlighted block
      def replace_pre_element(pre_node, highlighted)
        pre_node.replace(highlighted)
      end

      def use_rouge?(language)
        (%w(math suggestion) + ::AsciidoctorExtensions::Kroki::SUPPORTED_DIAGRAM_NAMES).exclude?(language)
      end
    end
  end
end