1 files changed, 53 insertions, 12 deletions
diff --git a/lib/banzai/filter/markdown_post_escape_filter.rb b/lib/banzai/filter/markdown_post_escape_filter.rb
index 8c0bd62f80a..4d37fba33aa 100644
--- a/lib/banzai/filter/markdown_post_escape_filter.rb
+++ b/lib/banzai/filter/markdown_post_escape_filter.rb
@@ -7,11 +7,11 @@ module Banzai
       LITERAL_KEYWORD   = MarkdownPreEscapeFilter::LITERAL_KEYWORD
       LITERAL_REGEX     = %r{#{LITERAL_KEYWORD}-(.*?)-#{LITERAL_KEYWORD}}.freeze
       NOT_LITERAL_REGEX = %r{#{LITERAL_KEYWORD}-((%5C|\\).+?)-#{LITERAL_KEYWORD}}.freeze
-      SPAN_REGEX        = %r{<span>(.*?)</span>}.freeze
+      SPAN_REGEX        = %r{<span data-escaped-char>(.*?)</span>}.freeze
 
-      XPATH_A         = Gitlab::Utils::Nokogiri.css_to_xpath('a').freeze
-      XPATH_LANG_TAG  = Gitlab::Utils::Nokogiri.css_to_xpath('pre').freeze
-      XPATH_CODE_SPAN = Gitlab::Utils::Nokogiri.css_to_xpath('code > span').freeze
+      XPATH_A            = Gitlab::Utils::Nokogiri.css_to_xpath('a').freeze
+      XPATH_LANG_TAG     = Gitlab::Utils::Nokogiri.css_to_xpath('pre').freeze
+      XPATH_ESCAPED_CHAR = Gitlab::Utils::Nokogiri.css_to_xpath('span[data-escaped-char]').freeze
 
       def call
         return doc unless result[:escaped_literals]
@@ -22,7 +22,7 @@ module Banzai
         @doc = parse_html(new_html)
 
         remove_spans_in_certain_attributes
-        remove_spans_in_code
+        remove_unnecessary_escapes
 
         doc
       end
@@ -57,7 +57,7 @@ module Banzai
           escaped_item = Banzai::Filter::MarkdownPreEscapeFilter::ESCAPABLE_CHARS.find { |item| item[:token] == last_match_token }
           escaped_char = escaped_item ? escaped_item[:char] : ::Regexp.last_match(1)
 
-          "<span>#{escaped_char}</span>"
+          "<span data-escaped-char>#{escaped_char}</span>"
         end
 
         html
@@ -75,14 +75,55 @@ module Banzai
         end
       end
 
-      # Any `<span>` that makes it into a `<code>` element is from the math processing,
-      # convert back to the escaped character, such as `\$`
-      def remove_spans_in_code
-        doc.xpath(XPATH_CODE_SPAN).each do |node|
-          escaped_item = Banzai::Filter::MarkdownPreEscapeFilter::ESCAPABLE_CHARS.find { |item| item[:char] == node.content && item[:latex] }
+      # Goes through every `data-escaped-char` span and unwraps the ones whose
+      # escape no longer serves a purpose. Spans whose content is not a known
+      # escapable character are left untouched. For the rest:
+      #
+      # - directly inside `<code>`: put back the backslash form, such as `\$`.
+      #   Usually this only happens for dollar math.
+      # - latex-only character elsewhere: emit the regular character, since it is
+      #   outside of a code block.
+      # - reference character: keep the span, so that an escaped reference such
+      #   as \@username short-circuits our normal reference processing.
+      def remove_unnecessary_escapes
+        escapables = Banzai::Filter::MarkdownPreEscapeFilter::ESCAPABLE_CHARS
+
+        doc.xpath(XPATH_ESCAPED_CHAR).each do |span|
+          match = escapables.find { |candidate| candidate[:char] == span.content }
+          next unless match
+
+          in_code = span.parent.name == 'code'
+          latex_only = match[:latex] && !match[:reference]
+          next unless in_code || latex_only
+
+          text = in_code ? match[:escaped] : match[:char]
+          merge_adjacent_text_nodes(span, +text)
+        end
+      end
+
+      def text_node?(node)
+        node.is_a?(Nokogiri::XML::Text) # also false for nil, so callers may pass a missing sibling as-is
+      end
 
-          node.replace(escaped_item[:escaped]) if escaped_item
+      # Replaces `node` with `content` (a mutable String), after folding any text
+      # node directly before or after it into `content`. Our reference processing
+      # requires a single string of text to match against, so neither
+      #   #(Text "~c_bug"), #(Element:0x57724 { name = "span" }, children = [ #(Text "_")] })]
+      # nor `#(Text "~c_bug"), #(Text "_")` would match, but `#(Text "~c_bug_")` does.
+      # Neighbours are serialized with `to_s` rather than `content`: `replace` parses
+      # a String as markup, so entity-decoded text like `<b>` would become an element.
+      def merge_adjacent_text_nodes(node, content)
+        if text_node?(node.previous)
+          content.prepend(node.previous.to_s)
+          node.previous.remove
         end
+
+        if text_node?(node.next)
+          content.concat(node.next.to_s)
+          node.next.remove
+        end
+
+        node.replace(content)
       end
     end
   end