1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
|
# frozen_string_literal: true
# Generated HTML is transformed back to GFM by app/assets/javascripts/behaviors/markdown/nodes/reference.js
module Banzai
module Filter
module References
# Base class for GitLab Flavored Markdown reference filters.
#
# References within <pre>, <code>, <a>, and <style> elements are ignored.
#
# Context options:
# :project (required) - Current project, ignored if reference is cross-project.
# :only_path - Generate path-only links.
class ReferenceFilter < HTML::Pipeline::Filter
include RequestStoreReferenceCache
include OutputSafety
REFERENCE_TYPE_DATA_ATTRIBUTE = 'data-reference-type='
class << self
  # Class-level settings that every concrete filter assigns in its own body:
  #
  #   self.reference_type = :merge_request
  #   self.object_class   = MergeRequest
  attr_accessor :reference_type, :object_class

  # Builds a filter from the given arguments and returns the result of its
  # `call_and_update_nodes`.
  def call(doc, context = nil, result = nil)
    filter = new(doc, context, result)
    filter.call_and_update_nodes
  end
end
# Initializes the filter through the parent initializer, then prepares the
# node bookkeeping used while references are replaced.
#
# doc, context, result - forwarded unchanged to the parent initializer.
def initialize(doc, context = nil, result = nil)
  super(doc, context, result)

  # `self.result` is deliberate: the bare name would read the `result`
  # parameter instead of calling the `result` method.
  @nodes = self.result[:reference_filter_nodes]
  @new_nodes = {}
end
# Runs `call` inside `with_update_nodes` and returns what that wrapper returns.
def call_and_update_nodes
with_update_nodes { call }
end
# Walks every collected node and replaces object references with the HTML
# produced by `object_link_filter`.
#
# Text nodes are handled through `replace_text_when_pattern_matches`. Element
# nodes are rewritten only when their unescaped href matches
# `object_reference_pattern` from start to end.
#
# Returns doc.
def call
  whole_link_pattern = /\A#{object_reference_pattern}\z/

  nodes.each_with_index do |node, index|
    if text_node?(node)
      replace_text_when_pattern_matches(node, index, object_reference_pattern) do |content|
        object_link_filter(content, object_reference_pattern)
      end
      next
    end

    next unless element_node?(node)

    yield_valid_link(node) do |link, inner_html|
      next unless link =~ whole_link_pattern

      replace_link_node_with_href(node, index, link) do
        object_link_filter(link, object_reference_pattern, link_content: inner_html)
      end
    end
  end

  doc
end
# Public: Find references in text (like `!123` for merge requests).
#
# This base implementation only raises; concrete filters provide the real one.
# Intended usage:
#
#   references_in(text) do |match, id, project_ref, matches|
#     object = find_object(project_ref, id)
#     "<a href=...>#{object.to_reference}</a>"
#   end
#
# text    - String text to search.
# pattern - pattern to search with (defaults to `object_reference_pattern`).
#
# Implementations yield the String match, the Integer referenced object ID,
# an optional String of the external project reference, and all of the
# matchdata, and return a String replaced with the return of the block.
#
# Raises NotImplementedError when called on this base implementation.
def references_in(text, pattern = object_reference_pattern)
  message = "#{self.class} must implement method: #{__callee__}"
  raise NotImplementedError, message
end
# Iterates over every node in the document selected by `query`, i.e. the
# <a> and text() nodes.
#
# Text nodes are skipped whenever their ancestor is one of the nodes returned
# by `ignore_ancestor_query`. Link tags are not processed if they have a
# "gfm" class or the "href" attribute is empty.
#
# Returns an Enumerator when no block is given.
def each_node
  return enum_for(:each_node) unless block_given?

  matching = doc.xpath(query)
  matching.each { |found| yield found }
end
# Returns the Array of nodes this filter works on.
#
# A list already held in @nodes is reused; otherwise the nodes are collected
# once through `each_node` and remembered.
def nodes
  return @nodes if @nodes

  @nodes = each_node.to_a
end
# Returns the `object_class` configured on this filter's class.
def object_class
self.class.object_class
end
# Returns the value stored under :project in the filter context.
def project
context[:project]
end
# Returns the value stored under :group in the filter context.
def group
context[:group]
end
# Always false in this base class.
# NOTE(review): presumably a hook that subclasses override when their input
# needs unescaping — confirm against the subclasses.
def requires_unescaping?
false
end
private
# Returns a data attribute String to attach to a reference link
#
# attributes - Hash, where the key becomes the data attribute name and the
#              value is the data attribute value. Entries whose value is nil
#              are dropped. The caller's Hash is not modified.
#
# :container and :placement default to 'body' and 'top' when not supplied, and
# :original is removed when the context sets :no_original_data.
#
# Examples (for a filter class whose reference_type is :issue):
#
#   data_attribute(project: 1, issue: 2)
#   # => "data-reference-type=\"issue\" data-project=\"1\" data-issue=\"2\" data-container=\"body\" data-placement=\"top\""
#
#   data_attribute(project: 3, merge_request: 4)
#   # => "data-reference-type=\"issue\" data-project=\"3\" data-merge-request=\"4\" data-container=\"body\" data-placement=\"top\""
#
# Returns a String
def data_attribute(attributes = {})
  attributes = attributes.compact

  # "data-reference-type=" attribute got moved into a constant because we need
  # to use it on ReferenceRewriter class to detect if the markdown contains any reference
  #
  # The value is wrapped in double quotes, like every other attribute emitted
  # below, so the generated markup matches the documented format.
  reference_type_attribute =
    %(#{REFERENCE_TYPE_DATA_ATTRIBUTE}"#{escape_once(self.class.reference_type)}" )

  attributes[:container] ||= 'body'
  attributes[:placement] ||= 'top'
  attributes.delete(:original) if context[:no_original_data]

  data_attributes = attributes.map do |key, value|
    %(data-#{key.to_s.dasherize}="#{escape_once(value)}")
  end

  "#{reference_type_attribute}#{data_attributes.join(' ')}"
end
# Returns the XPath condition naming the ancestors under which nodes are
# ignored: pre, code, a and style, plus blockquote when the context sets
# :ignore_blockquotes. The String is built once and reused.
def ignore_ancestor_query
  return @ignore_ancestor_query if @ignore_ancestor_query

  ancestors = %w[pre code a style]
  ancestors.push('blockquote') if context[:ignore_blockquotes]

  conditions = ancestors.map { |tag| "ancestor::#{tag}" }
  @ignore_ancestor_query = conditions.join(' or ')
end
# Ensure that a :project key exists in context, unless the context sets
# :skip_project_check.
#
# Note that while the key might exist, its value could be nil!
#
# NOTE(review): `needs` is not defined in this file; presumably it comes from
# the parent filter class and raises when the key is missing — confirm.
def validate
needs :project unless skip_project_check?
end
# Returns the value stored under :user in the filter context.
def user
context[:user]
end
# Returns the value stored under :skip_project_check in the filter context;
# `validate` skips its :project requirement when this is truthy.
def skip_project_check?
context[:skip_project_check]
end
# Returns the CSS class String for a reference link of the given type.
#
# type    - interpolated into the "gfm-<type>" class name.
# tooltip - when truthy (the default), "has-tooltip" is appended.
def reference_class(type, tooltip: true)
  css_classes = ['gfm', "gfm-#{type}"]
  css_classes << 'has-tooltip' if tooltip
  css_classes.join(' ')
end
# Yields the link's unescaped URL and the node's inner HTML, provided the
# unescaped URL is valid UTF-8. Otherwise nothing is yielded and nil is
# returned.
def yield_valid_link(node)
  href = node.attr('href').to_s
  link = unescape_link(href)
  inner_html = node.inner_html

  yield link, inner_html if link.force_encoding('UTF-8').valid_encoding?
end
# Returns `href` unescaped by Addressable::URI.unescape.
def unescape_link(href)
# We cannot use CGI.unescape here because it also converts `+` to spaces.
# We need to keep the `+` for expanded reference formats.
Addressable::URI.unescape(href)
end
# Returns `text`, converted to a String, with its HTML entities decoded by
# CGI.unescapeHTML.
def unescape_html_entities(text)
  escaped = text.to_s
  CGI.unescapeHTML(escaped)
end
# Returns `text`, converted to a String, with HTML special characters escaped
# by CGI.escapeHTML.
def escape_html_entities(text)
  raw = text.to_s
  CGI.escapeHTML(raw)
end
# Rewrites a text node when its text matches `pattern`.
#
# For a Gitlab::UntrustedRegexp or Regexp pattern, nothing happens unless the
# node's text matches. Otherwise the node's HTML is yielded, and the node is
# replaced with the block's return value when that differs from the yielded
# HTML.
#
# node    - the text node to inspect
# index   - index passed on to `replace_text_with_html`
# pattern - pattern the node's text is tested against
def replace_text_when_pattern_matches(node, index, pattern)
  case pattern
  when Gitlab::UntrustedRegexp
    return unless pattern.match?(node.text)
  when Regexp
    return unless pattern =~ node.text
  end

  original_html = node.to_html
  replacement = yield original_html
  return if replacement == original_html

  replace_text_with_html(node, index, replacement)
end
# Replaces `node` with the HTML returned by the block, unless that HTML equals
# the node's text.
#
# node  - the link node
# index - index passed on to `replace_text_with_html`
def replace_link_node_with_text(node, index)
  replacement = yield
  return if replacement == node.text

  replace_text_with_html(node, index, replacement)
end
# Replaces `node` with the HTML returned by the block, unless that HTML equals
# `link`.
#
# node  - the link node
# index - index passed on to `replace_text_with_html`
# link  - the link's URL, compared against the block's result
def replace_link_node_with_href(node, index, link)
  replacement = yield
  return if replacement == link

  replace_text_with_html(node, index, replacement)
end
# Returns true when `node` is a Nokogiri::XML::Text.
def text_node?(node)
node.is_a?(Nokogiri::XML::Text)
end
# Returns true when `node` is a Nokogiri::XML::Element.
def element_node?(node)
node.is_a?(Nokogiri::XML::Element)
end
# Returns `object_class.reference_pattern`, looked up once and then reused.
def object_reference_pattern
  return @object_reference_pattern if @object_reference_pattern

  @object_reference_pattern = object_class.reference_pattern
end
# Returns the underscored name of `object_class`, computed once and reused.
def object_name
  return @object_name if @object_name

  @object_name = object_class.name.underscore
end
# Returns `object_name` as a Symbol, computed once and reused.
def object_sym
  return @object_sym if @object_sym

  @object_sym = object_name.to_sym
end
# Abstract hook that concrete filters implement. `call` invokes it with the
# text (or link) to process and the reference pattern, and uses the returned
# HTML as the replacement.
#
# Raises NotImplementedError in this base implementation.
def object_link_filter(text, pattern, link_content: nil, link_reference: false)
  message = "#{self.class} must implement method: #{__callee__}"
  raise NotImplementedError, message
end
# Returns the XPath expression used to collect nodes, built once and reused:
# text nodes for which `ignore_ancestor_query` does not hold, plus <a>
# elements that have no "gfm" class and whose href is not the empty string.
def query
  return @query if @query

  # The continuation lines stay flush left so the expression text is unchanged.
  @query = %{descendant-or-self::text()[not(#{ignore_ancestor_query})]
| descendant-or-self::a[
not(contains(concat(" ", @class, " "), " gfm ")) and not(@href = "")
]}
end
# Delegates to `replace_and_update_new_nodes` with the same arguments.
def replace_text_with_html(node, index, html)
replace_and_update_new_nodes(node, index, html)
end
# Replaces `node` with `html` and records, under `index` in @new_nodes, the
# nodes matching `query` that are found within the replacement nodes.
#
# node  - the node being replaced
# index - key under which the new nodes are stored; callers in this file pass
#         the node's position in `nodes`
# html  - replacement handed to `node.replace`
def replace_and_update_new_nodes(node, index, html)
# Capture the neighbours and the parent before replacing, so the replaced
# range can be located in the document afterwards
previous_node = node.previous
next_node = node.next
parent_node = node.parent
# Unfortunately node.replace(html) returns re-parented nodes, not the actual replaced nodes in the doc
# We need to find the actual nodes in the doc that were replaced
node.replace(html)
@new_nodes[index] = []
# We replaced node with new nodes, so we find first new node. If previous_node is nil, we take first parent child
new_node = previous_node ? previous_node.next : parent_node&.children&.first
# We iterate from first to last replaced node and store replaced nodes in @new_nodes
while new_node && new_node != next_node
@new_nodes[index] << new_node.xpath(query)
new_node = new_node.next
end
# Each xpath call appended a collection; flatten them into one list of nodes
@new_nodes[index].flatten!
end
# Returns the value stored under :only_path in the filter context.
def only_path?
context[:only_path]
end
# Resets @new_nodes, runs the block, then applies the recorded replacements
# through `update_nodes!`.
#
# Returns the block's return value.
def with_update_nodes
  @new_nodes = {}

  outcome = yield
  update_nodes!
  outcome
end
# Once the filter has finished replacing nodes, splices every recorded
# replacement from @new_nodes into `nodes` and stores the updated list in
# result[:reference_filter_nodes].
def update_nodes!
  # Work from the highest index down so that earlier positions stay valid
  # while entries are swapped for a different number of nodes.
  @new_nodes.keys.sort.reverse_each do |index|
    nodes[index, 1] = @new_nodes[index]
  end

  result[:reference_filter_nodes] = nodes
end
end
end
end
end
|