1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
# frozen_string_literal: true
module BulkImports
module Projects
module Pipelines
class ReferencesPipeline
include Pipeline
BATCH_SIZE = 100
# Builds the lazy stream of records this pipeline will rewrite.
#
# The enumerator yields, in this order: matching issues, matching merge
# requests, matching notes on issues, matching notes on merge requests.
# Nothing is queried until the enumerator is consumed.
#
# @param _context [Object] pipeline context (unused here)
# @return [BulkImports::Pipeline::ExtractedData] wrapper around the enumerator
def extract(_context)
  referencing_records = Enumerator.new do |yielder|
    collectors = [method(:add_matching_objects), method(:add_notes)]

    collectors.each do |collect|
      collect.call(portable.issues, yielder)
      collect.call(portable.merge_requests, yielder)
    end
  end

  BulkImports::Pipeline::ExtractedData.new(data: referencing_records)
end
# Rewrites usernames and source-instance URLs inside the object's body.
#
# The body is copied before editing, then written back with
# assign_attributes; nothing is persisted here.
#
# @param _context [Object] pipeline context (unused here)
# @param object [Object] record exposing the attribute named by body_field
# @return [Object] the same object, with its body attribute reassigned
def transform(_context, object)
  rewritten = object_body(object).dup

  # With a Hash as replacement, gsub! swaps every match for the value stored
  # under the matched text.
  usernames = mapped_usernames
  rewritten.gsub!(username_regex(usernames), usernames)

  matching_urls(object).each do |source_url, destination_url|
    next unless rewritten.include?(source_url)

    rewritten.gsub!(source_url, destination_url)
  end

  object.tap { |record| record.assign_attributes(body_field(record) => rewritten) }
end
# Persists the object, but only when its body attribute reports a change.
#
# @param _context [Object] pipeline context (unused here)
# @param object [Object] record returned by transform
# @return [Object, nil] result of save!, or nil when nothing changed
def load(_context, object)
  return unless object_body_changed?(object)

  object.save!
end
private
# Username mapping from ::BulkImports::UsersMapper#map_usernames with an "@"
# prefixed to every key and every value. Memoized per pipeline instance.
#
# @return [Hash{String => String}] "@key" => "@value" pairs
def mapped_usernames
  return @mapped_usernames if @mapped_usernames

  username_map = ::BulkImports::UsersMapper.new(context: context).map_usernames
  as_mention = ->(username) { "@#{username}" }

  @mapped_usernames = username_map.transform_keys(&as_mention).transform_values(&as_mention)
end
# Regexp that matches any key of the given mapping literally.
#
# Keys are tried longest first, so a shorter key that is a prefix of a longer
# one cannot win the alternation. The result is memoized on first call; later
# calls return the cached Regexp whatever argument is passed.
#
# @param mapped_usernames [Hash{String => String}] mapping whose keys to match
# @return [Regexp]
def username_regex(mapped_usernames)
  @username_regex ||= begin
    longest_first = mapped_usernames.keys.sort_by(&:length).reverse
    alternatives = longest_first.map { |username| Regexp.escape(username) }

    Regexp.new(alternatives.join('|'))
  end
end
# Walks the collection in batches of BATCH_SIZE (batched on :iid) and pushes
# every record whose body mentions the source project path or a mapped
# username.
#
# @param collection [#each_batch] batchable collection of records
# @param enum [#<<] sink receiving the matching records
def add_matching_objects(collection, enum)
  collection.each_batch(of: BATCH_SIZE, column: :iid) do |records|
    records.each do |record|
      next unless object_has_reference?(record) || object_has_username?(record)

      enum << record
    end
  end
end
# Walks every record of the collection (batched on :iid), then every note of
# each record (also batched), and pushes the notes whose body mentions the
# source project path or a mapped username.
#
# @param collection [#each_batch] batchable collection of records with #notes
# @param enum [#<<] sink receiving the matching notes
def add_notes(collection, enum)
  collection.each_batch(of: BATCH_SIZE, column: :iid) do |noteables|
    noteables.each do |noteable|
      noteable.notes.each_batch(of: BATCH_SIZE) do |notes|
        notes.each do |note|
          # Refreshed for every note, whether or not it ends up matching.
          note.refresh_markdown_cache!
          next unless object_has_reference?(note) || object_has_username?(note)

          enum << note
        end
      end
    end
  end
end
# Whether the object's body contains the source project's full path.
#
# @param object [Object] record exposing the attribute named by body_field
# @return [Boolean, nil] nil when the body itself is nil
def object_has_reference?(object)
  body = object_body(object)
  return if body.nil?

  body.include?(source_full_path)
end
# Whether the object's body mentions any of the mapped source usernames.
#
# The body is read once up front: fetching it goes through public_send, so
# reading it again for every candidate username is avoidable work.
#
# @param object [Object] record exposing the attribute named by body_field
# @return [Boolean] false when the body is nil or no mapped username occurs
def object_has_username?(object)
  body = object_body(object)
  return false unless body

  mapped_usernames.each_key.any? { |old_username| body.include?(old_username) }
end
# Current value of the object's body attribute (see body_field).
#
# @param object [Object] record to read from
# @return [Object] whatever the attribute reader returns
def object_body(object)
  call_object_method(object, suffix: nil)
end
# Asks the object whether its body attribute changed, by calling the
# "<body_field>_changed?" method on it.
#
# @param object [Object] record to query
# @return [Object] whatever the "_changed?" method returns
def object_body_changed?(object)
  dirty_check_suffix = '_changed?'

  call_object_method(object, suffix: dirty_check_suffix)
end
# Calls the body-attribute method on the object, optionally with a suffix
# appended to the method name (for example '_changed?').
#
# @param object [Object] receiver of the call
# @param suffix [String, nil] appended to the attribute name when present
# @return [Object] result of the invoked method
def call_object_method(object, suffix: nil)
  field = body_field(object)
  method_name = suffix.present? ? "#{field}#{suffix}" : field

  object.public_send(method_name) # rubocop:disable GitlabSecurity/PublicSend
end
# Name of the attribute that holds the object's text.
#
# @param object [Object] record being processed
# @return [String] 'note' for Note instances, 'description' for anything else
def body_field(object)
  return 'note' if object.is_a?(Note)

  'description'
end
# Finds the http(s) URLs in the object's body that point at the source
# project and pairs each with its rewritten form.
#
# A URL qualifies when its host equals source_host and its path starts with
# "/<source_full_path>".
#
# @param object [Object] record exposing the attribute named by body_field
# @return [Array<Array>] [original_url, new_url] pairs in order of appearance
def matching_urls(object)
  replacements = []

  URI.extract(object_body(object), %w[http https]).each do |candidate|
    uri = URI.parse(candidate)
    on_source_host = source_host == uri.host
    in_source_project = on_source_host && uri.path&.start_with?("/#{source_full_path}")

    replacements << [candidate, new_url(uri)] if in_source_project
  end

  replacements
end
# Rebuilds a source-instance URL so that it targets this instance.
#
# Host, port and scheme are taken from ::Gitlab.config.gitlab, and every
# occurrence of the source full path in the resulting string is replaced by
# portable.full_path.
#
# @param parsed_old_url [URI::Generic] parsed URL found in the source body
# @return [String] the rewritten URL (never nil)
def new_url(parsed_old_url)
  # Work on a copy so the caller's URI object is left untouched.
  destination_url = parsed_old_url.dup
  gitlab_settings = ::Gitlab.config.gitlab

  destination_url.host = gitlab_settings.host
  destination_url.port = gitlab_settings.port
  destination_url.scheme = gitlab_settings.https ? 'https' : 'http'

  # Non-bang gsub on purpose: gsub! returns nil when nothing was replaced,
  # which would hand the caller nil instead of a URL.
  destination_url.to_s.gsub(source_full_path, portable.full_path)
end
# Host component of the source instance URL (context.configuration.url),
# memoized after the first successful lookup.
#
# @return [String, nil]
def source_host
  return @source_host if @source_host

  @source_host = URI.parse(context.configuration.url).host
end
# Full path of the project on the source instance, as recorded on the
# entity being imported.
#
# @return [String]
def source_full_path
  entity = context.entity

  entity.source_full_path
end
end
end
end
end
|