1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
|
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    # Background migration that repairs merge_request_diff_commit rows lacking
    # committer/author details because of
    # https://gitlab.com/gitlab-org/gitlab/-/issues/344080.
    #
    # Every job fixes the data of a single project. The work requires
    # Git/Gitaly access, and duplicating all of that code here would be far too
    # much, so this migration leans on global models such as Project,
    # MergeRequest, etc.
    # rubocop: disable Metrics/ClassLength
    class FixMergeRequestDiffCommitUsers
      # Number of rows requested per keyset-pagination batch.
      BATCH_SIZE = 100

      def initialize
        # Commit details (as hashes) keyed by SHA; filled by #find_commit.
        @commits = {}
        # DiffCommitUser IDs keyed by [name, email]; filled by
        # #find_or_create_user.
        @users = {}
      end

      # Entry point of the job.
      #
      # Processes the project when it can still be found, then marks the
      # tracked job(s) for `[project_id]` as succeeded and finally schedules
      # the next pending job, if there is one.
      def perform(project_id)
        project = ::Project.find_by_id(project_id)
        process(project) if project

        ::Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
          'FixMergeRequestDiffCommitUsers',
          [project_id]
        )

        schedule_next_job
      end

      # Fixes every row that needs attention for all merge requests of the
      # given project.
      def process(project)
        # A single query covering the entire project may time out (e.g. for
        # projects the size of gitlab-org/gitlab), hence the rows are fetched
        # one merge request at a time.
        project.merge_requests.each_batch(column: :iid) do |batch|
          batch.ids.each do |merge_request_id|
            each_row_to_check(merge_request_id) do |row|
              update_commit(project, row)
            end
          end
        end
      end

      # Passes to the block every diff commit row of the given merge request
      # that has no commit_author_id or no committer_id.
      #
      # Rows are ordered by (merge_request_diff_id, relative_order), loaded
      # through keyset pagination in batches of BATCH_SIZE, and only carry the
      # columns needed by #update_commit.
      def each_row_to_check(merge_request_id, &block)
        table = arel_table
        order_columns = %w[merge_request_diff_id relative_order].map do |name|
          Pagination::Keyset::ColumnOrderDefinition.new(
            attribute_name: name,
            order_expression: table[name.to_sym].asc,
            nullable: :not_nullable,
            distinct: false
          )
        end
        keyset_order = Pagination::Keyset::Order.build(order_columns)

        rows_to_fix = MergeRequestDiffCommit
          .joins(:merge_request_diff)
          .where(merge_request_diffs: { merge_request_id: merge_request_id })
          .where('commit_author_id IS NULL OR committer_id IS NULL')
          .order(keyset_order)

        needed_columns = %i[
          merge_request_diff_id
          relative_order
          sha
          committer_id
          commit_author_id
        ]

        iterator = Pagination::Keyset::Iterator
          .new(scope: rows_to_fix, use_union_optimization: true)

        iterator.each_batch(of: BATCH_SIZE) do |batch|
          batch.select(needed_columns).each(&block)
        end
      end

      # Fills in the missing commit_author_id and/or committer_id of a row.
      #
      # The names and emails come from the commit with the row's SHA. A column
      # is only written when it is currently empty and a user ID could be
      # obtained for it; when nothing can be written no query is executed.
      # rubocop: disable Metrics/AbcSize
      def update_commit(project, row)
        commit = find_commit(project, row.sha)

        # Column to fix => commit fields holding the matching name and email.
        sources = {
          commit_author_id: %i[author_name author_email],
          committer_id: %i[committer_name committer_email]
        }

        assignments = []
        sources.each do |column, (name_field, email_field)|
          next if row.public_send(column)

          user_id = find_or_create_user(commit, name_field, email_field)
          assignments << [arel_table[column], user_id] if user_id
        end

        return if assignments.empty?

        sql = Arel::UpdateManager
          .new
          .table(arel_table)
          .where(matches_row(row))
          .set(assignments)
          .to_sql

        MergeRequestDiffCommit.connection.execute(sql)
      end
      # rubocop: enable Metrics/AbcSize

      # Hands the first pending job of this migration (if any) to
      # BackgroundMigrationWorker.perform_in with a delay of two minutes.
      def schedule_next_job
        next_job = Database::BackgroundMigrationJob
          .for_migration_class('FixMergeRequestDiffCommitUsers')
          .pending
          .first

        if next_job
          BackgroundMigrationWorker.perform_in(
            2.minutes,
            'FixMergeRequestDiffCommitUsers',
            next_job.arguments
          )
        end
      end

      # Returns the details of the commit with the given SHA as a hash.
      #
      # An empty hash is returned (and cached) when the project yields no
      # commit for the SHA. Results are cached per SHA for the lifetime of
      # this instance.
      def find_commit(project, sha)
        return @commits[sha] if @commits[sha]

        details = project.commit(sha)&.to_hash
        @commits[sha] = details || {}
      end

      # Returns the ID of the MergeRequest::DiffCommitUser for the name and
      # email stored under the given fields of the commit hash, or nil when
      # either value is missing. IDs are cached per [name, email] pair.
      def find_or_create_user(commit, name_field, email_field)
        name = commit[name_field]
        email = commit[email_field]

        if name && email
          cache_key = [name, email]

          @users[cache_key] ||=
            MergeRequest::DiffCommitUser.find_or_create(name, email).id
        end
      end

      # Builds the Arel condition comparing the grouped
      # (merge_request_diff_id, relative_order) columns with the same pair of
      # values taken from the given row.
      def matches_row(row)
        key_columns = [
          arel_table[:merge_request_diff_id],
          arel_table[:relative_order]
        ]
        key_values = [row.merge_request_diff_id, row.relative_order]

        Arel::Nodes::Grouping
          .new(key_columns)
          .eq(Arel::Nodes::Grouping.new(key_values))
      end

      # Shorthand for the Arel table of MergeRequestDiffCommit.
      def arel_table
        MergeRequestDiffCommit.arel_table
      end
    end
    # rubocop: enable Metrics/ClassLength
  end
end
|