Welcome to mirror list, hosted at ThFree Co, Russian Federation.

fix_merge_request_diff_commit_users.rb « background_migration « gitlab « lib - gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: ea3e56cb14a5eb8fb8b302ace5462513a4e71b8a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # Background migration for fixing merge_request_diff_commit rows that don't
    # have committer/author details due to
    # https://gitlab.com/gitlab-org/gitlab/-/issues/344080.
    #
    # This migration acts on a single project and corrects its data. Because
    # this process needs Git/Gitaly access, and duplicating all that code is far
    # too much, this migration relies on global models such as Project,
    # MergeRequest, etc.
    # rubocop: disable Metrics/ClassLength
    class FixMergeRequestDiffCommitUsers
      # Number of rows requested per batch from the keyset iterator used in
      # #each_row_to_check.
      BATCH_SIZE = 100

      # Sets up the two in-memory caches used while processing:
      #
      # * @commits maps a commit SHA to that commit's Hash representation
      #   (or to an empty Hash when the commit could not be found).
      # * @users maps a [name, email] pair to a MergeRequest::DiffCommitUser ID.
      def initialize
        @users = {}
        @commits = {}
      end

      # Entry point of the job.
      #
      # Corrects the data of the given project (if it can be found), marks the
      # tracking job for this project as succeeded, and then schedules the next
      # pending job, if there is one.
      #
      # project_id - The ID of the project to process.
      def perform(project_id)
        project = ::Project.find_by_id(project_id)

        # Without a project there is nothing to correct, but the job is still
        # marked as succeeded below.
        process(project) if project

        ::Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
          'FixMergeRequestDiffCommitUsers',
          [project_id]
        )

        schedule_next_job
      end

      # Corrects every incomplete diff commit row of the given project.
      #
      # A single query covering the whole project may time out for very large
      # projects (e.g. gitlab-org/gitlab), so the rows are fetched one merge
      # request at a time instead.
      #
      # project - The project whose merge requests should be processed.
      def process(project)
        project.merge_requests.each_batch(column: :iid) do |batch|
          batch.ids.each do |merge_request_id|
            each_row_to_check(merge_request_id) { |row| update_commit(project, row) }
          end
        end
      end

      # Yields each MergeRequestDiffCommit row of the given merge request for
      # which commit_author_id and/or committer_id is NULL.
      #
      # Rows are iterated using keyset pagination, ordered by
      # (merge_request_diff_id, relative_order) and requested BATCH_SIZE rows
      # at a time. Only the columns listed in `wanted` are selected.
      #
      # merge_request_id - The ID of the merge request to check.
      def each_row_to_check(merge_request_id, &block)
        order_columns = %w[merge_request_diff_id relative_order].map do |name|
          Pagination::Keyset::ColumnOrderDefinition.new(
            attribute_name: name,
            order_expression: arel_table[name.to_sym].asc,
            nullable: :not_nullable,
            distinct: false
          )
        end

        keyset_order = Pagination::Keyset::Order.build(order_columns)

        incomplete_rows = MergeRequestDiffCommit
          .joins(:merge_request_diff)
          .where(merge_request_diffs: { merge_request_id: merge_request_id })
          .where('commit_author_id IS NULL OR committer_id IS NULL')
          .order(keyset_order)

        wanted = %i[
          merge_request_diff_id
          relative_order
          sha
          committer_id
          commit_author_id
        ]

        iterator = Pagination::Keyset::Iterator
          .new(scope: incomplete_rows, use_union_optimization: true)

        iterator.each_batch(of: BATCH_SIZE) do |batch|
          batch.select(wanted).each(&block)
        end
      end

      # Fills in the missing commit_author_id and/or committer_id of a single
      # row, based on the commit the row's SHA points to.
      #
      # Columns that already have a value are left alone. If no user ID could
      # be determined for any of the missing columns (e.g. the commit was not
      # found, or lacks a name or email), no UPDATE is executed.
      #
      # project - The project used to look up the commit.
      # row - The row to correct; must respond to `sha`, `commit_author_id`,
      #       `committer_id`, `merge_request_diff_id` and `relative_order`.
      #
      # rubocop: disable Metrics/AbcSize
      def update_commit(project, row)
        commit = find_commit(project, row.sha)

        # Column to fill => commit fields holding the matching name and email.
        sources = {
          commit_author_id: %i[author_name author_email],
          committer_id: %i[committer_name committer_email]
        }

        assignments = sources.each_with_object([]) do |(column, fields), pairs|
          # Leave columns that already hold a value untouched.
          next if row.public_send(column)

          user_id = find_or_create_user(commit, *fields)
          pairs << [arel_table[column], user_id] if user_id
        end

        return if assignments.empty?

        sql = Arel::UpdateManager
          .new
          .table(arel_table)
          .where(matches_row(row))
          .set(assignments)
          .to_sql

        MergeRequestDiffCommit.connection.execute(sql)
      end
      # rubocop: enable Metrics/AbcSize

      # Schedules the first pending job of this migration, if there is one, to
      # run in two minutes using that job's stored arguments.
      def schedule_next_job
        migration = 'FixMergeRequestDiffCommitUsers'

        next_job = Database::BackgroundMigrationJob
          .for_migration_class(migration)
          .pending
          .first

        return unless next_job

        BackgroundMigrationWorker
          .perform_in(2.minutes, migration, next_job.arguments)
      end

      # Returns the Hash representation of the commit with the given SHA, or an
      # empty Hash when the project has no such commit.
      #
      # Results (including the empty Hash) are cached per SHA for the lifetime
      # of this instance.
      #
      # project - The project to load the commit from.
      # sha - The SHA of the commit.
      def find_commit(project, sha)
        cached = @commits[sha]
        return cached if cached

        @commits[sha] = project.commit(sha)&.to_hash || {}
      end

      # Returns the ID of the MergeRequest::DiffCommitUser for the name and
      # email stored in the commit under the given keys, or nil when either
      # value is missing.
      #
      # IDs are cached per [name, email] pair for the lifetime of this
      # instance.
      #
      # commit - The commit data, as returned by #find_commit.
      # name_field - The key of the name, e.g. :author_name.
      # email_field - The key of the email, e.g. :author_email.
      def find_or_create_user(commit, name_field, email_field)
        identity = [commit[name_field], commit[email_field]]

        # Both the name and the email are required.
        return unless identity.all?

        cached = @users[identity]
        return cached if cached

        name, email = identity

        @users[identity] =
          MergeRequest::DiffCommitUser.find_or_create(name, email).id
      end

      # Builds the Arel condition that compares the
      # (merge_request_diff_id, relative_order) column pair against the values
      # of the given row.
      #
      # row - The row to match; must respond to `merge_request_diff_id` and
      #       `relative_order`.
      def matches_row(row)
        key_columns = %i[merge_request_diff_id relative_order]

        columns = Arel::Nodes::Grouping
          .new(key_columns.map { |column| arel_table[column] })

        values = Arel::Nodes::Grouping
          .new(key_columns.map { |column| row.public_send(column) })

        columns.eq(values)
      end

      # Returns the Arel table of MergeRequestDiffCommit.
      def arel_table
        MergeRequestDiffCommit.arel_table
      end
    end
    # rubocop: enable Metrics/ClassLength
  end
end