Welcome to mirror list, hosted at ThFree Co, Russian Federation.

20210226141517_dedup_issue_metrics.rb « post_migrate « db - gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 8228d509e07186d9393fd2f6d640b29d57de6aab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# frozen_string_literal: true

# Collapses duplicated `issue_metrics` rows (several rows sharing one
# `issue_id`) into a single row per issue, then swaps the plain `issue_id`
# index for a unique one.
class DedupIssueMetrics < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  TMP_INDEX_NAME = 'tmp_unique_issue_metrics_by_issue_id'
  OLD_INDEX_NAME = 'index_issue_metrics'
  INDEX_NAME = 'index_unique_issue_metrics_issue_id'
  BATCH_SIZE = 1_000

  # Indexes are created/removed through the `*_concurrent_*` helpers, so the
  # migration opts out of the wrapping DDL transaction.
  disable_ddl_transaction!

  # Minimal model bound directly to the `issue_metrics` table; `EachBatch`
  # provides the `each_batch` iteration used in `#up`.
  class IssueMetrics < ActiveRecord::Base
    self.table_name = 'issue_metrics'

    include EachBatch
  end

  # Deduplicates existing rows and installs the unique index on `issue_id`.
  def up
    IssueMetrics.reset_column_information

    last_metrics_record_id = IssueMetrics.maximum(:id) || 0

    # Partial unique index over rows whose id is above the current maximum:
    # it disallows further duplicates among rows inserted while we're
    # deduplicating the existing data.
    add_concurrent_index(
      :issue_metrics,
      :issue_id,
      where: "id > #{Integer(last_metrics_record_id)}",
      unique: true,
      name: TMP_INDEX_NAME
    )

    IssueMetrics.each_batch(of: BATCH_SIZE) do |relation|
      duplicated_issue_ids_in(relation).each { |issue_id| deduplicate_item(issue_id) }
    end

    # Build the full unique index first, and only then drop the temporary
    # partial index and the old non-unique one.
    add_concurrent_index(:issue_metrics, :issue_id, unique: true, name: INDEX_NAME)
    remove_concurrent_index_by_name(:issue_metrics, TMP_INDEX_NAME)
    remove_concurrent_index_by_name(:issue_metrics, OLD_INDEX_NAME)
  end

  # Restores the non-unique index and removes both unique indexes.
  # Rows deleted by `#up` are not restored.
  def down
    add_concurrent_index(:issue_metrics, :issue_id, name: OLD_INDEX_NAME)
    remove_concurrent_index_by_name(:issue_metrics, TMP_INDEX_NAME)
    remove_concurrent_index_by_name(:issue_metrics, INDEX_NAME)
  end

  private

  # Returns the issue ids that appear in the given batch and currently have
  # more than one `issue_metrics` row in the whole table.
  def duplicated_issue_ids_in(relation)
    IssueMetrics
      .where(issue_id: relation.select(:issue_id))
      .select(:issue_id)
      .group(:issue_id)
      .having('COUNT(issue_metrics.issue_id) > 1')
      .pluck(:issue_id)
  end

  # Merges every `issue_metrics` row of `issue_id` into one row.
  #
  # Rows are visited from oldest to newest `updated_at`; for each column the
  # last non-nil value wins. The newest row is kept and updated with the merged
  # values, the others are deleted, both within a single transaction.
  def deduplicate_item(issue_id)
    # `id` breaks ties between rows sharing the same `updated_at`, so the merge
    # precedence and the surviving row do not depend on arbitrary row order.
    issue_metrics_records = IssueMetrics
      .where(issue_id: issue_id)
      .order(updated_at: :asc, id: :asc)
      .to_a

    # The rows were listed as duplicates by an earlier query; they may have
    # been removed since. With fewer than two rows there is nothing to merge,
    # and with none there is no record to keep.
    return if issue_metrics_records.size < 2

    merged_attributes = issue_metrics_records.each_with_object({}) do |record, merged|
      # `compact` drops nil columns so they never overwrite an older value.
      merged.merge!(record.attributes.except('id').compact)
    end

    *records_to_delete, record_to_keep = issue_metrics_records

    ActiveRecord::Base.transaction do
      IssueMetrics.where(id: records_to_delete.map(&:id)).delete_all
      record_to_keep.update!(merged_attributes)
    end
  end
end