Welcome to mirror list, hosted at ThFree Co, Russian Federation.

20200716234259_remove_duplicate_labels_from_group.rb « post_migrate « db - gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: f19a209092bff000c9cf6e4d72952e92f9b622b3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# frozen_string_literal: true

class RemoveDuplicateLabelsFromGroup < ActiveRecord::Migration[6.0]
  DOWNTIME = false

  CREATE = 1
  RENAME = 2

  disable_ddl_transaction!

  class BackupLabel < ApplicationRecord
    include EachBatch

    self.table_name = 'backup_labels'
  end

  class Label < ApplicationRecord
    self.table_name = 'labels'
  end

  class Group < ApplicationRecord
    include EachBatch

    self.table_name = 'namespaces'
  end

  BATCH_SIZE = 10_000

  def up
    # Split to smaller chunks
    # Loop rather than background job, every 10,000
    # there are ~1,800,000 groups in total (excluding personal namespaces, which can't have labels)
    Group.where(type: 'Group').each_batch(of: BATCH_SIZE) do |batch|
      range = batch.pluck('MIN(id)', 'MAX(id)').first

      transaction do
        remove_full_duplicates(*range)
      end

      transaction do
        rename_partial_duplicates(*range)
      end
    end
  end

  DOWN_BATCH_SIZE = 1000

  def down
    BackupLabel.where('project_id IS NULL AND group_id IS NOT NULL').each_batch(of: DOWN_BATCH_SIZE) do |batch|
      range = batch.pluck('MIN(id)', 'MAX(id)').first

      restore_renamed_labels(*range)
      restore_deleted_labels(*range)
    end
  end

  def remove_full_duplicates(start_id, stop_id)
    # Fields that are considered duplicate:
    # group_id title template description type color

    duplicate_labels = ApplicationRecord.connection.execute(<<-SQL.squish)
WITH data AS (
  SELECT labels.*,
  row_number() OVER (PARTITION BY labels.group_id, labels.title, labels.template, labels.description, labels.type, labels.color ORDER BY labels.id) AS row_number,
  #{CREATE} AS restore_action
  FROM labels
  WHERE labels.group_id BETWEEN #{start_id} AND #{stop_id}
  AND NOT EXISTS (SELECT * FROM board_labels WHERE board_labels.label_id = labels.id)
  AND NOT EXISTS (SELECT * FROM label_links WHERE label_links.label_id = labels.id)
  AND NOT EXISTS (SELECT * FROM label_priorities WHERE label_priorities.label_id = labels.id)
  AND NOT EXISTS (SELECT * FROM lists WHERE lists.label_id = labels.id)
  AND NOT EXISTS (SELECT * FROM resource_label_events WHERE resource_label_events.label_id = labels.id)
) SELECT * FROM data WHERE row_number > 1;
    SQL

    if duplicate_labels.any?
      # create backup records
      BackupLabel.insert_all!(duplicate_labels.map { |label| label.except("row_number") })

      Label.unscoped.where(id: duplicate_labels.pluck("id")).delete_all
    end
  end

  def rename_partial_duplicates(start_id, stop_id)
    # We need to ensure that the new title (with `_duplicate#{ID}`) doesn't exceed the limit.
    # Truncate the original title (if needed) to 245 characters minus the length of the ID
    # then add `_duplicate#{ID}`

    soft_duplicates = ApplicationRecord.connection.execute(<<-SQL.squish)
WITH data AS (
  SELECT
     *,
     substring(title from 1 for 245 - length(id::text)) || '_duplicate' || id::text as new_title,
     #{RENAME} AS restore_action,
     row_number() OVER (PARTITION BY group_id, title ORDER BY id) AS row_number
  FROM labels
  WHERE group_id BETWEEN #{start_id} AND #{stop_id}
) SELECT * FROM data WHERE row_number > 1;
    SQL

    if soft_duplicates.any?
      # create backup records
      BackupLabel.insert_all!(soft_duplicates.map { |label| label.except("row_number") })

      ApplicationRecord.connection.execute(<<-SQL.squish)
UPDATE labels SET title = substring(title from 1 for 245 - length(id::text)) || '_duplicate' || id::text
WHERE labels.id IN (#{soft_duplicates.map { |dup| dup["id"] }.join(", ")});
      SQL
    end
  end

  def restore_renamed_labels(start_id, stop_id)
    # the backup label IDs are not incremental, they are copied directly from the Labels table
    ApplicationRecord.connection.execute(<<-SQL.squish)
WITH backups AS (
  SELECT id, title
  FROM backup_labels
  WHERE id BETWEEN #{start_id} AND #{stop_id}
  AND restore_action = #{RENAME}
) UPDATE labels SET title = backups.title
FROM backups
WHERE labels.id = backups.id;
    SQL
  end

  def restore_deleted_labels(start_id, stop_id)
    ActiveRecord::Base.connection.execute(<<-SQL.squish)
INSERT INTO labels
SELECT id, title, color, group_id, created_at, updated_at, template, description, description_html, type, cached_markdown_version FROM backup_labels
  WHERE backup_labels.id BETWEEN #{start_id} AND #{stop_id}
  AND backup_labels.project_id IS NULL AND backup_labels.group_id IS NOT NULL
  AND backup_labels.restore_action = #{CREATE}
    SQL
  end
end