lib/gitlab/database/partitioning_migration_helpers/index_helpers.rb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130

# frozen_string_literal: true

module Gitlab
  module Database
    module PartitioningMigrationHelpers
      module IndexHelpers
        include Gitlab::Database::MigrationHelpers
        include Gitlab::Database::SchemaHelpers

        ERROR_SCOPE = 'index'

        # Concurrently creates a new index on a partitioned table. In concept this works similarly to
        # `add_concurrent_index`, and won't block reads or writes on the table while the index is being built.
        #
        # A special helper is required for partitioning because Postgres does not support concurrently building indexes
        # on partitioned tables. This helper concurrently adds the same index to each partition, and creates the final
        # index on the parent table once all of the partitions are indexed. This is the recommended safe way to add
        # indexes to partitioned tables.
        #
        # Example:
        #
        #     add_concurrent_partitioned_index :users, :some_column
        #
        # See Rails' `add_index` for more info on the available arguments.
        def add_concurrent_partitioned_index(table_name, column_names, options = {})
          assert_not_in_transaction_block(scope: ERROR_SCOPE)

          raise ArgumentError, 'A name is required for indexes added to partitioned tables' unless options[:name]

          partitioned_table = find_partitioned_table(table_name)

          if index_name_exists?(table_name, options[:name])
            Gitlab::AppLogger.warn "Index not created because it already exists (this may be due to an aborted" \
              " migration or similar): table_name: #{table_name}, index_name: #{options[:name]}"

            return
          end

          partitioned_table.postgres_partitions.order(:name).each do |partition|
            partition_index_name = generated_index_name(partition.identifier, options[:name])
            partition_options = options.merge(name: partition_index_name)

            add_concurrent_index(partition.identifier, column_names, partition_options)
          end

          with_lock_retries do
            add_index(table_name, column_names, **options)
          end
        end

        # Safely removes an existing index from a partitioned table. The method name is a bit inaccurate as it does not
        # drop the index concurrently, but it's named as such to maintain consistency with other similar helpers, and
        # indicate that this should be safe to use in a production environment.
        #
        # In current versions of Postgres it's impossible to drop an index concurrently, or drop an index from an
        # individual partition that exists across the entire partitioned table. As a result this helper drops the index
        # from the parent table, which automatically cascades to all partitions. While this does require an exclusive
        # lock, dropping an index is a fast operation that won't block the table for a significant period of time.
        #
        # Example:
        #
        #     remove_concurrent_partitioned_index_by_name :users, 'index_name_goes_here'
        def remove_concurrent_partitioned_index_by_name(table_name, index_name)
          assert_not_in_transaction_block(scope: ERROR_SCOPE)

          find_partitioned_table(table_name)

          unless index_name_exists?(table_name, index_name)
            Gitlab::AppLogger.warn "Index not removed because it does not exist (this may be due to an aborted " \
              "migration or similar): table_name: #{table_name}, index_name: #{index_name}"

            return
          end

          with_lock_retries do
            remove_index(table_name, name: index_name)
          end
        end

        # Finds duplicate indexes for a given schema and table. This finds
        # indexes where the index definition is identical but the names are
        # different. Returns an array of arrays containing duplicate index name
        # pairs.
        #
        # Example:
        #
        #     find_duplicate_indexes('table_name_goes_here')
        def find_duplicate_indexes(table_name, schema_name: connection.current_schema)
          find_indexes(table_name, schema_name: schema_name)
            .group_by { |r| r['index_id'] }
            .select { |_, v| v.size > 1 }
            .map { |_, indexes| indexes.map { |index| index['index_name'] } }
        end

        private

        def find_indexes(table_name, schema_name: connection.current_schema)
          indexes = connection.select_all(<<~SQL, 'SQL', [schema_name, table_name])
            SELECT n.nspname AS schema_name,
                   c.relname AS table_name,
                   i.relname AS index_name,
                   regexp_replace(pg_get_indexdef(i.oid), 'INDEX .*? USING', '_') AS index_id
            FROM pg_index x
              JOIN pg_class c ON c.oid = x.indrelid
              JOIN pg_class i ON i.oid = x.indexrelid
              LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
            WHERE (c.relkind = ANY (ARRAY['r'::"char", 'm'::"char", 'p'::"char"]))
              AND (i.relkind = ANY (ARRAY['i'::"char", 'I'::"char"]))
              AND n.nspname = $1
              AND c.relname = $2;
          SQL

          indexes.to_a
        end

        def find_partitioned_table(table_name)
          partitioned_table = Gitlab::Database::PostgresPartitionedTable.find_by_name_in_current_schema(table_name)

          raise ArgumentError, "#{table_name} is not a partitioned table" unless partitioned_table

          partitioned_table
        end

        def generated_index_name(partition_name, index_name)
          object_name("#{partition_name}_#{index_name}", 'index')
        end
      end
    end
  end
end