Welcome to mirror list, hosted at ThFree Co, Russian Federation.

index_selection.rb « reindexing « database « gitlab « lib - gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: ebe245bfadb5619ecff7419bcc67cd0e844a8449 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# frozen_string_literal: true

module Gitlab
  module Database
    module Reindexing
      # Enumerable collection of the candidate indexes that are worth
      # reindexing, most bloated first.
      #
      # Iteration (`each`, and everything Enumerable derives from it) is
      # forwarded to the memoized result of the private `indexes` method.
      class IndexSelection
        include Enumerable

        # Lower bound on relative bloat (bloat estimate / size); indexes
        # below this ratio are left alone.
        MINIMUM_RELATIVE_BLOAT = 0.2

        # Lower bound on the on-disk index size (measured before reindexing);
        # smaller indexes are not considered.
        INDEX_SIZE_MINIMUM = 1.gigabyte

        # Tables whose indexes are skipped whenever
        # `too_late_for_very_large_table?` is true.
        VERY_LARGE_TABLES = %i[ci_builds].freeze

        delegate :each, to: :indexes

        # candidates - the pool of indexes to choose from. It must respond to
        #              `not_recently_reindexed`, and the result of that call
        #              must support `where` / `where.not`.
        def initialize(candidates)
          @candidates = candidates
        end

        private

        attr_reader :candidates

        # Returns the selected indexes as an Array ordered by descending
        # relative bloat level, keeping only those at or above
        # MINIMUM_RELATIVE_BLOAT. The result is computed once and memoized.
        def indexes
          # Deliberate N+1: a view can provide bloat estimates for every index
          # in one go, but estimating all of them at once is expensive. The
          # estimate is therefore requested index by index instead.
          @indexes ||= begin
            most_bloated_first = relations_that_need_cleaning_before_deadline
              .sort_by { |candidate| candidate.relative_bloat_level } # forced N+1
              .reverse

            most_bloated_first.select do |candidate|
              candidate.relative_bloat_level >= MINIMUM_RELATIVE_BLOAT
            end
          end
        end

        # Narrows the candidates to those not recently reindexed and at least
        # INDEX_SIZE_MINIMUM bytes on disk. When it is too late to start on a
        # very large table, indexes on VERY_LARGE_TABLES are excluded as well.
        def relations_that_need_cleaning_before_deadline
          large_enough = candidates.not_recently_reindexed.where('ondisk_size_bytes >= ?', INDEX_SIZE_MINIMUM)
          return large_enough unless too_late_for_very_large_table?

          large_enough.where.not(tablename: VERY_LARGE_TABLES)
        end

        # Reindexing runs over the weekend. A reindex of a very large table
        # that starts late on Sunday could still be running on Monday, which
        # we want to avoid because it prevents vacuum from running. Very large
        # tables are therefore only eligible when today is Saturday; on any
        # other day this returns true.
        def too_late_for_very_large_table?
          !Date.today.saturday?
        end
      end
    end
  end
end