Welcome to mirror list, hosted at ThFree Co, Russian Federation.

batch_average_counter.rb « database « gitlab « lib - gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
blob: 9cb1e34ab6718da0b7785d13dda42f69e18043db (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# frozen_string_literal: true

module Gitlab
  module Database
    # Computes the average of a numeric column by walking the column's value
    # range in fixed-size windows and aggregating each window with its own query.
    #
    # The final value is the grand total of the per-batch sums divided by the
    # grand total of the per-batch counts (not an average of batch averages).
    class BatchAverageCounter
      # Used in place of the column minimum/maximum when that query returns nil.
      COLUMN_FALLBACK = 0
      # Window width used when the caller does not supply `batch_size`.
      DEFAULT_BATCH_SIZE = 1_000
      # Sentinel returned by #count when no average can be produced.
      FALLBACK = -1
      # NOTE(review): not referenced anywhere in this class. Confirm whether an
      # external caller relies on it or whether a loop cap was intended here.
      MAX_ALLOWED_LOOPS = 10_000
      # Added to the column maximum so that the exclusive upper bound of the
      # last window still includes the largest value.
      OFFSET_BY_ONE = 1
      SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep

      attr_reader :relation, :column

      # @param relation [Object] relation to aggregate; must respond to the
      #   query methods used below (`where`, `minimum`, `maximum`, `unscope`, ...)
      # @param column [Arel::Attributes::Attribute, Symbol, String] column to
      #   average; anything that is not already an Arel attribute is looked up
      #   on `relation.arel_table`
      def initialize(relation, column)
        @relation = relation
        @column = wrap_column(relation, column)
      end

      # Calculates the average of `column` over `relation`, one batch at a time.
      #
      # @param batch_size [Integer, nil] width of each window of column values;
      #   falls back to DEFAULT_BATCH_SIZE when blank
      # @return [Float, Integer] the average as a Float, or FALLBACK when
      #   `batch_size` is not positive, when no records were counted, or when a
      #   batch query is canceled and the batch size cannot be halved any further
      # @raise [RuntimeError] when called while a transaction is open
      def count(batch_size: nil)
        raise 'BatchAverageCounter can not be run inside a transaction' if transaction_open?

        batch_size = batch_size.presence || DEFAULT_BATCH_SIZE

        # A zero or negative step never moves `batch_start` towards `finish`,
        # so the loop below would spin forever. Bail out before querying.
        return FALLBACK unless batch_size.positive?

        start  = column_start
        finish = column_finish

        total_sum = 0
        total_records = 0

        batch_start = start

        while batch_start < finish
          begin
            batch_end      = [batch_start + batch_size, finish].min
            batch_relation = build_relation_batch(batch_start, batch_end)

            # We use `sum` and `count` instead of `average` here to not run into an "average of averages"
            # problem as batches will have different sizes, so we are essentially summing up the values for
            # each batch separately, and then dividing that result on the total number of records.
            batch_sum, batch_count = batch_relation.pick(column.sum, column.count)

            # Either value may be nil (e.g. SQL `SUM` over zero rows is NULL);
            # treat nil as zero. The sum keeps its numeric type so fractional
            # values are not truncated before the final division.
            total_sum     += batch_sum || 0
            total_records += batch_count.to_i

            batch_start = batch_end
          rescue ActiveRecord::QueryCanceled => error # rubocop:disable Database/RescueQueryCanceled
            # retry with a safe batch size & warmer cache
            if batch_size >= 2 * DEFAULT_BATCH_SIZE
              # `batch_start` was not advanced, so the next iteration retries
              # the same window start with half the width.
              batch_size /= 2
            else
              log_canceled_batch_fetch(batch_start, batch_relation.to_sql, error)

              return FALLBACK
            end
          end

          sleep(SLEEP_TIME_IN_SECONDS)
        end

        return FALLBACK if total_records == 0

        total_sum.to_f / total_records
      end

      private

      # Smallest value of `column`, ignoring any GROUP BY / HAVING on the relation.
      def column_start
        relation.unscope(:group, :having).minimum(column) || COLUMN_FALLBACK
      end

      # Largest value of `column` plus one, ignoring any GROUP BY / HAVING on
      # the relation. Serves as the exclusive upper bound of the iteration.
      def column_finish
        (relation.unscope(:group, :having).maximum(column) || COLUMN_FALLBACK) + OFFSET_BY_ONE
      end

      # Restricts the relation to the window `start...finish` (end excluded).
      def build_relation_batch(start, finish)
        relation.where(column.between(start...finish))
      end

      # Reports a canceled batch query to the application JSON logger.
      def log_canceled_batch_fetch(batch_start, query, error)
        Gitlab::AppJsonLogger
          .error(
            event: 'batch_count',
            relation: relation.table_name,
            operation: 'average',
            start: batch_start,
            query: query,
            message: "Query has been canceled with message: #{error.message}"
          )
      end

      def transaction_open?
        relation.connection.transaction_open?
      end

      # Returns `column` unchanged when it already is an Arel attribute,
      # otherwise resolves it against the relation's Arel table.
      def wrap_column(relation, column)
        return column if column.is_a?(Arel::Attributes::Attribute)

        relation.arel_table[column]
      end
    end
  end
end