diff options
Diffstat (limited to 'lib/gitlab/database/batch_average_counter.rb')
-rw-r--r-- | lib/gitlab/database/batch_average_counter.rb | 103 |
1 files changed, 103 insertions, 0 deletions
diff --git a/lib/gitlab/database/batch_average_counter.rb b/lib/gitlab/database/batch_average_counter.rb new file mode 100644 index 00000000000..9cb1e34ab67 --- /dev/null +++ b/lib/gitlab/database/batch_average_counter.rb @@ -0,0 +1,103 @@ +# frozen_string_literal: true + +module Gitlab + module Database + class BatchAverageCounter + COLUMN_FALLBACK = 0 + DEFAULT_BATCH_SIZE = 1_000 + FALLBACK = -1 + MAX_ALLOWED_LOOPS = 10_000 + OFFSET_BY_ONE = 1 + SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep + + attr_reader :relation, :column + + def initialize(relation, column) + @relation = relation + @column = wrap_column(relation, column) + end + + def count(batch_size: nil) + raise 'BatchAverageCounter can not be run inside a transaction' if transaction_open? + + batch_size = batch_size.presence || DEFAULT_BATCH_SIZE + + start = column_start + finish = column_finish + + total_sum = 0 + total_records = 0 + + batch_start = start + + while batch_start < finish + begin + batch_end = [batch_start + batch_size, finish].min + batch_relation = build_relation_batch(batch_start, batch_end) + + # We use `sum` and `count` instead of `average` here to not run into an "average of averages" + # problem as batches will have different sizes, so we are essentially summing up the values for + # each batch separately, and then dividing that result on the total number of records. + batch_sum, batch_count = batch_relation.pick(column.sum, column.count) + + total_sum += batch_sum.to_i + total_records += batch_count + + batch_start = batch_end + rescue ActiveRecord::QueryCanceled => error # rubocop:disable Database/RescueQueryCanceled + # retry with a safe batch size & warmer cache + if batch_size >= 2 * DEFAULT_BATCH_SIZE + batch_size /= 2 + else + log_canceled_batch_fetch(batch_start, batch_relation.to_sql, error) + + return FALLBACK + end + end + + sleep(SLEEP_TIME_IN_SECONDS) + end + + return FALLBACK if total_records == 0 + + total_sum.to_f / total_records + end + + private + + def column_start + relation.unscope(:group, :having).minimum(column) || COLUMN_FALLBACK + end + + def column_finish + (relation.unscope(:group, :having).maximum(column) || COLUMN_FALLBACK) + OFFSET_BY_ONE + end + + def build_relation_batch(start, finish) + relation.where(column.between(start...finish)) + end + + def log_canceled_batch_fetch(batch_start, query, error) + Gitlab::AppJsonLogger + .error( + event: 'batch_count', + relation: relation.table_name, + operation: 'average', + start: batch_start, + query: query, + message: "Query has been canceled with message: #{error.message}" + ) + end + + def transaction_open? + relation.connection.transaction_open? + end + + def wrap_column(relation, column) + return column if column.is_a?(Arel::Attributes::Attribute) + + relation.arel_table[column] + end + end + end +end |