Welcome to mirror list, hosted at ThFree Co, Russian Federation.

batch_optimizer.rb « background_migration « database « gitlab « lib - gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: e99b63c0e4b8c20c1030ef57ba04e87202efbb65 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# frozen_string_literal: true

module Gitlab
  module Database
    module BackgroundMigration
      # Tunes the batch size of a batched background migration for throughput.
      #
      # The tuning is driven by the notion of time efficiency:
      #     time efficiency = job duration / interval
      # A value close to, but lower than, 1 means the interval is used well while the
      # database still gets a little breathing room between jobs.
      #
      # The optimizer asks the migration for its smoothed time efficiency (an exponential
      # moving average over the last N jobs). While that value lies inside TARGET_EFFICIENCY
      # the batch size is left alone. Otherwise the batch size is multiplied by
      # (upper end of TARGET_EFFICIENCY / efficiency) - a factor that never exceeds
      # MAX_MULTIPLIER - and the result is kept within the batch size limits.
      class BatchOptimizer
        # Acceptable band for the smoothed time efficiency (job duration / interval).
        # Inside this band the batch size is not touched.
        TARGET_EFFICIENCY = (0.9..0.95)

        # Default floor and ceiling for the adjusted batch size
        MIN_BATCH_SIZE = 1_000
        MAX_BATCH_SIZE = 2_000_000

        # Largest factor the batch size may be multiplied by in one optimization step
        MAX_MULTIPLIER = 1.2

        # Default number of jobs taken into account when smoothing time efficiency
        NUMBER_OF_JOBS = 20

        # Default smoothing factor for the exponential moving average
        EMA_ALPHA = 0.4

        attr_reader :migration, :number_of_jobs, :ema_alpha

        # migration      - object whose batch size is tuned; it must respond to
        #                  smoothed_time_efficiency, max_batch_size, batch_size,
        #                  batch_size= and save!
        # number_of_jobs - forwarded to migration.smoothed_time_efficiency
        # ema_alpha      - forwarded to migration.smoothed_time_efficiency as alpha
        def initialize(migration, number_of_jobs: NUMBER_OF_JOBS, ema_alpha: EMA_ALPHA)
          @migration = migration
          @number_of_jobs = number_of_jobs
          @ema_alpha = ema_alpha
        end

        # Rescales and saves the migration's batch size.
        #
        # Does nothing when the :optimize_batched_migrations ops flag is off or when
        # no multiplier could be determined.
        def optimize!
          return unless Feature.enabled?(:optimize_batched_migrations, type: :ops)

          factor = batch_size_multiplier
          return if factor.nil?

          floor, ceiling = batch_size_limits
          scaled = (migration.batch_size * factor).to_i

          migration.batch_size = scaled.clamp(floor, ceiling)
          migration.save!
        end

        private

        # Returns [lower, upper] limits for the batch size. The migration's own
        # max_batch_size wins over MAX_BATCH_SIZE, and the lower limit never
        # exceeds the upper one so that clamping stays valid.
        def batch_size_limits
          ceiling = migration.max_batch_size || MAX_BATCH_SIZE

          [[ceiling, MIN_BATCH_SIZE].min, ceiling]
        end

        # Returns the factor to apply to the batch size, or nil when no change is
        # called for: no efficiency available, an efficiency of zero, or an
        # efficiency that is already inside TARGET_EFFICIENCY.
        def batch_size_multiplier
          smoothed = migration.smoothed_time_efficiency(number_of_jobs: number_of_jobs, alpha: ema_alpha)

          return if smoothed.nil? || smoothed == 0 || TARGET_EFFICIENCY.include?(smoothed)

          # Assumption: time efficiency is linear in the batch size, so scaling the
          # batch size by this ratio should move efficiency to the top of the band.
          proportional = TARGET_EFFICIENCY.max / smoothed

          [proportional, MAX_MULTIPLIER].min
        end
      end
    end
  end
end