Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGitLab Bot <gitlab-bot@gitlab.com>2020-02-17 21:09:00 +0300
committerGitLab Bot <gitlab-bot@gitlab.com>2020-02-17 21:09:00 +0300
commiteb1f5a3e087b7d6a3e85f2724b5a475cc9d9d37d (patch)
treed572a6d931152ae0dd10427237f5811893438552 /lib/gitlab/database
parentb304a72312465ed4c0a568ee6a6ea5e97f705c9b (diff)
Add latest changes from gitlab-org/gitlab@master
Diffstat (limited to 'lib/gitlab/database')
-rw-r--r--lib/gitlab/database/batch_count.rb89
1 files changed, 89 insertions, 0 deletions
diff --git a/lib/gitlab/database/batch_count.rb b/lib/gitlab/database/batch_count.rb
new file mode 100644
index 00000000000..a9d4665bc5f
--- /dev/null
+++ b/lib/gitlab/database/batch_count.rb
@@ -0,0 +1,89 @@
# frozen_string_literal: true

# For large tables, PostgreSQL can take a long time to count rows due to MVCC.
# This file implements an ordinary and a distinct batch counter that walk the
# value range of a column in fixed-size slices and sum the per-slice counts.
# The counted column needs an index so the min, max and range queries stay cheap.
# For larger tables, pass a higher batch_size (together with index optimization).
# See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
# Examples:
#   extend ::Gitlab::Database::BatchCount
#   batch_count(User.active)
#   batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
#   batch_distinct_count(::Project, :creator_id)
module Gitlab
  module Database
    # Mixin exposing the two batch counting entry points. The methods are also
    # callable directly on the module (Gitlab::Database::BatchCount.batch_count).
    module BatchCount
      # Counts the rows of +relation+ in batches.
      #
      # relation   - the relation to count
      # column     - column used to slice the relation; BatchCounter falls back
      #              to relation.primary_key when nil
      # batch_size - width of every slice; BatchCounter picks a default when nil
      #
      # Returns the summed count, or BatchCounter::FALLBACK when counting gives up.
      def batch_count(relation, column = nil, batch_size: nil)
        BatchCounter.new(relation, column: column).count(batch_size: batch_size)
      end

      # Same as #batch_count, but runs every slice query in :distinct mode.
      def batch_distinct_count(relation, column = nil, batch_size: nil)
        BatchCounter.new(relation, column: column).count(mode: :distinct, batch_size: batch_size)
      end

      class << self
        include BatchCount
      end
    end

    # Counts a relation by iterating over the [min, max] range of a column.
    class BatchCounter
      # Value returned instead of a count when the configuration is rejected or
      # a slice query keeps being canceled at the smallest permitted batch size.
      FALLBACK = -1
      MIN_REQUIRED_BATCH_SIZE = 2_000
      MAX_ALLOWED_LOOPS = 10_000
      SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
      # Each query should take <<500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
      # Distinct counting is slower per row, hence the smaller default slice.
      DEFAULT_DISTINCT_BATCH_SIZE = 10_000
      DEFAULT_BATCH_SIZE = 100_000
      ALLOWED_MODES = %i[itself distinct].freeze

      # relation - the relation to count
      # column   - column to slice on; defaults to relation.primary_key
      def initialize(relation, column: nil)
        @relation = relation
        @column = column || relation.primary_key
      end

      # Returns true when counting should not even be attempted: the batch size
      # is not above MIN_REQUIRED_BATCH_SIZE, the range would need
      # MAX_ALLOWED_LOOPS or more iterations, or the range is inverted.
      def unwanted_configuration?(finish, batch_size, start)
        batch_size <= MIN_REQUIRED_BATCH_SIZE ||
          (finish - start) / batch_size >= MAX_ALLOWED_LOOPS ||
          start > finish
      end

      # Runs the batched count.
      #
      # batch_size - width of each slice; defaults depend on +mode+
      # mode       - :itself for a plain count, :distinct for a distinct count
      #
      # Returns the total count, or FALLBACK when the configuration is unwanted
      # or a query is canceled and the batch size cannot be halved any further.
      # Raises RuntimeError when called inside an open transaction, when +mode+
      # is unsupported, or when the column's min/max is negative.
      def count(batch_size: nil, mode: :itself)
        raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open?
        raise "The mode #{mode.inspect} is not supported" unless ALLOWED_MODES.include?(mode)

        # non-distinct have better performance, so they get the larger default
        batch_size ||= mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE

        # An empty relation yields nil for both bounds; treat it as the range 0..0.
        start = @relation.minimum(@column) || 0
        finish = @relation.maximum(@column) || 0

        raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0
        return FALLBACK if unwanted_configuration?(finish, batch_size, start)

        counter = 0
        batch_start = start

        while batch_start <= finish
          begin
            counter += batch_fetch(batch_start, batch_start + batch_size, mode)
            batch_start += batch_size
          rescue ActiveRecord::QueryCanceled
            # retry the same slice with a safe (halved) batch size & warmer cache;
            # batch_start is left untouched so no rows are skipped
            return FALLBACK if batch_size < 2 * MIN_REQUIRED_BATCH_SIZE

            batch_size /= 2
          end
          sleep(SLEEP_TIME_IN_SECONDS)
        end

        counter
      end

      # Counts the rows whose column value lies in start...finish (finish is
      # exclusive). +mode+ is sent to the relation and is validated by #count.
      def batch_fetch(start, finish, mode)
        # rubocop:disable GitlabSecurity/PublicSend
        @relation.select(@column).public_send(mode).where(@column => start..(finish - 1)).count
        # rubocop:enable GitlabSecurity/PublicSend
      end
    end
  end
end