Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Andreas Brandl <abrandl@gitlab.com> 2018-11-23 18:31:15 +0300
committer Andreas Brandl <abrandl@gitlab.com> 2018-12-03 23:26:53 +0300
commit 474fd9138c16b78c77f0d64a32c9cb722caf0cca (patch)
tree 86b4841e2970e29a4131933fb2b311503d3c8423 /lib/gitlab/database
parent ff35cb45e986b0d155c3954a608c5f94c28f0e64 (diff)
Move strategies in their own files
This improves readability quite a bit.
Diffstat (limited to 'lib/gitlab/database')
-rw-r--r-- lib/gitlab/database/count.rb 155
-rw-r--r-- lib/gitlab/database/count/exact_count_strategy.rb 31
-rw-r--r-- lib/gitlab/database/count/reltuples_count_strategy.rb 79
-rw-r--r-- lib/gitlab/database/count/tablesample_count_strategy.rb 66
4 files changed, 176 insertions, 155 deletions
diff --git a/lib/gitlab/database/count.rb b/lib/gitlab/database/count.rb
index df08e6916dd..c996d786909 100644
--- a/lib/gitlab/database/count.rb
+++ b/lib/gitlab/database/count.rb
@@ -50,161 +50,6 @@ module Gitlab
end
end
end
-
- # This strategy performs an exact count on the model.
- #
- # This is guaranteed to be accurate, however it also scans the
- # whole table. Hence, there are no guarantees with respect
- # to runtime.
- #
- # Note that for very large tables, this may even timeout.
- class ExactCountStrategy
- attr_reader :models
- def initialize(models)
- @models = models
- end
-
- def count
- models.each_with_object({}) do |model, data|
- data[model] = model.count
- end
- end
-
- def self.enabled?
- true
- end
- end
-
- class PgClass < ActiveRecord::Base
- self.table_name = 'pg_class'
- end
-
- # This strategy counts based on PostgreSQL's statistics in pg_stat_user_tables.
- #
- # Specifically, it relies on the column reltuples in said table. An additional
- # check is performed to make sure statistics were updated within the last hour.
- #
- # Otherwise, this strategy skips tables with outdated statistics.
- #
- # There are no guarantees with respect to the accuracy of this strategy. Runtime
- # however is guaranteed to be "fast", because it only looks up statistics.
- class ReltuplesCountStrategy
- attr_reader :models
- def initialize(models)
- @models = models
- end
-
- # Returns a hash of the table names that have recently updated tuples.
- #
- # @returns [Hash] Table name to count mapping (e.g. { 'projects' => 5, 'users' => 100 })
- def count
- size_estimates
- rescue *CONNECTION_ERRORS
- {}
- end
-
- def self.enabled?
- Gitlab::Database.postgresql?
- end
-
- private
-
- def table_names
- models.map(&:table_name)
- end
-
- def size_estimates(check_statistics: true)
- table_to_model = models.each_with_object({}) { |model, h| h[model.table_name] = model }
-
- # Querying tuple stats only works on the primary. Due to load balancing, the
- # easiest way to do this is to start a transaction.
- ActiveRecord::Base.transaction do
- get_statistics(table_names, check_statistics: check_statistics).each_with_object({}) do |row, data|
- model = table_to_model[row.table_name]
- data[model] = row.estimate
- end
- end
- end
-
- # Generates the PostgreSQL query to return the tuples for tables
- # that have been vacuumed or analyzed in the last hour.
- #
- # @param [Array] table names
- # @returns [Hash] Table name to count mapping (e.g. { 'projects' => 5, 'users' => 100 })
- def get_statistics(table_names, check_statistics: true)
- time = "to_timestamp(#{1.hour.ago.to_i})"
-
- query = PgClass.joins("LEFT JOIN pg_stat_user_tables USING (relname)")
- .where(relname: table_names)
- .select('pg_class.relname AS table_name, reltuples::bigint AS estimate')
-
- if check_statistics
- query = query.where('last_vacuum > ? OR last_autovacuum > ? OR last_analyze > ? OR last_autoanalyze > ?',
- time, time, time, time)
- end
-
- query
- end
- end
-
- # A tablesample count executes in two phases:
- # * Estimate table sizes based on reltuples.
- # * Based on the estimate:
- # * If the table is considered 'small', execute an exact relation count.
- # * Otherwise, count on a sample of the table using TABLESAMPLE.
- #
- # The size of the sample is chosen in a way that we always roughly scan
- # the same amount of rows (see TABLESAMPLE_ROW_TARGET).
- #
- # There are no guarantees with respect to the accuracy of the result or runtime.
- class TablesampleCountStrategy < ReltuplesCountStrategy
- EXACT_COUNT_THRESHOLD = 100_000
- TABLESAMPLE_ROW_TARGET = 100_000
-
- def count
- estimates = size_estimates(check_statistics: false)
-
- models.each_with_object({}) do |model, count_by_model|
- count = perform_count(model, estimates[model])
- count_by_model[model] = count if count
- end
- rescue *CONNECTION_ERRORS
- {}
- end
-
- def self.enabled?
- Gitlab::Database.postgresql? && Feature.enabled?(:tablesample_counts)
- end
-
- private
-
- def perform_count(model, estimate)
- # If we estimate 0, we may not have statistics at all. Don't use them.
- return nil unless estimate && estimate > 0
-
- if estimate < EXACT_COUNT_THRESHOLD
- # The table is considered small, the assumption here is that
- # the exact count will be fast anyways.
- model.count
- else
- # The table is considered large, let's only count on a sample.
- tablesample_count(model, estimate)
- end
- end
-
- def tablesample_count(model, estimate)
- portion = (TABLESAMPLE_ROW_TARGET.to_f / estimate).round(4)
- inverse = 1 / portion
- query = <<~SQL
- SELECT (COUNT(*)*#{inverse})::integer AS count
- FROM #{model.table_name} TABLESAMPLE SYSTEM (#{portion * 100})
- SQL
-
- rows = ActiveRecord::Base.connection.select_all(query)
-
- Integer(rows.first['count'])
- end
- end
end
end
end
diff --git a/lib/gitlab/database/count/exact_count_strategy.rb b/lib/gitlab/database/count/exact_count_strategy.rb
new file mode 100644
index 00000000000..0276fe2b54f
--- /dev/null
+++ b/lib/gitlab/database/count/exact_count_strategy.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Count
+ # This strategy performs an exact count on the model.
+ #
+ # This is guaranteed to be accurate, however it also scans the
+ # whole table. Hence, there are no guarantees with respect
+ # to runtime.
+ #
+ # Note that for very large tables, this may even timeout.
+ class ExactCountStrategy
+ attr_reader :models
+ def initialize(models)
+ @models = models
+ end
+
+ def count
+ models.each_with_object({}) do |model, data|
+ data[model] = model.count
+ end
+ end
+
+ def self.enabled?
+ true
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/count/reltuples_count_strategy.rb b/lib/gitlab/database/count/reltuples_count_strategy.rb
new file mode 100644
index 00000000000..c3a674aeb7e
--- /dev/null
+++ b/lib/gitlab/database/count/reltuples_count_strategy.rb
@@ -0,0 +1,79 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Count
+ class PgClass < ActiveRecord::Base
+ self.table_name = 'pg_class'
+ end
+
+ # This strategy counts based on PostgreSQL's table statistics in pg_class.
+ #
+ # Specifically, it relies on the column reltuples in said table. An additional check
+ # against pg_stat_user_tables makes sure statistics were updated within the last hour.
+ #
+ # Otherwise, this strategy skips tables with outdated statistics.
+ #
+ # There are no guarantees with respect to the accuracy of this strategy. Runtime
+ # however is guaranteed to be "fast", because it only looks up statistics.
+ class ReltuplesCountStrategy
+ attr_reader :models
+ def initialize(models)
+ @models = models
+ end
+
+ # Returns a hash of estimated row counts for the models whose statistics were recently updated.
+ #
+ # @return [Hash] Model to count mapping (e.g. { Project => 5, User => 100 })
+ def count
+ size_estimates
+ rescue *CONNECTION_ERRORS
+ {}
+ end
+
+ def self.enabled?
+ Gitlab::Database.postgresql?
+ end
+
+ private
+
+ def table_names
+ models.map(&:table_name)
+ end
+
+ def size_estimates(check_statistics: true)
+ table_to_model = models.each_with_object({}) { |model, h| h[model.table_name] = model }
+
+ # Querying tuple stats only works on the primary. Due to load balancing, the
+ # easiest way to do this is to start a transaction.
+ ActiveRecord::Base.transaction do
+ get_statistics(table_names, check_statistics: check_statistics).each_with_object({}) do |row, data|
+ model = table_to_model[row.table_name]
+ data[model] = row.estimate
+ end
+ end
+ end
+
+ # Generates the PostgreSQL query to return the tuple estimates for the given tables; when
+ # check_statistics is set, only for tables that have been vacuumed or analyzed in the last hour.
+ #
+ # @param table_names [Array] table names
+ # @return [ActiveRecord::Relation] rows exposing table_name and estimate
+ def get_statistics(table_names, check_statistics: true)
+ time = 1.hour.ago
+
+ query = PgClass.joins("LEFT JOIN pg_stat_user_tables USING (relname)")
+ .where(relname: table_names)
+ .select('pg_class.relname AS table_name, reltuples::bigint AS estimate')
+
+ if check_statistics
+ query = query.where('last_vacuum > ? OR last_autovacuum > ? OR last_analyze > ? OR last_autoanalyze > ?',
+ time, time, time, time)
+ end
+
+ query
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/database/count/tablesample_count_strategy.rb b/lib/gitlab/database/count/tablesample_count_strategy.rb
new file mode 100644
index 00000000000..c211bb5bb16
--- /dev/null
+++ b/lib/gitlab/database/count/tablesample_count_strategy.rb
@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Count
+ # A tablesample count executes in two phases:
+ # * Estimate table sizes based on reltuples.
+ # * Based on the estimate:
+ # * If the table is considered 'small', execute an exact relation count.
+ # * Otherwise, count on a sample of the table using TABLESAMPLE.
+ #
+ # The size of the sample is chosen in a way that we always roughly scan
+ # the same amount of rows (see TABLESAMPLE_ROW_TARGET).
+ #
+ # There are no guarantees with respect to the accuracy of the result or runtime.
+ class TablesampleCountStrategy < ReltuplesCountStrategy
+ EXACT_COUNT_THRESHOLD = 100_000
+ TABLESAMPLE_ROW_TARGET = 100_000
+
+ def count
+ estimates = size_estimates(check_statistics: false)
+
+ models.each_with_object({}) do |model, count_by_model|
+ count = perform_count(model, estimates[model])
+ count_by_model[model] = count if count
+ end
+ rescue *CONNECTION_ERRORS
+ {}
+ end
+
+ def self.enabled?
+ Gitlab::Database.postgresql? && Feature.enabled?(:tablesample_counts)
+ end
+
+ private
+
+ def perform_count(model, estimate)
+ # If we estimate 0, we may not have statistics at all. Don't use them.
+ return nil unless estimate && estimate > 0
+
+ if estimate < EXACT_COUNT_THRESHOLD
+ # The table is considered small; the assumption here is that
+ # the exact count will be fast anyway.
+ model.count
+ else
+ # The table is considered large, let's only count on a sample.
+ tablesample_count(model, estimate)
+ end
+ end
+
+ def tablesample_count(model, estimate)
+ portion = (TABLESAMPLE_ROW_TARGET.to_f / estimate).round(4)
+ inverse = 1 / portion
+ query = <<~SQL
+ SELECT (COUNT(*)*#{inverse})::integer AS count
+ FROM #{model.table_name} TABLESAMPLE SYSTEM (#{portion * 100})
+ SQL
+
+ rows = ActiveRecord::Base.connection.select_all(query)
+
+ Integer(rows.first['count'])
+ end
+ end
+ end
+ end
+end