diff options
author | Andreas Brandl <abrandl@gitlab.com> | 2019-07-26 15:51:18 +0300 |
---|---|---|
committer | Andreas Brandl <abrandl@gitlab.com> | 2019-07-26 17:22:52 +0300 |
commit | 4f01cd96e063ab648960ea2cbe2fdfff59ed390a (patch) | |
tree | 18d8664f4fe899c268e30acf05218a44f4faa449 | |
parent | 6d830eaea38ba3b917ecad40e630afe2c0ec36c5 (diff) |
Basic implementation for batched countingab-batch-counts-for-admin-ui
TODO:
* Move services to workers
* Display count in admin UI
* Cronjob setup
* Put behind FF
-rw-r--r-- | app/models/background_counter.rb | 11 | ||||
-rw-r--r-- | app/workers/all_queues.yml | 1 | ||||
-rw-r--r-- | app/workers/background_counter_worker.rb | 12 | ||||
-rw-r--r-- | app/workers/batch_count.rb | 3 | ||||
-rw-r--r-- | app/workers/batch_count/projects_batch_count_worker.rb | 40 | ||||
-rw-r--r-- | config/initializers/1_settings.rb | 3 | ||||
-rw-r--r-- | db/migrate/20190726114448_create_background_counter_table.rb | 18 | ||||
-rw-r--r-- | db/schema.rb | 9 | ||||
-rw-r--r-- | lib/gitlab/database/count.rb | 19 | ||||
-rw-r--r-- | spec/factories/background_counter.rb | 5 | ||||
-rw-r--r-- | spec/lib/gitlab/database/count_spec.rb | 20 | ||||
-rw-r--r-- | spec/models/background_counter_spec.rb | 14 | ||||
-rw-r--r-- | spec/workers/background_counter_worker_spec.rb | 21 | ||||
-rw-r--r-- | spec/workers/batch_count/projects_batch_count_service_spec.rb | 34 |
14 files changed, 209 insertions, 1 deletions
diff --git a/app/models/background_counter.rb b/app/models/background_counter.rb new file mode 100644 index 00000000000..0b300d6ee5b --- /dev/null +++ b/app/models/background_counter.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class BackgroundCounter < ApplicationRecord + before_save :set_updated_at + + private + + def set_updated_at + self.updated_at = Time.now + end +end diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 991a177018e..dd8d339e753 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -33,6 +33,7 @@ - cronjob:prune_web_hook_logs - cronjob:schedule_migrate_external_diffs - cronjob:namespaces_prune_aggregation_schedules +- cronjob:batch_count_projects_batch_count - gcp_cluster:cluster_install_app - gcp_cluster:cluster_patch_app diff --git a/app/workers/background_counter_worker.rb b/app/workers/background_counter_worker.rb new file mode 100644 index 00000000000..f26bbc4c488 --- /dev/null +++ b/app/workers/background_counter_worker.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +class BackgroundCounterWorker + include ApplicationWorker + + def perform(class_name) + # todo: add lease + class_name.constantize.new.recalculate! + rescue NameError + # ignore + end +end diff --git a/app/workers/batch_count.rb b/app/workers/batch_count.rb new file mode 100644 index 00000000000..56b9b74e7d3 --- /dev/null +++ b/app/workers/batch_count.rb @@ -0,0 +1,3 @@ +module BatchCount + +end diff --git a/app/workers/batch_count/projects_batch_count_worker.rb b/app/workers/batch_count/projects_batch_count_worker.rb new file mode 100644 index 00000000000..d788db2f2fd --- /dev/null +++ b/app/workers/batch_count/projects_batch_count_worker.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +module BatchCount + class ProjectsBatchCountWorker + include ApplicationWorker + include CronjobQueue + include ExclusiveLeaseGuard + + IDENTIFIER = 'projects' + LEASE_TIMEOUT = 5.minutes + + attr_reader :counter + + def initialize + @counter = find_counter(IDENTIFIER) + end + + def perform + try_obtain_lease do + update_with_batched_count(Project.all) + end + end + + def lease_timeout + LEASE_TIMEOUT + end + + private + def find_counter(identifier) + BackgroundCounter.find_or_create_by(identifier: identifier) + rescue ActiveRecord::RecordNotUnique + retry + end + + def update_with_batched_count(relation) + counter.counter_value = Gitlab::Database::Count.batched_count(relation) + counter.save! + end + end +end diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index 32fec7c3d22..870595e0c6e 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -447,6 +447,9 @@ Settings.cron_jobs['schedule_migrate_external_diffs_worker']['job_class'] = 'Sch Settings.cron_jobs['namespaces_prune_aggregation_schedules_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['namespaces_prune_aggregation_schedules_worker']['cron'] ||= '5 1 * * *' Settings.cron_jobs['namespaces_prune_aggregation_schedules_worker']['job_class'] = 'Namespaces::PruneAggregationSchedulesWorker' +Settings.cron_jobs['batch_count_projects_batch_count_worker'] ||= Settingslogic.new({}) +Settings.cron_jobs['batch_count_projects_batch_count_worker']['cron'] ||= '*/10 * * * *' +Settings.cron_jobs['batch_count_projects_batch_count_worker']['job_class'] ||= 'BatchCount::ProjectsBatchCountWorker' Gitlab.ee do Settings.cron_jobs['clear_shared_runners_minutes_worker'] ||= Settingslogic.new({}) diff --git a/db/migrate/20190726114448_create_background_counter_table.rb b/db/migrate/20190726114448_create_background_counter_table.rb new file mode 100644 index 00000000000..09a4bf11df8 --- /dev/null +++ b/db/migrate/20190726114448_create_background_counter_table.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +class CreateBackgroundCounterTable < ActiveRecord::Migration[5.2] + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + def change + create_table :background_counters do |t| + t.string :identifier, null: false, limit: 100 + + t.datetime_with_timezone :updated_at, null: false + t.bigint :counter_value, null: false, default: 0 + + t.index :identifier, unique: true + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 67479937b47..e2e75db02ee 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2019_07_25_012225) do +ActiveRecord::Schema.define(version: 2019_07_26_114448) do # These are extensions that must be enabled in order to support this database enable_extension "pg_trgm" @@ -355,6 +355,13 @@ ActiveRecord::Schema.define(version: 2019_07_25_012225) do t.index ["user_id", "name"], name: "index_award_emoji_on_user_id_and_name" end + create_table "background_counters", force: :cascade do |t| + t.string "identifier", limit: 100, null: false + t.datetime_with_timezone "updated_at", null: false + t.bigint "counter_value", default: 0, null: false + t.index ["identifier"], name: "index_background_counters_on_identifier", unique: true + end + create_table "badges", id: :serial, force: :cascade do |t| t.string "link_url", null: false t.string "image_url", null: false diff --git a/lib/gitlab/database/count.rb b/lib/gitlab/database/count.rb index eac61254bdf..8af2806ed66 100644 --- a/lib/gitlab/database/count.rb +++ b/lib/gitlab/database/count.rb @@ -48,6 +48,25 @@ module Gitlab end end end + + # Performs an exact count on the given relation. + # + # It executes the counting in batches, which is useful on larger relations. + # For batching to work, the a primary key needs to be specified. For models, + # this gets automatically derived. + # + # Note the method must not be called inside a transaction. + # + # TODO: Optimize for memory with `select MAX(id), count(*) FROM (...)` + def self.batched_count(relation, batch_size: 10000, primary_key: nil) + raise "Batched counting must not run inside transaction" if ::Gitlab::Database.inside_transaction? + + primary_key ||= relation.primary_key if relation.respond_to?(:primary_key) + + relation.select(primary_key).find_in_batches(batch_size: batch_size).reduce(0) do |counter, batch| + counter += batch.size + end + end end end end diff --git a/spec/factories/background_counter.rb b/spec/factories/background_counter.rb new file mode 100644 index 00000000000..3571f290e8f --- /dev/null +++ b/spec/factories/background_counter.rb @@ -0,0 +1,5 @@ +FactoryBot.define do + factory :background_counter, class: BackgroundCounter do + identifier 'my_counter' + end +end diff --git a/spec/lib/gitlab/database/count_spec.rb b/spec/lib/gitlab/database/count_spec.rb index 71d6633f62f..5d2a5faf3fa 100644 --- a/spec/lib/gitlab/database/count_spec.rb +++ b/spec/lib/gitlab/database/count_spec.rb @@ -45,4 +45,24 @@ describe Gitlab::Database::Count do end end end + + describe '.batched_count' do + subject { described_class.batched_count(Project.all) } + + it 'counts the given relation' do + expect(subject).to eq(Project.all.size) + end + + it 'raises an error when run inside a transaction' do + expect { ActiveRecord::Base.transaction { subject } }.to raise_error(/must not run inside transaction/) + end + + it 'makes batch_size configurable' do + expect(described_class.batched_count(Project.all, batch_size: 1)).to eq(Project.all.size) + end + + it 'allows to specify a primary key' do + expect(described_class.batched_count(Project.all, primary_key: :id)).to eq(Project.all.size) + end + end end diff --git a/spec/models/background_counter_spec.rb b/spec/models/background_counter_spec.rb new file mode 100644 index 00000000000..19baf95837a --- /dev/null +++ b/spec/models/background_counter_spec.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe BackgroundCounter do + describe 'before_save hooks' do + let!(:counter) { create(:background_counter) } + + it 'sets the updated_at timestamp when changed' do + counter.counter_value = 1 + expect { counter.save }.to change { counter.updated_at } + end + end +end diff --git a/spec/workers/background_counter_worker_spec.rb b/spec/workers/background_counter_worker_spec.rb new file mode 100644 index 00000000000..3859a669b0d --- /dev/null +++ b/spec/workers/background_counter_worker_spec.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe BackgroundCounterWorker do + let(:worker) { described_class.new } + let(:counter_service ) { BatchCount::ProjectsBatchCountService } + let(:service) { double('batch count service', recalculate!: nil) } + + describe '#perform' do + subject { worker.perform(counter_service.to_s) } + + it 'triggers a recalculate on the batch count service' do + allow(counter_service).to receive(:new).and_return(service) + + expect(service).to receive(:recalculate!) + + subject + end + end +end diff --git a/spec/workers/batch_count/projects_batch_count_service_spec.rb b/spec/workers/batch_count/projects_batch_count_service_spec.rb new file mode 100644 index 00000000000..c44796e8622 --- /dev/null +++ b/spec/workers/batch_count/projects_batch_count_service_spec.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe BatchCount::ProjectsBatchCountWorker do + include ExclusiveLeaseHelpers + + subject { described_class.new.perform } + + it 'creates the BackgroundCounter if it doesnt exist yet' do + subject + + expect(BackgroundCounter.find_by(identifier: described_class::IDENTIFIER)).to be_present + end + + context 'with an existing counter' do + let!(:counter) { create(:background_counter, identifier: described_class::IDENTIFIER, counter_value: 0) } + let(:counter_value) { 42 } + + it 'performs a batched count and updates the counter' do + allow(Gitlab::Database::Count).to receive(:batched_count).with(Project.all).and_return(counter_value) + + expect { subject }.to change { counter.reload.counter_value }.from(0).to(counter_value) + end + + it 'is guarded with an exclusive lease' do + stub_exclusive_lease_taken(described_class.new.lease_key) + + expect(Gitlab::Database::Count).not_to receive(:batched_count) + + subject + end + end +end |