From a5f6182753502a5e74b06836c9bb70d287bf6fe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Trzci=C5=84ski?= Date: Thu, 22 Aug 2019 13:08:46 +0200 Subject: Optimise build queue service This makes BuildQueueService to force refresh runners that are considered to have recent queue. Such runners are the ones that connected within online interval + time to expire runner cache. --- app/models/ci/runner.rb | 28 ++++-- app/services/ci/update_build_queue_service.rb | 4 + .../unreleased/optimise-build-queue-service.yml | 5 + spec/models/ci/runner_spec.rb | 7 ++ .../services/ci/update_build_queue_service_spec.rb | 110 +++++++++++++-------- 5 files changed, 103 insertions(+), 51 deletions(-) create mode 100644 changelogs/unreleased/optimise-build-queue-service.yml diff --git a/app/models/ci/runner.rb b/app/models/ci/runner.rb index 43ff874ac23..1c1c7a5ae7a 100644 --- a/app/models/ci/runner.rb +++ b/app/models/ci/runner.rb @@ -23,13 +23,17 @@ module Ci project_type: 3 } - RUNNER_QUEUE_EXPIRY_TIME = 60.minutes ONLINE_CONTACT_TIMEOUT = 1.hour - UPDATE_DB_RUNNER_INFO_EVERY = 40.minutes + RUNNER_QUEUE_EXPIRY_TIME = 1.hour + + # This needs to be less than `ONLINE_CONTACT_TIMEOUT` + UPDATE_CONTACT_COLUMN_EVERY = (40.minutes..55.minutes).freeze + AVAILABLE_TYPES_LEGACY = %w[specific shared].freeze AVAILABLE_TYPES = runner_types.keys.freeze AVAILABLE_STATUSES = %w[active paused online offline].freeze AVAILABLE_SCOPES = (AVAILABLE_TYPES_LEGACY + AVAILABLE_TYPES + AVAILABLE_STATUSES).freeze + FORM_EDITABLE = %i[description tag_list active run_untagged locked access_level maximum_timeout_human_readable].freeze ignore_column :is_shared @@ -46,7 +50,7 @@ module Ci scope :active, -> { where(active: true) } scope :paused, -> { where(active: false) } - scope :online, -> { where('contacted_at > ?', contact_time_deadline) } + scope :online, -> { where('contacted_at > ?', online_contact_time_deadline) } # The following query using negation is cheaper than using `contacted_at <= ?` # because there are less runners online than have been created. The # resulting query is quickly finding online ones and then uses the regular @@ -56,6 +60,8 @@ module Ci scope :offline, -> { where.not(id: online) } scope :ordered, -> { order(id: :desc) } + scope :with_recent_runner_queue, -> { where('contacted_at > ?', recent_queue_deadline) } + # BACKWARD COMPATIBILITY: There are needed to maintain compatibility with `AVAILABLE_SCOPES` used by `lib/api/runners.rb` scope :deprecated_shared, -> { instance_type } scope :deprecated_specific, -> { project_type.or(group_type) } @@ -137,10 +143,18 @@ module Ci fuzzy_search(query, [:token, :description]) end - def self.contact_time_deadline + def self.online_contact_time_deadline ONLINE_CONTACT_TIMEOUT.ago end + def self.recent_queue_deadline + # we add queue expiry + online + # - contacted_at can be updated at any time within this interval + # we have always accurate `contacted_at` but it is stored in Redis + # and not persisted in database + (ONLINE_CONTACT_TIMEOUT + RUNNER_QUEUE_EXPIRY_TIME).ago + end + def self.order_by(order) if order == 'contacted_asc' order_contacted_at_asc @@ -174,7 +188,7 @@ module Ci end def online? - contacted_at && contacted_at > self.class.contact_time_deadline + contacted_at && contacted_at > self.class.online_contact_time_deadline end def status @@ -275,9 +289,7 @@ module Ci def persist_cached_data? # Use a random threshold to prevent beating DB updates. - # It generates a distribution between [40m, 80m]. - - contacted_at_max_age = UPDATE_DB_RUNNER_INFO_EVERY + Random.rand(UPDATE_DB_RUNNER_INFO_EVERY) + contacted_at_max_age = Random.rand(UPDATE_CONTACT_COLUMN_EVERY) real_contacted_at = read_attribute(:contacted_at) real_contacted_at.nil? || diff --git a/app/services/ci/update_build_queue_service.rb b/app/services/ci/update_build_queue_service.rb index 9c589d910eb..31c7178c9e7 100644 --- a/app/services/ci/update_build_queue_service.rb +++ b/app/services/ci/update_build_queue_service.rb @@ -9,6 +9,10 @@ module Ci private def tick_for(build, runners) + if Feature.enabled?(:ci_update_queues_for_online_runners, build.project, default_enabled: true) + runners = runners.with_recent_runner_queue + end + runners.each do |runner| runner.pick_build!(build) end diff --git a/changelogs/unreleased/optimise-build-queue-service.yml b/changelogs/unreleased/optimise-build-queue-service.yml new file mode 100644 index 00000000000..8ffaa3bfbdc --- /dev/null +++ b/changelogs/unreleased/optimise-build-queue-service.yml @@ -0,0 +1,5 @@ +--- +title: Optimise UpdateBuildQueueService +merge_request: 32095 +author: +type: performance diff --git a/spec/models/ci/runner_spec.rb b/spec/models/ci/runner_spec.rb index 78b151631c1..70ff3cf5dc4 100644 --- a/spec/models/ci/runner_spec.rb +++ b/spec/models/ci/runner_spec.rb @@ -80,6 +80,13 @@ describe Ci::Runner do end end + describe 'constraints' do + it '.UPDATE_CONTACT_COLUMN_EVERY' do + expect(described_class::UPDATE_CONTACT_COLUMN_EVERY.max) + .to be <= described_class::ONLINE_CONTACT_TIMEOUT + end + end + describe '#access_level' do context 'when creating new runner and access_level is nil' do let(:runner) do diff --git a/spec/services/ci/update_build_queue_service_spec.rb b/spec/services/ci/update_build_queue_service_spec.rb index 4b869385128..522dd1ba1c2 100644 --- a/spec/services/ci/update_build_queue_service_spec.rb +++ b/spec/services/ci/update_build_queue_service_spec.rb @@ -7,84 +7,108 @@ describe Ci::UpdateBuildQueueService do let(:build) { create(:ci_build, pipeline: pipeline) } let(:pipeline) { create(:ci_pipeline, project: project) } - context 'when updating specific runners' do - let(:runner) { create(:ci_runner, :project, projects: [project]) } - - context 'when there is a runner that can pick build' do - it 'ticks runner queue value' do - expect { subject.execute(build) }.to change { runner.ensure_runner_queue_value } - end + shared_examples 'refreshes runner' do + it 'ticks runner queue value' do + expect { subject.execute(build) }.to change { runner.ensure_runner_queue_value } end + end - context 'when there is no runner that can pick build' do - let(:another_project) { create(:project) } - let(:runner) { create(:ci_runner, :project, projects: [another_project]) } - - it 'does not tick runner queue value' do - expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value } - end + shared_examples 'does not refresh runner' do + it 'ticks runner queue value' do + expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value } end end - context 'when updating shared runners' do - let(:runner) { create(:ci_runner, :instance) } - - context 'when there is no runner that can pick build' do - it 'ticks runner queue value' do - expect { subject.execute(build) }.to change { runner.ensure_runner_queue_value } + shared_examples 'matching build' do + context 'when there is a online runner that can pick build' do + before do + runner.update!(contacted_at: 30.minutes.ago) end + + it_behaves_like 'refreshes runner' end + end + shared_examples 'mismatching tags' do context 'when there is no runner that can pick build due to tag mismatch' do before do build.tag_list = [:docker] end - it 'does not tick runner queue value' do - expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value } - end + it_behaves_like 'does not refresh runner' end + end - context 'when there is no runner that can pick build due to being disabled on project' do + shared_examples 'recent runner queue' do + context 'when there is runner with expired cache' do before do - build.project.shared_runners_enabled = false + runner.update!(contacted_at: Ci::Runner.recent_queue_deadline) end - it 'does not tick runner queue value' do - expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value } + context 'when ci_update_queues_for_online_runners is enabled' do + before do + stub_feature_flags(ci_update_queues_for_online_runners: true) + end + + it_behaves_like 'does not refresh runner' + end + + context 'when ci_update_queues_for_online_runners is disabled' do + before do + stub_feature_flags(ci_update_queues_for_online_runners: false) + end + + it_behaves_like 'refreshes runner' end end end - context 'when updating group runners' do - let(:group) { create(:group) } - let(:project) { create(:project, group: group) } - let(:runner) { create(:ci_runner, :group, groups: [group]) } + context 'when updating specific runners' do + let(:runner) { create(:ci_runner, :project, projects: [project]) } - context 'when there is a runner that can pick build' do - it 'ticks runner queue value' do - expect { subject.execute(build) }.to change { runner.ensure_runner_queue_value } - end + it_behaves_like 'matching build' + it_behaves_like 'mismatching tags' + it_behaves_like 'recent runner queue' + + context 'when the runner is assigned to another project' do + let(:another_project) { create(:project) } + let(:runner) { create(:ci_runner, :project, projects: [another_project]) } + + it_behaves_like 'does not refresh runner' end + end - context 'when there is no runner that can pick build due to tag mismatch' do + context 'when updating shared runners' do + let(:runner) { create(:ci_runner, :instance) } + + it_behaves_like 'matching build' + it_behaves_like 'mismatching tags' + it_behaves_like 'recent runner queue' + + context 'when there is no runner that can pick build due to being disabled on project' do before do - build.tag_list = [:docker] + build.project.shared_runners_enabled = false end - it 'does not tick runner queue value' do - expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value } - end + it_behaves_like 'does not refresh runner' end + end + + context 'when updating group runners' do + let(:group) { create(:group) } + let(:project) { create(:project, group: group) } + let(:runner) { create(:ci_runner, :group, groups: [group]) } + + it_behaves_like 'matching build' + it_behaves_like 'mismatching tags' + it_behaves_like 'recent runner queue' context 'when there is no runner that can pick build due to being disabled on project' do before do build.project.group_runners_enabled = false end - it 'does not tick runner queue value' do - expect { subject.execute(build) }.not_to change { runner.ensure_runner_queue_value } - end + it_behaves_like 'does not refresh runner' end end end -- cgit v1.2.3