diff options
author | Douwe Maan <douwe@gitlab.com> | 2017-07-08 03:05:50 +0300 |
---|---|---|
committer | Douwe Maan <douwe@gitlab.com> | 2017-07-08 03:05:50 +0300 |
commit | e916e7274863492d6dec55d888c83d715e1fd60b (patch) | |
tree | 30310e7e84c06c6ce368b35bf436607182154b29 | |
parent | 294d1804d3c9f56f6931a996b90c57ba63d61c7e (diff) | |
parent | c3aa2dee088c1dc4859d0bff93391ff206f35e7d (diff) |
Merge branch 'increase-parallelism-of-geo-repository-sync-worker' into 'master'
Increase parallelism of geo repository sync worker
Closes #2742
See merge request !2351
-rw-r--r-- | app/models/geo/project_registry.rb | 16 | ||||
-rw-r--r-- | app/services/geo/base_sync_service.rb | 81 | ||||
-rw-r--r-- | app/services/geo/repository_sync_service.rb | 130 | ||||
-rw-r--r-- | app/services/geo/wiki_sync_service.rb | 29 | ||||
-rw-r--r-- | app/workers/geo/project_sync_worker.rb | 35 | ||||
-rw-r--r-- | app/workers/geo_repository_sync_worker.rb | 137 | ||||
-rw-r--r-- | changelogs/unreleased-ee/increase-parallelism-of-geo-repository-sync-worker.yml | 4 | ||||
-rw-r--r-- | spec/models/geo/project_registry_spec.rb | 82 | ||||
-rw-r--r-- | spec/services/geo/repository_sync_service_spec.rb | 297 | ||||
-rw-r--r-- | spec/services/geo/wiki_sync_service_spec.rb | 102 | ||||
-rw-r--r-- | spec/workers/geo/project_sync_worker_spec.rb | 125 | ||||
-rw-r--r-- | spec/workers/geo_repository_sync_worker_spec.rb | 75 |
12 files changed, 649 insertions, 464 deletions
diff --git a/app/models/geo/project_registry.rb b/app/models/geo/project_registry.rb index 247bceb712c..84f996f193c 100644 --- a/app/models/geo/project_registry.rb +++ b/app/models/geo/project_registry.rb @@ -10,4 +10,20 @@ class Geo::ProjectRegistry < Geo::BaseRegistry where.not(last_repository_synced_at: nil, last_repository_successful_sync_at: nil) .where(resync_repository: false, resync_wiki: false) end + + def resync_repository? + resync_repository || last_repository_successful_sync_at.nil? + end + + def resync_wiki? + resync_wiki || last_wiki_successful_sync_at.nil? + end + + def repository_synced_since?(timestamp) + last_repository_synced_at && last_repository_synced_at > timestamp + end + + def wiki_synced_since?(timestamp) + last_wiki_synced_at && last_wiki_synced_at > timestamp + end end diff --git a/app/services/geo/base_sync_service.rb b/app/services/geo/base_sync_service.rb new file mode 100644 index 00000000000..4dc7ebafa8a --- /dev/null +++ b/app/services/geo/base_sync_service.rb @@ -0,0 +1,81 @@ +module Geo + class BaseSyncService + class << self + attr_accessor :type + end + + attr_reader :project + + LEASE_TIMEOUT = 8.hours.freeze + LEASE_KEY_PREFIX = 'geo_sync_service'.freeze + + def initialize(project) + @project = project + end + + def execute + try_obtain_lease do + log("Started #{type} sync") + sync_repository + log("Finished #{type} sync") + end + end + + def lease_key + @lease_key ||= "#{LEASE_KEY_PREFIX}:#{type}:#{project.id}" + end + + private + + def registry + @registry ||= Geo::ProjectRegistry.find_or_initialize_by(project_id: project.id) + end + + def try_obtain_lease + log("Trying to obtain lease to sync #{type}") + repository_lease = Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT).try_obtain + + unless repository_lease + log("Could not obtain lease to sync #{type}") + return + end + + yield + + # We should release the lease for a repository, only if we have obtained + # it. If something went wrong when syncing the repository, we should wait + # for the lease timeout to try again. + log("Releasing leases to sync #{type}") + Gitlab::ExclusiveLease.cancel(lease_key, repository_lease) + end + + def update_registry(type, started_at: nil, finished_at: nil) + return unless started_at || finished_at + + log("Updating #{type} sync information") + + attrs = {} + + attrs["last_#{type}_synced_at"] = started_at if started_at + + if finished_at + attrs["last_#{type}_successful_sync_at"] = finished_at + attrs["resync_#{type}"] = false + end + + registry.update!(attrs) + end + + def type + self.class.type + end + + def primary_ssh_path_prefix + @primary_ssh_path_prefix ||= Gitlab::Geo.primary_node.clone_url_prefix + end + + def log(message) + Rails.logger.info("#{self.class.name}: #{message} for project #{project.path_with_namespace} (#{project.id})") + end + end +end diff --git a/app/services/geo/repository_sync_service.rb b/app/services/geo/repository_sync_service.rb index 40cdaeb7cea..9eb0a070b9c 100644 --- a/app/services/geo/repository_sync_service.rb +++ b/app/services/geo/repository_sync_service.rb @@ -1,74 +1,23 @@ module Geo - class RepositorySyncService - attr_reader :project_id - - LEASE_TIMEOUT = 8.hours.freeze - LEASE_KEY_PREFIX = 'repository_sync_service'.freeze - - def initialize(project_id) - @project_id = project_id - end - - def execute - try_obtain_lease do - log('Started repository sync') - sync_project_repository - sync_wiki_repository - log('Finished repository sync') - end - rescue ActiveRecord::RecordNotFound - Rails.logger.error("#{self.class.name}: Couldn't find project with ID=#{project_id}, skipping syncing") - end + class RepositorySyncService < BaseSyncService + self.type = :repository private - def project - @project ||= Project.find(project_id) - end - - def registry - @registry ||= Geo::ProjectRegistry.find_or_initialize_by(project_id: project_id) - end - - def sync_project_repository - return unless sync_repository? - - started_at, finished_at = fetch_project_repository - update_registry(:repository, started_at, finished_at) + def sync_repository + fetch_project_repository expire_repository_caches end - def sync_repository? - registry.resync_repository? || - registry.last_repository_successful_sync_at.nil? || - registry.last_repository_synced_at.nil? - end - - def sync_wiki_repository - return unless sync_wiki? - - started_at, finished_at = fetch_wiki_repository - update_registry(:wiki, started_at, finished_at) - end - - def sync_wiki? - registry.resync_wiki? || - registry.last_wiki_successful_sync_at.nil? || - registry.last_wiki_synced_at.nil? - end - def fetch_project_repository - return unless sync_repository? - log('Fetching project repository') - started_at = DateTime.now - finished_at = nil + update_registry(:repository, started_at: DateTime.now) begin project.ensure_repository project.repository.fetch_geo_mirror(ssh_url_to_repo) - finished_at = DateTime.now + update_registry(:repository, finished_at: DateTime.now) rescue Gitlab::Shell::Error => e Rails.logger.error("#{self.class.name}: Error syncing repository for project #{project.path_with_namespace}: #{e}") rescue Gitlab::Git::Repository::NoRepository => e @@ -76,27 +25,6 @@ module Geo log('Expiring caches') project.repository.after_create end - - [started_at, finished_at] - end - - def fetch_wiki_repository - return unless sync_wiki? - - log('Fetching wiki repository') - started_at = DateTime.now - finished_at = nil - - begin - project.wiki.ensure_repository - project.wiki.repository.fetch_geo_mirror(ssh_url_to_wiki) - - finished_at = DateTime.now - rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error, ProjectWiki::CouldNotCreateWikiError => e - Rails.logger.error("#{self.class.name}: Error syncing wiki repository for project #{project.path_with_namespace}: #{e}") - end - - [started_at, finished_at] end def expire_repository_caches @@ -104,54 +32,8 @@ module Geo project.repository.after_sync end - def try_obtain_lease - log('Trying to obtain lease to sync repository') - repository_lease = Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT).try_obtain - - unless repository_lease - log('Could not obtain lease to sync repository') - return - end - - yield - - # We should release the lease for a repository, only if we have obtained - # it. If something went wrong when syncing the repository, we should wait - # for the lease timeout to try again. - log('Releasing leases to sync repository') - Gitlab::ExclusiveLease.cancel(lease_key, repository_lease) - end - - def update_registry(type, started_at, finished_at) - log("Updating #{type} sync information") - registry.public_send("last_#{type}_synced_at=", started_at) - - if finished_at - registry.public_send("last_#{type}_successful_sync_at=", finished_at) - registry.public_send("resync_#{type}=", false) - end - - registry.save - end - - def lease_key - @lease_key ||= "#{LEASE_KEY_PREFIX}:#{project.id}" - end - - def primary_ssh_path_prefix - Gitlab::Geo.primary_node.clone_url_prefix - end - def ssh_url_to_repo "#{primary_ssh_path_prefix}#{project.path_with_namespace}.git" end - - def ssh_url_to_wiki - "#{primary_ssh_path_prefix}#{project.path_with_namespace}.wiki.git" - end - - def log(message) - Rails.logger.info("#{self.class.name}: #{message} for project #{project.path_with_namespace} (#{project.id})") - end end end diff --git a/app/services/geo/wiki_sync_service.rb b/app/services/geo/wiki_sync_service.rb new file mode 100644 index 00000000000..679c31f58d2 --- /dev/null +++ b/app/services/geo/wiki_sync_service.rb @@ -0,0 +1,29 @@ +module Geo + class WikiSyncService < BaseSyncService + self.type = :wiki + + private + + def sync_repository + fetch_wiki_repository + end + + def fetch_wiki_repository + log('Fetching wiki repository') + update_registry(:wiki, started_at: DateTime.now) + + begin + project.wiki.ensure_repository + project.wiki.repository.fetch_geo_mirror(ssh_url_to_wiki) + + update_registry(:wiki, finished_at: DateTime.now) + rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error, ProjectWiki::CouldNotCreateWikiError => e + Rails.logger.error("#{self.class.name}: Error syncing wiki repository for project #{project.path_with_namespace}: #{e}") + end + end + + def ssh_url_to_wiki + "#{primary_ssh_path_prefix}#{project.path_with_namespace}.wiki.git" + end + end +end diff --git a/app/workers/geo/project_sync_worker.rb b/app/workers/geo/project_sync_worker.rb new file mode 100644 index 00000000000..9530b013980 --- /dev/null +++ b/app/workers/geo/project_sync_worker.rb @@ -0,0 +1,35 @@ +module Geo + class ProjectSyncWorker + include Sidekiq::Worker + + sidekiq_options queue: :geo, retry: 3, dead: false + + sidekiq_retry_in { |count| 30 * count } + + sidekiq_retries_exhausted do |msg, _| + Sidekiq.logger.warn "Failed #{msg['class']} with #{msg['args']}: #{msg['error_message']}" + end + + def perform(project_id, scheduled_time) + project = Project.find(project_id) + registry = Geo::ProjectRegistry.find_or_initialize_by(project_id: project_id) + + Geo::RepositorySyncService.new(project).execute if sync_repository?(registry, scheduled_time) + Geo::WikiSyncService.new(project).execute if sync_wiki?(registry, scheduled_time) + rescue ActiveRecord::RecordNotFound + logger.error("Couldn't find project with ID=#{project_id}, skipping syncing") + end + + private + + def sync_repository?(registry, scheduled_time) + !registry.repository_synced_since?(scheduled_time) && + registry.resync_repository? + end + + def sync_wiki?(registry, scheduled_time) + !registry.wiki_synced_since?(scheduled_time) && + registry.resync_wiki? + end + end +end diff --git a/app/workers/geo_repository_sync_worker.rb b/app/workers/geo_repository_sync_worker.rb index e5603918751..302eef7b00b 100644 --- a/app/workers/geo_repository_sync_worker.rb +++ b/app/workers/geo_repository_sync_worker.rb @@ -2,45 +2,74 @@ class GeoRepositorySyncWorker include Sidekiq::Worker include CronjobQueue - RUN_TIME = 5.minutes.to_i - BATCH_SIZE = 100 - LAST_SYNC_INTERVAL = 24.hours + LEASE_KEY = 'geo_repository_sync_worker'.freeze + LEASE_TIMEOUT = 10.minutes + BATCH_SIZE = 1000 + BACKOFF_DELAY = 5.minutes + MAX_CAPACITY = 25 + RUN_TIME = 60.minutes.to_i + + def initialize + @pending_projects = [] + @scheduled_jobs = [] + end def perform return unless Gitlab::Geo.secondary_role_enabled? return unless Gitlab::Geo.primary_node.present? - start_time = Time.now - project_ids_not_synced = find_project_ids_not_synced - project_ids_updated_recently = find_project_ids_updated_recently - project_ids = interleave(project_ids_not_synced, project_ids_updated_recently) + logger.info "Started Geo repository sync scheduler" - logger.info "Started Geo repository syncing for #{project_ids.length} project(s)" + @start_time = Time.now - project_ids.each do |project_id| - begin - break if over_time?(start_time) + # Prevent multiple Sidekiq workers from attempting to schedule projects synchronization + try_obtain_lease do + loop do break unless node_enabled? - # We try to obtain a lease here for the entire sync process because we - # want to sync the repositories continuously at a controlled rate - # instead of hammering the primary node. Initially, we are syncing - # one repo at a time. If we don't obtain the lease here, every 5 - # minutes all of 100 projects will be synced. - try_obtain_lease do |lease| - Geo::RepositorySyncService.new(project_id).execute - end - rescue ActiveRecord::RecordNotFound - logger.error("Couldn't find project with ID=#{project_id}, skipping syncing") - next + update_jobs_in_progress + load_pending_projects if reload_queue? + + # If we are still under the limit after refreshing our DB, we can end + # after scheduling the remaining transfers. + last_batch = reload_queue? + + break if over_time? + break unless projects_remain? + + schedule_jobs + + break if last_batch + break unless renew_lease! + + sleep(1) end - end - logger.info "Finished Geo repository syncing for #{project_ids.length} project(s)" + logger.info "Finished Geo repository sync scheduler" + end end private + def reload_queue? + @pending_projects.size < MAX_CAPACITY + end + + def projects_remain? + @pending_projects.size > 0 + end + + def over_time? + Time.now - @start_time >= RUN_TIME + end + + def load_pending_projects + project_ids_not_synced = find_project_ids_not_synced + project_ids_updated_recently = find_project_ids_updated_recently + + @pending_projects = interleave(project_ids_not_synced, project_ids_updated_recently) + end + def find_project_ids_not_synced Project.where.not(id: Geo::ProjectRegistry.synced.pluck(:project_id)) .order(last_repository_updated_at: :desc) @@ -63,37 +92,65 @@ class GeoRepositorySyncWorker end.flatten(1).uniq.compact.take(BATCH_SIZE) end - def over_time?(start_time) - Time.now - start_time >= RUN_TIME - end + def schedule_jobs + num_to_schedule = [MAX_CAPACITY - scheduled_job_ids.size, @pending_projects.size].min + return unless projects_remain? - def node_enabled? - # Only check every minute to avoid polling the DB excessively - unless @last_enabled_check.present? && @last_enabled_check > 1.minute.ago - @last_enabled_check = Time.now - @current_node_enabled = nil + num_to_schedule.times do + project_id = @pending_projects.shift + job_id = Geo::ProjectSyncWorker.perform_in(BACKOFF_DELAY, project_id, Time.now) + + @scheduled_jobs << { id: project_id, job_id: job_id } if job_id end + end - @current_node_enabled ||= Gitlab::Geo.current_node_enabled? + def scheduled_job_ids + @scheduled_jobs.map { |data| data[:job_id] } + end + + def update_jobs_in_progress + status = Gitlab::SidekiqStatus.job_status(scheduled_job_ids) + + # SidekiqStatus returns an array of booleans: true if the job has completed, false otherwise. + # For each entry, first use `zip` to make { job_id: 123, id: 10 } -> [ { job_id: 123, id: 10 }, bool ] + # Next, filter out the jobs that have completed. + @scheduled_jobs = @scheduled_jobs.zip(status).map { |(job, completed)| job if completed }.compact end def try_obtain_lease - lease = Gitlab::ExclusiveLease.new(lease_key, timeout: lease_timeout).try_obtain + lease = exclusive_lease.try_obtain - return unless lease + unless lease + logger.info "Cannot obtain an exclusive lease. There must be another worker already in execution." + return + end begin yield lease ensure - Gitlab::ExclusiveLease.cancel(lease_key, lease) + release_lease(lease) end end - def lease_key - Geo::RepositorySyncService::LEASE_KEY_PREFIX + def exclusive_lease + @lease ||= Gitlab::ExclusiveLease.new(LEASE_KEY, timeout: LEASE_TIMEOUT) end - def lease_timeout - Geo::RepositorySyncService::LEASE_TIMEOUT + def renew_lease! + exclusive_lease.renew + end + + def release_lease(uuid) + Gitlab::ExclusiveLease.cancel(LEASE_KEY, uuid) + end + + def node_enabled? + # Only check every minute to avoid polling the DB excessively + unless @last_enabled_check.present? && @last_enabled_check > 1.minute.ago + @last_enabled_check = Time.now + @current_node_enabled = nil + end + + @current_node_enabled ||= Gitlab::Geo.current_node_enabled? end end diff --git a/changelogs/unreleased-ee/increase-parallelism-of-geo-repository-sync-worker.yml b/changelogs/unreleased-ee/increase-parallelism-of-geo-repository-sync-worker.yml new file mode 100644 index 00000000000..34ef7e8ab22 --- /dev/null +++ b/changelogs/unreleased-ee/increase-parallelism-of-geo-repository-sync-worker.yml @@ -0,0 +1,4 @@ +--- +title: Geo - Makes the projects synchronization faster on secondaries nodes +merge_request: +author: diff --git a/spec/models/geo/project_registry_spec.rb b/spec/models/geo/project_registry_spec.rb index 8b7d4ce6148..c6033141c53 100644 --- a/spec/models/geo/project_registry_spec.rb +++ b/spec/models/geo/project_registry_spec.rb @@ -31,4 +31,86 @@ describe Geo::ProjectRegistry, models: true do expect(described_class.synced).to match_array([registry]) end end + + describe '#resync_repository?' do + it 'returns true when resync_repository is true' do + subject.resync_repository = true + + expect(subject.resync_repository).to be true + end + + it 'returns true when last_repository_successful_sync_at is nil' do + subject.last_repository_successful_sync_at = nil + + expect(subject.resync_repository).to be true + end + + it 'returns false when resync_repository is false and last_repository_successful_sync_at is present' do + subject.resync_repository = false + subject.last_repository_successful_sync_at = Time.now + + expect(subject.resync_repository).to be false + end + end + + describe '#resync_wiki?' do + it 'returns true when resync_wiki is true' do + subject.resync_wiki = true + + expect(subject.resync_wiki).to be true + end + + it 'returns true when last_wiki_successful_sync_at is nil' do + subject.last_wiki_successful_sync_at = nil + + expect(subject.resync_wiki).to be true + end + + it 'returns false when resync_wiki is false and last_wiki_successful_sync_at is present' do + subject.resync_wiki = false + subject.last_wiki_successful_sync_at = Time.now + + expect(subject.resync_wiki).to be false + end + end + + describe '#repository_synced_since?' do + it 'returns false when last_repository_synced_at is nil' do + subject.last_repository_synced_at = nil + + expect(subject.repository_synced_since?(Time.now)).to be_nil + end + + it 'returns false when last_repository_synced_at before timestamp' do + subject.last_repository_synced_at = Time.now - 2.hours + + expect(subject.repository_synced_since?(Time.now)).to be false + end + + it 'returns true when last_repository_synced_at after timestamp' do + subject.last_repository_synced_at = Time.now + 2.hours + + expect(subject.repository_synced_since?(Time.now)).to be true + end + end + + describe '#wiki_synced_since?' do + it 'returns false when last_wiki_synced_at is nil' do + subject.last_wiki_synced_at = nil + + expect(subject.wiki_synced_since?(Time.now)).to be_nil + end + + it 'returns false when last_wiki_synced_at before timestamp' do + subject.last_wiki_synced_at = Time.now - 2.hours + + expect(subject.wiki_synced_since?(Time.now)).to be false + end + + it 'returns true when last_wiki_synced_at after timestamp' do + subject.last_wiki_synced_at = Time.now + 2.hours + + expect(subject.wiki_synced_since?(Time.now)).to be true + end + end end diff --git a/spec/services/geo/repository_sync_service_spec.rb b/spec/services/geo/repository_sync_service_spec.rb index 0d40cdc5d80..f1c9e7b8469 100644 --- a/spec/services/geo/repository_sync_service_spec.rb +++ b/spec/services/geo/repository_sync_service_spec.rb @@ -1,14 +1,14 @@ require 'spec_helper' -describe Geo::RepositorySyncService, services: true do +RSpec.describe Geo::RepositorySyncService, services: true do let!(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') } let(:lease) { double(try_obtain: true) } - subject { described_class.new(project.id) } + subject { described_class.new(project) } before do allow(Gitlab::ExclusiveLease).to receive(:new) - .with(subject.__send__(:lease_key), anything) + .with(subject.lease_key, anything) .and_return(lease) allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) @@ -16,285 +16,96 @@ describe Geo::RepositorySyncService, services: true do end describe '#execute' do - context 'when project has never been synced' do - let(:project) { create(:project_empty_repo) } + let(:project) { create(:project_empty_repo) } + let(:repository) { project.repository } + let(:url_to_repo) { "#{primary.clone_url_prefix}#{project.path_with_namespace}.git" } - it 'fetches project repositories' do - fetch_count = 0 + it 'fetches project repository' do + expect(repository).to receive(:fetch_geo_mirror).with(url_to_repo).once - allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) do - fetch_count += 1 - end - - subject.execute - - expect(fetch_count).to eq 2 - end - - it 'expires repository caches' do - expect_any_instance_of(Repository).to receive(:expire_all_method_caches).once - expect_any_instance_of(Repository).to receive(:expire_branch_cache).once - expect_any_instance_of(Repository).to receive(:expire_content_cache).once - - subject.execute - end - - it 'releases lease' do - expect(Gitlab::ExclusiveLease).to receive(:cancel).once.with( - subject.__send__(:lease_key), anything).and_call_original - - subject.execute - end - - it 'does not fetch project repositories if cannot obtain a lease' do - allow(lease).to receive(:try_obtain) { false } - - expect_any_instance_of(Repository).not_to receive(:fetch_geo_mirror) - - subject.execute - end - - context 'tracking database' do - it 'creates a new registry' do - expect { subject.execute }.to change(Geo::ProjectRegistry, :count).by(1) - end - - it 'sets last_repository_successful_sync_at when repository sync succeed' do - subject.execute - - registry = Geo::ProjectRegistry.find_by(project_id: project.id) - - expect(registry.last_repository_successful_sync_at).not_to be_nil - end - - it 'resets last_repository_successful_sync_at when repository sync fail' do - allow_any_instance_of(Repository).to receive(:fetch_geo_mirror).with(/#{project.path_with_namespace}\.git/) { raise Gitlab::Shell::Error } - - subject.execute - - registry = Geo::ProjectRegistry.find_by(project_id: project.id) - - expect(registry.last_repository_successful_sync_at).to be_nil - end - - it 'sets last_wiki_successful_sync_at when wiki sync succeed' do - subject.execute - - registry = Geo::ProjectRegistry.find_by(project_id: project.id) - - expect(registry.last_wiki_successful_sync_at).not_to be_nil - end - - it 'resets last_wiki_successful_sync_at when wiki sync fail' do - allow_any_instance_of(Repository).to receive(:fetch_geo_mirror).with(/#{project.path_with_namespace}\.wiki.git/) { raise Gitlab::Shell::Error } - - subject.execute - - registry = Geo::ProjectRegistry.find_by(project_id: project.id) - - expect(registry.last_wiki_successful_sync_at).to be_nil - end - end + subject.execute end - context 'when project has been synced' do - let(:project) { create(:project) } - let(:last_repository_synced_at) { 5.days.ago } - let(:last_wiki_synced_at) { 4.days.ago } - - let!(:registry) do - create(:geo_project_registry, :synced, - project: project, - last_repository_synced_at: last_repository_synced_at, - last_repository_successful_sync_at: last_repository_synced_at, - last_wiki_synced_at: last_wiki_synced_at, - last_wiki_successful_sync_at: last_wiki_synced_at) - end - - it 'does not fetch project repositories' do - expect_any_instance_of(Repository).not_to receive(:fetch_geo_mirror) - - subject.execute - end - - context 'tracking database' do - it 'does not create a new registry' do - expect { subject.execute }.not_to change(Geo::ProjectRegistry, :count) - end - - it 'does not update last_repository_successful_sync_at' do - subject.execute - - registry.reload - - expect(registry.last_repository_synced_at).to be_within(1.minute).of(last_repository_synced_at) - expect(registry.last_repository_successful_sync_at).to be_within(1.minute).of(last_repository_synced_at) - end - - it 'does not update last_wiki_successful_sync_at' do - subject.execute - - registry.reload + it 'expires repository caches' do + expect_any_instance_of(Repository).to receive(:expire_all_method_caches).once + expect_any_instance_of(Repository).to receive(:expire_branch_cache).once + expect_any_instance_of(Repository).to receive(:expire_content_cache).once - expect(registry.last_wiki_synced_at).to be_within(1.minute).of(last_wiki_synced_at) - expect(registry.last_wiki_successful_sync_at).to be_within(1.minute).of(last_wiki_synced_at) - end - end + subject.execute end - context 'when last attempt to sync project repositories failed' do - let(:project) { create(:project) } - let!(:registry) { create(:geo_project_registry, :sync_failed, project: project) } - - it 'fetches project repositories' do - fetch_count = 0 + it 'releases lease' do + expect(Gitlab::ExclusiveLease).to receive(:cancel).once.with( + subject.__send__(:lease_key), anything).and_call_original - allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) do - fetch_count += 1 - end + subject.execute + end - subject.execute + it 'does not fetch project repository if cannot obtain a lease' do + allow(lease).to receive(:try_obtain) { false } - expect(fetch_count).to eq 2 - end + expect(repository).not_to receive(:fetch_geo_mirror) - context 'tracking database' do - it 'sets last_repository_successful_sync_at' do - subject.execute + subject.execute + end - registry.reload + it 'rescues when Gitlab::Shell::Error is raised' do + allow(repository).to receive(:fetch_geo_mirror).with(url_to_repo) { raise Gitlab::Shell::Error } - expect(registry.last_repository_successful_sync_at).not_to be_nil - end + expect { subject.execute }.not_to raise_error + end - it 'sets last_wiki_successful_sync_at' do - subject.execute + it 'rescues exception and fires after_create hook when Gitlab::Git::Repository::NoRepository is raised' do + allow(repository).to receive(:fetch_geo_mirror).with(url_to_repo) { raise Gitlab::Git::Repository::NoRepository } - registry.reload + expect(repository).to receive(:after_create) - expect(registry.last_wiki_successful_sync_at).not_to be_nil - end - end + expect { subject.execute }.not_to raise_error end - context 'when project repository is dirty' do - let(:project) { create(:project) } - let(:last_wiki_synced_at) { 4.days.ago } - - let!(:registry) do - create(:geo_project_registry, :synced, :repository_dirty, - project: project, - last_wiki_synced_at: last_wiki_synced_at, - last_wiki_successful_sync_at: last_wiki_synced_at) + context 'tracking database' do + it 'creates a new registry if does not exists' do + expect { subject.execute }.to change(Geo::ProjectRegistry, :count).by(1) end - it 'fetches project repository' do - expect_any_instance_of(Repository).to receive(:fetch_geo_mirror).once + it 'does not create a new registry if one exist' do + create(:geo_project_registry, project: project) - subject.execute + expect { subject.execute }.not_to change(Geo::ProjectRegistry, :count) end - context 'exceptions' do - it 'rescues when Gitlab::Shell::Error is raised' do - allow_any_instance_of(Repository).to receive(:fetch_geo_mirror).with(/#{project.path_with_namespace}\.git/) { raise Gitlab::Shell::Error } - - expect { subject.execute }.not_to raise_error - end - - it 'rescues exception and fires after_create hook when Gitlab::Git::Repository::NoRepository is raised' do - allow_any_instance_of(Repository).to receive(:fetch_geo_mirror).with(/#{project.path_with_namespace}\.git/) { raise Gitlab::Git::Repository::NoRepository } - - expect_any_instance_of(Repository).to receive(:after_create) + context 'when repository sync succeed' do + let(:registry) { Geo::ProjectRegistry.find_by(project_id: project.id) } - expect { subject.execute }.not_to raise_error - end - end - - context 'tracking database' do - it 'updates last_repository_successful_sync_at' do + before do subject.execute - - registry.reload - - expect(registry.last_repository_synced_at).to be_within(1.minute).of(DateTime.now) - expect(registry.last_repository_successful_sync_at).to be_within(1.minute).of(DateTime.now) end - it 'does not update last_wiki_successful_sync_at' do - subject.execute - - registry.reload - - expect(registry.last_wiki_synced_at).to be_within(1.minute).of(last_wiki_synced_at) - expect(registry.last_wiki_successful_sync_at).to be_within(1.minute).of(last_wiki_synced_at) + it 'sets last_repository_synced_at' do + expect(registry.last_repository_synced_at).not_to be_nil end - it 'resets resync_repository' do - subject.execute - - registry.reload - - expect(registry.resync_repository).to be false + it 'sets last_repository_successful_sync_at' do + expect(registry.last_repository_successful_sync_at).not_to be_nil end end - end - - context 'when project wiki is dirty' do - let(:project) { create(:project) } - let(:last_repository_synced_at) { 5.days.ago } - - let!(:registry) do - create(:geo_project_registry, :synced, :wiki_dirty, - project: project, - last_repository_synced_at: last_repository_synced_at, - last_repository_successful_sync_at: last_repository_synced_at) - end - it 'fetches wiki repository' do - expect_any_instance_of(Repository).to receive(:fetch_geo_mirror).once + context 'when repository sync fail' do + let(:registry) { Geo::ProjectRegistry.find_by(project_id: project.id) } + let(:url_to_repo) { "#{primary.clone_url_prefix}#{project.path_with_namespace}.git" } - subject.execute - end - - context 'exceptions' do - it 'rescues exception when Gitlab::Shell::Error is raised' do - allow_any_instance_of(Repository).to receive(:fetch_geo_mirror).with(/#{project.path_with_namespace}\.wiki\.git/) { raise Gitlab::Shell::Error } - - expect { subject.execute }.not_to raise_error - end + before do + allow(repository).to receive(:fetch_geo_mirror).with(url_to_repo) { raise Gitlab::Shell::Error } - it 'rescues exception when Gitlab::Git::Repository::NoRepository is raised' do - allow_any_instance_of(Repository).to receive(:fetch_geo_mirror).with(/#{project.path_with_namespace}\.wiki\.git/) { raise Gitlab::Git::Repository::NoRepository } - - expect { subject.execute }.not_to raise_error - end - end - - context 'tracking database' do - it 'updates last_wiki_successful_sync_at' do subject.execute - - registry.reload - - expect(registry.last_wiki_synced_at).to be_within(1.minute).of(DateTime.now) - expect(registry.last_wiki_successful_sync_at).to be_within(1.minute).of(DateTime.now) end - it 'does not update last_repository_successful_sync_at' do - subject.execute - - registry.reload - - expect(registry.last_repository_synced_at).to be_within(1.minute).of(last_repository_synced_at) - expect(registry.last_repository_successful_sync_at).to be_within(1.minute).of(last_repository_synced_at) + it 'sets last_repository_synced_at' do + expect(registry.last_repository_synced_at).not_to be_nil end - it 'resets resync_wiki' do - subject.execute - - registry.reload - - expect(registry.resync_wiki).to be false + it 'resets last_repository_successful_sync_at' do + expect(registry.last_repository_successful_sync_at).to be_nil end end end diff --git a/spec/services/geo/wiki_sync_service_spec.rb b/spec/services/geo/wiki_sync_service_spec.rb new file mode 100644 index 00000000000..f01cbb2393d --- /dev/null +++ b/spec/services/geo/wiki_sync_service_spec.rb @@ -0,0 +1,102 @@ +require 'spec_helper' + +RSpec.describe Geo::WikiSyncService, services: true do + let!(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') } + let(:lease) { double(try_obtain: true) } + + subject { described_class.new(project) } + + before do + allow(Gitlab::ExclusiveLease).to receive(:new) + .with(subject.lease_key, anything) + .and_return(lease) + + allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) + .and_return(true) + end + + describe '#execute' do + let(:project) { create(:project_empty_repo) } + let(:repository) { project.wiki.repository } + let(:url_to_repo) { "#{primary.clone_url_prefix}#{project.path_with_namespace}.wiki.git" } + + it 'fetches wiki repository' do + expect(repository).to receive(:fetch_geo_mirror).with(url_to_repo).once + + subject.execute + end + + it 'releases lease' do + expect(Gitlab::ExclusiveLease).to receive(:cancel).once.with( + subject.__send__(:lease_key), anything).and_call_original + + subject.execute + end + + it 'does not fetch wiki repository if cannot obtain a lease' do + allow(lease).to receive(:try_obtain) { false } + + expect(repository).not_to receive(:fetch_geo_mirror) + + subject.execute + end + + it 'rescues exception when Gitlab::Shell::Error is raised' do + allow(repository).to receive(:fetch_geo_mirror).with(url_to_repo) { raise Gitlab::Shell::Error } + + expect { subject.execute }.not_to raise_error + end + + it 'rescues exception when Gitlab::Git::Repository::NoRepository is raised' do + allow(repository).to receive(:fetch_geo_mirror).with(url_to_repo) { raise Gitlab::Git::Repository::NoRepository } + + expect { subject.execute }.not_to raise_error + end + + context 'tracking database' do + it 'creates a new registry if does not exists' do + expect { subject.execute }.to change(Geo::ProjectRegistry, :count).by(1) + end + + it 'does not create a new registry if one exists' do + create(:geo_project_registry, project: project) + + expect { subject.execute }.not_to change(Geo::ProjectRegistry, :count) + end + + context 'when repository sync succeed' do + let(:registry) { Geo::ProjectRegistry.find_by(project_id: project.id) } + + before do + subject.execute + end + + it 'sets last_wiki_synced_at' do + expect(registry.last_wiki_synced_at).not_to be_nil + end + + it 'sets last_wiki_successful_sync_at' do + expect(registry.last_wiki_successful_sync_at).not_to be_nil + end + end + + context 'when wiki sync fail' do + let(:registry) { Geo::ProjectRegistry.find_by(project_id: project.id) } + + before do + allow(repository).to receive(:fetch_geo_mirror).with(url_to_repo) { raise Gitlab::Shell::Error } + + subject.execute + end + + it 'sets last_wiki_synced_at' do + expect(registry.last_wiki_synced_at).not_to be_nil + end + + it 'resets last_wiki_successful_sync_at' do + expect(registry.last_wiki_successful_sync_at).to be_nil + end + end + end + end +end diff --git a/spec/workers/geo/project_sync_worker_spec.rb b/spec/workers/geo/project_sync_worker_spec.rb new file mode 100644 index 00000000000..a2cedff807b --- /dev/null +++ b/spec/workers/geo/project_sync_worker_spec.rb @@ -0,0 +1,125 @@ +require 'rails_helper' + +RSpec.describe Geo::ProjectSyncWorker do + describe '#perform' do + let(:project) { create(:empty_project) } + let(:repository_sync_service) { spy } + let(:wiki_sync_service) { spy } + + before do + allow(Geo::RepositorySyncService).to receive(:new) + .with(instance_of(Project)).once.and_return(repository_sync_service) + + allow(Geo::WikiSyncService).to receive(:new) + .with(instance_of(Project)).once.and_return(wiki_sync_service) + end + + context 'when project could not be found' do + it 'does not raise an error' do + expect { subject.perform(999, Time.now) }.not_to raise_error + end + end + + context 'when project repositories has never been synced' do + it 'performs Geo::RepositorySyncService for the given project' do + subject.perform(project.id, Time.now) + + expect(repository_sync_service).to have_received(:execute).once + end + + it 'performs Geo::WikiSyncService for the given project' do + subject.perform(project.id, Time.now) + + expect(wiki_sync_service).to have_received(:execute).once + end + end + + context 'when project repositories has been synced' do + let!(:registry) { create(:geo_project_registry, :synced, project: project) } + + it 'does not perform Geo::RepositorySyncService for the given project' do + subject.perform(project.id, Time.now) + + expect(repository_sync_service).not_to have_received(:execute) + end + + it 'does not perform Geo::WikiSyncService for the given project' do + subject.perform(project.id, Time.now) + + expect(wiki_sync_service).not_to have_received(:execute) + end + end + + context 'when last attempt to sync project repositories failed' do + let!(:registry) { create(:geo_project_registry, :sync_failed, project: project) } + + it 'performs Geo::RepositorySyncService for the given project' do + subject.perform(project.id, Time.now) + + expect(repository_sync_service).to have_received(:execute).once + end + + it 'performs Geo::WikiSyncService for the given project' do + subject.perform(project.id, Time.now) + + expect(wiki_sync_service).to have_received(:execute).once + end + end + + context 'when project repository is dirty' do + let!(:registry) do + create(:geo_project_registry, :synced, :repository_dirty, project: project) + end + + it 'performs Geo::RepositorySyncService for the given project' do + subject.perform(project.id, Time.now) + + expect(repository_sync_service).to have_received(:execute).once + end + + it 'does not perform Geo::WikiSyncService for the given project' do + subject.perform(project.id, Time.now) + + expect(wiki_sync_service).not_to have_received(:execute) + end + end + + context 'when wiki is dirty' do + let!(:registry) do + create(:geo_project_registry, :synced, :wiki_dirty, project: project) + end + + it 'does not perform Geo::RepositorySyncService for the given project' do + subject.perform(project.id, Time.now) + + expect(repository_sync_service).not_to have_received(:execute) + end + + it 'performs Geo::WikiSyncService for the given project' do + subject.perform(project.id, Time.now) + + expect(wiki_sync_service).to have_received(:execute) + end + end + + context 'when project repository was synced after the time the job was scheduled in' do + it 'does not perform Geo::RepositorySyncService for the given project' do + create(:geo_project_registry, :synced, :repository_dirty, project: project, last_repository_synced_at: Time.now) + + subject.perform(project.id, Time.now - 5.minutes) + + expect(repository_sync_service).not_to have_received(:execute) + end + end + + context 'when wiki repository was synced after the time the job was scheduled in' do + it 'does not perform Geo::RepositorySyncService for the given project' do + create(:geo_project_registry, :synced, :wiki_dirty, project: project, last_wiki_synced_at: Time.now) + + subject.perform(project.id, Time.now - 5.minutes) + + expect(wiki_sync_service).not_to have_received(:execute) + end + end + end +end diff --git a/spec/workers/geo_repository_sync_worker_spec.rb b/spec/workers/geo_repository_sync_worker_spec.rb index 48b7f7886dd..161db95db2b 100644 --- a/spec/workers/geo_repository_sync_worker_spec.rb +++ b/spec/workers/geo_repository_sync_worker_spec.rb @@ -13,90 +13,51 @@ describe GeoRepositorySyncWorker do allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain) { true } end - it 'performs Geo::RepositorySyncService for each project' do - expect(Geo::RepositorySyncService).to receive(:new).twice.and_return(spy) + it 'performs Geo::ProjectSyncWorker for each project' do + expect(Geo::ProjectSyncWorker).to receive(:perform_in).twice.and_return(spy) subject.perform end - it 'performs Geo::RepositorySyncService for projects where last attempt to sync failed' do - Geo::ProjectRegistry.create( - project: project_1, - last_repository_synced_at: DateTime.now, - last_repository_successful_sync_at: nil - ) + it 'performs Geo::ProjectSyncWorker for projects where last attempt to sync failed' do + create(:geo_project_registry, :sync_failed, project: project_1) + create(:geo_project_registry, :synced, project: project_2) - Geo::ProjectRegistry.create( - project: project_2, - last_repository_synced_at: DateTime.now, - last_repository_successful_sync_at: DateTime.now, - resync_repository: false, - resync_wiki: false - ) - - expect(Geo::RepositorySyncService).to receive(:new).once.and_return(spy) + expect(Geo::ProjectSyncWorker).to receive(:perform_in).once.and_return(spy) subject.perform end - it 'performs Geo::RepositorySyncService for synced projects updated recently' do - Geo::ProjectRegistry.create( - project: project_1, - last_repository_synced_at: 2.days.ago, - last_repository_successful_sync_at: 2.days.ago, - resync_repository: true, - resync_wiki: false - ) - - Geo::ProjectRegistry.create( - project: project_2, - last_repository_synced_at: 10.minutes.ago, - last_repository_successful_sync_at: 10.minutes.ago, - resync_repository: false, - resync_wiki: false - ) - - Geo::ProjectRegistry.create( - project: create(:empty_project), - last_repository_synced_at: 5.minutes.ago, - last_repository_successful_sync_at: 5.minutes.ago, - resync_repository: false, - resync_wiki: true - ) - - expect(Geo::RepositorySyncService).to receive(:new).twice.and_return(spy) + it 'performs Geo::ProjectSyncWorker for synced projects updated recently' do + create(:geo_project_registry, :synced, :repository_dirty, project: project_1) + create(:geo_project_registry, :synced, project: project_2) + create(:geo_project_registry, :synced, :wiki_dirty) + + expect(Geo::ProjectSyncWorker).to receive(:perform_in).twice.and_return(spy) subject.perform end - it 'does not perform Geo::RepositorySyncService when secondary role is disabled' do + it 'does not perform Geo::ProjectSyncWorker when secondary role is disabled' do allow(Gitlab::Geo).to receive(:secondary_role_enabled?) { false } - expect(Geo::RepositorySyncService).not_to receive(:new) + expect(Geo::ProjectSyncWorker).not_to receive(:perform_in) subject.perform end - it 'does not perform Geo::RepositorySyncService when primary node does not exists' do + it 'does not perform Geo::ProjectSyncWorker when primary node does not exists' do allow(Gitlab::Geo).to receive(:primary_node) { nil } - expect(Geo::RepositorySyncService).not_to receive(:new) + expect(Geo::ProjectSyncWorker).not_to receive(:perform_in) subject.perform end - it 'does not perform Geo::RepositorySyncService when node is disabled' do + it 'does not perform Geo::ProjectSyncWorker when node is disabled' do allow_any_instance_of(GeoNode).to receive(:enabled?) { false } - expect(Geo::RepositorySyncService).not_to receive(:new) - - subject.perform - end - - it 'does not perform Geo::RepositorySyncService when can not obtain a lease' do - allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain) { false } - - expect(Geo::RepositorySyncService).not_to receive(:new) + expect(Geo::ProjectSyncWorker).not_to receive(:perform_in) subject.perform end |