Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDouwe Maan <douwe@gitlab.com>2017-07-08 03:05:50 +0300
committerDouwe Maan <douwe@gitlab.com>2017-07-08 03:05:50 +0300
commite916e7274863492d6dec55d888c83d715e1fd60b (patch)
tree30310e7e84c06c6ce368b35bf436607182154b29
parent294d1804d3c9f56f6931a996b90c57ba63d61c7e (diff)
parentc3aa2dee088c1dc4859d0bff93391ff206f35e7d (diff)
Merge branch 'increase-parallelism-of-geo-repository-sync-worker' into 'master'
Increase parallelism of geo repository sync worker Closes #2742 See merge request !2351
-rw-r--r--app/models/geo/project_registry.rb16
-rw-r--r--app/services/geo/base_sync_service.rb81
-rw-r--r--app/services/geo/repository_sync_service.rb130
-rw-r--r--app/services/geo/wiki_sync_service.rb29
-rw-r--r--app/workers/geo/project_sync_worker.rb35
-rw-r--r--app/workers/geo_repository_sync_worker.rb137
-rw-r--r--changelogs/unreleased-ee/increase-parallelism-of-geo-repository-sync-worker.yml4
-rw-r--r--spec/models/geo/project_registry_spec.rb82
-rw-r--r--spec/services/geo/repository_sync_service_spec.rb297
-rw-r--r--spec/services/geo/wiki_sync_service_spec.rb102
-rw-r--r--spec/workers/geo/project_sync_worker_spec.rb125
-rw-r--r--spec/workers/geo_repository_sync_worker_spec.rb75
12 files changed, 649 insertions, 464 deletions
diff --git a/app/models/geo/project_registry.rb b/app/models/geo/project_registry.rb
index 247bceb712c..84f996f193c 100644
--- a/app/models/geo/project_registry.rb
+++ b/app/models/geo/project_registry.rb
@@ -10,4 +10,20 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
where.not(last_repository_synced_at: nil, last_repository_successful_sync_at: nil)
.where(resync_repository: false, resync_wiki: false)
end
+
+ def resync_repository?
+ resync_repository || last_repository_successful_sync_at.nil?
+ end
+
+ def resync_wiki?
+ resync_wiki || last_wiki_successful_sync_at.nil?
+ end
+
+ def repository_synced_since?(timestamp)
+ last_repository_synced_at && last_repository_synced_at > timestamp
+ end
+
+ def wiki_synced_since?(timestamp)
+ last_wiki_synced_at && last_wiki_synced_at > timestamp
+ end
end
diff --git a/app/services/geo/base_sync_service.rb b/app/services/geo/base_sync_service.rb
new file mode 100644
index 00000000000..4dc7ebafa8a
--- /dev/null
+++ b/app/services/geo/base_sync_service.rb
@@ -0,0 +1,81 @@
+module Geo
+ class BaseSyncService
+ class << self
+ attr_accessor :type
+ end
+
+ attr_reader :project
+
+ LEASE_TIMEOUT = 8.hours.freeze
+ LEASE_KEY_PREFIX = 'geo_sync_service'.freeze
+
+ def initialize(project)
+ @project = project
+ end
+
+ def execute
+ try_obtain_lease do
+ log("Started #{type} sync")
+ sync_repository
+ log("Finished #{type} sync")
+ end
+ end
+
+ def lease_key
+ @lease_key ||= "#{LEASE_KEY_PREFIX}:#{type}:#{project.id}"
+ end
+
+ private
+
+ def registry
+ @registry ||= Geo::ProjectRegistry.find_or_initialize_by(project_id: project.id)
+ end
+
+ def try_obtain_lease
+ log("Trying to obtain lease to sync #{type}")
+ repository_lease = Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT).try_obtain
+
+ unless repository_lease
+ log("Could not obtain lease to sync #{type}")
+ return
+ end
+
+ yield
+
+ # We should release the lease for a repository, only if we have obtained
+ # it. If something went wrong when syncing the repository, we should wait
+ # for the lease timeout to try again.
+ log("Releasing leases to sync #{type}")
+ Gitlab::ExclusiveLease.cancel(lease_key, repository_lease)
+ end
+
+ def update_registry(type, started_at: nil, finished_at: nil)
+ return unless started_at || finished_at
+
+ log("Updating #{type} sync information")
+
+ attrs = {}
+
+ attrs["last_#{type}_synced_at"] = started_at if started_at
+
+ if finished_at
+ attrs["last_#{type}_successful_sync_at"] = finished_at
+ attrs["resync_#{type}"] = false
+ end
+
+ registry.update!(attrs)
+ end
+
+ def type
+ self.class.type
+ end
+
+ def primary_ssh_path_prefix
+ @primary_ssh_path_prefix ||= Gitlab::Geo.primary_node.clone_url_prefix
+ end
+
+ def log(message)
+ Rails.logger.info("#{self.class.name}: #{message} for project #{project.path_with_namespace} (#{project.id})")
+ end
+ end
+end
diff --git a/app/services/geo/repository_sync_service.rb b/app/services/geo/repository_sync_service.rb
index 40cdaeb7cea..9eb0a070b9c 100644
--- a/app/services/geo/repository_sync_service.rb
+++ b/app/services/geo/repository_sync_service.rb
@@ -1,74 +1,23 @@
module Geo
- class RepositorySyncService
- attr_reader :project_id
-
- LEASE_TIMEOUT = 8.hours.freeze
- LEASE_KEY_PREFIX = 'repository_sync_service'.freeze
-
- def initialize(project_id)
- @project_id = project_id
- end
-
- def execute
- try_obtain_lease do
- log('Started repository sync')
- sync_project_repository
- sync_wiki_repository
- log('Finished repository sync')
- end
- rescue ActiveRecord::RecordNotFound
- Rails.logger.error("#{self.class.name}: Couldn't find project with ID=#{project_id}, skipping syncing")
- end
+ class RepositorySyncService < BaseSyncService
+ self.type = :repository
private
- def project
- @project ||= Project.find(project_id)
- end
-
- def registry
- @registry ||= Geo::ProjectRegistry.find_or_initialize_by(project_id: project_id)
- end
-
- def sync_project_repository
- return unless sync_repository?
-
- started_at, finished_at = fetch_project_repository
- update_registry(:repository, started_at, finished_at)
+ def sync_repository
+ fetch_project_repository
expire_repository_caches
end
- def sync_repository?
- registry.resync_repository? ||
- registry.last_repository_successful_sync_at.nil? ||
- registry.last_repository_synced_at.nil?
- end
-
- def sync_wiki_repository
- return unless sync_wiki?
-
- started_at, finished_at = fetch_wiki_repository
- update_registry(:wiki, started_at, finished_at)
- end
-
- def sync_wiki?
- registry.resync_wiki? ||
- registry.last_wiki_successful_sync_at.nil? ||
- registry.last_wiki_synced_at.nil?
- end
-
def fetch_project_repository
- return unless sync_repository?
-
log('Fetching project repository')
- started_at = DateTime.now
- finished_at = nil
+ update_registry(:repository, started_at: DateTime.now)
begin
project.ensure_repository
project.repository.fetch_geo_mirror(ssh_url_to_repo)
- finished_at = DateTime.now
+ update_registry(:repository, finished_at: DateTime.now)
rescue Gitlab::Shell::Error => e
Rails.logger.error("#{self.class.name}: Error syncing repository for project #{project.path_with_namespace}: #{e}")
rescue Gitlab::Git::Repository::NoRepository => e
@@ -76,27 +25,6 @@ module Geo
log('Expiring caches')
project.repository.after_create
end
-
- [started_at, finished_at]
- end
-
- def fetch_wiki_repository
- return unless sync_wiki?
-
- log('Fetching wiki repository')
- started_at = DateTime.now
- finished_at = nil
-
- begin
- project.wiki.ensure_repository
- project.wiki.repository.fetch_geo_mirror(ssh_url_to_wiki)
-
- finished_at = DateTime.now
- rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error, ProjectWiki::CouldNotCreateWikiError => e
- Rails.logger.error("#{self.class.name}: Error syncing wiki repository for project #{project.path_with_namespace}: #{e}")
- end
-
- [started_at, finished_at]
end
def expire_repository_caches
@@ -104,54 +32,8 @@ module Geo
project.repository.after_sync
end
- def try_obtain_lease
- log('Trying to obtain lease to sync repository')
- repository_lease = Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT).try_obtain
-
- unless repository_lease
- log('Could not obtain lease to sync repository')
- return
- end
-
- yield
-
- # We should release the lease for a repository, only if we have obtained
- # it. If something went wrong when syncing the repository, we should wait
- # for the lease timeout to try again.
- log('Releasing leases to sync repository')
- Gitlab::ExclusiveLease.cancel(lease_key, repository_lease)
- end
-
- def update_registry(type, started_at, finished_at)
- log("Updating #{type} sync information")
- registry.public_send("last_#{type}_synced_at=", started_at)
-
- if finished_at
- registry.public_send("last_#{type}_successful_sync_at=", finished_at)
- registry.public_send("resync_#{type}=", false)
- end
-
- registry.save
- end
-
- def lease_key
- @lease_key ||= "#{LEASE_KEY_PREFIX}:#{project.id}"
- end
-
- def primary_ssh_path_prefix
- Gitlab::Geo.primary_node.clone_url_prefix
- end
-
def ssh_url_to_repo
"#{primary_ssh_path_prefix}#{project.path_with_namespace}.git"
end
-
- def ssh_url_to_wiki
- "#{primary_ssh_path_prefix}#{project.path_with_namespace}.wiki.git"
- end
-
- def log(message)
- Rails.logger.info("#{self.class.name}: #{message} for project #{project.path_with_namespace} (#{project.id})")
- end
end
end
diff --git a/app/services/geo/wiki_sync_service.rb b/app/services/geo/wiki_sync_service.rb
new file mode 100644
index 00000000000..679c31f58d2
--- /dev/null
+++ b/app/services/geo/wiki_sync_service.rb
@@ -0,0 +1,29 @@
+module Geo
+ class WikiSyncService < BaseSyncService
+ self.type = :wiki
+
+ private
+
+ def sync_repository
+ fetch_wiki_repository
+ end
+
+ def fetch_wiki_repository
+ log('Fetching wiki repository')
+ update_registry(:wiki, started_at: DateTime.now)
+
+ begin
+ project.wiki.ensure_repository
+ project.wiki.repository.fetch_geo_mirror(ssh_url_to_wiki)
+
+ update_registry(:wiki, finished_at: DateTime.now)
+ rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error, ProjectWiki::CouldNotCreateWikiError => e
+ Rails.logger.error("#{self.class.name}: Error syncing wiki repository for project #{project.path_with_namespace}: #{e}")
+ end
+ end
+
+ def ssh_url_to_wiki
+ "#{primary_ssh_path_prefix}#{project.path_with_namespace}.wiki.git"
+ end
+ end
+end
diff --git a/app/workers/geo/project_sync_worker.rb b/app/workers/geo/project_sync_worker.rb
new file mode 100644
index 00000000000..9530b013980
--- /dev/null
+++ b/app/workers/geo/project_sync_worker.rb
@@ -0,0 +1,35 @@
+module Geo
+ class ProjectSyncWorker
+ include Sidekiq::Worker
+
+ sidekiq_options queue: :geo, retry: 3, dead: false
+
+ sidekiq_retry_in { |count| 30 * count }
+
+ sidekiq_retries_exhausted do |msg, _|
+ Sidekiq.logger.warn "Failed #{msg['class']} with #{msg['args']}: #{msg['error_message']}"
+ end
+
+ def perform(project_id, scheduled_time)
+ project = Project.find(project_id)
+ registry = Geo::ProjectRegistry.find_or_initialize_by(project_id: project_id)
+
+ Geo::RepositorySyncService.new(project).execute if sync_repository?(registry, scheduled_time)
+ Geo::WikiSyncService.new(project).execute if sync_wiki?(registry, scheduled_time)
+ rescue ActiveRecord::RecordNotFound
+ logger.error("Couldn't find project with ID=#{project_id}, skipping syncing")
+ end
+
+ private
+
+ def sync_repository?(registry, scheduled_time)
+ !registry.repository_synced_since?(scheduled_time) &&
+ registry.resync_repository?
+ end
+
+ def sync_wiki?(registry, scheduled_time)
+ !registry.wiki_synced_since?(scheduled_time) &&
+ registry.resync_wiki?
+ end
+ end
+end
diff --git a/app/workers/geo_repository_sync_worker.rb b/app/workers/geo_repository_sync_worker.rb
index e5603918751..302eef7b00b 100644
--- a/app/workers/geo_repository_sync_worker.rb
+++ b/app/workers/geo_repository_sync_worker.rb
@@ -2,45 +2,74 @@ class GeoRepositorySyncWorker
include Sidekiq::Worker
include CronjobQueue
- RUN_TIME = 5.minutes.to_i
- BATCH_SIZE = 100
- LAST_SYNC_INTERVAL = 24.hours
+ LEASE_KEY = 'geo_repository_sync_worker'.freeze
+ LEASE_TIMEOUT = 10.minutes
+ BATCH_SIZE = 1000
+ BACKOFF_DELAY = 5.minutes
+ MAX_CAPACITY = 25
+ RUN_TIME = 60.minutes.to_i
+
+ def initialize
+ @pending_projects = []
+ @scheduled_jobs = []
+ end
def perform
return unless Gitlab::Geo.secondary_role_enabled?
return unless Gitlab::Geo.primary_node.present?
- start_time = Time.now
- project_ids_not_synced = find_project_ids_not_synced
- project_ids_updated_recently = find_project_ids_updated_recently
- project_ids = interleave(project_ids_not_synced, project_ids_updated_recently)
+ logger.info "Started Geo repository sync scheduler"
- logger.info "Started Geo repository syncing for #{project_ids.length} project(s)"
+ @start_time = Time.now
- project_ids.each do |project_id|
- begin
- break if over_time?(start_time)
+ # Prevent multiple Sidekiq workers from attempting to schedule projects synchronization
+ try_obtain_lease do
+ loop do
break unless node_enabled?
- # We try to obtain a lease here for the entire sync process because we
- # want to sync the repositories continuously at a controlled rate
- # instead of hammering the primary node. Initially, we are syncing
- # one repo at a time. If we don't obtain the lease here, every 5
- # minutes all of 100 projects will be synced.
- try_obtain_lease do |lease|
- Geo::RepositorySyncService.new(project_id).execute
- end
- rescue ActiveRecord::RecordNotFound
- logger.error("Couldn't find project with ID=#{project_id}, skipping syncing")
- next
+ update_jobs_in_progress
+ load_pending_projects if reload_queue?
+
+ # If we are still under the limit after refreshing our DB, we can end
+ # after scheduling the remaining transfers.
+ last_batch = reload_queue?
+
+ break if over_time?
+ break unless projects_remain?
+
+ schedule_jobs
+
+ break if last_batch
+ break unless renew_lease!
+
+ sleep(1)
end
- end
- logger.info "Finished Geo repository syncing for #{project_ids.length} project(s)"
+ logger.info "Finished Geo repository sync scheduler"
+ end
end
private
+ def reload_queue?
+ @pending_projects.size < MAX_CAPACITY
+ end
+
+ def projects_remain?
+ @pending_projects.size > 0
+ end
+
+ def over_time?
+ Time.now - @start_time >= RUN_TIME
+ end
+
+ def load_pending_projects
+ project_ids_not_synced = find_project_ids_not_synced
+ project_ids_updated_recently = find_project_ids_updated_recently
+
+ @pending_projects = interleave(project_ids_not_synced, project_ids_updated_recently)
+ end
+
def find_project_ids_not_synced
Project.where.not(id: Geo::ProjectRegistry.synced.pluck(:project_id))
.order(last_repository_updated_at: :desc)
@@ -63,37 +92,65 @@ class GeoRepositorySyncWorker
end.flatten(1).uniq.compact.take(BATCH_SIZE)
end
- def over_time?(start_time)
- Time.now - start_time >= RUN_TIME
- end
+ def schedule_jobs
+ num_to_schedule = [MAX_CAPACITY - scheduled_job_ids.size, @pending_projects.size].min
+ return unless projects_remain?
- def node_enabled?
- # Only check every minute to avoid polling the DB excessively
- unless @last_enabled_check.present? && @last_enabled_check > 1.minute.ago
- @last_enabled_check = Time.now
- @current_node_enabled = nil
+ num_to_schedule.times do
+ project_id = @pending_projects.shift
+ job_id = Geo::ProjectSyncWorker.perform_in(BACKOFF_DELAY, project_id, Time.now)
+
+ @scheduled_jobs << { id: project_id, job_id: job_id } if job_id
end
+ end
- @current_node_enabled ||= Gitlab::Geo.current_node_enabled?
+ def scheduled_job_ids
+ @scheduled_jobs.map { |data| data[:job_id] }
+ end
+
+ def update_jobs_in_progress
+ status = Gitlab::SidekiqStatus.job_status(scheduled_job_ids)
+
+ # SidekiqStatus returns an array of booleans: true if the job has completed, false otherwise.
+ # For each entry, first use `zip` to make { job_id: 123, id: 10 } -> [ { job_id: 123, id: 10 }, bool ]
+ # Next, filter out the jobs that have completed.
+ @scheduled_jobs = @scheduled_jobs.zip(status).map { |(job, completed)| job if completed }.compact
end
def try_obtain_lease
- lease = Gitlab::ExclusiveLease.new(lease_key, timeout: lease_timeout).try_obtain
+ lease = exclusive_lease.try_obtain
- return unless lease
+ unless lease
+ logger.info "Cannot obtain an exclusive lease. There must be another worker already in execution."
+ return
+ end
begin
yield lease
ensure
- Gitlab::ExclusiveLease.cancel(lease_key, lease)
+ release_lease(lease)
end
end
- def lease_key
- Geo::RepositorySyncService::LEASE_KEY_PREFIX
+ def exclusive_lease
+ @lease ||= Gitlab::ExclusiveLease.new(LEASE_KEY, timeout: LEASE_TIMEOUT)
end
- def lease_timeout
- Geo::RepositorySyncService::LEASE_TIMEOUT
+ def renew_lease!
+ exclusive_lease.renew
+ end
+
+ def release_lease(uuid)
+ Gitlab::ExclusiveLease.cancel(LEASE_KEY, uuid)
+ end
+
+ def node_enabled?
+ # Only check every minute to avoid polling the DB excessively
+ unless @last_enabled_check.present? && @last_enabled_check > 1.minute.ago
+ @last_enabled_check = Time.now
+ @current_node_enabled = nil
+ end
+
+ @current_node_enabled ||= Gitlab::Geo.current_node_enabled?
end
end
diff --git a/changelogs/unreleased-ee/increase-parallelism-of-geo-repository-sync-worker.yml b/changelogs/unreleased-ee/increase-parallelism-of-geo-repository-sync-worker.yml
new file mode 100644
index 00000000000..34ef7e8ab22
--- /dev/null
+++ b/changelogs/unreleased-ee/increase-parallelism-of-geo-repository-sync-worker.yml
@@ -0,0 +1,4 @@
+---
+title: Geo - Makes the projects synchronization faster on secondaries nodes
+merge_request:
+author:
diff --git a/spec/models/geo/project_registry_spec.rb b/spec/models/geo/project_registry_spec.rb
index 8b7d4ce6148..c6033141c53 100644
--- a/spec/models/geo/project_registry_spec.rb
+++ b/spec/models/geo/project_registry_spec.rb
@@ -31,4 +31,86 @@ describe Geo::ProjectRegistry, models: true do
expect(described_class.synced).to match_array([registry])
end
end
+
+ describe '#resync_repository?' do
+ it 'returns true when resync_repository is true' do
+ subject.resync_repository = true
+
+ expect(subject.resync_repository).to be true
+ end
+
+ it 'returns true when last_repository_successful_sync_at is nil' do
+ subject.last_repository_successful_sync_at = nil
+
+ expect(subject.resync_repository).to be true
+ end
+
+ it 'returns false when resync_repository is false and last_repository_successful_sync_at is present' do
+ subject.resync_repository = false
+ subject.last_repository_successful_sync_at = Time.now
+
+ expect(subject.resync_repository).to be false
+ end
+ end
+
+ describe '#resync_wiki?' do
+ it 'returns true when resync_wiki is true' do
+ subject.resync_wiki = true
+
+ expect(subject.resync_wiki).to be true
+ end
+
+ it 'returns true when last_wiki_successful_sync_at is nil' do
+ subject.last_wiki_successful_sync_at = nil
+
+ expect(subject.resync_wiki).to be true
+ end
+
+ it 'returns false when resync_wiki is false and last_wiki_successful_sync_at is present' do
+ subject.resync_wiki = false
+ subject.last_wiki_successful_sync_at = Time.now
+
+ expect(subject.resync_wiki).to be false
+ end
+ end
+
+ describe '#repository_synced_since?' do
+ it 'returns false when last_repository_synced_at is nil' do
+ subject.last_repository_synced_at = nil
+
+ expect(subject.repository_synced_since?(Time.now)).to be_nil
+ end
+
+ it 'returns false when last_repository_synced_at before timestamp' do
+ subject.last_repository_synced_at = Time.now - 2.hours
+
+ expect(subject.repository_synced_since?(Time.now)).to be false
+ end
+
+ it 'returns true when last_repository_synced_at after timestamp' do
+ subject.last_repository_synced_at = Time.now + 2.hours
+
+ expect(subject.repository_synced_since?(Time.now)).to be true
+ end
+ end
+
+ describe '#wiki_synced_since?' do
+ it 'returns false when last_wiki_synced_at is nil' do
+ subject.last_wiki_synced_at = nil
+
+ expect(subject.wiki_synced_since?(Time.now)).to be_nil
+ end
+
+ it 'returns false when last_wiki_synced_at before timestamp' do
+ subject.last_wiki_synced_at = Time.now - 2.hours
+
+ expect(subject.wiki_synced_since?(Time.now)).to be false
+ end
+
+ it 'returns true when last_wiki_synced_at after timestamp' do
+ subject.last_wiki_synced_at = Time.now + 2.hours
+
+ expect(subject.wiki_synced_since?(Time.now)).to be true
+ end
+ end
end
diff --git a/spec/services/geo/repository_sync_service_spec.rb b/spec/services/geo/repository_sync_service_spec.rb
index 0d40cdc5d80..f1c9e7b8469 100644
--- a/spec/services/geo/repository_sync_service_spec.rb
+++ b/spec/services/geo/repository_sync_service_spec.rb
@@ -1,14 +1,14 @@
require 'spec_helper'
-describe Geo::RepositorySyncService, services: true do
+RSpec.describe Geo::RepositorySyncService, services: true do
let!(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') }
let(:lease) { double(try_obtain: true) }
- subject { described_class.new(project.id) }
+ subject { described_class.new(project) }
before do
allow(Gitlab::ExclusiveLease).to receive(:new)
- .with(subject.__send__(:lease_key), anything)
+ .with(subject.lease_key, anything)
.and_return(lease)
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror)
@@ -16,285 +16,96 @@ describe Geo::RepositorySyncService, services: true do
end
describe '#execute' do
- context 'when project has never been synced' do
- let(:project) { create(:project_empty_repo) }
+ let(:project) { create(:project_empty_repo) }
+ let(:repository) { project.repository }
+ let(:url_to_repo) { "#{primary.clone_url_prefix}#{project.path_with_namespace}.git" }
- it 'fetches project repositories' do
- fetch_count = 0
+ it 'fetches project repository' do
+ expect(repository).to receive(:fetch_geo_mirror).with(url_to_repo).once
- allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) do
- fetch_count += 1
- end
-
- subject.execute
-
- expect(fetch_count).to eq 2
- end
-
- it 'expires repository caches' do
- expect_any_instance_of(Repository).to receive(:expire_all_method_caches).once
- expect_any_instance_of(Repository).to receive(:expire_branch_cache).once
- expect_any_instance_of(Repository).to receive(:expire_content_cache).once
-
- subject.execute
- end
-
- it 'releases lease' do
- expect(Gitlab::ExclusiveLease).to receive(:cancel).once.with(
- subject.__send__(:lease_key), anything).and_call_original
-
- subject.execute
- end
-
- it 'does not fetch project repositories if cannot obtain a lease' do
- allow(lease).to receive(:try_obtain) { false }
-
- expect_any_instance_of(Repository).not_to receive(:fetch_geo_mirror)
-
- subject.execute
- end
-
- context 'tracking database' do
- it 'creates a new registry' do
- expect { subject.execute }.to change(Geo::ProjectRegistry, :count).by(1)
- end
-
- it 'sets last_repository_successful_sync_at when repository sync succeed' do
- subject.execute
-
- registry = Geo::ProjectRegistry.find_by(project_id: project.id)
-
- expect(registry.last_repository_successful_sync_at).not_to be_nil
- end
-
- it 'resets last_repository_successful_sync_at when repository sync fail' do
- allow_any_instance_of(Repository).to receive(:fetch_geo_mirror).with(/#{project.path_with_namespace}\.git/) { raise Gitlab::Shell::Error }
-
- subject.execute
-
- registry = Geo::ProjectRegistry.find_by(project_id: project.id)
-
- expect(registry.last_repository_successful_sync_at).to be_nil
- end
-
- it 'sets last_wiki_successful_sync_at when wiki sync succeed' do
- subject.execute
-
- registry = Geo::ProjectRegistry.find_by(project_id: project.id)
-
- expect(registry.last_wiki_successful_sync_at).not_to be_nil
- end
-
- it 'resets last_wiki_successful_sync_at when wiki sync fail' do
- allow_any_instance_of(Repository).to receive(:fetch_geo_mirror).with(/#{project.path_with_namespace}\.wiki.git/) { raise Gitlab::Shell::Error }
-
- subject.execute
-
- registry = Geo::ProjectRegistry.find_by(project_id: project.id)
-
- expect(registry.last_wiki_successful_sync_at).to be_nil
- end
- end
+ subject.execute
end
- context 'when project has been synced' do
- let(:project) { create(:project) }
- let(:last_repository_synced_at) { 5.days.ago }
- let(:last_wiki_synced_at) { 4.days.ago }
-
- let!(:registry) do
- create(:geo_project_registry, :synced,
- project: project,
- last_repository_synced_at: last_repository_synced_at,
- last_repository_successful_sync_at: last_repository_synced_at,
- last_wiki_synced_at: last_wiki_synced_at,
- last_wiki_successful_sync_at: last_wiki_synced_at)
- end
-
- it 'does not fetch project repositories' do
- expect_any_instance_of(Repository).not_to receive(:fetch_geo_mirror)
-
- subject.execute
- end
-
- context 'tracking database' do
- it 'does not create a new registry' do
- expect { subject.execute }.not_to change(Geo::ProjectRegistry, :count)
- end
-
- it 'does not update last_repository_successful_sync_at' do
- subject.execute
-
- registry.reload
-
- expect(registry.last_repository_synced_at).to be_within(1.minute).of(last_repository_synced_at)
- expect(registry.last_repository_successful_sync_at).to be_within(1.minute).of(last_repository_synced_at)
- end
-
- it 'does not update last_wiki_successful_sync_at' do
- subject.execute
-
- registry.reload
+ it 'expires repository caches' do
+ expect_any_instance_of(Repository).to receive(:expire_all_method_caches).once
+ expect_any_instance_of(Repository).to receive(:expire_branch_cache).once
+ expect_any_instance_of(Repository).to receive(:expire_content_cache).once
- expect(registry.last_wiki_synced_at).to be_within(1.minute).of(last_wiki_synced_at)
- expect(registry.last_wiki_successful_sync_at).to be_within(1.minute).of(last_wiki_synced_at)
- end
- end
+ subject.execute
end
- context 'when last attempt to sync project repositories failed' do
- let(:project) { create(:project) }
- let!(:registry) { create(:geo_project_registry, :sync_failed, project: project) }
-
- it 'fetches project repositories' do
- fetch_count = 0
+ it 'releases lease' do
+ expect(Gitlab::ExclusiveLease).to receive(:cancel).once.with(
+ subject.__send__(:lease_key), anything).and_call_original
- allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) do
- fetch_count += 1
- end
+ subject.execute
+ end
- subject.execute
+ it 'does not fetch project repository if cannot obtain a lease' do
+ allow(lease).to receive(:try_obtain) { false }
- expect(fetch_count).to eq 2
- end
+ expect(repository).not_to receive(:fetch_geo_mirror)
- context 'tracking database' do
- it 'sets last_repository_successful_sync_at' do
- subject.execute
+ subject.execute
+ end
- registry.reload
+ it 'rescues when Gitlab::Shell::Error is raised' do
+ allow(repository).to receive(:fetch_geo_mirror).with(url_to_repo) { raise Gitlab::Shell::Error }
- expect(registry.last_repository_successful_sync_at).not_to be_nil
- end
+ expect { subject.execute }.not_to raise_error
+ end
- it 'sets last_wiki_successful_sync_at' do
- subject.execute
+ it 'rescues exception and fires after_create hook when Gitlab::Git::Repository::NoRepository is raised' do
+ allow(repository).to receive(:fetch_geo_mirror).with(url_to_repo) { raise Gitlab::Git::Repository::NoRepository }
- registry.reload
+ expect(repository).to receive(:after_create)
- expect(registry.last_wiki_successful_sync_at).not_to be_nil
- end
- end
+ expect { subject.execute }.not_to raise_error
end
- context 'when project repository is dirty' do
- let(:project) { create(:project) }
- let(:last_wiki_synced_at) { 4.days.ago }
-
- let!(:registry) do
- create(:geo_project_registry, :synced, :repository_dirty,
- project: project,
- last_wiki_synced_at: last_wiki_synced_at,
- last_wiki_successful_sync_at: last_wiki_synced_at)
+ context 'tracking database' do
+ it 'creates a new registry if does not exists' do
+ expect { subject.execute }.to change(Geo::ProjectRegistry, :count).by(1)
end
- it 'fetches project repository' do
- expect_any_instance_of(Repository).to receive(:fetch_geo_mirror).once
+ it 'does not create a new registry if one exist' do
+ create(:geo_project_registry, project: project)
- subject.execute
+ expect { subject.execute }.not_to change(Geo::ProjectRegistry, :count)
end
- context 'exceptions' do
- it 'rescues when Gitlab::Shell::Error is raised' do
- allow_any_instance_of(Repository).to receive(:fetch_geo_mirror).with(/#{project.path_with_namespace}\.git/) { raise Gitlab::Shell::Error }
-
- expect { subject.execute }.not_to raise_error
- end
-
- it 'rescues exception and fires after_create hook when Gitlab::Git::Repository::NoRepository is raised' do
- allow_any_instance_of(Repository).to receive(:fetch_geo_mirror).with(/#{project.path_with_namespace}\.git/) { raise Gitlab::Git::Repository::NoRepository }
-
- expect_any_instance_of(Repository).to receive(:after_create)
+ context 'when repository sync succeed' do
+ let(:registry) { Geo::ProjectRegistry.find_by(project_id: project.id) }
- expect { subject.execute }.not_to raise_error
- end
- end
-
- context 'tracking database' do
- it 'updates last_repository_successful_sync_at' do
+ before do
subject.execute
-
- registry.reload
-
- expect(registry.last_repository_synced_at).to be_within(1.minute).of(DateTime.now)
- expect(registry.last_repository_successful_sync_at).to be_within(1.minute).of(DateTime.now)
end
- it 'does not update last_wiki_successful_sync_at' do
- subject.execute
-
- registry.reload
-
- expect(registry.last_wiki_synced_at).to be_within(1.minute).of(last_wiki_synced_at)
- expect(registry.last_wiki_successful_sync_at).to be_within(1.minute).of(last_wiki_synced_at)
+ it 'sets last_repository_synced_at' do
+ expect(registry.last_repository_synced_at).not_to be_nil
end
- it 'resets resync_repository' do
- subject.execute
-
- registry.reload
-
- expect(registry.resync_repository).to be false
+ it 'sets last_repository_successful_sync_at' do
+ expect(registry.last_repository_successful_sync_at).not_to be_nil
end
end
- end
-
- context 'when project wiki is dirty' do
- let(:project) { create(:project) }
- let(:last_repository_synced_at) { 5.days.ago }
-
- let!(:registry) do
- create(:geo_project_registry, :synced, :wiki_dirty,
- project: project,
- last_repository_synced_at: last_repository_synced_at,
- last_repository_successful_sync_at: last_repository_synced_at)
- end
- it 'fetches wiki repository' do
- expect_any_instance_of(Repository).to receive(:fetch_geo_mirror).once
+ context 'when repository sync fail' do
+ let(:registry) { Geo::ProjectRegistry.find_by(project_id: project.id) }
+ let(:url_to_repo) { "#{primary.clone_url_prefix}#{project.path_with_namespace}.git" }
- subject.execute
- end
-
- context 'exceptions' do
- it 'rescues exception when Gitlab::Shell::Error is raised' do
- allow_any_instance_of(Repository).to receive(:fetch_geo_mirror).with(/#{project.path_with_namespace}\.wiki\.git/) { raise Gitlab::Shell::Error }
-
- expect { subject.execute }.not_to raise_error
- end
+ before do
+ allow(repository).to receive(:fetch_geo_mirror).with(url_to_repo) { raise Gitlab::Shell::Error }
- it 'rescues exception when Gitlab::Git::Repository::NoRepository is raised' do
- allow_any_instance_of(Repository).to receive(:fetch_geo_mirror).with(/#{project.path_with_namespace}\.wiki\.git/) { raise Gitlab::Git::Repository::NoRepository }
-
- expect { subject.execute }.not_to raise_error
- end
- end
-
- context 'tracking database' do
- it 'updates last_wiki_successful_sync_at' do
subject.execute
-
- registry.reload
-
- expect(registry.last_wiki_synced_at).to be_within(1.minute).of(DateTime.now)
- expect(registry.last_wiki_successful_sync_at).to be_within(1.minute).of(DateTime.now)
end
- it 'does not update last_repository_successful_sync_at' do
- subject.execute
-
- registry.reload
-
- expect(registry.last_repository_synced_at).to be_within(1.minute).of(last_repository_synced_at)
- expect(registry.last_repository_successful_sync_at).to be_within(1.minute).of(last_repository_synced_at)
+ it 'sets last_repository_synced_at' do
+ expect(registry.last_repository_synced_at).not_to be_nil
end
- it 'resets resync_wiki' do
- subject.execute
-
- registry.reload
-
- expect(registry.resync_wiki).to be false
+ it 'resets last_repository_successful_sync_at' do
+ expect(registry.last_repository_successful_sync_at).to be_nil
end
end
end
diff --git a/spec/services/geo/wiki_sync_service_spec.rb b/spec/services/geo/wiki_sync_service_spec.rb
new file mode 100644
index 00000000000..f01cbb2393d
--- /dev/null
+++ b/spec/services/geo/wiki_sync_service_spec.rb
@@ -0,0 +1,102 @@
+require 'spec_helper'
+
+RSpec.describe Geo::WikiSyncService, services: true do
+ let!(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') }
+ let(:lease) { double(try_obtain: true) }
+
+ subject { described_class.new(project) }
+
+ before do
+ allow(Gitlab::ExclusiveLease).to receive(:new)
+ .with(subject.lease_key, anything)
+ .and_return(lease)
+
+ allow_any_instance_of(Repository).to receive(:fetch_geo_mirror)
+ .and_return(true)
+ end
+
+ describe '#execute' do
+ let(:project) { create(:project_empty_repo) }
+ let(:repository) { project.wiki.repository }
+ let(:url_to_repo) { "#{primary.clone_url_prefix}#{project.path_with_namespace}.wiki.git" }
+
+ it 'fetches wiki repository' do
+ expect(repository).to receive(:fetch_geo_mirror).with(url_to_repo).once
+
+ subject.execute
+ end
+
+ it 'releases lease' do
+ expect(Gitlab::ExclusiveLease).to receive(:cancel).once.with(
+ subject.__send__(:lease_key), anything).and_call_original
+
+ subject.execute
+ end
+
+ it 'does not fetch wiki repository if cannot obtain a lease' do
+ allow(lease).to receive(:try_obtain) { false }
+
+ expect(repository).not_to receive(:fetch_geo_mirror)
+
+ subject.execute
+ end
+
+ it 'rescues exception when Gitlab::Shell::Error is raised' do
+ allow(repository).to receive(:fetch_geo_mirror).with(url_to_repo) { raise Gitlab::Shell::Error }
+
+ expect { subject.execute }.not_to raise_error
+ end
+
+ it 'rescues exception when Gitlab::Git::Repository::NoRepository is raised' do
+ allow(repository).to receive(:fetch_geo_mirror).with(url_to_repo) { raise Gitlab::Git::Repository::NoRepository }
+
+ expect { subject.execute }.not_to raise_error
+ end
+
+ context 'tracking database' do
+ it 'creates a new registry if does not exists' do
+ expect { subject.execute }.to change(Geo::ProjectRegistry, :count).by(1)
+ end
+
+ it 'does not create a new registry if one exists' do
+ create(:geo_project_registry, project: project)
+
+ expect { subject.execute }.not_to change(Geo::ProjectRegistry, :count)
+ end
+
+ context 'when repository sync succeed' do
+ let(:registry) { Geo::ProjectRegistry.find_by(project_id: project.id) }
+
+ before do
+ subject.execute
+ end
+
+ it 'sets last_wiki_synced_at' do
+ expect(registry.last_wiki_synced_at).not_to be_nil
+ end
+
+ it 'sets last_wiki_successful_sync_at' do
+ expect(registry.last_wiki_successful_sync_at).not_to be_nil
+ end
+ end
+
+ context 'when wiki sync fail' do
+ let(:registry) { Geo::ProjectRegistry.find_by(project_id: project.id) }
+
+ before do
+ allow(repository).to receive(:fetch_geo_mirror).with(url_to_repo) { raise Gitlab::Shell::Error }
+
+ subject.execute
+ end
+
+ it 'sets last_wiki_synced_at' do
+ expect(registry.last_wiki_synced_at).not_to be_nil
+ end
+
+ it 'resets last_wiki_successful_sync_at' do
+ expect(registry.last_wiki_successful_sync_at).to be_nil
+ end
+ end
+ end
+ end
+end
diff --git a/spec/workers/geo/project_sync_worker_spec.rb b/spec/workers/geo/project_sync_worker_spec.rb
new file mode 100644
index 00000000000..a2cedff807b
--- /dev/null
+++ b/spec/workers/geo/project_sync_worker_spec.rb
@@ -0,0 +1,125 @@
+require 'rails_helper'
+
+RSpec.describe Geo::ProjectSyncWorker do
+ describe '#perform' do
+ let(:project) { create(:empty_project) }
+ let(:repository_sync_service) { spy }
+ let(:wiki_sync_service) { spy }
+
+ before do
+ allow(Geo::RepositorySyncService).to receive(:new)
+ .with(instance_of(Project)).once.and_return(repository_sync_service)
+
+ allow(Geo::WikiSyncService).to receive(:new)
+ .with(instance_of(Project)).once.and_return(wiki_sync_service)
+ end
+
+ context 'when project could not be found' do
+ it 'does not raise an error' do
+ expect { subject.perform(999, Time.now) }.not_to raise_error
+ end
+ end
+
+ context 'when project repositories has never been synced' do
+ it 'performs Geo::RepositorySyncService for the given project' do
+ subject.perform(project.id, Time.now)
+
+ expect(repository_sync_service).to have_received(:execute).once
+ end
+
+ it 'performs Geo::WikiSyncService for the given project' do
+ subject.perform(project.id, Time.now)
+
+ expect(wiki_sync_service).to have_received(:execute).once
+ end
+ end
+
+ context 'when project repositories has been synced' do
+ let!(:registry) { create(:geo_project_registry, :synced, project: project) }
+
+ it 'does not perform Geo::RepositorySyncService for the given project' do
+ subject.perform(project.id, Time.now)
+
+ expect(repository_sync_service).not_to have_received(:execute)
+ end
+
+ it 'does not perform Geo::WikiSyncService for the given project' do
+ subject.perform(project.id, Time.now)
+
+ expect(wiki_sync_service).not_to have_received(:execute)
+ end
+ end
+
+ context 'when last attempt to sync project repositories failed' do
+ let!(:registry) { create(:geo_project_registry, :sync_failed, project: project) }
+
+ it 'performs Geo::RepositorySyncService for the given project' do
+ subject.perform(project.id, Time.now)
+
+ expect(repository_sync_service).to have_received(:execute).once
+ end
+
+ it 'performs Geo::WikiSyncService for the given project' do
+ subject.perform(project.id, Time.now)
+
+ expect(wiki_sync_service).to have_received(:execute).once
+ end
+ end
+
+ context 'when project repository is dirty' do
+ let!(:registry) do
+ create(:geo_project_registry, :synced, :repository_dirty, project: project)
+ end
+
+ it 'performs Geo::RepositorySyncService for the given project' do
+ subject.perform(project.id, Time.now)
+
+ expect(repository_sync_service).to have_received(:execute).once
+ end
+
+ it 'does not perform Geo::WikiSyncService for the given project' do
+ subject.perform(project.id, Time.now)
+
+ expect(wiki_sync_service).not_to have_received(:execute)
+ end
+ end
+
+ context 'when wiki is dirty' do
+ let!(:registry) do
+ create(:geo_project_registry, :synced, :wiki_dirty, project: project)
+ end
+
+ it 'does not perform Geo::RepositorySyncService for the given project' do
+ subject.perform(project.id, Time.now)
+
+ expect(repository_sync_service).not_to have_received(:execute)
+ end
+
+ it 'performs Geo::WikiSyncService for the given project' do
+ subject.perform(project.id, Time.now)
+
+ expect(wiki_sync_service).to have_received(:execute)
+ end
+ end
+
+ context 'when project repository was synced after the time the job was scheduled in' do
+ it 'does not perform Geo::RepositorySyncService for the given project' do
+ create(:geo_project_registry, :synced, :repository_dirty, project: project, last_repository_synced_at: Time.now)
+
+ subject.perform(project.id, Time.now - 5.minutes)
+
+ expect(repository_sync_service).not_to have_received(:execute)
+ end
+ end
+
+ context 'when wiki repository was synced after the time the job was scheduled in' do
+ it 'does not perform Geo::RepositorySyncService for the given project' do
+ create(:geo_project_registry, :synced, :wiki_dirty, project: project, last_wiki_synced_at: Time.now)
+
+ subject.perform(project.id, Time.now - 5.minutes)
+
+ expect(wiki_sync_service).not_to have_received(:execute)
+ end
+ end
+ end
+end
diff --git a/spec/workers/geo_repository_sync_worker_spec.rb b/spec/workers/geo_repository_sync_worker_spec.rb
index 48b7f7886dd..161db95db2b 100644
--- a/spec/workers/geo_repository_sync_worker_spec.rb
+++ b/spec/workers/geo_repository_sync_worker_spec.rb
@@ -13,90 +13,51 @@ describe GeoRepositorySyncWorker do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain) { true }
end
- it 'performs Geo::RepositorySyncService for each project' do
- expect(Geo::RepositorySyncService).to receive(:new).twice.and_return(spy)
+ it 'performs Geo::ProjectSyncWorker for each project' do
+ expect(Geo::ProjectSyncWorker).to receive(:perform_in).twice.and_return(spy)
subject.perform
end
- it 'performs Geo::RepositorySyncService for projects where last attempt to sync failed' do
- Geo::ProjectRegistry.create(
- project: project_1,
- last_repository_synced_at: DateTime.now,
- last_repository_successful_sync_at: nil
- )
+ it 'performs Geo::ProjectSyncWorker for projects where last attempt to sync failed' do
+ create(:geo_project_registry, :sync_failed, project: project_1)
+ create(:geo_project_registry, :synced, project: project_2)
- Geo::ProjectRegistry.create(
- project: project_2,
- last_repository_synced_at: DateTime.now,
- last_repository_successful_sync_at: DateTime.now,
- resync_repository: false,
- resync_wiki: false
- )
-
- expect(Geo::RepositorySyncService).to receive(:new).once.and_return(spy)
+ expect(Geo::ProjectSyncWorker).to receive(:perform_in).once.and_return(spy)
subject.perform
end
- it 'performs Geo::RepositorySyncService for synced projects updated recently' do
- Geo::ProjectRegistry.create(
- project: project_1,
- last_repository_synced_at: 2.days.ago,
- last_repository_successful_sync_at: 2.days.ago,
- resync_repository: true,
- resync_wiki: false
- )
-
- Geo::ProjectRegistry.create(
- project: project_2,
- last_repository_synced_at: 10.minutes.ago,
- last_repository_successful_sync_at: 10.minutes.ago,
- resync_repository: false,
- resync_wiki: false
- )
-
- Geo::ProjectRegistry.create(
- project: create(:empty_project),
- last_repository_synced_at: 5.minutes.ago,
- last_repository_successful_sync_at: 5.minutes.ago,
- resync_repository: false,
- resync_wiki: true
- )
-
- expect(Geo::RepositorySyncService).to receive(:new).twice.and_return(spy)
+ it 'performs Geo::ProjectSyncWorker for synced projects updated recently' do
+ create(:geo_project_registry, :synced, :repository_dirty, project: project_1)
+ create(:geo_project_registry, :synced, project: project_2)
+ create(:geo_project_registry, :synced, :wiki_dirty)
+
+ expect(Geo::ProjectSyncWorker).to receive(:perform_in).twice.and_return(spy)
subject.perform
end
- it 'does not perform Geo::RepositorySyncService when secondary role is disabled' do
+ it 'does not perform Geo::ProjectSyncWorker when secondary role is disabled' do
allow(Gitlab::Geo).to receive(:secondary_role_enabled?) { false }
- expect(Geo::RepositorySyncService).not_to receive(:new)
+ expect(Geo::ProjectSyncWorker).not_to receive(:perform_in)
subject.perform
end
- it 'does not perform Geo::RepositorySyncService when primary node does not exists' do
+ it 'does not perform Geo::ProjectSyncWorker when primary node does not exists' do
allow(Gitlab::Geo).to receive(:primary_node) { nil }
- expect(Geo::RepositorySyncService).not_to receive(:new)
+ expect(Geo::ProjectSyncWorker).not_to receive(:perform_in)
subject.perform
end
- it 'does not perform Geo::RepositorySyncService when node is disabled' do
+ it 'does not perform Geo::ProjectSyncWorker when node is disabled' do
allow_any_instance_of(GeoNode).to receive(:enabled?) { false }
- expect(Geo::RepositorySyncService).not_to receive(:new)
-
- subject.perform
- end
-
- it 'does not perform Geo::RepositorySyncService when can not obtain a lease' do
- allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain) { false }
-
- expect(Geo::RepositorySyncService).not_to receive(:new)
+ expect(Geo::ProjectSyncWorker).not_to receive(:perform_in)
subject.perform
end