Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/app
diff options
context:
space:
mode:
authorBob Van Landuyt <bob@gitlab.com>2019-08-13 23:52:01 +0300
committerDouwe Maan <douwe@gitlab.com>2019-08-13 23:52:01 +0300
commit452bc36d603ed89d3fa5e3409338dd905230bd2f (patch)
tree3ef260430db93ef2b9fa9236ea601a0b3e53adee /app
parent1c3b570c117cc41f5f4838a8366c4367ef0749cb (diff)
Rework retry strategy for remote mirrors
**Prevention of running 2 simultaneous updates** Instead of using `RemoteMirror#update_status` and raise an error if it's already running to prevent the same mirror being updated at the same time we now use `Gitlab::ExclusiveLease` for that. When we fail to obtain a lease in 3 tries, 30 seconds apart, we bail and reschedule. We'll reschedule faster for the protected branches. If the mirror already ran since it was scheduled, the job will be skipped. **Error handling: Remote side** When an update fails because of a `Gitlab::Git::CommandError`, we won't track this error in sentry, this could be on the remote side: for example when branches have diverged. In this case, we'll try 3 times scheduled 1 or 5 minutes apart. In between, the mirror is marked as "to_retry", the error would be visible to the user when they visit the settings page. After 3 tries we'll mark the mirror as failed and notify the user. We won't track this error in sentry, as it's not likely we can help it. The next event that would trigger a new refresh. **Error handling: our side** If an unexpected error occurs, we mark the mirror as failed, but we'd still retry the job based on the regular sidekiq retries with backoff. Same as we used to The error would be reported in sentry, since its likely we need to do something about it.
Diffstat (limited to 'app')
-rw-r--r--app/finders/remote_mirror_finder.rb15
-rw-r--r--app/mailers/emails/remote_mirrors.rb2
-rw-r--r--app/models/remote_mirror.rb57
-rw-r--r--app/services/projects/update_remote_mirror_service.rb53
-rw-r--r--app/views/projects/mirrors/_mirror_repos.html.haml5
-rw-r--r--app/workers/remote_mirror_notification_worker.rb2
-rw-r--r--app/workers/repository_update_remote_mirror_worker.rb61
7 files changed, 118 insertions, 77 deletions
diff --git a/app/finders/remote_mirror_finder.rb b/app/finders/remote_mirror_finder.rb
deleted file mode 100644
index 420db0077aa..00000000000
--- a/app/finders/remote_mirror_finder.rb
+++ /dev/null
@@ -1,15 +0,0 @@
-# frozen_string_literal: true
-
-class RemoteMirrorFinder
- attr_accessor :params
-
- def initialize(params)
- @params = params
- end
-
- # rubocop: disable CodeReuse/ActiveRecord
- def execute
- RemoteMirror.find_by(id: params[:id])
- end
- # rubocop: enable CodeReuse/ActiveRecord
-end
diff --git a/app/mailers/emails/remote_mirrors.rb b/app/mailers/emails/remote_mirrors.rb
index 2d8137843ec..f3938a052b0 100644
--- a/app/mailers/emails/remote_mirrors.rb
+++ b/app/mailers/emails/remote_mirrors.rb
@@ -3,7 +3,7 @@
module Emails
module RemoteMirrors
def remote_mirror_update_failed_email(remote_mirror_id, recipient_id)
- @remote_mirror = RemoteMirrorFinder.new(id: remote_mirror_id).execute
+ @remote_mirror = RemoteMirror.find_by_id(remote_mirror_id)
@project = @remote_mirror.project
mail(to: recipient(recipient_id, @project.group), subject: subject('Remote mirror update failed'))
diff --git a/app/models/remote_mirror.rb b/app/models/remote_mirror.rb
index 6b5605f9999..c9ee0653d86 100644
--- a/app/models/remote_mirror.rb
+++ b/app/models/remote_mirror.rb
@@ -4,6 +4,8 @@ class RemoteMirror < ApplicationRecord
include AfterCommitQueue
include MirrorAuthentication
+ MAX_FIRST_RUNTIME = 3.hours
+ MAX_INCREMENTAL_RUNTIME = 1.hour
PROTECTED_BACKOFF_DELAY = 1.minute
UNPROTECTED_BACKOFF_DELAY = 5.minutes
@@ -31,11 +33,18 @@ class RemoteMirror < ApplicationRecord
scope :enabled, -> { where(enabled: true) }
scope :started, -> { with_update_status(:started) }
- scope :stuck, -> { started.where('last_update_at < ? OR (last_update_at IS NULL AND updated_at < ?)', 1.hour.ago, 3.hours.ago) }
+
+ scope :stuck, -> do
+ started
+ .where('(last_update_started_at < ? AND last_update_at IS NOT NULL)',
+ MAX_INCREMENTAL_RUNTIME.ago)
+ .or(where('(last_update_started_at < ? AND last_update_at IS NULL)',
+ MAX_FIRST_RUNTIME.ago))
+ end
state_machine :update_status, initial: :none do
event :update_start do
- transition [:none, :finished, :failed] => :started
+ transition any => :started
end
event :update_finish do
@@ -46,9 +55,14 @@ class RemoteMirror < ApplicationRecord
transition started: :failed
end
+ event :update_retry do
+ transition started: :to_retry
+ end
+
state :started
state :finished
state :failed
+ state :to_retry
after_transition any => :started do |remote_mirror, _|
Gitlab::Metrics.add_event(:remote_mirrors_running)
@@ -138,16 +152,27 @@ class RemoteMirror < ApplicationRecord
end
def updated_since?(timestamp)
- last_update_started_at && last_update_started_at > timestamp && !update_failed?
+ return false if failed?
+
+ last_update_started_at && last_update_started_at > timestamp
end
def mark_for_delete_if_blank_url
mark_for_destruction if url.blank?
end
- def mark_as_failed(error_message)
- update_column(:last_error, Gitlab::UrlSanitizer.sanitize(error_message))
- update_fail
+ def update_error_message(error_message)
+ self.last_error = Gitlab::UrlSanitizer.sanitize(error_message)
+ end
+
+ def mark_for_retry!(error_message)
+ update_error_message(error_message)
+ update_retry!
+ end
+
+ def mark_as_failed!(error_message)
+ update_error_message(error_message)
+ update_fail!
end
def url=(value)
@@ -190,6 +215,18 @@ class RemoteMirror < ApplicationRecord
update_column(:error_notification_sent, true)
end
+ def backoff_delay
+ if self.only_protected_branches
+ PROTECTED_BACKOFF_DELAY
+ else
+ UNPROTECTED_BACKOFF_DELAY
+ end
+ end
+
+ def max_runtime
+ last_update_at.present? ? MAX_INCREMENTAL_RUNTIME : MAX_FIRST_RUNTIME
+ end
+
private
def store_credentials
@@ -219,14 +256,6 @@ class RemoteMirror < ApplicationRecord
self.last_update_started_at >= Time.now - backoff_delay
end
- def backoff_delay
- if self.only_protected_branches
- PROTECTED_BACKOFF_DELAY
- else
- UNPROTECTED_BACKOFF_DELAY
- end
- end
-
def reset_fields
update_columns(
last_error: nil,
diff --git a/app/services/projects/update_remote_mirror_service.rb b/app/services/projects/update_remote_mirror_service.rb
index 1244a0f72a7..13a467a3ef9 100644
--- a/app/services/projects/update_remote_mirror_service.rb
+++ b/app/services/projects/update_remote_mirror_service.rb
@@ -2,31 +2,52 @@
module Projects
class UpdateRemoteMirrorService < BaseService
- attr_reader :errors
+ MAX_TRIES = 3
- def execute(remote_mirror)
+ def execute(remote_mirror, tries)
return success unless remote_mirror.enabled?
- errors = []
+ update_mirror(remote_mirror)
- begin
- remote_mirror.ensure_remote!
- repository.fetch_remote(remote_mirror.remote_name, ssh_auth: remote_mirror, no_tags: true)
+ success
+ rescue Gitlab::Git::CommandError => e
+ # This happens if one of the gitaly calls above fail, for example when
+ # branches have diverged, or the pre-receive hook fails.
+ retry_or_fail(remote_mirror, e.message, tries)
- opts = {}
- if remote_mirror.only_protected_branches?
- opts[:only_branches_matching] = project.protected_branches.select(:name).map(&:name)
- end
+ error(e.message)
+ rescue => e
+ remote_mirror.mark_as_failed!(e.message)
+ raise e
+ end
+
+ private
+
+ def update_mirror(remote_mirror)
+ remote_mirror.update_start!
+
+ remote_mirror.ensure_remote!
+ repository.fetch_remote(remote_mirror.remote_name, ssh_auth: remote_mirror, no_tags: true)
- remote_mirror.update_repository(opts)
- rescue => e
- errors << e.message.strip
+ opts = {}
+ if remote_mirror.only_protected_branches?
+ opts[:only_branches_matching] = project.protected_branches.select(:name).map(&:name)
end
- if errors.present?
- error(errors.join("\n\n"))
+ remote_mirror.update_repository(opts)
+
+ remote_mirror.update_finish!
+ end
+
+ def retry_or_fail(mirror, message, tries)
+ if tries < MAX_TRIES
+ mirror.mark_for_retry!(message)
else
- success
+ # It's not likely we'll be able to recover from this ourselves, so we'll
+ # notify the users of the problem, and don't trigger any sidekiq retries
+ # Instead, we'll wait for the next change to try the push again, or until
+ # a user manually retries.
+ mirror.mark_as_failed!(message)
end
end
end
diff --git a/app/views/projects/mirrors/_mirror_repos.html.haml b/app/views/projects/mirrors/_mirror_repos.html.haml
index 280ec6d715b..eb100e5cf47 100644
--- a/app/views/projects/mirrors/_mirror_repos.html.haml
+++ b/app/views/projects/mirrors/_mirror_repos.html.haml
@@ -43,7 +43,8 @@
= _('Mirrored repositories')
= render_if_exists 'projects/mirrors/mirrored_repositories_count'
%th= _('Direction')
- %th= _('Last update')
+ %th= _('Last update attempt')
+ %th= _('Last successful update')
%th
%th
%tbody.js-mirrors-table-body
@@ -53,6 +54,8 @@
%tr.qa-mirrored-repository-row.rspec-mirrored-repository-row{ class: ('bg-secondary' if mirror.disabled?) }
%td.qa-mirror-repository-url= mirror.safe_url
%td= _('Push')
+ %td
+ = mirror.last_update_started_at.present? ? time_ago_with_tooltip(mirror.last_update_started_at) : _('Never')
%td.qa-mirror-last-update-at= mirror.last_update_at.present? ? time_ago_with_tooltip(mirror.last_update_at) : _('Never')
%td
- if mirror.disabled?
diff --git a/app/workers/remote_mirror_notification_worker.rb b/app/workers/remote_mirror_notification_worker.rb
index 5bafe8e2046..368abfeda99 100644
--- a/app/workers/remote_mirror_notification_worker.rb
+++ b/app/workers/remote_mirror_notification_worker.rb
@@ -4,7 +4,7 @@ class RemoteMirrorNotificationWorker
include ApplicationWorker
def perform(remote_mirror_id)
- remote_mirror = RemoteMirrorFinder.new(id: remote_mirror_id).execute
+ remote_mirror = RemoteMirror.find_by_id(remote_mirror_id)
# We check again if there's an error because a newer run since this job was
# fired could've completed successfully.
diff --git a/app/workers/repository_update_remote_mirror_worker.rb b/app/workers/repository_update_remote_mirror_worker.rb
index 03a7ff2cd7a..d13c7641eb3 100644
--- a/app/workers/repository_update_remote_mirror_worker.rb
+++ b/app/workers/repository_update_remote_mirror_worker.rb
@@ -1,50 +1,53 @@
# frozen_string_literal: true
class RepositoryUpdateRemoteMirrorWorker
- UpdateAlreadyInProgressError = Class.new(StandardError)
UpdateError = Class.new(StandardError)
include ApplicationWorker
+ include Gitlab::ExclusiveLeaseHelpers
sidekiq_options retry: 3, dead: false
- sidekiq_retry_in { |count| 30 * count }
+ LOCK_WAIT_TIME = 30.seconds
+ MAX_TRIES = 3
- sidekiq_retries_exhausted do |msg, _|
- Sidekiq.logger.warn "Failed #{msg['class']} with #{msg['args']}: #{msg['error_message']}"
- end
-
- def perform(remote_mirror_id, scheduled_time)
- remote_mirror = RemoteMirrorFinder.new(id: remote_mirror_id).execute
+ def perform(remote_mirror_id, scheduled_time, tries = 0)
+ remote_mirror = RemoteMirror.find_by_id(remote_mirror_id)
+ return unless remote_mirror
return if remote_mirror.updated_since?(scheduled_time)
- raise UpdateAlreadyInProgressError if remote_mirror.update_in_progress?
+ # If the update is already running, wait for it to finish before running again
+ # This will wait for a total of 90 seconds in 3 steps
+ in_lock(remote_mirror_update_lock(remote_mirror.id),
+ retries: 3,
+ ttl: remote_mirror.max_runtime,
+ sleep_sec: LOCK_WAIT_TIME) do
+ update_mirror(remote_mirror, scheduled_time, tries)
+ end
+ rescue Gitlab::ExclusiveLeaseHelpers::FailedToObtainLockError
+ # If an update runs longer than 1.5 minutes, we'll reschedule it
+ # with a backoff. The next run will check if the previous update would
+ # include the changes that triggered this update and become a no-op.
+ self.class.perform_in(remote_mirror.backoff_delay, remote_mirror.id, scheduled_time, tries)
+ end
- remote_mirror.update_start
+ private
- project = remote_mirror.project
+ def update_mirror(mirror, scheduled_time, tries)
+ project = mirror.project
current_user = project.creator
- result = Projects::UpdateRemoteMirrorService.new(project, current_user).execute(remote_mirror)
- raise UpdateError, result[:message] if result[:status] == :error
-
- remote_mirror.update_finish
- rescue UpdateAlreadyInProgressError
- raise
- rescue UpdateError => ex
- fail_remote_mirror(remote_mirror, ex.message)
- raise
- rescue => ex
- return unless remote_mirror
+ result = Projects::UpdateRemoteMirrorService.new(project, current_user).execute(mirror, tries)
- fail_remote_mirror(remote_mirror, ex.message)
- raise UpdateError, "#{ex.class}: #{ex.message}"
+ if result[:status] == :error && mirror.to_retry?
+ schedule_retry(mirror, scheduled_time, tries)
+ end
end
- private
-
- def fail_remote_mirror(remote_mirror, message)
- remote_mirror.mark_as_failed(message)
+ def remote_mirror_update_lock(mirror_id)
+ [self.class.name, mirror_id].join(':')
+ end
- Rails.logger.error(message) # rubocop:disable Gitlab/RailsLogger
+ def schedule_retry(mirror, scheduled_time, tries)
+ self.class.perform_in(mirror.backoff_delay, mirror.id, scheduled_time, tries + 1)
end
end