Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jacob Vosmaer <contact@jacobvosmaer.nl> 2016-04-13 16:56:05 +0300
committer Jacob Vosmaer <contact@jacobvosmaer.nl> 2016-04-13 16:56:05 +0300
commit 0f602be99f99f1ae493478a8a28df2907cfa0082 (patch)
tree a49a9a7ac70ca6825547db88a32f8b4c00343aa3 /app/workers/repository_check/batch_worker.rb
parent 9a30d3b5aef732e782e9496b2e8ae62069ba521a (diff)
Clear repository check columns asynchronously
Diffstat (limited to 'app/workers/repository_check/batch_worker.rb')
-rw-r--r-- app/workers/repository_check/batch_worker.rb 63
1 files changed, 63 insertions, 0 deletions
diff --git a/app/workers/repository_check/batch_worker.rb b/app/workers/repository_check/batch_worker.rb
new file mode 100644
index 00000000000..16cd77a9bf0
--- /dev/null
+++ b/app/workers/repository_check/batch_worker.rb
@@ -0,0 +1,63 @@
+module RepositoryCheck
+ class BatchWorker
+ include Sidekiq::Worker
+
+ RUN_TIME = 3600
+
+ sidekiq_options retry: false
+
+ def perform
+ start = Time.now
+
+ # Runs for a little over RUN_TIME (one hour): the deadline is only tested
+ # between projects and a single `git fsck` may take a few minutes. Also
+ # stops early when project_ids is exhausted. By default sidekiq-cron
+ # starts a new RepositoryCheck::BatchWorker every hour, so at most one
+ # (or two) repositories are being checked at any time.
+ project_ids.each do |project_id|
+ break if Time.now - start >= RUN_TIME
+ break unless current_settings.repository_checks_enabled
+
+ next unless try_obtain_lease(project_id)
+
+ SingleRepositoryWorker.new.perform(project_id)
+ end
+ end
+
+ private
+
+ # Returns up to 10,000 never-checked project IDs followed by up to 10,000
+ # IDs last checked over a week ago (oldest check first). Built as a plain
+ # array because find_each / find_in_batches cannot apply this ordering.
+ # That is OK: it runs once an hour, getting IDs from Postgres is not
+ # terribly slow, and no user has to wait for this query to finish.
+ def project_ids
+ limit = 10_000
+ never_checked_projects = Project.where('last_repository_check_at IS NULL').limit(limit).
+ pluck(:id)
+ old_check_projects = Project.where('last_repository_check_at < ?', 1.week.ago).
+ reorder('last_repository_check_at ASC').limit(limit).pluck(:id)
+ never_checked_projects + old_check_projects
+ end
+
+ def try_obtain_lease(id)
+ # Per-project lease keyed "project_repository_check:<id>". 24-hour timeout:
+ # where 'git fsck' is super slow we must not run it twice in parallel.
+ Gitlab::ExclusiveLease.new(
+ "project_repository_check:#{id}",
+ timeout: 24.hours
+ ).try_obtain
+ end
+
+ def current_settings
+ # No caching of the settings! If we cached them and an admin disabled
+ # this feature, an active BatchWorker would keep going for up to 1 hour
+ # after the feature was disabled. The test env uses fake settings.
+ if Rails.env.test?
+ Gitlab::CurrentSettings.fake_application_settings
+ else
+ ApplicationSetting.current
+ end
+ end
+ end
+end