diff options
author | Jacob Vosmaer <contact@jacobvosmaer.nl> | 2016-04-13 16:56:05 +0300 |
---|---|---|
committer | Jacob Vosmaer <contact@jacobvosmaer.nl> | 2016-04-13 16:56:05 +0300 |
commit | 0f602be99f99f1ae493478a8a28df2907cfa0082 (patch) | |
tree | a49a9a7ac70ca6825547db88a32f8b4c00343aa3 /app/workers/repository_check/batch_worker.rb | |
parent | 9a30d3b5aef732e782e9496b2e8ae62069ba521a (diff) |
Clear repository check columns asynchronously
Diffstat (limited to 'app/workers/repository_check/batch_worker.rb')
-rw-r--r-- | app/workers/repository_check/batch_worker.rb | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/app/workers/repository_check/batch_worker.rb b/app/workers/repository_check/batch_worker.rb new file mode 100644 index 00000000000..16cd77a9bf0 --- /dev/null +++ b/app/workers/repository_check/batch_worker.rb @@ -0,0 +1,63 @@ +module RepositoryCheck + class BatchWorker + include Sidekiq::Worker + + RUN_TIME = 3600 + + sidekiq_options retry: false + + def perform + start = Time.now + + # This loop will break after a little more than one hour ('a little + # more' because `git fsck` may take a few minutes), or if it runs out of + # projects to check. By default sidekiq-cron will start a new + # RepositoryCheckWorker each hour so that as long as there are repositories to + # check, only one (or two) will be checked at a time. + project_ids.each do |project_id| + break if Time.now - start >= RUN_TIME + break unless current_settings.repository_checks_enabled + + next unless try_obtain_lease(project_id) + + SingleRepositoryWorker.new.perform(project_id) + end + end + + private + + # Project.find_each does not support WHERE clauses and + # Project.find_in_batches does not support ordering. So we just build an + # array of ID's. This is OK because we do it only once an hour, because + # getting ID's from Postgres is not terribly slow, and because no user + # has to sit and wait for this query to finish. + def project_ids + limit = 10_000 + never_checked_projects = Project.where('last_repository_check_at IS NULL').limit(limit). + pluck(:id) + old_check_projects = Project.where('last_repository_check_at < ?', 1.week.ago). + reorder('last_repository_check_at ASC').limit(limit).pluck(:id) + never_checked_projects + old_check_projects + end + + def try_obtain_lease(id) + # Use a 24-hour timeout because on servers/projects where 'git fsck' is + # super slow we definitely do not want to run it twice in parallel. + Gitlab::ExclusiveLease.new( + "project_repository_check:#{id}", + timeout: 24.hours + ).try_obtain + end + + def current_settings + # No caching of the settings! If we cache them and an admin disables + # this feature, an active RepositoryCheckWorker would keep going for up + # to 1 hour after the feature was disabled. + if Rails.env.test? + Gitlab::CurrentSettings.fake_application_settings + else + ApplicationSetting.current + end + end + end +end |