diff options
author | Nick Thomas <nick@gitlab.com> | 2018-12-07 22:58:15 +0300 |
---|---|---|
committer | Nick Thomas <nick@gitlab.com> | 2018-12-07 22:58:15 +0300 |
commit | d5994552b2088c0c27e419b3ae2c4432ea329a82 (patch) | |
tree | 8b3549e7a9e618dc038efc73f8e90e7a9b2226fa /app | |
parent | 88c0984d077e2a85d684d71d036d27278cd81182 (diff) | |
parent | 896c0bdbfb1a83ff5a7d0a755ac249ac2a895798 (diff) |
Merge branch 'zj-pool-repository-creation' into 'master'
Allow public forks to be deduplicated
See merge request gitlab-org/gitlab-ce!23508
Diffstat (limited to 'app')
-rw-r--r-- | app/models/pool_repository.rb | 77 | ||||
-rw-r--r-- | app/models/project.rb | 41 | ||||
-rw-r--r-- | app/services/projects/fork_service.rb | 2 | ||||
-rw-r--r-- | app/workers/all_queues.yml | 4 | ||||
-rw-r--r-- | app/workers/concerns/object_pool_queue.rb | 12 | ||||
-rw-r--r-- | app/workers/git_garbage_collect_worker.rb | 2 | ||||
-rw-r--r-- | app/workers/object_pool/create_worker.rb | 44 | ||||
-rw-r--r-- | app/workers/object_pool/join_worker.rb | 20 | ||||
-rw-r--r-- | app/workers/object_pool/schedule_join_worker.rb | 19 |
9 files changed, 221 insertions, 0 deletions
diff --git a/app/models/pool_repository.rb b/app/models/pool_repository.rb index bad0e30ceb5..dbde00b5584 100644 --- a/app/models/pool_repository.rb +++ b/app/models/pool_repository.rb @@ -1,12 +1,89 @@ # frozen_string_literal: true +# The PoolRepository model is the database equivalent of an ObjectPool for Gitaly +# That is; PoolRepository is the record in the database, ObjectPool is the +# repository on disk class PoolRepository < ActiveRecord::Base include Shardable + include AfterCommitQueue + + has_one :source_project, class_name: 'Project' + validates :source_project, presence: true has_many :member_projects, class_name: 'Project' after_create :correct_disk_path + state_machine :state, initial: :none do + state :scheduled + state :ready + state :failed + + event :schedule do + transition none: :scheduled + end + + event :mark_ready do + transition [:scheduled, :failed] => :ready + end + + event :mark_failed do + transition all => :failed + end + + state all - [:ready] do + def joinable? + false + end + end + + state :ready do + def joinable? + true + end + end + + after_transition none: :scheduled do |pool, _| + pool.run_after_commit do + ::ObjectPool::CreateWorker.perform_async(pool.id) + end + end + + after_transition scheduled: :ready do |pool, _| + pool.run_after_commit do + ::ObjectPool::ScheduleJoinWorker.perform_async(pool.id) + end + end + end + + def create_object_pool + object_pool.create + end + + # The members of the pool should have fetched the missing objects to their own + # objects directory. If the caller fails to do so, data loss might occur + def delete_object_pool + object_pool.delete + end + + def link_repository(repository) + object_pool.link(repository.raw) + end + + # This RPC can cause data loss, as not all objects are present the local repository + # No execution path yet, will be added through: + # https://gitlab.com/gitlab-org/gitaly/issues/1415 + def delete_repository_alternate(repository) + object_pool.unlink_repository(repository.raw) + end + + def object_pool + @object_pool ||= Gitlab::Git::ObjectPool.new( + shard.name, + disk_path + '.git', + source_project.repository.raw) + end + private def correct_disk_path diff --git a/app/models/project.rb b/app/models/project.rb index 9e736a3b03c..f5dc58cd67f 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -1585,6 +1585,7 @@ class Project < ActiveRecord::Base import_state.remove_jid update_project_counter_caches after_create_default_branch + join_pool_repository refresh_markdown_cache! end @@ -1981,8 +1982,48 @@ class Project < ActiveRecord::Base Gitlab::CurrentSettings.max_attachment_size.megabytes.to_i end + def object_pool_params + return {} unless !forked? && git_objects_poolable? + + { + repository_storage: repository_storage, + pool_repository: pool_repository || create_new_pool_repository + } + end + + # Git objects are only poolable when the project is or has: + # - Hashed storage -> The object pool will have a remote to its members, using relative paths. + # If the repository path changes we would have to update the remote. + # - Public -> User will be able to fetch Git objects that might not exist + # in their own repository. + # - Repository -> Else the disk path will be empty, and there's nothing to pool + def git_objects_poolable? + hashed_storage?(:repository) && + public? && + repository_exists? && + Gitlab::CurrentSettings.hashed_storage_enabled && + Feature.enabled?(:object_pools, self) + end + private + def create_new_pool_repository + pool = begin + create_or_find_pool_repository!(shard: Shard.by_name(repository_storage), source_project: self) + rescue ActiveRecord::RecordNotUnique + retry + end + + pool.schedule + pool + end + + def join_pool_repository + return unless pool_repository + + ObjectPool::JoinWorker.perform_async(pool_repository.id, self.id) + end + def use_hashed_storage if self.new_record? && Gitlab::CurrentSettings.hashed_storage_enabled self.storage_version = LATEST_STORAGE_VERSION diff --git a/app/services/projects/fork_service.rb b/app/services/projects/fork_service.rb index 8dc0e044875..91091c4393d 100644 --- a/app/services/projects/fork_service.rb +++ b/app/services/projects/fork_service.rb @@ -54,6 +54,8 @@ module Projects new_params[:avatar] = @project.avatar end + new_params.merge!(@project.object_pool_params) + new_project = CreateService.new(current_user, new_params).execute return new_project unless new_project.persisted? diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 672c77539af..dfce00a10a1 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -85,6 +85,10 @@ - todos_destroyer:todos_destroyer_project_private - todos_destroyer:todos_destroyer_private_features +- object_pool:object_pool_create +- object_pool:object_pool_schedule_join +- object_pool:object_pool_join + - default - mailers # ActionMailer::DeliveryJob.queue_name diff --git a/app/workers/concerns/object_pool_queue.rb b/app/workers/concerns/object_pool_queue.rb new file mode 100644 index 00000000000..5b648df9c72 --- /dev/null +++ b/app/workers/concerns/object_pool_queue.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +## +# Concern for setting Sidekiq settings for the various ObjectPool queues +# +module ObjectPoolQueue + extend ActiveSupport::Concern + + included do + queue_namespace :object_pool + end +end diff --git a/app/workers/git_garbage_collect_worker.rb b/app/workers/git_garbage_collect_worker.rb index 2d381c6fd6c..d3628b23189 100644 --- a/app/workers/git_garbage_collect_worker.rb +++ b/app/workers/git_garbage_collect_worker.rb @@ -28,6 +28,8 @@ class GitGarbageCollectWorker # Refresh the branch cache in case garbage collection caused a ref lookup to fail flush_ref_caches(project) if task == :gc + project.repository.expire_statistics_caches + # In case pack files are deleted, release libgit2 cache and open file # descriptors ASAP instead of waiting for Ruby garbage collection project.cleanup diff --git a/app/workers/object_pool/create_worker.rb b/app/workers/object_pool/create_worker.rb new file mode 100644 index 00000000000..135b99886dc --- /dev/null +++ b/app/workers/object_pool/create_worker.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module ObjectPool + class CreateWorker + include ApplicationWorker + include ObjectPoolQueue + include ExclusiveLeaseGuard + + attr_reader :pool + + def perform(pool_id) + @pool = PoolRepository.find_by_id(pool_id) + return unless pool + + try_obtain_lease do + perform_pool_creation + end + end + + private + + def perform_pool_creation + return unless pool.failed? || pool.scheduled? + + # If this is a retry and the previous execution failed, deletion will + # bring the pool back to a pristine state + pool.delete_object_pool if pool.failed? + + pool.create_object_pool + pool.mark_ready + rescue => e + pool.mark_failed + raise e + end + + def lease_key + "object_pool:create:#{pool.id}" + end + + def lease_timeout + 1.hour + end + end +end diff --git a/app/workers/object_pool/join_worker.rb b/app/workers/object_pool/join_worker.rb new file mode 100644 index 00000000000..07676011b2a --- /dev/null +++ b/app/workers/object_pool/join_worker.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +module ObjectPool + class JoinWorker + include ApplicationWorker + include ObjectPoolQueue + + def perform(pool_id, project_id) + pool = PoolRepository.find_by_id(pool_id) + return unless pool&.joinable? + + project = Project.find_by_id(project_id) + return unless project + + pool.link_repository(project.repository) + + Projects::HousekeepingService.new(project).execute + end + end +end diff --git a/app/workers/object_pool/schedule_join_worker.rb b/app/workers/object_pool/schedule_join_worker.rb new file mode 100644 index 00000000000..647a8b72435 --- /dev/null +++ b/app/workers/object_pool/schedule_join_worker.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +module ObjectPool + class ScheduleJoinWorker + include ApplicationWorker + include ObjectPoolQueue + + def perform(pool_id) + pool = PoolRepository.find_by_id(pool_id) + return unless pool&.joinable? + + pool.member_projects.find_each do |project| + next if project.forked? && !project.import_finished? + + ObjectPool::JoinWorker.perform_async(pool.id, project.id) + end + end + end +end |