diff options
author | Stan Hu <stanhu@gmail.com> | 2019-08-16 22:53:56 +0300 |
---|---|---|
committer | Douwe Maan <douwe@gitlab.com> | 2019-08-16 22:53:56 +0300 |
commit | f14647fdae4a07c3c59665576b70f847ab866c58 (patch) | |
tree | 3dbeb5f11039722f520694041e6e161d15b628f0 /app | |
parent | b3145bc976836f4c28a2f97a57e40b9f315aa3a4 (diff) |
Expire project caches once per push instead of once per ref
Previously `ProjectCacheWorker` would be scheduled once per ref, which
would generate unnecessary I/O and load on Sidekiq, especially if many
tags or branches were pushed at once. `ProjectCacheWorker` would expire
three items:
1. Repository size: This only needs to be updated once per push.
2. Commit count: This only needs to be updated if the default branch
is updated.
3. Project method caches: These only need to be updated when the default
branch changes, and then only if certain files change (e.g. README,
CHANGELOG, etc.).
Because the third item requires looking at the actual changes in the
commit deltas, we schedule one `ProjectCacheWorker` to handle the first
two cases, and schedule a separate `ProjectCacheWorker` for the third
case if it is needed. As a result, this brings down the number of
`ProjectCacheWorker` jobs per push from N (one per ref) to at most 2.
Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/52046
Diffstat (limited to 'app')
-rw-r--r-- | app/models/repository.rb | 8 | ||||
-rw-r--r-- | app/services/git/base_hooks_service.rb | 12 | ||||
-rw-r--r-- | app/workers/post_receive.rb | 30 | ||||
-rw-r--r-- | app/workers/project_cache_worker.rb | 6 |
4 files changed, 41 insertions, 15 deletions
diff --git a/app/models/repository.rb b/app/models/repository.rb index 9d45a12fa6e..6f63cd32da4 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -389,11 +389,15 @@ class Repository expire_statistics_caches end - # Runs code after a repository has been created. - def after_create + def expire_status_cache expire_exists_cache expire_root_ref_cache expire_emptiness_caches + end + + # Runs code after a repository has been created. + def after_create + expire_status_cache repository_event(:create_repository) end diff --git a/app/services/git/base_hooks_service.rb b/app/services/git/base_hooks_service.rb index 1db18fcf401..3fd38444196 100644 --- a/app/services/git/base_hooks_service.rb +++ b/app/services/git/base_hooks_service.rb @@ -8,8 +8,6 @@ module Git PROCESS_COMMIT_LIMIT = 100 def execute - project.repository.after_create if project.empty_repo? - create_events create_pipelines execute_project_hooks @@ -70,11 +68,11 @@ module Git end def enqueue_invalidate_cache - ProjectCacheWorker.perform_async( - project.id, - invalidated_file_types, - [:commit_count, :repository_size] - ) + file_types = invalidated_file_types + + return unless file_types.present? + + ProjectCacheWorker.perform_async(project.id, file_types, [], false) end def base_params diff --git a/app/workers/post_receive.rb b/app/workers/post_receive.rb index 622bd6f1f48..61d34981458 100644 --- a/app/workers/post_receive.rb +++ b/app/workers/post_receive.rb @@ -42,10 +42,8 @@ class PostReceive user = identify_user(post_received) return false unless user - # Expire the branches cache so we have updated data for this push - post_received.project.repository.expire_branches_cache if post_received.includes_branches? - # We only need to expire tags once per push - post_received.project.repository.expire_caches_for_tags if post_received.includes_tags? 
+ # We only need to expire certain caches once per push + expire_caches(post_received) post_received.enum_for(:changes_refs).with_index do |(oldrev, newrev, ref), index| service_klass = @@ -74,6 +72,30 @@ class PostReceive after_project_changes_hooks(post_received, user, refs.to_a, changes) end + # Expire the project, branch, and tag cache once per push. Schedule an + # update for the repository size and commit count if necessary. + def expire_caches(post_received) + project = post_received.project + + project.repository.expire_status_cache if project.empty_repo? + project.repository.expire_branches_cache if post_received.includes_branches? + project.repository.expire_caches_for_tags if post_received.includes_tags? + + enqueue_repository_cache_update(post_received) + end + + def enqueue_repository_cache_update(post_received) + stats_to_invalidate = [:repository_size] + stats_to_invalidate << :commit_count if post_received.includes_default_branch? + + ProjectCacheWorker.perform_async( + post_received.project.id, + [], + stats_to_invalidate, + true + ) + end + def after_project_changes_hooks(post_received, user, refs, changes) hook_data = Gitlab::DataBuilder::Repository.update(post_received.project, user, changes, refs) SystemHooksService.new.execute_hooks(hook_data, :repository_update_hooks) diff --git a/app/workers/project_cache_worker.rb b/app/workers/project_cache_worker.rb index 4e8ea903139..5ac860c93e0 100644 --- a/app/workers/project_cache_worker.rb +++ b/app/workers/project_cache_worker.rb @@ -12,13 +12,15 @@ class ProjectCacheWorker # CHANGELOG. # statistics - An Array containing columns from ProjectStatistics to # refresh, if empty all columns will be refreshed + # refresh_statistics - A boolean that determines whether project statistics should + # be updated. 
# rubocop: disable CodeReuse/ActiveRecord - def perform(project_id, files = [], statistics = []) + def perform(project_id, files = [], statistics = [], refresh_statistics = true) project = Project.find_by(id: project_id) return unless project - update_statistics(project, statistics) + update_statistics(project, statistics) if refresh_statistics return unless project.repository.exists? |