From 4e2bb4e5e7df1273a4d2fdd370b6c17a27c394d8 Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Mon, 12 Aug 2019 15:31:58 -0700 Subject: Reduce Gitaly calls in PostReceive This commit reduces I/O load and memory utilization during PostReceive for the common case when no project hooks or services are set up. We saw a Gitaly N+1 issue in `CommitDelta` when many tags or branches are pushed. We can reduce this overhead in the common case because we observe that most new projects do not have any Web hooks or services, especially when they are first created. Previously, `BaseHooksService` unconditionally iterated through the last 20 commits of each ref to build the `push_data` structure. The `push_data` structured was used in numerous places: 1. Building the push payload in `EventCreateService` 2. Creating a CI pipeline 3. Executing project Web or system hooks 4. Executing project services 5. As the return value of `BaseHooksService#execute` 6. `BranchHooksService#invalidated_file_types` We only need to generate the full `push_data` for items 3, 4, and 6. Item 1: `EventCreateService` only needs the last commit and doesn't actually need the commit deltas. Item 2: In addition, `Ci::CreatePipelineService` only needed a subset of the parameters. Item 5: The return value of `BaseHooksService#execute` also wasn't being used anywhere. Item 6: This is only used when pushing to the default branch, so if many tags are pushed we can save significant I/O here. Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/65878 Fic --- app/services/git/base_hooks_service.rb | 45 +++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 12 deletions(-) (limited to 'app/services/git') diff --git a/app/services/git/base_hooks_service.rb b/app/services/git/base_hooks_service.rb index d30df34e54b..1db18fcf401 100644 --- a/app/services/git/base_hooks_service.rb +++ b/app/services/git/base_hooks_service.rb @@ -19,7 +19,7 @@ module Git update_remote_mirrors - push_data + success end private @@ -33,7 +33,7 @@ module Git end def limited_commits - commits.last(PROCESS_COMMIT_LIMIT) + @limited_commits ||= commits.last(PROCESS_COMMIT_LIMIT) end def commits_count @@ -48,21 +48,25 @@ module Git [] end + # Push events in the activity feed only show information for the + # last commit. def create_events - EventCreateService.new.push(project, current_user, push_data) + EventCreateService.new.push(project, current_user, event_push_data) end def create_pipelines return unless params.fetch(:create_pipelines, true) Ci::CreatePipelineService - .new(project, current_user, push_data) + .new(project, current_user, base_params) .execute(:push, pipeline_options) end def execute_project_hooks - project.execute_hooks(push_data, hook_name) - project.execute_services(push_data, hook_name) + # Creating push_data invokes one CommitDelta RPC per commit. Only + # build this data if we actually need it. + project.execute_hooks(push_data, hook_name) if project.has_active_hooks?(hook_name) + project.execute_services(push_data, hook_name) if project.has_active_services?(hook_name) end def enqueue_invalidate_cache @@ -73,18 +77,35 @@ module Git ) end - def push_data - @push_data ||= Gitlab::DataBuilder::Push.build( - project: project, - user: current_user, + def base_params + { oldrev: params[:oldrev], newrev: params[:newrev], ref: params[:ref], - commits: limited_commits, + push_options: params[:push_options] || {} + } + end + + def push_data_params(commits:, with_changed_files: true) + base_params.merge( + project: project, + user: current_user, + commits: commits, message: event_message, commits_count: commits_count, - push_options: params[:push_options] || {} + with_changed_files: with_changed_files ) + end + + def event_push_data + # We only need the last commit for the event push, and we don't + # need the full deltas either. + @event_push_data ||= Gitlab::DataBuilder::Push.build( + push_data_params(commits: commits.last, with_changed_files: false)) + end + + def push_data + @push_data ||= Gitlab::DataBuilder::Push.build(push_data_params(commits: limited_commits)) # Dependent code may modify the push data, so return a duplicate each time @push_data.dup -- cgit v1.2.3