diff options
author | Yorick Peterse <yorickpeterse@gmail.com> | 2018-05-07 19:22:07 +0300 |
---|---|---|
committer | Yorick Peterse <yorickpeterse@gmail.com> | 2018-05-17 14:53:00 +0300 |
commit | 19428e800895ba20eacb3357285acef8d69f6d8c (patch) | |
tree | 0f16e630b6a808b6013d463146c32a134d6ee9c0 | |
parent | 70985aa19b389c2ee8234edfbb516b5403a7bfcf (diff) |
Preload pipeline data for project pipelines
When displaying the pipelines of a project we now preload the following
data:
1. Authors of the commits that belong to these pipelines
2. The number of warnings per pipeline, which is used by
Ci::Pipeline#has_warnings?
== Commit Authors
Previously this data was queried for every Commit separately, leading to
20 SQL queries being executed in the worst case. With an average of 3 to
5 milliseconds per SQL query this could result in 100 milliseconds being
spent in _just_ getting Commit authors.
To preload this data Commit#author now uses BatchLoader (through
Commit#lazy_author), and a separate module
Gitlab::Ci::Pipeline::Preloader is used to ensure all authors are loaded
before they are used.
== Number of warnings
This changes Ci::Pipeline#has_warnings? so it supports preloading of the
number of warnings per pipeline. This removes the need for executing a
COUNT(*) query for every pipeline just to see if it has any warnings or
not.
-rw-r--r-- | app/controllers/projects/pipelines_controller.rb | 2 | ||||
-rw-r--r-- | app/models/ci/pipeline.rb | 13 | ||||
-rw-r--r-- | app/models/commit.rb | 28 | ||||
-rw-r--r-- | lib/gitlab/ci/pipeline/preloader.rb | 28 | ||||
-rw-r--r-- | spec/lib/gitlab/ci/pipeline/preloader_spec.rb | 20 | ||||
-rw-r--r-- | spec/models/ci/pipeline_spec.rb | 27 | ||||
-rw-r--r-- | spec/models/commit_spec.rb | 84 |
7 files changed, 195 insertions, 7 deletions
diff --git a/app/controllers/projects/pipelines_controller.rb b/app/controllers/projects/pipelines_controller.rb index 329aaafb76d..91afe24b707 100644 --- a/app/controllers/projects/pipelines_controller.rb +++ b/app/controllers/projects/pipelines_controller.rb @@ -23,7 +23,7 @@ class Projects::PipelinesController < Projects::ApplicationController @finished_count = limited_pipelines_count(project, 'finished') @pipelines_count = limited_pipelines_count(project) - @pipelines.map(&:commit) # List commits for batch loading + Gitlab::Ci::Pipeline::Preloader.preload(@pipelines) respond_to do |format| format.html diff --git a/app/models/ci/pipeline.rb b/app/models/ci/pipeline.rb index 1f49764e7cc..c26f0b6dcdc 100644 --- a/app/models/ci/pipeline.rb +++ b/app/models/ci/pipeline.rb @@ -406,7 +406,18 @@ module Ci end def has_warnings? - builds.latest.failed_but_allowed.any? + number_of_warnings.positive? + end + + def number_of_warnings + BatchLoader.for(id).batch(default_value: 0) do |pipeline_ids, loader| + Build.where(commit_id: pipeline_ids) + .latest + .failed_but_allowed + .group(:commit_id) + .count + .each { |id, amount| loader.call(id, amount) } + end end def set_config_source diff --git a/app/models/commit.rb b/app/models/commit.rb index b46f9f34689..56d4c86774e 100644 --- a/app/models/commit.rb +++ b/app/models/commit.rb @@ -224,8 +224,34 @@ class Commit Gitlab::ClosingIssueExtractor.new(project, current_user).closed_by_message(safe_message) end + def lazy_author + BatchLoader.for(author_email.downcase).batch do |emails, loader| + # A Hash that maps user Emails to the corresponding User objects. The + # Emails at this point are the _primary_ Emails of the Users. + users_for_emails = User + .by_any_email(emails) + .each_with_object({}) { |user, hash| hash[user.email] = user } + + users_for_ids = users_for_emails + .values + .each_with_object({}) { |user, hash| hash[user.id] = user } + + # Some commits may have used an alternative Email address. In this case we + # need to query the "emails" table to map those addresses to User objects. + Email + .where(email: emails - users_for_emails.keys) + .pluck(:email, :user_id) + .each { |(email, id)| users_for_emails[email] = users_for_ids[id] } + + users_for_emails.each { |email, user| loader.call(email, user) } + end + end + def author - User.find_by_any_email(author_email.downcase) + # We use __sync so that we get the actual objects back (including an actual + # nil), instead of a wrapper, as returning a wrapped nil breaks a lot of + # code. + lazy_author.__sync end request_cache(:author) { author_email.downcase } diff --git a/lib/gitlab/ci/pipeline/preloader.rb b/lib/gitlab/ci/pipeline/preloader.rb new file mode 100644 index 00000000000..e7a2e5511cf --- /dev/null +++ b/lib/gitlab/ci/pipeline/preloader.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module Gitlab + module Ci + module Pipeline + # Class for preloading data associated with pipelines such as commit + # authors. + module Preloader + def self.preload(pipelines) + # This ensures that all the pipeline commits are eager loaded before we + # start using them. + pipelines.each(&:commit) + + pipelines.each do |pipeline| + # This preloads the author of every commit. We're using "lazy_author" + # here since "author" immediately loads the data on the first call. + pipeline.commit.try(:lazy_author) + + # This preloads the number of warnings for every pipeline, ensuring + # that Ci::Pipeline#has_warnings? doesn't execute any additional + # queries. + pipeline.number_of_warnings + end + end + end + end + end +end diff --git a/spec/lib/gitlab/ci/pipeline/preloader_spec.rb b/spec/lib/gitlab/ci/pipeline/preloader_spec.rb new file mode 100644 index 00000000000..477c7477df0 --- /dev/null +++ b/spec/lib/gitlab/ci/pipeline/preloader_spec.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Gitlab::Ci::Pipeline::Preloader do + describe '.preload' do + it 'preloads the author of every pipeline commit' do + commit = double(:commit) + pipeline = double(:pipeline, commit: commit) + + expect(commit) + .to receive(:lazy_author) + + expect(pipeline) + .to receive(:number_of_warnings) + + described_class.preload([pipeline]) + end + end +end diff --git a/spec/models/ci/pipeline_spec.rb b/spec/models/ci/pipeline_spec.rb index ddd66a6be87..e7845b693a1 100644 --- a/spec/models/ci/pipeline_spec.rb +++ b/spec/models/ci/pipeline_spec.rb @@ -774,6 +774,33 @@ describe Ci::Pipeline, :mailer do end end + describe '#number_of_warnings' do + it 'returns the number of warnings' do + create(:ci_build, :allowed_to_fail, :failed, pipeline: pipeline, name: 'rubocop') + + expect(pipeline.number_of_warnings).to eq(1) + end + + it 'supports eager loading of the number of warnings' do + pipeline2 = create(:ci_empty_pipeline, status: :created, project: project) + + create(:ci_build, :allowed_to_fail, :failed, pipeline: pipeline, name: 'rubocop') + create(:ci_build, :allowed_to_fail, :failed, pipeline: pipeline2, name: 'rubocop') + + pipelines = project.pipelines.to_a + + pipelines.each(&:number_of_warnings) + + # To run the queries we need to actually use the lazy objects, which we do + # by just sending "to_i" to them. + amount = ActiveRecord::QueryRecorder + .new { pipelines.each { |p| p.number_of_warnings.to_i } } + .count + + expect(amount).to eq(1) + end + end + shared_context 'with some outdated pipelines' do before do create_pipeline(:canceled, 'ref', 'A', project) diff --git a/spec/models/commit_spec.rb b/spec/models/commit_spec.rb index 448b813c2e1..090f91168ad 100644 --- a/spec/models/commit_spec.rb +++ b/spec/models/commit_spec.rb @@ -52,22 +52,98 @@ describe Commit do end end - describe '#author' do + describe '#author', :request_store do it 'looks up the author in a case-insensitive way' do user = create(:user, email: commit.author_email.upcase) expect(commit.author).to eq(user) end - it 'caches the author', :request_store do + it 'caches the author' do user = create(:user, email: commit.author_email) - expect(User).to receive(:find_by_any_email).and_call_original expect(commit.author).to eq(user) + key = "Commit:author:#{commit.author_email.downcase}" - expect(RequestStore.store[key]).to eq(user) + expect(RequestStore.store[key]).to eq(user) expect(commit.author).to eq(user) end + + context 'using eager loading' do + let!(:alice) { create(:user, email: 'alice@example.com') } + let!(:bob) { create(:user, email: 'hunter2@example.com') } + + let(:alice_commit) do + described_class.new(RepoHelpers.sample_commit, project).tap do |c| + c.author_email = 'alice@example.com' + end + end + + let(:bob_commit) do + # The commit for Bob uses one of his alternative Emails, instead of the + # primary one. + described_class.new(RepoHelpers.sample_commit, project).tap do |c| + c.author_email = 'bob@example.com' + end + end + + let(:eve_commit) do + described_class.new(RepoHelpers.sample_commit, project).tap do |c| + c.author_email = 'eve@example.com' + end + end + + let!(:commits) { [alice_commit, bob_commit, eve_commit] } + + before do + create(:email, user: bob, email: 'bob@example.com') + end + + it 'executes only two SQL queries' do + recorder = ActiveRecord::QueryRecorder.new do + # Running this first ensures we don't run one query for every + # commit. + commits.each(&:lazy_author) + + # This forces the execution of the SQL queries necessary to load the + # data. + commits.each { |c| c.author.try(:id) } + end + + expect(recorder.count).to eq(2) + end + + it "preloads the authors for Commits matching a user's primary Email" do + commits.each(&:lazy_author) + + expect(alice_commit.author).to eq(alice) + end + + it "preloads the authors for Commits using a User's alternative Email" do + commits.each(&:lazy_author) + + expect(bob_commit.author).to eq(bob) + end + + it 'sets the author to Nil if an author could not be found for a Commit' do + commits.each(&:lazy_author) + + expect(eve_commit.author).to be_nil + end + + it 'does not execute SQL queries once the authors are preloaded' do + commits.each(&:lazy_author) + commits.each { |c| c.author.try(:id) } + + recorder = ActiveRecord::QueryRecorder.new do + alice_commit.author + bob_commit.author + eve_commit.author + end + + expect(recorder.count).to be_zero + end + end end describe '#to_reference' do |