From 2cd98a638fe2700f4cf1bed1160717b269ce7856 Mon Sep 17 00:00:00 2001 From: Sean McGivern Date: Wed, 15 Nov 2017 17:22:18 +0000 Subject: Use latest_merge_request_diff association Compared to the merge_request_diff association: 1. It's simpler to query. The query uses a foreign key to the merge_request_diffs table, so no ordering is necessary. 2. It's faster for preloading. The merge_request_diff association has to load every diff for the MRs in the set, then discard all but the most recent for each. This association means that Rails can just query for N diffs from N MRs. 3. It's more complicated to update. This is a bidirectional foreign key, so we need to update two tables when adding a diff record. This also means we need to handle this as a special case when importing a GitLab project. There is some juggling with this association in the merge request model: * `MergeRequest#latest_merge_request_diff` is _always_ the latest diff. * `MergeRequest#merge_request_diff` reuses `MergeRequest#latest_merge_request_diff` unless: * Arguments are passed. These are typically to force-reload the association. * It doesn't exist. That means we might be trying to implicitly create a diff. This only seems to happen in specs. * The association is already loaded. This is important for the reasons explained in the comment, which I'll reiterate here: if we a) load a non-latest diff, then b) get its `merge_request`, then c) get that MR's `merge_request_diff`, we should get the diff we loaded in c), even though that's not the latest diff. Basically, `MergeRequest#merge_request_diff` is the latest diff in most cases, but not quite all. --- app/controllers/concerns/issuable_collections.rb | 2 +- app/models/ci/build.rb | 2 +- app/models/concerns/manual_inverse_association.rb | 17 ++++++++ app/models/merge_request.rb | 33 ++++++++++++++ app/models/merge_request_diff.rb | 5 ++- app/services/merge_requests/refresh_service.rb | 2 +- .../use-merge-requests-diff-id-column.yml | 5 +++ ...quests_latest_merge_request_diff_id_take_two.rb | 29 ++++++++++++ db/schema.rb | 2 +- lib/api/merge_requests.rb | 2 +- lib/gitlab/import_export/project_tree_restorer.rb | 6 +++ spec/lib/gitlab/import_export/all_models.yml | 1 + .../import_export/project_tree_restorer_spec.rb | 6 +++ .../concerns/manual_inverse_association_spec.rb | 51 ++++++++++++++++++++++ spec/models/merge_request_diff_spec.rb | 6 +-- spec/models/merge_request_spec.rb | 37 ++++++++++++++++ 16 files changed, 197 insertions(+), 9 deletions(-) create mode 100644 app/models/concerns/manual_inverse_association.rb create mode 100644 changelogs/unreleased/use-merge-requests-diff-id-column.yml create mode 100644 db/migrate/20171115164540_populate_merge_requests_latest_merge_request_diff_id_take_two.rb create mode 100644 spec/models/concerns/manual_inverse_association_spec.rb diff --git a/app/controllers/concerns/issuable_collections.rb b/app/controllers/concerns/issuable_collections.rb index 2b011bc87b0..f3c9251225f 100644 --- a/app/controllers/concerns/issuable_collections.rb +++ b/app/controllers/concerns/issuable_collections.rb @@ -150,7 +150,7 @@ module IssuableCollections when 'MergeRequest' [ :source_project, :target_project, :author, :assignee, :labels, :milestone, - head_pipeline: :project, target_project: :namespace, merge_request_diff: :merge_request_diff_commits + head_pipeline: :project, target_project: :namespace, latest_merge_request_diff: :merge_request_diff_commits ] end end diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb index 1b2b0d17910..a73e2882b27 100644 --- a/app/models/ci/build.rb +++ b/app/models/ci/build.rb @@ -243,7 +243,7 @@ module Ci @merge_request ||= begin - merge_requests = MergeRequest.includes(:merge_request_diff) + merge_requests = MergeRequest.includes(:latest_merge_request_diff) .where(source_branch: ref, source_project: pipeline.project) .reorder(iid: :desc) diff --git a/app/models/concerns/manual_inverse_association.rb b/app/models/concerns/manual_inverse_association.rb new file mode 100644 index 00000000000..0fca8feaf89 --- /dev/null +++ b/app/models/concerns/manual_inverse_association.rb @@ -0,0 +1,17 @@ +module ManualInverseAssociation + extend ActiveSupport::Concern + + module ClassMethods + def manual_inverse_association(association, inverse) + define_method(association) do |*args| + super(*args).tap do |value| + next unless value + + child_association = value.association(inverse) + child_association.set_inverse_instance(self) + child_association.target = self + end + end + end + end +end diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb index f1a5cc73e83..1cbf27e0bb5 100644 --- a/app/models/merge_request.rb +++ b/app/models/merge_request.rb @@ -5,6 +5,7 @@ class MergeRequest < ActiveRecord::Base include Referable include IgnorableColumn include TimeTrackable + include ManualInverseAssociation ignore_column :locked_at, :ref_fetched @@ -14,9 +15,28 @@ class MergeRequest < ActiveRecord::Base belongs_to :merge_user, class_name: "User" has_many :merge_request_diffs + has_one :merge_request_diff, -> { order('merge_request_diffs.id DESC') }, inverse_of: :merge_request + belongs_to :latest_merge_request_diff, class_name: 'MergeRequestDiff' + manual_inverse_association :latest_merge_request_diff, :merge_request + + # We don't want to perform another query if: + # 1. There are no arguments. + # 2. This association isn't already loaded. + # 3. The latest diff exists. + # + # The second one in particular is important - MergeRequestDiff#merge_request + # is the inverse of MergeRequest#merge_request_diff, which means it may not be + # the latest diff, because we could have loaded any diff from this particular + # MR. If we haven't already loaded a diff, then it's fine to load the latest. + def merge_request_diff(*args) + fallback = args.empty? && !association(:merge_request_diff).loaded? && latest_merge_request_diff + + fallback || super + end + belongs_to :head_pipeline, foreign_key: "head_pipeline_id", class_name: "Ci::Pipeline" has_many :events, as: :target, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent @@ -167,6 +187,19 @@ class MergeRequest < ActiveRecord::Base where("merge_requests.id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection end + def self.set_latest_merge_request_diff_ids(relation) + update = ' + latest_merge_request_diff_id = ( + SELECT MAX(id) + FROM merge_request_diffs + WHERE merge_requests.id = merge_request_diffs.merge_request_id + )'.squish + + relation.in_batches do |batch| + batch.update_all(update) + end + end + WIP_REGEX = /\A\s*(\[WIP\]\s*|WIP:\s*|WIP\s+)+\s*/i.freeze def self.work_in_progress?(title) diff --git a/app/models/merge_request_diff.rb b/app/models/merge_request_diff.rb index 1eda0f9cbbd..2883e5bfcd0 100644 --- a/app/models/merge_request_diff.rb +++ b/app/models/merge_request_diff.rb @@ -2,6 +2,7 @@ class MergeRequestDiff < ActiveRecord::Base include Sortable include Importable include Gitlab::EncodingHelper + include ManualInverseAssociation # Prevent store of diff if commits amount more then 500 COMMITS_SAFE_SIZE = 100 @@ -10,6 +11,8 @@ class MergeRequestDiff < ActiveRecord::Base VALID_CLASSES = [Hash, Rugged::Patch, Rugged::Diff::Delta].freeze belongs_to :merge_request + manual_inverse_association :merge_request, :merge_request_diff + has_many :merge_request_diff_files, -> { order(:merge_request_diff_id, :relative_order) } has_many :merge_request_diff_commits, -> { order(:merge_request_diff_id, :relative_order) } @@ -194,7 +197,7 @@ class MergeRequestDiff < ActiveRecord::Base end def latest? - self == merge_request.merge_request_diff + self == merge_request.latest_merge_request_diff end def compare_with(sha) diff --git a/app/services/merge_requests/refresh_service.rb b/app/services/merge_requests/refresh_service.rb index fc100580c4f..bf3d4855122 100644 --- a/app/services/merge_requests/refresh_service.rb +++ b/app/services/merge_requests/refresh_service.rb @@ -35,7 +35,7 @@ module MergeRequests # target branch manually def close_merge_requests commit_ids = @commits.map(&:id) - merge_requests = @project.merge_requests.preload(:merge_request_diff).opened.where(target_branch: @branch_name).to_a + merge_requests = @project.merge_requests.preload(:latest_merge_request_diff).opened.where(target_branch: @branch_name).to_a merge_requests = merge_requests.select(&:diff_head_commit) merge_requests = merge_requests.select do |merge_request| diff --git a/changelogs/unreleased/use-merge-requests-diff-id-column.yml b/changelogs/unreleased/use-merge-requests-diff-id-column.yml new file mode 100644 index 00000000000..da4106ec8cf --- /dev/null +++ b/changelogs/unreleased/use-merge-requests-diff-id-column.yml @@ -0,0 +1,5 @@ +--- +title: Make finding most recent merge request diffs more efficient +merge_request: +author: +type: performance diff --git a/db/migrate/20171115164540_populate_merge_requests_latest_merge_request_diff_id_take_two.rb b/db/migrate/20171115164540_populate_merge_requests_latest_merge_request_diff_id_take_two.rb new file mode 100644 index 00000000000..6bbb8d3b8d1 --- /dev/null +++ b/db/migrate/20171115164540_populate_merge_requests_latest_merge_request_diff_id_take_two.rb @@ -0,0 +1,29 @@ +# This is identical to PopulateMergeRequestsLatestMergeRequestDiffId, which +# already has specs. +class PopulateMergeRequestsLatestMergeRequestDiffIdTakeTwo < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + BATCH_SIZE = 1_000 + + class MergeRequest < ActiveRecord::Base + self.table_name = 'merge_requests' + + include ::EachBatch + end + + disable_ddl_transaction! + + def up + update = ' + latest_merge_request_diff_id = ( + SELECT MAX(id) + FROM merge_request_diffs + WHERE merge_requests.id = merge_request_diffs.merge_request_id + )'.squish + + MergeRequest.where(latest_merge_request_diff_id: nil).each_batch(of: BATCH_SIZE) do |relation| + relation.update_all(update) + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 37e08d453c8..e8caf37e99f 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,7 +11,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20171106180641) do +ActiveRecord::Schema.define(version: 20171115164540) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" diff --git a/lib/api/merge_requests.rb b/lib/api/merge_requests.rb index 726f09e3669..5b4642a2f57 100644 --- a/lib/api/merge_requests.rb +++ b/lib/api/merge_requests.rb @@ -21,7 +21,7 @@ module API return merge_requests if args[:view] == 'simple' merge_requests - .preload(:notes, :author, :assignee, :milestone, :merge_request_diff, :labels, :timelogs) + .preload(:notes, :author, :assignee, :milestone, :latest_merge_request_diff, :labels, :timelogs) end params :merge_requests_params do diff --git a/lib/gitlab/import_export/project_tree_restorer.rb b/lib/gitlab/import_export/project_tree_restorer.rb index 639f4f0c3f0..94bd71f2563 100644 --- a/lib/gitlab/import_export/project_tree_restorer.rb +++ b/lib/gitlab/import_export/project_tree_restorer.rb @@ -60,9 +60,15 @@ module Gitlab end end + restore_latest_merge_request_diff_ids + @saved end + def restore_latest_merge_request_diff_ids + MergeRequest.set_latest_merge_request_diff_ids(@project.merge_requests) + end + def save_relation_hash(relation_hash_batch, relation_key) relation_hash = create_relation(relation_key, relation_hash_batch) diff --git a/spec/lib/gitlab/import_export/all_models.yml b/spec/lib/gitlab/import_export/all_models.yml index bf1e97654e5..0ecb50f7110 100644 --- a/spec/lib/gitlab/import_export/all_models.yml +++ b/spec/lib/gitlab/import_export/all_models.yml @@ -88,6 +88,7 @@ merge_requests: - metrics - timelogs - head_pipeline +- latest_merge_request_diff merge_request_diff: - merge_request - merge_request_diff_commits diff --git a/spec/lib/gitlab/import_export/project_tree_restorer_spec.rb b/spec/lib/gitlab/import_export/project_tree_restorer_spec.rb index e4b4cf5ba85..5f6db0b3dfd 100644 --- a/spec/lib/gitlab/import_export/project_tree_restorer_spec.rb +++ b/spec/lib/gitlab/import_export/project_tree_restorer_spec.rb @@ -117,6 +117,12 @@ describe Gitlab::ImportExport::ProjectTreeRestorer do expect(st_commits.first[:committed_date]).to be_kind_of(Time) end + it 'has the correct data for merge request latest_merge_request_diff' do + MergeRequest.find_each do |merge_request| + expect(merge_request.latest_merge_request_diff_id).to eq(merge_request.merge_request_diffs.maximum(:id)) + end + end + it 'has labels associated to label links, associated to issues' do expect(Label.first.label_links.first.target).not_to be_nil end diff --git a/spec/models/concerns/manual_inverse_association_spec.rb b/spec/models/concerns/manual_inverse_association_spec.rb new file mode 100644 index 00000000000..aad40883854 --- /dev/null +++ b/spec/models/concerns/manual_inverse_association_spec.rb @@ -0,0 +1,51 @@ +require 'spec_helper' + +describe ManualInverseAssociation do + let(:model) do + Class.new(MergeRequest) do + belongs_to :manual_association, class_name: 'MergeRequestDiff', foreign_key: :latest_merge_request_diff_id + manual_inverse_association :manual_association, :merge_request + end + end + + before do + stub_const("#{described_class}::Model", model) + end + + let(:instance) { create(:merge_request).becomes(model) } + + describe '.manual_inverse_association' do + context 'when the relation exists' do + before do + instance.create_merge_request_diff + instance.reload + end + + it 'loads the relation' do + expect(instance.manual_association).to be_an_instance_of(MergeRequestDiff) + end + + it 'does not perform extra queries after loading' do + instance.manual_association + + expect { instance.manual_association.merge_request } + .not_to exceed_query_limit(0) + end + + it 'passes arguments to the default association method, to allow reloading' do + query_count = ActiveRecord::QueryRecorder.new do + instance.manual_association + instance.manual_association(true) + end.count + + expect(query_count).to eq(2) + end + end + + context 'when the relation does not return a value' do + it 'does not try to set an inverse' do + expect(instance.manual_association).to be_nil + end + end + end +end diff --git a/spec/models/merge_request_diff_spec.rb b/spec/models/merge_request_diff_spec.rb index 0cfaa17676e..e2a9233a496 100644 --- a/spec/models/merge_request_diff_spec.rb +++ b/spec/models/merge_request_diff_spec.rb @@ -18,8 +18,8 @@ describe MergeRequestDiff do let!(:first_diff) { mr.merge_request_diff } let!(:last_diff) { mr.create_merge_request_diff } - it { expect(last_diff.latest?).to be_truthy } - it { expect(first_diff.latest?).to be_falsey } + it { expect(last_diff.reload).to be_latest } + it { expect(first_diff.reload).not_to be_latest } end describe '#diffs' do @@ -29,7 +29,7 @@ describe MergeRequestDiff do context 'when the :ignore_whitespace_change option is set' do it 'creates a new compare object instead of loading from the DB' do expect(mr_diff).not_to receive(:load_diffs) - expect(Gitlab::Git::Compare).to receive(:new).and_call_original + expect(mr_diff.compare).to receive(:diffs).and_call_original mr_diff.raw_diffs(ignore_whitespace_change: true) end diff --git a/spec/models/merge_request_spec.rb b/spec/models/merge_request_spec.rb index d250ad50713..e510cc3ba47 100644 --- a/spec/models/merge_request_spec.rb +++ b/spec/models/merge_request_spec.rb @@ -79,6 +79,43 @@ describe MergeRequest do end end + describe '.set_latest_merge_request_diff_ids' do + def create_merge_request_with_diffs(source_branch, diffs: 2) + params = { + target_project: project, + target_branch: 'master', + source_project: project, + source_branch: source_branch + } + + create(:merge_request, params).tap do |mr| + diffs.times { mr.merge_request_diffs.create } + end + end + + let(:project) { create(:project) } + + it 'sets IDs for merge requests, whether they are already set or not' do + merge_requests = [ + create_merge_request_with_diffs('feature'), + create_merge_request_with_diffs('feature-conflict'), + create_merge_request_with_diffs('wip', diffs: 0), + create_merge_request_with_diffs('csv') + ] + + merge_requests.take(2).each do |merge_request| + merge_request.update_column(:latest_merge_request_diff_id, nil) + end + + expected = merge_requests.map do |merge_request| + merge_request.merge_request_diffs.maximum(:id) + end + + expect { MergeRequest.set_latest_merge_request_diff_ids(project.merge_requests) } + .to change { merge_requests.map { |mr| mr.reload.latest_merge_request_diff_id } }.to(expected) + end + end + describe '#target_branch_sha' do let(:project) { create(:project, :repository) } -- cgit v1.2.3