diff options
author | Stan Hu <stanhu@gmail.com> | 2018-08-14 01:36:15 +0300 |
---|---|---|
committer | Stan Hu <stanhu@gmail.com> | 2018-08-24 15:34:38 +0300 |
commit | 0377c015cf903a1d22d14086e68d1e07fe3c2643 (patch) | |
tree | d0be528c1d6a1fe44aa937fc3aeb1076aa7e3c7c /lib/gitlab/import | |
parent | 842377ab3c5b80a3758ad8c36dc3358bd265bc10 (diff) |
Refactor GitHub Importer database helpers into helper methods
This in preparation for addressing idle-in-transaction timeouts for other importers.
Part of #50021
Diffstat (limited to 'lib/gitlab/import')
-rw-r--r-- | lib/gitlab/import/database_helpers.rb | 25 | ||||
-rw-r--r-- | lib/gitlab/import/merge_request_helpers.rb | 70 |
2 files changed, 95 insertions, 0 deletions
diff --git a/lib/gitlab/import/database_helpers.rb b/lib/gitlab/import/database_helpers.rb new file mode 100644 index 00000000000..80857061933 --- /dev/null +++ b/lib/gitlab/import/database_helpers.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Gitlab + module Import + module DatabaseHelpers + # Inserts a raw row and returns the ID of the inserted row. + # + # attributes - The attributes/columns to set. + # relation - An ActiveRecord::Relation to use for finding the ID of the row + # when using MySQL. + def insert_and_return_id(attributes, relation) + # We use bulk_insert here so we can bypass any queries executed by + # callbacks or validation rules, as doing this wouldn't scale when + # importing very large projects. + result = Gitlab::Database + .bulk_insert(relation.table_name, [attributes], return_ids: true) + + # MySQL doesn't support returning the IDs of a bulk insert in a way that + # is not a pain, so in this case we'll issue an extra query instead. + result.first || + relation.where(iid: attributes[:iid]).limit(1).pluck(:id).first + end + end + end +end diff --git a/lib/gitlab/import/merge_request_helpers.rb b/lib/gitlab/import/merge_request_helpers.rb new file mode 100644 index 00000000000..8ba70700dc1 --- /dev/null +++ b/lib/gitlab/import/merge_request_helpers.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +module Gitlab + module Import + module MergeRequestHelpers + include DatabaseHelpers + + def create_merge_request_without_hooks(project, attributes, iid) + # This work must be wrapped in a transaction as otherwise we can leave + # behind incomplete data in the event of an error. This can then lead + # to duplicate key errors when jobs are retried. + MergeRequest.transaction do + # When creating merge requests there are a lot of hooks that may + # run, for many different reasons. Many of these hooks (e.g. the + # ones used for rendering Markdown) are completely unnecessary and + # may even lead to transaction timeouts. + # + # To ensure importing pull requests has a minimal impact and can + # complete in a reasonable time we bypass all the hooks by inserting + # the row and then retrieving it. We then only perform the + # additional work that is strictly necessary. + merge_request_id = insert_and_return_id(attributes, project.merge_requests) + + merge_request = project.merge_requests.find(merge_request_id) + + # We use .insert_and_return_id which effectively disables all callbacks. + # Trigger iid logic here to make sure we track internal id values consistently. + merge_request.ensure_target_project_iid! + + [merge_request, false] + end + rescue ActiveRecord::InvalidForeignKey + # It's possible the project has been deleted since scheduling this + # job. In this case we'll just skip creating the merge request. + [] + rescue ActiveRecord::RecordNotUnique + # It's possible we previously created the MR, but failed when updating + # the Git data. In this case we'll just continue working on the + # existing row. + [project.merge_requests.find_by(iid: iid), true] + end + + def insert_or_replace_git_data(merge_request, source_branch_sha, target_branch_sha, already_exists = false) + # These fields are set so we can create the correct merge request + # diffs. + merge_request.source_branch_sha = source_branch_sha + merge_request.target_branch_sha = target_branch_sha + + merge_request.keep_around_commit + + # MR diffs normally use an "after_save" hook to pull data from Git. + # All of this happens in the transaction started by calling + # create/save/etc. This in turn can lead to these transactions being + # held open for much longer than necessary. To work around this we + # first save the diff, then populate it. + diff = + if already_exists + merge_request.merge_request_diffs.take || + merge_request.merge_request_diffs.build + else + merge_request.merge_request_diffs.build + end + + diff.importing = true + diff.save + diff.save_git_content + end + end + end +end |