Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: James Lopez <james@jameslopez.es> 2016-07-08 11:44:07 +0300
committer: James Lopez <james@jameslopez.es> 2016-07-08 11:44:07 +0300
commit: 6d09e946d22727ce595aeb382685292a1ad8f5a8 (patch)
tree: 879c1fcdd1778057362a501cc07c452417b459e4
parent: 86d83a3a85dfc252ee5efb8cc630e1a4295adb93 (diff)
import_url migration performance improvements
Nullify empty import_urls upfront, so that the number of projects with a non-NULL import_url decreases to 1/5. Also, batches are now processed in blocks of 1000, each in its own thread - this part is a bit experimental.
-rw-r--r-- db/migrate/20160620110927_fix_no_validatable_import_url.rb 34
1 files changed, 28 insertions, 6 deletions
diff --git a/db/migrate/20160620110927_fix_no_validatable_import_url.rb b/db/migrate/20160620110927_fix_no_validatable_import_url.rb
index 82a616c62d9..02ff1962e3f 100644
--- a/db/migrate/20160620110927_fix_no_validatable_import_url.rb
+++ b/db/migrate/20160620110927_fix_no_validatable_import_url.rb
@@ -11,7 +11,7 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration
attr_reader :results, :query
- def initialize(batch_size: 100, query:)
+ def initialize(batch_size: 1000, query:)
@offset = 0
@batch_size = batch_size
@query = query
@@ -58,22 +58,40 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration
return
end
+ say('Nullifying empty import URLs')
+
+ nullify_empty_urls
+
say('Cleaning up invalid import URLs... This may take a few minutes if we have a large number of imported projects.')
- invalid_import_url_project_ids.each { |project_id| cleanup_import_url(project_id) }
+ process_invalid_import_urls
end
- def invalid_import_url_project_ids
- ids = []
+ def process_invalid_import_urls
+ @threads = []
batches = SqlBatches.new(query: "SELECT id, import_url FROM projects WHERE import_url IS NOT NULL")
while batches.next?
+ project_ids = []
+
batches.results.each do |result|
- ids << result['id'] unless valid_url?(result['import_url'])
+ project_ids << result['id'] unless valid_url?(result['import_url'])
end
+
+ process_batch(project_ids)
end
- ids
+ @threads.each(&:join)
+ end
+
+ def process_batch(project_ids)
+ @threads << Thread.new do
+ begin
+ project_ids.each { |project_id| cleanup_import_url(project_id) }
+ ensure
+ ActiveRecord::Base.connection.close
+ end
+ end
end
def valid_url?(url)
@@ -83,4 +101,8 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration
def cleanup_import_url(project_id)
execute("UPDATE projects SET import_url = NULL WHERE id = #{project_id}")
end
+
+ def nullify_empty_urls
+ execute("UPDATE projects SET import_url = NULL WHERE import_url = ''")
+ end
end