Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author: Robert Speicher <robert@gitlab.com> 2016-06-17 19:36:22 +0300
committer: Robert Speicher <rspeicher@gmail.com> 2016-06-17 21:14:25 +0300
commit: 432347426c577781c22e0358e739206230c33e91 (patch)
tree: 82b9b78b094731e410b24c65eedf7c76c8894a08 /lib
parent: 4a1b42b0c05e27062c9068948971c249ea18c4d4 (diff)
Merge branch 'update-column-in-batches-where' into 'master'
Allow customising of queries used for `update_column_in_batches`

This MR makes two changes to `add_column_with_default` and `update_column_in_batches`:

1. `add_column_with_default` no longer wraps the entire set of updates in a single transaction, preventing any locks from sticking around for the duration of the entire transaction.
2. `update_column_in_batches` now takes a block which can be used to customise the queries. This uses Arel as messing with raw SQL strings is a total pain.

In !4381 there's a need for updating existing rows/columns in a table in batches using a custom `WHERE` condition. Without the changes in this MR this would not be possible.

See merge request !4680
Diffstat (limited to 'lib')
-rw-r--r-- lib/gitlab/database/migration_helpers.rb 119
1 files changed, 67 insertions, 52 deletions
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index dd3ff0ab18b..dec20d8659b 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -28,65 +28,79 @@ module Gitlab
# Updates the value of a column in batches.
#
# This method updates the table in batches of 5% of the total row count.
- # Any data inserted while running this method (or after it has finished
- # running) is _not_ updated automatically.
+ # This method will continue updating rows until no rows remain.
+ #
+ # When given a block this method will yield two values to the block:
+ #
+ # 1. An instance of `Arel::Table` for the table that is being updated.
+ # 2. The query to run as an Arel object.
+ #
+ # By supplying a block one can add extra conditions to the queries being
+ # executed. Note that the same block is used for _all_ queries.
+ #
+ # Example:
+ #
+ # update_column_in_batches(:projects, :foo, 10) do |table, query|
+ # query.where(table[:some_column].eq('hello'))
+ # end
+ #
+ # This would result in this method updating only rows where
+ # `projects.some_column` equals "hello".
#
# table - The name of the table.
# column - The name of the column to update.
# value - The value for the column.
+ #
+ # Rubocop's Metrics/AbcSize metric is disabled for this method as Rubocop
+ # determines this method to be too complex while there's no way to make it
+ # less "complex" without introducing extra methods (which actually will
+ # make things _more_ complex).
+ #
+ # rubocop: disable Metrics/AbcSize
def update_column_in_batches(table, column, value)
- quoted_table = quote_table_name(table)
- quoted_column = quote_column_name(column)
-
- ##
- # Workaround for #17711
- #
- # It looks like for MySQL `ActiveRecord::Base.conntection.quote(true)`
- # returns correct value (1), but `ActiveRecord::Migration.new.quote`
- # returns incorrect value ('true'), which causes migrations to fail.
- #
- quoted_value = connection.quote(value)
- processed = 0
-
- total = exec_query("SELECT COUNT(*) AS count FROM #{quoted_table}").
- to_hash.
- first['count'].
- to_i
+ table = Arel::Table.new(table)
+
+ count_arel = table.project(Arel.star.count.as('count'))
+ count_arel = yield table, count_arel if block_given?
+
+ total = exec_query(count_arel.to_sql).to_hash.first['count'].to_i
+
+ return if total == 0
# Update in batches of 5% until we run out of any rows to update.
batch_size = ((total / 100.0) * 5.0).ceil
+ start_arel = table.project(table[:id]).order(table[:id].asc).take(1)
+ start_arel = yield table, start_arel if block_given?
+ start_id = exec_query(start_arel.to_sql).to_hash.first['id'].to_i
+
loop do
- start_row = exec_query(%Q{
- SELECT id
- FROM #{quoted_table}
- ORDER BY id ASC
- LIMIT 1 OFFSET #{processed}
- }).to_hash.first
-
- # There are no more rows to process
- break unless start_row
-
- stop_row = exec_query(%Q{
- SELECT id
- FROM #{quoted_table}
- ORDER BY id ASC
- LIMIT 1 OFFSET #{processed + batch_size}
- }).to_hash.first
-
- query = %Q{
- UPDATE #{quoted_table}
- SET #{quoted_column} = #{quoted_value}
- WHERE id >= #{start_row['id']}
- }
+ stop_arel = table.project(table[:id]).
+ where(table[:id].gteq(start_id)).
+ order(table[:id].asc).
+ take(1).
+ skip(batch_size)
+
+ stop_arel = yield table, stop_arel if block_given?
+ stop_row = exec_query(stop_arel.to_sql).to_hash.first
+
+ update_arel = Arel::UpdateManager.new(ActiveRecord::Base).
+ table(table).
+ set([[table[column], value]]).
+ where(table[:id].gteq(start_id))
if stop_row
- query += " AND id < #{stop_row['id']}"
+ stop_id = stop_row['id'].to_i
+ start_id = stop_id
+ update_arel = update_arel.where(table[:id].lt(stop_id))
end
- execute(query)
+ update_arel = yield table, update_arel if block_given?
+
+ execute(update_arel.to_sql)
- processed += batch_size
+ # There are no more rows left to update.
+ break unless stop_row
end
end
@@ -95,9 +109,9 @@ module Gitlab
# This method runs the following steps:
#
# 1. Add the column with a default value of NULL.
- # 2. Update all existing rows in batches.
- # 3. Change the default value of the column to the specified value.
- # 4. Update any remaining rows.
+ # 2. Change the default value of the column to the specified value.
+ # 3. Update all existing rows in batches.
+ # 4. Set a `NOT NULL` constraint on the column if desired (the default).
#
# These steps ensure a column can be added to a large and commonly used
# table without locking the entire table for the duration of the table
@@ -109,7 +123,10 @@ module Gitlab
# default - The default value for the column.
# allow_null - When set to `true` the column will allow NULL values, the
# default is to not allow NULL values.
- def add_column_with_default(table, column, type, default:, allow_null: false)
+ #
+ # This method can also take a block which is passed directly to the
+ # `update_column_in_batches` method.
+ def add_column_with_default(table, column, type, default:, allow_null: false, &block)
if transaction_open?
raise 'add_column_with_default can not be run inside a transaction, ' \
'you can disable transactions by calling disable_ddl_transaction! ' \
@@ -125,11 +142,9 @@ module Gitlab
end
begin
- transaction do
- update_column_in_batches(table, column, default)
+ update_column_in_batches(table, column, default, &block)
- change_column_null(table, column, false) unless allow_null
- end
+ change_column_null(table, column, false) unless allow_null
# We want to rescue _all_ exceptions here, even those that don't inherit
# from StandardError.
rescue Exception => error # rubocop: disable all