diff options
author | Robert Speicher <robert@gitlab.com> | 2016-06-17 19:36:22 +0300 |
---|---|---|
committer | Robert Speicher <rspeicher@gmail.com> | 2016-06-17 21:14:25 +0300 |
commit | 432347426c577781c22e0358e739206230c33e91 (patch) | |
tree | 82b9b78b094731e410b24c65eedf7c76c8894a08 /lib | |
parent | 4a1b42b0c05e27062c9068948971c249ea18c4d4 (diff) |
Merge branch 'update-column-in-batches-where' into 'master'
Allow customising of queries used for `update_column_in_batches`
This MR makes two changes to `add_column_with_default` and `update_column_in_batches`:
1. `add_column_with_default` no longer wraps the entire set of updates in a single transaction. This prevents locks from being held for the duration of one long-running transaction.
2. `update_column_in_batches` now takes a block which can be used to customise the queries. The queries are built with Arel, because messing with raw SQL strings is a total pain.
In !4381 there's a need for updating existing rows/columns in a table in batches using a custom `WHERE` condition. Without the changes in this MR this would not be possible.
See merge request !4680
Diffstat (limited to 'lib')
-rw-r--r-- | lib/gitlab/database/migration_helpers.rb | 119 |
1 files changed, 67 insertions, 52 deletions
diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index dd3ff0ab18b..dec20d8659b 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -28,65 +28,79 @@ module Gitlab # Updates the value of a column in batches. # # This method updates the table in batches of 5% of the total row count. - # Any data inserted while running this method (or after it has finished - # running) is _not_ updated automatically. + # This method will continue updating rows until no rows remain. + # + # When given a block this method will yield two values to the block: + # + # 1. An instance of `Arel::Table` for the table that is being updated. + # 2. The query to run as an Arel object. + # + # By supplying a block one can add extra conditions to the queries being + # executed. Note that the same block is used for _all_ queries. + # + # Example: + # + # update_column_in_batches(:projects, :foo, 10) do |table, query| + # query.where(table[:some_column].eq('hello')) + # end + # + # This would result in this method updating only rows where + # `projects.some_column` equals "hello". # # table - The name of the table. # column - The name of the column to update. # value - The value for the column. + # + # Rubocop's Metrics/AbcSize metric is disabled for this method as Rubocop + # determines this method to be too complex while there's no way to make it + # less "complex" without introducing extra methods (which actually will + # make things _more_ complex). + # + # rubocop: disable Metrics/AbcSize def update_column_in_batches(table, column, value) - quoted_table = quote_table_name(table) - quoted_column = quote_column_name(column) - - ## - # Workaround for #17711 - # - # It looks like for MySQL `ActiveRecord::Base.conntection.quote(true)` - # returns correct value (1), but `ActiveRecord::Migration.new.quote` - # returns incorrect value ('true'), which causes migrations to fail. 
- # - quoted_value = connection.quote(value) - processed = 0 - - total = exec_query("SELECT COUNT(*) AS count FROM #{quoted_table}"). - to_hash. - first['count']. - to_i + table = Arel::Table.new(table) + + count_arel = table.project(Arel.star.count.as('count')) + count_arel = yield table, count_arel if block_given? + + total = exec_query(count_arel.to_sql).to_hash.first['count'].to_i + + return if total == 0 # Update in batches of 5% until we run out of any rows to update. batch_size = ((total / 100.0) * 5.0).ceil + start_arel = table.project(table[:id]).order(table[:id].asc).take(1) + start_arel = yield table, start_arel if block_given? + start_id = exec_query(start_arel.to_sql).to_hash.first['id'].to_i + loop do - start_row = exec_query(%Q{ - SELECT id - FROM #{quoted_table} - ORDER BY id ASC - LIMIT 1 OFFSET #{processed} - }).to_hash.first - - # There are no more rows to process - break unless start_row - - stop_row = exec_query(%Q{ - SELECT id - FROM #{quoted_table} - ORDER BY id ASC - LIMIT 1 OFFSET #{processed + batch_size} - }).to_hash.first - - query = %Q{ - UPDATE #{quoted_table} - SET #{quoted_column} = #{quoted_value} - WHERE id >= #{start_row['id']} - } + stop_arel = table.project(table[:id]). + where(table[:id].gteq(start_id)). + order(table[:id].asc). + take(1). + skip(batch_size) + + stop_arel = yield table, stop_arel if block_given? + stop_row = exec_query(stop_arel.to_sql).to_hash.first + + update_arel = Arel::UpdateManager.new(ActiveRecord::Base). + table(table). + set([[table[column], value]]). + where(table[:id].gteq(start_id)) if stop_row - query += " AND id < #{stop_row['id']}" + stop_id = stop_row['id'].to_i + start_id = stop_id + update_arel = update_arel.where(table[:id].lt(stop_id)) end - execute(query) + update_arel = yield table, update_arel if block_given? + + execute(update_arel.to_sql) - processed += batch_size + # There are no more rows left to update. 
+ break unless stop_row end end @@ -95,9 +109,9 @@ module Gitlab # This method runs the following steps: # # 1. Add the column with a default value of NULL. - # 2. Update all existing rows in batches. - # 3. Change the default value of the column to the specified value. - # 4. Update any remaining rows. + # 2. Change the default value of the column to the specified value. + # 3. Update all existing rows in batches. + # 4. Set a `NOT NULL` constraint on the column if desired (the default). # # These steps ensure a column can be added to a large and commonly used # table without locking the entire table for the duration of the table @@ -109,7 +123,10 @@ module Gitlab # default - The default value for the column. # allow_null - When set to `true` the column will allow NULL values, the # default is to not allow NULL values. - def add_column_with_default(table, column, type, default:, allow_null: false) + # + # This method can also take a block which is passed directly to the + # `update_column_in_batches` method. + def add_column_with_default(table, column, type, default:, allow_null: false, &block) if transaction_open? raise 'add_column_with_default can not be run inside a transaction, ' \ 'you can disable transactions by calling disable_ddl_transaction! ' \ @@ -125,11 +142,9 @@ module Gitlab end begin - transaction do - update_column_in_batches(table, column, default) + update_column_in_batches(table, column, default, &block) - change_column_null(table, column, false) unless allow_null - end + change_column_null(table, column, false) unless allow_null # We want to rescue _all_ exceptions here, even those that don't inherit # from StandardError. rescue Exception => error # rubocop: disable all |