diff options
Diffstat (limited to 'spec/lib/gitlab/database')
18 files changed, 1393 insertions, 119 deletions
diff --git a/spec/lib/gitlab/database/background_migration/batched_migration_spec.rb b/spec/lib/gitlab/database/background_migration/batched_migration_spec.rb index 06c2bc32db3..3daed2508a2 100644 --- a/spec/lib/gitlab/database/background_migration/batched_migration_spec.rb +++ b/spec/lib/gitlab/database/background_migration/batched_migration_spec.rb @@ -59,6 +59,50 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigration, type: :m end end + describe '#pause!' do + context 'when an invalid transition is applied' do + %i[finished failed finalizing].each do |state| + it 'raises an exception' do + batched_migration = create(:batched_background_migration, state) + + expect { batched_migration.pause! }.to raise_error(StateMachines::InvalidTransition, /Cannot transition status/) + end + end + end + + context 'when a valid transition is applied' do + %i[active paused].each do |state| + it 'moves to pause' do + batched_migration = create(:batched_background_migration, state) + + expect(batched_migration.pause!).to be_truthy + end + end + end + end + + describe '#execute!' do + context 'when an invalid transition is applied' do + %i[finished finalizing].each do |state| + it 'raises an exception' do + batched_migration = create(:batched_background_migration, state) + + expect { batched_migration.execute! 
}.to raise_error(StateMachines::InvalidTransition, /Cannot transition status/) + end + end + end + + context 'when a valid transition is applied' do + %i[active paused failed].each do |state| + it 'moves to active' do + batched_migration = create(:batched_background_migration, state) + + expect(batched_migration.execute!).to be_truthy + end + end + end + end + describe '.valid_status' do valid_status = [:paused, :active, :finished, :failed, :finalizing] @@ -77,6 +121,16 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigration, type: :m end end + describe '.ordered_by_created_at_desc' do + let!(:migration_1) { create(:batched_background_migration, created_at: Time.zone.now - 2) } + let!(:migration_2) { create(:batched_background_migration, created_at: Time.zone.now - 1) } + let!(:migration_3) { create(:batched_background_migration, created_at: Time.zone.now - 3) } + + it 'returns batched migrations ordered by created_at (DESC)' do + expect(described_class.ordered_by_created_at_desc).to eq([migration_2, migration_1, migration_3]) + end + end + describe '.active_migration' do let(:connection) { Gitlab::Database.database_base_models[:main].connection } let!(:migration1) { create(:batched_background_migration, :finished) } @@ -620,7 +674,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigration, type: :m describe '#progress' do subject { migration.progress } - context 'when the migration is finished' do + context 'when the migration is completed' do let(:migration) do create(:batched_background_migration, :finished, total_tuple_count: 1).tap do |record| create(:batched_background_migration_job, :succeeded, batched_migration: record, batch_size: 1) @@ -632,6 +686,18 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigration, type: :m end end + context 'when the status is finished' do + let(:migration) do + create(:batched_background_migration, :finished, total_tuple_count: 100).tap do |record| + 
create(:batched_background_migration_job, :succeeded, batched_migration: record, batch_size: 5) + end + end + + it 'returns 100' do + expect(subject).to be 100 + end + end + context 'when the migration does not have jobs' do let(:migration) { create(:batched_background_migration, :active) } diff --git a/spec/lib/gitlab/database/batch_average_counter_spec.rb b/spec/lib/gitlab/database/batch_average_counter_spec.rb new file mode 100644 index 00000000000..43c7a1554f7 --- /dev/null +++ b/spec/lib/gitlab/database/batch_average_counter_spec.rb @@ -0,0 +1,107 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::BatchAverageCounter do + let(:model) { Issue } + let(:column) { :weight } + + let(:in_transaction) { false } + + before do + allow(model.connection).to receive(:transaction_open?).and_return(in_transaction) + end + + describe '#count' do + before do + create_list(:issue, 2, weight: 4) + create_list(:issue, 2, weight: 2) + create_list(:issue, 2, weight: 3) + end + + subject(:batch_average_counter) { described_class.new(model, column) } + + it 'returns correct average of weights' do + expect(subject.count).to eq(3.0) + end + + it 'does no raise an exception if transaction is not open' do + expect { subject.count }.not_to raise_error + end + + context 'when transaction is open' do + let(:in_transaction) { true } + + it 'raises an error' do + expect { subject.count }.to raise_error('BatchAverageCounter can not be run inside a transaction') + end + end + + context 'when batch size is small' do + let(:batch_size) { 2 } + + it 'returns correct average of weights' do + expect(subject.count(batch_size: batch_size)).to eq(3.0) + end + end + + context 'when column passed is an Arel attribute' do + let(:column) { model.arel_table[:weight] } + + it 'returns correct average of weights' do + expect(subject.count).to eq(3.0) + end + end + + context 'when column has total count of zero' do + before do + Issue.update_all(weight: nil) + end + 
+ it 'returns the fallback value' do + expect(subject.count).to eq(-1) + end + end + + context 'when one batch has nil weights (no average)' do + before do + issues = Issue.where(weight: 4) + issues.update_all(weight: nil) + end + + let(:batch_size) { 2 } + + it 'calculates average of weights with no errors' do + expect(subject.count(batch_size: batch_size)).to eq(2.5) + end + end + + context 'when batch fetch query is cancelled' do + let(:batch_size) { 22_000 } + let(:relation) { instance_double(ActiveRecord::Relation, to_sql: batch_average_query) } + + context 'when all retries fail' do + let(:batch_average_query) { 'SELECT AVG(weight) FROM issues WHERE weight BETWEEN 2 and 5' } + let(:query_timed_out_exception) { ActiveRecord::QueryCanceled.new('query timed out') } + + before do + allow(model).to receive(:where).and_return(relation) + allow(relation).to receive(:pick).and_raise(query_timed_out_exception) + end + + it 'logs failing query' do + expect(Gitlab::AppJsonLogger).to receive(:error).with( + event: 'batch_count', + relation: model.table_name, + operation: 'average', + start: 2, + query: batch_average_query, + message: 'Query has been canceled with message: query timed out' + ) + + expect(subject.count(batch_size: batch_size)).to eq(-1) + end + end + end + end +end diff --git a/spec/lib/gitlab/database/batch_count_spec.rb b/spec/lib/gitlab/database/batch_count_spec.rb index 811d4fad95c..a87b0c1a3a8 100644 --- a/spec/lib/gitlab/database/batch_count_spec.rb +++ b/spec/lib/gitlab/database/batch_count_spec.rb @@ -86,48 +86,48 @@ RSpec.describe Gitlab::Database::BatchCount do query: batch_count_query, message: 'Query has been canceled with message: query timed out' ) - expect(subject.call(model, column, batch_size: batch_size, start: 0)).to eq(-1) + expect(subject.call(model, column, batch_size: batch_size, start: 0)).to eq(fallback) end end end describe '#batch_count' do it 'counts table' do - expect(described_class.batch_count(model)).to eq(5) + 
expect(described_class.batch_count(model)).to eq(model.count) end it 'counts with :id field' do - expect(described_class.batch_count(model, :id)).to eq(5) + expect(described_class.batch_count(model, :id)).to eq(model.count) end it 'counts with "id" field' do - expect(described_class.batch_count(model, 'id')).to eq(5) + expect(described_class.batch_count(model, 'id')).to eq(model.count) end it 'counts with table.id field' do - expect(described_class.batch_count(model, "#{model.table_name}.id")).to eq(5) + expect(described_class.batch_count(model, "#{model.table_name}.id")).to eq(model.count) end it 'counts with Arel column' do - expect(described_class.batch_count(model, model.arel_table[:id])).to eq(5) + expect(described_class.batch_count(model, model.arel_table[:id])).to eq(model.count) end it 'counts table with batch_size 50K' do - expect(described_class.batch_count(model, batch_size: 50_000)).to eq(5) + expect(described_class.batch_count(model, batch_size: 50_000)).to eq(model.count) end it 'will not count table with a batch size less than allowed' do expect(described_class.batch_count(model, batch_size: small_batch_size)).to eq(fallback) end - it 'counts with a small edge case batch_sizes than result' do + it 'produces the same result with different batch sizes' do stub_const('Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE', 0) - [1, 2, 4, 5, 6].each { |i| expect(described_class.batch_count(model, batch_size: i)).to eq(5) } + [1, 2, 4, 5, 6].each { |i| expect(described_class.batch_count(model, batch_size: i)).to eq(model.count) } end it 'counts with a start and finish' do - expect(described_class.batch_count(model, start: model.minimum(:id), finish: model.maximum(:id))).to eq(5) + expect(described_class.batch_count(model, start: model.minimum(:id), finish: model.maximum(:id))).to eq(model.count) end it 'stops counting when finish value is reached' do @@ -217,6 +217,113 @@ RSpec.describe Gitlab::Database::BatchCount do end end + describe 
'#batch_count_with_timeout' do + it 'counts table' do + expect(described_class.batch_count_with_timeout(model)).to eq({ status: :completed, count: model.count }) + end + + it 'counts with :id field' do + expect(described_class.batch_count_with_timeout(model, :id)).to eq({ status: :completed, count: model.count }) + end + + it 'counts with "id" field' do + expect(described_class.batch_count_with_timeout(model, 'id')).to eq({ status: :completed, count: model.count }) + end + + it 'counts with table.id field' do + expect(described_class.batch_count_with_timeout(model, "#{model.table_name}.id")).to eq({ status: :completed, count: model.count }) + end + + it 'counts with Arel column' do + expect(described_class.batch_count_with_timeout(model, model.arel_table[:id])).to eq({ status: :completed, count: model.count }) + end + + it 'counts table with batch_size 50K' do + expect(described_class.batch_count_with_timeout(model, batch_size: 50_000)).to eq({ status: :completed, count: model.count }) + end + + it 'will not count table with a batch size less than allowed' do + expect(described_class.batch_count_with_timeout(model, batch_size: small_batch_size)).to eq({ status: :bad_config }) + end + + it 'produces the same result with different batch sizes' do + stub_const('Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE', 0) + + [1, 2, 4, 5, 6].each { |i| expect(described_class.batch_count_with_timeout(model, batch_size: i)).to eq({ status: :completed, count: model.count }) } + end + + it 'counts with a start and finish' do + expect(described_class.batch_count_with_timeout(model, start: model.minimum(:id), finish: model.maximum(:id))).to eq({ status: :completed, count: model.count }) + end + + it 'stops counting when finish value is reached' do + stub_const('Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE', 0) + + expect(described_class.batch_count_with_timeout(model, + start: model.minimum(:id), + finish: model.maximum(:id) - 1, # Do not count the last record + 
batch_size: model.count - 2 # Ensure there are multiple batches + )).to eq({ status: :completed, count: model.count - 1 }) + end + + it 'returns a partial count when timeout elapses' do + stub_const('Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE', 0) + + expect(::Gitlab::Metrics::System).to receive(:monotonic_time).and_return(1, 10, 300) + + expect( + described_class.batch_count_with_timeout(model, batch_size: 1, timeout: 250.seconds) + ).to eq({ status: :timeout, partial_results: 1, continue_from: model.minimum(:id) + 1 }) + end + + it 'starts counting from a given partial result' do + expect(described_class.batch_count_with_timeout(model, partial_results: 3)).to eq({ status: :completed, count: 3 + model.count }) + end + + it_behaves_like 'when a transaction is open' do + subject { described_class.batch_count_with_timeout(model) } + end + + it_behaves_like 'when batch fetch query is canceled' do + let(:mode) { :itself } + let(:operation) { :count } + let(:operation_args) { nil } + let(:column) { nil } + let(:fallback) { { status: :cancelled } } + + subject { described_class.method(:batch_count_with_timeout) } + end + + context 'disallowed_configurations' do + include_examples 'disallowed configurations', :batch_count do + let(:args) { [Issue] } + let(:default_batch_size) { Gitlab::Database::BatchCounter::DEFAULT_BATCH_SIZE } + end + + it 'raises an error if distinct count is requested' do + expect { described_class.batch_count_with_timeout(model.distinct(column)) }.to raise_error 'Use distinct count for optimized distinct counting' + end + end + + context 'when a relation is grouped' do + let!(:one_more_issue) { create(:issue, author: user, project: model.first.project) } + + before do + stub_const('Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE', 1) + end + + context 'count by default column' do + let(:count) do + described_class.batch_count_with_timeout(model.group(column), batch_size: 2) + end + + it 'counts grouped records' do + 
expect(count).to eq({ status: :completed, count: { user.id => 4, another_user.id => 2 } }) + end + end + end + end + describe '#batch_distinct_count' do it 'counts with column field' do expect(described_class.batch_distinct_count(model, column)).to eq(2) @@ -242,7 +349,7 @@ RSpec.describe Gitlab::Database::BatchCount do expect(described_class.batch_distinct_count(model, column, batch_size: small_batch_size)).to eq(fallback) end - it 'counts with a small edge case batch_sizes than result' do + it 'produces the same result with different batch sizes' do stub_const('Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE', 0) [1, 2, 4, 5, 6].each { |i| expect(described_class.batch_distinct_count(model, column, batch_size: i)).to eq(2) } @@ -386,56 +493,18 @@ RSpec.describe Gitlab::Database::BatchCount do end describe '#batch_average' do - let(:model) { Issue } let(:column) { :weight } before do - Issue.update_all(weight: 2) - end - - it 'returns the average of values in the given column' do - expect(described_class.batch_average(model, column)).to eq(2) - end - - it 'works when given an Arel column' do - expect(described_class.batch_average(model, model.arel_table[column])).to eq(2) - end - - it 'works with a batch size of 50K' do - expect(described_class.batch_average(model, column, batch_size: 50_000)).to eq(2) - end - - it 'works with start and finish provided' do - expect(described_class.batch_average(model, column, start: model.minimum(:id), finish: model.maximum(:id))).to eq(2) + allow_next_instance_of(Gitlab::Database::BatchAverageCounter) do |instance| + allow(instance).to receive(:count).and_return + end end - it "defaults the batch size to #{Gitlab::Database::BatchCounter::DEFAULT_AVERAGE_BATCH_SIZE}" do - min_id = model.minimum(:id) - relation = instance_double(ActiveRecord::Relation) - allow(model).to receive_message_chain(:select, public_send: relation) - batch_end_id = min_id + 
calculate_batch_size(Gitlab::Database::BatchCounter::DEFAULT_AVERAGE_BATCH_SIZE) - - expect(relation).to receive(:where).with("id" => min_id..batch_end_id).and_return(double(send: 1)) + it 'calls BatchAverageCounter' do + expect(Gitlab::Database::BatchAverageCounter).to receive(:new).with(model, column).and_call_original described_class.batch_average(model, column) end - - it_behaves_like 'when a transaction is open' do - subject { described_class.batch_average(model, column) } - end - - it_behaves_like 'disallowed configurations', :batch_average do - let(:args) { [model, column] } - let(:default_batch_size) { Gitlab::Database::BatchCounter::DEFAULT_AVERAGE_BATCH_SIZE } - let(:small_batch_size) { Gitlab::Database::BatchCounter::DEFAULT_AVERAGE_BATCH_SIZE - 1 } - end - - it_behaves_like 'when batch fetch query is canceled' do - let(:mode) { :itself } - let(:operation) { :average } - let(:operation_args) { [column] } - - subject { described_class.method(:batch_average) } - end end end diff --git a/spec/lib/gitlab/database/lock_writes_manager_spec.rb b/spec/lib/gitlab/database/lock_writes_manager_spec.rb index eb527d492cf..b1cc8add55a 100644 --- a/spec/lib/gitlab/database/lock_writes_manager_spec.rb +++ b/spec/lib/gitlab/database/lock_writes_manager_spec.rb @@ -6,13 +6,15 @@ RSpec.describe Gitlab::Database::LockWritesManager do let(:connection) { ApplicationRecord.connection } let(:test_table) { '_test_table' } let(:logger) { instance_double(Logger) } + let(:dry_run) { false } subject(:lock_writes_manager) do described_class.new( table_name: test_table, connection: connection, database_name: 'main', - logger: logger + logger: logger, + dry_run: dry_run ) end @@ -27,6 +29,16 @@ RSpec.describe Gitlab::Database::LockWritesManager do SQL end + describe "#table_locked_for_writes?" 
do + it 'returns false for a table that is not locked for writes' do + expect(subject.table_locked_for_writes?(test_table)).to eq(false) + end + + it 'returns true for a table that is locked for writes' do + expect { subject.lock_writes }.to change { subject.table_locked_for_writes?(test_table) }.from(false).to(true) + end + end + describe '#lock_writes' do it 'prevents any writes on the table' do subject.lock_writes @@ -84,11 +96,57 @@ RSpec.describe Gitlab::Database::LockWritesManager do subject.lock_writes end.to raise_error(ActiveRecord::QueryCanceled) end + + it 'skips the operation if the table is already locked for writes' do + subject.lock_writes + + expect(logger).to receive(:info).with("Skipping lock_writes, because #{test_table} is already locked for writes") + expect(connection).not_to receive(:execute).with(/CREATE TRIGGER/) + + expect do + subject.lock_writes + end.not_to change { + number_of_triggers_on(connection, test_table) + } + end + + context 'when running in dry_run mode' do + let(:dry_run) { true } + + it 'prints the sql statement to the logger' do + expect(logger).to receive(:info).with("Database: 'main', Table: '#{test_table}': Lock Writes") + expected_sql_statement = <<~SQL + CREATE TRIGGER gitlab_schema_write_trigger_for_#{test_table} + BEFORE INSERT OR UPDATE OR DELETE OR TRUNCATE + ON #{test_table} + FOR EACH STATEMENT EXECUTE FUNCTION gitlab_schema_prevent_write(); + SQL + expect(logger).to receive(:info).with(expected_sql_statement) + + subject.lock_writes + end + + it 'does not lock the tables for writes' do + subject.lock_writes + + expect do + connection.execute("delete from #{test_table}") + connection.execute("truncate #{test_table}") + end.not_to raise_error + end + end end describe '#unlock_writes' do before do - subject.lock_writes + # Locking the table without the considering the value of dry_run + described_class.new( + table_name: test_table, + connection: connection, + database_name: 'main', + logger: logger, + dry_run: 
false + ).lock_writes end it 'allows writing on the table again' do @@ -114,6 +172,28 @@ RSpec.describe Gitlab::Database::LockWritesManager do subject.unlock_writes end + + context 'when running in dry_run mode' do + let(:dry_run) { true } + + it 'prints the sql statement to the logger' do + expect(logger).to receive(:info).with("Database: 'main', Table: '#{test_table}': Allow Writes") + expected_sql_statement = <<~SQL + DROP TRIGGER IF EXISTS gitlab_schema_write_trigger_for_#{test_table} ON #{test_table}; + SQL + expect(logger).to receive(:info).with(expected_sql_statement) + + subject.unlock_writes + end + + it 'does not unlock the tables for writes' do + subject.unlock_writes + + expect do + connection.execute("delete from #{test_table}") + end.to raise_error(ActiveRecord::StatementInvalid, /Table: "#{test_table}" is write protected/) + end + end end def number_of_triggers_on(connection, table_name) diff --git a/spec/lib/gitlab/database/migration_helpers_spec.rb b/spec/lib/gitlab/database/migration_helpers_spec.rb index dd5ad40d8ef..d73b478ee7c 100644 --- a/spec/lib/gitlab/database/migration_helpers_spec.rb +++ b/spec/lib/gitlab/database/migration_helpers_spec.rb @@ -667,7 +667,7 @@ RSpec.describe Gitlab::Database::MigrationHelpers do column: :user_id, on_delete: :cascade, name: name, - primary_key: :id).and_return(true) + primary_key: :id).and_return(true) expect(model).not_to receive(:execute).with(/ADD CONSTRAINT/) expect(model).to receive(:execute).with(/VALIDATE CONSTRAINT/) diff --git a/spec/lib/gitlab/database/migrations/test_batched_background_runner_spec.rb b/spec/lib/gitlab/database/migrations/test_batched_background_runner_spec.rb index 9451a6bd34a..3ac483c8ab7 100644 --- a/spec/lib/gitlab/database/migrations/test_batched_background_runner_spec.rb +++ b/spec/lib/gitlab/database/migrations/test_batched_background_runner_spec.rb @@ -24,7 +24,7 @@ RSpec.describe Gitlab::Database::Migrations::TestBatchedBackgroundRunner, :freez connection.execute(<<~SQL) 
CREATE TABLE #{table_name} ( id bigint primary key not null, - data bigint + data bigint default 0 ); insert into #{table_name} (id) select i from generate_series(1, 1000) g(i); @@ -40,10 +40,12 @@ RSpec.describe Gitlab::Database::Migrations::TestBatchedBackgroundRunner, :freez :id, :data, batch_size: 100, job_interval: 5.minutes) # job_interval is skipped when testing - described_class.new(result_dir: result_dir, connection: connection).run_jobs(for_duration: 1.minute) - unmigrated_row_count = define_batchable_model(table_name).where('id != data').count - expect(unmigrated_row_count).to eq(0) + # Expect that running sampling for this migration processes some of the rows. Sampling doesn't run + # over every row in the table, so this does not completely migrate the table. + expect { described_class.new(result_dir: result_dir, connection: connection).run_jobs(for_duration: 1.minute) } + .to change { define_batchable_model(table_name).where('id IS DISTINCT FROM data').count } + .by_at_most(-1) end end @@ -62,7 +64,7 @@ RSpec.describe Gitlab::Database::Migrations::TestBatchedBackgroundRunner, :freez described_class.new(result_dir: result_dir, connection: connection).run_jobs(for_duration: 3.minutes) - expect(calls.count).to eq(10) # 1000 rows / batch size 100 = 10 + expect(calls).not_to be_empty end context 'with multiple jobs to run' do @@ -92,4 +94,19 @@ RSpec.describe Gitlab::Database::Migrations::TestBatchedBackgroundRunner, :freez end end end + + context 'choosing uniform batches to run' do + subject { described_class.new(result_dir: result_dir, connection: connection) } + + describe '#uniform_fractions' do + it 'generates evenly distributed sequences of fractions' do + received = subject.uniform_fractions.take(9) + expected = [0, 1, 1.0 / 2, 1.0 / 4, 3.0 / 4, 1.0 / 8, 3.0 / 8, 5.0 / 8, 7.0 / 8] + + # All the fraction numerators are small integers, and all denominators are powers of 2, so these + # fit perfectly into floating point numbers with zero loss of 
precision + expect(received).to eq(expected) + end + end + end end diff --git a/spec/lib/gitlab/database/partitioning/convert_table_to_first_list_partition_spec.rb b/spec/lib/gitlab/database/partitioning/convert_table_to_first_list_partition_spec.rb new file mode 100644 index 00000000000..af7d751a404 --- /dev/null +++ b/spec/lib/gitlab/database/partitioning/convert_table_to_first_list_partition_spec.rb @@ -0,0 +1,246 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::Partitioning::ConvertTableToFirstListPartition do + include Gitlab::Database::DynamicModelHelpers + include Database::TableSchemaHelpers + + let(:migration_context) { Gitlab::Database::Migration[2.0].new } + + let(:connection) { migration_context.connection } + let(:table_name) { '_test_table_to_partition' } + let(:table_identifier) { "#{connection.current_schema}.#{table_name}" } + let(:partitioning_column) { :partition_number } + let(:partitioning_default) { 1 } + let(:referenced_table_name) { '_test_referenced_table' } + let(:other_referenced_table_name) { '_test_other_referenced_table' } + let(:parent_table_name) { "#{table_name}_parent" } + + let(:model) { define_batchable_model(table_name, connection: connection) } + + let(:parent_model) { define_batchable_model(parent_table_name, connection: connection) } + + let(:converter) do + described_class.new( + migration_context: migration_context, + table_name: table_name, + partitioning_column: partitioning_column, + parent_table_name: parent_table_name, + zero_partition_value: partitioning_default + ) + end + + before do + # Suppress printing migration progress + allow(migration_context).to receive(:puts) + allow(migration_context.connection).to receive(:transaction_open?).and_return(false) + + connection.execute(<<~SQL) + create table #{referenced_table_name} ( + id bigserial primary key not null + ) + SQL + + connection.execute(<<~SQL) + create table #{other_referenced_table_name} ( + id bigserial primary 
key not null + ) + SQL + + connection.execute(<<~SQL) + insert into #{referenced_table_name} default values; + insert into #{other_referenced_table_name} default values; + SQL + + connection.execute(<<~SQL) + create table #{table_name} ( + id bigserial not null, + #{partitioning_column} bigint not null default #{partitioning_default}, + referenced_id bigint not null references #{referenced_table_name} (id) on delete cascade, + other_referenced_id bigint not null references #{other_referenced_table_name} (id) on delete set null, + primary key (id, #{partitioning_column}) + ) + SQL + + connection.execute(<<~SQL) + insert into #{table_name} (referenced_id, other_referenced_id) + select #{referenced_table_name}.id, #{other_referenced_table_name}.id + from #{referenced_table_name}, #{other_referenced_table_name}; + SQL + end + + describe "#prepare_for_partitioning" do + subject(:prepare) { converter.prepare_for_partitioning } + + it 'adds a check constraint' do + expect { prepare }.to change { + Gitlab::Database::PostgresConstraint + .check_constraints + .by_table_identifier(table_identifier) + .count + }.from(0).to(1) + end + end + + describe '#revert_prepare_for_partitioning' do + before do + converter.prepare_for_partitioning + end + + subject(:revert_prepare) { converter.revert_preparation_for_partitioning } + + it 'removes a check constraint' do + expect { revert_prepare }.to change { + Gitlab::Database::PostgresConstraint + .check_constraints + .by_table_identifier("#{connection.current_schema}.#{table_name}") + .count + }.from(1).to(0) + end + end + + describe "#convert_to_zero_partition" do + subject(:partition) { converter.partition } + + before do + converter.prepare_for_partitioning + end + + context 'when the primary key is incorrect' do + before do + connection.execute(<<~SQL) + alter table #{table_name} drop constraint #{table_name}_pkey; + alter table #{table_name} add constraint #{table_name}_pkey PRIMARY KEY (id); + SQL + end + + it 'throws a reasonable 
error message' do + expect { partition }.to raise_error(described_class::UnableToPartition, /#{partitioning_column}/) + end + end + + context 'when there is not a supporting check constraint' do + before do + connection.execute(<<~SQL) + alter table #{table_name} drop constraint partitioning_constraint; + SQL + end + + it 'throws a reasonable error message' do + expect { partition }.to raise_error(described_class::UnableToPartition, /constraint /) + end + end + + it 'migrates the table to a partitioned table' do + fks_before = migration_context.foreign_keys(table_name) + + partition + + expect(Gitlab::Database::PostgresPartition.for_parent_table(parent_table_name).count).to eq(1) + expect(migration_context.foreign_keys(parent_table_name).map(&:options)).to match_array(fks_before.map(&:options)) + + connection.execute(<<~SQL) + insert into #{table_name} (referenced_id, other_referenced_id) select #{referenced_table_name}.id, #{other_referenced_table_name}.id from #{referenced_table_name}, #{other_referenced_table_name}; + SQL + + # Create a second partition + connection.execute(<<~SQL) + create table #{table_name}2 partition of #{parent_table_name} FOR VALUES IN (2) + SQL + + parent_model.create!(partitioning_column => 2, :referenced_id => 1, :other_referenced_id => 1) + expect(parent_model.pluck(:id)).to match_array([1, 2, 3]) + end + + context 'when an error occurs during the conversion' do + def fail_first_time + # We can't directly use a boolean here, as we need something that will be passed by-reference to the proc + fault_status = { faulted: false } + proc do |m, *args, **kwargs| + next m.call(*args, **kwargs) if fault_status[:faulted] + + fault_status[:faulted] = true + raise 'fault!' 
+ end + end + + def fail_sql_matching(regex) + proc do + allow(migration_context.connection).to receive(:execute).and_call_original + allow(migration_context.connection).to receive(:execute).with(regex).and_wrap_original(&fail_first_time) + end + end + + def fail_adding_fk(from_table, to_table) + proc do + allow(migration_context.connection).to receive(:add_foreign_key).and_call_original + expect(migration_context.connection).to receive(:add_foreign_key).with(from_table, to_table, any_args) + .and_wrap_original(&fail_first_time) + end + end + + where(:case_name, :fault) do + [ + ["creating parent table", lazy { fail_sql_matching(/CREATE/i) }], + ["adding the first foreign key", lazy { fail_adding_fk(parent_table_name, referenced_table_name) }], + ["adding the second foreign key", lazy { fail_adding_fk(parent_table_name, other_referenced_table_name) }], + ["attaching table", lazy { fail_sql_matching(/ATTACH/i) }] + ] + end + + before do + # Set up the fault that we'd like to inject + fault.call + end + + with_them do + it 'recovers from a fault', :aggregate_failures do + expect { converter.partition }.to raise_error(/fault/) + expect(Gitlab::Database::PostgresPartition.for_parent_table(parent_table_name).count).to eq(0) + + expect { converter.partition }.not_to raise_error + expect(Gitlab::Database::PostgresPartition.for_parent_table(parent_table_name).count).to eq(1) + end + end + end + end + + describe '#revert_conversion_to_zero_partition' do + before do + converter.prepare_for_partitioning + converter.partition + end + + subject(:revert_conversion) { converter.revert_partitioning } + + it 'detaches the partition' do + expect { revert_conversion }.to change { + Gitlab::Database::PostgresPartition + .for_parent_table(parent_table_name).count + }.from(1).to(0) + end + + it 'does not drop the child partition' do + expect { revert_conversion }.not_to change { table_oid(table_name) } + end + + it 'removes the parent table' do + expect { revert_conversion }.to change { 
table_oid(parent_table_name).present? }.from(true).to(false) + end + + it 're-adds the check constraint' do + expect { revert_conversion }.to change { + Gitlab::Database::PostgresConstraint + .check_constraints + .by_table_identifier(table_identifier) + .count + }.by(1) + end + + it 'moves sequences back to the original table' do + expect { revert_conversion }.to change { converter.send(:sequences_owned_by, table_name).count }.from(0) + .and change { converter.send(:sequences_owned_by, parent_table_name).count }.to(0) + end + end +end diff --git a/spec/lib/gitlab/database/partitioning/partition_manager_spec.rb b/spec/lib/gitlab/database/partitioning/partition_manager_spec.rb index dca4548a0a3..8027990a546 100644 --- a/spec/lib/gitlab/database/partitioning/partition_manager_spec.rb +++ b/spec/lib/gitlab/database/partitioning/partition_manager_spec.rb @@ -21,20 +21,11 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionManager do let(:model) { double(partitioning_strategy: partitioning_strategy, table_name: table, connection: connection) } let(:connection) { ActiveRecord::Base.connection } - let(:table) { "issues" } + let(:table) { "my_model_example_table" } let(:partitioning_strategy) do double(missing_partitions: partitions, extra_partitions: [], after_adding_partitions: nil) end - before do - allow(connection).to receive(:table_exists?).and_call_original - allow(connection).to receive(:table_exists?).with(table).and_return(true) - allow(connection).to receive(:execute).and_call_original - expect(partitioning_strategy).to receive(:validate_and_fix) - - stub_exclusive_lease(described_class::MANAGEMENT_LEASE_KEY % table, timeout: described_class::LEASE_TIMEOUT) - end - let(:partitions) do [ instance_double(Gitlab::Database::Partitioning::TimePartition, table: 'bar', partition_name: 'foo', to_sql: "SELECT 1"), @@ -42,19 +33,63 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionManager do ] end - it 'creates the partition' do - expect(connection).to 
receive(:execute).with("LOCK TABLE \"#{table}\" IN ACCESS EXCLUSIVE MODE") - expect(connection).to receive(:execute).with(partitions.first.to_sql) - expect(connection).to receive(:execute).with(partitions.second.to_sql) + context 'when the given table is partitioned' do + before do + create_partitioned_table(connection, table) - sync_partitions + allow(connection).to receive(:table_exists?).and_call_original + allow(connection).to receive(:table_exists?).with(table).and_return(true) + allow(connection).to receive(:execute).and_call_original + expect(partitioning_strategy).to receive(:validate_and_fix) + + stub_exclusive_lease(described_class::MANAGEMENT_LEASE_KEY % table, timeout: described_class::LEASE_TIMEOUT) + end + + it 'creates the partition' do + expect(connection).to receive(:execute).with("LOCK TABLE \"#{table}\" IN ACCESS EXCLUSIVE MODE") + expect(connection).to receive(:execute).with(partitions.first.to_sql) + expect(connection).to receive(:execute).with(partitions.second.to_sql) + + sync_partitions + end + + context 'with explicitly provided connection' do + let(:connection) { Ci::ApplicationRecord.connection } + + it 'uses the explicitly provided connection when any' do + skip_if_multiple_databases_not_setup + + expect(connection).to receive(:execute).with("LOCK TABLE \"#{table}\" IN ACCESS EXCLUSIVE MODE") + expect(connection).to receive(:execute).with(partitions.first.to_sql) + expect(connection).to receive(:execute).with(partitions.second.to_sql) + + described_class.new(model, connection: connection).sync_partitions + end + end + + context 'when an error occurs during partition management' do + it 'does not raise an error' do + expect(partitioning_strategy).to receive(:missing_partitions).and_raise('this should never happen (tm)') + + expect { sync_partitions }.not_to raise_error + end + end + end - context 'when an error occurs during partition management' do - it 'does not raise an error' do - expect(partitioning_strategy).to 
receive(:missing_partitions).and_raise('this should never happen (tm)') + context 'when the table is not partitioned' do + let(:table) { 'this_does_not_need_to_be_real_table' } + + it 'does not try creating the partitions' do + expect(connection).not_to receive(:execute).with("LOCK TABLE \"#{table}\" IN ACCESS EXCLUSIVE MODE") + expect(Gitlab::AppLogger).to receive(:warn).with( + { + message: 'Skipping synching partitions', + table_name: table, + connection_name: 'main' + } + ) - expect { sync_partitions }.not_to raise_error + sync_partitions end end end @@ -74,11 +109,7 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionManager do end before do - connection.execute(<<~SQL) - CREATE TABLE my_model_example_table - (id serial not null, created_at timestamptz not null, primary key (id, created_at)) - PARTITION BY RANGE (created_at); - SQL + create_partitioned_table(connection, 'my_model_example_table') end it 'creates partitions' do @@ -98,6 +129,8 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionManager do end before do + create_partitioned_table(connection, table) + allow(connection).to receive(:table_exists?).and_call_original allow(connection).to receive(:table_exists?).with(table).and_return(true) expect(partitioning_strategy).to receive(:validate_and_fix) @@ -260,4 +293,12 @@ RSpec.describe Gitlab::Database::Partitioning::PartitionManager do expect { described_class.new(my_model).sync_partitions }.to change { has_partition(my_model, 2.months.ago.beginning_of_month) }.from(true).to(false).and(change { num_partitions(my_model) }.by(0)) end end + + def create_partitioned_table(connection, table) + connection.execute(<<~SQL) + CREATE TABLE #{table} + (id serial not null, created_at timestamptz not null, primary key (id, created_at)) + PARTITION BY RANGE (created_at); + SQL + end end diff --git a/spec/lib/gitlab/database/partitioning/sliding_list_strategy_spec.rb b/spec/lib/gitlab/database/partitioning/sliding_list_strategy_spec.rb index 
04b9fba5b2f..07c2c6606d8 100644 --- a/spec/lib/gitlab/database/partitioning/sliding_list_strategy_spec.rb +++ b/spec/lib/gitlab/database/partitioning/sliding_list_strategy_spec.rb @@ -136,7 +136,7 @@ RSpec.describe Gitlab::Database::Partitioning::SlidingListStrategy do end context 'when some partitions are true for detach_partition_if' do - let(:detach_partition_if) { ->(p) { p != 5 } } + let(:detach_partition_if) { ->(p) { p.value != 5 } } it 'is the leading set of partitions before that value' do # should not contain partition 2 since it's the default value for the partition column @@ -181,9 +181,10 @@ RSpec.describe Gitlab::Database::Partitioning::SlidingListStrategy do Class.new(ApplicationRecord) do include PartitionedTable - partitioned_by :partition, strategy: :sliding_list, - next_partition_if: proc { false }, - detach_partition_if: proc { false } + partitioned_by :partition, + strategy: :sliding_list, + next_partition_if: proc { false }, + detach_partition_if: proc { false } end end.to raise_error(/ignored_columns/) end @@ -195,7 +196,8 @@ RSpec.describe Gitlab::Database::Partitioning::SlidingListStrategy do self.ignored_columns = [:partition] - partitioned_by :partition, strategy: :sliding_list, + partitioned_by :partition, + strategy: :sliding_list, next_partition_if: proc { false }, detach_partition_if: proc { false } end @@ -221,7 +223,8 @@ RSpec.describe Gitlab::Database::Partitioning::SlidingListStrategy do def self.detach_partition_if_wrapper(...) detach_partition?(...) 
end - partitioned_by :partition, strategy: :sliding_list, + partitioned_by :partition, + strategy: :sliding_list, next_partition_if: method(:next_partition_if_wrapper), detach_partition_if: method(:detach_partition_if_wrapper) diff --git a/spec/lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table_spec.rb b/spec/lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table_spec.rb index 3072c413246..1885e84ac4c 100644 --- a/spec/lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table_spec.rb +++ b/spec/lib/gitlab/database/partitioning_migration_helpers/backfill_partitioned_table_spec.rb @@ -97,7 +97,8 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::BackfillPartition end it 'marks each job record as succeeded after processing' do - create(:background_migration_job, class_name: "::#{described_class.name}", + create(:background_migration_job, + class_name: "::#{described_class.name}", arguments: [source1.id, source3.id, source_table, destination_table, unique_key]) expect(::Gitlab::Database::BackgroundMigrationJob).to receive(:mark_all_as_succeeded).and_call_original @@ -108,7 +109,8 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::BackfillPartition end it 'returns the number of job records marked as succeeded' do - create(:background_migration_job, class_name: "::#{described_class.name}", + create(:background_migration_job, + class_name: "::#{described_class.name}", arguments: [source1.id, source3.id, source_table, destination_table, unique_key]) jobs_updated = backfill_job.perform(source1.id, source3.id, source_table, destination_table, unique_key) diff --git a/spec/lib/gitlab/database/partitioning_migration_helpers/index_helpers_spec.rb b/spec/lib/gitlab/database/partitioning_migration_helpers/index_helpers_spec.rb index edb8ae36c45..7465f69b87c 100644 --- a/spec/lib/gitlab/database/partitioning_migration_helpers/index_helpers_spec.rb +++ 
b/spec/lib/gitlab/database/partitioning_migration_helpers/index_helpers_spec.rb @@ -26,6 +26,7 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::IndexHelpers do CREATE TABLE #{table_name} ( id serial NOT NULL, created_at timestamptz NOT NULL, + updated_at timestamptz NOT NULL, PRIMARY KEY (id, created_at) ) PARTITION BY RANGE (created_at); @@ -204,4 +205,30 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::IndexHelpers do end end end + + describe '#find_duplicate_indexes' do + context 'when duplicate and non-duplicate indexes exist' do + let(:nonduplicate_column_name) { 'updated_at' } + let(:nonduplicate_index_name) { 'updated_at_idx' } + let(:duplicate_column_name) { 'created_at' } + let(:duplicate_index_name1) { 'created_at_idx' } + let(:duplicate_index_name2) { 'index_on_created_at' } + + before do + connection.execute(<<~SQL) + CREATE INDEX #{nonduplicate_index_name} ON #{table_name} (#{nonduplicate_column_name}); + CREATE INDEX #{duplicate_index_name1} ON #{table_name} (#{duplicate_column_name}); + CREATE INDEX #{duplicate_index_name2} ON #{table_name} (#{duplicate_column_name}); + SQL + end + + subject do + migration.find_duplicate_indexes(table_name) + end + + it 'finds the duplicate index' do + expect(subject).to match_array([match_array([duplicate_index_name1, duplicate_index_name2])]) + end + end + end end diff --git a/spec/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers_spec.rb b/spec/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers_spec.rb index 1026b4370a5..8bb9ad2737a 100644 --- a/spec/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers_spec.rb +++ b/spec/lib/gitlab/database/partitioning_migration_helpers/table_management_helpers_spec.rb @@ -41,6 +41,76 @@ RSpec.describe Gitlab::Database::PartitioningMigrationHelpers::TableManagementHe allow(migration).to receive(:assert_table_is_allowed) end + context 'list partitioning conversion helpers' 
do + shared_examples_for 'delegates to ConvertTableToFirstListPartition' do + it 'throws an error if in a transaction' do + allow(migration).to receive(:transaction_open?).and_return(true) + expect { migrate }.to raise_error(/cannot be run inside a transaction/) + end + + it 'delegates to a method on ConvertTableToFirstListPartition' do + expect_next_instance_of(Gitlab::Database::Partitioning::ConvertTableToFirstListPartition, + migration_context: migration, + table_name: source_table, + parent_table_name: partitioned_table, + partitioning_column: partition_column, + zero_partition_value: min_date) do |converter| + expect(converter).to receive(expected_method) + end + + migrate + end + end + + describe '#convert_table_to_first_list_partition' do + it_behaves_like 'delegates to ConvertTableToFirstListPartition' do + let(:expected_method) { :partition } + let(:migrate) do + migration.convert_table_to_first_list_partition(table_name: source_table, + partitioning_column: partition_column, + parent_table_name: partitioned_table, + initial_partitioning_value: min_date) + end + end + end + + describe '#revert_converting_table_to_first_list_partition' do + it_behaves_like 'delegates to ConvertTableToFirstListPartition' do + let(:expected_method) { :revert_partitioning } + let(:migrate) do + migration.revert_converting_table_to_first_list_partition(table_name: source_table, + partitioning_column: partition_column, + parent_table_name: partitioned_table, + initial_partitioning_value: min_date) + end + end + end + + describe '#prepare_constraint_for_list_partitioning' do + it_behaves_like 'delegates to ConvertTableToFirstListPartition' do + let(:expected_method) { :prepare_for_partitioning } + let(:migrate) do + migration.prepare_constraint_for_list_partitioning(table_name: source_table, + partitioning_column: partition_column, + parent_table_name: partitioned_table, + initial_partitioning_value: min_date) + end + end + end + + describe 
'#revert_preparing_constraint_for_list_partitioning' do + it_behaves_like 'delegates to ConvertTableToFirstListPartition' do + let(:expected_method) { :revert_preparation_for_partitioning } + let(:migrate) do + migration.revert_preparing_constraint_for_list_partitioning(table_name: source_table, + partitioning_column: partition_column, + parent_table_name: partitioned_table, + initial_partitioning_value: min_date) + end + end + end + end + describe '#partition_table_by_date' do let(:partition_column) { 'created_at' } let(:old_primary_key) { 'id' } diff --git a/spec/lib/gitlab/database/partitioning_spec.rb b/spec/lib/gitlab/database/partitioning_spec.rb index 36c8b0811fe..94cdbfb2328 100644 --- a/spec/lib/gitlab/database/partitioning_spec.rb +++ b/spec/lib/gitlab/database/partitioning_spec.rb @@ -56,7 +56,7 @@ RSpec.describe Gitlab::Database::Partitioning do end it 'does not call sync_partitions' do - expect(described_class).to receive(:sync_partitions).never + expect(described_class).not_to receive(:sync_partitions) described_class.sync_partitions_ignore_db_error end @@ -64,6 +64,7 @@ RSpec.describe Gitlab::Database::Partitioning do end describe '.sync_partitions' do + let(:ci_connection) { Ci::ApplicationRecord.connection } let(:table_names) { %w[partitioning_test1 partitioning_test2] } let(:models) do table_names.map do |table_name| @@ -94,6 +95,38 @@ RSpec.describe Gitlab::Database::Partitioning do .and change { find_partitions(table_names.last).size }.from(0) end + context 'with multiple databases' do + before do + table_names.each do |table_name| + ci_connection.execute("DROP TABLE IF EXISTS #{table_name}") + + ci_connection.execute(<<~SQL) + CREATE TABLE #{table_name} ( + id serial not null, + created_at timestamptz not null, + PRIMARY KEY (id, created_at)) + PARTITION BY RANGE (created_at); + SQL + end + end + + after do + table_names.each do |table_name| + ci_connection.execute("DROP TABLE IF EXISTS #{table_name}") + end + end + + it 'creates partitions in 
each database' do + skip_if_multiple_databases_not_setup + + expect { described_class.sync_partitions(models) } + .to change { find_partitions(table_names.first, conn: connection).size }.from(0) + .and change { find_partitions(table_names.last, conn: connection).size }.from(0) + .and change { find_partitions(table_names.first, conn: ci_connection).size }.from(0) + .and change { find_partitions(table_names.last, conn: ci_connection).size }.from(0) + end + end + context 'when no partitioned models are given' do it 'manages partitions for each registered model' do described_class.register_models([models.first]) @@ -111,16 +144,44 @@ RSpec.describe Gitlab::Database::Partitioning do end context 'when only a specific database is requested' do + let(:ci_model) do + Class.new(Ci::ApplicationRecord) do + include PartitionedTable + + self.table_name = 'partitioning_test3' + partitioned_by :created_at, strategy: :monthly + end + end + before do - allow(models.first).to receive_message_chain('connection_db_config.name').and_return('main') - allow(models.last).to receive_message_chain('connection_db_config.name').and_return('ci') + (table_names + ['partitioning_test3']).each do |table_name| + ci_connection.execute("DROP TABLE IF EXISTS #{table_name}") + + ci_connection.execute(<<~SQL) + CREATE TABLE #{table_name} ( + id serial not null, + created_at timestamptz not null, + PRIMARY KEY (id, created_at)) + PARTITION BY RANGE (created_at); + SQL + end + end + + after do + (table_names + ['partitioning_test3']).each do |table_name| + ci_connection.execute("DROP TABLE IF EXISTS #{table_name}") + end end it 'manages partitions for models for the given database', :aggregate_failures do - expect { described_class.sync_partitions(models, only_on: 'ci') } - .to change { find_partitions(table_names.last).size }.from(0) + skip_if_multiple_databases_not_setup + + expect { described_class.sync_partitions([models.first, ci_model], only_on: 'ci') } + .to change { 
find_partitions(ci_model.table_name, conn: ci_connection).size }.from(0) - expect(find_partitions(table_names.first).size).to eq(0) + expect(find_partitions(models.first.table_name).size).to eq(0) + expect(find_partitions(models.first.table_name, conn: ci_connection).size).to eq(0) + expect(find_partitions(ci_model.table_name).size).to eq(0) end end end diff --git a/spec/lib/gitlab/database/postgres_constraint_spec.rb b/spec/lib/gitlab/database/postgres_constraint_spec.rb new file mode 100644 index 00000000000..75084a69115 --- /dev/null +++ b/spec/lib/gitlab/database/postgres_constraint_spec.rb @@ -0,0 +1,123 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::PostgresConstraint, type: :model do + # PostgresConstraint does not `behaves_like 'a postgres model'` because it does not correspond 1-1 with a single entry + # in pg_class + let(:schema) { ActiveRecord::Base.connection.current_schema } + let(:table_name) { '_test_table' } + let(:table_identifier) { "#{schema}.#{table_name}" } + let(:referenced_name) { '_test_referenced' } + let(:check_constraint_a_positive) { 'check_constraint_a_positive' } + let(:check_constraint_a_gt_b) { 'check_constraint_a_gt_b' } + let(:invalid_constraint_a) { 'check_constraint_b_positive_invalid' } + let(:unique_constraint_a) { "#{table_name}_a_key" } + + before do + ActiveRecord::Base.connection.execute(<<~SQL) + create table #{referenced_name} ( + id bigserial primary key not null + ); + + create table #{table_name} ( + id bigserial not null, + referenced_id bigint not null references #{referenced_name}(id), + a integer unique, + b integer, + primary key (id, referenced_id), + constraint #{check_constraint_a_positive} check (a > 0), + constraint #{check_constraint_a_gt_b} check (a > b) + ); + + alter table #{table_name} add constraint #{invalid_constraint_a} CHECK (a > 1) NOT VALID; + SQL + end + + describe '#by_table_identifier' do + subject(:constraints_for_table) { 
described_class.by_table_identifier(table_identifier) } + + it 'includes all constraints on the table' do + all_constraints_for_table = described_class.all.to_a.select { |c| c.table_identifier == table_identifier } + expect(all_constraints_for_table.map(&:oid)).to match_array(constraints_for_table.pluck(:oid)) + end + + it 'throws an error if the format is incorrect' do + expect { described_class.by_table_identifier('not-an-identifier') }.to raise_error(ArgumentError) + end + end + + describe '#check_constraints' do + subject(:check_constraints) { described_class.check_constraints.by_table_identifier(table_identifier) } + + it 'finds check constraints for the table' do + expect(check_constraints.map(&:name)).to contain_exactly(check_constraint_a_positive, + check_constraint_a_gt_b, + invalid_constraint_a) + end + + it 'includes columns for the check constraints', :aggregate_failures do + expect(check_constraints.find_by(name: check_constraint_a_positive).column_names).to contain_exactly('a') + expect(check_constraints.find_by(name: check_constraint_a_gt_b).column_names).to contain_exactly('a', 'b') + end + end + + describe "#valid" do + subject(:valid_constraint_names) { described_class.valid.by_table_identifier(table_identifier).pluck(:name) } + + let(:all_constraint_names) { described_class.by_table_identifier(table_identifier).pluck(:name) } + + it 'excludes invalid constraints' do + expect(valid_constraint_names).not_to include(invalid_constraint_a) + expect(valid_constraint_names).to match_array(all_constraint_names - [invalid_constraint_a]) + end + end + + describe '#primary_key_constraints' do + subject(:pk_constraints) { described_class.primary_key_constraints.by_table_identifier(table_identifier) } + + it 'finds the primary key constraint for the table' do + expect(pk_constraints.count).to eq(1) + expect(pk_constraints.first.constraint_type).to eq('p') + end + + it 'finds the columns in the primary key constraint' do + constraint = pk_constraints.first + 
expect(constraint.column_names).to contain_exactly('id', 'referenced_id') + end + end + + describe '#unique_constraints' do + subject(:unique_constraints) { described_class.unique_constraints.by_table_identifier(table_identifier) } + + it 'finds the unique constraints for the table' do + expect(unique_constraints.pluck(:name)).to contain_exactly(unique_constraint_a) + end + end + + describe '#primary_or_unique_constraints' do + subject(:pk_or_unique_constraints) do + described_class.primary_or_unique_constraints.by_table_identifier(table_identifier) + end + + it 'finds primary and unique constraints' do + expect(pk_or_unique_constraints.pluck(:name)).to contain_exactly("#{table_name}_pkey", unique_constraint_a) + end + end + + describe '#including_column' do + it 'only matches constraints on the given column' do + constraints_on_a = described_class.by_table_identifier(table_identifier).including_column('a').map(&:name) + expect(constraints_on_a).to contain_exactly(check_constraint_a_positive, check_constraint_a_gt_b, + unique_constraint_a, invalid_constraint_a) + end + end + + describe '#not_including_column' do + it 'only matches constraints not including the given column' do + constraints_not_on_a = described_class.by_table_identifier(table_identifier).not_including_column('a').map(&:name) + + expect(constraints_not_on_a).to contain_exactly("#{table_name}_pkey", "#{table_name}_referenced_id_fkey") + end + end +end diff --git a/spec/lib/gitlab/database/query_analyzers/ci/partitioning_analyzer_spec.rb b/spec/lib/gitlab/database/query_analyzers/ci/partitioning_analyzer_spec.rb new file mode 100644 index 00000000000..ef7c7965c09 --- /dev/null +++ b/spec/lib/gitlab/database/query_analyzers/ci/partitioning_analyzer_spec.rb @@ -0,0 +1,78 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::QueryAnalyzers::Ci::PartitioningAnalyzer, query_analyzers: false do + let(:analyzer) { described_class } + + before do + 
allow(Gitlab::Database::QueryAnalyzer.instance).to receive(:all_analyzers).and_return([analyzer]) + end + + context 'when ci_partitioning_analyze_queries is disabled' do + before do + stub_feature_flags(ci_partitioning_analyze_queries: false) + end + + it 'does not analyze the query' do + expect(analyzer).not_to receive(:analyze) + + process_sql(Ci::BuildMetadata, "SELECT 1 FROM ci_builds_metadata") + end + end + + context 'when ci_partitioning_analyze_queries is enabled' do + context 'when analyzing targeted tables' do + described_class::ENABLED_TABLES.each do |enabled_table| + context 'when querying a non routing table' do + it 'tracks exception' do + expect(::Gitlab::ErrorTracking).to receive(:track_and_raise_for_dev_exception) + process_sql(Ci::ApplicationRecord, "SELECT 1 FROM #{enabled_table}") + end + + it 'raises RoutingTableNotUsedError' do + expect { process_sql(Ci::ApplicationRecord, "SELECT 1 FROM #{enabled_table}") } + .to raise_error(described_class::RoutingTableNotUsedError) + end + end + end + + context 'when updating a record' do + it 'raises RoutingTableNotUsedError' do + expect { process_sql(Ci::BuildMetadata, "UPDATE ci_builds_metadata SET id = 1") } + .to raise_error(described_class::RoutingTableNotUsedError) + end + end + + context 'when inserting a record' do + it 'raises RoutingTableNotUsedError' do + expect { process_sql(Ci::BuildMetadata, "INSERT INTO ci_builds_metadata (id) VALUES(1)") } + .to raise_error(described_class::RoutingTableNotUsedError) + end + end + end + + context 'when analyzing non targeted table' do + it 'does not raise error' do + expect { process_sql(Ci::BuildMetadata, "SELECT 1 FROM projects") } + .not_to raise_error + end + end + + context 'when querying a routing table' do + it 'does not raise error' do + expect { process_sql(Ci::BuildMetadata, "SELECT 1 FROM p_ci_builds_metadata") } + .not_to raise_error + end + end + end + + private + + def process_sql(model, sql) + Gitlab::Database::QueryAnalyzer.instance.within do 
+ # Skip load balancer and retrieve connection assigned to model + Gitlab::Database::QueryAnalyzer.instance.send(:process_sql, sql, model.retrieve_connection) + end + end +end diff --git a/spec/lib/gitlab/database/reindexing_spec.rb b/spec/lib/gitlab/database/reindexing_spec.rb index 495e953f993..4c98185e780 100644 --- a/spec/lib/gitlab/database/reindexing_spec.rb +++ b/spec/lib/gitlab/database/reindexing_spec.rb @@ -46,25 +46,11 @@ RSpec.describe Gitlab::Database::Reindexing do end end - context 'when async index destruction is enabled' do - it 'executes async index destruction prior to any reindexing actions' do - stub_feature_flags(database_async_index_destruction: true) + it 'executes async index destruction prior to any reindexing actions' do + expect(Gitlab::Database::AsyncIndexes).to receive(:drop_pending_indexes!).ordered.exactly(databases_count).times + expect(described_class).to receive(:automatic_reindexing).ordered.exactly(databases_count).times - expect(Gitlab::Database::AsyncIndexes).to receive(:drop_pending_indexes!).ordered.exactly(databases_count).times - expect(described_class).to receive(:automatic_reindexing).ordered.exactly(databases_count).times - - described_class.invoke - end - end - - context 'when async index destruction is disabled' do - it 'does not execute async index destruction' do - stub_feature_flags(database_async_index_destruction: false) - - expect(Gitlab::Database::AsyncIndexes).not_to receive(:drop_pending_indexes!) 
- - described_class.invoke - end + described_class.invoke end context 'calls automatic reindexing' do diff --git a/spec/lib/gitlab/database/tables_sorted_by_foreign_keys_spec.rb b/spec/lib/gitlab/database/tables_sorted_by_foreign_keys_spec.rb new file mode 100644 index 00000000000..97abd6d23bd --- /dev/null +++ b/spec/lib/gitlab/database/tables_sorted_by_foreign_keys_spec.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::TablesSortedByForeignKeys do + let(:connection) { ApplicationRecord.connection } + let(:tables) { %w[_test_gitlab_main_items _test_gitlab_main_references] } + + subject do + described_class.new(connection, tables).execute + end + + before do + statement = <<~SQL + CREATE TABLE _test_gitlab_main_items (id serial NOT NULL PRIMARY KEY); + + CREATE TABLE _test_gitlab_main_references ( + id serial NOT NULL PRIMARY KEY, + item_id BIGINT NOT NULL, + CONSTRAINT fk_constrained_1 FOREIGN KEY(item_id) REFERENCES _test_gitlab_main_items(id) + ); + SQL + connection.execute(statement) + end + + describe '#execute' do + it 'returns the tables sorted by the foreign keys dependency' do + expect(subject).to eq([['_test_gitlab_main_references'], ['_test_gitlab_main_items']]) + end + + it 'returns both tables together if they are strongly connected' do + statement = <<~SQL + ALTER TABLE _test_gitlab_main_items ADD COLUMN reference_id BIGINT + REFERENCES _test_gitlab_main_references(id) + SQL + connection.execute(statement) + + expect(subject).to eq([tables]) + end + end +end diff --git a/spec/lib/gitlab/database/tables_truncate_spec.rb b/spec/lib/gitlab/database/tables_truncate_spec.rb new file mode 100644 index 00000000000..01af9efd782 --- /dev/null +++ b/spec/lib/gitlab/database/tables_truncate_spec.rb @@ -0,0 +1,257 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::TablesTruncate, :reestablished_active_record_base, + 
:suppress_gitlab_schemas_validate_connection do + include MigrationsHelpers + + let(:logger) { instance_double(Logger) } + let(:dry_run) { false } + let(:until_table) { nil } + let(:min_batch_size) { 1 } + let(:main_connection) { ApplicationRecord.connection } + let(:ci_connection) { Ci::ApplicationRecord.connection } + let(:test_gitlab_main_table) { '_test_gitlab_main_table' } + let(:test_gitlab_ci_table) { '_test_gitlab_ci_table' } + + # Main Database + let(:main_db_main_item_model) { table("_test_gitlab_main_items", database: "main") } + let(:main_db_main_reference_model) { table("_test_gitlab_main_references", database: "main") } + let(:main_db_ci_item_model) { table("_test_gitlab_ci_items", database: "main") } + let(:main_db_ci_reference_model) { table("_test_gitlab_ci_references", database: "main") } + let(:main_db_shared_item_model) { table("_test_gitlab_shared_items", database: "main") } + # CI Database + let(:ci_db_main_item_model) { table("_test_gitlab_main_items", database: "ci") } + let(:ci_db_main_reference_model) { table("_test_gitlab_main_references", database: "ci") } + let(:ci_db_ci_item_model) { table("_test_gitlab_ci_items", database: "ci") } + let(:ci_db_ci_reference_model) { table("_test_gitlab_ci_references", database: "ci") } + let(:ci_db_shared_item_model) { table("_test_gitlab_shared_items", database: "ci") } + + subject(:truncate_legacy_tables) do + described_class.new( + database_name: database_name, + min_batch_size: min_batch_size, + logger: logger, + dry_run: dry_run, + until_table: until_table + ).execute + end + + shared_examples 'truncating legacy tables on a database' do + before do + skip_if_multiple_databases_not_setup + + # Creating some test tables on the main database + main_tables_sql = <<~SQL + CREATE TABLE _test_gitlab_main_items (id serial NOT NULL PRIMARY KEY); + + CREATE TABLE _test_gitlab_main_references ( + id serial NOT NULL PRIMARY KEY, + item_id BIGINT NOT NULL, + CONSTRAINT fk_constrained_1 FOREIGN KEY(item_id) 
REFERENCES _test_gitlab_main_items(id) + ); + SQL + + main_connection.execute(main_tables_sql) + ci_connection.execute(main_tables_sql) + + ci_tables_sql = <<~SQL + CREATE TABLE _test_gitlab_ci_items (id serial NOT NULL PRIMARY KEY); + + CREATE TABLE _test_gitlab_ci_references ( + id serial NOT NULL PRIMARY KEY, + item_id BIGINT NOT NULL, + CONSTRAINT fk_constrained_1 FOREIGN KEY(item_id) REFERENCES _test_gitlab_ci_items(id) + ); + SQL + + main_connection.execute(ci_tables_sql) + ci_connection.execute(ci_tables_sql) + + internal_tables_sql = <<~SQL + CREATE TABLE _test_gitlab_shared_items (id serial NOT NULL PRIMARY KEY); + SQL + + main_connection.execute(internal_tables_sql) + ci_connection.execute(internal_tables_sql) + + # Filling the tables + 5.times do |i| + # Main Database + main_db_main_item_model.create!(id: i) + main_db_main_reference_model.create!(item_id: i) + main_db_ci_item_model.create!(id: i) + main_db_ci_reference_model.create!(item_id: i) + main_db_shared_item_model.create!(id: i) + # CI Database + ci_db_main_item_model.create!(id: i) + ci_db_main_reference_model.create!(item_id: i) + ci_db_ci_item_model.create!(id: i) + ci_db_ci_reference_model.create!(item_id: i) + ci_db_shared_item_model.create!(id: i) + end + + allow(Gitlab::Database::GitlabSchema).to receive(:tables_to_schema).and_return( + { + "_test_gitlab_main_items" => :gitlab_main, + "_test_gitlab_main_references" => :gitlab_main, + "_test_gitlab_ci_items" => :gitlab_ci, + "_test_gitlab_ci_references" => :gitlab_ci, + "_test_gitlab_shared_items" => :gitlab_shared, + "_test_gitlab_geo_items" => :gitlab_geo + } + ) + + allow(logger).to receive(:info).with(any_args) + end + + context 'when the truncated tables are not locked for writes' do + it 'raises an error that the tables are not locked for writes' do + error_message = /is not locked for writes. 
Run the rake task gitlab:db:lock_writes first/ + expect { truncate_legacy_tables }.to raise_error(error_message) + end + end + + context 'when the truncated tables are locked for writes' do + before do + legacy_tables_models.map(&:table_name).each do |table| + Gitlab::Database::LockWritesManager.new( + table_name: table, + connection: connection, + database_name: database_name + ).lock_writes + end + end + + it 'truncates the legacy tables' do + old_counts = legacy_tables_models.map(&:count) + expect do + truncate_legacy_tables + end.to change { legacy_tables_models.map(&:count) }.from(old_counts).to([0] * legacy_tables_models.length) + end + + it 'does not affect the other tables' do + expect do + truncate_legacy_tables + end.not_to change { other_tables_models.map(&:count) } + end + + it 'logs the sql statements to the logger' do + expect(logger).to receive(:info).with("SET LOCAL lock_timeout = 0") + expect(logger).to receive(:info).with("SET LOCAL statement_timeout = 0") + expect(logger).to receive(:info) + .with(/TRUNCATE TABLE #{legacy_tables_models.map(&:table_name).sort.join(', ')} RESTRICT/) + truncate_legacy_tables + end + + context 'when running in dry_run mode' do + let(:dry_run) { true } + + it 'does not truncate the legacy tables if running in dry run mode' do + legacy_tables_models = [main_db_ci_reference_model, main_db_ci_reference_model] + expect do + truncate_legacy_tables + end.not_to change { legacy_tables_models.map(&:count) } + end + end + + context 'when passing until_table parameter' do + context 'with a table that exists' do + let(:until_table) { referencing_table_model.table_name } + + it 'only truncates until the table specified' do + expect do + truncate_legacy_tables + end.to change(referencing_table_model, :count).by(-5) + .and change(referenced_table_model, :count).by(0) + end + end + + context 'with a table that does not exist' do + let(:until_table) { 'foobar' } + + it 'raises an error if the specified table does not exist' do + 
expect do + truncate_legacy_tables + end.to raise_error(/The table 'foobar' is not within the truncated tables/) + end + end + end + + context 'with geo configured' do + let(:geo_connection) { Gitlab::Database.database_base_models[:geo].connection } + + before do + skip unless geo_configured? + geo_connection.execute('CREATE TABLE _test_gitlab_geo_items (id serial NOT NULL PRIMARY KEY)') + geo_connection.execute('INSERT INTO _test_gitlab_geo_items VALUES(generate_series(1, 50))') + end + + it 'does not truncate gitlab_geo tables' do + expect do + truncate_legacy_tables + end.not_to change { geo_connection.select_value("select count(*) from _test_gitlab_geo_items") } + end + end + end + end + + context 'when truncating gitlab_ci tables on the main database' do + let(:connection) { ApplicationRecord.connection } + let(:database_name) { "main" } + let(:legacy_tables_models) { [main_db_ci_item_model, main_db_ci_reference_model] } + let(:referencing_table_model) { main_db_ci_reference_model } + let(:referenced_table_model) { main_db_ci_item_model } + let(:other_tables_models) do + [ + main_db_main_item_model, main_db_main_reference_model, + ci_db_ci_item_model, ci_db_ci_reference_model, + ci_db_main_item_model, ci_db_main_reference_model, + main_db_shared_item_model, ci_db_shared_item_model + ] + end + + it_behaves_like 'truncating legacy tables on a database' + end + + context 'when truncating gitlab_main tables on the ci database' do + let(:connection) { Ci::ApplicationRecord.connection } + let(:database_name) { "ci" } + let(:legacy_tables_models) { [ci_db_main_item_model, ci_db_main_reference_model] } + let(:referencing_table_model) { ci_db_main_reference_model } + let(:referenced_table_model) { ci_db_main_item_model } + let(:other_tables_models) do + [ + main_db_main_item_model, main_db_main_reference_model, + ci_db_ci_item_model, ci_db_ci_reference_model, + main_db_ci_item_model, main_db_ci_reference_model, + main_db_shared_item_model, ci_db_shared_item_model + ] + 
end + + it_behaves_like 'truncating legacy tables on a database' + end + + context 'when running in a single database mode' do + before do + skip_if_multiple_databases_are_setup + end + + it 'raises an error when truncating the main database that it is a single database setup' do + expect do + described_class.new(database_name: 'main', min_batch_size: min_batch_size).execute + end.to raise_error(/Cannot truncate legacy tables in single-db setup/) + end + + it 'raises an error when truncating the ci database that it is a single database setup' do + expect do + described_class.new(database_name: 'ci', min_batch_size: min_batch_size).execute + end.to raise_error(/Cannot truncate legacy tables in single-db setup/) + end + end + + def geo_configured? + !!ActiveRecord::Base.configurations.configs_for(env_name: Rails.env, name: 'geo') + end +end |