diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2020-12-17 14:59:07 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2020-12-17 14:59:07 +0300 |
commit | 8b573c94895dc0ac0e1d9d59cf3e8745e8b539ca (patch) | |
tree | 544930fb309b30317ae9797a9683768705d664c4 /spec/lib/gitlab/database | |
parent | 4b1de649d0168371549608993deac953eb692019 (diff) |
Add latest changes from gitlab-org/gitlab@13-7-stable-eev13.7.0-rc42
Diffstat (limited to 'spec/lib/gitlab/database')
10 files changed, 389 insertions, 8 deletions
diff --git a/spec/lib/gitlab/database/batch_count_spec.rb b/spec/lib/gitlab/database/batch_count_spec.rb index a1cc759e011..29688b18e94 100644 --- a/spec/lib/gitlab/database/batch_count_spec.rb +++ b/spec/lib/gitlab/database/batch_count_spec.rb @@ -130,6 +130,16 @@ RSpec.describe Gitlab::Database::BatchCount do expect(described_class.batch_count(model, start: model.minimum(:id), finish: model.maximum(:id))).to eq(5) end + it 'stops counting when finish value is reached' do + stub_const('Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE', 0) + + expect(described_class.batch_count(model, + start: model.minimum(:id), + finish: model.maximum(:id) - 1, # Do not count the last record + batch_size: model.count - 2 # Ensure there are multiple batches + )).to eq(model.count - 1) + end + it "defaults the batch size to #{Gitlab::Database::BatchCounter::DEFAULT_BATCH_SIZE}" do min_id = model.minimum(:id) relation = instance_double(ActiveRecord::Relation) @@ -242,6 +252,19 @@ RSpec.describe Gitlab::Database::BatchCount do expect(described_class.batch_distinct_count(model, column, start: model.minimum(column), finish: model.maximum(column))).to eq(2) end + it 'stops counting when finish value is reached' do + # Create a new unique author that should not be counted + create(:issue) + + stub_const('Gitlab::Database::BatchCounter::MIN_REQUIRED_BATCH_SIZE', 0) + + expect(described_class.batch_distinct_count(model, column, + start: User.minimum(:id), + finish: User.maximum(:id) - 1, # Do not count the newly created issue + batch_size: model.count - 2 # Ensure there are multiple batches + )).to eq(2) + end + it 'counts with User min and max as start and finish' do expect(described_class.batch_distinct_count(model, column, start: User.minimum(:id), finish: User.maximum(:id))).to eq(2) end diff --git a/spec/lib/gitlab/database/migrations/background_migration_helpers_spec.rb b/spec/lib/gitlab/database/migrations/background_migration_helpers_spec.rb index 48132d68031..3e8563376ce 100644 --- a/spec/lib/gitlab/database/migrations/background_migration_helpers_spec.rb +++ b/spec/lib/gitlab/database/migrations/background_migration_helpers_spec.rb @@ -189,7 +189,51 @@ RSpec.describe Gitlab::Database::Migrations::BackgroundMigrationHelpers do end end - context "when the model doesn't have an ID column" do + context 'when the model specifies a primary_column_name' do + let!(:id1) { create(:container_expiration_policy).id } + let!(:id2) { create(:container_expiration_policy).id } + let!(:id3) { create(:container_expiration_policy).id } + + around do |example| + freeze_time { example.run } + end + + before do + ContainerExpirationPolicy.class_eval do + include EachBatch + end + end + + it 'returns the final expected delay', :aggregate_failures do + Sidekiq::Testing.fake! do + final_delay = model.queue_background_migration_jobs_by_range_at_intervals(ContainerExpirationPolicy, 'FooJob', 10.minutes, batch_size: 2, primary_column_name: :project_id) + + expect(final_delay.to_f).to eq(20.minutes.to_f) + expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]]) + expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(10.minutes.from_now.to_f) + expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]]) + expect(BackgroundMigrationWorker.jobs[1]['at']).to eq(20.minutes.from_now.to_f) + end + end + + context "when the primary_column_name is not an integer" do + it 'raises error' do + expect do + model.queue_background_migration_jobs_by_range_at_intervals(ContainerExpirationPolicy, 'FooJob', 10.minutes, primary_column_name: :enabled) + end.to raise_error(StandardError, /is not an integer column/) + end + end + + context "when the primary_column_name does not exist" do + it 'raises error' do + expect do + model.queue_background_migration_jobs_by_range_at_intervals(ContainerExpirationPolicy, 'FooJob', 10.minutes, primary_column_name: :foo) + end.to raise_error(StandardError, /does not have an ID column of foo/) + end + end + end + + context "when the model doesn't have an ID or primary_column_name column" do it 'raises error (for now)' do expect do model.queue_background_migration_jobs_by_range_at_intervals(ProjectAuthorization, 'FooJob', 10.seconds) diff --git a/spec/lib/gitlab/database/postgres_hll/batch_distinct_counter_spec.rb b/spec/lib/gitlab/database/postgres_hll/batch_distinct_counter_spec.rb new file mode 100644 index 00000000000..934e2274358 --- /dev/null +++ b/spec/lib/gitlab/database/postgres_hll/batch_distinct_counter_spec.rb @@ -0,0 +1,132 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::PostgresHll::BatchDistinctCounter do + let_it_be(:error_rate) { described_class::ERROR_RATE } # HyperLogLog is a probabilistic algorithm, which provides estimated data, with given error margin + let_it_be(:fallback) { ::Gitlab::Database::BatchCounter::FALLBACK } + let_it_be(:small_batch_size) { calculate_batch_size(described_class::MIN_REQUIRED_BATCH_SIZE) } + let(:model) { Issue } + let(:column) { :author_id } + + let(:in_transaction) { false } + + let_it_be(:user) { create(:user, email: 'email1@domain.com') } + let_it_be(:another_user) { create(:user, email: 'email2@domain.com') } + + def calculate_batch_size(batch_size) + zero_offset_modifier = -1 + + batch_size + zero_offset_modifier + end + + before do + allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(in_transaction) + end + + context 'different distribution of relation records' do + [10, 100, 100_000].each do |spread| + context "records are spread within #{spread}" do + before do + ids = (1..spread).to_a.sample(10) + create_list(:issue, 10).each_with_index do |issue, i| + issue.id = ids[i] + end + end + + it 'counts table' do + expect(described_class.new(model).estimate_distinct_count).to be_within(error_rate).percent_of(10) + end + end + end + end + + context 'unit test for different counting parameters' do + before_all do + create_list(:issue, 3, author: user) + create_list(:issue, 2, author: another_user) + end + + describe '#estimate_distinct_count' do + it 'counts table' do + expect(described_class.new(model).estimate_distinct_count).to be_within(error_rate).percent_of(5) + end + + it 'counts with column field' do + expect(described_class.new(model, column).estimate_distinct_count).to be_within(error_rate).percent_of(2) + end + + it 'counts with :id field' do + expect(described_class.new(model, :id).estimate_distinct_count).to be_within(error_rate).percent_of(5) + end + + it 'counts with "id" field' do + expect(described_class.new(model, "id").estimate_distinct_count).to be_within(error_rate).percent_of(5) + end + + it 'counts with table.column field' do + expect(described_class.new(model, "#{model.table_name}.#{column}").estimate_distinct_count).to be_within(error_rate).percent_of(2) + end + + it 'counts with Arel column' do + expect(described_class.new(model, model.arel_table[column]).estimate_distinct_count).to be_within(error_rate).percent_of(2) + end + + it 'counts over joined relations' do + expect(described_class.new(model.joins(:author), "users.email").estimate_distinct_count).to be_within(error_rate).percent_of(2) + end + + it 'counts with :column field with batch_size of 50K' do + expect(described_class.new(model, column).estimate_distinct_count(batch_size: 50_000)).to be_within(error_rate).percent_of(2) + end + + it 'will not count table with a batch size less than allowed' do + expect(described_class.new(model, column).estimate_distinct_count(batch_size: small_batch_size)).to eq(fallback) + end + + it 'counts with different number of batches and aggregates total result' do + stub_const('Gitlab::Database::PostgresHll::BatchDistinctCounter::MIN_REQUIRED_BATCH_SIZE', 0) + + [1, 2, 4, 5, 6].each { |i| expect(described_class.new(model).estimate_distinct_count(batch_size: i)).to be_within(error_rate).percent_of(5) } + end + + it 'counts with a start and finish' do + expect(described_class.new(model, column).estimate_distinct_count(start: model.minimum(:id), finish: model.maximum(:id))).to be_within(error_rate).percent_of(2) + end + + it "defaults the batch size to #{Gitlab::Database::PostgresHll::BatchDistinctCounter::DEFAULT_BATCH_SIZE}" do + min_id = model.minimum(:id) + batch_end_id = min_id + calculate_batch_size(Gitlab::Database::PostgresHll::BatchDistinctCounter::DEFAULT_BATCH_SIZE) + + expect(model).to receive(:where).with("id" => min_id..batch_end_id).and_call_original + + described_class.new(model).estimate_distinct_count + end + + context 'when a transaction is open' do + let(:in_transaction) { true } + + it 'raises an error' do + expect { described_class.new(model, column).estimate_distinct_count }.to raise_error('BatchCount can not be run inside a transaction') + end + end + + context 'disallowed configurations' do + let(:default_batch_size) { Gitlab::Database::PostgresHll::BatchDistinctCounter::DEFAULT_BATCH_SIZE } + + it 'returns fallback if start is bigger than finish' do + expect(described_class.new(model, column).estimate_distinct_count(start: 1, finish: 0)).to eq(fallback) + end + + it 'returns fallback if data volume exceeds upper limit' do + large_finish = Gitlab::Database::PostgresHll::BatchDistinctCounter::MAX_DATA_VOLUME + 1 + expect(described_class.new(model, column).estimate_distinct_count(start: 1, finish: large_finish)).to eq(fallback) + end + + it 'returns fallback if batch size is less than min required' do + expect(described_class.new(model, column).estimate_distinct_count(batch_size: small_batch_size)).to eq(fallback) + end + end + end + end +end diff --git a/spec/lib/gitlab/database/postgres_index_bloat_estimate_spec.rb b/spec/lib/gitlab/database/postgres_index_bloat_estimate_spec.rb new file mode 100644 index 00000000000..da4422bd442 --- /dev/null +++ b/spec/lib/gitlab/database/postgres_index_bloat_estimate_spec.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::PostgresIndexBloatEstimate do + before do + ActiveRecord::Base.connection.execute(<<~SQL) + ANALYZE schema_migrations + SQL + end + + subject { described_class.find(identifier) } + + let(:identifier) { 'public.schema_migrations_pkey' } + + describe '#bloat_size' do + it 'returns the bloat size in bytes' do + # We cannot reach much more about the bloat size estimate here + expect(subject.bloat_size).to be >= 0 + end + end + + describe '#bloat_size_bytes' do + it 'is an alias of #bloat_size' do + expect(subject.bloat_size_bytes).to eq(subject.bloat_size) + end + end + + describe '#index' do + it 'belongs to a PostgresIndex' do + expect(subject.index.identifier).to eq(identifier) + end + end +end diff --git a/spec/lib/gitlab/database/postgres_index_spec.rb b/spec/lib/gitlab/database/postgres_index_spec.rb index d65b638f7bc..2fda9b85c5a 100644 --- a/spec/lib/gitlab/database/postgres_index_spec.rb +++ b/spec/lib/gitlab/database/postgres_index_spec.rb @@ -27,7 +27,7 @@ RSpec.describe Gitlab::Database::PostgresIndex do expect(described_class.regular).to all(have_attributes(unique: false)) end - it 'only non partitioned indexes ' do + it 'only non partitioned indexes' do expect(described_class.regular).to all(have_attributes(partitioned: false)) end @@ -46,9 +46,24 @@ RSpec.describe Gitlab::Database::PostgresIndex do end end - describe '.random_few' do - it 'limits to two records by default' do - expect(described_class.random_few(2).size).to eq(2) + describe '#bloat_size' do + subject { build(:postgres_index, bloat_estimate: bloat_estimate) } + + let(:bloat_estimate) { build(:postgres_index_bloat_estimate) } + let(:bloat_size) { double } + + it 'returns the bloat size from the estimate' do + expect(bloat_estimate).to receive(:bloat_size).and_return(bloat_size) + + expect(subject.bloat_size).to eq(bloat_size) + end + + context 'without a bloat estimate available' do + let(:bloat_estimate) { nil } + + it 'returns 0' do + expect(subject.bloat_size).to eq(0) + end end end diff --git a/spec/lib/gitlab/database/postgresql_adapter/empty_query_ping_spec.rb b/spec/lib/gitlab/database/postgresql_adapter/empty_query_ping_spec.rb new file mode 100644 index 00000000000..6e1e53e0e41 --- /dev/null +++ b/spec/lib/gitlab/database/postgresql_adapter/empty_query_ping_spec.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::PostgresqlAdapter::EmptyQueryPing do + describe '#active?' do + let(:adapter_class) do + Class.new do + include Gitlab::Database::PostgresqlAdapter::EmptyQueryPing + + def initialize(connection, lock) + @connection = connection + @lock = lock + end + end + end + + subject { adapter_class.new(connection, lock).active? } + + let(:connection) { double(query: nil) } + let(:lock) { double } + + before do + allow(lock).to receive(:synchronize).and_yield + end + + it 'uses an empty query to check liveness' do + expect(connection).to receive(:query).with(';') + + subject + end + + it 'returns true if no error was signaled' do + expect(subject).to be_truthy + end + + it 'returns false when an error occurs' do + expect(lock).to receive(:synchronize).and_raise(PG::Error) + + expect(subject).to be_falsey + end + end +end diff --git a/spec/lib/gitlab/database/reindexing/concurrent_reindex_spec.rb b/spec/lib/gitlab/database/reindexing/concurrent_reindex_spec.rb index 2d6765aac2e..51fc7c6620b 100644 --- a/spec/lib/gitlab/database/reindexing/concurrent_reindex_spec.rb +++ b/spec/lib/gitlab/database/reindexing/concurrent_reindex_spec.rb @@ -8,7 +8,7 @@ RSpec.describe Gitlab::Database::Reindexing::ConcurrentReindex, '#perform' do let(:table_name) { '_test_reindex_table' } let(:column_name) { '_test_column' } let(:index_name) { '_test_reindex_index' } - let(:index) { instance_double(Gitlab::Database::PostgresIndex, indexrelid: 42, name: index_name, schema: 'public', partitioned?: false, unique?: false, exclusion?: false, definition: 'CREATE INDEX _test_reindex_index ON public._test_reindex_table USING btree (_test_column)') } + let(:index) { instance_double(Gitlab::Database::PostgresIndex, indexrelid: 42, name: index_name, schema: 'public', tablename: table_name, partitioned?: false, unique?: false, exclusion?: false, expression?: false, definition: 'CREATE INDEX _test_reindex_index ON public._test_reindex_table USING btree (_test_column)') } let(:logger) { double('logger', debug: nil, info: nil, error: nil ) } let(:connection) { ActiveRecord::Base.connection } @@ -130,6 +130,36 @@ RSpec.describe Gitlab::Database::Reindexing::ConcurrentReindex, '#perform' do check_index_exists end + context 'for expression indexes' do + before do + allow(index).to receive(:expression?).and_return(true) + end + + it 'rebuilds table statistics before dropping the original index' do + expect(connection).to receive(:execute).with('SET statement_timeout TO \'21600s\'').twice + + expect_to_execute_concurrently_in_order(create_index) + + expect_to_execute_concurrently_in_order(<<~SQL) + ANALYZE "#{index.schema}"."#{index.tablename}" + SQL + + expect_next_instance_of(::Gitlab::Database::WithLockRetries) do |instance| + expect(instance).to receive(:run).with(raise_on_exhaustion: true).and_yield + end + + expect_index_rename(index.name, replaced_name) + expect_index_rename(replacement_name, index.name) + expect_index_rename(replaced_name, replacement_name) + + expect_to_execute_concurrently_in_order(drop_index) + + subject.perform + + check_index_exists + end + end + context 'when a dangling index is left from a previous run' do before do connection.execute("CREATE INDEX #{replacement_name} ON #{table_name} (#{column_name})") diff --git a/spec/lib/gitlab/database/reindexing/index_selection_spec.rb b/spec/lib/gitlab/database/reindexing/index_selection_spec.rb new file mode 100644 index 00000000000..a5e2f368f40 --- /dev/null +++ b/spec/lib/gitlab/database/reindexing/index_selection_spec.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::Reindexing::IndexSelection do + include DatabaseHelpers + + subject { described_class.new(Gitlab::Database::PostgresIndex.all).to_a } + + before do + swapout_view_for_table(:postgres_index_bloat_estimates) + swapout_view_for_table(:postgres_indexes) + end + + def execute(sql) + ActiveRecord::Base.connection.execute(sql) + end + + it 'orders by highest bloat first' do + create_list(:postgres_index, 10).each_with_index do |index, i| + create(:postgres_index_bloat_estimate, index: index, bloat_size_bytes: 1.megabyte * i) + end + + expected = Gitlab::Database::PostgresIndexBloatEstimate.order(bloat_size_bytes: :desc).map(&:index) + + expect(subject).to eq(expected) + end + + context 'with time frozen' do + around do |example| + freeze_time { example.run } + end + + it 'does not return indexes with reindex action in the last 7 days' do + not_recently_reindexed = create_list(:postgres_index, 2).each_with_index do |index, i| + create(:postgres_index_bloat_estimate, index: index, bloat_size_bytes: 1.megabyte * i) + create(:reindex_action, index: index, action_end: Time.zone.now - 7.days - 1.minute) + end + + create_list(:postgres_index, 2).each_with_index do |index, i| + create(:postgres_index_bloat_estimate, index: index, bloat_size_bytes: 1.megabyte * i) + create(:reindex_action, index: index, action_end: Time.zone.now) + end + + expected = Gitlab::Database::PostgresIndexBloatEstimate.where(identifier: not_recently_reindexed.map(&:identifier)).map(&:index).map(&:identifier).sort + + expect(subject.map(&:identifier).sort).to eq(expected) + end + end +end diff --git a/spec/lib/gitlab/database/reindexing/reindex_action_spec.rb b/spec/lib/gitlab/database/reindexing/reindex_action_spec.rb index efb5b8463a1..225f23d2135 100644 --- a/spec/lib/gitlab/database/reindexing/reindex_action_spec.rb +++ b/spec/lib/gitlab/database/reindexing/reindex_action_spec.rb @@ -3,7 +3,7 @@ require 'spec_helper' RSpec.describe Gitlab::Database::Reindexing::ReindexAction, '.keep_track_of' do - let(:index) { double('index', identifier: 'public.something', ondisk_size_bytes: 10240, reload: nil) } + let(:index) { double('index', identifier: 'public.something', ondisk_size_bytes: 10240, reload: nil, bloat_size: 42) } let(:size_after) { 512 } it 'yields to the caller' do @@ -47,6 +47,12 @@ RSpec.describe Gitlab::Database::Reindexing::ReindexAction, '.keep_track_of' do expect(find_record.ondisk_size_bytes_end).to eq(size_after) end + it 'creates the record with the indexes bloat estimate' do + described_class.keep_track_of(index) do + expect(find_record.bloat_estimate_bytes_start).to eq(index.bloat_size) + end + end + context 'in case of errors' do it 'sets the state to failed' do expect do diff --git a/spec/lib/gitlab/database/reindexing_spec.rb b/spec/lib/gitlab/database/reindexing_spec.rb index 359e0597f4e..eb78a5fe8ea 100644 --- a/spec/lib/gitlab/database/reindexing_spec.rb +++ b/spec/lib/gitlab/database/reindexing_spec.rb @@ -6,12 +6,16 @@ RSpec.describe Gitlab::Database::Reindexing do include ExclusiveLeaseHelpers describe '.perform' do - subject { described_class.perform(indexes) } + subject { described_class.perform(candidate_indexes) } let(:coordinator) { instance_double(Gitlab::Database::Reindexing::Coordinator) } + let(:index_selection) { instance_double(Gitlab::Database::Reindexing::IndexSelection) } + let(:candidate_indexes) { double } let(:indexes) { double } it 'delegates to Coordinator' do + expect(Gitlab::Database::Reindexing::IndexSelection).to receive(:new).with(candidate_indexes).and_return(index_selection) + expect(index_selection).to receive(:take).with(2).and_return(indexes) expect(Gitlab::Database::Reindexing::Coordinator).to receive(:new).with(indexes).and_return(coordinator) expect(coordinator).to receive(:perform) |