diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2023-12-19 14:01:45 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2023-12-19 14:01:45 +0300 |
commit | 9297025d0b7ddf095eb618dfaaab2ff8f2018d8b (patch) | |
tree | 865198c01d1824a9b098127baa3ab980c9cd2c06 /lib/backup | |
parent | 6372471f43ee03c05a7c1f8b0c6ac6b8a7431dbe (diff) |
Add latest changes from gitlab-org/gitlab@16-7-stable-ee (v16.7.0-rc42)
Diffstat (limited to 'lib/backup')
-rw-r--r-- | lib/backup/database.rb | 94 | ||||
-rw-r--r-- | lib/backup/database_configuration.rb | 107 | ||||
-rw-r--r-- | lib/backup/database_connection.rb | 59 | ||||
-rw-r--r-- | lib/backup/database_model.rb | 19 | ||||
-rw-r--r-- | lib/backup/dump/postgres.rb | 13 | ||||
-rw-r--r-- | lib/backup/files.rb | 14 | ||||
-rw-r--r-- | lib/backup/helper.rb | 29 | ||||
-rw-r--r-- | lib/backup/repositories.rb | 19 |
8 files changed, 263 insertions, 91 deletions
diff --git a/lib/backup/database.rb b/lib/backup/database.rb index 58a8c19c1ce..a0eaccb1ca4 100644 --- a/lib/backup/database.rb +++ b/lib/backup/database.rb @@ -24,44 +24,37 @@ module Backup end override :dump - def dump(destination_dir, backup_id) + def dump(destination_dir, _) FileUtils.mkdir_p(destination_dir) - each_database(destination_dir) do |database_name, current_db| - model = current_db[:model] - snapshot_id = current_db[:snapshot_id] + each_database(destination_dir) do |backup_connection| + pg_env = backup_connection.database_configuration.pg_env_variables + active_record_config = backup_connection.database_configuration.activerecord_variables + pg_database_name = active_record_config[:database] - pg_env = model.config[:pg_env] - connection = model.connection - active_record_config = model.config[:activerecord] - pg_database = active_record_config[:database] + dump_file_name = file_name(destination_dir, backup_connection.connection_name) + FileUtils.rm_f(dump_file_name) - db_file_name = file_name(destination_dir, database_name) - FileUtils.rm_f(db_file_name) - - progress.print "Dumping PostgreSQL database #{pg_database} ... " + progress.print "Dumping PostgreSQL database #{pg_database_name} ... " - pgsql_args = ["--clean"] # Pass '--clean' to include 'DROP TABLE' statements in the DB dump. 
- pgsql_args << '--if-exists' - pgsql_args << "--snapshot=#{snapshot_id}" if snapshot_id + schemas = [] if Gitlab.config.backup.pg_schema - pgsql_args << '-n' - pgsql_args << Gitlab.config.backup.pg_schema - - Gitlab::Database::EXTRA_SCHEMAS.each do |schema| - pgsql_args << '-n' - pgsql_args << schema.to_s - end + schemas << Gitlab.config.backup.pg_schema + schemas.push(*Gitlab::Database::EXTRA_SCHEMAS.map(&:to_s)) end - success = with_transient_pg_env(pg_env) do - Backup::Dump::Postgres.new.dump(pg_database, db_file_name, pgsql_args) - end + pg_dump = ::Gitlab::Backup::Cli::Utils::PgDump.new( + database_name: pg_database_name, + snapshot_id: backup_connection.snapshot_id, + schemas: schemas, + env: pg_env) + + success = Backup::Dump::Postgres.new.dump(dump_file_name, pg_dump) - connection.rollback_transaction if snapshot_id + backup_connection.release_snapshot! if backup_connection.snapshot_id - raise DatabaseBackupError.new(active_record_config, db_file_name) unless success + raise DatabaseBackupError.new(active_record_config, dump_file_name) unless success report_success(success) progress.flush @@ -76,10 +69,10 @@ module Backup override :restore def restore(destination_dir, backup_id) - base_models_for_backup.each do |database_name, _base_model| - backup_model = Backup::DatabaseModel.new(database_name) + base_models_for_backup.each do |database_name, _| + backup_connection = Backup::DatabaseConnection.new(database_name) - config = backup_model.config[:activerecord] + config = backup_connection.database_configuration.activerecord_variables db_file_name = file_name(destination_dir, database_name) database = config[:database] @@ -100,10 +93,10 @@ module Backup # hanging out from a failed upgrade drop_tables(database_name) - pg_env = backup_model.config[:pg_env] + pg_env = backup_connection.database_configuration.pg_env_variables success = with_transient_pg_env(pg_env) do decompress_rd, decompress_wr = IO.pipe - decompress_pid = spawn(*%w[gzip -cd], out: 
decompress_wr, in: db_file_name) + decompress_pid = spawn(decompress_cmd, out: decompress_wr, in: db_file_name) decompress_wr.close status, @errors = @@ -235,6 +228,7 @@ module Backup puts_time 'done'.color(:green) end + # @deprecated This will be removed when restore operation is refactored to use extended_env directly def with_transient_pg_env(extended_env) ENV.merge!(extended_env) result = yield @@ -248,32 +242,36 @@ module Backup end def each_database(destination_dir, &block) - databases = {} + databases = [] + + # each connection will loop through all database connections defined in `database.yml` + # and reject the ones that are shared, so we don't get duplicates + # + # we consider a connection to be shared when it has `database_tasks: false` ::Gitlab::Database::EachDatabase.each_connection( only: base_models_for_backup.keys, include_shared: false - ) do |_connection, name| - next if databases[name] - - backup_model = Backup::DatabaseModel.new(name) - - databases[name] = { - model: backup_model - } + ) do |_, database_connection_name| + backup_connection = Backup::DatabaseConnection.new(database_connection_name) + databases << backup_connection - next unless Gitlab::Database.database_mode == Gitlab::Database::MODE_MULTIPLE_DATABASES - - connection = backup_model.connection + next unless multiple_databases? begin - Gitlab::Database::TransactionTimeoutSettings.new(connection).disable_timeouts - connection.begin_transaction(isolation: :repeatable_read) - databases[name][:snapshot_id] = connection.select_value("SELECT pg_export_snapshot()") + # Trigger a transaction snapshot export that will be used by pg_dump later on + backup_connection.export_snapshot! 
rescue ActiveRecord::ConnectionNotEstablished - raise Backup::DatabaseBackupError.new(backup_model.config[:activerecord], file_name(destination_dir, name)) + raise Backup::DatabaseBackupError.new( + backup_connection.database_configuration.activerecord_variables, + file_name(destination_dir, database_connection_name) + ) end end databases.each(&block) end + + def multiple_databases? + Gitlab::Database.database_mode == Gitlab::Database::MODE_MULTIPLE_DATABASES + end end end diff --git a/lib/backup/database_configuration.rb b/lib/backup/database_configuration.rb new file mode 100644 index 00000000000..1a6a476f9c1 --- /dev/null +++ b/lib/backup/database_configuration.rb @@ -0,0 +1,107 @@ +# frozen_string_literal: true + +module Backup + class DatabaseConfiguration + # Connection name is the key used in `config/database.yml` for multi-database connection configuration + # + # @return [String] + attr_reader :connection_name + + # ActiveRecord base model that is configured to connect to the database identified by connection_name key + # + # @return [ActiveRecord::Base] + attr_reader :source_model + + # Initializes configuration + # + # @param [String] connection_name the key from `database.yml` for multi-database connection configuration + def initialize(connection_name) + @connection_name = connection_name + @source_model = Gitlab::Database.database_base_models_with_gitlab_shared[connection_name] || + Gitlab::Database.database_base_models_with_gitlab_shared['main'] + @activerecord_database_config = ActiveRecord::Base.configurations.find_db_config(connection_name) + end + + # ENV variables that can override each database configuration + # These are used along with OVERRIDE_PREFIX and database name + # @see #process_config_overrides! 
+ SUPPORTED_OVERRIDES = { + username: 'PGUSER', + host: 'PGHOST', + port: 'PGPORT', + password: 'PGPASSWORD', + # SSL + sslmode: 'PGSSLMODE', + sslkey: 'PGSSLKEY', + sslcert: 'PGSSLCERT', + sslrootcert: 'PGSSLROOTCERT', + sslcrl: 'PGSSLCRL', + sslcompression: 'PGSSLCOMPRESSION' + }.freeze + + # Prefixes used for ENV variables overriding database configuration + OVERRIDE_PREFIXES = %w[GITLAB_BACKUP_ GITLAB_OVERRIDE_].freeze + + # Return the HashConfig for the database + # + # @return [ActiveRecord::DatabaseConfigurations::HashConfig] + def activerecord_configuration + ActiveRecord::DatabaseConfigurations::HashConfig.new( + @activerecord_database_config.env_name, + connection_name, + activerecord_variables + ) + end + + # Return postgres ENV variable values for current database with overrided values + # + # @return[Hash<String,String>] hash of postgres ENV variables + def pg_env_variables + process_config_overrides! unless @pg_env_variables + + @pg_env_variables + end + + # Return activerecord configuration values for current database with overrided values + # + # @return[Hash<String,String>] activerecord database.yml configuration compatible values + def activerecord_variables + process_config_overrides! unless @activerecord_variables + + @activerecord_variables + end + + private + + def process_config_overrides! + @activerecord_variables = original_activerecord_config + @pg_env_variables = {} + + SUPPORTED_OVERRIDES.each do |config_key, env_variable_name| + # This enables the use of different PostgreSQL settings in + # case PgBouncer is used. PgBouncer clears the search path, + # which wreaks havoc on Rails if connections are reused. 
+ OVERRIDE_PREFIXES.each do |override_prefix| + override_all = "#{override_prefix}#{env_variable_name}" + override_db = "#{override_prefix}#{connection_name.upcase}_#{env_variable_name}" + val = ENV[override_db].presence || + ENV[override_all].presence || + @activerecord_variables[config_key].to_s.presence + + next unless val + + @pg_env_variables[env_variable_name] = val + @activerecord_variables[config_key] = val + end + end + end + + # Return the database configuration from rails config/database.yml file + # in the format expected by ActiveRecord::DatabaseConfigurations::HashConfig + # + # @return [Hash] configuration hash + def original_activerecord_config + @activerecord_database_config.configuration_hash.dup + end + end +end diff --git a/lib/backup/database_connection.rb b/lib/backup/database_connection.rb new file mode 100644 index 00000000000..f3f0a5dfcb5 --- /dev/null +++ b/lib/backup/database_connection.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +module Backup + class DatabaseConnection + attr_reader :database_configuration, :snapshot_id + + delegate :connection_name, to: :database_configuration + delegate :connection, to: :@backup_model + + # Initializes a database connection + # + # @param [String] connection_name the key from `database.yml` for multi-database connection configuration + def initialize(connection_name) + @database_configuration = Backup::DatabaseConfiguration.new(connection_name) + @backup_model = backup_model + @snapshot_id = nil + + configure_backup_model + end + + # Start a new transaction and run pg_export_snapshot() + # Returns the snapshot identifier + # + # @return [String] snapshot identifier + def export_snapshot! 
+ Gitlab::Database::TransactionTimeoutSettings.new(connection).disable_timeouts + + connection.begin_transaction(isolation: :repeatable_read) + @snapshot_id = connection.select_value("SELECT pg_export_snapshot()") + end + + # Rollback the transaction to release the effects of pg_export_snapshot() + def release_snapshot! + return unless snapshot_id + + connection.rollback_transaction + @snapshot_id = nil + end + + private + + delegate :activerecord_configuration, to: :database_configuration, private: true + + def configure_backup_model + @backup_model.establish_connection(activerecord_configuration) + + Gitlab::Database::LoadBalancing::Setup.new(@backup_model).setup + end + + # Creates a disposable model to be used to host the Backup connection only + def backup_model + klass_name = connection_name.camelize + + return "#{self.class.name}::#{klass_name}".constantize if self.class.const_defined?(klass_name.to_sym, false) + + self.class.const_set(klass_name, Class.new(ApplicationRecord)) + end + end +end diff --git a/lib/backup/database_model.rb b/lib/backup/database_model.rb index b2202ad7794..228a7fa5383 100644 --- a/lib/backup/database_model.rb +++ b/lib/backup/database_model.rb @@ -16,7 +16,7 @@ module Backup sslcompression: 'PGSSLCOMPRESSION' }.freeze - OVERRIDE_PREFIX = "GITLAB_BACKUP_" + OVERRIDE_PREFIXES = %w[GITLAB_BACKUP_ GITLAB_OVERRIDE_].freeze attr_reader :config @@ -31,7 +31,8 @@ module Backup private def configure_model(name) - source_model = Gitlab::Database.database_base_models_with_gitlab_shared[name] + source_model = Gitlab::Database.database_base_models_with_gitlab_shared[name] || + Gitlab::Database.database_base_models_with_gitlab_shared['main'] @model = backup_model_for(name) @@ -67,14 +68,16 @@ module Backup # This enables the use of different PostgreSQL settings in # case PgBouncer is used. PgBouncer clears the search path, # which wreaks havoc on Rails if connections are reused. 
- override_all = "#{OVERRIDE_PREFIX}#{arg}" - override_db = "#{OVERRIDE_PREFIX}#{name.upcase}_#{arg}" - val = ENV[override_db].presence || ENV[override_all].presence || config[opt].to_s.presence + OVERRIDE_PREFIXES.each do |override_prefix| + override_all = "#{override_prefix}#{arg}" + override_db = "#{override_prefix}#{name.upcase}_#{arg}" + val = ENV[override_db].presence || ENV[override_all].presence || config[opt].to_s.presence - next unless val + next unless val - db_config[:pg_env][arg] = val - db_config[:activerecord][opt] = val + db_config[:pg_env][arg] = val + db_config[:activerecord][opt] = val + end end db_config diff --git a/lib/backup/dump/postgres.rb b/lib/backup/dump/postgres.rb index 1a5128b5a6b..80a49971140 100644 --- a/lib/backup/dump/postgres.rb +++ b/lib/backup/dump/postgres.rb @@ -4,14 +4,21 @@ module Backup class Postgres include Backup::Helper + # Owner can read/write, group no permission, others no permission FILE_PERMISSION = 0o600 - def dump(database_name, output_file, pgsql_args) + # Triggers PgDump and outputs to the provided file path + # + # @param [String] output_file_path full path to the output destination + # @param [Gitlab::Backup::Cli::Utils::PgDump] pg_dump + # @return [Boolean] whether pg_dump finished with success + def dump(output_file_path, pg_dump) compress_rd, compress_wr = IO.pipe - compress_pid = spawn(gzip_cmd, in: compress_rd, out: [output_file, 'w', FILE_PERMISSION]) + + compress_pid = spawn(compress_cmd, in: compress_rd, out: [output_file_path, 'w', FILE_PERMISSION]) compress_rd.close - dump_pid = Process.spawn('pg_dump', *pgsql_args, database_name, out: compress_wr) + dump_pid = pg_dump.spawn(output: compress_wr) compress_wr.close [compress_pid, dump_pid].all? 
do |pid| diff --git a/lib/backup/files.rb b/lib/backup/files.rb index b8ff7fff591..e3a8290e2e3 100644 --- a/lib/backup/files.rb +++ b/lib/backup/files.rb @@ -40,14 +40,14 @@ module Backup end tar_cmd = [tar, exclude_dirs(:tar), %W[-C #{backup_files_realpath} -cf - .]].flatten - status_list, output = run_pipeline!([tar_cmd, gzip_cmd], out: [backup_tarball, 'w', 0600]) + status_list, output = run_pipeline!([tar_cmd, compress_cmd], out: [backup_tarball, 'w', 0600]) FileUtils.rm_rf(backup_files_realpath) else tar_cmd = [tar, exclude_dirs(:tar), %W[-C #{app_files_realpath} -cf - .]].flatten - status_list, output = run_pipeline!([tar_cmd, gzip_cmd], out: [backup_tarball, 'w', 0600]) + status_list, output = run_pipeline!([tar_cmd, compress_cmd], out: [backup_tarball, 'w', 0600]) end - unless pipeline_succeeded?(tar_status: status_list[0], gzip_status: status_list[1], output: output) + unless pipeline_succeeded?(tar_status: status_list[0], compress_status: status_list[1], output: output) raise_custom_error(backup_tarball) end end @@ -56,9 +56,9 @@ module Backup def restore(backup_tarball, backup_id) backup_existing_files_dir(backup_tarball) - cmd_list = [%w[gzip -cd], %W[#{tar} --unlink-first --recursive-unlink -C #{app_files_realpath} -xf -]] + cmd_list = [decompress_cmd, %W[#{tar} --unlink-first --recursive-unlink -C #{app_files_realpath} -xf -]] status_list, output = run_pipeline!(cmd_list, in: backup_tarball) - unless pipeline_succeeded?(gzip_status: status_list[0], tar_status: status_list[1], output: output) + unless pipeline_succeeded?(compress_status: status_list[0], tar_status: status_list[1], output: output) raise Backup::Error, "Restore operation failed: #{output}" end end @@ -108,8 +108,8 @@ module Backup noncritical_warnings.map { |w| warning =~ w }.any? end - def pipeline_succeeded?(tar_status:, gzip_status:, output:) - return false unless gzip_status&.success? 
+ def pipeline_succeeded?(tar_status:, compress_status:, output:) + return false unless compress_status&.success? tar_status&.success? || tar_ignore_non_success?(tar_status.exitstatus, output) end diff --git a/lib/backup/helper.rb b/lib/backup/helper.rb index 2c2e35add0e..3af786654be 100644 --- a/lib/backup/helper.rb +++ b/lib/backup/helper.rb @@ -2,6 +2,8 @@ module Backup module Helper + include ::Gitlab::Utils::StrongMemoize + def access_denied_error(path) message = <<~EOS @@ -30,12 +32,27 @@ module Backup raise message end - def gzip_cmd - @gzip_cmd ||= if ENV['GZIP_RSYNCABLE'] == 'yes' - "gzip --rsyncable -c -1" - else - "gzip -c -1" - end + def compress_cmd + if ENV['COMPRESS_CMD'].present? + puts "Using custom COMPRESS_CMD '#{ENV['COMPRESS_CMD']}'" + puts "Ignoring GZIP_RSYNCABLE" if ENV['GZIP_RSYNCABLE'] == 'yes' + ENV['COMPRESS_CMD'] + elsif ENV['GZIP_RSYNCABLE'] == 'yes' + "gzip --rsyncable -c -1" + else + "gzip -c -1" + end + end + strong_memoize_attr :compress_cmd + + def decompress_cmd + if ENV['DECOMPRESS_CMD'].present? + puts "Using custom DECOMPRESS_CMD '#{ENV['DECOMPRESS_CMD']}'" + ENV['DECOMPRESS_CMD'] + else + "gzip -cd" + end end + strong_memoize_attr :decompress_cmd end end diff --git a/lib/backup/repositories.rb b/lib/backup/repositories.rb index 46825dbd203..c3154ccfbb5 100644 --- a/lib/backup/repositories.rb +++ b/lib/backup/repositories.rb @@ -38,7 +38,6 @@ module Backup ensure strategy.finish! - cleanup_snippets_without_repositories restore_object_pools end @@ -133,24 +132,6 @@ module Backup pool.schedule end end - - # Snippets without a repository should be removed because they failed to import - # due to having invalid repositories - def cleanup_snippets_without_repositories - invalid_snippets = [] - - snippet_relation.find_each(batch_size: 1000).each do |snippet| - response = Snippets::RepositoryValidationService.new(nil, snippet).execute - next if response.success? 
- - snippet.repository.remove - progress.puts("Snippet #{snippet.full_path} can't be restored: #{response.message}") - - invalid_snippets << snippet.id - end - - Snippet.id_in(invalid_snippets).delete_all - end end end |