Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGitLab Bot <gitlab-bot@gitlab.com>2023-12-19 14:01:45 +0300
committerGitLab Bot <gitlab-bot@gitlab.com>2023-12-19 14:01:45 +0300
commit9297025d0b7ddf095eb618dfaaab2ff8f2018d8b (patch)
tree865198c01d1824a9b098127baa3ab980c9cd2c06 /lib/backup
parent6372471f43ee03c05a7c1f8b0c6ac6b8a7431dbe (diff)
Add latest changes from gitlab-org/gitlab@16-7-stable-eev16.7.0-rc42
Diffstat (limited to 'lib/backup')
-rw-r--r--lib/backup/database.rb94
-rw-r--r--lib/backup/database_configuration.rb107
-rw-r--r--lib/backup/database_connection.rb59
-rw-r--r--lib/backup/database_model.rb19
-rw-r--r--lib/backup/dump/postgres.rb13
-rw-r--r--lib/backup/files.rb14
-rw-r--r--lib/backup/helper.rb29
-rw-r--r--lib/backup/repositories.rb19
8 files changed, 263 insertions, 91 deletions
diff --git a/lib/backup/database.rb b/lib/backup/database.rb
index 58a8c19c1ce..a0eaccb1ca4 100644
--- a/lib/backup/database.rb
+++ b/lib/backup/database.rb
@@ -24,44 +24,37 @@ module Backup
end
override :dump
- def dump(destination_dir, backup_id)
+ def dump(destination_dir, _)
FileUtils.mkdir_p(destination_dir)
- each_database(destination_dir) do |database_name, current_db|
- model = current_db[:model]
- snapshot_id = current_db[:snapshot_id]
+ each_database(destination_dir) do |backup_connection|
+ pg_env = backup_connection.database_configuration.pg_env_variables
+ active_record_config = backup_connection.database_configuration.activerecord_variables
+ pg_database_name = active_record_config[:database]
- pg_env = model.config[:pg_env]
- connection = model.connection
- active_record_config = model.config[:activerecord]
- pg_database = active_record_config[:database]
+ dump_file_name = file_name(destination_dir, backup_connection.connection_name)
+ FileUtils.rm_f(dump_file_name)
- db_file_name = file_name(destination_dir, database_name)
- FileUtils.rm_f(db_file_name)
-
- progress.print "Dumping PostgreSQL database #{pg_database} ... "
+ progress.print "Dumping PostgreSQL database #{pg_database_name} ... "
- pgsql_args = ["--clean"] # Pass '--clean' to include 'DROP TABLE' statements in the DB dump.
- pgsql_args << '--if-exists'
- pgsql_args << "--snapshot=#{snapshot_id}" if snapshot_id
+ schemas = []
if Gitlab.config.backup.pg_schema
- pgsql_args << '-n'
- pgsql_args << Gitlab.config.backup.pg_schema
-
- Gitlab::Database::EXTRA_SCHEMAS.each do |schema|
- pgsql_args << '-n'
- pgsql_args << schema.to_s
- end
+ schemas << Gitlab.config.backup.pg_schema
+ schemas.push(*Gitlab::Database::EXTRA_SCHEMAS.map(&:to_s))
end
- success = with_transient_pg_env(pg_env) do
- Backup::Dump::Postgres.new.dump(pg_database, db_file_name, pgsql_args)
- end
+ pg_dump = ::Gitlab::Backup::Cli::Utils::PgDump.new(
+ database_name: pg_database_name,
+ snapshot_id: backup_connection.snapshot_id,
+ schemas: schemas,
+ env: pg_env)
+
+ success = Backup::Dump::Postgres.new.dump(dump_file_name, pg_dump)
- connection.rollback_transaction if snapshot_id
+ backup_connection.release_snapshot! if backup_connection.snapshot_id
- raise DatabaseBackupError.new(active_record_config, db_file_name) unless success
+ raise DatabaseBackupError.new(active_record_config, dump_file_name) unless success
report_success(success)
progress.flush
@@ -76,10 +69,10 @@ module Backup
override :restore
def restore(destination_dir, backup_id)
- base_models_for_backup.each do |database_name, _base_model|
- backup_model = Backup::DatabaseModel.new(database_name)
+ base_models_for_backup.each do |database_name, _|
+ backup_connection = Backup::DatabaseConnection.new(database_name)
- config = backup_model.config[:activerecord]
+ config = backup_connection.database_configuration.activerecord_variables
db_file_name = file_name(destination_dir, database_name)
database = config[:database]
@@ -100,10 +93,10 @@ module Backup
# hanging out from a failed upgrade
drop_tables(database_name)
- pg_env = backup_model.config[:pg_env]
+ pg_env = backup_connection.database_configuration.pg_env_variables
success = with_transient_pg_env(pg_env) do
decompress_rd, decompress_wr = IO.pipe
- decompress_pid = spawn(*%w[gzip -cd], out: decompress_wr, in: db_file_name)
+ decompress_pid = spawn(decompress_cmd, out: decompress_wr, in: db_file_name)
decompress_wr.close
status, @errors =
@@ -235,6 +228,7 @@ module Backup
puts_time 'done'.color(:green)
end
+ # @deprecated This will be removed when restore operation is refactored to use extended_env directly
def with_transient_pg_env(extended_env)
ENV.merge!(extended_env)
result = yield
@@ -248,32 +242,36 @@ module Backup
end
def each_database(destination_dir, &block)
- databases = {}
+ databases = []
+
+ # each connection will loop through all database connections defined in `database.yml`
+ # and reject the ones that are shared, so we don't get duplicates
+ #
+ # we consider a connection to be shared when it has `database_tasks: false`
::Gitlab::Database::EachDatabase.each_connection(
only: base_models_for_backup.keys, include_shared: false
- ) do |_connection, name|
- next if databases[name]
-
- backup_model = Backup::DatabaseModel.new(name)
-
- databases[name] = {
- model: backup_model
- }
+ ) do |_, database_connection_name|
+ backup_connection = Backup::DatabaseConnection.new(database_connection_name)
+ databases << backup_connection
- next unless Gitlab::Database.database_mode == Gitlab::Database::MODE_MULTIPLE_DATABASES
-
- connection = backup_model.connection
+ next unless multiple_databases?
begin
- Gitlab::Database::TransactionTimeoutSettings.new(connection).disable_timeouts
- connection.begin_transaction(isolation: :repeatable_read)
- databases[name][:snapshot_id] = connection.select_value("SELECT pg_export_snapshot()")
+ # Trigger a transaction snapshot export that will be used by pg_dump later on
+ backup_connection.export_snapshot!
rescue ActiveRecord::ConnectionNotEstablished
- raise Backup::DatabaseBackupError.new(backup_model.config[:activerecord], file_name(destination_dir, name))
+ raise Backup::DatabaseBackupError.new(
+ backup_connection.database_configuration.activerecord_variables,
+ file_name(destination_dir, database_connection_name)
+ )
end
end
databases.each(&block)
end
+
+ def multiple_databases?
+ Gitlab::Database.database_mode == Gitlab::Database::MODE_MULTIPLE_DATABASES
+ end
end
end
diff --git a/lib/backup/database_configuration.rb b/lib/backup/database_configuration.rb
new file mode 100644
index 00000000000..1a6a476f9c1
--- /dev/null
+++ b/lib/backup/database_configuration.rb
@@ -0,0 +1,107 @@
+# frozen_string_literal: true
+
+module Backup
+ class DatabaseConfiguration
+ # Connection name is the key used in `config/database.yml` for multi-database connection configuration
+ #
+ # @return [String]
+ attr_reader :connection_name
+
+ # ActiveRecord base model that is configured to connect to the database identified by connection_name key
+ #
+ # @return [ActiveRecord::Base]
+ attr_reader :source_model
+
+ # Initializes configuration
+ #
+ # @param [String] connection_name the key from `database.yml` for multi-database connection configuration
+ def initialize(connection_name)
+ @connection_name = connection_name
+ @source_model = Gitlab::Database.database_base_models_with_gitlab_shared[connection_name] ||
+ Gitlab::Database.database_base_models_with_gitlab_shared['main']
+ @activerecord_database_config = ActiveRecord::Base.configurations.find_db_config(connection_name)
+ end
+
+ # ENV variables that can override each database configuration
+ # These are used along with OVERRIDE_PREFIXES and the database name
+ # @see #process_config_overrides!
+ SUPPORTED_OVERRIDES = {
+ username: 'PGUSER',
+ host: 'PGHOST',
+ port: 'PGPORT',
+ password: 'PGPASSWORD',
+ # SSL
+ sslmode: 'PGSSLMODE',
+ sslkey: 'PGSSLKEY',
+ sslcert: 'PGSSLCERT',
+ sslrootcert: 'PGSSLROOTCERT',
+ sslcrl: 'PGSSLCRL',
+ sslcompression: 'PGSSLCOMPRESSION'
+ }.freeze
+
+ # Prefixes used for ENV variables overriding database configuration
+ OVERRIDE_PREFIXES = %w[GITLAB_BACKUP_ GITLAB_OVERRIDE_].freeze
+
+ # Return the HashConfig for the database
+ #
+ # @return [ActiveRecord::DatabaseConfigurations::HashConfig]
+ def activerecord_configuration
+ ActiveRecord::DatabaseConfigurations::HashConfig.new(
+ @activerecord_database_config.env_name,
+ connection_name,
+ activerecord_variables
+ )
+ end
+
+ # Return postgres ENV variable values for current database with overridden values
+ #
+ # @return [Hash<String,String>] hash of postgres ENV variables
+ def pg_env_variables
+ process_config_overrides! unless @pg_env_variables
+
+ @pg_env_variables
+ end
+
+ # Return activerecord configuration values for current database with overridden values
+ #
+ # @return [Hash<String,String>] activerecord database.yml configuration compatible values
+ def activerecord_variables
+ process_config_overrides! unless @activerecord_variables
+
+ @activerecord_variables
+ end
+
+ private
+
+ def process_config_overrides!
+ @activerecord_variables = original_activerecord_config
+ @pg_env_variables = {}
+
+ SUPPORTED_OVERRIDES.each do |config_key, env_variable_name|
+ # This enables the use of different PostgreSQL settings in
+ # case PgBouncer is used. PgBouncer clears the search path,
+ # which wreaks havoc on Rails if connections are reused.
+ OVERRIDE_PREFIXES.each do |override_prefix|
+ override_all = "#{override_prefix}#{env_variable_name}"
+ override_db = "#{override_prefix}#{connection_name.upcase}_#{env_variable_name}"
+ val = ENV[override_db].presence ||
+ ENV[override_all].presence ||
+ @activerecord_variables[config_key].to_s.presence
+
+ next unless val
+
+ @pg_env_variables[env_variable_name] = val
+ @activerecord_variables[config_key] = val
+ end
+ end
+ end
+
+ # Return the database configuration from rails config/database.yml file
+ # in the format expected by ActiveRecord::DatabaseConfigurations::HashConfig
+ #
+ # @return [Hash] configuration hash
+ def original_activerecord_config
+ @activerecord_database_config.configuration_hash.dup
+ end
+ end
+end
diff --git a/lib/backup/database_connection.rb b/lib/backup/database_connection.rb
new file mode 100644
index 00000000000..f3f0a5dfcb5
--- /dev/null
+++ b/lib/backup/database_connection.rb
@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+
+module Backup
+ class DatabaseConnection
+ attr_reader :database_configuration, :snapshot_id
+
+ delegate :connection_name, to: :database_configuration
+ delegate :connection, to: :@backup_model
+
+ # Initializes a database connection
+ #
+ # @param [String] connection_name the key from `database.yml` for multi-database connection configuration
+ def initialize(connection_name)
+ @database_configuration = Backup::DatabaseConfiguration.new(connection_name)
+ @backup_model = backup_model
+ @snapshot_id = nil
+
+ configure_backup_model
+ end
+
+ # Start a new transaction and run pg_export_snapshot()
+ # Returns the snapshot identifier
+ #
+ # @return [String] snapshot identifier
+ def export_snapshot!
+ Gitlab::Database::TransactionTimeoutSettings.new(connection).disable_timeouts
+
+ connection.begin_transaction(isolation: :repeatable_read)
+ @snapshot_id = connection.select_value("SELECT pg_export_snapshot()")
+ end
+
+ # Rollback the transaction to release the effects of pg_export_snapshot()
+ def release_snapshot!
+ return unless snapshot_id
+
+ connection.rollback_transaction
+ @snapshot_id = nil
+ end
+
+ private
+
+ delegate :activerecord_configuration, to: :database_configuration, private: true
+
+ def configure_backup_model
+ @backup_model.establish_connection(activerecord_configuration)
+
+ Gitlab::Database::LoadBalancing::Setup.new(@backup_model).setup
+ end
+
+ # Creates a disposable model to be used to host the Backup connection only
+ def backup_model
+ klass_name = connection_name.camelize
+
+ return "#{self.class.name}::#{klass_name}".constantize if self.class.const_defined?(klass_name.to_sym, false)
+
+ self.class.const_set(klass_name, Class.new(ApplicationRecord))
+ end
+ end
+end
diff --git a/lib/backup/database_model.rb b/lib/backup/database_model.rb
index b2202ad7794..228a7fa5383 100644
--- a/lib/backup/database_model.rb
+++ b/lib/backup/database_model.rb
@@ -16,7 +16,7 @@ module Backup
sslcompression: 'PGSSLCOMPRESSION'
}.freeze
- OVERRIDE_PREFIX = "GITLAB_BACKUP_"
+ OVERRIDE_PREFIXES = %w[GITLAB_BACKUP_ GITLAB_OVERRIDE_].freeze
attr_reader :config
@@ -31,7 +31,8 @@ module Backup
private
def configure_model(name)
- source_model = Gitlab::Database.database_base_models_with_gitlab_shared[name]
+ source_model = Gitlab::Database.database_base_models_with_gitlab_shared[name] ||
+ Gitlab::Database.database_base_models_with_gitlab_shared['main']
@model = backup_model_for(name)
@@ -67,14 +68,16 @@ module Backup
# This enables the use of different PostgreSQL settings in
# case PgBouncer is used. PgBouncer clears the search path,
# which wreaks havoc on Rails if connections are reused.
- override_all = "#{OVERRIDE_PREFIX}#{arg}"
- override_db = "#{OVERRIDE_PREFIX}#{name.upcase}_#{arg}"
- val = ENV[override_db].presence || ENV[override_all].presence || config[opt].to_s.presence
+ OVERRIDE_PREFIXES.each do |override_prefix|
+ override_all = "#{override_prefix}#{arg}"
+ override_db = "#{override_prefix}#{name.upcase}_#{arg}"
+ val = ENV[override_db].presence || ENV[override_all].presence || config[opt].to_s.presence
- next unless val
+ next unless val
- db_config[:pg_env][arg] = val
- db_config[:activerecord][opt] = val
+ db_config[:pg_env][arg] = val
+ db_config[:activerecord][opt] = val
+ end
end
db_config
diff --git a/lib/backup/dump/postgres.rb b/lib/backup/dump/postgres.rb
index 1a5128b5a6b..80a49971140 100644
--- a/lib/backup/dump/postgres.rb
+++ b/lib/backup/dump/postgres.rb
@@ -4,14 +4,21 @@ module Backup
class Postgres
include Backup::Helper
+ # Owner can read/write, group no permission, others no permission
FILE_PERMISSION = 0o600
- def dump(database_name, output_file, pgsql_args)
+ # Triggers PgDump and outputs to the provided file path
+ #
+ # @param [String] output_file_path full path to the output destination
+ # @param [Gitlab::Backup::Cli::Utils::PgDump] pg_dump
+ # @return [Boolean] whether pg_dump finished with success
+ def dump(output_file_path, pg_dump)
compress_rd, compress_wr = IO.pipe
- compress_pid = spawn(gzip_cmd, in: compress_rd, out: [output_file, 'w', FILE_PERMISSION])
+
+ compress_pid = spawn(compress_cmd, in: compress_rd, out: [output_file_path, 'w', FILE_PERMISSION])
compress_rd.close
- dump_pid = Process.spawn('pg_dump', *pgsql_args, database_name, out: compress_wr)
+ dump_pid = pg_dump.spawn(output: compress_wr)
compress_wr.close
[compress_pid, dump_pid].all? do |pid|
diff --git a/lib/backup/files.rb b/lib/backup/files.rb
index b8ff7fff591..e3a8290e2e3 100644
--- a/lib/backup/files.rb
+++ b/lib/backup/files.rb
@@ -40,14 +40,14 @@ module Backup
end
tar_cmd = [tar, exclude_dirs(:tar), %W[-C #{backup_files_realpath} -cf - .]].flatten
- status_list, output = run_pipeline!([tar_cmd, gzip_cmd], out: [backup_tarball, 'w', 0600])
+ status_list, output = run_pipeline!([tar_cmd, compress_cmd], out: [backup_tarball, 'w', 0600])
FileUtils.rm_rf(backup_files_realpath)
else
tar_cmd = [tar, exclude_dirs(:tar), %W[-C #{app_files_realpath} -cf - .]].flatten
- status_list, output = run_pipeline!([tar_cmd, gzip_cmd], out: [backup_tarball, 'w', 0600])
+ status_list, output = run_pipeline!([tar_cmd, compress_cmd], out: [backup_tarball, 'w', 0600])
end
- unless pipeline_succeeded?(tar_status: status_list[0], gzip_status: status_list[1], output: output)
+ unless pipeline_succeeded?(tar_status: status_list[0], compress_status: status_list[1], output: output)
raise_custom_error(backup_tarball)
end
end
@@ -56,9 +56,9 @@ module Backup
def restore(backup_tarball, backup_id)
backup_existing_files_dir(backup_tarball)
- cmd_list = [%w[gzip -cd], %W[#{tar} --unlink-first --recursive-unlink -C #{app_files_realpath} -xf -]]
+ cmd_list = [decompress_cmd, %W[#{tar} --unlink-first --recursive-unlink -C #{app_files_realpath} -xf -]]
status_list, output = run_pipeline!(cmd_list, in: backup_tarball)
- unless pipeline_succeeded?(gzip_status: status_list[0], tar_status: status_list[1], output: output)
+ unless pipeline_succeeded?(compress_status: status_list[0], tar_status: status_list[1], output: output)
raise Backup::Error, "Restore operation failed: #{output}"
end
end
@@ -108,8 +108,8 @@ module Backup
noncritical_warnings.map { |w| warning =~ w }.any?
end
- def pipeline_succeeded?(tar_status:, gzip_status:, output:)
- return false unless gzip_status&.success?
+ def pipeline_succeeded?(tar_status:, compress_status:, output:)
+ return false unless compress_status&.success?
tar_status&.success? || tar_ignore_non_success?(tar_status.exitstatus, output)
end
diff --git a/lib/backup/helper.rb b/lib/backup/helper.rb
index 2c2e35add0e..3af786654be 100644
--- a/lib/backup/helper.rb
+++ b/lib/backup/helper.rb
@@ -2,6 +2,8 @@
module Backup
module Helper
+ include ::Gitlab::Utils::StrongMemoize
+
def access_denied_error(path)
message = <<~EOS
@@ -30,12 +32,27 @@ module Backup
raise message
end
- def gzip_cmd
- @gzip_cmd ||= if ENV['GZIP_RSYNCABLE'] == 'yes'
- "gzip --rsyncable -c -1"
- else
- "gzip -c -1"
- end
+ def compress_cmd
+ if ENV['COMPRESS_CMD'].present?
+ puts "Using custom COMPRESS_CMD '#{ENV['COMPRESS_CMD']}'"
+ puts "Ignoring GZIP_RSYNCABLE" if ENV['GZIP_RSYNCABLE'] == 'yes'
+ ENV['COMPRESS_CMD']
+ elsif ENV['GZIP_RSYNCABLE'] == 'yes'
+ "gzip --rsyncable -c -1"
+ else
+ "gzip -c -1"
+ end
+ end
+ strong_memoize_attr :compress_cmd
+
+ def decompress_cmd
+ if ENV['DECOMPRESS_CMD'].present?
+ puts "Using custom DECOMPRESS_CMD '#{ENV['DECOMPRESS_CMD']}'"
+ ENV['DECOMPRESS_CMD']
+ else
+ "gzip -cd"
+ end
end
+ strong_memoize_attr :decompress_cmd
end
end
diff --git a/lib/backup/repositories.rb b/lib/backup/repositories.rb
index 46825dbd203..c3154ccfbb5 100644
--- a/lib/backup/repositories.rb
+++ b/lib/backup/repositories.rb
@@ -38,7 +38,6 @@ module Backup
ensure
strategy.finish!
- cleanup_snippets_without_repositories
restore_object_pools
end
@@ -133,24 +132,6 @@ module Backup
pool.schedule
end
end
-
- # Snippets without a repository should be removed because they failed to import
- # due to having invalid repositories
- def cleanup_snippets_without_repositories
- invalid_snippets = []
-
- snippet_relation.find_each(batch_size: 1000).each do |snippet|
- response = Snippets::RepositoryValidationService.new(nil, snippet).execute
- next if response.success?
-
- snippet.repository.remove
- progress.puts("Snippet #{snippet.full_path} can't be restored: #{response.message}")
-
- invalid_snippets << snippet.id
- end
-
- Snippet.id_in(invalid_snippets).delete_all
- end
end
end