1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
|
# frozen_string_literal: true
# Evaluated before any task is defined; the result is handed to
# `ActiveRecord::Tasks::DatabaseTasks.for_each` further down in this file to enumerate
# the per-database `db:*:<name>` tasks that receive the validation prerequisite.
databases = ActiveRecord::Tasks::DatabaseTasks.setup_initial_database_yaml
namespace :gitlab do
namespace :db do
# Key of the `ar_internal_metadata` row in which the name of the database
# configuration entry is stored (written by `insert_db_identifier`, read back
# through the `$1` bind of the query below).
DB_CONFIG_NAME_KEY = 'gitlab_db_config_name'
# Identifier query used when the `ar_internal_metadata` table does not exist:
# the system identifier reported by `pg_control_system()` plus the current database name.
DB_IDENTIFIER_SQL = <<-SQL
SELECT system_identifier, current_database()
FROM pg_control_system()
SQL
# Identifier query used when `ar_internal_metadata` exists. In addition to the
# system identifier and database name it returns the stored configuration name
# and the row's `created_at` timestamp (both NULL when the row is missing,
# because of the LEFT JOIN).
# The timestamp is part of the identifier so that race conditions are handled
# properly: presumably a concurrent re-stamp of the row changes the timestamp and
# makes the validation fail rather than silently match (confirm). This
# should not really happen in a production environment.
DB_IDENTIFIER_WITH_DB_CONFIG_NAME_SQL = <<-SQL
SELECT
system_identifier, current_database(),
value as db_config_name, created_at as timestamp
FROM pg_control_system()
LEFT JOIN ar_internal_metadata ON ar_internal_metadata.key=$1
SQL
# Checks that the non-replica entries of `config/database.yml` for the current
# Rails environment are consistent with each other:
#   * the primary entry has `database_tasks: true`,
#   * no physical database is claimed by more than one entry with `database_tasks: true`,
#   * every entry that declares it shares another entry's database really points at it.
# Problems are collected as warnings and then either raised or printed, depending on
# the `GITLAB_VALIDATE_DATABASE_CONFIG` environment variable.
desc 'Validates `config/database.yml` to ensure a correct behavior is configured'
task validate_config: :environment do
# Remembered so the `ensure` below can restore it: the helper methods re-point
# `ActiveRecord::Base` at every configuration they inspect.
original_db_config = ActiveRecord::Base.connection_db_config # rubocop:disable Database/MultipleDatabases
# `include_replicas:` is the legacy name of the option that fetches all hidden entries
# (`replica: true` or `database_tasks: false`).
# Once we upgrade to Rails 7.x this should be changed to `include_hidden: true`
# Ref.: https://github.com/rails/rails/blob/f2d9316ba965e150ad04596085ee10eea4f58d3e/activerecord/lib/active_record/database_configurations.rb#L48
db_configs = ActiveRecord::Base.configurations.configs_for(env_name: Rails.env, include_replicas: true)
# Replicas are dropped again; only the `database_tasks: false` entries are wanted from the hidden set.
db_configs = db_configs.reject(&:replica?)
# `pg_control_system()` alone is not enough to discover matching database systems:
# after a cluster promotion it returns the same identifier as the main cluster.
# We therefore also store the configured database name in `ar_internal_metadata`.
# NOTE(review): the reverse order presumably lets the earliest-listed entry write last,
# so its name is the one left in a database shared by several entries — confirm.
db_configs.reverse_each do |db_config|
insert_db_identifier(db_config)
end
# Map each database connection to a unique identifier of system + database.
# `identifier` is nil when the database could not be reached or does not exist.
all_connections = db_configs.map do |db_config|
{
name: db_config.name,
config: db_config,
database_tasks?: db_config.database_tasks?,
identifier: get_db_identifier(db_config)
}
end
# Three views over the same records: grouped by physical database, the primary
# entry, and a lookup by configuration name.
unique_connections = all_connections.group_by { |connection| connection[:identifier] }
primary_connection = all_connections.find { |connection| ActiveRecord::Base.configurations.primary?(connection[:name]) }
named_connections = all_connections.index_by { |connection| connection[:name] }
warnings = []
# The primary entry (`main:`) should always have `database_tasks: true`
unless primary_connection[:database_tasks?]
warnings << "- The '#{primary_connection[:name]}' is required to use 'database_tasks: true'"
end
# Each unique database should have exactly one configuration with `database_tasks: true`
unique_connections.each do |identifier, connections|
# Entries without an identifier (unreachable or missing database) cannot be compared.
next unless identifier
connections_with_tasks = connections.select { |connection| connection[:database_tasks?] }
# Only more than one task-owning entry per database is a problem.
next unless connections_with_tasks.many?
names = connections_with_tasks.pluck(:name)
warnings << "- Many configurations (#{names.join(', ')}) " \
"share the same database (#{identifier}). " \
"This will result in failures provisioning or migrating this database. " \
"Ensure that additional databases are configured " \
"with 'database_tasks: false' or are pointing to a dedicated database host."
end
# Each configuration that `Gitlab::Database.db_config_share_with` reports as sharing
# another entry's database (presumably those with `database_tasks: false`, usually
# sharing with `main:` — confirm against that helper) must resolve to the same identifier.
all_connections.each do |connection|
share_with = Gitlab::Database.db_config_share_with(connection[:config])
next unless share_with
shared_connection = named_connections[share_with]
unless shared_connection
warnings << "- The '#{connection[:name]}' is expecting to share configuration with '#{share_with}', " \
"but no such is to be found."
next
end
# Skip if databases are yet to be provisioned
next unless connection[:identifier] && shared_connection[:identifier]
unless connection[:identifier] == shared_connection[:identifier]
warnings << "- The '#{connection[:name]}' since it is using 'database_tasks: false' " \
"should share database with '#{share_with}:'."
end
end
if warnings.any?
warnings.unshift("Database config validation failure:")
# Truthy `GITLAB_VALIDATE_DATABASE_CONFIG` => raise; falsy => only print the warnings.
# NOTE(review): with `default: true` an unset variable presumably takes the raising
# branch, so the earlier "warn (for now) by default in production" wording looks
# stale — confirm against `Gitlab::Utils.to_boolean`.
if Gitlab::Utils.to_boolean(ENV['GITLAB_VALIDATE_DATABASE_CONFIG'], default: true)
warnings << "Use `export GITLAB_VALIDATE_DATABASE_CONFIG=0` to ignore this validation."
raise warnings.join("\n")
else
warnings << "Use `export GITLAB_VALIDATE_DATABASE_CONFIG=1` to enforce this validation."
warn warnings.join("\n")
end
end
ensure
# Always switch `ActiveRecord::Base` back to the configuration captured at the start of the task.
ActiveRecord::Base.establish_connection(original_db_config) # rubocop: disable Database/EstablishConnection
end
# Make `gitlab:db:validate_config` a prerequisite of the migrate, schema-load and
# schema-dump tasks, both in their plain form and in their per-database
# (`<task>:<name>`) form.
validation_prerequisite = ['gitlab:db:validate_config']
guarded_tasks = %w[db:migrate db:schema:load db:schema:dump]
guarded_tasks.each do |task_name|
  Rake::Task[task_name].enhance(validation_prerequisite)
end
ActiveRecord::Tasks::DatabaseTasks.for_each(databases) do |name|
  guarded_tasks.each do |task_name|
    Rake::Task["#{task_name}:#{name}"].enhance(validation_prerequisite)
  end
end
# Connects `ActiveRecord::Base` to `db_config` and, when the `ar_internal_metadata`
# table exists, upserts a row keyed by DB_CONFIG_NAME_KEY whose value is the
# configuration's name.
#
# Connection failures and read-only databases are reported with `warn`; a missing
# database is ignored silently. Any other `ActiveRecord::StatementInvalid` is re-raised.
# The connection is left pointing at `db_config`; callers restore it themselves.
def insert_db_identifier(db_config)
  ActiveRecord::Base.establish_connection(db_config) # rubocop: disable Database/EstablishConnection
  return unless ActiveRecord::InternalMetadata.table_exists?

  stamped_at = Time.zone.now
  metadata_row = {
    key: DB_CONFIG_NAME_KEY,
    value: db_config.name,
    created_at: stamped_at,
    updated_at: stamped_at
  }
  ActiveRecord::InternalMetadata.upsert(metadata_row)
rescue ActiveRecord::ConnectionNotEstablished, PG::ConnectionBad => error
  warn "WARNING: Could not establish database connection for #{db_config.name}: #{error.message}"
rescue ActiveRecord::NoDatabaseError
  # The database has not been created yet, so there is nothing to stamp.
rescue ActiveRecord::StatementInvalid => error
  # Only a read-only database is tolerated here; everything else propagates.
  if error.cause.is_a?(PG::ReadOnlySqlTransaction)
    warn "WARNING: Could not write to the database #{db_config.name}: cannot execute UPSERT in a read-only transaction"
  else
    raise
  end
end
# Connects `ActiveRecord::Base` to `db_config` and returns the row that identifies
# the database behind it.
#
# When `ar_internal_metadata` exists the row also carries the stored configuration
# name and its timestamp; otherwise only the system identifier and database name.
# Returns nil when the connection cannot be established (after a `warn`) or when
# the database does not exist.
def get_db_identifier(db_config)
  ActiveRecord::Base.establish_connection(db_config) # rubocop: disable Database/EstablishConnection

  # Pick the query (and its bind values) first, then run it with a single call.
  query_args =
    if ActiveRecord::InternalMetadata.table_exists?
      [DB_IDENTIFIER_WITH_DB_CONFIG_NAME_SQL, nil, [DB_CONFIG_NAME_KEY]]
    else
      [DB_IDENTIFIER_SQL]
    end

  # rubocop:disable Database/MultipleDatabases
  ActiveRecord::Base.connection.select_one(*query_args)
  # rubocop:enable Database/MultipleDatabases
rescue ActiveRecord::ConnectionNotEstablished, PG::ConnectionBad => error
  warn "WARNING: Could not establish database connection for #{db_config.name}: #{error.message}"
rescue ActiveRecord::NoDatabaseError
  # Database not provisioned yet: no identifier.
end
end
end
|