diff options
author | Pavlo Strokov <pstrokov@gitlab.com> | 2021-09-01 13:05:44 +0300 |
---|---|---|
committer | Pavlo Strokov <pstrokov@gitlab.com> | 2021-09-30 00:09:54 +0300 |
commit | 4aa02bd441b23818a39cd5966d442d48e88a3b3c (patch) | |
tree | 321c6241010c656e17dc598d1afe5569132dba93 /internal | |
parent | 36bd7e0f2f90d6a3ad379e084314fdea681b7118 (diff) |
repoclean: Configure the execution
The repository cleanup operation is more of an exception than
a required background task, which is why we need the ability
to configure whether we want it to run and how often it should run.
The default for it is to run once a day. If check_interval is
set to 0 we won't start the task at all.
Also, as each storage can have a lot of repositories it makes
sense to process them in batches. The size of the batch is
another configuration option for the operation we want. The third
option, run_interval, ensures the operation runs no more often
than the configured value.
Part of: https://gitlab.com/gitlab-org/gitaly/-/issues/3719
Diffstat (limited to 'internal')
-rw-r--r-- | internal/praefect/config/config.go | 44 | ||||
-rw-r--r-- | internal/praefect/config/config_test.go | 32 | ||||
-rw-r--r-- | internal/praefect/config/testdata/config.overwritedefaults.toml | 5 | ||||
-rw-r--r-- | internal/praefect/config/testdata/config.toml | 5 |
4 files changed, 80 insertions, 6 deletions
diff --git a/internal/praefect/config/config.go b/internal/praefect/config/config.go index 6dc66b2b7..0a26236e0 100644 --- a/internal/praefect/config/config.go +++ b/internal/praefect/config/config.go @@ -36,6 +36,9 @@ const ( ElectionStrategySQL ElectionStrategy = "sql" // ElectionStrategyPerRepository configures an SQL based strategy that elects different primaries per repository. ElectionStrategyPerRepository ElectionStrategy = "per_repository" + + minimalSyncCheckInterval = time.Minute + minimalSyncRunInterval = time.Minute ) type Failover struct { @@ -122,10 +125,10 @@ type Config struct { DB `toml:"database"` Failover Failover `toml:"failover"` // Keep for legacy reasons: remove after Omnibus has switched - FailoverEnabled bool `toml:"failover_enabled"` - MemoryQueueEnabled bool `toml:"memory_queue_enabled"` - GracefulStopTimeout config.Duration `toml:"graceful_stop_timeout"` - + FailoverEnabled bool `toml:"failover_enabled"` + MemoryQueueEnabled bool `toml:"memory_queue_enabled"` + GracefulStopTimeout config.Duration `toml:"graceful_stop_timeout"` + RepositoriesCleanup RepositoriesCleanup `toml:"repositories_cleanup"` // ForceCreateRepositories will enable force-creation of repositories in the // coordinator when routing repository-scoped mutators. This must never be used // outside of tests. 
@@ -156,7 +159,8 @@ func FromFile(filePath string) (Config, error) { Replication: DefaultReplicationConfig(), Prometheus: prometheus.DefaultConfig(), // Sets the default Failover, to be overwritten when deserializing the TOML - Failover: Failover{Enabled: true, ElectionStrategy: ElectionStrategyPerRepository}, + Failover: Failover{Enabled: true, ElectionStrategy: ElectionStrategyPerRepository}, + RepositoriesCleanup: DefaultRepositoriesCleanup(), } if err := toml.Unmarshal(b, conf); err != nil { return Config{}, err @@ -249,6 +253,15 @@ func (c *Config) Validate() error { } } + if c.RepositoriesCleanup.RunInterval.Duration() > 0 { + if c.RepositoriesCleanup.CheckInterval.Duration() < minimalSyncCheckInterval { + return fmt.Errorf("repositories_cleanup.check_interval is less then %s, which could lead to a database performance problem", minimalSyncCheckInterval.String()) + } + if c.RepositoriesCleanup.RunInterval.Duration() < minimalSyncRunInterval { + return fmt.Errorf("repositories_cleanup.run_interval is less then %s, which could lead to a database performance problem", minimalSyncRunInterval.String()) + } + } + return nil } @@ -416,3 +429,24 @@ func (db DB) ToPQString(direct bool) string { return strings.Join(fields, " ") } + +// RepositoriesCleanup configures repository synchronisation. +type RepositoriesCleanup struct { + // CheckInterval is a time period used to check if operation should be executed. + // It is recommended to keep it less than run_interval configuration as some + // nodes may be out of service, so they can be stale for too long. + CheckInterval config.Duration `toml:"check_interval"` + // RunInterval: the check runs if the previous operation was done at least RunInterval before. + RunInterval config.Duration `toml:"run_interval"` + // RepositoriesInBatch is the number of repositories to pass as a batch for processing. 
+ RepositoriesInBatch int `toml:"repositories_in_batch"` +} + +// DefaultRepositoriesCleanup contains default configuration values for the RepositoriesCleanup. +func DefaultRepositoriesCleanup() RepositoriesCleanup { + return RepositoriesCleanup{ + CheckInterval: config.Duration(30 * time.Minute), + RunInterval: config.Duration(24 * time.Hour), + RepositoriesInBatch: 16, + } +} diff --git a/internal/praefect/config/config_test.go b/internal/praefect/config/config_test.go index 205f2a57d..16280d9b1 100644 --- a/internal/praefect/config/config_test.go +++ b/internal/praefect/config/config_test.go @@ -198,6 +198,20 @@ func TestConfigValidation(t *testing.T) { }, errMsg: `virtual storage "default" has a default replication factor (2) which is higher than the number of storages (1)`, }, + { + desc: "repositories_cleanup minimal duration is too low", + changeConfig: func(cfg *Config) { + cfg.RepositoriesCleanup.CheckInterval = config.Duration(minimalSyncCheckInterval - time.Nanosecond) + }, + errMsg: `repositories_cleanup.check_interval is less then 1m0s, which could lead to a database performance problem`, + }, + { + desc: "repositories_cleanup minimal duration is too low", + changeConfig: func(cfg *Config) { + cfg.RepositoriesCleanup.RunInterval = config.Duration(minimalSyncRunInterval - time.Nanosecond) + }, + errMsg: `repositories_cleanup.run_interval is less then 1m0s, which could lead to a database performance problem`, + }, } for _, tc := range testCases { @@ -209,7 +223,8 @@ func TestConfigValidation(t *testing.T) { {Name: "default", Nodes: vs1Nodes}, {Name: "secondary", Nodes: vs2Nodes}, }, - Failover: Failover{ElectionStrategy: ElectionStrategySQL}, + Failover: Failover{ElectionStrategy: ElectionStrategySQL}, + RepositoriesCleanup: DefaultRepositoriesCleanup(), } tc.changeConfig(&config) @@ -312,6 +327,11 @@ func TestConfigParsing(t *testing.T) { BootstrapInterval: config.Duration(1 * time.Second), MonitorInterval: config.Duration(3 * time.Second), }, + 
RepositoriesCleanup: RepositoriesCleanup{ + CheckInterval: config.Duration(time.Second), + RunInterval: config.Duration(3 * time.Second), + RepositoriesInBatch: 10, + }, }, }, { @@ -331,6 +351,11 @@ func TestConfigParsing(t *testing.T) { BootstrapInterval: config.Duration(5 * time.Second), MonitorInterval: config.Duration(10 * time.Second), }, + RepositoriesCleanup: RepositoriesCleanup{ + CheckInterval: config.Duration(time.Second), + RunInterval: config.Duration(4 * time.Second), + RepositoriesInBatch: 11, + }, }, }, { @@ -347,6 +372,11 @@ func TestConfigParsing(t *testing.T) { BootstrapInterval: config.Duration(time.Second), MonitorInterval: config.Duration(3 * time.Second), }, + RepositoriesCleanup: RepositoriesCleanup{ + CheckInterval: config.Duration(30 * time.Minute), + RunInterval: config.Duration(24 * time.Hour), + RepositoriesInBatch: 16, + }, }, }, { diff --git a/internal/praefect/config/testdata/config.overwritedefaults.toml b/internal/praefect/config/testdata/config.overwritedefaults.toml index 9b204b1fc..e1834e2e1 100644 --- a/internal/praefect/config/testdata/config.overwritedefaults.toml +++ b/internal/praefect/config/testdata/config.overwritedefaults.toml @@ -11,3 +11,8 @@ election_strategy = "local" read_only_after_failover = false bootstrap_interval = "5s" monitor_interval = "10s" + +[repositories_cleanup] +check_interval = "1s" +run_interval = "4s" +repositories_in_batch = 11 diff --git a/internal/praefect/config/testdata/config.toml b/internal/praefect/config/testdata/config.toml index 09175ed50..7f2464670 100644 --- a/internal/praefect/config/testdata/config.toml +++ b/internal/praefect/config/testdata/config.toml @@ -71,3 +71,8 @@ sslrootcert = "/path/to/sp/root-cert" error_threshold_window = "20s" write_error_threshold_count = 1500 read_error_threshold_count = 100 + +[repositories_cleanup] +check_interval = "1s" +run_interval = "3s" +repositories_in_batch = 10 |