diff options
author | Stan Hu <stanhu@gmail.com> | 2018-08-14 17:09:58 +0300 |
---|---|---|
committer | Stan Hu <stanhu@gmail.com> | 2018-08-14 17:09:58 +0300 |
commit | 24204ec7eed6359aa3aaaca4073cbbe2ced4817c (patch) | |
tree | e57ea7b0e0795f8464645882296b3f6f1c7baf11 | |
parent | 7edf64c1951e2184890403ab60c26381dd81e942 (diff) |
Abort domain scan if a failure is encountered
This prevents the total domain list from being cleared out completely while
the system is running.
Closes https://gitlab.com/gitlab-com/infrastructure/issues/4749
-rw-r--r-- | internal/domain/map.go | 2 | ||||
-rw-r--r-- | metrics/metrics.go | 6 |
2 files changed, 8 insertions, 0 deletions
diff --git a/internal/domain/map.go b/internal/domain/map.go index 943f5c20..d4a9764c 100644 --- a/internal/domain/map.go +++ b/internal/domain/map.go @@ -206,6 +206,8 @@ func Watch(rootDomain string, updater domainsUpdater, interval time.Duration) { dm := make(Map) if err := dm.ReadGroups(rootDomain); err != nil { log.WithError(err).Warn("domain scan failed") + metrics.FailedDomainUpdates.Inc() + continue } duration := time.Since(started).Seconds() diff --git a/metrics/metrics.go b/metrics/metrics.go index b15c6711..edc72398 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -11,6 +11,12 @@ var ( Help: "The total number of sites served by this Pages app", }) + // FailedDomainUpdates counts the number of failed site updates + FailedDomainUpdates = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "gitlab_pages_domains_failed_total", + Help: "The total number of site updates that have failed since daemon start", + }) + // DomainUpdates counts the number of site updates processed DomainUpdates = prometheus.NewCounter(prometheus.CounterOpts{ Name: "gitlab_pages_domains_updated_total", |