diff options
author | Paul Okstad <pokstad@gitlab.com> | 2020-01-17 18:56:46 +0300 |
---|---|---|
committer | John Cai <jcai@gitlab.com> | 2020-01-17 18:56:46 +0300 |
commit | 467470812f065fb2e3df8dc6fde80e286c232f4e (patch) | |
tree | ed5aea3112bd89ae8a1e49207808df653a9bd995 | |
parent | abb7351a69cb6138170e6ab731ac4e1f5dc2deda (diff) |
Refine telemetry for cache walker
-rw-r--r-- | changelogs/unreleased/po-refine-cache-walker-logs.yml | 5 | ||||
-rw-r--r-- | internal/cache/prometheus.go | 39 | ||||
-rw-r--r-- | internal/cache/walker.go | 60 | ||||
-rw-r--r-- | internal/cache/walker_internal_test.go | 5 |
4 files changed, 76 insertions, 33 deletions
diff --git a/changelogs/unreleased/po-refine-cache-walker-logs.yml b/changelogs/unreleased/po-refine-cache-walker-logs.yml new file mode 100644 index 000000000..774130cd5 --- /dev/null +++ b/changelogs/unreleased/po-refine-cache-walker-logs.yml @@ -0,0 +1,5 @@ +--- +title: Refine telemetry for cache walker +merge_request: 1759 +author: +type: other diff --git a/internal/cache/prometheus.go b/internal/cache/prometheus.go index d25405854..40f5927e0 100644 --- a/internal/cache/prometheus.go +++ b/internal/cache/prometheus.go @@ -1,6 +1,9 @@ package cache -import "github.com/prometheus/client_golang/prometheus" +import ( + "github.com/prometheus/client_golang/prometheus" + "gitlab.com/gitlab-org/gitaly/internal/config" +) var ( requestTotals = prometheus.NewCounter( @@ -58,6 +61,20 @@ var ( Help: "Total number of errors during diskcache filesystem walks", }, ) + walkerEmptyDirTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "gitaly_diskcache_walker_empty_dir_total", + Help: "Total number of empty directories encountered", + }, + []string{"storage"}, + ) + walkerEmptyDirRemovalTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "gitaly_diskcache_walker_empty_dir_removal_total", + Help: "Total number of empty directories removed", + }, + []string{"storage"}, + ) ) func init() { @@ -70,6 +87,8 @@ func init() { prometheus.MustRegister(walkerCheckTotal) prometheus.MustRegister(walkerRemovalTotal) prometheus.MustRegister(walkerErrorTotal) + prometheus.MustRegister(walkerEmptyDirTotal) + prometheus.MustRegister(walkerEmptyDirRemovalTotal) } func countErr(err error) error { @@ -83,12 +102,14 @@ func countErr(err error) error { } var ( - countRequest = func() { requestTotals.Inc() } - countMiss = func() { missTotals.Inc() } - countWriteBytes = func(n float64) { bytesStoredtotals.Add(n) } - countReadBytes = func(n float64) { bytesFetchedtotals.Add(n) } - countLoserBytes = func(n float64) { bytesLoserTotals.Add(n) } - countWalkRemoval = func() { walkerRemovalTotal.Inc() } - countWalkCheck = func() { walkerCheckTotal.Inc() } - countWalkError = func() { walkerErrorTotal.Inc() } + countRequest = func() { requestTotals.Inc() } + countMiss = func() { missTotals.Inc() } + countWriteBytes = func(n float64) { bytesStoredtotals.Add(n) } + countReadBytes = func(n float64) { bytesFetchedtotals.Add(n) } + countLoserBytes = func(n float64) { bytesLoserTotals.Add(n) } + countWalkRemoval = func() { walkerRemovalTotal.Inc() } + countWalkCheck = func() { walkerCheckTotal.Inc() } + countWalkError = func() { walkerErrorTotal.Inc() } + countEmptyDir = func(s config.Storage) { walkerEmptyDirTotal.With(prometheus.Labels{"storage": s.Name}).Inc() } + countEmptyDirRemoval = func(s config.Storage) { walkerEmptyDirRemovalTotal.With(prometheus.Labels{"storage": s.Name}).Inc() } ) diff --git a/internal/cache/walker.go b/internal/cache/walker.go index 942d3b958..bdc13b634 100644 --- a/internal/cache/walker.go +++ b/internal/cache/walker.go @@ -9,6 +9,7 @@ import ( "io/ioutil" "os" "path/filepath" + "sync" "time" "github.com/sirupsen/logrus" @@ -26,16 +27,16 @@ func logWalkErr(err error, path, msg string) { Warn(msg) } -func cleanWalk(path string) error { +func cleanWalk(s config.Storage, path string) error { defer time.Sleep(100 * time.Microsecond) // relieve pressure countWalkCheck() entries, err := ioutil.ReadDir(path) if err != nil { if os.IsNotExist(err) { - logWalkErr(err, path, "unable to stat directory") return nil } + logWalkErr(err, path, "unable to stat directory") return err } @@ -43,32 +44,39 @@ func cleanWalk(path string) error { ePath := filepath.Join(path, e.Name()) if e.IsDir() { - if err := cleanWalk(ePath); err != nil { + if err := cleanWalk(s, ePath); err != nil { return err } continue } countWalkCheck() - if time.Since(e.ModTime()) >= staleAge { - if err := os.Remove(ePath); err != nil { - if os.IsNotExist(err) { - continue - } - logWalkErr(err, ePath, "unable to remove file") - return err + if time.Since(e.ModTime()) < staleAge { + continue // still fresh + } + + // file is stale + if err := os.Remove(ePath); err != nil { + if os.IsNotExist(err) { + continue } - countWalkRemoval() + logWalkErr(err, ePath, "unable to remove file") + return err } + countWalkRemoval() } files, err := ioutil.ReadDir(path) if err != nil { + if os.IsNotExist(err) { + return nil + } logWalkErr(err, path, "unable to stat directory after walk") return err } if len(files) == 0 { + countEmptyDir(s) if err := os.Remove(path); err != nil { if os.IsNotExist(err) { return nil @@ -76,6 +84,7 @@ func cleanWalk(path string) error { logWalkErr(err, path, "unable to remove empty directory") return err } + countEmptyDirRemoval(s) countWalkRemoval() } @@ -84,13 +93,13 @@ func cleanWalk(path string) error { const cleanWalkFrequency = 10 * time.Minute -func walkLoop(storageName, walkPath string) { - logrus.WithField("storage", storageName).Infof("Starting file walker for %s", walkPath) +func walkLoop(s config.Storage, walkPath string) { + logrus.WithField("storage", s.Name).Infof("Starting file walker for %s", walkPath) walkTick := time.NewTicker(cleanWalkFrequency) dontpanic.GoForever(time.Minute, func() { for { - if err := cleanWalk(walkPath); err != nil { - logrus.WithField("storage", storageName).Error(err) + if err := cleanWalk(s, walkPath); err != nil { + logrus.WithField("storage", s.Name).Error(err) } <-walkTick.C @@ -103,8 +112,8 @@ func startCleanWalker(storage config.Storage) { return } - walkLoop(storage.Name, tempdir.CacheDir(storage)) - walkLoop(storage.Name, tempdir.StateDir(storage)) + walkLoop(storage, tempdir.CacheDir(storage)) + walkLoop(storage, tempdir.StateDir(storage)) } var ( @@ -156,14 +165,21 @@ func moveAndClear(storage config.Storage) error { } func init() { + oncePerStorage := map[string]*sync.Once{} + var err error + config.RegisterHook(func(cfg config.Cfg) error { for _, storage := range cfg.Storages { - if err := moveAndClear(storage); err != nil { - return err + if _, ok := oncePerStorage[storage.Name]; !ok { + oncePerStorage[storage.Name] = new(sync.Once) } - - startCleanWalker(storage) + oncePerStorage[storage.Name].Do(func() { + if err = moveAndClear(storage); err != nil { + return + } + startCleanWalker(storage) + }) } - return nil + return err }) } diff --git a/internal/cache/walker_internal_test.go b/internal/cache/walker_internal_test.go index bd5d3e768..c39ac548b 100644 --- a/internal/cache/walker_internal_test.go +++ b/internal/cache/walker_internal_test.go @@ -11,10 +11,11 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/internal/config" ) func TestCleanWalkDirNotExists(t *testing.T) { - err := cleanWalk("/path/that/does/not/exist") + err := cleanWalk(config.Storage{}, "/path/that/does/not/exist") assert.NoError(t, err, "cleanWalk returned an error for a non existing directory") } @@ -46,7 +47,7 @@ func TestCleanWalkEmptyDirs(t *testing.T) { } } - require.NoError(t, cleanWalk(tmp)) + require.NoError(t, cleanWalk(config.Storage{}, tmp)) actual := findFiles(t, tmp) expect := `. |