Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Okstad <pokstad@gitlab.com>2019-10-02 15:54:37 +0300
committerZeger-Jan van de Weg <git@zjvandeweg.nl>2019-10-02 15:54:37 +0300
commit3dc6de091f9aabb07a2e1fa04a0f9cd7a199e2a3 (patch)
tree3e4cfca4ede301d8f0b5f3abc88d4dd26c7e43b0
parent863c6b516554a6ca2da3763d308bf24ff8c3f79c (diff)
Don't panic, go retry
-rw-r--r--changelogs/unreleased/po-dont-panic.yml5
-rw-r--r--internal/cache/walk_test.go3
-rw-r--r--internal/cache/walker.go34
-rw-r--r--internal/dontpanic/retry.go55
-rw-r--r--internal/dontpanic/retry_test.go59
5 files changed, 140 insertions, 16 deletions
diff --git a/changelogs/unreleased/po-dont-panic.yml b/changelogs/unreleased/po-dont-panic.yml
new file mode 100644
index 000000000..7c4c5ca45
--- /dev/null
+++ b/changelogs/unreleased/po-dont-panic.yml
@@ -0,0 +1,5 @@
+---
+title: Don't panic, go retry
+merge_request: 1523
+author:
+type: added
diff --git a/internal/cache/walk_test.go b/internal/cache/walk_test.go
index e7260214b..bfe037ff4 100644
--- a/internal/cache/walk_test.go
+++ b/internal/cache/walk_test.go
@@ -4,10 +4,9 @@ import (
"testing"
"github.com/stretchr/testify/assert"
- "gitlab.com/gitlab-org/gitaly/internal/config"
)
func TestcleanWalkDirNotExists(t *testing.T) {
- err := cleanWalk(config.Storage{Name: "foo", Path: "/path/that/does/not/exist"})
+ err := cleanWalk("/path/that/does/not/exist")
assert.NoError(t, err, "cleanWalk returned an error for a non existing directory")
}
diff --git a/internal/cache/walker.go b/internal/cache/walker.go
index aacd3c58a..94737fa9f 100644
--- a/internal/cache/walker.go
+++ b/internal/cache/walker.go
@@ -13,11 +13,12 @@ import (
"github.com/sirupsen/logrus"
"gitlab.com/gitlab-org/gitaly/internal/config"
+ "gitlab.com/gitlab-org/gitaly/internal/dontpanic"
"gitlab.com/gitlab-org/gitaly/internal/tempdir"
)
-func cleanWalk(storage config.Storage) error {
- walkErr := filepath.Walk(tempdir.CacheDir(storage), func(path string, info os.FileInfo, err error) error {
+func cleanWalk(walkPath string) error {
+ walkErr := filepath.Walk(walkPath, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
@@ -57,22 +58,27 @@ func cleanWalk(storage config.Storage) error {
const cleanWalkFrequency = 10 * time.Minute
-func startCleanWalker(storage config.Storage) {
- if disableWalker {
- return
- }
-
- logrus.WithField("storage", storage.Name).Info("Starting disk cache object walker")
+func walkLoop(storageName, walkPath string) {
+ logrus.WithField("storage", storageName).Infof("Starting file walker for %s", walkPath)
walkTick := time.NewTicker(cleanWalkFrequency)
- go func() {
+ dontpanic.GoForever(time.Minute, func() {
for {
- if err := cleanWalk(storage); err != nil {
- logrus.WithField("storage", storage.Name).Error(err)
+ if err := cleanWalk(walkPath); err != nil {
+ logrus.WithField("storage", storageName).Error(err)
}
<-walkTick.C
}
- }()
+ })
+}
+
+func startCleanWalker(storage config.Storage) {
+ if disableWalker {
+ return
+ }
+
+ walkLoop(storage.Name, tempdir.CacheDir(storage))
+ walkLoop(storage.Name, tempdir.StateDir(storage))
}
var (
@@ -111,14 +117,14 @@ func moveAndClear(storage config.Storage) error {
return err
}
- go func() {
+ dontpanic.Go(func() {
start := time.Now()
if err := os.RemoveAll(tmpDir); err != nil {
logger.Errorf("unable to remove disk cache objects: %q", err)
}
logger.Infof("cleared all cache object files in %s after %s", tmpDir, time.Since(start))
- }()
+ })
return nil
}
diff --git a/internal/dontpanic/retry.go b/internal/dontpanic/retry.go
new file mode 100644
index 000000000..8ba68259f
--- /dev/null
+++ b/internal/dontpanic/retry.go
@@ -0,0 +1,55 @@
+// Package dontpanic provides function wrappers and supervisors to ensure
+// that wrapped code does not panic and cause program crashes.
+//
+// When should you use this package? Anytime you are running a function or
+// goroutine where it isn't obvious whether it can or can't panic. This may
+// be a higher risk in long running goroutines and functions or ones that are
+// difficult to test completely.
+package dontpanic
+
+import (
+ "time"
+
+ raven "github.com/getsentry/raven-go"
+ "gitlab.com/gitlab-org/gitaly/internal/log"
+)
+
+// Try will wrap the provided function with a panic recovery. If a panic occurs,
+// the recovered panic will be sent to Sentry and logged as an error.
+func Try(fn func()) { catchAndLog(fn) }
+
+// Go will run the provided function in a goroutine and recover from any
+// panics. If a panic occurs, the recovered panic will be sent to Sentry
+// and logged as an error. Go is best used in fire-and-forget goroutines where
+// observability is lost.
+func Go(fn func()) { go func() { Try(fn) }() }
+
+var logger = log.Default()
+
+func catchAndLog(fn func()) {
+ recovered, id := raven.CapturePanic(fn, nil)
+ if id == "" {
+ return
+ }
+
+ logger.WithField("sentry_id", id).Errorf(
+ "dontpanic: recovered value sent to Sentry: %+v", recovered,
+ )
+}
+
+// GoForever will keep retrying a function fn in a goroutine forever in the
+// background (until the process exits) while recovering from panics. Each
+// time a closure panics, the recovered value will be sent to Sentry and
+// logged at level error. The provided backoff will delay retries to enable
+// "breathing" room to prevent potentially worsening the situation.
+func GoForever(backoff time.Duration, fn func()) {
+ go func() {
+ for {
+ Try(fn)
+ }
+ if backoff > 0 {
+ logger.Infof("dontpanic: backing off %s before retrying", backoff)
+ time.Sleep(backoff)
+ }
+ }()
+}
diff --git a/internal/dontpanic/retry_test.go b/internal/dontpanic/retry_test.go
new file mode 100644
index 000000000..fe5c88092
--- /dev/null
+++ b/internal/dontpanic/retry_test.go
@@ -0,0 +1,59 @@
+package dontpanic_test
+
+import (
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/internal/dontpanic"
+)
+
+func TestTry(t *testing.T) {
+ dontpanic.Try(func() { panic("don't panic") })
+}
+
+func TestTryNoPanic(t *testing.T) {
+ invoked := false
+ dontpanic.Try(func() { invoked = true })
+ require.True(t, invoked)
+}
+
+func TestGo(t *testing.T) {
+ done := make(chan struct{})
+ dontpanic.Go(func() {
+ defer close(done)
+ panic("don't panic")
+ })
+ <-done
+}
+
+func TestGoNoPanic(t *testing.T) {
+ done := make(chan struct{})
+ dontpanic.Go(func() { close(done) })
+ <-done
+}
+
+func TestGoForever(t *testing.T) {
+ var i int
+ recoveredQ := make(chan struct{})
+ expectPanics := 5
+
+ fn := func() {
+ defer func() { recoveredQ <- struct{}{} }()
+ i++
+
+ if i > expectPanics {
+ close(recoveredQ)
+ }
+
+ panic("don't panic")
+ }
+
+ dontpanic.GoForever(time.Microsecond, fn)
+
+ var actualPanics int
+ for range recoveredQ {
+ actualPanics++
+ }
+ require.Equal(t, expectPanics, actualPanics)
+}