Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2020-12-09 12:51:29 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2020-12-09 12:51:29 +0300
commitdc17627aeba0607f00330404bf1ae7311f213c0b (patch)
tree4101b49e786f0f14a07dea26d5fcfdf37c1b1ca6
parente3ca8c97340568060513c08f3417d0a71f669fef (diff)
housekeeping: Prune empty references
While git is quite careful with how it writes objects as it always uses a temporary file first and only renames it into place as soon as all data has been written to disk, it's not as careful when writing references. While it does lock references before writing them, the result is that under some circumstances we may end up with references which have proper names, but don't have any contents at all. The exact cause is unclear, but it's quite plausible that crashes, hard reboots or similar things may cause this to happen. Let's extend our housekeeping to also check for such empty references and delete them. Detecting them is quite easy: they have a size of zero bytes and thus cannot contain anything meaningful. There still is the possibility of a race with any concurrently running git process though, which is why the new cleanup logic will only cleanup empty references older than a day. That should be more than enough time to create the file and fill it with data.
-rw-r--r--internal/git/housekeeping/housekeeping.go47
-rw-r--r--internal/git/housekeeping/housekeeping_test.go99
2 files changed, 142 insertions, 4 deletions
diff --git a/internal/git/housekeeping/housekeeping.go b/internal/git/housekeeping/housekeeping.go
index c83586386..36bf4086f 100644
--- a/internal/git/housekeeping/housekeeping.go
+++ b/internal/git/housekeeping/housekeeping.go
@@ -13,6 +13,7 @@ import (
const (
deleteTempFilesOlderThanDuration = 7 * 24 * time.Hour
+ brokenRefsGracePeriod = 24 * time.Hour
minimumDirPerm = 0700
)
@@ -25,17 +26,25 @@ func Perform(ctx context.Context, repoPath string) error {
return err
}
+ brokenRefs, err := findBrokenLooseReferences(ctx, repoPath)
+ if err != nil {
+ return err
+ }
+
+ filesToPrune := append(temporaryObjects, brokenRefs...)
+
unremovableFiles := 0
- for _, path := range temporaryObjects {
+ for _, path := range filesToPrune {
if err := os.Remove(path); err != nil {
unremovableFiles++
logger.WithError(err).WithField("path", path).Warn("unable to remove stray file")
}
}
- if len(temporaryObjects) > 0 {
+ if len(filesToPrune) > 0 {
logger.WithFields(log.Fields{
- "files": len(temporaryObjects),
+ "objects": len(temporaryObjects),
+ "refs": len(brokenRefs),
"failures": unremovableFiles,
}).Info("removed files")
}
@@ -85,6 +94,38 @@ func isStaleTemporaryObject(path string, modTime time.Time, mode os.FileMode) bo
return strings.HasPrefix(base, "tmp_") && time.Since(modTime) >= deleteTempFilesOlderThanDuration
}
+func findBrokenLooseReferences(ctx context.Context, repoPath string) ([]string, error) {
+ logger := myLogger(ctx)
+
+ var brokenRefs []string
+ err := filepath.Walk(filepath.Join(repoPath, "refs"), func(path string, info os.FileInfo, err error) error {
+ if info == nil {
+ logger.WithFields(log.Fields{
+ "path": path,
+ }).WithError(err).Error("nil FileInfo in housekeeping.Perform")
+
+ return nil
+ }
+
+ // When git crashes or a node reboots, it may happen that it leaves behind empty
+ // references. These references break various assumptions made by git and cause it
+ // to error in various circumstances. We thus clean them up to work around the
+ // issue.
+ if info.IsDir() || info.Size() > 0 || time.Since(info.ModTime()) < brokenRefsGracePeriod {
+ return nil
+ }
+
+ brokenRefs = append(brokenRefs, path)
+
+ return nil
+ })
+ if err != nil {
+ return nil, err
+ }
+
+ return brokenRefs, nil
+}
+
// FixDirectoryPermissions does a recursive directory walk to look for
// directories that cannot be accessed by the current user, and tries to
// fix those with chmod. The motivating problem is that directories with mode
diff --git a/internal/git/housekeeping/housekeeping_test.go b/internal/git/housekeeping/housekeeping_test.go
index 6920acdd5..ffdef18e5 100644
--- a/internal/git/housekeeping/housekeeping_test.go
+++ b/internal/git/housekeeping/housekeeping_test.go
@@ -1,6 +1,8 @@
package housekeeping
import (
+ "bytes"
+ "io/ioutil"
"os"
"path/filepath"
"testing"
@@ -182,7 +184,7 @@ func TestPerform(t *testing.T) {
name: "files outside of object database",
entries: []entry{
f("tmp_a", 0770, 240*time.Hour, Keep),
- d("refs", 0700, 240*time.Hour, Keep, []entry{
+ d("info", 0700, 240*time.Hour, Keep, []entry{
f("tmp_a", 0770, 240*time.Hour, Keep),
}),
},
@@ -214,6 +216,101 @@ func TestPerform(t *testing.T) {
}
}
+func TestPerform_references(t *testing.T) {
+ type ref struct {
+ name string
+ age time.Duration
+ size int
+ }
+
+ testcases := []struct {
+ desc string
+ refs []ref
+ expected []string
+ }{
+ {
+ desc: "normal reference",
+ refs: []ref{
+ {name: "refs/heads/master", age: 1 * time.Second, size: 40},
+ },
+ expected: []string{
+ "refs/heads/master",
+ },
+ },
+ {
+ desc: "recent empty reference is not deleted",
+ refs: []ref{
+ {name: "refs/heads/master", age: 1 * time.Hour, size: 0},
+ },
+ expected: []string{
+ "refs/heads/master",
+ },
+ },
+ {
+ desc: "old empty reference is deleted",
+ refs: []ref{
+ {name: "refs/heads/master", age: 25 * time.Hour, size: 0},
+ },
+ expected: nil,
+ },
+ {
+ desc: "multiple references",
+ refs: []ref{
+ {name: "refs/keep/kept-because-recent", age: 1 * time.Hour, size: 0},
+ {name: "refs/keep/kept-because-nonempty", age: 25 * time.Hour, size: 1},
+ {name: "refs/keep/prune", age: 25 * time.Hour, size: 0},
+ {name: "refs/tags/kept-because-recent", age: 1 * time.Hour, size: 0},
+ {name: "refs/tags/kept-because-nonempty", age: 25 * time.Hour, size: 1},
+ {name: "refs/tags/prune", age: 25 * time.Hour, size: 0},
+ {name: "refs/heads/kept-because-recent", age: 1 * time.Hour, size: 0},
+ {name: "refs/heads/kept-because-nonempty", age: 25 * time.Hour, size: 1},
+ {name: "refs/heads/prune", age: 25 * time.Hour, size: 0},
+ },
+ expected: []string{
+ "refs/keep/kept-because-recent",
+ "refs/keep/kept-because-nonempty",
+ "refs/tags/kept-because-recent",
+ "refs/tags/kept-because-nonempty",
+ "refs/heads/kept-because-recent",
+ "refs/heads/kept-because-nonempty",
+ },
+ },
+ }
+
+ for _, tc := range testcases {
+ t.Run(tc.desc, func(t *testing.T) {
+ rootPath, cleanup := testhelper.TempDir(t)
+ defer cleanup()
+
+ for _, ref := range tc.refs {
+ path := filepath.Join(rootPath, ref.name)
+
+ require.NoError(t, os.MkdirAll(filepath.Dir(path), 0755))
+ require.NoError(t, ioutil.WriteFile(path, bytes.Repeat([]byte{0}, ref.size), 0644))
+ filetime := time.Now().Add(-ref.age)
+ require.NoError(t, os.Chtimes(path, filetime, filetime))
+ }
+
+ ctx, cancel := testhelper.Context()
+ defer cancel()
+
+ require.NoError(t, Perform(ctx, rootPath))
+
+ var actual []string
+ filepath.Walk(filepath.Join(rootPath), func(path string, info os.FileInfo, _ error) error {
+ if !info.IsDir() {
+ ref, err := filepath.Rel(rootPath, path)
+ require.NoError(t, err)
+ actual = append(actual, ref)
+ }
+ return nil
+ })
+
+ require.ElementsMatch(t, tc.expected, actual)
+ })
+ }
+}
+
func TestShouldRemoveTemporaryObject(t *testing.T) {
type args struct {
path string