Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2020-12-10 13:13:22 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2020-12-10 13:13:22 +0300
commitee8cb3bd753698786693f440f0de4ebb04f065a9 (patch)
tree2dceaee9548ec5f4b37f968036bac13183e66588
parent423723a84218d48ee95ead1441e9736a58a20c23 (diff)
parent58cc4948bad1a3e51be3740eea2b61b9f9f1c345 (diff)
Merge branch 'pks-object-pool-housekeeping' into 'master'
Perform housekeeping for object pools See merge request gitlab-org/gitaly!2885
-rw-r--r--changelogs/unreleased/pks-object-pool-housekeeping.yml5
-rw-r--r--internal/git/housekeeping/housekeeping.go (renamed from internal/helper/housekeeping/housekeeping.go)131
-rw-r--r--internal/git/housekeeping/housekeeping_test.go (renamed from internal/helper/housekeeping/housekeeping_test.go)198
-rw-r--r--internal/git/housekeeping/testhelper_test.go18
-rw-r--r--internal/git/objectpool/fetch.go6
-rw-r--r--internal/gitaly/service/repository/gc.go2
-rw-r--r--internal/gitaly/service/repository/rebase_in_progress.go2
-rw-r--r--internal/tempdir/tempdir.go2
8 files changed, 270 insertions, 94 deletions
diff --git a/changelogs/unreleased/pks-object-pool-housekeeping.yml b/changelogs/unreleased/pks-object-pool-housekeeping.yml
new file mode 100644
index 000000000..9c3f0be0d
--- /dev/null
+++ b/changelogs/unreleased/pks-object-pool-housekeeping.yml
@@ -0,0 +1,5 @@
+---
+title: Perform housekeeping for object pools
+merge_request: 2885
+author:
+type: added
diff --git a/internal/helper/housekeeping/housekeeping.go b/internal/git/housekeeping/housekeeping.go
index 7dfc57c60..36bf4086f 100644
--- a/internal/helper/housekeeping/housekeeping.go
+++ b/internal/git/housekeeping/housekeeping.go
@@ -11,16 +11,53 @@ import (
log "github.com/sirupsen/logrus"
)
-const deleteTempFilesOlderThanDuration = 7 * 24 * time.Hour
+const (
+ deleteTempFilesOlderThanDuration = 7 * 24 * time.Hour
+ brokenRefsGracePeriod = 24 * time.Hour
+ minimumDirPerm = 0700
+)
// Perform will perform housekeeping duties on a repository
func Perform(ctx context.Context, repoPath string) error {
logger := myLogger(ctx)
- filesMarkedForRemoval := 0
+ temporaryObjects, err := findTemporaryObjects(ctx, repoPath)
+ if err != nil {
+ return err
+ }
+
+ brokenRefs, err := findBrokenLooseReferences(ctx, repoPath)
+ if err != nil {
+ return err
+ }
+
+ filesToPrune := append(temporaryObjects, brokenRefs...)
+
unremovableFiles := 0
+ for _, path := range filesToPrune {
+ if err := os.Remove(path); err != nil {
+ unremovableFiles++
+ logger.WithError(err).WithField("path", path).Warn("unable to remove stray file")
+ }
+ }
- err := filepath.Walk(repoPath, func(path string, info os.FileInfo, err error) error {
+ if len(filesToPrune) > 0 {
+ logger.WithFields(log.Fields{
+ "objects": len(temporaryObjects),
+ "refs": len(brokenRefs),
+ "failures": unremovableFiles,
+ }).Info("removed files")
+ }
+
+ return err
+}
+
+func findTemporaryObjects(ctx context.Context, repoPath string) ([]string, error) {
+ var temporaryObjects []string
+
+ logger := myLogger(ctx)
+
+ err := filepath.Walk(filepath.Join(repoPath, "objects"), func(path string, info os.FileInfo, err error) error {
if info == nil {
logger.WithFields(log.Fields{
"path": path,
@@ -29,42 +66,64 @@ func Perform(ctx context.Context, repoPath string) error {
return nil
}
- if repoPath == path {
- // Never consider the root path
+ // Git will never create temporary directories, but only
+ // temporary objects, packfiles and packfile indices.
+ if info.IsDir() {
return nil
}
- if !shouldRemove(path, info.ModTime(), info.Mode()) {
+ if !isStaleTemporaryObject(path, info.ModTime(), info.Mode()) {
return nil
}
- filesMarkedForRemoval++
+ temporaryObjects = append(temporaryObjects, path)
- if err := forceRemove(ctx, path); err != nil {
- unremovableFiles++
- logger.WithError(err).WithField("path", path).Warn("unable to remove stray file")
+ return nil
+ })
+ if err != nil {
+ return nil, err
+ }
+
+ return temporaryObjects, nil
+}
+
+func isStaleTemporaryObject(path string, modTime time.Time, mode os.FileMode) bool {
+ base := filepath.Base(path)
+
+ // Only delete entries starting with `tmp_` and older than a week
+ return strings.HasPrefix(base, "tmp_") && time.Since(modTime) >= deleteTempFilesOlderThanDuration
+}
+
+func findBrokenLooseReferences(ctx context.Context, repoPath string) ([]string, error) {
+ logger := myLogger(ctx)
+
+ var brokenRefs []string
+ err := filepath.Walk(filepath.Join(repoPath, "refs"), func(path string, info os.FileInfo, err error) error {
+ if info == nil {
+ logger.WithFields(log.Fields{
+ "path": path,
+ }).WithError(err).Error("nil FileInfo in housekeeping.Perform")
+
+ return nil
}
- if info.IsDir() {
- // Do not walk removed directories
- return filepath.SkipDir
+ // When git crashes or a node reboots, it may happen that it leaves behind empty
+ // references. These references break various assumptions made by git and cause it
+ // to error in various circumstances. We thus clean them up to work around the
+ // issue.
+ if info.IsDir() || info.Size() > 0 || time.Since(info.ModTime()) < brokenRefsGracePeriod {
+ return nil
}
+ brokenRefs = append(brokenRefs, path)
+
return nil
})
-
- if filesMarkedForRemoval > 0 {
- logger.WithFields(log.Fields{
- "files": filesMarkedForRemoval,
- "failures": unremovableFiles,
- }).Info("removed files")
+ if err != nil {
+ return nil, err
}
- return err
-}
-
-func myLogger(ctx context.Context) *log.Entry {
- return ctxlogrus.Extract(ctx).WithField("system", "housekeeping")
+ return brokenRefs, nil
}
// FixDirectoryPermissions does a recursive directory walk to look for
@@ -75,8 +134,6 @@ func FixDirectoryPermissions(ctx context.Context, path string) error {
return fixDirectoryPermissions(ctx, path, make(map[string]struct{}))
}
-const minimumDirPerm = 0700
-
func fixDirectoryPermissions(ctx context.Context, path string, retriedPaths map[string]struct{}) error {
logger := myLogger(ctx)
return filepath.Walk(path, func(path string, info os.FileInfo, errIncoming error) error {
@@ -105,24 +162,6 @@ func fixDirectoryPermissions(ctx context.Context, path string, retriedPaths map[
})
}
-// Delete a directory structure while ensuring the current user has permission to delete the directory structure
-func forceRemove(ctx context.Context, path string) error {
- err := os.RemoveAll(path)
- if err == nil {
- return nil
- }
-
- // Delete failed. Try again after chmod'ing directories recursively
- if err := FixDirectoryPermissions(ctx, path); err != nil {
- return err
- }
-
- return os.RemoveAll(path)
-}
-
-func shouldRemove(path string, modTime time.Time, mode os.FileMode) bool {
- base := filepath.Base(path)
-
- // Only delete entries starting with `tmp_` and older than a week
- return strings.HasPrefix(base, "tmp_") && time.Since(modTime) >= deleteTempFilesOlderThanDuration
+func myLogger(ctx context.Context) *log.Entry {
+ return ctxlogrus.Extract(ctx).WithField("system", "housekeeping")
}
diff --git a/internal/helper/housekeeping/housekeeping_test.go b/internal/git/housekeeping/housekeeping_test.go
index 4077c9f9c..ffdef18e5 100644
--- a/internal/helper/housekeeping/housekeeping_test.go
+++ b/internal/git/housekeeping/housekeeping_test.go
@@ -1,6 +1,7 @@
package housekeeping
import (
+ "bytes"
"io/ioutil"
"os"
"path/filepath"
@@ -106,106 +107,217 @@ func d(name string, mode os.FileMode, age time.Duration, finalState entryFinalSt
}
func TestPerform(t *testing.T) {
- tests := []struct {
+ testcases := []struct {
name string
entries []entry
- wantErr bool
}{
{
name: "clean",
entries: []entry{
- f("a", os.FileMode(0700), 24*time.Hour, Keep),
- f("b", os.FileMode(0700), 24*time.Hour, Keep),
- f("c", os.FileMode(0700), 24*time.Hour, Keep),
+ d("objects", 0700, 240*time.Hour, Keep, []entry{
+ f("a", 0700, 24*time.Hour, Keep),
+ f("b", 0700, 24*time.Hour, Keep),
+ f("c", 0700, 24*time.Hour, Keep),
+ }),
},
- wantErr: false,
},
{
name: "emptyperms",
entries: []entry{
- f("b", os.FileMode(0700), 24*time.Hour, Keep),
- f("tmp_a", os.FileMode(0000), 2*time.Hour, Keep),
+ d("objects", 0700, 240*time.Hour, Keep, []entry{
+ f("b", 0700, 24*time.Hour, Keep),
+ f("tmp_a", 0000, 2*time.Hour, Keep),
+ }),
},
- wantErr: false,
},
{
name: "emptytempdir",
entries: []entry{
- d("tmp_d", os.FileMode(0000), 240*time.Hour, Delete, []entry{}),
- f("b", os.FileMode(0700), 24*time.Hour, Keep),
+ d("objects", 0700, 240*time.Hour, Keep, []entry{
+ d("tmp_d", 0000, 240*time.Hour, Keep, []entry{}),
+ f("b", 0700, 24*time.Hour, Keep),
+ }),
},
- wantErr: false,
},
{
name: "oldtempfile",
entries: []entry{
- f("tmp_a", os.FileMode(0770), 240*time.Hour, Delete),
- f("b", os.FileMode(0700), 24*time.Hour, Keep),
+ d("objects", 0700, 240*time.Hour, Keep, []entry{
+ f("tmp_a", 0770, 240*time.Hour, Delete),
+ f("b", 0700, 24*time.Hour, Keep),
+ }),
},
- wantErr: false,
},
{
name: "subdir temp file",
entries: []entry{
- d("a", os.FileMode(0770), 240*time.Hour, Keep, []entry{
- f("tmp_b", os.FileMode(0700), 240*time.Hour, Delete),
+ d("objects", 0700, 240*time.Hour, Keep, []entry{
+ d("a", 0770, 240*time.Hour, Keep, []entry{
+ f("tmp_b", 0700, 240*time.Hour, Delete),
+ }),
}),
},
- wantErr: false,
},
{
name: "inaccessible tmp directory",
entries: []entry{
- d("tmp_a", os.FileMode(0000), 240*time.Hour, Delete, []entry{
- f("tmp_b", os.FileMode(0700), 240*time.Hour, Delete),
+ d("objects", 0700, 240*time.Hour, Keep, []entry{
+ d("tmp_a", 0000, 240*time.Hour, Keep, []entry{
+ f("tmp_b", 0700, 240*time.Hour, Delete),
+ }),
}),
},
- wantErr: false,
},
{
name: "deeply nested inaccessible tmp directory",
entries: []entry{
- d("tmp_a", os.FileMode(0000), 240*time.Hour, Delete, []entry{
- d("tmp_a", os.FileMode(0000), 24*time.Hour, Delete, []entry{
- f("tmp_b", os.FileMode(0000), 24*time.Hour, Delete),
+ d("objects", 0700, 240*time.Hour, Keep, []entry{
+ d("tmp_a", 0700, 240*time.Hour, Keep, []entry{
+ d("tmp_a", 0700, 24*time.Hour, Keep, []entry{
+ f("tmp_b", 0000, 240*time.Hour, Delete),
+ }),
}),
}),
},
- wantErr: false,
+ },
+ {
+ name: "files outside of object database",
+ entries: []entry{
+ f("tmp_a", 0770, 240*time.Hour, Keep),
+ d("info", 0700, 240*time.Hour, Keep, []entry{
+ f("tmp_a", 0770, 240*time.Hour, Keep),
+ }),
+ },
},
}
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- rootPath, err := ioutil.TempDir("", "test")
- assert.NoError(t, err, "TempDir creation failed")
- defer os.RemoveAll(rootPath)
-
- for _, e := range tt.entries {
- e.create(t, rootPath)
- }
+ for _, tc := range testcases {
+ t.Run(tc.name, func(t *testing.T) {
+ rootPath, cleanup := testhelper.TempDir(t)
+ defer cleanup()
ctx, cancel := testhelper.Context()
defer cancel()
- if err = Perform(ctx, rootPath); (err != nil) != tt.wantErr {
- t.Errorf("Perform() error = %v, wantErr %v", err, tt.wantErr)
+ // We need to fix permissions so we don't fail to
+ // remove the temporary directory after the test.
+ defer FixDirectoryPermissions(ctx, rootPath)
+
+ for _, e := range tc.entries {
+ e.create(t, rootPath)
}
- for _, e := range tt.entries {
+ require.NoError(t, Perform(ctx, rootPath))
+
+ for _, e := range tc.entries {
e.validate(t, rootPath)
}
})
}
}
-func TestShouldUnlink(t *testing.T) {
+func TestPerform_references(t *testing.T) {
+ type ref struct {
+ name string
+ age time.Duration
+ size int
+ }
+
+ testcases := []struct {
+ desc string
+ refs []ref
+ expected []string
+ }{
+ {
+ desc: "normal reference",
+ refs: []ref{
+ {name: "refs/heads/master", age: 1 * time.Second, size: 40},
+ },
+ expected: []string{
+ "refs/heads/master",
+ },
+ },
+ {
+ desc: "recent empty reference is not deleted",
+ refs: []ref{
+ {name: "refs/heads/master", age: 1 * time.Hour, size: 0},
+ },
+ expected: []string{
+ "refs/heads/master",
+ },
+ },
+ {
+ desc: "old empty reference is deleted",
+ refs: []ref{
+ {name: "refs/heads/master", age: 25 * time.Hour, size: 0},
+ },
+ expected: nil,
+ },
+ {
+ desc: "multiple references",
+ refs: []ref{
+ {name: "refs/keep/kept-because-recent", age: 1 * time.Hour, size: 0},
+ {name: "refs/keep/kept-because-nonempty", age: 25 * time.Hour, size: 1},
+ {name: "refs/keep/prune", age: 25 * time.Hour, size: 0},
+ {name: "refs/tags/kept-because-recent", age: 1 * time.Hour, size: 0},
+ {name: "refs/tags/kept-because-nonempty", age: 25 * time.Hour, size: 1},
+ {name: "refs/tags/prune", age: 25 * time.Hour, size: 0},
+ {name: "refs/heads/kept-because-recent", age: 1 * time.Hour, size: 0},
+ {name: "refs/heads/kept-because-nonempty", age: 25 * time.Hour, size: 1},
+ {name: "refs/heads/prune", age: 25 * time.Hour, size: 0},
+ },
+ expected: []string{
+ "refs/keep/kept-because-recent",
+ "refs/keep/kept-because-nonempty",
+ "refs/tags/kept-because-recent",
+ "refs/tags/kept-because-nonempty",
+ "refs/heads/kept-because-recent",
+ "refs/heads/kept-because-nonempty",
+ },
+ },
+ }
+
+ for _, tc := range testcases {
+ t.Run(tc.desc, func(t *testing.T) {
+ rootPath, cleanup := testhelper.TempDir(t)
+ defer cleanup()
+
+ for _, ref := range tc.refs {
+ path := filepath.Join(rootPath, ref.name)
+
+ require.NoError(t, os.MkdirAll(filepath.Dir(path), 0755))
+ require.NoError(t, ioutil.WriteFile(path, bytes.Repeat([]byte{0}, ref.size), 0644))
+ filetime := time.Now().Add(-ref.age)
+ require.NoError(t, os.Chtimes(path, filetime, filetime))
+ }
+
+ ctx, cancel := testhelper.Context()
+ defer cancel()
+
+ require.NoError(t, Perform(ctx, rootPath))
+
+ var actual []string
+ filepath.Walk(filepath.Join(rootPath), func(path string, info os.FileInfo, _ error) error {
+ if !info.IsDir() {
+ ref, err := filepath.Rel(rootPath, path)
+ require.NoError(t, err)
+ actual = append(actual, ref)
+ }
+ return nil
+ })
+
+ require.ElementsMatch(t, tc.expected, actual)
+ })
+ }
+}
+
+func TestShouldRemoveTemporaryObject(t *testing.T) {
type args struct {
path string
modTime time.Time
mode os.FileMode
}
- tests := []struct {
+ testcases := []struct {
name string
args args
want bool
@@ -266,11 +378,9 @@ func TestShouldUnlink(t *testing.T) {
},
}
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- if got := shouldRemove(tt.args.path, tt.args.modTime, tt.args.mode); got != tt.want {
- t.Errorf("shouldUnlink() = %v, want %v", got, tt.want)
- }
+ for _, tc := range testcases {
+ t.Run(tc.name, func(t *testing.T) {
+ require.Equal(t, tc.want, isStaleTemporaryObject(tc.args.path, tc.args.modTime, tc.args.mode))
})
}
}
diff --git a/internal/git/housekeeping/testhelper_test.go b/internal/git/housekeeping/testhelper_test.go
new file mode 100644
index 000000000..449a89caa
--- /dev/null
+++ b/internal/git/housekeeping/testhelper_test.go
@@ -0,0 +1,18 @@
+package housekeeping
+
+import (
+ "os"
+ "testing"
+
+ "gitlab.com/gitlab-org/gitaly/internal/testhelper"
+)
+
+func TestMain(m *testing.M) {
+ os.Exit(testMain(m))
+}
+
+func testMain(m *testing.M) int {
+ cleanup := testhelper.Configure()
+ defer cleanup()
+ return m.Run()
+}
diff --git a/internal/git/objectpool/fetch.go b/internal/git/objectpool/fetch.go
index 4ac559962..41967d029 100644
--- a/internal/git/objectpool/fetch.go
+++ b/internal/git/objectpool/fetch.go
@@ -15,6 +15,7 @@ import (
"github.com/sirupsen/logrus"
"gitlab.com/gitlab-org/gitaly/internal/command"
"gitlab.com/gitlab-org/gitaly/internal/git"
+ "gitlab.com/gitlab-org/gitaly/internal/git/housekeeping"
"gitlab.com/gitlab-org/gitaly/internal/git/repository"
"gitlab.com/gitlab-org/gitaly/internal/git/updateref"
"gitlab.com/gitlab-org/gitaly/internal/helper"
@@ -33,11 +34,14 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *gitalypb.Repos
}
originPath, err := o.locator.GetPath(origin)
-
if err != nil {
return err
}
+ if err := housekeeping.Perform(ctx, originPath); err != nil {
+ return err
+ }
+
opts := []git.CmdOpt{
git.WithRefTxHook(ctx, helper.ProtoRepoFromRepo(o), o.cfg),
}
diff --git a/internal/gitaly/service/repository/gc.go b/internal/gitaly/service/repository/gc.go
index 469f18cea..28528e9b1 100644
--- a/internal/gitaly/service/repository/gc.go
+++ b/internal/gitaly/service/repository/gc.go
@@ -12,9 +12,9 @@ import (
log "github.com/sirupsen/logrus"
"gitlab.com/gitlab-org/gitaly/internal/git"
"gitlab.com/gitlab-org/gitaly/internal/git/catfile"
+ "gitlab.com/gitlab-org/gitaly/internal/git/housekeeping"
"gitlab.com/gitlab-org/gitaly/internal/git/stats"
"gitlab.com/gitlab-org/gitaly/internal/helper"
- "gitlab.com/gitlab-org/gitaly/internal/helper/housekeeping"
"gitlab.com/gitlab-org/gitaly/proto/go/gitalypb"
"google.golang.org/grpc/status"
)
diff --git a/internal/gitaly/service/repository/rebase_in_progress.go b/internal/gitaly/service/repository/rebase_in_progress.go
index b72ead574..2ad901b8c 100644
--- a/internal/gitaly/service/repository/rebase_in_progress.go
+++ b/internal/gitaly/service/repository/rebase_in_progress.go
@@ -8,7 +8,7 @@ import (
"strings"
"time"
- "gitlab.com/gitlab-org/gitaly/internal/helper/housekeeping"
+ "gitlab.com/gitlab-org/gitaly/internal/git/housekeeping"
"gitlab.com/gitlab-org/gitaly/proto/go/gitalypb"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
diff --git a/internal/tempdir/tempdir.go b/internal/tempdir/tempdir.go
index 4878c6e1c..a94b88d44 100644
--- a/internal/tempdir/tempdir.go
+++ b/internal/tempdir/tempdir.go
@@ -11,8 +11,8 @@ import (
log "github.com/sirupsen/logrus"
"gitlab.com/gitlab-org/gitaly/internal/dontpanic"
+ "gitlab.com/gitlab-org/gitaly/internal/git/housekeeping"
"gitlab.com/gitlab-org/gitaly/internal/gitaly/config"
- "gitlab.com/gitlab-org/gitaly/internal/helper/housekeeping"
"gitlab.com/gitlab-org/gitaly/internal/storage"
"gitlab.com/gitlab-org/gitaly/proto/go/gitalypb"
)