diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2020-12-10 13:13:22 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2020-12-10 13:13:22 +0300 |
commit | ee8cb3bd753698786693f440f0de4ebb04f065a9 (patch) | |
tree | 2dceaee9548ec5f4b37f968036bac13183e66588 | |
parent | 423723a84218d48ee95ead1441e9736a58a20c23 (diff) | |
parent | 58cc4948bad1a3e51be3740eea2b61b9f9f1c345 (diff) |
Merge branch 'pks-object-pool-housekeeping' into 'master'
Perform housekeeping for object pools
See merge request gitlab-org/gitaly!2885
-rw-r--r-- | changelogs/unreleased/pks-object-pool-housekeeping.yml | 5 | ||||
-rw-r--r-- | internal/git/housekeeping/housekeeping.go (renamed from internal/helper/housekeeping/housekeeping.go) | 131 | ||||
-rw-r--r-- | internal/git/housekeeping/housekeeping_test.go (renamed from internal/helper/housekeeping/housekeeping_test.go) | 198 | ||||
-rw-r--r-- | internal/git/housekeeping/testhelper_test.go | 18 | ||||
-rw-r--r-- | internal/git/objectpool/fetch.go | 6 | ||||
-rw-r--r-- | internal/gitaly/service/repository/gc.go | 2 | ||||
-rw-r--r-- | internal/gitaly/service/repository/rebase_in_progress.go | 2 | ||||
-rw-r--r-- | internal/tempdir/tempdir.go | 2 |
8 files changed, 270 insertions, 94 deletions
diff --git a/changelogs/unreleased/pks-object-pool-housekeeping.yml b/changelogs/unreleased/pks-object-pool-housekeeping.yml new file mode 100644 index 000000000..9c3f0be0d --- /dev/null +++ b/changelogs/unreleased/pks-object-pool-housekeeping.yml @@ -0,0 +1,5 @@ +--- +title: Perform housekeeping for object pools +merge_request: 2885 +author: +type: added diff --git a/internal/helper/housekeeping/housekeeping.go b/internal/git/housekeeping/housekeeping.go index 7dfc57c60..36bf4086f 100644 --- a/internal/helper/housekeeping/housekeeping.go +++ b/internal/git/housekeeping/housekeeping.go @@ -11,16 +11,53 @@ import ( log "github.com/sirupsen/logrus" ) -const deleteTempFilesOlderThanDuration = 7 * 24 * time.Hour +const ( + deleteTempFilesOlderThanDuration = 7 * 24 * time.Hour + brokenRefsGracePeriod = 24 * time.Hour + minimumDirPerm = 0700 +) // Perform will perform housekeeping duties on a repository func Perform(ctx context.Context, repoPath string) error { logger := myLogger(ctx) - filesMarkedForRemoval := 0 + temporaryObjects, err := findTemporaryObjects(ctx, repoPath) + if err != nil { + return err + } + + brokenRefs, err := findBrokenLooseReferences(ctx, repoPath) + if err != nil { + return err + } + + filesToPrune := append(temporaryObjects, brokenRefs...) + unremovableFiles := 0 + for _, path := range filesToPrune { + if err := os.Remove(path); err != nil { + unremovableFiles++ + logger.WithError(err).WithField("path", path).Warn("unable to remove stray file") + } + } - err := filepath.Walk(repoPath, func(path string, info os.FileInfo, err error) error { + if len(filesToPrune) > 0 { + logger.WithFields(log.Fields{ + "objects": len(temporaryObjects), + "refs": len(brokenRefs), + "failures": unremovableFiles, + }).Info("removed files") + } + + return err +} + +func findTemporaryObjects(ctx context.Context, repoPath string) ([]string, error) { + var temporaryObjects []string + + logger := myLogger(ctx) + + err := filepath.Walk(filepath.Join(repoPath, "objects"), func(path string, info os.FileInfo, err error) error { if info == nil { logger.WithFields(log.Fields{ "path": path, @@ -29,42 +66,64 @@ func Perform(ctx context.Context, repoPath string) error { return nil } - if repoPath == path { - // Never consider the root path + // Git will never create temporary directories, but only + // temporary objects, packfiles and packfile indices. + if info.IsDir() { return nil } - if !shouldRemove(path, info.ModTime(), info.Mode()) { + if !isStaleTemporaryObject(path, info.ModTime(), info.Mode()) { return nil } - filesMarkedForRemoval++ + temporaryObjects = append(temporaryObjects, path) - if err := forceRemove(ctx, path); err != nil { - unremovableFiles++ - logger.WithError(err).WithField("path", path).Warn("unable to remove stray file") + return nil + }) + if err != nil { + return nil, err + } + + return temporaryObjects, nil +} + +func isStaleTemporaryObject(path string, modTime time.Time, mode os.FileMode) bool { + base := filepath.Base(path) + + // Only delete entries starting with `tmp_` and older than a week + return strings.HasPrefix(base, "tmp_") && time.Since(modTime) >= deleteTempFilesOlderThanDuration +} + +func findBrokenLooseReferences(ctx context.Context, repoPath string) ([]string, error) { + logger := myLogger(ctx) + + var brokenRefs []string + err := filepath.Walk(filepath.Join(repoPath, "refs"), func(path string, info os.FileInfo, err error) error { + if info == nil { + logger.WithFields(log.Fields{ + "path": path, + }).WithError(err).Error("nil FileInfo in housekeeping.Perform") + + return nil } - if info.IsDir() { - // Do not walk removed directories - return filepath.SkipDir + // When git crashes or a node reboots, it may happen that it leaves behind empty + // references. These references break various assumptions made by git and cause it + // to error in various circumstances. We thus clean them up to work around the + // issue. + if info.IsDir() || info.Size() > 0 || time.Since(info.ModTime()) < brokenRefsGracePeriod { + return nil } + brokenRefs = append(brokenRefs, path) + return nil }) - - if filesMarkedForRemoval > 0 { - logger.WithFields(log.Fields{ - "files": filesMarkedForRemoval, - "failures": unremovableFiles, - }).Info("removed files") + if err != nil { + return nil, err } - return err -} - -func myLogger(ctx context.Context) *log.Entry { - return ctxlogrus.Extract(ctx).WithField("system", "housekeeping") + return brokenRefs, nil } // FixDirectoryPermissions does a recursive directory walk to look for @@ -75,8 +134,6 @@ func FixDirectoryPermissions(ctx context.Context, path string) error { return fixDirectoryPermissions(ctx, path, make(map[string]struct{})) } -const minimumDirPerm = 0700 - func fixDirectoryPermissions(ctx context.Context, path string, retriedPaths map[string]struct{}) error { logger := myLogger(ctx) return filepath.Walk(path, func(path string, info os.FileInfo, errIncoming error) error { @@ -105,24 +162,6 @@ func fixDirectoryPermissions(ctx context.Context, path string, retriedPaths map[ }) } -// Delete a directory structure while ensuring the current user has permission to delete the directory structure -func forceRemove(ctx context.Context, path string) error { - err := os.RemoveAll(path) - if err == nil { - return nil - } - - // Delete failed. Try again after chmod'ing directories recursively - if err := FixDirectoryPermissions(ctx, path); err != nil { - return err - } - - return os.RemoveAll(path) -} - -func shouldRemove(path string, modTime time.Time, mode os.FileMode) bool { - base := filepath.Base(path) - - // Only delete entries starting with `tmp_` and older than a week - return strings.HasPrefix(base, "tmp_") && time.Since(modTime) >= deleteTempFilesOlderThanDuration +func myLogger(ctx context.Context) *log.Entry { + return ctxlogrus.Extract(ctx).WithField("system", "housekeeping") } diff --git a/internal/helper/housekeeping/housekeeping_test.go b/internal/git/housekeeping/housekeeping_test.go index 4077c9f9c..ffdef18e5 100644 --- a/internal/helper/housekeeping/housekeeping_test.go +++ b/internal/git/housekeeping/housekeeping_test.go @@ -1,6 +1,7 @@ package housekeeping import ( + "bytes" "io/ioutil" "os" "path/filepath" @@ -106,106 +107,217 @@ func d(name string, mode os.FileMode, age time.Duration, finalState entryFinalSt } func TestPerform(t *testing.T) { - tests := []struct { + testcases := []struct { name string entries []entry - wantErr bool }{ { name: "clean", entries: []entry{ - f("a", os.FileMode(0700), 24*time.Hour, Keep), - f("b", os.FileMode(0700), 24*time.Hour, Keep), - f("c", os.FileMode(0700), 24*time.Hour, Keep), + d("objects", 0700, 240*time.Hour, Keep, []entry{ + f("a", 0700, 24*time.Hour, Keep), + f("b", 0700, 24*time.Hour, Keep), + f("c", 0700, 24*time.Hour, Keep), + }), }, - wantErr: false, }, { name: "emptyperms", entries: []entry{ - f("b", os.FileMode(0700), 24*time.Hour, Keep), - f("tmp_a", os.FileMode(0000), 2*time.Hour, Keep), + d("objects", 0700, 240*time.Hour, Keep, []entry{ + f("b", 0700, 24*time.Hour, Keep), + f("tmp_a", 0000, 2*time.Hour, Keep), + }), }, - wantErr: false, }, { name: "emptytempdir", entries: []entry{ - d("tmp_d", os.FileMode(0000), 240*time.Hour, Delete, []entry{}), - f("b", os.FileMode(0700), 24*time.Hour, Keep), + d("objects", 0700, 240*time.Hour, Keep, []entry{ + d("tmp_d", 0000, 240*time.Hour, Keep, []entry{}), + f("b", 0700, 24*time.Hour, Keep), + }), }, - wantErr: false, }, { name: "oldtempfile", entries: []entry{ - f("tmp_a", os.FileMode(0770), 240*time.Hour, Delete), - f("b", os.FileMode(0700), 24*time.Hour, Keep), + d("objects", 0700, 240*time.Hour, Keep, []entry{ + f("tmp_a", 0770, 240*time.Hour, Delete), + f("b", 0700, 24*time.Hour, Keep), + }), }, - wantErr: false, }, { name: "subdir temp file", entries: []entry{ - d("a", os.FileMode(0770), 240*time.Hour, Keep, []entry{ - f("tmp_b", os.FileMode(0700), 240*time.Hour, Delete), + d("objects", 0700, 240*time.Hour, Keep, []entry{ + d("a", 0770, 240*time.Hour, Keep, []entry{ + f("tmp_b", 0700, 240*time.Hour, Delete), + }), }), }, - wantErr: false, }, { name: "inaccessible tmp directory", entries: []entry{ - d("tmp_a", os.FileMode(0000), 240*time.Hour, Delete, []entry{ - f("tmp_b", os.FileMode(0700), 240*time.Hour, Delete), + d("objects", 0700, 240*time.Hour, Keep, []entry{ + d("tmp_a", 0000, 240*time.Hour, Keep, []entry{ + f("tmp_b", 0700, 240*time.Hour, Delete), + }), }), }, - wantErr: false, }, { name: "deeply nested inaccessible tmp directory", entries: []entry{ - d("tmp_a", os.FileMode(0000), 240*time.Hour, Delete, []entry{ - d("tmp_a", os.FileMode(0000), 24*time.Hour, Delete, []entry{ - f("tmp_b", os.FileMode(0000), 24*time.Hour, Delete), + d("objects", 0700, 240*time.Hour, Keep, []entry{ + d("tmp_a", 0700, 240*time.Hour, Keep, []entry{ + d("tmp_a", 0700, 24*time.Hour, Keep, []entry{ + f("tmp_b", 0000, 240*time.Hour, Delete), + }), }), }), }, - wantErr: false, + }, + { + name: "files outside of object database", + entries: []entry{ + f("tmp_a", 0770, 240*time.Hour, Keep), + d("info", 0700, 240*time.Hour, Keep, []entry{ + f("tmp_a", 0770, 240*time.Hour, Keep), + }), + }, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - rootPath, err := ioutil.TempDir("", "test") - assert.NoError(t, err, "TempDir creation failed") - defer os.RemoveAll(rootPath) - - for _, e := range tt.entries { - e.create(t, rootPath) - } + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + rootPath, cleanup := testhelper.TempDir(t) + defer cleanup() ctx, cancel := testhelper.Context() defer cancel() - if err = Perform(ctx, rootPath); (err != nil) != tt.wantErr { - t.Errorf("Perform() error = %v, wantErr %v", err, tt.wantErr) + // We need to fix permissions so we don't fail to + // remove the temporary directory after the test. + defer FixDirectoryPermissions(ctx, rootPath) + + for _, e := range tc.entries { + e.create(t, rootPath) } - for _, e := range tt.entries { + require.NoError(t, Perform(ctx, rootPath)) + + for _, e := range tc.entries { e.validate(t, rootPath) } }) } } -func TestShouldUnlink(t *testing.T) { +func TestPerform_references(t *testing.T) { + type ref struct { + name string + age time.Duration + size int + } + + testcases := []struct { + desc string + refs []ref + expected []string + }{ + { + desc: "normal reference", + refs: []ref{ + {name: "refs/heads/master", age: 1 * time.Second, size: 40}, + }, + expected: []string{ + "refs/heads/master", + }, + }, + { + desc: "recent empty reference is not deleted", + refs: []ref{ + {name: "refs/heads/master", age: 1 * time.Hour, size: 0}, + }, + expected: []string{ + "refs/heads/master", + }, + }, + { + desc: "old empty reference is deleted", + refs: []ref{ + {name: "refs/heads/master", age: 25 * time.Hour, size: 0}, + }, + expected: nil, + }, + { + desc: "multiple references", + refs: []ref{ + {name: "refs/keep/kept-because-recent", age: 1 * time.Hour, size: 0}, + {name: "refs/keep/kept-because-nonempty", age: 25 * time.Hour, size: 1}, + {name: "refs/keep/prune", age: 25 * time.Hour, size: 0}, + {name: "refs/tags/kept-because-recent", age: 1 * time.Hour, size: 0}, + {name: "refs/tags/kept-because-nonempty", age: 25 * time.Hour, size: 1}, + {name: "refs/tags/prune", age: 25 * time.Hour, size: 0}, + {name: "refs/heads/kept-because-recent", age: 1 * time.Hour, size: 0}, + {name: "refs/heads/kept-because-nonempty", age: 25 * time.Hour, size: 1}, + {name: "refs/heads/prune", age: 25 * time.Hour, size: 0}, + }, + expected: []string{ + "refs/keep/kept-because-recent", + "refs/keep/kept-because-nonempty", + "refs/tags/kept-because-recent", + "refs/tags/kept-because-nonempty", + "refs/heads/kept-because-recent", + "refs/heads/kept-because-nonempty", + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.desc, func(t *testing.T) { + rootPath, cleanup := testhelper.TempDir(t) + defer cleanup() + + for _, ref := range tc.refs { + path := filepath.Join(rootPath, ref.name) + + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0755)) + require.NoError(t, ioutil.WriteFile(path, bytes.Repeat([]byte{0}, ref.size), 0644)) + filetime := time.Now().Add(-ref.age) + require.NoError(t, os.Chtimes(path, filetime, filetime)) + } + + ctx, cancel := testhelper.Context() + defer cancel() + + require.NoError(t, Perform(ctx, rootPath)) + + var actual []string + filepath.Walk(filepath.Join(rootPath), func(path string, info os.FileInfo, _ error) error { + if !info.IsDir() { + ref, err := filepath.Rel(rootPath, path) + require.NoError(t, err) + actual = append(actual, ref) + } + return nil + }) + + require.ElementsMatch(t, tc.expected, actual) + }) + } +} + +func TestShouldRemoveTemporaryObject(t *testing.T) { type args struct { path string modTime time.Time mode os.FileMode } - tests := []struct { + testcases := []struct { name string args args want bool @@ -266,11 +378,9 @@ func TestShouldUnlink(t *testing.T) { }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := shouldRemove(tt.args.path, tt.args.modTime, tt.args.mode); got != tt.want { - t.Errorf("shouldUnlink() = %v, want %v", got, tt.want) - } + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.want, isStaleTemporaryObject(tc.args.path, tc.args.modTime, tc.args.mode)) }) } } diff --git a/internal/git/housekeeping/testhelper_test.go b/internal/git/housekeeping/testhelper_test.go new file mode 100644 index 000000000..449a89caa --- /dev/null +++ b/internal/git/housekeeping/testhelper_test.go @@ -0,0 +1,18 @@ +package housekeeping + +import ( + "os" + "testing" + + "gitlab.com/gitlab-org/gitaly/internal/testhelper" +) + +func TestMain(m *testing.M) { + os.Exit(testMain(m)) +} + +func testMain(m *testing.M) int { + cleanup := testhelper.Configure() + defer cleanup() + return m.Run() +} diff --git a/internal/git/objectpool/fetch.go b/internal/git/objectpool/fetch.go index 4ac559962..41967d029 100644 --- a/internal/git/objectpool/fetch.go +++ b/internal/git/objectpool/fetch.go @@ -15,6 +15,7 @@ import ( "github.com/sirupsen/logrus" "gitlab.com/gitlab-org/gitaly/internal/command" "gitlab.com/gitlab-org/gitaly/internal/git" + "gitlab.com/gitlab-org/gitaly/internal/git/housekeeping" "gitlab.com/gitlab-org/gitaly/internal/git/repository" "gitlab.com/gitlab-org/gitaly/internal/git/updateref" "gitlab.com/gitlab-org/gitaly/internal/helper" @@ -33,11 +34,14 @@ func (o *ObjectPool) FetchFromOrigin(ctx context.Context, origin *gitalypb.Repos } originPath, err := o.locator.GetPath(origin) - if err != nil { return err } + if err := housekeeping.Perform(ctx, originPath); err != nil { + return err + } + opts := []git.CmdOpt{ git.WithRefTxHook(ctx, helper.ProtoRepoFromRepo(o), o.cfg), } diff --git a/internal/gitaly/service/repository/gc.go b/internal/gitaly/service/repository/gc.go index 469f18cea..28528e9b1 100644 --- a/internal/gitaly/service/repository/gc.go +++ b/internal/gitaly/service/repository/gc.go @@ -12,9 +12,9 @@ import ( log "github.com/sirupsen/logrus" "gitlab.com/gitlab-org/gitaly/internal/git" "gitlab.com/gitlab-org/gitaly/internal/git/catfile" + "gitlab.com/gitlab-org/gitaly/internal/git/housekeeping" "gitlab.com/gitlab-org/gitaly/internal/git/stats" "gitlab.com/gitlab-org/gitaly/internal/helper" - "gitlab.com/gitlab-org/gitaly/internal/helper/housekeeping" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" "google.golang.org/grpc/status" ) diff --git a/internal/gitaly/service/repository/rebase_in_progress.go b/internal/gitaly/service/repository/rebase_in_progress.go index b72ead574..2ad901b8c 100644 --- a/internal/gitaly/service/repository/rebase_in_progress.go +++ b/internal/gitaly/service/repository/rebase_in_progress.go @@ -8,7 +8,7 @@ import ( "strings" "time" - "gitlab.com/gitlab-org/gitaly/internal/helper/housekeeping" + "gitlab.com/gitlab-org/gitaly/internal/git/housekeeping" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" diff --git a/internal/tempdir/tempdir.go b/internal/tempdir/tempdir.go index 4878c6e1c..a94b88d44 100644 --- a/internal/tempdir/tempdir.go +++ b/internal/tempdir/tempdir.go @@ -11,8 +11,8 @@ import ( log "github.com/sirupsen/logrus" "gitlab.com/gitlab-org/gitaly/internal/dontpanic" + "gitlab.com/gitlab-org/gitaly/internal/git/housekeeping" "gitlab.com/gitlab-org/gitaly/internal/gitaly/config" - "gitlab.com/gitlab-org/gitaly/internal/helper/housekeeping" "gitlab.com/gitlab-org/gitaly/internal/storage" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" ) |