diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-03-30 13:34:36 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-03-30 13:34:36 +0300 |
commit | 8b95897d918a4fefdc0542a72121182cd1f27f8f (patch) | |
tree | fd04f159b9e74fc37c40dfd5deaef3861bd5db17 | |
parent | 3008eae57d4fae16b19833954b888c03f0699c33 (diff) | |
parent | 01da350f39dea1bb777565eb7fcb6b85df95e6ec (diff) |
Merge branch 'pks-houskeeping-clean-stale-data-walkdir' into 'master'
housekeeping: Avoid stat calls when searching for stale files
See merge request gitlab-org/gitaly!4432
-rw-r--r-- | internal/git/housekeeping/clean_stale_data.go | 119 | ||||
-rw-r--r-- | internal/git/housekeeping/clean_stale_data_test.go | 115 |
2 files changed, 141 insertions, 93 deletions
diff --git a/internal/git/housekeeping/clean_stale_data.go b/internal/git/housekeeping/clean_stale_data.go index ee1699151..7f99deb8f 100644 --- a/internal/git/housekeeping/clean_stale_data.go +++ b/internal/git/housekeeping/clean_stale_data.go @@ -5,6 +5,7 @@ import ( "context" "errors" "fmt" + "io/fs" "os" "path/filepath" "strings" @@ -268,72 +269,97 @@ func findStaleLockfiles(ctx context.Context, repoPath string) ([]string, error) func findTemporaryObjects(ctx context.Context, repoPath string) ([]string, error) { var temporaryObjects []string - logger := myLogger(ctx) - - err := filepath.Walk(filepath.Join(repoPath, "objects"), func(path string, info os.FileInfo, err error) error { - if info == nil { - logger.WithFields(log.Fields{ - "path": path, - }).WithError(err).Error("nil FileInfo in housekeeping.Perform") + if err := filepath.WalkDir(filepath.Join(repoPath, "objects"), func(path string, dirEntry fs.DirEntry, err error) error { + if err != nil { + if errors.Is(err, fs.ErrPermission) || errors.Is(err, fs.ErrNotExist) { + return nil + } + return err + } + // Git will never create temporary directories, but only temporary objects, + // packfiles and packfile indices. + if dirEntry.IsDir() { return nil } - // Git will never create temporary directories, but only - // temporary objects, packfiles and packfile indices. - if info.IsDir() { - return nil + isStale, err := isStaleTemporaryObject(dirEntry) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return nil + } + + return fmt.Errorf("checking for stale temporary object: %w", err) } - if !isStaleTemporaryObject(path, info.ModTime(), info.Mode()) { + if !isStale { return nil } temporaryObjects = append(temporaryObjects, path) return nil - }) - if err != nil { - return nil, err + }); err != nil { + return nil, fmt.Errorf("walking object directory: %w", err) } return temporaryObjects, nil } -func isStaleTemporaryObject(path string, modTime time.Time, mode os.FileMode) bool { - base := filepath.Base(path) +func isStaleTemporaryObject(dirEntry fs.DirEntry) (bool, error) { + // Check the entry's name first so that we can ideally avoid stat'ting the entry. + if !strings.HasPrefix(dirEntry.Name(), "tmp_") { + return false, nil + } + + fi, err := dirEntry.Info() + if err != nil { + return false, err + } + + if time.Since(fi.ModTime()) <= deleteTempFilesOlderThanDuration { + return false, nil + } - // Only delete entries starting with `tmp_` and older than a week - return strings.HasPrefix(base, "tmp_") && time.Since(modTime) >= deleteTempFilesOlderThanDuration + return true, nil } func findBrokenLooseReferences(ctx context.Context, repoPath string) ([]string, error) { - logger := myLogger(ctx) - var brokenRefs []string - err := filepath.Walk(filepath.Join(repoPath, "refs"), func(path string, info os.FileInfo, err error) error { - if info == nil { - logger.WithFields(log.Fields{ - "path": path, - }).WithError(err).Error("nil FileInfo in housekeeping.Perform") + if err := filepath.WalkDir(filepath.Join(repoPath, "refs"), func(path string, dirEntry fs.DirEntry, err error) error { + if err != nil { + if errors.Is(err, fs.ErrPermission) || errors.Is(err, fs.ErrNotExist) { + return nil + } + return err + } + if dirEntry.IsDir() { return nil } + fi, err := dirEntry.Info() + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return nil + } + + return fmt.Errorf("statting loose ref: %w", err) + } + // When git crashes or a node reboots, it may happen that it leaves behind empty // references. These references break various assumptions made by git and cause it // to error in various circumstances. We thus clean them up to work around the // issue. - if info.IsDir() || info.Size() > 0 || time.Since(info.ModTime()) < brokenRefsGracePeriod { + if fi.Size() > 0 || time.Since(fi.ModTime()) < brokenRefsGracePeriod { return nil } brokenRefs = append(brokenRefs, path) return nil - }) - if err != nil { - return nil, err + }); err != nil { + return nil, fmt.Errorf("walking references: %w", err) } return brokenRefs, nil @@ -343,29 +369,40 @@ func findBrokenLooseReferences(ctx context.Context, repoPath string) ([]string, func findStaleReferenceLocks(ctx context.Context, repoPath string) ([]string, error) { var staleReferenceLocks []string - err := filepath.Walk(filepath.Join(repoPath, "refs"), func(path string, info os.FileInfo, err error) error { - if os.IsNotExist(err) { - // Race condition: somebody already deleted the file for us. Ignore this file. - return nil - } - + if err := filepath.WalkDir(filepath.Join(repoPath, "refs"), func(path string, dirEntry fs.DirEntry, err error) error { if err != nil { + if errors.Is(err, fs.ErrNotExist) || errors.Is(err, fs.ErrPermission) { + return nil + } + return err } - if info.IsDir() { + if dirEntry.IsDir() { return nil } - if !strings.HasSuffix(info.Name(), ".lock") || time.Since(info.ModTime()) < referenceLockfileGracePeriod { + if !strings.HasSuffix(dirEntry.Name(), ".lock") { + return nil + } + + fi, err := dirEntry.Info() + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return nil + } + + return fmt.Errorf("statting reference lock: %w", err) + } + + if time.Since(fi.ModTime()) < referenceLockfileGracePeriod { return nil } staleReferenceLocks = append(staleReferenceLocks, path) return nil - }) - if err != nil { - return nil, err + }); err != nil { + return nil, fmt.Errorf("walking refs: %w", err) } return staleReferenceLocks, nil diff --git a/internal/git/housekeeping/clean_stale_data_test.go b/internal/git/housekeeping/clean_stale_data_test.go index 44a0edc1f..a1ac03ebc 100644 --- a/internal/git/housekeeping/clean_stale_data_test.go +++ b/internal/git/housekeeping/clean_stale_data_test.go @@ -3,6 +3,7 @@ package housekeeping import ( "bytes" "fmt" + "io/fs" "os" "path/filepath" "runtime" @@ -767,76 +768,86 @@ func TestRepositoryManager_CleanStaleData_referenceLocks(t *testing.T) { } } -func TestShouldRemoveTemporaryObject(t *testing.T) { - type args struct { - path string - modTime time.Time - mode os.FileMode - } - testcases := []struct { - name string - args args - want bool +type mockDirEntry struct { + fs.DirEntry + isDir bool + name string + fi fs.FileInfo +} + +func (m mockDirEntry) Name() string { + return m.name +} + +func (m mockDirEntry) IsDir() bool { + return m.isDir +} + +func (m mockDirEntry) Info() (fs.FileInfo, error) { + return m.fi, nil +} + +type mockFileInfo struct { + fs.FileInfo + modTime time.Time +} + +func (m mockFileInfo) ModTime() time.Time { + return m.modTime +} + +func TestIsStaleTemporaryObject(t *testing.T) { + for _, tc := range []struct { + name string + dirEntry fs.DirEntry + expectIsStale bool }{ { name: "regular_file", - args: args{ - path: "/tmp/objects", - modTime: time.Now().Add(-1 * time.Hour), - mode: 0o700, + dirEntry: mockDirEntry{ + name: "objects", + fi: mockFileInfo{ + modTime: time.Now().Add(-1 * time.Hour), + }, }, - want: false, + expectIsStale: false, }, { name: "directory", - args: args{ - path: "/tmp/", - modTime: time.Now().Add(-1 * time.Hour), - mode: 0o770, + dirEntry: mockDirEntry{ + name: "tmp", + isDir: true, + fi: mockFileInfo{ + modTime: time.Now().Add(-1 * time.Hour), + }, }, - want: false, + expectIsStale: false, }, { name: "recent_time_file", - args: args{ - path: "/tmp/tmp_DELETEME", - modTime: time.Now().Add(-1 * time.Hour), - mode: 0o600, + dirEntry: mockDirEntry{ + name: "tmp_DELETEME", + fi: mockFileInfo{ + modTime: time.Now().Add(-1 * time.Hour), + }, }, - want: false, + expectIsStale: false, }, { name: "old temp file", - args: args{ - path: "/tmp/tmp_DELETEME", - modTime: time.Now().Add(-8 * 24 * time.Hour), - mode: 0o600, - }, - want: true, - }, - { - name: "old_temp_file", - args: args{ - path: "/tmp/tmp_DELETEME", - modTime: time.Now().Add(-1 * time.Hour), - mode: 0o000, - }, - want: false, - }, - { - name: "inaccessible_recent_file", - args: args{ - path: "/tmp/tmp_DELETEME", - modTime: time.Now().Add(-1 * time.Hour), - mode: 0o000, + dirEntry: mockDirEntry{ + name: "tmp_DELETEME", + fi: mockFileInfo{ + modTime: time.Now().Add(-8 * 24 * time.Hour), + }, }, - want: false, + expectIsStale: true, }, - } - - for _, tc := range testcases { + } { t.Run(tc.name, func(t *testing.T) { - require.Equal(t, tc.want, isStaleTemporaryObject(tc.args.path, tc.args.modTime, tc.args.mode)) + isStale, err := isStaleTemporaryObject(tc.dirEntry) + require.NoError(t, err) + require.Equal(t, tc.expectIsStale, isStale) }) } } |