diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-03-24 16:30:24 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-03-30 12:36:37 +0300 |
commit | f67491c045d12f817389987e7f856fec28c8abef (patch) | |
tree | 5874b6b6b8e464c2d67369ff8dbee7c511ff32c8 | |
parent | d7eb8938be2bd217e37e5cf83a9374ae27c0a6b9 (diff) |
housekeeping: Avoid stat calls when searching for temporary objects
We use `filepath.Walk()` to search for temporary objects, which will
stat every single directory entry we're finding. This is needless
busywork though: we only need to stat files which have a `tmp_` prefix.
Convert the function to instead use `filepath.WalkDir()` to reduce the
number of stat calls.
Changelog: performance
-rw-r--r-- | internal/git/housekeeping/clean_stale_data.go | 55 | ||||
-rw-r--r-- | internal/git/housekeeping/clean_stale_data_test.go | 115 |
2 files changed, 99 insertions, 71 deletions
diff --git a/internal/git/housekeeping/clean_stale_data.go b/internal/git/housekeeping/clean_stale_data.go index ee1699151..8d300ef48 100644 --- a/internal/git/housekeeping/clean_stale_data.go +++ b/internal/git/housekeeping/clean_stale_data.go @@ -5,6 +5,7 @@ import ( "context" "errors" "fmt" + "io/fs" "os" "path/filepath" "strings" @@ -268,43 +269,59 @@ func findStaleLockfiles(ctx context.Context, repoPath string) ([]string, error) func findTemporaryObjects(ctx context.Context, repoPath string) ([]string, error) { var temporaryObjects []string - logger := myLogger(ctx) - - err := filepath.Walk(filepath.Join(repoPath, "objects"), func(path string, info os.FileInfo, err error) error { - if info == nil { - logger.WithFields(log.Fields{ - "path": path, - }).WithError(err).Error("nil FileInfo in housekeeping.Perform") + if err := filepath.WalkDir(filepath.Join(repoPath, "objects"), func(path string, dirEntry fs.DirEntry, err error) error { + if err != nil { + if errors.Is(err, fs.ErrPermission) || errors.Is(err, fs.ErrNotExist) { + return nil + } + return err + } + // Git will never create temporary directories, but only temporary objects, + // packfiles and packfile indices. + if dirEntry.IsDir() { return nil } - // Git will never create temporary directories, but only - // temporary objects, packfiles and packfile indices. - if info.IsDir() { - return nil + isStale, err := isStaleTemporaryObject(dirEntry) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return nil + } + + return fmt.Errorf("checking for stale temporary object: %w", err) } - if !isStaleTemporaryObject(path, info.ModTime(), info.Mode()) { + if !isStale { return nil } temporaryObjects = append(temporaryObjects, path) return nil - }) - if err != nil { - return nil, err + }); err != nil { + return nil, fmt.Errorf("walking object directory: %w", err) } return temporaryObjects, nil } -func isStaleTemporaryObject(path string, modTime time.Time, mode os.FileMode) bool { - base := filepath.Base(path) +func isStaleTemporaryObject(dirEntry fs.DirEntry) (bool, error) { + // Check the entry's name first so that we can ideally avoid stat'ting the entry. + if !strings.HasPrefix(dirEntry.Name(), "tmp_") { + return false, nil + } + + fi, err := dirEntry.Info() + if err != nil { + return false, err + } + + if time.Since(fi.ModTime()) <= deleteTempFilesOlderThanDuration { + return false, nil + } - // Only delete entries starting with `tmp_` and older than a week - return strings.HasPrefix(base, "tmp_") && time.Since(modTime) >= deleteTempFilesOlderThanDuration + return true, nil } func findBrokenLooseReferences(ctx context.Context, repoPath string) ([]string, error) { diff --git a/internal/git/housekeeping/clean_stale_data_test.go b/internal/git/housekeeping/clean_stale_data_test.go index 44a0edc1f..a1ac03ebc 100644 --- a/internal/git/housekeeping/clean_stale_data_test.go +++ b/internal/git/housekeeping/clean_stale_data_test.go @@ -3,6 +3,7 @@ package housekeeping import ( "bytes" "fmt" + "io/fs" "os" "path/filepath" "runtime" @@ -767,76 +768,86 @@ func TestRepositoryManager_CleanStaleData_referenceLocks(t *testing.T) { } } -func TestShouldRemoveTemporaryObject(t *testing.T) { - type args struct { - path string - modTime time.Time - mode os.FileMode - } - testcases := []struct { - name string - args args - want bool +type mockDirEntry struct { + fs.DirEntry + isDir bool + name string + fi fs.FileInfo +} + +func (m mockDirEntry) Name() string { + return m.name +} + +func (m mockDirEntry) IsDir() bool { + return m.isDir +} + +func (m mockDirEntry) Info() (fs.FileInfo, error) { + return m.fi, nil +} + +type mockFileInfo struct { + fs.FileInfo + modTime time.Time +} + +func (m mockFileInfo) ModTime() time.Time { + return m.modTime +} + +func TestIsStaleTemporaryObject(t *testing.T) { + for _, tc := range []struct { + name string + dirEntry fs.DirEntry + expectIsStale bool }{ { name: "regular_file", - args: args{ - path: "/tmp/objects", - modTime: time.Now().Add(-1 * time.Hour), - mode: 0o700, + dirEntry: mockDirEntry{ + name: "objects", + fi: mockFileInfo{ + modTime: time.Now().Add(-1 * time.Hour), + }, }, - want: false, + expectIsStale: false, }, { name: "directory", - args: args{ - path: "/tmp/", - modTime: time.Now().Add(-1 * time.Hour), - mode: 0o770, + dirEntry: mockDirEntry{ + name: "tmp", + isDir: true, + fi: mockFileInfo{ + modTime: time.Now().Add(-1 * time.Hour), + }, }, - want: false, + expectIsStale: false, }, { name: "recent_time_file", - args: args{ - path: "/tmp/tmp_DELETEME", - modTime: time.Now().Add(-1 * time.Hour), - mode: 0o600, + dirEntry: mockDirEntry{ + name: "tmp_DELETEME", + fi: mockFileInfo{ + modTime: time.Now().Add(-1 * time.Hour), + }, }, - want: false, + expectIsStale: false, }, { name: "old temp file", - args: args{ - path: "/tmp/tmp_DELETEME", - modTime: time.Now().Add(-8 * 24 * time.Hour), - mode: 0o600, - }, - want: true, - }, - { - name: "old_temp_file", - args: args{ - path: "/tmp/tmp_DELETEME", - modTime: time.Now().Add(-1 * time.Hour), - mode: 0o000, - }, - want: false, - }, - { - name: "inaccessible_recent_file", - args: args{ - path: "/tmp/tmp_DELETEME", - modTime: time.Now().Add(-1 * time.Hour), - mode: 0o000, + dirEntry: mockDirEntry{ + name: "tmp_DELETEME", + fi: mockFileInfo{ + modTime: time.Now().Add(-8 * 24 * time.Hour), + }, }, - want: false, + expectIsStale: true, }, - } - - for _, tc := range testcases { + } { t.Run(tc.name, func(t *testing.T) { - require.Equal(t, tc.want, isStaleTemporaryObject(tc.args.path, tc.args.modTime, tc.args.mode)) + isStale, err := isStaleTemporaryObject(tc.dirEntry) + require.NoError(t, err) + require.Equal(t, tc.expectIsStale, isStale) }) } } |