Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2022-03-30 13:34:36 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2022-03-30 13:34:36 +0300
commit8b95897d918a4fefdc0542a72121182cd1f27f8f (patch)
treefd04f159b9e74fc37c40dfd5deaef3861bd5db17
parent3008eae57d4fae16b19833954b888c03f0699c33 (diff)
parent01da350f39dea1bb777565eb7fcb6b85df95e6ec (diff)
Merge branch 'pks-houskeeping-clean-stale-data-walkdir' into 'master'
housekeeping: Avoid stat calls when searching for stale files See merge request gitlab-org/gitaly!4432
-rw-r--r--internal/git/housekeeping/clean_stale_data.go119
-rw-r--r--internal/git/housekeeping/clean_stale_data_test.go115
2 files changed, 141 insertions, 93 deletions
diff --git a/internal/git/housekeeping/clean_stale_data.go b/internal/git/housekeeping/clean_stale_data.go
index ee1699151..7f99deb8f 100644
--- a/internal/git/housekeeping/clean_stale_data.go
+++ b/internal/git/housekeeping/clean_stale_data.go
@@ -5,6 +5,7 @@ import (
"context"
"errors"
"fmt"
+ "io/fs"
"os"
"path/filepath"
"strings"
@@ -268,72 +269,97 @@ func findStaleLockfiles(ctx context.Context, repoPath string) ([]string, error)
func findTemporaryObjects(ctx context.Context, repoPath string) ([]string, error) {
var temporaryObjects []string
- logger := myLogger(ctx)
-
- err := filepath.Walk(filepath.Join(repoPath, "objects"), func(path string, info os.FileInfo, err error) error {
- if info == nil {
- logger.WithFields(log.Fields{
- "path": path,
- }).WithError(err).Error("nil FileInfo in housekeeping.Perform")
+ if err := filepath.WalkDir(filepath.Join(repoPath, "objects"), func(path string, dirEntry fs.DirEntry, err error) error {
+ if err != nil {
+ if errors.Is(err, fs.ErrPermission) || errors.Is(err, fs.ErrNotExist) {
+ return nil
+ }
+ return err
+ }
+ // Git will never create temporary directories, but only temporary objects,
+ // packfiles and packfile indices.
+ if dirEntry.IsDir() {
return nil
}
- // Git will never create temporary directories, but only
- // temporary objects, packfiles and packfile indices.
- if info.IsDir() {
- return nil
+ isStale, err := isStaleTemporaryObject(dirEntry)
+ if err != nil {
+ if errors.Is(err, fs.ErrNotExist) {
+ return nil
+ }
+
+ return fmt.Errorf("checking for stale temporary object: %w", err)
}
- if !isStaleTemporaryObject(path, info.ModTime(), info.Mode()) {
+ if !isStale {
return nil
}
temporaryObjects = append(temporaryObjects, path)
return nil
- })
- if err != nil {
- return nil, err
+ }); err != nil {
+ return nil, fmt.Errorf("walking object directory: %w", err)
}
return temporaryObjects, nil
}
-func isStaleTemporaryObject(path string, modTime time.Time, mode os.FileMode) bool {
- base := filepath.Base(path)
+func isStaleTemporaryObject(dirEntry fs.DirEntry) (bool, error) {
+ // Check the entry's name first so that we can ideally avoid stat'ting the entry.
+ if !strings.HasPrefix(dirEntry.Name(), "tmp_") {
+ return false, nil
+ }
+
+ fi, err := dirEntry.Info()
+ if err != nil {
+ return false, err
+ }
+
+ if time.Since(fi.ModTime()) <= deleteTempFilesOlderThanDuration {
+ return false, nil
+ }
- // Only delete entries starting with `tmp_` and older than a week
- return strings.HasPrefix(base, "tmp_") && time.Since(modTime) >= deleteTempFilesOlderThanDuration
+ return true, nil
}
func findBrokenLooseReferences(ctx context.Context, repoPath string) ([]string, error) {
- logger := myLogger(ctx)
-
var brokenRefs []string
- err := filepath.Walk(filepath.Join(repoPath, "refs"), func(path string, info os.FileInfo, err error) error {
- if info == nil {
- logger.WithFields(log.Fields{
- "path": path,
- }).WithError(err).Error("nil FileInfo in housekeeping.Perform")
+ if err := filepath.WalkDir(filepath.Join(repoPath, "refs"), func(path string, dirEntry fs.DirEntry, err error) error {
+ if err != nil {
+ if errors.Is(err, fs.ErrPermission) || errors.Is(err, fs.ErrNotExist) {
+ return nil
+ }
+ return err
+ }
+ if dirEntry.IsDir() {
return nil
}
+ fi, err := dirEntry.Info()
+ if err != nil {
+ if errors.Is(err, fs.ErrNotExist) {
+ return nil
+ }
+
+ return fmt.Errorf("statting loose ref: %w", err)
+ }
+
// When git crashes or a node reboots, it may happen that it leaves behind empty
// references. These references break various assumptions made by git and cause it
// to error in various circumstances. We thus clean them up to work around the
// issue.
- if info.IsDir() || info.Size() > 0 || time.Since(info.ModTime()) < brokenRefsGracePeriod {
+ if fi.Size() > 0 || time.Since(fi.ModTime()) < brokenRefsGracePeriod {
return nil
}
brokenRefs = append(brokenRefs, path)
return nil
- })
- if err != nil {
- return nil, err
+ }); err != nil {
+ return nil, fmt.Errorf("walking references: %w", err)
}
return brokenRefs, nil
@@ -343,29 +369,40 @@ func findBrokenLooseReferences(ctx context.Context, repoPath string) ([]string,
func findStaleReferenceLocks(ctx context.Context, repoPath string) ([]string, error) {
var staleReferenceLocks []string
- err := filepath.Walk(filepath.Join(repoPath, "refs"), func(path string, info os.FileInfo, err error) error {
- if os.IsNotExist(err) {
- // Race condition: somebody already deleted the file for us. Ignore this file.
- return nil
- }
-
+ if err := filepath.WalkDir(filepath.Join(repoPath, "refs"), func(path string, dirEntry fs.DirEntry, err error) error {
if err != nil {
+ if errors.Is(err, fs.ErrNotExist) || errors.Is(err, fs.ErrPermission) {
+ return nil
+ }
+
return err
}
- if info.IsDir() {
+ if dirEntry.IsDir() {
return nil
}
- if !strings.HasSuffix(info.Name(), ".lock") || time.Since(info.ModTime()) < referenceLockfileGracePeriod {
+ if !strings.HasSuffix(dirEntry.Name(), ".lock") {
+ return nil
+ }
+
+ fi, err := dirEntry.Info()
+ if err != nil {
+ if errors.Is(err, fs.ErrNotExist) {
+ return nil
+ }
+
+ return fmt.Errorf("statting reference lock: %w", err)
+ }
+
+ if time.Since(fi.ModTime()) < referenceLockfileGracePeriod {
return nil
}
staleReferenceLocks = append(staleReferenceLocks, path)
return nil
- })
- if err != nil {
- return nil, err
+ }); err != nil {
+ return nil, fmt.Errorf("walking refs: %w", err)
}
return staleReferenceLocks, nil
diff --git a/internal/git/housekeeping/clean_stale_data_test.go b/internal/git/housekeeping/clean_stale_data_test.go
index 44a0edc1f..a1ac03ebc 100644
--- a/internal/git/housekeeping/clean_stale_data_test.go
+++ b/internal/git/housekeeping/clean_stale_data_test.go
@@ -3,6 +3,7 @@ package housekeeping
import (
"bytes"
"fmt"
+ "io/fs"
"os"
"path/filepath"
"runtime"
@@ -767,76 +768,86 @@ func TestRepositoryManager_CleanStaleData_referenceLocks(t *testing.T) {
}
}
-func TestShouldRemoveTemporaryObject(t *testing.T) {
- type args struct {
- path string
- modTime time.Time
- mode os.FileMode
- }
- testcases := []struct {
- name string
- args args
- want bool
+type mockDirEntry struct {
+ fs.DirEntry
+ isDir bool
+ name string
+ fi fs.FileInfo
+}
+
+func (m mockDirEntry) Name() string {
+ return m.name
+}
+
+func (m mockDirEntry) IsDir() bool {
+ return m.isDir
+}
+
+func (m mockDirEntry) Info() (fs.FileInfo, error) {
+ return m.fi, nil
+}
+
+type mockFileInfo struct {
+ fs.FileInfo
+ modTime time.Time
+}
+
+func (m mockFileInfo) ModTime() time.Time {
+ return m.modTime
+}
+
+func TestIsStaleTemporaryObject(t *testing.T) {
+ for _, tc := range []struct {
+ name string
+ dirEntry fs.DirEntry
+ expectIsStale bool
}{
{
name: "regular_file",
- args: args{
- path: "/tmp/objects",
- modTime: time.Now().Add(-1 * time.Hour),
- mode: 0o700,
+ dirEntry: mockDirEntry{
+ name: "objects",
+ fi: mockFileInfo{
+ modTime: time.Now().Add(-1 * time.Hour),
+ },
},
- want: false,
+ expectIsStale: false,
},
{
name: "directory",
- args: args{
- path: "/tmp/",
- modTime: time.Now().Add(-1 * time.Hour),
- mode: 0o770,
+ dirEntry: mockDirEntry{
+ name: "tmp",
+ isDir: true,
+ fi: mockFileInfo{
+ modTime: time.Now().Add(-1 * time.Hour),
+ },
},
- want: false,
+ expectIsStale: false,
},
{
name: "recent_time_file",
- args: args{
- path: "/tmp/tmp_DELETEME",
- modTime: time.Now().Add(-1 * time.Hour),
- mode: 0o600,
+ dirEntry: mockDirEntry{
+ name: "tmp_DELETEME",
+ fi: mockFileInfo{
+ modTime: time.Now().Add(-1 * time.Hour),
+ },
},
- want: false,
+ expectIsStale: false,
},
{
name: "old temp file",
- args: args{
- path: "/tmp/tmp_DELETEME",
- modTime: time.Now().Add(-8 * 24 * time.Hour),
- mode: 0o600,
- },
- want: true,
- },
- {
- name: "old_temp_file",
- args: args{
- path: "/tmp/tmp_DELETEME",
- modTime: time.Now().Add(-1 * time.Hour),
- mode: 0o000,
- },
- want: false,
- },
- {
- name: "inaccessible_recent_file",
- args: args{
- path: "/tmp/tmp_DELETEME",
- modTime: time.Now().Add(-1 * time.Hour),
- mode: 0o000,
+ dirEntry: mockDirEntry{
+ name: "tmp_DELETEME",
+ fi: mockFileInfo{
+ modTime: time.Now().Add(-8 * 24 * time.Hour),
+ },
},
- want: false,
+ expectIsStale: true,
},
- }
-
- for _, tc := range testcases {
+ } {
t.Run(tc.name, func(t *testing.T) {
- require.Equal(t, tc.want, isStaleTemporaryObject(tc.args.path, tc.args.modTime, tc.args.mode))
+ isStale, err := isStaleTemporaryObject(tc.dirEntry)
+ require.NoError(t, err)
+ require.Equal(t, tc.expectIsStale, isStale)
})
}
}