Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2022-03-14 12:55:15 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2022-03-21 12:27:23 +0300
commit2f07489459488cafdfd2ba5d2f7def42a282b4a2 (patch)
tree894ee3b8e248de6eac9b35495a943cf7167e7351
parent0d588e4038d7539232cbae149bbba6d804d6e78e (diff)
housekeeping: Clean up server info data
Even though we don't need it, git-repack(1) has by default been generating server info via git-update-server-info(1) in the past; this information is only used by the dumb HTTP protocol. The result is that we have lots of those files on disk, which take away precious disk space without any merit. In some cases, a single repository has even accumulated many of those files because git-repack(1) didn't correctly clean them up if it got interrupted, with some repositories accumulating many gigabytes of temporary server info files. We have stopped generating those files in the most important places in the preceding commit. This commit additionally introduces cleanup logic such that we automatically remove all server info to make the disk space available again. Changelog: performance
-rw-r--r--internal/git/housekeeping/clean_stale_data.go38
-rw-r--r--internal/git/housekeeping/clean_stale_data_test.go51
2 files changed, 89 insertions, 0 deletions
diff --git a/internal/git/housekeeping/clean_stale_data.go b/internal/git/housekeeping/clean_stale_data.go
index 2cea9164a..931a97553 100644
--- a/internal/git/housekeeping/clean_stale_data.go
+++ b/internal/git/housekeeping/clean_stale_data.go
@@ -59,6 +59,7 @@ func (m *RepositoryManager) CleanStaleData(ctx context.Context, repo *localrepo.
"reflocks": findStaleReferenceLocks,
"packedrefslock": findPackedRefsLock,
"packedrefsnew": findPackedRefsNew,
+ "serverinfo": findServerInfo,
} {
staleFiles, err := staleFileFinder(ctx, repoPath)
if err != nil {
@@ -364,6 +365,43 @@ func findPackedRefsNew(ctx context.Context, repoPath string) ([]string, error) {
return findStaleFiles(repoPath, packedRefsNewGracePeriod, "packed-refs.new")
}
+// findServerInfo returns the paths of files generated by git-update-server-info(1): "info/refs" and "objects/info/packs" below repoPath, plus any regular file in those directories whose name starts with "refs_" or "packs_" respectively. These files are only
+// required to serve Git fetches via the dumb HTTP protocol, which we don't serve at all. It's thus
+// safe to remove all of those files without a grace period. A directory that does not exist is skipped; any other error from reading a directory is wrapped and returned together with a nil slice. The ctx parameter is not used by the function body.
+func findServerInfo(ctx context.Context, repoPath string) ([]string, error) {
+ var serverInfoFiles []string
+
+ for directory, basename := range map[string]string{ // maps each directory to scan to the basename of the server info file expected in it
+ filepath.Join(repoPath, "info"): "refs",
+ filepath.Join(repoPath, "objects", "info"): "packs",
+ } {
+ entries, err := os.ReadDir(directory)
+ if err != nil {
+ if errors.Is(err, os.ErrNotExist) { // a missing directory simply contributes no files
+ continue
+ }
+
+ return nil, fmt.Errorf("reading info directory: %w", err)
+ }
+
+ for _, entry := range entries {
+ if !entry.Type().IsRegular() { // only regular files are candidates; directories, symlinks and other special entries are skipped
+ continue
+ }
+
+ // An exact match on basename is the actual file we care about, while the "<basename>_" prefix
+ // refers to the temporary files Git uses to write those files. Everything else in the directory is left alone.
+ if entry.Name() != basename && !strings.HasPrefix(entry.Name(), basename+"_") {
+ continue
+ }
+
+ serverInfoFiles = append(serverInfoFiles, filepath.Join(directory, entry.Name())) // record the full path, not just the entry name
+ }
+ }
+
+ return serverInfoFiles, nil
+}
+
// FixDirectoryPermissions does a recursive directory walk to look for
// directories that cannot be accessed by the current user, and tries to
// fix those with chmod. The motivating problem is that directories with mode
diff --git a/internal/git/housekeeping/clean_stale_data_test.go b/internal/git/housekeeping/clean_stale_data_test.go
index 3586f35ba..e6a121c14 100644
--- a/internal/git/housekeeping/clean_stale_data_test.go
+++ b/internal/git/housekeeping/clean_stale_data_test.go
@@ -139,6 +139,7 @@ type cleanStaleDataMetrics struct {
refsEmptyDir int
packedRefsLock int
packedRefsNew int
+ serverInfo int
}
func requireCleanStaleDataMetrics(t *testing.T, m *RepositoryManager, metrics cleanStaleDataMetrics) {
@@ -159,6 +160,7 @@ func requireCleanStaleDataMetrics(t *testing.T, m *RepositoryManager, metrics cl
"packedrefslock": metrics.packedRefsLock,
"packedrefsnew": metrics.packedRefsNew,
"refsemptydir": metrics.refsEmptyDir,
+ "serverinfo": metrics.serverInfo,
} {
_, err := builder.WriteString(fmt.Sprintf("gitaly_housekeeping_pruned_files_total{filetype=%q} %d\n", metric, expectedValue))
require.NoError(t, err)
@@ -618,6 +620,55 @@ func TestRepositoryManager_CleanStaleData_withSpecificFile(t *testing.T) {
}
}
+func TestRepositoryManager_CleanStaleData_serverInfo(t *testing.T) { // Checks the paths reported by findServerInfo and the "serverinfo" pruning metric after CleanStaleData.
+ ctx := testhelper.Context(t)
+
+ cfg, repoProto, repoPath := testcfg.BuildWithRepo(t)
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ entries := []entry{ // fixture tree; d, f, Keep and Delete are helpers defined elsewhere in this test file — presumably Keep/Delete state the outcome entry.validate checks for
+ d("info", 0o755, 0, Keep, []entry{
+ f("ref", 0, 0o644, Keep), // near-miss: shorter than "refs"
+ f("refs", 0, 0o644, Delete), // exact basename
+ f("refsx", 0, 0o644, Keep), // near-miss: shares the prefix "refs" but lacks the "_" separator
+ f("refs_123456", 0, 0o644, Delete), // "refs_" prefixed temporary file
+ }),
+ d("objects", 0o755, 0, Keep, []entry{
+ d("info", 0o755, 0, Keep, []entry{
+ f("pack", 0, 0o644, Keep), // near-miss: shorter than "packs"
+ f("packs", 0, 0o644, Delete), // exact basename
+ f("packsx", 0, 0o644, Keep), // near-miss: shares the prefix "packs" but lacks the "_" separator
+ f("packs_123456", 0, 0o644, Delete), // "packs_" prefixed temporary file
+ }),
+ }),
+ }
+
+ for _, entry := range entries {
+ entry.create(t, repoPath) // presumably materializes the fixture below repoPath; create is defined elsewhere
+ }
+
+ staleFiles, err := findServerInfo(ctx, repoPath) // first exercise the finder directly, before anything is cleaned up
+ require.NoError(t, err)
+ require.ElementsMatch(t, []string{
+ filepath.Join(repoPath, "info/refs"),
+ filepath.Join(repoPath, "info/refs_123456"),
+ filepath.Join(repoPath, "objects/info/packs"),
+ filepath.Join(repoPath, "objects/info/packs_123456"),
+ }, staleFiles)
+
+ mgr := NewManager(cfg.Prometheus, nil)
+
+ require.NoError(t, mgr.CleanStaleData(ctx, repo)) // then run the full cleanup through the manager
+
+ for _, entry := range entries {
+ entry.validate(t, repoPath) // presumably asserts each entry's Keep/Delete expectation on disk; validate is defined elsewhere
+ }
+
+ requireCleanStaleDataMetrics(t, mgr, cleanStaleDataMetrics{
+ serverInfo: 4, // the four fixture files marked Delete above
+ })
+}
+
func TestRepositoryManager_CleanStaleData_referenceLocks(t *testing.T) {
ctx := testhelper.Context(t)