Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Patrick Steinhardt <psteinhardt@gitlab.com>, 2022-03-14 12:55:15 +0300
committer: Patrick Steinhardt <psteinhardt@gitlab.com>, 2022-03-18 15:47:12 +0300
commit: f89e4b0220003a4112584e85ee7438ba7978f7e3 (patch)
tree: e38f6b525d9b7ef384ba2fb275c4c9f248f54433
parent: f5154e0141f634218e844d9565fbeb24999c378a (diff)
housekeeping: Clean up server info data (ref: pks-git-prune-server-info)
Even though we don't need it, git-repack(1) has in the past generated server info by default via git-update-server-info(1), which is only used for the dumb HTTP protocol. The result is that we have lots of those files on disk, which take away precious disk space without any merit. In some cases, we have even accumulated many of those files in a single repo because git-repack(1) didn't correctly clean them up if it got interrupted, with some repositories accumulating many gigabytes of temporary server info files.

We have stopped generating those files in the most important places in the preceding commit. This commit also introduces cleanup logic such that we automatically remove all server info to make the disk space available again.

Changelog: performance
-rw-r--r-- internal/git/housekeeping/clean_stale_data.go | 38
-rw-r--r-- internal/git/housekeeping/clean_stale_data_test.go | 51
2 files changed, 89 insertions, 0 deletions
diff --git a/internal/git/housekeeping/clean_stale_data.go b/internal/git/housekeeping/clean_stale_data.go
index 2cea9164a..931a97553 100644
--- a/internal/git/housekeeping/clean_stale_data.go
+++ b/internal/git/housekeeping/clean_stale_data.go
@@ -59,6 +59,7 @@ func (m *RepositoryManager) CleanStaleData(ctx context.Context, repo *localrepo.
"reflocks": findStaleReferenceLocks,
"packedrefslock": findPackedRefsLock,
"packedrefsnew": findPackedRefsNew,
+ "serverinfo": findServerInfo,
} {
staleFiles, err := staleFileFinder(ctx, repoPath)
if err != nil {
@@ -364,6 +365,43 @@ func findPackedRefsNew(ctx context.Context, repoPath string) ([]string, error) {
return findStaleFiles(repoPath, packedRefsNewGracePeriod, "packed-refs.new")
}
+// findServerInfo returns files generated by git-update-server-info(1). These files are only
+// required to serve Git fetches via the dumb HTTP protocol, which we don't serve at all. It's thus
+// safe to remove all of those files without a grace period.
+func findServerInfo(ctx context.Context, repoPath string) ([]string, error) {
+ var serverInfoFiles []string
+
+ for directory, basename := range map[string]string{
+ filepath.Join(repoPath, "info"): "refs",
+ filepath.Join(repoPath, "objects", "info"): "packs",
+ } {
+ entries, err := os.ReadDir(directory)
+ if err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ continue
+ }
+
+ return nil, fmt.Errorf("reading info directory: %w", err)
+ }
+
+ for _, entry := range entries {
+ if !entry.Type().IsRegular() {
+ continue
+ }
+
+ // An exact match is the actual file we care about, while the latter pattern
+ // refers to the temporary files Git uses to write those files.
+ if entry.Name() != basename && !strings.HasPrefix(entry.Name(), basename+"_") {
+ continue
+ }
+
+ serverInfoFiles = append(serverInfoFiles, filepath.Join(directory, entry.Name()))
+ }
+ }
+
+ return serverInfoFiles, nil
+}
+
// FixDirectoryPermissions does a recursive directory walk to look for
// directories that cannot be accessed by the current user, and tries to
// fix those with chmod. The motivating problem is that directories with mode
diff --git a/internal/git/housekeeping/clean_stale_data_test.go b/internal/git/housekeeping/clean_stale_data_test.go
index 7d9796417..75ba47119 100644
--- a/internal/git/housekeeping/clean_stale_data_test.go
+++ b/internal/git/housekeeping/clean_stale_data_test.go
@@ -139,6 +139,7 @@ type cleanStaleDataMetrics struct {
refsEmptyDir int
packedRefsLock int
packedRefsNew int
+ serverInfo int
}
func requireCleanStaleDataMetrics(t *testing.T, m *RepositoryManager, metrics cleanStaleDataMetrics) {
@@ -159,6 +160,7 @@ func requireCleanStaleDataMetrics(t *testing.T, m *RepositoryManager, metrics cl
"packedrefslock": metrics.packedRefsLock,
"packedrefsnew": metrics.packedRefsNew,
"refsemptydir": metrics.refsEmptyDir,
+ "serverinfo": metrics.serverInfo,
} {
_, err := builder.WriteString(fmt.Sprintf("gitaly_housekeeping_pruned_files_total{filetype=%q} %d\n", metric, expectedValue))
require.NoError(t, err)
@@ -618,6 +620,55 @@ func TestRepositoryManager_CleanStaleData_withSpecificFile(t *testing.T) {
}
}
+func TestRepositoryManager_CleanStaleData_serverInfo(t *testing.T) {
+ ctx := testhelper.Context(t)
+
+ cfg, repoProto, repoPath := testcfg.BuildWithRepo(t)
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ entries := []entry{
+ d("info", 0o755, 0, Keep, []entry{
+ f("ref", 0, 0o644, Keep),
+ f("refs", 0, 0o644, Delete),
+ f("refsx", 0, 0o644, Keep),
+ f("refs_123456", 0, 0o644, Delete),
+ }),
+ d("objects", 0o755, 0, Keep, []entry{
+ d("info", 0o755, 0, Keep, []entry{
+ f("pack", 0, 0o644, Keep),
+ f("packs", 0, 0o644, Delete),
+ f("packsx", 0, 0o644, Keep),
+ f("packs_123456", 0, 0o644, Delete),
+ }),
+ }),
+ }
+
+ for _, entry := range entries {
+ entry.create(t, repoPath)
+ }
+
+ staleFiles, err := findServerInfo(ctx, repoPath)
+ require.NoError(t, err)
+ require.ElementsMatch(t, []string{
+ filepath.Join(repoPath, "info/refs"),
+ filepath.Join(repoPath, "info/refs_123456"),
+ filepath.Join(repoPath, "objects/info/packs"),
+ filepath.Join(repoPath, "objects/info/packs_123456"),
+ }, staleFiles)
+
+ mgr := NewManager(nil)
+
+ require.NoError(t, mgr.CleanStaleData(ctx, repo))
+
+ for _, entry := range entries {
+ entry.validate(t, repoPath)
+ }
+
+ requireCleanStaleDataMetrics(t, mgr, cleanStaleDataMetrics{
+ serverInfo: 4,
+ })
+}
+
func TestRepositoryManager_CleanStaleData_referenceLocks(t *testing.T) {
ctx := testhelper.Context(t)