diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-03-21 13:29:45 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-03-21 13:29:45 +0300 |
commit | da590aad2d93420747e4e88d60ec9f9a12f7b734 (patch) | |
tree | 894ee3b8e248de6eac9b35495a943cf7167e7351 | |
parent | dca634281da8ef08f51eaa0019c6ddac7a531d73 (diff) | |
parent | 2f07489459488cafdfd2ba5d2f7def42a282b4a2 (diff) |
Merge branch 'pks-git-prune-server-info' into 'master'
git: Disable generation of server info in git-repack(1) and prune existing files
Closes #4027
See merge request gitlab-org/gitaly!4405
-rw-r--r-- | internal/git/housekeeping/clean_stale_data.go | 38 | ||||
-rw-r--r-- | internal/git/housekeeping/clean_stale_data_test.go | 82 | ||||
-rw-r--r-- | internal/git/housekeeping/objects.go | 10 | ||||
-rw-r--r-- | internal/git/housekeeping/objects_test.go | 54 | ||||
-rw-r--r-- | internal/git/objectpool/fetch.go | 10 | ||||
-rw-r--r-- | internal/git/objectpool/fetch_test.go | 3 | ||||
-rw-r--r-- | internal/gitaly/service/objectpool/reduplicate.go | 11 | ||||
-rw-r--r-- | internal/gitaly/service/objectpool/reduplicate_test.go | 10 |
8 files changed, 203 insertions, 15 deletions
diff --git a/internal/git/housekeeping/clean_stale_data.go b/internal/git/housekeeping/clean_stale_data.go index 2cea9164a..931a97553 100644 --- a/internal/git/housekeeping/clean_stale_data.go +++ b/internal/git/housekeeping/clean_stale_data.go @@ -59,6 +59,7 @@ func (m *RepositoryManager) CleanStaleData(ctx context.Context, repo *localrepo. "reflocks": findStaleReferenceLocks, "packedrefslock": findPackedRefsLock, "packedrefsnew": findPackedRefsNew, + "serverinfo": findServerInfo, } { staleFiles, err := staleFileFinder(ctx, repoPath) if err != nil { @@ -364,6 +365,43 @@ func findPackedRefsNew(ctx context.Context, repoPath string) ([]string, error) { return findStaleFiles(repoPath, packedRefsNewGracePeriod, "packed-refs.new") } +// findServerInfo returns files generated by git-update-server-info(1). These files are only +// required to serve Git fetches via the dumb HTTP protocol, which we don't serve at all. It's thus +// safe to remove all of those files without a grace period. +func findServerInfo(ctx context.Context, repoPath string) ([]string, error) { + var serverInfoFiles []string + + for directory, basename := range map[string]string{ + filepath.Join(repoPath, "info"): "refs", + filepath.Join(repoPath, "objects", "info"): "packs", + } { + entries, err := os.ReadDir(directory) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + continue + } + + return nil, fmt.Errorf("reading info directory: %w", err) + } + + for _, entry := range entries { + if !entry.Type().IsRegular() { + continue + } + + // An exact match is the actual file we care about, while the latter pattern + // refers to the temporary files Git uses to write those files. + if entry.Name() != basename && !strings.HasPrefix(entry.Name(), basename+"_") { + continue + } + + serverInfoFiles = append(serverInfoFiles, filepath.Join(directory, entry.Name())) + } + } + + return serverInfoFiles, nil +} + // FixDirectoryPermissions does a recursive directory walk to look for // directories that cannot be accessed by the current user, and tries to // fix those with chmod. The motivating problem is that directories with mode diff --git a/internal/git/housekeeping/clean_stale_data_test.go b/internal/git/housekeeping/clean_stale_data_test.go index 0811e5805..e6a121c14 100644 --- a/internal/git/housekeeping/clean_stale_data_test.go +++ b/internal/git/housekeeping/clean_stale_data_test.go @@ -44,6 +44,8 @@ type fileEntry struct { } func (f *fileEntry) create(t *testing.T, parent string) { + t.Helper() + filename := filepath.Join(parent, f.name) ff, err := os.OpenFile(filename, os.O_RDONLY|os.O_CREATE, 0o700) assert.NoError(t, err, "file creation failed: %v", filename) @@ -55,22 +57,29 @@ func (f *fileEntry) create(t *testing.T, parent string) { } func (f *fileEntry) validate(t *testing.T, parent string) { + t.Helper() + filename := filepath.Join(parent, f.name) f.checkExistence(t, filename) } func (f *fileEntry) chmod(t *testing.T, filename string) { + t.Helper() + err := os.Chmod(filename, f.mode) assert.NoError(t, err, "chmod failed") } func (f *fileEntry) chtimes(t *testing.T, filename string) { + t.Helper() + filetime := time.Now().Add(-f.age) err := os.Chtimes(filename, filetime, filetime) assert.NoError(t, err, "chtimes failed") } func (f *fileEntry) checkExistence(t *testing.T, filename string) { + t.Helper() _, err := os.Stat(filename) if err == nil && f.finalState == Delete { t.Errorf("Expected %v to have been deleted.", filename) @@ -86,6 +95,8 @@ type dirEntry struct { } func (d *dirEntry) create(t *testing.T, parent string) { + t.Helper() + dirname := filepath.Join(parent, d.name) if err := os.Mkdir(dirname, 0o700); err != nil { @@ -102,6 +113,8 @@ func (d *dirEntry) create(t *testing.T, parent string) { } func (d *dirEntry) validate(t *testing.T, parent string) { + t.Helper() + dirname := filepath.Join(parent, d.name) d.checkExistence(t, dirname) @@ -126,6 +139,7 @@ type cleanStaleDataMetrics struct { refsEmptyDir int packedRefsLock int packedRefsNew int + serverInfo int } func requireCleanStaleDataMetrics(t *testing.T, m *RepositoryManager, metrics cleanStaleDataMetrics) { @@ -146,6 +160,7 @@ func requireCleanStaleDataMetrics(t *testing.T, m *RepositoryManager, metrics cl "packedrefslock": metrics.packedRefsLock, "packedrefsnew": metrics.packedRefsNew, "refsemptydir": metrics.refsEmptyDir, + "serverinfo": metrics.serverInfo, } { _, err := builder.WriteString(fmt.Sprintf("gitaly_housekeeping_pruned_files_total{filetype=%q} %d\n", metric, expectedValue)) require.NoError(t, err) @@ -154,7 +169,7 @@ func requireCleanStaleDataMetrics(t *testing.T, m *RepositoryManager, metrics cl require.NoError(t, testutil.CollectAndCompare(m, strings.NewReader(builder.String()), "gitaly_housekeeping_pruned_files_total")) } -func TestPerform(t *testing.T) { +func TestRepositoryManager_CleanStaleData(t *testing.T) { testcases := []struct { name string entries []entry @@ -278,7 +293,7 @@ func TestPerform(t *testing.T) { } } -func TestPerform_references(t *testing.T) { +func TestRepositoryManager_CleanStaleData_references(t *testing.T) { type ref struct { name string age time.Duration @@ -382,7 +397,7 @@ func TestPerform_references(t *testing.T) { } } -func TestPerform_emptyRefDirs(t *testing.T) { +func TestRepositoryManager_CleanStaleData_emptyRefDirs(t *testing.T) { testcases := []struct { name string entries []entry @@ -492,7 +507,7 @@ func TestPerform_emptyRefDirs(t *testing.T) { } } -func TestPerform_withSpecificFile(t *testing.T) { +func TestRepositoryManager_CleanStaleData_withSpecificFile(t *testing.T) { t.Parallel() for _, tc := range []struct { @@ -605,7 +620,56 @@ func TestPerform_withSpecificFile(t *testing.T) { } } -func TestPerform_referenceLocks(t *testing.T) { +func TestRepositoryManager_CleanStaleData_serverInfo(t *testing.T) { + ctx := testhelper.Context(t) + + cfg, repoProto, repoPath := testcfg.BuildWithRepo(t) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + entries := []entry{ + d("info", 0o755, 0, Keep, []entry{ + f("ref", 0, 0o644, Keep), + f("refs", 0, 0o644, Delete), + f("refsx", 0, 0o644, Keep), + f("refs_123456", 0, 0o644, Delete), + }), + d("objects", 0o755, 0, Keep, []entry{ + d("info", 0o755, 0, Keep, []entry{ + f("pack", 0, 0o644, Keep), + f("packs", 0, 0o644, Delete), + f("packsx", 0, 0o644, Keep), + f("packs_123456", 0, 0o644, Delete), + }), + }), + } + + for _, entry := range entries { + entry.create(t, repoPath) + } + + staleFiles, err := findServerInfo(ctx, repoPath) + require.NoError(t, err) + require.ElementsMatch(t, []string{ + filepath.Join(repoPath, "info/refs"), + filepath.Join(repoPath, "info/refs_123456"), + filepath.Join(repoPath, "objects/info/packs"), + filepath.Join(repoPath, "objects/info/packs_123456"), + }, staleFiles) + + mgr := NewManager(cfg.Prometheus, nil) + + require.NoError(t, mgr.CleanStaleData(ctx, repo)) + + for _, entry := range entries { + entry.validate(t, repoPath) + } + + requireCleanStaleDataMetrics(t, mgr, cleanStaleDataMetrics{ + serverInfo: 4, + }) +} + +func TestRepositoryManager_CleanStaleData_referenceLocks(t *testing.T) { ctx := testhelper.Context(t) for _, tc := range []struct { @@ -773,7 +837,7 @@ func TestShouldRemoveTemporaryObject(t *testing.T) { } } -func TestPerformRepoDoesNotExist(t *testing.T) { +func TestRepositoryManager_CleanStaleData_missingRepo(t *testing.T) { cfg, repoProto, repoPath := testcfg.BuildWithRepo(t) repo := localrepo.NewTestRepo(t, cfg, repoProto) ctx := testhelper.Context(t) @@ -783,7 +847,7 @@ func TestPerformRepoDoesNotExist(t *testing.T) { require.NoError(t, NewManager(cfg.Prometheus, nil).CleanStaleData(ctx, repo)) } -func TestPerform_UnsetConfiguration(t *testing.T) { +func TestRepositoryManager_CleanStaleData_unsetConfiguration(t *testing.T) { cfg := testcfg.Build(t) repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0]) repo := localrepo.NewTestRepo(t, cfg, repoProto) @@ -831,7 +895,7 @@ func TestPerform_UnsetConfiguration(t *testing.T) { `, string(testhelper.MustReadFile(t, configPath))) } -func TestPerform_UnsetConfiguration_transactional(t *testing.T) { +func TestRepositoryManager_CleanStaleData_unsetConfigurationTransactional(t *testing.T) { t.Parallel() ctx := testhelper.Context(t) @@ -862,7 +926,7 @@ func TestPerform_UnsetConfiguration_transactional(t *testing.T) { require.Equal(t, expectedConfig, string(configKeys)) } -func TestPerform_cleanupConfig(t *testing.T) { +func TestRepositoryManager_CleanStaleData_pruneEmptyConfigSections(t *testing.T) { t.Parallel() ctx := testhelper.Context(t) diff --git a/internal/git/housekeeping/objects.go b/internal/git/housekeeping/objects.go index afa52b291..7fbe33f06 100644 --- a/internal/git/housekeeping/objects.go +++ b/internal/git/housekeeping/objects.go @@ -43,8 +43,14 @@ func RepackObjects(ctx context.Context, repo *localrepo.Repo, cfg RepackObjectsC } if err := repo.ExecAndWait(ctx, git.SubCmd{ - Name: "repack", - Flags: append([]git.Option{git.Flag{Name: "-d"}}, options...), + Name: "repack", + Flags: append([]git.Option{ + git.Flag{Name: "-d"}, + // This can be removed as soon as we have upstreamed a + // `repack.updateServerInfo` config option. See gitlab-org/git#105 for more + // details. + git.Flag{Name: "-n"}, + }, options...), }, git.WithConfig(GetRepackGitConfig(ctx, cfg.WriteBitmap)...)); err != nil { return err } diff --git a/internal/git/housekeeping/objects_test.go b/internal/git/housekeeping/objects_test.go new file mode 100644 index 000000000..feaddcf2e --- /dev/null +++ b/internal/git/housekeeping/objects_test.go @@ -0,0 +1,54 @@ +package housekeeping + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest" + "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo" + "gitlab.com/gitlab-org/gitaly/v14/internal/git/stats" + "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper" + "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testcfg" +) + +func requireObjectCount(t *testing.T, repoPath string, expectedObjects int64) { + t.Helper() + + objects, err := stats.UnpackedObjects(repoPath) + require.NoError(t, err) + require.Equal(t, expectedObjects, objects) +} + +func requirePackfileCount(t *testing.T, repoPath string, expectedPackfiles int) { + t.Helper() + + packfiles, err := stats.PackfilesCount(repoPath) + require.NoError(t, err) + require.Equal(t, expectedPackfiles, packfiles) +} + +func TestRepackObjects(t *testing.T) { + t.Parallel() + + ctx := testhelper.Context(t) + cfg := testcfg.Build(t) + + t.Run("no server info is written", func(t *testing.T) { + repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0]) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(), gittest.WithBranch("main")) + + requireObjectCount(t, repoPath, 1) + requirePackfileCount(t, repoPath, 0) + + require.NoError(t, RepackObjects(ctx, repo, RepackObjectsConfig{})) + + requireObjectCount(t, repoPath, 0) + requirePackfileCount(t, repoPath, 1) + + require.NoFileExists(t, filepath.Join(repoPath, "info", "refs")) + require.NoFileExists(t, filepath.Join(repoPath, "objects", "info", "packs")) + }) +} diff --git a/internal/git/objectpool/fetch.go b/internal/git/objectpool/fetch.go index c04406d6b..f094cee73 100644 --- a/internal/git/objectpool/fetch.go +++ b/internal/git/objectpool/fetch.go @@ -147,8 +147,14 @@ func (o *ObjectPool) repackPool(ctx context.Context, pool repository.GitRepo) er } if err := o.Repo.ExecAndWait(ctx, git.SubCmd{ - Name: "repack", - Flags: []git.Option{git.Flag{Name: "-aidb"}}, + Name: "repack", + Flags: []git.Option{ + git.Flag{Name: "-aidb"}, + // This can be removed as soon as we have upstreamed a + // `repack.updateServerInfo` config option. See gitlab-org/git#105 for more + // details. + git.Flag{Name: "-n"}, + }, }, git.WithConfig(config...)); err != nil { return err } diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index 860cf1195..299b92868 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -81,6 +81,9 @@ func TestFetchFromOriginDangling(t *testing.T) { for _, id := range []git.ObjectID{newBlob, newTree, newCommit, newTag} { require.Contains(t, refsAfterLines, fmt.Sprintf("refs/dangling/%s %s", id, id)) } + + require.NoFileExists(t, filepath.Join(pool.FullPath(), "info", "refs")) + require.NoFileExists(t, filepath.Join(pool.FullPath(), "objects", "info", "packs")) } func TestFetchFromOriginFsck(t *testing.T) { diff --git a/internal/gitaly/service/objectpool/reduplicate.go b/internal/gitaly/service/objectpool/reduplicate.go index f2705951f..d96c98dd2 100644 --- a/internal/gitaly/service/objectpool/reduplicate.go +++ b/internal/gitaly/service/objectpool/reduplicate.go @@ -15,8 +15,15 @@ func (s *server) ReduplicateRepository(ctx context.Context, req *gitalypb.Redupl } cmd, err := s.gitCmdFactory.New(ctx, req.GetRepository(), git.SubCmd{ - Name: "repack", - Flags: []git.Option{git.Flag{Name: "--quiet"}, git.Flag{Name: "-a"}}, + Name: "repack", + Flags: []git.Option{ + git.Flag{Name: "--quiet"}, + git.Flag{Name: "-a"}, + // This can be removed as soon as we have upstreamed a + // `repack.updateServerInfo` config option. See gitlab-org/git#105 for more + // details. + git.Flag{Name: "-n"}, + }, }) if err != nil { return nil, err diff --git a/internal/gitaly/service/objectpool/reduplicate_test.go b/internal/gitaly/service/objectpool/reduplicate_test.go index 76a7406d8..721fc6f26 100644 --- a/internal/gitaly/service/objectpool/reduplicate_test.go +++ b/internal/gitaly/service/objectpool/reduplicate_test.go @@ -2,6 +2,7 @@ package objectpool import ( "os" + "path/filepath" "testing" "github.com/stretchr/testify/require" @@ -24,6 +25,12 @@ func TestReduplicate(t *testing.T) { gittest.Exec(t, cfg, "-C", repoPath, "gc") + // git-gc(1) invokes git-repack(1), which by defaults generates these files. Manually remove + // them so that we can assert further down that repository reduplication doesn't regenerate + // those paths. + require.NoError(t, os.Remove(filepath.Join(repoPath, "info", "refs"))) + require.NoError(t, os.Remove(filepath.Join(repoPath, "objects", "info", "packs"))) + existingObjectID := "55bc176024cfa3baaceb71db584c7e5df900ea65" // Corrupt the repository to check if the object can't be found @@ -43,4 +50,7 @@ func TestReduplicate(t *testing.T) { require.NoError(t, os.RemoveAll(altPath)) gittest.Exec(t, cfg, "-C", repoPath, "cat-file", "-e", existingObjectID) + + require.NoFileExists(t, filepath.Join(repoPath, "info", "refs")) + require.NoFileExists(t, filepath.Join(repoPath, "objects", "info", "packs")) } |