diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-03-11 14:21:07 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-03-21 12:16:56 +0300 |
commit | 0d588e4038d7539232cbae149bbba6d804d6e78e (patch) | |
tree | 5d914190e2c3183539122dec52fba8012caa07d0 | |
parent | 91f715960d12402539df049da134b4222e968fa4 (diff) |
git: Disable generation of server info in git-repack(1)
In order to be able to serve Git repositories via a dumb HTTP server,
Git needs to create a bunch of metadata that tells clients what they
need to fetch. This metadata is generated via git-update-server-info(1),
which is can be automatically called via both git-receive-pack(1) and
git-repack(1). While the former doesn't do so by default, the latter
does.
For us this is a waste of resources because we don't ever serve repos
via the dumb HTTP protocol. Generating this information thus wastes
both precious CPU cycles and disk space for data that is ultimately
never used by anything. The waste of disk space is even more pronounced
because git-repack(1) doesn't always clean up the temporary files it
uses to atomically update the final files. So when the command gets
killed, we may accumulate more and more temporary files. In extreme
cases we have seen in production, a repository whose on-disk size of
actual data was less than 5GB had accumulated about 35GB of these
temporary files.
Stop generating this information in git-repack(1) completely. Ideally,
we'd do so by injecting configuration into all repack commands, but
there is no such config option right now. Instead, we need to pass the
`-n` flag everywhere we execute git-repack(1).
Note that this doesn't stop generating the data in all places: commands
like git-gc(1) invoke git-repack(1), but we have no ability to tell it
to pass `-n` to git-repack(1). Neither is there a config option which
would allow us to globally disable the generation. The current approach
is thus only a best-effort one as a stop-gap solution while we're in the
process of upstreaming patches which introduce such a config option.
The cleanup logic for existing repositories is added in a later step in
this patch series.
Changelog: performance
-rw-r--r-- | internal/git/housekeeping/objects.go | 10 | ||||
-rw-r--r-- | internal/git/housekeeping/objects_test.go | 54 | ||||
-rw-r--r-- | internal/git/objectpool/fetch.go | 10 | ||||
-rw-r--r-- | internal/git/objectpool/fetch_test.go | 3 | ||||
-rw-r--r-- | internal/gitaly/service/objectpool/reduplicate.go | 11 | ||||
-rw-r--r-- | internal/gitaly/service/objectpool/reduplicate_test.go | 10 |
6 files changed, 92 insertions, 6 deletions
diff --git a/internal/git/housekeeping/objects.go b/internal/git/housekeeping/objects.go index afa52b291..7fbe33f06 100644 --- a/internal/git/housekeeping/objects.go +++ b/internal/git/housekeeping/objects.go @@ -43,8 +43,14 @@ func RepackObjects(ctx context.Context, repo *localrepo.Repo, cfg RepackObjectsC } if err := repo.ExecAndWait(ctx, git.SubCmd{ - Name: "repack", - Flags: append([]git.Option{git.Flag{Name: "-d"}}, options...), + Name: "repack", + Flags: append([]git.Option{ + git.Flag{Name: "-d"}, + // This can be removed as soon as we have upstreamed a + // `repack.updateServerInfo` config option. See gitlab-org/git#105 for more + // details. + git.Flag{Name: "-n"}, + }, options...), }, git.WithConfig(GetRepackGitConfig(ctx, cfg.WriteBitmap)...)); err != nil { return err } diff --git a/internal/git/housekeeping/objects_test.go b/internal/git/housekeeping/objects_test.go new file mode 100644 index 000000000..feaddcf2e --- /dev/null +++ b/internal/git/housekeeping/objects_test.go @@ -0,0 +1,54 @@ +package housekeeping + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest" + "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo" + "gitlab.com/gitlab-org/gitaly/v14/internal/git/stats" + "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper" + "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testcfg" +) + +func requireObjectCount(t *testing.T, repoPath string, expectedObjects int64) { + t.Helper() + + objects, err := stats.UnpackedObjects(repoPath) + require.NoError(t, err) + require.Equal(t, expectedObjects, objects) +} + +func requirePackfileCount(t *testing.T, repoPath string, expectedPackfiles int) { + t.Helper() + + packfiles, err := stats.PackfilesCount(repoPath) + require.NoError(t, err) + require.Equal(t, expectedPackfiles, packfiles) +} + +func TestRepackObjects(t *testing.T) { + t.Parallel() + + ctx := testhelper.Context(t) + cfg := testcfg.Build(t) + + t.Run("no server info is written", func(t *testing.T) { + repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0]) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(), gittest.WithBranch("main")) + + requireObjectCount(t, repoPath, 1) + requirePackfileCount(t, repoPath, 0) + + require.NoError(t, RepackObjects(ctx, repo, RepackObjectsConfig{})) + + requireObjectCount(t, repoPath, 0) + requirePackfileCount(t, repoPath, 1) + + require.NoFileExists(t, filepath.Join(repoPath, "info", "refs")) + require.NoFileExists(t, filepath.Join(repoPath, "objects", "info", "packs")) + }) +} diff --git a/internal/git/objectpool/fetch.go b/internal/git/objectpool/fetch.go index c04406d6b..f094cee73 100644 --- a/internal/git/objectpool/fetch.go +++ b/internal/git/objectpool/fetch.go @@ -147,8 +147,14 @@ func (o *ObjectPool) repackPool(ctx context.Context, pool repository.GitRepo) er } if err := o.Repo.ExecAndWait(ctx, git.SubCmd{ - Name: "repack", - Flags: []git.Option{git.Flag{Name: "-aidb"}}, + Name: "repack", + Flags: []git.Option{ + git.Flag{Name: "-aidb"}, + // This can be removed as soon as we have upstreamed a + // `repack.updateServerInfo` config option. See gitlab-org/git#105 for more + // details. + git.Flag{Name: "-n"}, + }, }, git.WithConfig(config...)); err != nil { return err } diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go index 860cf1195..299b92868 100644 --- a/internal/git/objectpool/fetch_test.go +++ b/internal/git/objectpool/fetch_test.go @@ -81,6 +81,9 @@ func TestFetchFromOriginDangling(t *testing.T) { for _, id := range []git.ObjectID{newBlob, newTree, newCommit, newTag} { require.Contains(t, refsAfterLines, fmt.Sprintf("refs/dangling/%s %s", id, id)) } + + require.NoFileExists(t, filepath.Join(pool.FullPath(), "info", "refs")) + require.NoFileExists(t, filepath.Join(pool.FullPath(), "objects", "info", "packs")) } func TestFetchFromOriginFsck(t *testing.T) { diff --git a/internal/gitaly/service/objectpool/reduplicate.go b/internal/gitaly/service/objectpool/reduplicate.go index f2705951f..d96c98dd2 100644 --- a/internal/gitaly/service/objectpool/reduplicate.go +++ b/internal/gitaly/service/objectpool/reduplicate.go @@ -15,8 +15,15 @@ func (s *server) ReduplicateRepository(ctx context.Context, req *gitalypb.Redupl } cmd, err := s.gitCmdFactory.New(ctx, req.GetRepository(), git.SubCmd{ - Name: "repack", - Flags: []git.Option{git.Flag{Name: "--quiet"}, git.Flag{Name: "-a"}}, + Name: "repack", + Flags: []git.Option{ + git.Flag{Name: "--quiet"}, + git.Flag{Name: "-a"}, + // This can be removed as soon as we have upstreamed a + // `repack.updateServerInfo` config option. See gitlab-org/git#105 for more + // details. + git.Flag{Name: "-n"}, + }, }) if err != nil { return nil, err diff --git a/internal/gitaly/service/objectpool/reduplicate_test.go b/internal/gitaly/service/objectpool/reduplicate_test.go index 76a7406d8..721fc6f26 100644 --- a/internal/gitaly/service/objectpool/reduplicate_test.go +++ b/internal/gitaly/service/objectpool/reduplicate_test.go @@ -2,6 +2,7 @@ package objectpool import ( "os" + "path/filepath" "testing" "github.com/stretchr/testify/require" @@ -24,6 +25,12 @@ func TestReduplicate(t *testing.T) { gittest.Exec(t, cfg, "-C", repoPath, "gc") + // git-gc(1) invokes git-repack(1), which by defaults generates these files. Manually remove + // them so that we can assert further down that repository reduplication doesn't regenerate + // those paths. + require.NoError(t, os.Remove(filepath.Join(repoPath, "info", "refs"))) + require.NoError(t, os.Remove(filepath.Join(repoPath, "objects", "info", "packs"))) + existingObjectID := "55bc176024cfa3baaceb71db584c7e5df900ea65" // Corrupt the repository to check if the object can't be found @@ -43,4 +50,7 @@ func TestReduplicate(t *testing.T) { require.NoError(t, os.RemoveAll(altPath)) gittest.Exec(t, cfg, "-C", repoPath, "cat-file", "-e", existingObjectID) + + require.NoFileExists(t, filepath.Join(repoPath, "info", "refs")) + require.NoFileExists(t, filepath.Join(repoPath, "objects", "info", "packs")) } |