Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2022-03-11 14:21:07 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2022-03-21 12:16:56 +0300
commit0d588e4038d7539232cbae149bbba6d804d6e78e (patch)
tree5d914190e2c3183539122dec52fba8012caa07d0
parent91f715960d12402539df049da134b4222e968fa4 (diff)
git: Disable generation of server info in git-repack(1)
In order to be able to serve Git repositories via a dumb HTTP server, Git needs to create a bunch of metadata that tells clients what they need to fetch. This metadata is generated via git-update-server-info(1), which is can be automatically called via both git-receive-pack(1) and git-repack(1). While the former doesn't do so by default, the latter does. For us this is a waste of resources because we don't ever serve repos via the dumb HTTP protocol. Generating this information thus wastes both precious CPU cycles and disk space for data that is ultimately never used by anything. The waste of disk space is even more pronounced because git-repack(1) doesn't always clean up the temporary files it uses to atomically update the final files. So when the command gets killed, we may accumulate more and more temporary files. In extreme cases we have seen in production, a repository whose on-disk size of actual data was less than 5GB had accumulated about 35GB of these temporary files. Stop generating this information in git-repack(1) completely. Ideally, we'd do so by injecting configuration into all repack commands, but there is no such config option right now. Instead, we need to pass the `-n` flag everywhere we execute git-repack(1). Note that this doesn't stop generating the data in all places: commands like git-gc(1) invoke git-repack(1), but we have no ability to tell it to pass `-n` to git-repack(1). Neither is there a config option which would allow us to globally disable the generation. The current approach is thus only a best-effort one as a stop-gap solution while we're in the process of upstreaming patches which introduce such a config option. The cleanup logic for existing repositories is added in a later step in this patch series. Changelog: performance
-rw-r--r--internal/git/housekeeping/objects.go10
-rw-r--r--internal/git/housekeeping/objects_test.go54
-rw-r--r--internal/git/objectpool/fetch.go10
-rw-r--r--internal/git/objectpool/fetch_test.go3
-rw-r--r--internal/gitaly/service/objectpool/reduplicate.go11
-rw-r--r--internal/gitaly/service/objectpool/reduplicate_test.go10
6 files changed, 92 insertions, 6 deletions
diff --git a/internal/git/housekeeping/objects.go b/internal/git/housekeeping/objects.go
index afa52b291..7fbe33f06 100644
--- a/internal/git/housekeeping/objects.go
+++ b/internal/git/housekeeping/objects.go
@@ -43,8 +43,14 @@ func RepackObjects(ctx context.Context, repo *localrepo.Repo, cfg RepackObjectsC
}
if err := repo.ExecAndWait(ctx, git.SubCmd{
- Name: "repack",
- Flags: append([]git.Option{git.Flag{Name: "-d"}}, options...),
+ Name: "repack",
+ Flags: append([]git.Option{
+ git.Flag{Name: "-d"},
+ // This can be removed as soon as we have upstreamed a
+ // `repack.updateServerInfo` config option. See gitlab-org/git#105 for more
+ // details.
+ git.Flag{Name: "-n"},
+ }, options...),
}, git.WithConfig(GetRepackGitConfig(ctx, cfg.WriteBitmap)...)); err != nil {
return err
}
diff --git a/internal/git/housekeeping/objects_test.go b/internal/git/housekeeping/objects_test.go
new file mode 100644
index 000000000..feaddcf2e
--- /dev/null
+++ b/internal/git/housekeeping/objects_test.go
@@ -0,0 +1,54 @@
+package housekeeping
+
+import (
+ "path/filepath"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git/stats"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testcfg"
+)
+
+func requireObjectCount(t *testing.T, repoPath string, expectedObjects int64) {
+ t.Helper()
+
+ objects, err := stats.UnpackedObjects(repoPath)
+ require.NoError(t, err)
+ require.Equal(t, expectedObjects, objects)
+}
+
+func requirePackfileCount(t *testing.T, repoPath string, expectedPackfiles int) {
+ t.Helper()
+
+ packfiles, err := stats.PackfilesCount(repoPath)
+ require.NoError(t, err)
+ require.Equal(t, expectedPackfiles, packfiles)
+}
+
+func TestRepackObjects(t *testing.T) {
+ t.Parallel()
+
+ ctx := testhelper.Context(t)
+ cfg := testcfg.Build(t)
+
+ t.Run("no server info is written", func(t *testing.T) {
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(), gittest.WithBranch("main"))
+
+ requireObjectCount(t, repoPath, 1)
+ requirePackfileCount(t, repoPath, 0)
+
+ require.NoError(t, RepackObjects(ctx, repo, RepackObjectsConfig{}))
+
+ requireObjectCount(t, repoPath, 0)
+ requirePackfileCount(t, repoPath, 1)
+
+ require.NoFileExists(t, filepath.Join(repoPath, "info", "refs"))
+ require.NoFileExists(t, filepath.Join(repoPath, "objects", "info", "packs"))
+ })
+}
diff --git a/internal/git/objectpool/fetch.go b/internal/git/objectpool/fetch.go
index c04406d6b..f094cee73 100644
--- a/internal/git/objectpool/fetch.go
+++ b/internal/git/objectpool/fetch.go
@@ -147,8 +147,14 @@ func (o *ObjectPool) repackPool(ctx context.Context, pool repository.GitRepo) er
}
if err := o.Repo.ExecAndWait(ctx, git.SubCmd{
- Name: "repack",
- Flags: []git.Option{git.Flag{Name: "-aidb"}},
+ Name: "repack",
+ Flags: []git.Option{
+ git.Flag{Name: "-aidb"},
+ // This can be removed as soon as we have upstreamed a
+ // `repack.updateServerInfo` config option. See gitlab-org/git#105 for more
+ // details.
+ git.Flag{Name: "-n"},
+ },
}, git.WithConfig(config...)); err != nil {
return err
}
diff --git a/internal/git/objectpool/fetch_test.go b/internal/git/objectpool/fetch_test.go
index 860cf1195..299b92868 100644
--- a/internal/git/objectpool/fetch_test.go
+++ b/internal/git/objectpool/fetch_test.go
@@ -81,6 +81,9 @@ func TestFetchFromOriginDangling(t *testing.T) {
for _, id := range []git.ObjectID{newBlob, newTree, newCommit, newTag} {
require.Contains(t, refsAfterLines, fmt.Sprintf("refs/dangling/%s %s", id, id))
}
+
+ require.NoFileExists(t, filepath.Join(pool.FullPath(), "info", "refs"))
+ require.NoFileExists(t, filepath.Join(pool.FullPath(), "objects", "info", "packs"))
}
func TestFetchFromOriginFsck(t *testing.T) {
diff --git a/internal/gitaly/service/objectpool/reduplicate.go b/internal/gitaly/service/objectpool/reduplicate.go
index f2705951f..d96c98dd2 100644
--- a/internal/gitaly/service/objectpool/reduplicate.go
+++ b/internal/gitaly/service/objectpool/reduplicate.go
@@ -15,8 +15,15 @@ func (s *server) ReduplicateRepository(ctx context.Context, req *gitalypb.Redupl
}
cmd, err := s.gitCmdFactory.New(ctx, req.GetRepository(), git.SubCmd{
- Name: "repack",
- Flags: []git.Option{git.Flag{Name: "--quiet"}, git.Flag{Name: "-a"}},
+ Name: "repack",
+ Flags: []git.Option{
+ git.Flag{Name: "--quiet"},
+ git.Flag{Name: "-a"},
+ // This can be removed as soon as we have upstreamed a
+ // `repack.updateServerInfo` config option. See gitlab-org/git#105 for more
+ // details.
+ git.Flag{Name: "-n"},
+ },
})
if err != nil {
return nil, err
diff --git a/internal/gitaly/service/objectpool/reduplicate_test.go b/internal/gitaly/service/objectpool/reduplicate_test.go
index 76a7406d8..721fc6f26 100644
--- a/internal/gitaly/service/objectpool/reduplicate_test.go
+++ b/internal/gitaly/service/objectpool/reduplicate_test.go
@@ -2,6 +2,7 @@ package objectpool
import (
"os"
+ "path/filepath"
"testing"
"github.com/stretchr/testify/require"
@@ -24,6 +25,12 @@ func TestReduplicate(t *testing.T) {
gittest.Exec(t, cfg, "-C", repoPath, "gc")
+ // git-gc(1) invokes git-repack(1), which by defaults generates these files. Manually remove
+ // them so that we can assert further down that repository reduplication doesn't regenerate
+ // those paths.
+ require.NoError(t, os.Remove(filepath.Join(repoPath, "info", "refs")))
+ require.NoError(t, os.Remove(filepath.Join(repoPath, "objects", "info", "packs")))
+
existingObjectID := "55bc176024cfa3baaceb71db584c7e5df900ea65"
// Corrupt the repository to check if the object can't be found
@@ -43,4 +50,7 @@ func TestReduplicate(t *testing.T) {
require.NoError(t, os.RemoveAll(altPath))
gittest.Exec(t, cfg, "-C", repoPath, "cat-file", "-e", existingObjectID)
+
+ require.NoFileExists(t, filepath.Join(repoPath, "info", "refs"))
+ require.NoFileExists(t, filepath.Join(repoPath, "objects", "info", "packs"))
}