Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2022-07-08 11:47:22 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2022-07-15 16:15:09 +0300
commitaba6f57d36e45034fd2c516b964a9ff4bc3528d7 (patch)
tree85896ba94bb0379d01f72bf2dce9bebd5e8e1d47
parent219ce8a325bb722e13ab70539dc1bc70a44c37e6 (diff)
housekeeping: Improve heuristics for writing commit-graphs
Now that writing commit-graphs has been disentangled from repacking objects we can iterate on the heuristics we use to determine whether we need to write commit-graphs: - We stop writing commit-graphs in case there are no references. This wouldn't make any sense anyway given that we use only write commit-graphs for reachable commits, of which there aren't any in case there are no references. - We stop repacking objects based on whether the repository has bloom filters or not. This logic was previously mixed into the heuristics for `RepackObjects()`, but that has only be a historic artifact because that function handled both repacks and rewrites of commit-graphs. Instead, we only rewrite commit-graphs based on that information. Ultimately, the end result should be that we repack objects less often, but keep rewriting commit-graphs in the same circumstances as we did before. Add tests to verify this new behaviour and adjust existing tests for repacking of objects.
-rw-r--r--internal/git/housekeeping/optimize_repository.go87
-rw-r--r--internal/git/housekeeping/optimize_repository_ext_test.go5
-rw-r--r--internal/git/housekeeping/optimize_repository_test.go164
3 files changed, 209 insertions, 47 deletions
diff --git a/internal/git/housekeeping/optimize_repository.go b/internal/git/housekeeping/optimize_repository.go
index a6ec186f9..28f984287 100644
--- a/internal/git/housekeeping/optimize_repository.go
+++ b/internal/git/housekeeping/optimize_repository.go
@@ -88,12 +88,10 @@ func optimizeRepository(ctx context.Context, m *RepositoryManager, repo *localre
timer.ObserveDuration()
timer = prometheus.NewTimer(m.tasksLatency.WithLabelValues("commit-graph"))
- if didRepack {
- if err := WriteCommitGraph(ctx, repo); err != nil {
- optimizations["written_commit_graph"] = "failure"
- return fmt.Errorf("could not write commit-graph: %w", err)
- }
-
+ if didWriteCommitGraph, err := writeCommitGraphIfNeeded(ctx, repo, didRepack); err != nil {
+ optimizations["written_commit_graph"] = "failure"
+ return fmt.Errorf("could not write commit-graph: %w", err)
+ } else if didWriteCommitGraph {
optimizations["written_commit_graph"] = "success"
}
timer.ObserveDuration()
@@ -196,26 +194,6 @@ func needsRepacking(repo *localrepo.Repo) (bool, RepackObjectsConfig, error) {
}, nil
}
- missingBloomFilters, err := stats.IsMissingBloomFilters(repoPath)
- if err != nil {
- return false, RepackObjectsConfig{}, fmt.Errorf("checking for bloom filters: %w", err)
- }
-
- // Bloom filters are part of the commit-graph and allow us to efficiently determine which
- // paths have been modified in a given commit without having to look into the object
- // database. In the past we didn't compute bloom filters at all, so we want to rewrite the
- // whole commit-graph to generate them.
- //
- // Note that we'll eventually want to move out commit-graph generation from repacking. When
- // that happens we should update the commit-graph either if it's missing, when bloom filters
- // are missing or when packfiles have been updated.
- if missingBloomFilters {
- return true, RepackObjectsConfig{
- FullRepack: true,
- WriteBitmap: !hasAlternate,
- }, nil
- }
-
// Whenever we do an incremental repack we create a new packfile, and as a result Git may
// have to look into every one of the packfiles to find objects. This is less efficient the
// more packfiles we have, but we cannot repack the whole repository every time either given
@@ -375,6 +353,63 @@ func estimateLooseObjectCount(repo *localrepo.Repo, cutoffDate time.Time) (int64
return looseObjects * 256, nil
}
+// writeCommitGraphIfNeeded writes the commit-graph if required.
+func writeCommitGraphIfNeeded(ctx context.Context, repo *localrepo.Repo, didRepack bool) (bool, error) {
+ needed, err := needsWriteCommitGraph(ctx, repo, didRepack)
+ if err != nil {
+ return false, fmt.Errorf("determining whether repo needs commit-graph update: %w", err)
+ }
+ if !needed {
+ return false, nil
+ }
+
+ if err := WriteCommitGraph(ctx, repo); err != nil {
+ return true, fmt.Errorf("writing commit-graph: %w", err)
+ }
+
+ return true, nil
+}
+
+// needsWriteCommitGraph determines whether we need to write the commit-graph.
+func needsWriteCommitGraph(ctx context.Context, repo *localrepo.Repo, didRepack bool) (bool, error) {
+ looseRefs, packedRefsSize, err := countLooseAndPackedRefs(ctx, repo)
+ if err != nil {
+ return false, fmt.Errorf("counting refs: %w", err)
+ }
+
+ // If the repository doesn't have any references at all then there is no point in writing
+ // commit-graphs given that it would only contain reachable objects, of which there are
+ // none.
+ if looseRefs == 0 && packedRefsSize == 0 {
+ return false, nil
+ }
+
+ // When we repacked the repository then chances are high that we have accumulated quite some
+ // objects since the last time we wrote a commit-graph.
+ if didRepack {
+ return true, nil
+ }
+
+ repoPath, err := repo.Path()
+ if err != nil {
+ return false, fmt.Errorf("getting repository path: %w", err)
+ }
+
+ // Bloom filters are part of the commit-graph and allow us to efficiently determine which
+ // paths have been modified in a given commit without having to look into the object
+ // database. In the past we didn't compute bloom filters at all, so we want to rewrite the
+ // whole commit-graph to generate them.
+ missingBloomFilters, err := stats.IsMissingBloomFilters(repoPath)
+ if err != nil {
+ return false, fmt.Errorf("checking for bloom filters: %w", err)
+ }
+ if missingBloomFilters {
+ return true, nil
+ }
+
+ return false, nil
+}
+
// pruneIfNeeded removes objects from the repository which are either unreachable or which are
// already part of a packfile. We use a grace period of two weeks.
func pruneIfNeeded(ctx context.Context, repo *localrepo.Repo) (bool, error) {
diff --git a/internal/git/housekeeping/optimize_repository_ext_test.go b/internal/git/housekeeping/optimize_repository_ext_test.go
index 7eae05993..13940b9b6 100644
--- a/internal/git/housekeeping/optimize_repository_ext_test.go
+++ b/internal/git/housekeeping/optimize_repository_ext_test.go
@@ -120,9 +120,8 @@ func TestPruneIfNeeded(t *testing.T) {
require.NoError(t, housekeeping.NewManager(cfg.Prometheus, nil).OptimizeRepository(ctx, repo))
expectedLogEntry := map[string]string{
- "packed_objects_full": "success",
- "written_commit_graph": "success",
- "written_bitmap": "success",
+ "packed_objects_full": "success",
+ "written_bitmap": "success",
}
if tc.expectedPrune {
diff --git a/internal/git/housekeeping/optimize_repository_test.go b/internal/git/housekeeping/optimize_repository_test.go
index 453dd4ca7..09c5476c4 100644
--- a/internal/git/housekeeping/optimize_repository_test.go
+++ b/internal/git/housekeeping/optimize_repository_test.go
@@ -73,11 +73,10 @@ func TestNeedsRepacking(t *testing.T) {
return repoProto
},
- expectedNeeded: true,
- expectedConfig: RepackObjectsConfig{
- FullRepack: true,
- WriteBitmap: false,
- },
+ // If we have no bitmap in the repository we'd normally want to fully repack
+ // the repository. But because we have an alternates file we know that the
+ // repository must not have a bitmap anyway, so we can skip the repack here.
+ expectedNeeded: false,
},
{
desc: "missing commit-graph",
@@ -90,11 +89,10 @@ func TestNeedsRepacking(t *testing.T) {
return repoProto
},
- expectedNeeded: true,
- expectedConfig: RepackObjectsConfig{
- FullRepack: true,
- WriteBitmap: true,
- },
+ // The commit-graph used to influence whether we repacked a repository or
+ // not. That was due to historic reasons only, though, and ultimately does
+ // not make any sense.
+ expectedNeeded: false,
},
{
desc: "commit-graph without bloom filters",
@@ -108,11 +106,10 @@ func TestNeedsRepacking(t *testing.T) {
return repoProto
},
- expectedNeeded: true,
- expectedConfig: RepackObjectsConfig{
- FullRepack: true,
- WriteBitmap: true,
- },
+ // The commit-graph used to influence whether we repacked a repository or
+ // not. That was due to historic reasons only, though, and ultimately does
+ // not make any sense.
+ expectedNeeded: false,
},
{
desc: "no repack needed",
@@ -552,7 +549,7 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1
`,
},
{
- desc: "repository without commit-graph repacks objects",
+ desc: "repository without commit-graph writes commit-graph",
setup: func(t *testing.T, relativePath string) *gitalypb.Repository {
repo, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0], gittest.CloneRepoOpts{
RelativePath: relativePath,
@@ -562,9 +559,7 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1
},
expectedMetrics: `# HELP gitaly_housekeeping_tasks_total Total number of housekeeping tasks performed in the repository
# TYPE gitaly_housekeeping_tasks_total counter
-gitaly_housekeeping_tasks_total{housekeeping_task="packed_objects_full", status="success"} 1
gitaly_housekeeping_tasks_total{housekeeping_task="written_commit_graph", status="success"} 1
-gitaly_housekeeping_tasks_total{housekeeping_task="written_bitmap", status="success"} 1
gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1
`,
},
@@ -973,3 +968,136 @@ func TestPruneIfNeeded(t *testing.T) {
}
})
}
+
+func TestWriteCommitGraphIfNeeded(t *testing.T) {
+ ctx := testhelper.Context(t)
+ cfg := testcfg.Build(t)
+
+ for _, tc := range []struct {
+ desc string
+ setup func(t *testing.T) (*gitalypb.Repository, string)
+ didRepack bool
+ expectedWrite bool
+ expectedCommitGraph bool
+ }{
+ {
+ desc: "empty repository",
+ setup: func(t *testing.T) (*gitalypb.Repository, string) {
+ return gittest.InitRepo(t, cfg, cfg.Storages[0])
+ },
+ didRepack: true,
+ expectedWrite: false,
+ },
+ {
+ desc: "repository with objects but no refs",
+ setup: func(t *testing.T) (*gitalypb.Repository, string) {
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ gittest.WriteBlob(t, cfg, repoPath, []byte("something"))
+ return repoProto, repoPath
+ },
+ didRepack: true,
+ expectedWrite: false,
+ },
+ {
+ desc: "repository without commit-graph",
+ setup: func(t *testing.T) (*gitalypb.Repository, string) {
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(), gittest.WithBranch("main"))
+ return repoProto, repoPath
+ },
+ expectedWrite: true,
+ expectedCommitGraph: true,
+ },
+ {
+ desc: "repository with old-style unsplit commit-graph",
+ setup: func(t *testing.T) (*gitalypb.Repository, string) {
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(), gittest.WithBranch("main"))
+
+ // Write a non-split commit-graph with bloom filters. We should
+ // always rewrite the commit-graphs when we're not using a split
+ // commit-graph. We make sure to add bloom filters via
+ // `--changed-paths` given that it would otherwise cause us to
+ // rewrite the graph regardless of whether the graph is split or not
+ // if they were missing.
+ gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--reachable", "--changed-paths")
+
+ return repoProto, repoPath
+ },
+ expectedWrite: true,
+ expectedCommitGraph: true,
+ },
+ {
+ desc: "repository with split commit-graph without bitmap",
+ setup: func(t *testing.T) (*gitalypb.Repository, string) {
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(), gittest.WithBranch("main"))
+
+ // Generate a split commit-graph, but don't enable computation of
+ // changed paths. This should trigger a rewrite so that we can
+ // recompute all graphs and generate the changed paths.
+ gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--reachable", "--split")
+
+ return repoProto, repoPath
+ },
+ expectedWrite: true,
+ expectedCommitGraph: true,
+ },
+ {
+ desc: "repository with split commit-graph with bitmap without repack",
+ setup: func(t *testing.T) (*gitalypb.Repository, string) {
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(), gittest.WithBranch("main"))
+
+ // Write a split commit-graph with bitmaps. This is the state we
+ // want to be in.
+ gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--reachable", "--split", "--changed-paths")
+
+ return repoProto, repoPath
+ },
+ // We use the information about whether we repacked objects as an indicator
+ // whether something has changed in the repository. If it didn't, then we
+ // assume no new objects exist and thus we don't rewrite the commit-graph.
+ didRepack: false,
+ expectedWrite: false,
+ expectedCommitGraph: true,
+ },
+ {
+ desc: "repository with split commit-graph with bitmap with repack",
+ setup: func(t *testing.T) (*gitalypb.Repository, string) {
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(), gittest.WithBranch("main"))
+
+ // Write a split commit-graph with bitmaps. This is the state we
+ // want to be in, so there is no write required if we didn't also
+ // repack objects.
+ gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--reachable", "--split", "--changed-paths")
+
+ return repoProto, repoPath
+ },
+ // When we have a valid commit-graph, but objects have been repacked, we
+ // assume that there are new objects in the repository. So consequentially,
+ // we should write the commit-graphs.
+ didRepack: true,
+ expectedWrite: true,
+ expectedCommitGraph: true,
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ repoProto, repoPath := tc.setup(t)
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ didWrite, err := writeCommitGraphIfNeeded(ctx, repo, tc.didRepack)
+ require.NoError(t, err)
+ require.Equal(t, tc.expectedWrite, didWrite)
+
+ commitGraphPath := filepath.Join(repoPath, "objects", "info", "commit-graphs", "commit-graph-chain")
+ if tc.expectedCommitGraph {
+ require.FileExists(t, commitGraphPath)
+ } else {
+ require.NoFileExists(t, commitGraphPath)
+ }
+ gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "verify")
+ })
+ }
+}