diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2023-01-03 17:10:12 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2023-01-03 17:52:48 +0300 |
commit | 6003c7fda5638f24b3d5d118206f208bcca1fa73 (patch) | |
tree | aed292017e95a1890dd2347d4592f3357940e37d | |
parent | fa399ddc106ea4e165c11ab723382816bc2bdecb (diff) |
housekeeping: Fix rewriting of commit-graphs without generation data
With eab4c4758 (housekeeping: Replace commit-graph chain when missing
generation data, 2022-12-13) we have introduced logic to rewrite the
commit-graph in case it is missing generation data. One thing we missed
though is that we essentially have the same logic in two different
places: once via `WriteCommitGraphConfigForRepository()`, which is used
for the legacy fine-grained repository housekeeping RPCs. And once via
new the optimization strategy in `ShouldWriteCommitGraph()`. And while
we have adjusted the former, we didn't adjust the latter, and as a
result we don't properly rewrite in `OptimizeRepository()`.
Deduplicate the logic by introducing a new `commitGraphNeedsRewrite()`
function that is shared between both implementations so that we now also
correctly rewrite the commit-graphs in `OptimizeRepository()`.
Changelog: fixed
-rw-r--r-- | internal/git/housekeeping/commit_graph.go | 21 | ||||
-rw-r--r-- | internal/git/housekeeping/optimization_strategy.go | 6 | ||||
-rw-r--r-- | internal/git/housekeeping/optimization_strategy_test.go | 28 | ||||
-rw-r--r-- | internal/git/housekeeping/optimize_repository_test.go | 48 |
4 files changed, 79 insertions, 24 deletions
diff --git a/internal/git/housekeeping/commit_graph.go b/internal/git/housekeeping/commit_graph.go index 2476083e1..b2e71a235 100644 --- a/internal/git/housekeeping/commit_graph.go +++ b/internal/git/housekeeping/commit_graph.go @@ -29,31 +29,36 @@ func WriteCommitGraphConfigForRepository(ctx context.Context, repo *localrepo.Re return WriteCommitGraphConfig{}, err } - var replaceChain bool - commitGraphInfo, err := stats.CommitGraphInfoForRepository(repoPath) if err != nil { return WriteCommitGraphConfig{}, structerr.NewInternal("getting commit-graph info: %w", err) } + return WriteCommitGraphConfig{ + ReplaceChain: commitGraphNeedsRewrite(ctx, commitGraphInfo), + }, nil +} + +// commitGraphNeedsRewrite determines whether the commit-graph needs to be rewritten. This can be +// the case when it is either a monolithic commit-graph or when it is missing some extensions that +// only get written on a full rewrite. +func commitGraphNeedsRewrite(ctx context.Context, commitGraphInfo stats.CommitGraphInfo) bool { if commitGraphInfo.CommitGraphChainLength == 0 { // The repository does not have a commit-graph chain. This either indicates we ain't // got no commit-graph at all, or that it's monolithic. In both cases we want to // replace the commit-graph chain. - replaceChain = true + return true } else if !commitGraphInfo.HasBloomFilters { // If the commit-graph-chain exists, we want to rewrite it in case we see that it // ain't got bloom filters enabled. This is because Git will refuse to write any // bloom filters as long as any of the commit-graph slices is missing this info. - replaceChain = true + return true } else if !commitGraphInfo.HasGenerationData && featureflag.UseCommitGraphGenerationData.IsEnabled(ctx) { // The same is true for generation data. - replaceChain = true + return true } - return WriteCommitGraphConfig{ - ReplaceChain: replaceChain, - }, nil + return false } // WriteCommitGraph updates the commit-graph in the given repository. The commit-graph is updated diff --git a/internal/git/housekeeping/optimization_strategy.go b/internal/git/housekeeping/optimization_strategy.go index a2808758d..b0c62d35c 100644 --- a/internal/git/housekeeping/optimization_strategy.go +++ b/internal/git/housekeeping/optimization_strategy.go @@ -171,11 +171,7 @@ func (s HeuristicalOptimizationStrategy) ShouldWriteCommitGraph(ctx context.Cont } } - // Bloom filters are part of the commit-graph and allow us to efficiently determine which - // paths have been modified in a given commit without having to look into the object - // database. In the past we didn't compute bloom filters at all, so we want to rewrite the - // whole commit-graph to generate them. - if s.info.CommitGraph.CommitGraphChainLength == 0 || !s.info.CommitGraph.HasBloomFilters { + if commitGraphNeedsRewrite(ctx, s.info.CommitGraph) { return true, WriteCommitGraphConfig{ ReplaceChain: true, } diff --git a/internal/git/housekeeping/optimization_strategy_test.go b/internal/git/housekeeping/optimization_strategy_test.go index fa903aa44..8f257064a 100644 --- a/internal/git/housekeeping/optimization_strategy_test.go +++ b/internal/git/housekeeping/optimization_strategy_test.go @@ -12,6 +12,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v15/internal/git/stats" + "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" @@ -667,9 +668,11 @@ func TestHeuristicalOptimizationStrategy_ShouldRepackReferences(t *testing.T) { } func TestHeuristicalOptimizationStrategy_NeedsWriteCommitGraph(t *testing.T) { - t.Parallel() + testhelper.NewFeatureSets(featureflag.UseCommitGraphGenerationData).Run(t, testHeuristicalOptimizationStrategyNeedsWriteCommitGraph) +} - ctx := testhelper.Context(t) +func testHeuristicalOptimizationStrategyNeedsWriteCommitGraph(t *testing.T, ctx context.Context) { + t.Parallel() for _, tc := range []struct { desc string @@ -739,6 +742,27 @@ func TestHeuristicalOptimizationStrategy_NeedsWriteCommitGraph(t *testing.T) { }, }, }, + // If we have no generation data then we want to rewrite the commit-graph, + // but only if the feature flag is enabled. + expectedNeeded: featureflag.UseCommitGraphGenerationData.IsEnabled(ctx), + expectedCfg: WriteCommitGraphConfig{ + ReplaceChain: featureflag.UseCommitGraphGenerationData.IsEnabled(ctx), + }, + }, + { + desc: "repository with split commit-graph and generation data with bitmap without repack", + strategy: HeuristicalOptimizationStrategy{ + info: stats.RepositoryInfo{ + References: stats.ReferencesInfo{ + LooseReferencesCount: 1, + }, + CommitGraph: stats.CommitGraphInfo{ + CommitGraphChainLength: 1, + HasBloomFilters: true, + HasGenerationData: true, + }, + }, + }, // We use the information about whether we repacked objects as an indicator // whether something has changed in the repository. If it didn't, then we // assume no new objects exist and thus we don't rewrite the commit-graph. diff --git a/internal/git/housekeeping/optimize_repository_test.go b/internal/git/housekeeping/optimize_repository_test.go index 565c2e267..746ee9ad4 100644 --- a/internal/git/housekeeping/optimize_repository_test.go +++ b/internal/git/housekeeping/optimize_repository_test.go @@ -19,6 +19,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v15/internal/git/stats" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/transaction" + "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" @@ -151,9 +152,13 @@ func TestPackRefsIfNeeded(t *testing.T) { func TestOptimizeRepository(t *testing.T) { t.Parallel() - ctx := testhelper.Context(t) - cfg := testcfg.Build(t) + testhelper.NewFeatureSets(featureflag.UseCommitGraphGenerationData).Run(t, testOptimizeRepository) +} +func testOptimizeRepository(t *testing.T, ctx context.Context) { + t.Parallel() + + cfg := testcfg.Build(t) txManager := transaction.NewManager(cfg, backchannel.NewRegistry()) for _, tc := range []struct { @@ -213,6 +218,33 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1 `, }, { + desc: "repository with commit-graph without generation data writes commit-graph", + setup: func(t *testing.T, relativePath string) *gitalypb.Repository { + repo, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + RelativePath: relativePath, + }) + gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main")) + gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "-d", "--write-bitmap-index") + gittest.Exec(t, cfg, "-c", "commitGraph.generationVersion=1", "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths") + return repo + }, + expectedMetrics: func() string { + if featureflag.UseCommitGraphGenerationData.IsEnabled(ctx) { + return `# HELP gitaly_housekeeping_tasks_total Total number of housekeeping tasks performed in the repository +# TYPE gitaly_housekeeping_tasks_total counter +gitaly_housekeeping_tasks_total{housekeeping_task="written_commit_graph_full", status="success"} 1 +gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1 +` + } + + return `# HELP gitaly_housekeeping_tasks_total Total number of housekeeping tasks performed in the repository +# TYPE gitaly_housekeeping_tasks_total counter +gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1 +` + }(), + }, + { desc: "repository with multiple packfiles packs only for object pool", setup: func(t *testing.T, relativePath string) *gitalypb.Repository { repo, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ @@ -228,7 +260,7 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1 gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("second"), gittest.WithMessage("second")) gittest.Exec(t, cfg, "-C", repoPath, "repack") - gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths") + gittest.Exec(t, cfg, "-c", "commitGraph.generationVersion=2", "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths") return repo }, @@ -253,7 +285,7 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1 }) gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main")) gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "-d", "--write-bitmap-index") - gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths") + gittest.Exec(t, cfg, "-c", "commitGraph.generationVersion=2", "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths") return repo }, expectedMetrics: `# HELP gitaly_housekeeping_tasks_total Total number of housekeeping tasks performed in the repository @@ -270,7 +302,7 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1 }) gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main")) gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "-d", "--write-bitmap-index") - gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths") + gittest.Exec(t, cfg, "-c", "commitGraph.generationVersion=2", "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths") // The repack won't repack the following objects because they're // broken, and thus we'll retry to prune them afterwards. @@ -305,7 +337,7 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1 }) gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main")) gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "-d", "--write-bitmap-index") - gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths") + gittest.Exec(t, cfg, "-c", "commitGraph.generationVersion=2", "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths") // The repack won't repack the following objects because they're // broken, and thus we'll retry to prune them afterwards. @@ -349,7 +381,7 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1 } gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "--write-bitmap-index") - gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths") + gittest.Exec(t, cfg, "-c", "commitGraph.generationVersion=2", "-C", repoPath, "commit-graph", "write", "--split", "--changed-paths") return repo }, @@ -365,8 +397,6 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1 testRepoAndPool(t, tc.desc, func(t *testing.T, relativePath string) { t.Parallel() - ctx := testhelper.Context(t) - repoProto := tc.setup(t, relativePath) repo := localrepo.NewTestRepo(t, cfg, repoProto) |