diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-07-08 14:04:13 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-07-15 16:15:09 +0300 |
commit | 83802e116052ba032ddb76770bf29ff346b547ea (patch) | |
tree | 10528317023541cca9dbaeaa371fec769607151e | |
parent | 8e372baf6221ba7e5dfef949122e52608e0b0f37 (diff) |
housekeeping: Fix commit-graphs referencing pruned commits
When pruning objects we need to rewrite our commit-graphs so that they
don't reference any commits that have just been pruned. While this is
not typically an issue, git-fsck(1) will report any such inconsistencies
in the commit-graph. Furthermore, Git may return an error when being
asked to parse such a missing commit directly.
Fix `OptimizeRepository()` to do so when it's pruned objects.
Changelog: fixed
-rw-r--r-- | internal/git/housekeeping/optimize_repository.go | 22 | ||||
-rw-r--r-- | internal/git/housekeeping/optimize_repository_test.go | 54 |
2 files changed, 63 insertions, 13 deletions
diff --git a/internal/git/housekeeping/optimize_repository.go b/internal/git/housekeeping/optimize_repository.go index 566583951..3ebd4df9a 100644 --- a/internal/git/housekeeping/optimize_repository.go +++ b/internal/git/housekeeping/optimize_repository.go @@ -108,7 +108,7 @@ func optimizeRepository(ctx context.Context, m *RepositoryManager, repo *localre timer.ObserveDuration() timer = prometheus.NewTimer(m.tasksLatency.WithLabelValues("commit-graph")) - if didWriteCommitGraph, writeCommitGraphCfg, err := writeCommitGraphIfNeeded(ctx, repo, didRepack); err != nil { + if didWriteCommitGraph, writeCommitGraphCfg, err := writeCommitGraphIfNeeded(ctx, repo, didRepack, didPrune); err != nil { optimizations["written_commit_graph_full"] = "failure" optimizations["written_commit_graph_incremental"] = "failure" return fmt.Errorf("could not write commit-graph: %w", err) @@ -359,8 +359,8 @@ func estimateLooseObjectCount(repo *localrepo.Repo, cutoffDate time.Time) (int64 } // writeCommitGraphIfNeeded writes the commit-graph if required. -func writeCommitGraphIfNeeded(ctx context.Context, repo *localrepo.Repo, didRepack bool) (bool, WriteCommitGraphConfig, error) { - needed, cfg, err := needsWriteCommitGraph(ctx, repo, didRepack) +func writeCommitGraphIfNeeded(ctx context.Context, repo *localrepo.Repo, didRepack, didPrune bool) (bool, WriteCommitGraphConfig, error) { + needed, cfg, err := needsWriteCommitGraph(ctx, repo, didRepack, didPrune) if err != nil { return false, WriteCommitGraphConfig{}, fmt.Errorf("determining whether repo needs commit-graph update: %w", err) } @@ -376,7 +376,7 @@ func writeCommitGraphIfNeeded(ctx context.Context, repo *localrepo.Repo, didRepa } // needsWriteCommitGraph determines whether we need to write the commit-graph. -func needsWriteCommitGraph(ctx context.Context, repo *localrepo.Repo, didRepack bool) (bool, WriteCommitGraphConfig, error) { +func needsWriteCommitGraph(ctx context.Context, repo *localrepo.Repo, didRepack, didPrune bool) (bool, WriteCommitGraphConfig, error) { looseRefs, packedRefsSize, err := countLooseAndPackedRefs(ctx, repo) if err != nil { return false, WriteCommitGraphConfig{}, fmt.Errorf("counting refs: %w", err) @@ -389,6 +389,20 @@ func needsWriteCommitGraph(ctx context.Context, repo *localrepo.Repo, didRepack return false, WriteCommitGraphConfig{}, nil } + // When we have pruned objects in the repository then it may happen that the commit-graph + // still refers to commits that have now been deleted. While this wouldn't typically cause + // any issues during runtime, it may cause errors when explicitly asking for any commit that + // does exist in the commit-graph, only. Furthermore, it causes git-fsck(1) to report that + // the commit-graph is inconsistent. + // + // To fix this case we will replace the complete commit-chain when we have pruned objects + // from the repository. + if didPrune { + return true, WriteCommitGraphConfig{ + ReplaceChain: true, + }, nil + } + // When we repacked the repository then chances are high that we have accumulated quite some // objects since the last time we wrote a commit-graph. if didRepack { diff --git a/internal/git/housekeeping/optimize_repository_test.go b/internal/git/housekeeping/optimize_repository_test.go index 1ca66c568..8a890674c 100644 --- a/internal/git/housekeeping/optimize_repository_test.go +++ b/internal/git/housekeeping/optimize_repository_test.go @@ -663,7 +663,7 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1 expectedMetrics: `# HELP gitaly_housekeeping_tasks_total Total number of housekeeping tasks performed in the repository # TYPE gitaly_housekeeping_tasks_total counter gitaly_housekeeping_tasks_total{housekeeping_task="packed_objects_incremental", status="success"} 1 -gitaly_housekeeping_tasks_total{housekeeping_task="written_commit_graph_incremental", status="success"} 1 +gitaly_housekeeping_tasks_total{housekeeping_task="written_commit_graph_full", status="success"} 1 gitaly_housekeeping_tasks_total{housekeeping_task="pruned_objects",status="success"} 1 gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1 `, @@ -977,6 +977,7 @@ func TestWriteCommitGraphIfNeeded(t *testing.T) { desc string setup func(t *testing.T) (*gitalypb.Repository, string) didRepack bool + didPrune bool expectedWrite bool expectedCfg WriteCommitGraphConfig expectedCommitGraph bool @@ -987,6 +988,7 @@ func TestWriteCommitGraphIfNeeded(t *testing.T) { return gittest.InitRepo(t, cfg, cfg.Storages[0]) }, didRepack: true, + didPrune: true, expectedWrite: false, }, { @@ -997,6 +999,7 @@ func TestWriteCommitGraphIfNeeded(t *testing.T) { return repoProto, repoPath }, didRepack: true, + didPrune: true, expectedWrite: false, }, { @@ -1092,12 +1095,35 @@ func TestWriteCommitGraphIfNeeded(t *testing.T) { expectedWrite: true, expectedCommitGraph: true, }, + { + desc: "repository with split commit-graph with bitmap with pruned objects", + setup: func(t *testing.T) (*gitalypb.Repository, string) { + repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0]) + gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(), gittest.WithBranch("main")) + + // Write a split commit-graph with bitmaps. This is the state we + // want to be in, so there is no write required if we didn't also + // repack objects. + gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--reachable", "--split", "--changed-paths") + + return repoProto, repoPath + }, + // When we have a valid commit-graph, but objects have been repacked, we + // assume that there are new objects in the repository. So consequentially, + // we should write the commit-graphs. + didPrune: true, + expectedWrite: true, + expectedCfg: WriteCommitGraphConfig{ + ReplaceChain: true, + }, + expectedCommitGraph: true, + }, } { t.Run(tc.desc, func(t *testing.T) { repoProto, repoPath := tc.setup(t) repo := localrepo.NewTestRepo(t, cfg, repoProto) - didWrite, writeCommitGraphCfg, err := writeCommitGraphIfNeeded(ctx, repo, tc.didRepack) + didWrite, writeCommitGraphCfg, err := writeCommitGraphIfNeeded(ctx, repo, tc.didRepack, tc.didPrune) require.NoError(t, err) require.Equal(t, tc.expectedWrite, didWrite) require.Equal(t, tc.expectedCfg, writeCommitGraphCfg) @@ -1138,21 +1164,31 @@ func TestWriteCommitGraphIfNeeded(t *testing.T) { require.EqualError(t, verifyCmd.Run(), "exit status 1") require.Equal(t, stderr.String(), fmt.Sprintf("error: Could not read %[1]s\nfailed to parse commit %[1]s from object database for commit-graph\n", unreachableCommitID)) - // Write the commit-graph and pretend that objects have been rewritten. Ideally, - // this causes us to fix up the broken commit-graph to not contain references to the - // pruned commit anymore. - didWrite, writeCommitGraphCfg, err := writeCommitGraphIfNeeded(ctx, repo, true) + // Write the commit-graph and pretend that objects have been rewritten, but not + // pruned. + didWrite, writeCommitGraphCfg, err := writeCommitGraphIfNeeded(ctx, repo, true, false) require.NoError(t, err) require.True(t, didWrite) require.Equal(t, WriteCommitGraphConfig{}, writeCommitGraphCfg) - // But right now, this is not the case. This is caused by the fact that Git will - // only do an incremental update and thus not rewrite all parts of the commit-graph - // chain. + // When pretending that no objects have been pruned we still observe the same + // failure. stderr.Reset() verifyCmd = gittest.NewCommand(t, cfg, "-C", repoPath, "commit-graph", "verify") verifyCmd.Stderr = &stderr require.EqualError(t, verifyCmd.Run(), "exit status 1") require.Equal(t, stderr.String(), fmt.Sprintf("error: Could not read %[1]s\nfailed to parse commit %[1]s from object database for commit-graph\n", unreachableCommitID)) + + // Write the commit-graph a second time, but this time we pretend we have just + // pruned objects. This should cause the commit-graph to be rewritten. + didWrite, writeCommitGraphCfg, err = writeCommitGraphIfNeeded(ctx, repo, false, true) + require.NoError(t, err) + require.True(t, didWrite) + require.Equal(t, WriteCommitGraphConfig{ + ReplaceChain: true, + }, writeCommitGraphCfg) + + // The commit-graph should now have been fixed. + gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "verify") }) } |