Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWill Chandler <wchandler@gitlab.com>2024-01-12 23:13:26 +0300
committerWill Chandler <wchandler@gitlab.com>2024-01-22 17:21:22 +0300
commita103d1d7e84585f627d9cee8ed6a0117a77c5698 (patch)
treeaec3cc25e39e04185411df1e849579ad11ffc3ea
parent7630f4b49669114fa8190e23f39978da902c6192 (diff)
cleanup: Don't run filter-repo in-place
git-filter-repo(1) uses git-fast-import(1) to import the rewritten repository history. This will unpack the new objects, then iterate over references serially and update them using reference transactions. This does not atomically update the references[0], so an interruption during this final stage will result in partially applied changes. To mitigate this risk, create a temporary staging repository to write the updated history into, then atomically force fetch that into the original repo. This has the downside of being slower than modifying the repository in-place[1], but improving safety for a high-risk operation like this is a greater priority. [0] https://gitlab.com/gitlab-org/git/-/blob/d4dbce1db5cd227a57074bcfc7ec9f0655961bba/builtin/fast-import.c#L1659-1668 [1] https://github.com/newren/git-filter-repo/issues/66#issuecomment-602100316
-rw-r--r--internal/gitaly/service/cleanup/rewrite_history.go132
1 files changed, 125 insertions, 7 deletions
diff --git a/internal/gitaly/service/cleanup/rewrite_history.go b/internal/gitaly/service/cleanup/rewrite_history.go
index 7bbdd258e..85e45ab55 100644
--- a/internal/gitaly/service/cleanup/rewrite_history.go
+++ b/internal/gitaly/service/cleanup/rewrite_history.go
@@ -85,8 +85,8 @@ func (s *server) RewriteHistory(server gitalypb.CleanupService_RewriteHistorySer
}
}
- if err := s.runFilterRepo(ctx, repo, repoProto, blobsToRemove, redactions); err != nil {
- return fmt.Errorf("rewriting repository history: %w", err)
+ if err := s.rewriteHistory(ctx, repo, repoProto, blobsToRemove, redactions); err != nil {
+ return err
}
if err := server.SendAndClose(&gitalypb.RewriteHistoryResponse{}); err != nil {
@@ -96,15 +96,110 @@ func (s *server) RewriteHistory(server gitalypb.CleanupService_RewriteHistorySer
return nil
}
-func (s *server) runFilterRepo(
+func (s *server) rewriteHistory(
ctx context.Context,
repo *localrepo.Repo,
repoProto *gitalypb.Repository,
blobsToRemove []string,
redactions [][]byte,
) error {
+ defaultBranch, err := repo.HeadReference(ctx)
+ if err != nil {
+ return fmt.Errorf("finding HEAD reference: %w", err)
+ }
+
+ stagingRepo, stagingRepoPath, err := s.initStagingRepo(ctx, repoProto, defaultBranch)
+ if err != nil {
+ return fmt.Errorf("setting up staging repo: %w", err)
+ }
+
+ if err := s.runFilterRepo(ctx, repo, stagingRepo, blobsToRemove, redactions); err != nil {
+ return fmt.Errorf("rewriting repository history: %w", err)
+ }
+
+ var stderr strings.Builder
+ if err := repo.ExecAndWait(ctx,
+ git.Command{
+ Name: "fetch",
+ Flags: []git.Option{
+ // Delete any refs that were removed by filter-repo.
+ git.Flag{Name: "--prune"},
+ // The mirror refspec includes tags, don't fetch them again.
+ git.Flag{Name: "--no-tags"},
+ // New history will be disjoint from the original repo.
+ git.Flag{Name: "--force"},
+ // Ensure we don't partially apply the rewritten history.
+ // We don't expect file / directory conflicts as all refs
+ // in the staging repo are from the original.
+ git.Flag{Name: "--atomic"},
+ // We're going to have a lot of these, don't waste
+ // time displaying them.
+ git.Flag{Name: "--no-show-forced-updates"},
+ // No need for FETCH_HEAD when mirroring.
+ git.Flag{Name: "--no-write-fetch-head"},
+ git.Flag{Name: "--quiet"},
+ },
+ Args: append(
+ []string{"file://" + stagingRepoPath},
+ git.MirrorRefSpec,
+ ),
+ },
+ git.WithRefTxHook(repo),
+ git.WithStderr(&stderr),
+ git.WithConfig(git.ConfigPair{
+ Key: "advice.fetchShowForcedUpdates", Value: "false",
+ }),
+ ); err != nil {
+ return structerr.New("fetching rewritten history: %w", err).WithMetadata("stderr", &stderr)
+ }
+
+ return nil
+}
+
+// initStagingRepo creates a new bare repository to write the rewritten history into
+// with default branch is set to match the source repo.
+func (s *server) initStagingRepo(ctx context.Context, repo *gitalypb.Repository, defaultBranch git.ReferenceName) (*localrepo.Repo, string, error) {
+ stagingRepoProto, stagingRepoDir, err := tempdir.NewRepository(ctx, repo.GetStorageName(), s.logger, s.locator)
+ if err != nil {
+ return nil, "", err
+ }
+
+ var stderr strings.Builder
+ cmd, err := s.gitCmdFactory.NewWithoutRepo(ctx, git.Command{
+ Name: "init",
+ Flags: []git.Option{
+ git.Flag{Name: "--bare"},
+ git.Flag{Name: "--quiet"},
+ },
+ Args: []string{stagingRepoDir.Path()},
+ }, git.WithStderr(&stderr))
+ if err != nil {
+ return nil, "", fmt.Errorf("spawning git-init: %w", err)
+ }
+
+ if err := cmd.Wait(); err != nil {
+ return nil, "", structerr.New("creating repository: %w", err).WithMetadata("stderr", &stderr)
+ }
+
+ stagingRepo := s.localrepo(stagingRepoProto)
+
+ // Ensure HEAD matches the source repository. In practice a mismatch doesn't cause problems,
+ // but out of an abundance of caution let's keep the two repos as similar as possible.
+ if err := stagingRepo.SetDefaultBranch(ctx, s.txManager, defaultBranch); err != nil {
+ return nil, "", fmt.Errorf("setting default branch: %w", err)
+ }
+
+ return stagingRepo, stagingRepoDir.Path(), nil
+}
+
+func (s *server) runFilterRepo(
+ ctx context.Context,
+ srcRepo, stagingRepo *localrepo.Repo,
+ blobsToRemove []string,
+ redactions [][]byte,
+) error {
// Place argument files in a tempdir so that cleanup is handled automatically.
- tmpDir, err := tempdir.New(ctx, repo.GetStorageName(), s.logger, s.locator)
+ tmpDir, err := tempdir.New(ctx, srcRepo.GetStorageName(), s.logger, s.locator)
if err != nil {
return fmt.Errorf("create tempdir: %w", err)
}
@@ -129,11 +224,29 @@ func (s *server) runFilterRepo(
flags = append(flags, git.Flag{Name: "--replace-text=" + replacePath})
}
+ srcPath, err := srcRepo.Path()
+ if err != nil {
+ return fmt.Errorf("getting source repo path: %w", err)
+ }
+
+ stagingPath, err := stagingRepo.Path()
+ if err != nil {
+ return fmt.Errorf("getting target repo path: %w", err)
+ }
+
+ // We must run this using 'NewWithoutRepo' because setting '--git-dir',
+ // as 'repo.ExecAndWait' does, will override the '--target' flag and
+ // write the updates directly to the original repository.
var stdout, stderr strings.Builder
- if err := repo.ExecAndWait(ctx,
+ cmd, err := s.gitCmdFactory.NewWithoutRepo(ctx,
git.Command{
Name: "filter-repo",
Flags: append([]git.Option{
+ // Repository to write filtered history into.
+ git.Flag{Name: "--target=" + stagingPath},
+ // Repository to read from.
+ git.Flag{Name: "--source=" + srcPath},
+ // git.Flag{Name: "--refs=refs/*"},
// Prevent automatic cleanup tasks like deleting 'origin' and running git-gc(1).
git.Flag{Name: "--partial"},
// Bypass check that repository is not a fresh clone.
@@ -146,10 +259,15 @@ func (s *server) runFilterRepo(
git.Flag{Name: "--quiet"},
}, flags...),
},
- git.WithRefTxHook(repo),
+ git.WithDisabledHooks(),
git.WithStdout(&stdout),
git.WithStderr(&stderr),
- ); err != nil {
+ )
+ if err != nil {
+ return fmt.Errorf("spawning git-filter-repo: %w", err)
+ }
+
+ if err := cmd.Wait(); err != nil {
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
return structerr.New("git-filter-repo failed with exit code %d", exitErr.ExitCode()).WithMetadataItems(