diff options
author | Will Chandler <wchandler@gitlab.com> | 2024-01-19 08:20:51 +0300 |
---|---|---|
committer | Will Chandler <wchandler@gitlab.com> | 2024-01-22 17:21:22 +0300 |
commit | c23a2ab30f27b449061ca85c8d79321def12f3c1 (patch) | |
tree | 025cc90927bd24ed1d876badca5191a8fa98afe9 | |
parent | a103d1d7e84585f627d9cee8ed6a0117a77c5698 (diff) |
cleanup: Validate repo was not modified before fetching
On large repositories git-filter-repo(1) may take a significant amount
of time to run. Should a write occur after the git-fast-export(1)
portion of the task has completed, it is possible that the repository
history will not be fully rewritten.
To guard against this condition, we checksum the repository before and
after running filter-repo. If the checksums do not match we abort and do
not fetch the updated history into the repository.
-rw-r--r-- | internal/gitaly/service/cleanup/rewrite_history.go | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/internal/gitaly/service/cleanup/rewrite_history.go b/internal/gitaly/service/cleanup/rewrite_history.go index 85e45ab55..e6fceefc5 100644 --- a/internal/gitaly/service/cleanup/rewrite_history.go +++ b/internal/gitaly/service/cleanup/rewrite_history.go @@ -1,6 +1,7 @@ package cleanup import ( + "bufio" "bytes" "context" "errors" @@ -113,10 +114,31 @@ func (s *server) rewriteHistory( return fmt.Errorf("setting up staging repo: %w", err) } + // Check state of source repository prior to running filter-repo. + initialChecksum, err := checksumRepo(ctx, s.gitCmdFactory, repo) + if err != nil { + return fmt.Errorf("calculate initial checksum: %w", err) + } + if err := s.runFilterRepo(ctx, repo, stagingRepo, blobsToRemove, redactions); err != nil { return fmt.Errorf("rewriting repository history: %w", err) } + // Recheck repository state to confirm no changes occurred while filter-repo ran. The + // repository may not be fully rewritten if it was modified after git-fast-export(1) + // completed. 
+ validationChecksum, err := checksumRepo(ctx, s.gitCmdFactory, repo) + if err != nil { + return fmt.Errorf("recalculate checksum: %w", err) + } + + if initialChecksum != validationChecksum { + return structerr.NewAborted("source repository checksum altered").WithMetadataItems( + structerr.MetadataItem{Key: "initial checksum", Value: initialChecksum}, + structerr.MetadataItem{Key: "validation checksum", Value: validationChecksum}, + ) + } + var stderr strings.Builder if err := repo.ExecAndWait(ctx, git.Command{ @@ -300,3 +322,37 @@ func writeArgFile(name string, dir string, input []byte) (string, error) { return path, nil } + +func checksumRepo(ctx context.Context, cmdFactory git.CommandFactory, repo *localrepo.Repo) (string, error) { + var stderr strings.Builder + cmd, err := cmdFactory.New(ctx, repo, git.Command{ + Name: "show-ref", + Flags: []git.Option{ + git.Flag{Name: "--head"}, + }, + }, git.WithSetupStdout(), git.WithStderr(&stderr)) + if err != nil { + return "", fmt.Errorf("spawning git-show-ref: %w", err) + } + + var checksum git.Checksum + + scanner := bufio.NewScanner(cmd) + for scanner.Scan() { + checksum.AddBytes(scanner.Bytes()) + } + + if err := scanner.Err(); err != nil { + return "", err + } + + if err := cmd.Wait(); err != nil { + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + return "", structerr.New("git-show-ref failed with exit code %d", exitErr.ExitCode()).WithMetadata("stderr", stderr.String()) + } + return "", fmt.Errorf("running git-show-ref: %w", err) + } + + return checksum.String(), nil +} |