Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'internal/gitaly/service/cleanup/rewrite_history.go')
-rw-r--r--internal/gitaly/service/cleanup/rewrite_history.go358
1 files changed, 358 insertions, 0 deletions
diff --git a/internal/gitaly/service/cleanup/rewrite_history.go b/internal/gitaly/service/cleanup/rewrite_history.go
new file mode 100644
index 000000000..e6fceefc5
--- /dev/null
+++ b/internal/gitaly/service/cleanup/rewrite_history.go
@@ -0,0 +1,358 @@
+package cleanup
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "os/exec"
+ "strings"
+
+ "gitlab.com/gitlab-org/gitaly/v16/internal/git"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/git/localrepo"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/structerr"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/tempdir"
+ "gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb"
+)
+
+// RewriteHistory uses git-filter-repo(1) to remove specified blobs from commit history and
+// replace blobs to redact specified text patterns. This does not delete the removed blobs from
+// the object database, they must be garbage collected separately.
+func (s *server) RewriteHistory(server gitalypb.CleanupService_RewriteHistoryServer) error {
+ ctx := server.Context()
+
+ request, err := server.Recv()
+ if err != nil {
+ return fmt.Errorf("receiving initial request: %w", err)
+ }
+
+ repoProto := request.GetRepository()
+ if err := s.locator.ValidateRepository(repoProto); err != nil {
+ return structerr.NewInvalidArgument("%w", err)
+ }
+
+ repo := s.localrepo(repoProto)
+
+ objectHash, err := repo.ObjectHash(ctx)
+ if err != nil {
+ return fmt.Errorf("detecting object hash: %w", err)
+ }
+
+ if objectHash.Format == "sha256" {
+ return structerr.NewInvalidArgument("git-filter-repo does not support repositories using the SHA256 object format")
+ }
+
+ // Unset repository so that we can validate that repository is not sent on subsequent requests.
+ request.Repository = nil
+
+ blobsToRemove := make([]string, 0, len(request.GetBlobs()))
+ redactions := make([][]byte, 0, len(request.GetRedactions()))
+
+ for {
+ if request.GetRepository() != nil {
+ return structerr.NewInvalidArgument("subsequent requests must not contain repository")
+ }
+
+ if len(request.GetBlobs()) == 0 && len(request.GetRedactions()) == 0 {
+ return structerr.NewInvalidArgument("no object IDs or text replacements specified")
+ }
+
+ for _, oid := range request.GetBlobs() {
+ if err := objectHash.ValidateHex(oid); err != nil {
+ return structerr.NewInvalidArgument("validating object ID: %w", err).WithMetadata("oid", oid)
+ }
+ blobsToRemove = append(blobsToRemove, oid)
+ }
+
+ for _, pattern := range request.GetRedactions() {
+ if strings.Contains(string(pattern), "\n") {
+ // We deliberately do not log the invalid pattern as this is
+ // likely to contain sensitive information.
+ return structerr.NewInvalidArgument("redaction pattern contains newline")
+ }
+ redactions = append(redactions, pattern)
+ }
+
+ request, err = server.Recv()
+ if err != nil {
+ if errors.Is(err, io.EOF) {
+ break
+ }
+
+ return fmt.Errorf("receiving next request: %w", err)
+ }
+ }
+
+ if err := s.rewriteHistory(ctx, repo, repoProto, blobsToRemove, redactions); err != nil {
+ return err
+ }
+
+ if err := server.SendAndClose(&gitalypb.RewriteHistoryResponse{}); err != nil {
+ return fmt.Errorf("sending RewriteHistoryResponse: %w", err)
+ }
+
+ return nil
+}
+
+func (s *server) rewriteHistory(
+ ctx context.Context,
+ repo *localrepo.Repo,
+ repoProto *gitalypb.Repository,
+ blobsToRemove []string,
+ redactions [][]byte,
+) error {
+ defaultBranch, err := repo.HeadReference(ctx)
+ if err != nil {
+ return fmt.Errorf("finding HEAD reference: %w", err)
+ }
+
+ stagingRepo, stagingRepoPath, err := s.initStagingRepo(ctx, repoProto, defaultBranch)
+ if err != nil {
+ return fmt.Errorf("setting up staging repo: %w", err)
+ }
+
+ // Check state of source repository prior to running filter-repo.
+ initialChecksum, err := checksumRepo(ctx, s.gitCmdFactory, repo)
+ if err != nil {
+ return fmt.Errorf("calculate initial checksum: %w", err)
+ }
+
+ if err := s.runFilterRepo(ctx, repo, stagingRepo, blobsToRemove, redactions); err != nil {
+ return fmt.Errorf("rewriting repository history: %w", err)
+ }
+
+ // Recheck repository state to confirm no changes occurred while filter-repo ran. The
+ // repository may not be fully rewritten if it was modified after git-fast-export(1)
+ // completed.
+ validationChecksum, err := checksumRepo(ctx, s.gitCmdFactory, repo)
+ if err != nil {
+ return fmt.Errorf("recalculate checksum: %w", err)
+ }
+
+ if initialChecksum != validationChecksum {
+ return structerr.NewAborted("source repository checksum altered").WithMetadataItems(
+ structerr.MetadataItem{Key: "initial checksum", Value: initialChecksum},
+ structerr.MetadataItem{Key: "validation checksum", Value: validationChecksum},
+ )
+ }
+
+ var stderr strings.Builder
+ if err := repo.ExecAndWait(ctx,
+ git.Command{
+ Name: "fetch",
+ Flags: []git.Option{
+ // Delete any refs that were removed by filter-repo.
+ git.Flag{Name: "--prune"},
+ // The mirror refspec includes tags, don't fetch them again.
+ git.Flag{Name: "--no-tags"},
+ // New history will be disjoint from the original repo.
+ git.Flag{Name: "--force"},
+ // Ensure we don't partially apply the rewritten history.
+ // We don't expect file / directory conflicts as all refs
+ // in the staging repo are from the original.
+ git.Flag{Name: "--atomic"},
+ // We're going to have a lot of these, don't waste
+ // time displaying them.
+ git.Flag{Name: "--no-show-forced-updates"},
+ // No need for FETCH_HEAD when mirroring.
+ git.Flag{Name: "--no-write-fetch-head"},
+ git.Flag{Name: "--quiet"},
+ },
+ Args: append(
+ []string{"file://" + stagingRepoPath},
+ git.MirrorRefSpec,
+ ),
+ },
+ git.WithRefTxHook(repo),
+ git.WithStderr(&stderr),
+ git.WithConfig(git.ConfigPair{
+ Key: "advice.fetchShowForcedUpdates", Value: "false",
+ }),
+ ); err != nil {
+ return structerr.New("fetching rewritten history: %w", err).WithMetadata("stderr", &stderr)
+ }
+
+ return nil
+}
+
+// initStagingRepo creates a new bare repository to write the rewritten history into
+// with default branch is set to match the source repo.
+func (s *server) initStagingRepo(ctx context.Context, repo *gitalypb.Repository, defaultBranch git.ReferenceName) (*localrepo.Repo, string, error) {
+ stagingRepoProto, stagingRepoDir, err := tempdir.NewRepository(ctx, repo.GetStorageName(), s.logger, s.locator)
+ if err != nil {
+ return nil, "", err
+ }
+
+ var stderr strings.Builder
+ cmd, err := s.gitCmdFactory.NewWithoutRepo(ctx, git.Command{
+ Name: "init",
+ Flags: []git.Option{
+ git.Flag{Name: "--bare"},
+ git.Flag{Name: "--quiet"},
+ },
+ Args: []string{stagingRepoDir.Path()},
+ }, git.WithStderr(&stderr))
+ if err != nil {
+ return nil, "", fmt.Errorf("spawning git-init: %w", err)
+ }
+
+ if err := cmd.Wait(); err != nil {
+ return nil, "", structerr.New("creating repository: %w", err).WithMetadata("stderr", &stderr)
+ }
+
+ stagingRepo := s.localrepo(stagingRepoProto)
+
+ // Ensure HEAD matches the source repository. In practice a mismatch doesn't cause problems,
+ // but out of an abundance of caution let's keep the two repos as similar as possible.
+ if err := stagingRepo.SetDefaultBranch(ctx, s.txManager, defaultBranch); err != nil {
+ return nil, "", fmt.Errorf("setting default branch: %w", err)
+ }
+
+ return stagingRepo, stagingRepoDir.Path(), nil
+}
+
+func (s *server) runFilterRepo(
+ ctx context.Context,
+ srcRepo, stagingRepo *localrepo.Repo,
+ blobsToRemove []string,
+ redactions [][]byte,
+) error {
+ // Place argument files in a tempdir so that cleanup is handled automatically.
+ tmpDir, err := tempdir.New(ctx, srcRepo.GetStorageName(), s.logger, s.locator)
+ if err != nil {
+ return fmt.Errorf("create tempdir: %w", err)
+ }
+
+ flags := make([]git.Option, 0, 2)
+
+ if len(blobsToRemove) > 0 {
+ blobPath, err := writeArgFile("strip-blobs", tmpDir.Path(), []byte(strings.Join(blobsToRemove, "\n")))
+ if err != nil {
+ return err
+ }
+
+ flags = append(flags, git.Flag{Name: "--strip-blobs-with-ids=" + blobPath})
+ }
+
+ if len(redactions) > 0 {
+ replacePath, err := writeArgFile("replace-text", tmpDir.Path(), bytes.Join(redactions, []byte("\n")))
+ if err != nil {
+ return err
+ }
+
+ flags = append(flags, git.Flag{Name: "--replace-text=" + replacePath})
+ }
+
+ srcPath, err := srcRepo.Path()
+ if err != nil {
+ return fmt.Errorf("getting source repo path: %w", err)
+ }
+
+ stagingPath, err := stagingRepo.Path()
+ if err != nil {
+ return fmt.Errorf("getting target repo path: %w", err)
+ }
+
+ // We must run this using 'NewWithoutRepo' because setting '--git-dir',
+ // as 'repo.ExecAndWait' does, will override the '--target' flag and
+ // write the updates directly to the original repository.
+ var stdout, stderr strings.Builder
+ cmd, err := s.gitCmdFactory.NewWithoutRepo(ctx,
+ git.Command{
+ Name: "filter-repo",
+ Flags: append([]git.Option{
+ // Repository to write filtered history into.
+ git.Flag{Name: "--target=" + stagingPath},
+ // Repository to read from.
+ git.Flag{Name: "--source=" + srcPath},
+ // git.Flag{Name: "--refs=refs/*"},
+ // Prevent automatic cleanup tasks like deleting 'origin' and running git-gc(1).
+ git.Flag{Name: "--partial"},
+ // Bypass check that repository is not a fresh clone.
+ git.Flag{Name: "--force"},
+ // filter-repo will by default create 'replace' refs for refs it rewrites, but Gitaly
+ // disables this feature. This option will update any existing user-created replace refs,
+ // while preventing the creation of new ones.
+ git.Flag{Name: "--replace-refs=update-no-add"},
+ // Pass '--quiet' to child git processes.
+ git.Flag{Name: "--quiet"},
+ }, flags...),
+ },
+ git.WithDisabledHooks(),
+ git.WithStdout(&stdout),
+ git.WithStderr(&stderr),
+ )
+ if err != nil {
+ return fmt.Errorf("spawning git-filter-repo: %w", err)
+ }
+
+ if err := cmd.Wait(); err != nil {
+ var exitErr *exec.ExitError
+ if errors.As(err, &exitErr) {
+ return structerr.New("git-filter-repo failed with exit code %d", exitErr.ExitCode()).WithMetadataItems(
+ structerr.MetadataItem{Key: "stdout", Value: stdout.String()},
+ structerr.MetadataItem{Key: "stderr", Value: stderr.String()},
+ )
+ }
+ return fmt.Errorf("running git-filter-repo: %w", err)
+ }
+
+ return nil
+}
+
+func writeArgFile(name string, dir string, input []byte) (string, error) {
+ f, err := os.CreateTemp(dir, name)
+ if err != nil {
+ return "", fmt.Errorf("creating %q file: %w", name, err)
+ }
+
+ path := f.Name()
+
+ _, err = f.Write(input)
+ if err != nil {
+ return "", fmt.Errorf("writing %q file: %w", name, err)
+ }
+
+ if err := f.Close(); err != nil {
+ return "", fmt.Errorf("closing %q file: %w", name, err)
+ }
+
+ return path, nil
+}
+
+func checksumRepo(ctx context.Context, cmdFactory git.CommandFactory, repo *localrepo.Repo) (string, error) {
+ var stderr strings.Builder
+ cmd, err := cmdFactory.New(ctx, repo, git.Command{
+ Name: "show-ref",
+ Flags: []git.Option{
+ git.Flag{Name: "--head"},
+ },
+ }, git.WithSetupStdout(), git.WithStderr(&stderr))
+ if err != nil {
+ return "", fmt.Errorf("spawning git-show-ref: %w", err)
+ }
+
+ var checksum git.Checksum
+
+ scanner := bufio.NewScanner(cmd)
+ for scanner.Scan() {
+ checksum.AddBytes(scanner.Bytes())
+ }
+
+ if err := scanner.Err(); err != nil {
+ return "", err
+ }
+
+ if err := cmd.Wait(); err != nil {
+ var exitErr *exec.ExitError
+ if errors.As(err, &exitErr) {
+ return "", structerr.New("git-show-ref failed with exit code %d", exitErr.ExitCode()).WithMetadata("stderr", stderr.String())
+ }
+ return "", fmt.Errorf("running git-show-ref: %w", err)
+ }
+
+ return checksum.String(), nil
+}