Welcome to mirror list, hosted at ThFree Co, Russian Federation.

rewrite_history.go « cleanup « service « gitaly « internal - gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 7bbdd258e208246382aff5d2bd140f0a81773d4e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
package cleanup

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"os"
	"os/exec"
	"strings"

	"gitlab.com/gitlab-org/gitaly/v16/internal/git"
	"gitlab.com/gitlab-org/gitaly/v16/internal/git/localrepo"
	"gitlab.com/gitlab-org/gitaly/v16/internal/structerr"
	"gitlab.com/gitlab-org/gitaly/v16/internal/tempdir"
	"gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb"
)

// RewriteHistory rewrites repository history with git-filter-repo(1): blobs named in the request
// stream are stripped from the commit history and blobs are replaced to redact the supplied text
// patterns. Stripped blobs are not deleted from the object database by this RPC; they have to be
// garbage collected separately.
//
// The repository is read from the first message of the stream and validated. Later messages must
// not set a repository, and every message must carry at least one blob ID or redaction pattern.
// Repositories using the SHA256 object format are rejected.
func (s *server) RewriteHistory(server gitalypb.CleanupService_RewriteHistoryServer) error {
	ctx := server.Context()

	req, err := server.Recv()
	if err != nil {
		return fmt.Errorf("receiving initial request: %w", err)
	}

	repoProto := req.GetRepository()
	if validationErr := s.locator.ValidateRepository(repoProto); validationErr != nil {
		return structerr.NewInvalidArgument("%w", validationErr)
	}

	repo := s.localrepo(repoProto)

	hash, err := repo.ObjectHash(ctx)
	if err != nil {
		return fmt.Errorf("detecting object hash: %w", err)
	}

	if hash.Format == "sha256" {
		return structerr.NewInvalidArgument("git-filter-repo does not support repositories using the SHA256 object format")
	}

	// The loop below rejects any message that carries a repository. Clearing the field on the
	// first message lets it pass through the same check as the messages that follow it.
	req.Repository = nil

	var (
		oids     = make([]string, 0, len(req.GetBlobs()))
		patterns = make([][]byte, 0, len(req.GetRedactions()))
	)

	for {
		if req.GetRepository() != nil {
			return structerr.NewInvalidArgument("subsequent requests must not contain repository")
		}

		blobs, redactions := req.GetBlobs(), req.GetRedactions()
		if len(blobs)+len(redactions) == 0 {
			return structerr.NewInvalidArgument("no object IDs or text replacements specified")
		}

		for _, oid := range blobs {
			if hexErr := hash.ValidateHex(oid); hexErr != nil {
				return structerr.NewInvalidArgument("validating object ID: %w", hexErr).WithMetadata("oid", oid)
			}
			oids = append(oids, oid)
		}

		for _, pattern := range redactions {
			// The offending pattern is intentionally kept out of the error: it is likely to
			// contain sensitive information.
			if strings.ContainsRune(string(pattern), '\n') {
				return structerr.NewInvalidArgument("redaction pattern contains newline")
			}
			patterns = append(patterns, pattern)
		}

		next, recvErr := server.Recv()
		if errors.Is(recvErr, io.EOF) {
			// The client has finished sending; everything collected so far is processed below.
			break
		}
		if recvErr != nil {
			return fmt.Errorf("receiving next request: %w", recvErr)
		}
		req = next
	}

	if filterErr := s.runFilterRepo(ctx, repo, repoProto, oids, patterns); filterErr != nil {
		return fmt.Errorf("rewriting repository history: %w", filterErr)
	}

	if sendErr := server.SendAndClose(&gitalypb.RewriteHistoryResponse{}); sendErr != nil {
		return fmt.Errorf("sending RewriteHistoryResponse: %w", sendErr)
	}

	return nil
}

// runFilterRepo runs git-filter-repo(1) against repo. When blobsToRemove is non-empty the object
// IDs are written, one per line, to a file passed via '--strip-blobs-with-ids'. When redactions is
// non-empty the patterns are written, one per line, to a file passed via '--replace-text'.
//
// If the command exits with a non-zero status, the returned error carries the exit code and the
// captured stdout and stderr as metadata. Any other failure is wrapped and returned as is.
func (s *server) runFilterRepo(
	ctx context.Context,
	repo *localrepo.Repo,
	repoProto *gitalypb.Repository,
	blobsToRemove []string,
	redactions [][]byte,
) error {
	// The argument files live in a tempdir so that their cleanup is handled automatically.
	argDir, err := tempdir.New(ctx, repo.GetStorageName(), s.logger, s.locator)
	if err != nil {
		return fmt.Errorf("create tempdir: %w", err)
	}

	args := []git.Option{
		// Prevent automatic cleanup tasks like deleting 'origin' and running git-gc(1).
		git.Flag{Name: "--partial"},
		// Bypass the check that the repository is a fresh clone.
		git.Flag{Name: "--force"},
		// By default filter-repo creates 'replace' refs for the refs it rewrites, but Gitaly
		// disables this feature. With this option existing user-created replace refs are still
		// updated, while no new ones are created.
		git.Flag{Name: "--replace-refs=update-no-add"},
		// Pass '--quiet' to child git processes.
		git.Flag{Name: "--quiet"},
	}

	if len(blobsToRemove) > 0 {
		oidList := []byte(strings.Join(blobsToRemove, "\n"))

		oidFile, err := writeArgFile("strip-blobs", argDir.Path(), oidList)
		if err != nil {
			return err
		}

		args = append(args, git.Flag{Name: "--strip-blobs-with-ids=" + oidFile})
	}

	if len(redactions) > 0 {
		patternList := bytes.Join(redactions, []byte("\n"))

		patternFile, err := writeArgFile("replace-text", argDir.Path(), patternList)
		if err != nil {
			return err
		}

		args = append(args, git.Flag{Name: "--replace-text=" + patternFile})
	}

	var stdout, stderr strings.Builder

	err = repo.ExecAndWait(ctx,
		git.Command{
			Name:  "filter-repo",
			Flags: args,
		},
		git.WithRefTxHook(repo),
		git.WithStdout(&stdout),
		git.WithStderr(&stderr),
	)
	if err == nil {
		return nil
	}

	var exitErr *exec.ExitError
	if !errors.As(err, &exitErr) {
		return fmt.Errorf("running git-filter-repo: %w", err)
	}

	// The process ran but reported failure: surface its exit code together with its output.
	return structerr.New("git-filter-repo failed with exit code %d", exitErr.ExitCode()).WithMetadataItems(
		structerr.MetadataItem{Key: "stdout", Value: stdout.String()},
		structerr.MetadataItem{Key: "stderr", Value: stderr.String()},
	)
}

// writeArgFile creates a new, uniquely named file in dir whose name starts with name, writes
// input to it and closes it again. It returns the path of the created file.
//
// On failure the returned path is empty and the error states which step (creating, writing or
// closing) went wrong. The file itself is not removed here.
func writeArgFile(name string, dir string, input []byte) (string, error) {
	f, err := os.CreateTemp(dir, name)
	if err != nil {
		return "", fmt.Errorf("creating %q file: %w", name, err)
	}

	if _, err := f.Write(input); err != nil {
		// Release the file descriptor even though the write failed. The write error is the one
		// worth reporting, so a secondary close error is deliberately dropped.
		_ = f.Close()
		return "", fmt.Errorf("writing %q file: %w", name, err)
	}

	if err := f.Close(); err != nil {
		return "", fmt.Errorf("closing %q file: %w", name, err)
	}

	return f.Name(), nil
}