Welcome to mirror list, hosted at ThFree Co, Russian Federation.

rewrite_history.go « cleanup « service « gitaly « internal - gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 85e45ab55aece62b1960b1d698f1d3404f31bf64 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
package cleanup

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"os"
	"os/exec"
	"strings"

	"gitlab.com/gitlab-org/gitaly/v16/internal/git"
	"gitlab.com/gitlab-org/gitaly/v16/internal/git/localrepo"
	"gitlab.com/gitlab-org/gitaly/v16/internal/structerr"
	"gitlab.com/gitlab-org/gitaly/v16/internal/tempdir"
	"gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb"
)

// RewriteHistory rewrites repository history with git-filter-repo(1): the requested blobs are
// stripped from all commits and the requested text patterns are redacted from blob contents.
//
// Requests arrive as a stream. The repository is taken from the first message and must not be
// repeated in later ones. Every message, including the first, has to carry at least one blob
// object ID or one redaction pattern. The rewrite only starts after the stream has been read
// to the end, and a single empty response is sent once it has succeeded.
//
// Stripped blobs are not deleted from the object database; they must be garbage collected
// separately.
func (s *server) RewriteHistory(server gitalypb.CleanupService_RewriteHistoryServer) error {
	ctx := server.Context()

	req, err := server.Recv()
	if err != nil {
		return fmt.Errorf("receiving initial request: %w", err)
	}

	repoProto := req.GetRepository()
	if err := s.locator.ValidateRepository(repoProto); err != nil {
		return structerr.NewInvalidArgument("%w", err)
	}

	repo := s.localrepo(repoProto)

	objHash, err := repo.ObjectHash(ctx)
	if err != nil {
		return fmt.Errorf("detecting object hash: %w", err)
	}

	if objHash.Format == "sha256" {
		return structerr.NewInvalidArgument("git-filter-repo does not support repositories using the SHA256 object format")
	}

	// The repository has been consumed above. Clearing it lets the loop below apply the
	// same "no repository" check to the first message as to every later one.
	req.Repository = nil

	var (
		blobIDs  = make([]string, 0, len(req.GetBlobs()))
		patterns = make([][]byte, 0, len(req.GetRedactions()))
	)

	for {
		switch {
		case req.GetRepository() != nil:
			return structerr.NewInvalidArgument("subsequent requests must not contain repository")
		case len(req.GetBlobs()) == 0 && len(req.GetRedactions()) == 0:
			return structerr.NewInvalidArgument("no object IDs or text replacements specified")
		}

		for _, oid := range req.GetBlobs() {
			if err := objHash.ValidateHex(oid); err != nil {
				return structerr.NewInvalidArgument("validating object ID: %w", err).WithMetadata("oid", oid)
			}
			blobIDs = append(blobIDs, oid)
		}

		for _, pattern := range req.GetRedactions() {
			// Patterns are later written one per line, so a pattern must not span
			// lines. The offending pattern is intentionally left out of the error
			// because it is likely to contain sensitive information.
			if strings.IndexByte(string(pattern), '\n') >= 0 {
				return structerr.NewInvalidArgument("redaction pattern contains newline")
			}
			patterns = append(patterns, pattern)
		}

		next, recvErr := server.Recv()
		if errors.Is(recvErr, io.EOF) {
			// The client has finished sending.
			break
		}
		if recvErr != nil {
			return fmt.Errorf("receiving next request: %w", recvErr)
		}
		req = next
	}

	if err := s.rewriteHistory(ctx, repo, repoProto, blobIDs, patterns); err != nil {
		return err
	}

	if err := server.SendAndClose(&gitalypb.RewriteHistoryResponse{}); err != nil {
		return fmt.Errorf("sending RewriteHistoryResponse: %w", err)
	}

	return nil
}

// rewriteHistory applies the requested blob removals and redactions to repo.
//
// The rewrite is staged: a fresh staging repository is initialized with the same HEAD
// reference as repo, git-filter-repo(1) writes the filtered history into it, and the result
// is then fetched back into repo using git.MirrorRefSpec. The fetch is forced, atomic and
// pruning, and runs with the reference transaction hook enabled.
//
// repoProto is only used to set up the staging repository. blobsToRemove holds the object IDs
// of blobs to strip and redactions holds the text patterns to redact.
//
// An error is returned if HEAD cannot be resolved, the staging repository cannot be created,
// git-filter-repo(1) fails, or the final fetch fails. A failed fetch carries git's stderr
// output as error metadata.
func (s *server) rewriteHistory(
	ctx context.Context,
	repo *localrepo.Repo,
	repoProto *gitalypb.Repository,
	blobsToRemove []string,
	redactions [][]byte,
) error {
	defaultBranch, err := repo.HeadReference(ctx)
	if err != nil {
		return fmt.Errorf("finding HEAD reference: %w", err)
	}

	stagingRepo, stagingRepoPath, err := s.initStagingRepo(ctx, repoProto, defaultBranch)
	if err != nil {
		return fmt.Errorf("setting up staging repo: %w", err)
	}

	if err := s.runFilterRepo(ctx, repo, stagingRepo, blobsToRemove, redactions); err != nil {
		return fmt.Errorf("rewriting repository history: %w", err)
	}

	var stderr strings.Builder
	if err := repo.ExecAndWait(ctx,
		git.Command{
			Name: "fetch",
			Flags: []git.Option{
				// Delete any refs that were removed by filter-repo.
				git.Flag{Name: "--prune"},
				// The mirror refspec includes tags, don't fetch them again.
				git.Flag{Name: "--no-tags"},
				// New history will be disjoint from the original repo.
				git.Flag{Name: "--force"},
				// Ensure we don't partially apply the rewritten history.
				// We don't expect file / directory conflicts as all refs
				// in the staging repo are from the original.
				git.Flag{Name: "--atomic"},
				// We're going to have a lot of these, don't waste
				// time displaying them.
				git.Flag{Name: "--no-show-forced-updates"},
				// No need for FETCH_HEAD when mirroring.
				git.Flag{Name: "--no-write-fetch-head"},
				git.Flag{Name: "--quiet"},
			},
			Args: []string{"file://" + stagingRepoPath, git.MirrorRefSpec},
		},
		git.WithRefTxHook(repo),
		git.WithStderr(&stderr),
		git.WithConfig(git.ConfigPair{
			Key: "advice.fetchShowForcedUpdates", Value: "false",
		}),
	); err != nil {
		// Attach the captured output as a plain string, as runFilterRepo does, so the
		// metadata holds the text itself rather than a pointer to the builder.
		return structerr.New("fetching rewritten history: %w", err).WithMetadata("stderr", stderr.String())
	}

	return nil
}

// initStagingRepo creates a new bare repository to write the rewritten history into, with
// its default branch set to match the source repository.
//
// The repository is created through tempdir.NewRepository in the storage named by repo and
// initialized with `git init --bare`. defaultBranch is the HEAD reference of the source
// repository. On success the staging repository and its path on disk are returned. If
// git-init(1) fails, its stderr output is attached to the error as metadata.
func (s *server) initStagingRepo(ctx context.Context, repo *gitalypb.Repository, defaultBranch git.ReferenceName) (*localrepo.Repo, string, error) {
	stagingRepoProto, stagingRepoDir, err := tempdir.NewRepository(ctx, repo.GetStorageName(), s.logger, s.locator)
	if err != nil {
		return nil, "", err
	}

	stagingRepoPath := stagingRepoDir.Path()

	var stderr strings.Builder
	cmd, err := s.gitCmdFactory.NewWithoutRepo(ctx, git.Command{
		Name: "init",
		Flags: []git.Option{
			git.Flag{Name: "--bare"},
			git.Flag{Name: "--quiet"},
		},
		Args: []string{stagingRepoPath},
	}, git.WithStderr(&stderr))
	if err != nil {
		return nil, "", fmt.Errorf("spawning git-init: %w", err)
	}

	if err := cmd.Wait(); err != nil {
		// Attach the captured output as a plain string, as runFilterRepo does, so the
		// metadata holds the text itself rather than a pointer to the builder.
		return nil, "", structerr.New("creating repository: %w", err).WithMetadata("stderr", stderr.String())
	}

	stagingRepo := s.localrepo(stagingRepoProto)

	// Ensure HEAD matches the source repository. In practice a mismatch doesn't cause problems,
	// but out of an abundance of caution let's keep the two repos as similar as possible.
	if err := stagingRepo.SetDefaultBranch(ctx, s.txManager, defaultBranch); err != nil {
		return nil, "", fmt.Errorf("setting default branch: %w", err)
	}

	return stagingRepo, stagingRepoPath, nil
}

// runFilterRepo invokes git-filter-repo(1) to read history from srcRepo and write the
// filtered result into stagingRepo.
//
// The blob IDs and redaction patterns are handed to filter-repo through files, one entry per
// line, which are created in a temporary directory. Each list is only passed along when it is
// non-empty. If filter-repo exits with a non-zero status, the returned error reports the exit
// code and carries the captured stdout and stderr as metadata.
func (s *server) runFilterRepo(
	ctx context.Context,
	srcRepo, stagingRepo *localrepo.Repo,
	blobsToRemove []string,
	redactions [][]byte,
) error {
	// The argument files live in a tempdir so that their cleanup is handled automatically.
	argDir, err := tempdir.New(ctx, srcRepo.GetStorageName(), s.logger, s.locator)
	if err != nil {
		return fmt.Errorf("create tempdir: %w", err)
	}

	// Flags that depend on what the caller asked for; appended after the fixed ones below.
	var extraFlags []git.Option

	if len(blobsToRemove) != 0 {
		path, err := writeArgFile("strip-blobs", argDir.Path(), []byte(strings.Join(blobsToRemove, "\n")))
		if err != nil {
			return err
		}

		extraFlags = append(extraFlags, git.Flag{Name: "--strip-blobs-with-ids=" + path})
	}

	if len(redactions) != 0 {
		path, err := writeArgFile("replace-text", argDir.Path(), bytes.Join(redactions, []byte("\n")))
		if err != nil {
			return err
		}

		extraFlags = append(extraFlags, git.Flag{Name: "--replace-text=" + path})
	}

	sourcePath, err := srcRepo.Path()
	if err != nil {
		return fmt.Errorf("getting source repo path: %w", err)
	}

	targetPath, err := stagingRepo.Path()
	if err != nil {
		return fmt.Errorf("getting target repo path: %w", err)
	}

	filterFlags := []git.Option{
		// Repository to write filtered history into.
		git.Flag{Name: "--target=" + targetPath},
		// Repository to read from.
		git.Flag{Name: "--source=" + sourcePath},
		// NOTE(review): the following flag was already disabled in the original code;
		// the reason is not visible here.
		// git.Flag{Name: "--refs=refs/*"},
		// Prevent automatic cleanup tasks like deleting 'origin' and running git-gc(1).
		git.Flag{Name: "--partial"},
		// Bypass check that repository is not a fresh clone.
		git.Flag{Name: "--force"},
		// filter-repo will by default create 'replace' refs for refs it rewrites, but Gitaly
		// disables this feature. This option will update any existing user-created replace refs,
		// while preventing the creation of new ones.
		git.Flag{Name: "--replace-refs=update-no-add"},
		// Pass '--quiet' to child git processes.
		git.Flag{Name: "--quiet"},
	}
	filterFlags = append(filterFlags, extraFlags...)

	// The command has to be spawned via 'NewWithoutRepo': 'repo.ExecAndWait' sets
	// '--git-dir', which would override the '--target' flag and make filter-repo write
	// the updates directly to the original repository.
	var stdout, stderr strings.Builder
	cmd, err := s.gitCmdFactory.NewWithoutRepo(ctx,
		git.Command{
			Name:  "filter-repo",
			Flags: filterFlags,
		},
		git.WithDisabledHooks(),
		git.WithStdout(&stdout),
		git.WithStderr(&stderr),
	)
	if err != nil {
		return fmt.Errorf("spawning git-filter-repo: %w", err)
	}

	waitErr := cmd.Wait()
	if waitErr == nil {
		return nil
	}

	var exitErr *exec.ExitError
	if !errors.As(waitErr, &exitErr) {
		// The process did not simply exit with a non-zero status.
		return fmt.Errorf("running git-filter-repo: %w", waitErr)
	}

	return structerr.New("git-filter-repo failed with exit code %d", exitErr.ExitCode()).WithMetadataItems(
		structerr.MetadataItem{Key: "stdout", Value: stdout.String()},
		structerr.MetadataItem{Key: "stderr", Value: stderr.String()},
	)
}

// writeArgFile writes input to a newly created temporary file inside dir and returns the
// path of that file.
//
// The file is created with os.CreateTemp, so name serves as the file name pattern: a random
// string is added to it to make the file name unique. The file is not removed by this
// function; callers should pass a directory whose cleanup is handled elsewhere.
//
// An empty path and a wrapped error are returned if the file cannot be created, written or
// closed.
func writeArgFile(name string, dir string, input []byte) (string, error) {
	f, err := os.CreateTemp(dir, name)
	if err != nil {
		return "", fmt.Errorf("creating %q file: %w", name, err)
	}

	if _, err := f.Write(input); err != nil {
		// Don't leak the file descriptor when the write fails. The close error is
		// deliberately dropped: the write error is the one worth reporting.
		_ = f.Close()
		return "", fmt.Errorf("writing %q file: %w", name, err)
	}

	if err := f.Close(); err != nil {
		return "", fmt.Errorf("closing %q file: %w", name, err)
	}

	return f.Name(), nil
}