Welcome to mirror list, hosted at ThFree Co, Russian Federation.

objects.go « housekeeping « git « internal - gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: d83a13ef58d8fd32f6ce6ce2c142fe5e8838d9f8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
package housekeeping

import (
	"context"
	"errors"
	"fmt"
	"os/exec"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"gitlab.com/gitlab-org/gitaly/v16/internal/git"
	"gitlab.com/gitlab-org/gitaly/v16/internal/git/localrepo"
	"gitlab.com/gitlab-org/gitaly/v16/internal/git/stats"
	"gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage"
	"gitlab.com/gitlab-org/gitaly/v16/internal/structerr"
)

const (
	// looseObjectLimit is the limit of loose objects we accept both when doing incremental
	// repacks and when pruning objects.
	//
	// NOTE(review): not referenced within this view of the file; presumably consumed by
	// housekeeping heuristics elsewhere in the package — verify before changing.
	looseObjectLimit = 1024
)

// RepackObjectsStrategy defines how objects shall be repacked.
type RepackObjectsStrategy string

const (
	// RepackObjectsStrategyIncrementalWithUnreachable performs an incremental repack by writing
	// all loose objects into a new packfile, regardless of their reachability. The loose
	// objects will be deleted.
	RepackObjectsStrategyIncrementalWithUnreachable = RepackObjectsStrategy("incremental_with_unreachable")
	// RepackObjectsStrategyFullWithCruft performs a full repack by writing all reachable
	// objects into a new packfile. Unreachable objects will be written into a separate cruft
	// packfile.
	RepackObjectsStrategyFullWithCruft = RepackObjectsStrategy("full_with_cruft")
	// RepackObjectsStrategyFullWithUnreachable performs a full repack by writing all reachable
	// objects into a new packfile. Packed unreachable objects will be appended to the packfile
	// and redundant loose object files will be deleted.
	//
	// Note that this will not include unreachable loose objects, but only packed loose objects.
	// git-repack(1) does not currently expose an option to make it include all objects.
	// Combined with geometric repacks though this is acceptable as the geometric strategy will
	// include all loose objects.
	RepackObjectsStrategyFullWithUnreachable = RepackObjectsStrategy("full_with_unreachable")
	// RepackObjectsStrategyGeometric performs a geometric repack. This strategy will repack
	// packfiles so that the resulting pack structure forms a geometric sequence in the number
	// of objects. Loose objects will get soaked up as part of the repack regardless of their
	// reachability.
	RepackObjectsStrategyGeometric = RepackObjectsStrategy("geometric")
)

// RepackObjectsConfig is configuration for RepackObjects.
type RepackObjectsConfig struct {
	// Strategy determines the strategy with which to repack objects.
	Strategy RepackObjectsStrategy
	// WriteBitmap determines whether reachability bitmaps should be written or not. There is no
	// reason to set this to `false`, except for legacy compatibility reasons with existing RPC
	// behaviour.
	WriteBitmap bool
	// WriteMultiPackIndex determines whether a multi-pack index should be written or not.
	WriteMultiPackIndex bool
	// CruftExpireBefore determines the cutoff date before which unreachable cruft objects shall
	// be expired and thus deleted. Only valid in combination with
	// RepackObjectsStrategyFullWithCruft; RepackObjects rejects it for any other strategy.
	CruftExpireBefore time.Time
}

// RepackObjects repacks objects in the given repository and updates the commit-graph. The way
// objects are repacked is determined via the RepackObjectsConfig. Invalid strategy/config
// combinations are rejected with a structured InvalidArgument error.
func RepackObjects(ctx context.Context, repo *localrepo.Repo, cfg RepackObjectsConfig) error {
	repoPath, err := repo.Path()
	if err != nil {
		return err
	}

	var isFullRepack bool
	switch cfg.Strategy {
	case RepackObjectsStrategyIncrementalWithUnreachable, RepackObjectsStrategyGeometric:
		isFullRepack = false
	case RepackObjectsStrategyFullWithCruft, RepackObjectsStrategyFullWithUnreachable:
		isFullRepack = true
	default:
		return structerr.NewInvalidArgument("invalid strategy: %q", cfg.Strategy)
	}

	// Bitmaps are only supported for full repacks and multi-pack indices, so reject any
	// other combination up front.
	if !isFullRepack && !cfg.WriteMultiPackIndex && cfg.WriteBitmap {
		return structerr.NewInvalidArgument("cannot write packfile bitmap for an incremental repack")
	}
	if cfg.Strategy != RepackObjectsStrategyFullWithCruft && !cfg.CruftExpireBefore.IsZero() {
		return structerr.NewInvalidArgument("cannot expire cruft objects when not writing cruft packs")
	}

	if isFullRepack {
		// When we have performed a full repack we're updating the "full-repack-timestamp"
		// file. This is done so that we can tell when we have last performed a full repack
		// in a repository. This information can be used by our heuristics to effectively
		// rate-limit the frequency of full repacks.
		//
		// Note that we write the file _before_ actually writing the new pack, which means
		// that even if the full repack fails, we would still pretend to have done it. This
		// is done intentionally, as the likelihood for huge repositories to fail during a
		// full repack is comparatively high. So if we didn't update the timestamp in case
		// of a failure we'd potentially busy-spin trying to do a full repack.
		if err := stats.UpdateFullRepackTimestamp(repoPath, time.Now()); err != nil {
			return fmt.Errorf("updating full-repack timestamp: %w", err)
		}
	}

	switch cfg.Strategy {
	case RepackObjectsStrategyIncrementalWithUnreachable:
		if cfg.WriteBitmap {
			return structerr.NewInvalidArgument("cannot write packfile bitmap for an incremental repack")
		}
		if cfg.WriteMultiPackIndex {
			return structerr.NewInvalidArgument("cannot write multi-pack index for an incremental repack")
		}

		var stderr strings.Builder

		// Pack all loose objects into a new packfile, regardless of their reachability.
		// There is no git-repack(1) mode that would allow us to do this, so we have to
		// instead do it ourselves.
		if err := repo.ExecAndWait(ctx,
			git.Command{
				Name: "pack-objects",
				Flags: []git.Option{
					// We ask git-pack-objects(1) to pack loose unreachable
					// objects. This implies `--revs`, but as we don't supply
					// any revisions via stdin all objects will be considered
					// unreachable. The effect is that we simply pack all loose
					// objects into a new packfile, regardless of whether they
					// are reachable or not.
					git.Flag{Name: "--pack-loose-unreachable"},
					// Skip any objects which are part of an alternative object
					// directory.
					git.Flag{Name: "--local"},
					// Only pack objects which are not yet part of a different,
					// local pack.
					git.Flag{Name: "--incremental"},
					// Only create the packfile if it would contain at least one
					// object.
					git.Flag{Name: "--non-empty"},
					// We don't care about any kind of progress meter.
					git.Flag{Name: "--quiet"},
				},
				Args: []string{
					// We need to tell git-pack-objects(1) where to write the
					// new packfile and what prefix it should have. We of course
					// want to write it into the main object directory and have
					// the same "pack-" prefix like normal packfiles would.
					filepath.Join(repoPath, "objects", "pack", "pack"),
				},
			},
			// Note: we explicitly do not pass `GetRepackGitConfig()` here as none of
			// its options apply to this kind of repack: we have no delta islands given
			// that we do not walk the revision graph, and we won't ever write bitmaps.
			git.WithStderr(&stderr),
		); err != nil {
			var exitErr *exec.ExitError
			if errors.As(err, &exitErr) {
				return structerr.New("pack-objects failed with error code %d", exitErr.ExitCode()).WithMetadata("stderr", stderr.String())
			}

			return fmt.Errorf("pack-objects failed: %w", err)
		}

		// Reuse the builder for the second command so its output isn't mixed with
		// the first command's.
		stderr.Reset()

		// The `-d` switch of git-repack(1) handles deletion of objects that have just been
		// packed into a new packfile. As we pack objects ourselves, we have to manually
		// ensure that packed loose objects are deleted.
		if err := repo.ExecAndWait(ctx,
			git.Command{
				Name: "prune-packed",
				Flags: []git.Option{
					// We don't care about any kind of progress meter.
					git.Flag{Name: "--quiet"},
				},
			},
			git.WithStderr(&stderr),
		); err != nil {
			var exitErr *exec.ExitError
			if errors.As(err, &exitErr) {
				return structerr.New("prune-packed failed with error code %d", exitErr.ExitCode()).WithMetadata("stderr", stderr.String())
			}

			return fmt.Errorf("prune-packed failed: %w", err)
		}

		return nil
	case RepackObjectsStrategyFullWithCruft:
		options := []git.Option{
			git.Flag{Name: "--cruft"},
			git.Flag{Name: "--pack-kept-objects"},
			git.Flag{Name: "-l"},
			git.Flag{Name: "-d"},
		}

		if !cfg.CruftExpireBefore.IsZero() {
			options = append(options, git.ValueFlag{
				Name:  "--cruft-expiration",
				Value: git.FormatTime(cfg.CruftExpireBefore),
			})
		}

		return performRepack(ctx, repo, cfg, options...)
	case RepackObjectsStrategyFullWithUnreachable:
		return performRepack(ctx, repo, cfg,
			// Do a full repack.
			git.Flag{Name: "-a"},
			// Don't include objects part of alternate.
			git.Flag{Name: "-l"},
			// Delete loose objects made redundant by this repack.
			git.Flag{Name: "-d"},
			// Keep unreachable objects part of the old packs in the new pack.
			git.Flag{Name: "--keep-unreachable"},
		)
	case RepackObjectsStrategyGeometric:
		return performRepack(ctx, repo, cfg,
			// We use a geometric factor `r`, which means that every successively larger
			// packfile must have at least `r` times the number of objects.
			//
			// This factor ultimately determines how many packfiles there can be at a
			// maximum in a repository for a given number of objects. The maximum number
			// of objects with `n` packfiles and a factor `r` is `(1 - r^n) / (1 - r)`.
			// E.g. with a factor of 4 and 10 packfiles, we can have at most 349,525
			// objects, with 16 packfiles we can have 1,431,655,765 objects. Contrary to
			// that, having a factor of 2 will translate to 1023 objects at 10 packfiles
			// and 65535 objects at 16 packfiles at a maximum.
			//
			// So what we're effectively choosing here is how often we need to repack
			// larger parts of the repository. The higher the factor the more we'll have
			// to repack as the packfiles will be larger. On the other hand, having a
			// smaller factor means we'll have to repack less objects as the slices we
			// need to repack will have less objects.
			//
			// The end result is a hybrid approach between incremental repacks and full
			// repacks: we won't typically repack the full repository, but only a subset
			// of packfiles.
			//
			// For now, we choose a geometric factor of two. Large repositories nowadays
			// typically have a few million objects, which would boil down to having at
			// most 32 packfiles in the repository. This number is not scientifically
			// chosen, though, and may be changed at a later point in time.
			git.ValueFlag{Name: "--geometric", Value: "2"},
			// Make sure to delete loose objects and packfiles that are made obsolete
			// by the new packfile.
			git.Flag{Name: "-d"},
			// Don't include objects part of an alternate.
			git.Flag{Name: "-l"},
		)
	default:
		return structerr.NewInvalidArgument("invalid strategy: %q", cfg.Strategy)
	}
}

// performRepack invokes git-repack(1) with the given options, injecting the configuration
// returned by GetRepackGitConfig so that delta-island and bitmap settings apply. When the
// process exits with a nonzero code, a structured error carrying the exit code and the
// captured stderr is returned; any other failure is wrapped with context.
func performRepack(ctx context.Context, repo *localrepo.Repo, cfg RepackObjectsConfig, opts ...git.Option) error {
	flags := opts
	if cfg.WriteMultiPackIndex {
		// A multi-pack index is requested via the dedicated git-repack(1) switch.
		flags = append(flags, git.Flag{Name: "--write-midx"})
	}

	var stderr strings.Builder
	err := repo.ExecAndWait(ctx,
		git.Command{
			Name:  "repack",
			Flags: flags,
		},
		git.WithConfig(GetRepackGitConfig(ctx, repo, cfg.WriteBitmap)...),
		git.WithStderr(&stderr),
	)
	if err == nil {
		return nil
	}

	// We do not embed the `exec.ExitError` directly as it typically carries no useful
	// information beyond its exit code. So we surface only what matters and attach the
	// captured stderr as error metadata.
	var exitErr *exec.ExitError
	if errors.As(err, &exitErr) {
		return structerr.New("repack failed with error code %d", exitErr.ExitCode()).WithMetadata("stderr", stderr.String())
	}

	return fmt.Errorf("repack failed: %w", err)
}

// GetRepackGitConfig returns configuration suitable for Git commands which write new packfiles.
// Delta islands are enabled unconditionally; bitmap writing is toggled via the bitmap argument.
func GetRepackGitConfig(ctx context.Context, repo storage.Repository, bitmap bool) []git.ConfigPair {
	// Delta islands differ between object pools and regular repositories: pools anchor
	// their islands at the pool-specific ref namespace, regular repositories at their
	// normal branch and tag hierarchies. The parenthesized groups are the regex capture
	// groups that pack.island uses to partition objects into islands.
	var islands []git.ConfigPair
	if storage.IsPoolRepository(repo) {
		islands = []git.ConfigPair{
			{Key: "pack.island", Value: git.ObjectPoolRefNamespace + "/he(a)ds"},
			{Key: "pack.island", Value: git.ObjectPoolRefNamespace + "/t(a)gs"},
			{Key: "pack.islandCore", Value: "a"},
		}
	} else {
		islands = []git.ConfigPair{
			{Key: "pack.island", Value: "r(e)fs/heads"},
			{Key: "pack.island", Value: "r(e)fs/tags"},
			{Key: "pack.islandCore", Value: "e"},
		}
	}

	return append([]git.ConfigPair{
		{Key: "repack.useDeltaIslands", Value: "true"},
		{Key: "repack.writeBitmaps", Value: strconv.FormatBool(bitmap)},
		{Key: "pack.writeBitmapLookupTable", Value: "true"},
	}, islands...)
}

// PruneObjectsConfig determines which objects should be pruned in PruneObjects.
type PruneObjectsConfig struct {
	// ExpireBefore controls the grace period after which unreachable objects shall be pruned.
	// An unreachable object must be older than the given date in order to be considered for
	// deletion. The zero value is passed through to git-prune(1) via git.FormatTime;
	// NOTE(review): semantics of a zero cutoff depend on that formatting — verify at call sites.
	ExpireBefore time.Time
}

// PruneObjects prunes loose objects from the repository that are already packed or which are
// unreachable and older than the configured expiry date.
func PruneObjects(ctx context.Context, repo *localrepo.Repo, cfg PruneObjectsConfig) error {
	pruneCmd := git.Command{
		Name: "prune",
		Flags: []git.Option{
			// By default git-prune(1) removes all unreachable objects regardless of
			// when they were last accessed. That opens a race against concurrent
			// commands which have just written objects but haven't yet updated any
			// references to make them reachable.
			//
			// To avoid this race we apply the caller-provided grace window so that
			// only objects older than the cutoff get deleted.
			git.ValueFlag{Name: "--expire", Value: git.FormatTime(cfg.ExpireBefore)},
		},
	}

	if err := repo.ExecAndWait(ctx, pruneCmd); err != nil {
		return fmt.Errorf("executing prune: %w", err)
	}

	return nil
}