Welcome to mirror list, hosted at ThFree Co, Russian Federation.

revision.go « gitpipe « git « internal - gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 6cd576f5083cb36c8be2b34637b6d6afdccf47fe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
package gitpipe

import (
	"bufio"
	"bytes"
	"context"
	"fmt"
	"strings"
	"time"

	"gitlab.com/gitlab-org/gitaly/v14/internal/git"
	"gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo"
)

// RevisionResult is a result for the revlist pipeline step. It is also the type of the results
// produced by ForEachRef, RevisionFilter and RevisionTransform.
type RevisionResult struct {
	// err is an error which occurred during execution of the pipeline. It is unexported, so
	// code outside of this package cannot read it from the struct directly.
	err error

	// OID is the object ID of an object printed by git-rev-list(1). For results created by
	// ForEachRef, it is the object ID printed by git-for-each-ref(1) for the reference.
	OID git.ObjectID
	// ObjectName is the name of the object. This is typically the path of the object if it was
	// traversed via either a tree or a commit. The path depends on the order in which objects
	// are traversed: if e.g. two different trees refer to the same blob with different names,
	// the blob's path depends on which of the trees was traversed first.
	//
	// Revlist leaves this unset if git-rev-list(1) printed no name for the object. For results
	// created by ForEachRef, this is the name of the reference.
	ObjectName []byte
}

// ObjectType names one of the Git object types. It is used to restrict listed objects to a
// single type.
type ObjectType string

// The object types which can be used for filtering.
const (
	// ObjectTypeCommit is the type of a Git commit.
	ObjectTypeCommit ObjectType = "commit"
	// ObjectTypeBlob is the type of a Git blob.
	ObjectTypeBlob ObjectType = "blob"
	// ObjectTypeTree is the type of a Git tree.
	ObjectTypeTree ObjectType = "tree"
	// ObjectTypeTag is the type of a Git tag.
	ObjectTypeTag ObjectType = "tag"
)

// revlistConfig holds the settings for the revlist pipeline step. RevlistOptions modify it, and
// Revlist translates it into flags for git-rev-list(1).
type revlistConfig struct {
	// blobLimit is passed as `--filter=blob:limit=` if it is greater than zero.
	blobLimit int
	// objects enables `--in-commit-order`, `--objects` and `--object-names`.
	objects bool
	// objectType is passed as `--filter=object:type=` together with
	// `--filter-provided-objects` if it is not empty.
	objectType ObjectType
	// order selects `--topo-order` or `--date-order`. OrderNone does not add any flag.
	order Order
	// reverse enables `--reverse`.
	reverse bool
	// maxParents is passed as `--max-parents=` if it is greater than zero.
	maxParents uint
	// disabledWalk enables `--no-walk`.
	disabledWalk bool
	// firstParent enables `--first-parent`.
	firstParent bool
	// before is passed as `--before=` if it is not the zero time.
	before time.Time
	// after is passed as `--after=` if it is not the zero time.
	after time.Time
	// author is passed as `--author=` if it is not empty.
	author []byte
}

// RevlistOption is an option for the revlist pipeline step. Each option is a function which
// modifies the configuration that Revlist builds before it spawns git-rev-list(1).
type RevlistOption func(*revlistConfig)

// WithObjects makes git-rev-list(1) list the objects referenced by the listed commits in addition
// to the commits themselves.
func WithObjects() RevlistOption {
	return func(c *revlistConfig) { c.objects = true }
}

// WithBlobLimit configures a size limit for blobs: the pipeline step will only return blobs whose
// size is smaller than the given limit. A limit which is not greater than zero does not set up any
// filter.
func WithBlobLimit(limit int) RevlistOption {
	return func(c *revlistConfig) { c.blobLimit = limit }
}

// WithObjectTypeFilter sets up a `--filter=object:type=` filter for git-rev-list(1) so that any
// object which does not match the given type is filtered out. Because git-rev-list(1) by default
// never filters provided arguments, this option also sets up the `--filter-provided-objects` flag.
// Note that this option is only supported starting with Git v2.32.0 or later.
func WithObjectTypeFilter(t ObjectType) RevlistOption {
	return func(c *revlistConfig) { c.objectType = t }
}

// Order determines the order in which objects are printed.
type Order int

// The orderings which can be requested via WithOrder.
const (
	// OrderNone is the default ordering, which is reverse chronological order.
	OrderNone Order = iota
	// OrderTopo shows no parent before all of its children have been shown. Furthermore,
	// multiple lines of history will not be intermixed.
	OrderTopo
	// OrderDate shows no parent before all of its children have been shown. Otherwise, commits
	// are shown in commit timestamp order. This can cause history to be shown intermixed.
	OrderDate
)

// WithOrder selects the order in which objects are listed.
func WithOrder(o Order) RevlistOption {
	return func(c *revlistConfig) { c.order = o }
}

// WithReverse causes commits to be listed in reverse order.
func WithReverse() RevlistOption {
	return func(c *revlistConfig) { c.reverse = true }
}

// WithMaxParents restricts git-rev-list(1) to commits which have at most p parents. Setting it to
// 1 thus skips merge commits. For git-rev-list(1) itself, a value of zero would mean that only
// root commits are printed. We instead treat zero as the default value, in which case no
// restriction is set up and all commits are printed.
func WithMaxParents(p uint) RevlistOption {
	return func(c *revlistConfig) { c.maxParents = p }
}

// WithDisabledWalk stops git-rev-list(1) from walking the graph beyond the tips which have been
// specified directly.
func WithDisabledWalk() RevlistOption {
	return func(c *revlistConfig) { c.disabledWalk = true }
}

// WithFirstParent restricts the walk done by git-rev-list(1) to the first-parent chain of commits.
func WithFirstParent() RevlistOption {
	return func(c *revlistConfig) { c.firstParent = true }
}

// WithBefore restricts git-rev-list(1) to commits which are older than the specified time. The
// zero time does not set up any restriction.
func WithBefore(t time.Time) RevlistOption {
	return func(c *revlistConfig) { c.before = t }
}

// WithAfter restricts git-rev-list(1) to commits which are newer than the specified time. The
// zero time does not set up any restriction.
func WithAfter(t time.Time) RevlistOption {
	return func(c *revlistConfig) { c.after = t }
}

// WithAuthor restricts git-rev-list(1) to commits whose author matches the given pattern. An
// empty pattern does not set up any restriction.
func WithAuthor(author []byte) RevlistOption {
	return func(c *revlistConfig) { c.author = author }
}

// Revlist runs git-rev-list(1) for the given revisions and returns an iterator over the object
// IDs listed by this command. Objects referenced by the listed commits and their object names are
// only printed if WithObjects is passed; the remaining RevlistOptions adjust the command further.
//
// The command is executed asynchronously. Results are delivered via the returned iterator, and an
// error which occurs when spawning the command, reading its output or waiting for it to exit is
// delivered as the last result. Cancelling the context will cause the pipeline to be cancelled,
// too.
func Revlist(
	ctx context.Context,
	repo *localrepo.Repo,
	revisions []string,
	options ...RevlistOption,
) RevisionIterator {
	// gitTimeLayout is the layout used to pass timestamps to git-rev-list(1). It matches the
	// leading part of what time.Time.String() prints, but leaves out fractional seconds, the
	// zone abbreviation and the monotonic clock reading. String() is documented to be meant
	// for debugging only, and thus its output must not be used as a serialization format.
	const gitTimeLayout = "2006-01-02 15:04:05 -0700"

	var cfg revlistConfig
	for _, option := range options {
		option(&cfg)
	}

	resultChan := make(chan RevisionResult)
	go func() {
		defer close(resultChan)

		flags := []git.Option{}

		if cfg.objects {
			flags = append(flags,
				git.Flag{Name: "--in-commit-order"},
				git.Flag{Name: "--objects"},
				git.Flag{Name: "--object-names"},
			)
		}

		if cfg.blobLimit > 0 {
			flags = append(flags, git.Flag{
				Name: fmt.Sprintf("--filter=blob:limit=%d", cfg.blobLimit),
			})
		}

		if cfg.objectType != "" {
			flags = append(flags,
				git.Flag{Name: fmt.Sprintf("--filter=object:type=%s", cfg.objectType)},
				git.Flag{Name: "--filter-provided-objects"},
			)
		}

		switch cfg.order {
		case OrderNone:
			// Default order, nothing to do.
		case OrderTopo:
			flags = append(flags, git.Flag{Name: "--topo-order"})
		case OrderDate:
			flags = append(flags, git.Flag{Name: "--date-order"})
		}

		if cfg.reverse {
			flags = append(flags, git.Flag{Name: "--reverse"})
		}

		// A value of zero is our default and means "no restriction", even though
		// git-rev-list(1) would interpret `--max-parents=0` differently.
		if cfg.maxParents > 0 {
			flags = append(flags, git.Flag{
				Name: fmt.Sprintf("--max-parents=%d", cfg.maxParents),
			})
		}

		if cfg.disabledWalk {
			flags = append(flags, git.Flag{Name: "--no-walk"})
		}

		if cfg.firstParent {
			flags = append(flags, git.Flag{Name: "--first-parent"})
		}

		if !cfg.before.IsZero() {
			flags = append(flags, git.Flag{
				Name: fmt.Sprintf("--before=%s", cfg.before.Format(gitTimeLayout)),
			})
		}

		if !cfg.after.IsZero() {
			flags = append(flags, git.Flag{
				Name: fmt.Sprintf("--after=%s", cfg.after.Format(gitTimeLayout)),
			})
		}

		if len(cfg.author) > 0 {
			flags = append(flags, git.Flag{
				Name: fmt.Sprintf("--author=%s", string(cfg.author)),
			})
		}

		revlist, err := repo.Exec(ctx, git.SubCmd{
			Name:  "rev-list",
			Flags: flags,
			Args:  revisions,
		})
		if err != nil {
			sendRevisionResult(ctx, resultChan, RevisionResult{err: err})
			return
		}

		scanner := bufio.NewScanner(revlist)
		for scanner.Scan() {
			// We need to copy the line here because we'll hand it over to the caller
			// asynchronously, and the next call to `Scan()` will overwrite the buffer.
			line := make([]byte, len(scanner.Bytes()))
			copy(line, scanner.Bytes())

			// Each line consists of the object ID, optionally followed by a space and
			// the object's name. The name may itself contain spaces, so we only split
			// at the first one.
			oidAndName := bytes.SplitN(line, []byte{' '}, 2)

			result := RevisionResult{
				OID: git.ObjectID(oidAndName[0]),
			}
			if len(oidAndName) == 2 && len(oidAndName[1]) > 0 {
				result.ObjectName = oidAndName[1]
			}

			if isDone := sendRevisionResult(ctx, resultChan, result); isDone {
				return
			}
		}

		if err := scanner.Err(); err != nil {
			sendRevisionResult(ctx, resultChan, RevisionResult{
				err: fmt.Errorf("scanning rev-list output: %w", err),
			})
			return
		}

		if err := revlist.Wait(); err != nil {
			sendRevisionResult(ctx, resultChan, RevisionResult{
				err: fmt.Errorf("rev-list pipeline command: %w", err),
			})
			return
		}
	}()

	return &revisionIterator{
		ch: resultChan,
	}
}

// ForEachRef runs git-for-each-ref(1) with the given patterns and returns a RevisionIterator which
// yields one result per reference that was found. Patterns must always refer to fully qualified
// reference names. Patterns for which no branch is found do not result in an error. For each
// result, the object name is set to the reference while the object ID is the target object the
// reference points to. Cancelling the context will cause the pipeline to be cancelled, too.
func ForEachRef(
	ctx context.Context,
	repo *localrepo.Repo,
	patterns []string,
) RevisionIterator {
	results := make(chan RevisionResult)

	go func() {
		defer close(results)

		// We do not use the default format: it also includes the object type, which
		// requires reading the referenced commit's object and would thus be about 2-3x
		// slower. The burden of determining the type is instead moved into the next
		// pipeline step.
		format := git.ValueFlag{Name: "--format", Value: "%(objectname) %(refname)"}

		cmd, err := repo.Exec(ctx, git.SubCmd{
			Name:  "for-each-ref",
			Flags: []git.Option{format},
			Args:  patterns,
		})
		if err != nil {
			sendRevisionResult(ctx, results, RevisionResult{err: err})
			return
		}

		lines := bufio.NewScanner(cmd)
		for lines.Scan() {
			line := lines.Text()

			// Every line is expected to be "<object ID> <reference name>". We only
			// split at the first space and treat a line without any space as an
			// error.
			fields := strings.SplitN(line, " ", 2)
			if len(fields) != 2 {
				sendRevisionResult(ctx, results, RevisionResult{
					err: fmt.Errorf("invalid for-each-ref format: %q", line),
				})
				return
			}

			reference := RevisionResult{
				OID:        git.ObjectID(fields[0]),
				ObjectName: []byte(fields[1]),
			}
			if sendRevisionResult(ctx, results, reference) {
				return
			}
		}

		if err := lines.Err(); err != nil {
			sendRevisionResult(ctx, results, RevisionResult{
				err: fmt.Errorf("scanning for-each-ref output: %w", err),
			})
			return
		}

		if err := cmd.Wait(); err != nil {
			sendRevisionResult(ctx, results, RevisionResult{
				err: fmt.Errorf("for-each-ref pipeline command: %w", err),
			})
		}
	}()

	return &revisionIterator{ch: results}
}

// RevisionFilter returns an iterator which only yields those RevisionResults of the provided
// iterator for which the filter function returns `true`; all other items are dropped from the
// pipeline. Errors cannot be filtered and will always be passed through.
func RevisionFilter(ctx context.Context, it RevisionIterator, filter func(RevisionResult) bool) RevisionIterator {
	// Filtering is a transformation which maps every item either to itself or to nothing.
	keepMatching := func(candidate RevisionResult) []RevisionResult {
		if !filter(candidate) {
			return nil
		}
		return []RevisionResult{candidate}
	}

	return RevisionTransform(ctx, it, keepMatching)
}

// RevisionTransform passes each RevisionResult of the provided iterator through the transforming
// function and returns an iterator which yields the transformed results in place of the original
// one. A single item may be transformed into any number of results, including none at all. An
// error reported by the provided iterator is forwarded once it has been drained.
func RevisionTransform(ctx context.Context, it RevisionIterator, transform func(RevisionResult) []RevisionResult) RevisionIterator {
	out := make(chan RevisionResult)

	go func() {
		defer close(out)

		for it.Next() {
			replacements := transform(it.Result())
			for i := range replacements {
				if isDone := sendRevisionResult(ctx, out, replacements[i]); isDone {
					return
				}
			}
		}

		// The goroutine ends right after this, so it does not matter whether the error
		// could be sent or whether the context was cancelled first.
		if err := it.Err(); err != nil {
			sendRevisionResult(ctx, out, RevisionResult{err: err})
		}
	}()

	return &revisionIterator{ch: out}
}

// sendRevisionResult sends the result to the channel unless the context is done. It returns
// `true` if the context is done and the caller should thus stop the pipeline, and `false` if the
// result has been sent.
func sendRevisionResult(ctx context.Context, ch chan<- RevisionResult, result RevisionResult) bool {
	// In case the context has been cancelled, we have a race between observing an error from
	// the killed Git process and observing the context cancellation itself. But if we end up
	// here because of cancellation of the Git process, we don't want to pass that one down the
	// pipeline but instead just stop the pipeline gracefully. We thus have this check here up
	// front to avoid sending error messages from the Git process.
	if ctx.Err() != nil {
		return true
	}

	// Block until either the receiver has taken the result or the context is done.
	select {
	case <-ctx.Done():
		return true
	case ch <- result:
		return false
	}
}