package catfile

import (
	"bufio"
	"context"
	"fmt"
	"io"
	"io/ioutil"
	"sync"

	"github.com/opentracing/opentracing-go"
	"gitlab.com/gitlab-org/gitaly/internal/git"
	"gitlab.com/gitlab-org/labkit/correlation"
)

// batchProcess encapsulates a 'git cat-file --batch' process
type batchProcess struct {
	r *bufio.Reader
	w io.WriteCloser

	// n is a state machine that tracks how much data we still have to read
	// from r. Legal states are: n==0, we can issue a new request to the
	// cat-file process; n==1, we still have to discard the trailing newline
	// after an object; n>1, we are in the middle of reading a raw git object.
	n int64

	// Even though the batch type should not be used concurrently, accidental
	// concurrent use should produce proper errors instead of unsafe memory
	// writes (to n) and unpredictable failures.
	sync.Mutex
}
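
// For reference: 'git cat-file --batch' answers every revision written to
// its stdin with a header line followed by the raw object and a trailing
// newline (format per git-cat-file(1), values illustrative):
//
//	<oid> SP <type> SP <size> LF
//	<object contents> LF
//
// This is why n is set to size+1 below, and why a leftover n==1 means only
// the trailing newline remains to be discarded.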

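// newBatchProcess spawns a 'git cat-file --batch' process for the given
// repository. The returned batchProcess writes requests to the process's
// stdin and reads responses from its stdout; when ctx is done, the stdin
// pipe is closed so the process can exit without leaking file descriptors.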
func (bc *BatchCache) newBatchProcess(ctx context.Context, repo git.RepositoryExecutor) (*batchProcess, error) {
	bc.totalCatfileProcesses.Inc()
	b := &batchProcess{}

	var stdinReader io.Reader
	stdinReader, b.w = io.Pipe()

	// batch processes are long-lived and reused across RPCs,
	// so we de-correlate the process from the RPC
	ctx = correlation.ContextWithCorrelation(ctx, "")
	ctx = opentracing.ContextWithSpan(ctx, nil)

	batchCmd, err := repo.Exec(ctx,
		git.SubCmd{
			Name: "cat-file",
			Flags: []git.Option{
				git.Flag{Name: "--batch"},
			},
		},
		git.WithStdin(stdinReader),
	)
	if err != nil {
		return nil, err
	}

	b.r = bufio.NewReader(batchCmd)

	bc.currentCatfileProcesses.Inc()
	go func() {
		<-ctx.Done()
		// This Close() is crucial to prevent leaking file descriptors.
		b.w.Close()
		bc.currentCatfileProcesses.Dec()
	}()

	if bc.injectSpawnErrors {
		// Testing only: intentionally leak process
		return nil, &simulatedBatchSpawnError{}
	}

	return b, nil
}

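// reader requests the object identified by revision from the cat-file
// process and returns an Object whose Reader streams its raw contents. If
// the object's type does not match expectedType, the object data is
// discarded and a NotFoundError is returned.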
func (b *batchProcess) reader(revision git.Revision, expectedType string) (*Object, error) {
	b.Lock()
	defer b.Unlock()

	if b.n == 1 {
		// Consume linefeed
		if _, err := b.r.ReadByte(); err != nil {
			return nil, err
		}
		b.n--
	}

	if b.n != 0 {
		return nil, fmt.Errorf("cannot create new Object: batch contains %d unread bytes", b.n)
	}

	if _, err := fmt.Fprintln(b.w, revision.String()); err != nil {
		return nil, err
	}

	oi, err := ParseObjectInfo(b.r)
	if err != nil {
		return nil, err
	}

	b.n = oi.Size + 1

	if oi.Type != expectedType {
		// This is a programmer error and it should never happen. But if it does,
		// we need to leave the cat-file process in a good state
		if _, err := io.CopyN(ioutil.Discard, b.r, b.n); err != nil {
			return nil, err
		}
		b.n = 0

		return nil, NotFoundError{error: fmt.Errorf("expected %s to be a %s, got %s", oi.Oid, expectedType, oi.Type)}
	}

	return &Object{
		ObjectInfo: *oi,
		Reader: &batchReader{
			batchProcess: b,
			r:            io.LimitReader(b.r, oi.Size),
		},
	}, nil
}

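// consume records that nBytes of object data were read from the process.
// The trailing newline must remain unread, so n dropping below 1 indicates
// that too many bytes were read and is treated as a fatal bug.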
func (b *batchProcess) consume(nBytes int) {
	b.n -= int64(nBytes)
	if b.n < 1 {
		panic("too many bytes read from batch")
	}
}

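// hasUnreadData reports whether the current object still has unread data
// buffered in the cat-file process (the trailing newline does not count).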
func (b *batchProcess) hasUnreadData() bool {
	b.Lock()
	defer b.Unlock()

	return b.n > 1
}

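// batchReader streams a single object's data from the batch process while
// keeping the process's read bookkeeping (n) up to date.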
type batchReader struct {
	*batchProcess
	r io.Reader
}

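// Read reads from the length-limited object reader and records the number
// of bytes consumed, holding the process lock to protect shared state.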
func (br *batchReader) Read(p []byte) (int, error) {
	br.batchProcess.Lock()
	defer br.batchProcess.Unlock()

	n, err := br.r.Read(p)
	br.batchProcess.consume(n)
	return n, err
}
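
// exampleReadObject is an illustrative sketch, not part of the original
// file: it shows how a caller might use reader() to fetch an object's full
// contents in one go. The function name is hypothetical; real callers live
// elsewhere in the catfile package.
func exampleReadObject(b *batchProcess, revision git.Revision, expectedType string) ([]byte, error) {
	obj, err := b.reader(revision, expectedType)
	if err != nil {
		return nil, err
	}

	// Reading to EOF drains the object so the batch process is left in a
	// clean state (only the trailing newline remains) for the next request.
	return ioutil.ReadAll(obj.Reader)
}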