Welcome to mirror list, hosted at ThFree Co, Russian Federation.

http_reader.go « httprange « internal - gitlab.com/gitlab-org/gitlab-pages.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: c573781397c3e8753f1c45724570b1baf2bae5fa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
package httprange

import (
	"context"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/http/httputil"
	"time"

	"gitlab.com/gitlab-org/gitlab-pages/internal/httptransport"
	"gitlab.com/gitlab-org/gitlab-pages/internal/vfs"
	"gitlab.com/gitlab-org/gitlab-pages/metrics"
)

// Errors exposed to users of the package.
var (
	// ErrRangeRequestsNotSupported is reported through Read when the remote
	// server answers a ranged request with 416 Requested Range Not Satisfiable.
	ErrRangeRequestsNotSupported = errors.New("range requests are not supported by the remote server")

	// ErrInvalidRange is reported through Read when the reader's range does not
	// fit inside the resource, or when the current offset is outside that range
	// (for example after everything in the range has been read).
	ErrInvalidRange = errors.New("invalid range")

	// ErrContentHasChanged is reported through Read when the server replies with
	// 200 OK and either the read does not start at byte 0 or the returned ETag
	// differs from the one recorded on the Resource.
	ErrContentHasChanged = errors.New("content has changed since first request")
)

// Errors returned by Seek only; they are kept private to the package.
var (
	errSeekInvalidWhence = errors.New("invalid whence")
	errSeekOutsideRange  = errors.New("outside of range")
)

// Reader reads one byte range of a Resource over HTTP.
// It implements the io.Reader, io.Seeker and io.Closer interfaces.
// No request is made until the first Read; the response is then kept in res
// until Close is called or Seek moves to a different offset.
type Reader struct {
	// ctx is attached to every range request issued by this reader
	ctx context.Context
	// Resource is the remote resource to read from; its URL, Size, ETag and
	// LastModified fields are consulted when requests are built and validated
	Resource *Resource
	// res is the response currently serving data, nil while no request is open
	res *http.Response
	// rangeStart is the absolute position in the resource where the readable range begins
	rangeStart int64
	// rangeSize is the length of the readable range in bytes
	rangeSize int64
	// offset is the absolute position in the resource of the next byte to be read
	offset int64
}

// compile-time check that *Reader satisfies vfs.SeekableFile
var _ vfs.SeekableFile = (*Reader)(nil)

// httpClient is the client used by every Reader in this package to issue
// range requests.
//
// TODO: make this configurable/take an http client when creating a reader/ranged reader
//  instead https://gitlab.com/gitlab-org/gitlab-pages/-/issues/457
var httpClient = &http.Client{
	// transport created by httptransport.NewTransportWithMetrics, given the
	// name "httprange_client" and the HTTPRange* metrics
	Transport: httptransport.NewTransportWithMetrics(
		"httprange_client",
		metrics.HTTPRangeTraceDuration,
		metrics.HTTPRangeRequestDuration,
		metrics.HTTPRangeRequestsTotal,
	),
	// upper bound for one request; per the net/http documentation this also
	// covers the time spent reading the response body
	Timeout: 30 * time.Minute,
}

// ensureResponse guarantees that r.res holds a response before data is read.
// If a response is already attached nothing happens; otherwise the range
// request is built, sent with httpClient and validated by setResponse.
//
// metrics.HTTPRangeOpenRequests is incremented before the request is sent and
// decremented again on each failure path of this method.
func (r *Reader) ensureResponse() error {
	if r.res != nil {
		return nil
	}

	request, err := r.prepareRequest()
	if err != nil {
		return err
	}

	metrics.HTTPRangeOpenRequests.Inc()

	// NOTE(review): the two dumps in this method print the outgoing request
	// and the response headers to stdout for every range request. This looks
	// like leftover debugging and may expose URLs/headers — confirm, and if it
	// is removed drop the net/http/httputil import with it.
	requestDump, dumpErr := httputil.DumpRequestOut(request, true)
	fmt.Printf("req: %s err: %+v\n", requestDump, dumpErr)

	response, err := httpClient.Do(request)
	if err != nil {
		metrics.HTTPRangeOpenRequests.Dec()
		return err
	}

	responseDump, dumpErr := httputil.DumpResponse(response, false)
	fmt.Printf("res: %s err: %+v\n", responseDump, dumpErr)

	if err := r.setResponse(response); err != nil {
		metrics.HTTPRangeOpenRequests.Dec()

		// the rejected response is not stored on the reader, so its body has
		// to be closed here to avoid leaking it
		response.Body.Close()

		return err
	}

	return nil
}

// prepareRequest builds the GET request that fetches the bytes from the
// current offset up to the last byte of the reader's range.
//
// It returns ErrInvalidRange when rangeStart or rangeSize is negative, when
// the range extends past r.Resource.Size, or when r.offset lies outside
// [rangeStart, rangeStart+rangeSize). Errors from constructing the request
// (such as an unparsable r.Resource.URL or a nil r.ctx) are returned as they are.
func (r *Reader) prepareRequest() (*http.Request, error) {
	if r.rangeStart < 0 || r.rangeSize < 0 || r.rangeStart+r.rangeSize > r.Resource.Size {
		return nil, ErrInvalidRange
	}

	// first position that is no longer part of the range
	rangeEnd := r.rangeStart + r.rangeSize
	if r.offset < r.rangeStart || r.offset >= rangeEnd {
		return nil, ErrInvalidRange
	}

	// attach the context at construction time: a nil context is reported as an
	// error instead of panicking, and no extra copy of the request is made
	req, err := http.NewRequestWithContext(r.ctx, http.MethodGet, r.Resource.URL, nil)
	if err != nil {
		return nil, err
	}

	if r.Resource.ETag != "" {
		// NOTE(review): ETag is normally a response header. If the intent is
		// "serve the range only if the content is unchanged", If-Range with a
		// strong entity tag looks like the matching request header — confirm
		// against the storage backends before changing it.
		req.Header.Set("ETag", r.Resource.ETag)
	} else if r.Resource.LastModified != "" {
		// Last-Modified should be a fallback mechanism in case ETag is not present
		// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Last-Modified
		req.Header.Set("If-Range", r.Resource.LastModified)
	}

	// the end position of a Range header is inclusive, hence the -1
	req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", r.offset, rangeEnd-1))

	return req, nil
}

// setResponse validates res and, if it is acceptable, stores it as the
// response the reader serves data from.
//
// Outcomes by status code:
//   - 206 Partial Content: accepted.
//   - 200 OK: accepted only when the read starts at offset 0 and the
//     Resource's ETag, if set, equals the ETag header of res; otherwise
//     ErrContentHasChanged is returned.
//   - 416 Requested Range Not Satisfiable: ErrRangeRequestsNotSupported.
//   - anything else: an error carrying the status code and status text.
//
// The caller remains responsible for closing res.Body when an error is returned.
func (r *Reader) setResponse(res *http.Response) error {
	// TODO: add metrics https://gitlab.com/gitlab-org/gitlab-pages/-/issues/448
	status := res.StatusCode

	if status == http.StatusRequestedRangeNotSatisfiable {
		return ErrRangeRequestsNotSupported
	}

	if status != http.StatusOK && status != http.StatusPartialContent {
		return fmt.Errorf("httprange: read response %d: %q", res.StatusCode, res.Status)
	}

	if status == http.StatusOK {
		// some servers return 200 OK for bytes=0-
		// TODO: should we handle r.Resource.Last-Modified as well?
		if r.offset > 0 {
			return ErrContentHasChanged
		}

		if expected := r.Resource.ETag; expected != "" && expected != res.Header.Get("ETag") {
			return ErrContentHasChanged
		}
	}

	// 206 (or an accepted 200): the requested range was served
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/206
	r.res = res

	return nil
}

// Seek moves the read position and returns the new position, measured from
// the start of the reader's range (not from the start of the resource).
//
// whence selects the reference point for offset:
// io.SeekStart is the start of the range,
// io.SeekCurrent is the current position, and
// io.SeekEnd is the end of the range.
//
// errSeekInvalidWhence is returned for any other whence, errSeekOutsideRange
// when the target falls before the start or after the end of the range; in
// both cases the position is left unchanged. Seeking exactly to the end of
// the range is allowed.
func (r *Reader) Seek(offset int64, whence int) (int64, error) {
	rangeEnd := r.rangeStart + r.rangeSize

	var base int64
	switch whence {
	case io.SeekStart:
		base = r.rangeStart
	case io.SeekCurrent:
		base = r.offset
	case io.SeekEnd:
		base = rangeEnd
	default:
		return 0, errSeekInvalidWhence
	}

	target := base + offset
	if target < r.rangeStart || target > rangeEnd {
		return 0, errSeekOutsideRange
	}

	if target != r.offset {
		// the open response (if any) streams from the old position, so drop
		// it; the next Read issues a new request. The Close error is ignored.
		r.Close()
	}
	r.offset = target

	return target - r.rangeStart, nil
}

// Read reads up to len(buf) bytes from the current position into buf and
// returns the number of bytes read together with any error.
//
// The range request is sent on the first call (see ensureResponse); errors
// from preparing, sending or validating it are returned with n == 0. An empty
// buf returns (0, nil) without touching the network. Errors from the response
// body, including io.EOF, are passed through unchanged.
func (r *Reader) Read(buf []byte) (int, error) {
	if len(buf) == 0 {
		return 0, nil
	}

	if err := r.ensureResponse(); err != nil {
		return 0, err
	}

	n, err := r.res.Body.Read(buf)

	// The n bytes were consumed from the body and handed to the caller whether
	// or not err is set, so the position always advances by n. Skipping this on
	// error would leave offset behind the stream, and a later Seek back to that
	// stale offset would not re-request the data.
	r.offset += int64(n)

	return n, err
}

// Close releases the response currently attached to the reader, if any, and
// returns the error from closing its body. The reader can be used again
// afterwards: the next Read sends a new request.
// Calling Close with no open response does nothing and returns nil.
func (r *Reader) Close() error {
	current := r.res
	if current == nil {
		return nil
	}

	// the remainder of the body is not drained before closing
	closeErr := current.Body.Close()
	r.res = nil

	metrics.HTTPRangeOpenRequests.Dec()

	return closeErr
}

// NewReader returns a Reader over size bytes of resource starting at offset.
// The read position starts at offset, and ctx is used for the requests the
// reader makes. No request is sent until the first Read.
func NewReader(ctx context.Context, resource *Resource, offset, size int64) *Reader {
	return &Reader{
		ctx:        ctx,
		Resource:   resource,
		rangeStart: offset,
		rangeSize:  size,
		offset:     offset,
	}
}