diff options
author | Vladimir Shushlin <vshushlin@gitlab.com> | 2020-10-01 11:55:10 +0300 |
---|---|---|
committer | Vladimir Shushlin <vshushlin@gitlab.com> | 2020-10-01 11:55:10 +0300 |
commit | 628b1ce93bd807c76c56e93155d245b156dcd714 (patch) | |
tree | ae6b05bfb3857fd0e74ed0ba982d5e7227fdb8e2 | |
parent | 5e2c8a6f5eb0b38f415c67e42bc5c314b029a873 (diff) | |
parent | 98168fa262eb166223306ac6a6e4e0ceb1073c8c (diff) |
Merge branch '423-add-more-zip-serving-metrics' into 'master'
Resolve "Add metrics for zip serving"
Closes #423
See merge request gitlab-org/gitlab-pages!363
-rw-r--r-- | acceptance_test.go | 15 | ||||
-rw-r--r-- | internal/httprange/http_reader.go | 21 | ||||
-rw-r--r-- | internal/httprange/transport.go | 91 | ||||
-rw-r--r-- | internal/httptransport/transport.go | 8 | ||||
-rw-r--r-- | internal/httptransport/transport_test.go | 7 | ||||
-rw-r--r-- | internal/vfs/zip/archive.go | 17 | ||||
-rw-r--r-- | internal/vfs/zip/vfs.go | 22 | ||||
-rw-r--r-- | metrics/metrics.go | 104 |
8 files changed, 251 insertions, 34 deletions
diff --git a/acceptance_test.go b/acceptance_test.go index 7817bf81..c946167e 100644 --- a/acceptance_test.go +++ b/acceptance_test.go @@ -486,9 +486,18 @@ func TestPrometheusMetricsCanBeScraped(t *testing.T) { require.Contains(t, string(body), "gitlab_pages_disk_serving_file_size_bytes_sum") require.Contains(t, string(body), "gitlab_pages_serving_time_seconds_sum") require.Contains(t, string(body), `gitlab_pages_domains_source_api_requests_total{status_code="200"}`) - require.Contains(t, string(body), `gitlab_pages_domains_source_api_call_duration{status_code="200"}`) - require.Contains(t, string(body), `gitlab_pages_object_storage_backend_requests_total{status_code="206"}`) - require.Contains(t, string(body), `gitlab_pages_object_storage_backend_requests_duration{status_code="206"}`) + require.Contains(t, string(body), `gitlab_pages_domains_source_api_call_duration_bucket`) + // httprange + require.Contains(t, string(body), `gitlab_pages_httprange_requests_total{status_code="206"}`) + require.Contains(t, string(body), "gitlab_pages_httprange_requests_duration_bucket") + require.Contains(t, string(body), "gitlab_pages_httprange_trace_duration") + require.Contains(t, string(body), "gitlab_pages_httprange_open_requests") + // zip archives + require.Contains(t, string(body), "gitlab_pages_zip_opened") + require.Contains(t, string(body), "gitlab_pages_zip_cache_requests") + require.Contains(t, string(body), "gitlab_pages_zip_cached_archives") + require.Contains(t, string(body), "gitlab_pages_zip_archive_entries_cached") + require.Contains(t, string(body), "gitlab_pages_zip_opened_entries_count") } func TestDisabledRedirects(t *testing.T) { diff --git a/internal/httprange/http_reader.go b/internal/httprange/http_reader.go index 17e92f65..efb3b981 100644 --- a/internal/httprange/http_reader.go +++ b/internal/httprange/http_reader.go @@ -54,11 +54,13 @@ var _ vfs.SeekableFile = &Reader{} var httpClient = &http.Client{ // The longest time the request can be executed Timeout: 30 * time.Minute, - Transport: httptransport.NewTransportWithMetrics( - "object_storage_client", - metrics.ObjectStorageBackendReqDuration, - metrics.ObjectStorageBackendReqTotal, - ), + Transport: &tracedTransport{ + next: httptransport.NewTransportWithMetrics( + "object_storage_client", + metrics.HTTPRangeRequestDuration, + metrics.HTTPRangeRequestsTotal, + ), + }, } // ensureResponse is set before reading from it. @@ -73,14 +75,18 @@ func (r *Reader) ensureResponse() error { return err } - // TODO: add Traceln info for HTTP calls with headers and response https://gitlab.com/gitlab-org/gitlab-pages/-/issues/448 + metrics.HTTPRangeOpenRequests.Inc() + res, err := httpClient.Do(req) if err != nil { + metrics.HTTPRangeOpenRequests.Dec() return err } err = r.setResponse(res) if err != nil { + metrics.HTTPRangeOpenRequests.Dec() + // cleanup body on failure from r.setResponse to avoid memory leak res.Body.Close() } @@ -198,6 +204,9 @@ func (r *Reader) Close() error { // no need to read until the end err := r.res.Body.Close() r.res = nil + + metrics.HTTPRangeOpenRequests.Dec() + return err } diff --git a/internal/httprange/transport.go b/internal/httprange/transport.go new file mode 100644 index 00000000..9bb6e805 --- /dev/null +++ b/internal/httprange/transport.go @@ -0,0 +1,91 @@ +package httprange + +import ( + "crypto/tls" + "net/http" + "net/http/httptrace" + "time" + + "gitlab.com/gitlab-org/labkit/log" + + "gitlab.com/gitlab-org/gitlab-pages/metrics" +) + +type tracedTransport struct { + next http.RoundTripper +} + +// withRoundTripper takes an original RoundTripper, reports metrics based on the +// gauge and counter collectors passed +func (tr *tracedTransport) RoundTrip(r *http.Request) (*http.Response, error) { + r = r.WithContext(httptrace.WithClientTrace(r.Context(), newTracer(time.Now()))) + + return tr.next.RoundTrip(r) +} + +func newTracer(start time.Time) *httptrace.ClientTrace { + trace := &httptrace.ClientTrace{ + GetConn: func(host string) { + httpTraceObserve("httptrace.ClientTrace.GetConn", start) + + log.WithFields(log.Fields{ + "host": host, + }).Traceln("httptrace.ClientTrace.GetConn") + }, + GotConn: func(connInfo httptrace.GotConnInfo) { + httpTraceObserve("httptrace.ClientTrace.GotConn", start) + + log.WithFields(log.Fields{ + "reused": connInfo.Reused, + "was_idle": connInfo.WasIdle, + "idle_time_ms": connInfo.IdleTime.Milliseconds(), + }).Traceln("httptrace.ClientTrace.GotConn") + }, + GotFirstResponseByte: func() { + httpTraceObserve("httptrace.ClientTrace.GotFirstResponseByte", start) + }, + DNSStart: func(d httptrace.DNSStartInfo) { + httpTraceObserve("httptrace.ClientTrace.DNSStart", start) + }, + DNSDone: func(d httptrace.DNSDoneInfo) { + httpTraceObserve("httptrace.ClientTrace.DNSDone", start) + + log.WithFields(log.Fields{}).WithError(d.Err). + Traceln("httptrace.ClientTrace.DNSDone") + }, + ConnectStart: func(net, addr string) { + httpTraceObserve("httptrace.ClientTrace.ConnectStart", start) + + log.WithFields(log.Fields{ + "network": net, + "address": addr, + }).Traceln("httptrace.ClientTrace.ConnectStart") + }, + ConnectDone: func(net string, addr string, err error) { + httpTraceObserve("httptrace.ClientTrace.ConnectDone", start) + + log.WithFields(log.Fields{ + "network": net, + "address": addr, + }).WithError(err).Traceln("httptrace.ClientTrace.ConnectDone") + }, + TLSHandshakeStart: func() { + httpTraceObserve("httptrace.ClientTrace.TLSHandshakeStart", start) + }, + TLSHandshakeDone: func(connState tls.ConnectionState, err error) { + httpTraceObserve("httptrace.ClientTrace.TLSHandshakeDone", start) + + log.WithFields(log.Fields{ + "version": connState.Version, + "connection_resumed": connState.DidResume, + }).WithError(err).Traceln("httptrace.ClientTrace.TLSHandshakeDone") + }, + } + + return trace +} + +func httpTraceObserve(label string, start time.Time) { + metrics.HTTPRangeTraceDuration.WithLabelValues(label). + Observe(time.Since(start).Seconds()) +} diff --git a/internal/httptransport/transport.go b/internal/httptransport/transport.go index 8f8fa387..304ac1c6 100644 --- a/internal/httptransport/transport.go +++ b/internal/httptransport/transport.go @@ -27,7 +27,7 @@ var ( type meteredRoundTripper struct { next http.RoundTripper name string - durations *prometheus.GaugeVec + durations *prometheus.HistogramVec counter *prometheus.CounterVec } @@ -46,11 +46,11 @@ func newInternalTransport() *http.Transport { // NewTransportWithMetrics will create a custom http.RoundTripper that can be used with an http.Client. // The RoundTripper will report metrics based on the collectors passed. -func NewTransportWithMetrics(name string, gaugeVec *prometheus.GaugeVec, counterVec *prometheus.CounterVec) http.RoundTripper { +func NewTransportWithMetrics(name string, histogramVec *prometheus.HistogramVec, counterVec *prometheus.CounterVec) http.RoundTripper { return &meteredRoundTripper{ next: InternalTransport, name: name, - durations: gaugeVec, + durations: histogramVec, counter: counterVec, } } @@ -93,7 +93,7 @@ func (mrt *meteredRoundTripper) RoundTrip(r *http.Request) (*http.Response, erro mrt.logResponse(r, resp) statusCode := strconv.Itoa(resp.StatusCode) - mrt.durations.WithLabelValues(statusCode).Set(time.Since(start).Seconds()) + mrt.durations.WithLabelValues(statusCode).Observe(time.Since(start).Seconds()) mrt.counter.WithLabelValues(statusCode).Inc() return resp, nil diff --git a/internal/httptransport/transport_test.go b/internal/httptransport/transport_test.go index 60fbbb47..a4105bef 100644 --- a/internal/httptransport/transport_test.go +++ b/internal/httptransport/transport_test.go @@ -43,7 +43,7 @@ func Test_withRoundTripper(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - gaugeVec := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + histVec := prometheus.NewHistogramVec(prometheus.HistogramOpts{ Name: t.Name(), }, []string{"status_code"}) @@ -58,7 +58,7 @@ func Test_withRoundTripper(t *testing.T) { err: tt.err, } - mtr := &meteredRoundTripper{next: next, durations: gaugeVec, counter: counterVec} + mtr := &meteredRoundTripper{next: next, durations: histVec, counter: counterVec} r := httptest.NewRequest("GET", "/", nil) res, err := mtr.RoundTrip(r) @@ -72,9 +72,6 @@ func Test_withRoundTripper(t *testing.T) { require.NotNil(t, res) statusCode := strconv.Itoa(res.StatusCode) - gaugeValue := testutil.ToFloat64(gaugeVec.WithLabelValues(statusCode)) - require.Greater(t, gaugeValue, float64(0)) - counterCount := testutil.ToFloat64(counterVec.WithLabelValues(statusCode)) require.Equal(t, float64(1), counterCount, statusCode) }) diff --git a/internal/vfs/zip/archive.go b/internal/vfs/zip/archive.go index ce7d2ec8..5fc55b0d 100644 --- a/internal/vfs/zip/archive.go +++ b/internal/vfs/zip/archive.go @@ -16,6 +16,7 @@ import ( "gitlab.com/gitlab-org/gitlab-pages/internal/httprange" "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" + "gitlab.com/gitlab-org/gitlab-pages/metrics" ) const ( @@ -58,7 +59,7 @@ func newArchive(path string, openTimeout time.Duration) *zipArchive { } } -func (a *zipArchive) openArchive(parentCtx context.Context) error { +func (a *zipArchive) openArchive(parentCtx context.Context) (err error) { // return early if openArchive was done already in a concurrent request select { case <-a.done: @@ -104,6 +105,7 @@ func (a *zipArchive) readArchive() { a.resource, a.err = httprange.NewResource(ctx, a.path) if a.err != nil { + metrics.ZipOpened.WithLabelValues("error").Inc() return } @@ -113,7 +115,8 @@ func (a *zipArchive) readArchive() { a.archive, a.err = zip.NewReader(a.reader, a.resource.Size) }) - if a.archive == nil { + if a.archive == nil || a.err != nil { + metrics.ZipOpened.WithLabelValues("error").Inc() return } @@ -127,6 +130,11 @@ func (a *zipArchive) readArchive() { // recycle memory a.archive.File = nil + + fileCount := float64(len(a.files)) + metrics.ZipOpened.WithLabelValues("ok").Inc() + metrics.ZipOpenedEntriesCount.Add(fileCount) + metrics.ZipArchiveEntriesCached.Add(fileCount) } func (a *zipArchive) findFile(name string) *zip.File { @@ -213,3 +221,8 @@ func (a *zipArchive) Readlink(ctx context.Context, name string) (string, error) // only return the n bytes read from the link return string(symlink[:n]), nil } + +// onEvicted called by the zipVFS.cache when an archive is removed from the cache +func (a *zipArchive) onEvicted() { + metrics.ZipArchiveEntriesCached.Sub(float64(len(a.files))) +} diff --git a/internal/vfs/zip/vfs.go b/internal/vfs/zip/vfs.go index 283e01c2..fd0855f7 100644 --- a/internal/vfs/zip/vfs.go +++ b/internal/vfs/zip/vfs.go @@ -9,6 +9,7 @@ import ( "github.com/patrickmn/go-cache" "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" + "gitlab.com/gitlab-org/gitlab-pages/metrics" ) const ( @@ -29,10 +30,18 @@ type zipVFS struct { // New creates a zipVFS instance that can be used by a serving request func New() vfs.VFS { - return &zipVFS{ + zipVFS := &zipVFS{ // TODO: add cache operation callbacks https://gitlab.com/gitlab-org/gitlab-pages/-/issues/465 cache: cache.New(defaultCacheExpirationInterval, defaultCacheCleanupInterval), } + + zipVFS.cache.OnEvicted(func(s string, i interface{}) { + metrics.ZipCachedArchives.Dec() + + i.(*zipArchive).onEvicted() + }) + + return zipVFS } // Root opens an archive given a URL path and returns an instance of zipArchive @@ -69,6 +78,8 @@ func (fs *zipVFS) Name() string { func (fs *zipVFS) findOrOpenArchive(ctx context.Context, path string) (*zipArchive, error) { archive, expiry, found := fs.cache.GetWithExpiration(path) if found { + metrics.ZipServingArchiveCache.WithLabelValues("hit").Inc() + // TODO: do not refreshed errored archives https://gitlab.com/gitlab-org/gitlab-pages/-/merge_requests/351 if time.Until(expiry) < defaultCacheRefreshInterval { // refresh item @@ -82,9 +93,16 @@ func (fs *zipVFS) findOrOpenArchive(ctx context.Context, path string) (*zipArchi if fs.cache.Add(path, archive, cache.DefaultExpiration) != nil { return nil, errAlreadyCached } + metrics.ZipServingArchiveCache.WithLabelValues("miss").Inc() + metrics.ZipCachedArchives.Inc() } zipArchive := archive.(*zipArchive) + err := zipArchive.openArchive(ctx) - return zipArchive, err + if err != nil { + return nil, err + } + + return zipArchive, nil } diff --git a/metrics/metrics.go b/metrics/metrics.go index dcfac76d..669b4423 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -73,7 +73,7 @@ var ( }, []string{"status_code"}) // DomainsSourceAPICallDuration is the time it takes to get a response from the GitLab API in seconds - DomainsSourceAPICallDuration = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + DomainsSourceAPICallDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Name: "gitlab_pages_domains_source_api_call_duration", Help: "The time (in seconds) it takes to get a response from the GitLab domains API", }, []string{"status_code"}) @@ -99,19 +99,92 @@ var ( Help: "The number of VFS operations", }, []string{"vfs_name", "operation", "success"}) - // ObjectStorageBackendReqTotal is the number of requests made to Object Storage by zip file serving + // HTTPRangeRequestsTotal is the number of requests made to a + // httprange.Resource by opening and/or reading from it. Mostly used by the + // internal/vfs/zip package to load archives from Object Storage. // Could be bigger than the number of pages served. - ObjectStorageBackendReqTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "gitlab_pages_object_storage_backend_requests_total", - Help: "The number of requests made to Object Storage by zip file serving with different status codes." + + HTTPRangeRequestsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "gitlab_pages_httprange_requests_total", + Help: "The number of requests made by the zip VFS to a Resource with " + + "different status codes." + "Could be bigger than the number of requests served", }, []string{"status_code"}) - // ObjectStorageBackendReqDuration is the time it takes to get a response from Object Storage in seconds for zip file servings - ObjectStorageBackendReqDuration = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "gitlab_pages_object_storage_backend_requests_duration", - Help: "The time (in seconds) it takes to get a response from the Object Storage provider for zip file serving", - }, []string{"status_code"}) + // HTTPRangeRequestDuration is the time it takes to get a response + // from an httprange.Resource hosted in object storage for a request made by + // the zip VFS + HTTPRangeRequestDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "gitlab_pages_httprange_requests_duration", + Help: "The time (in seconds) it takes to get a response from " + + "a httprange.Resource hosted in object storage for a request " + + "made by the zip VFS", + }, + []string{"status_code"}, + ) + + // HTTPRangeTraceDuration httprange requests duration in seconds for + // different stages of an http request (see httptrace.ClientTrace) + HTTPRangeTraceDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "gitlab_pages_httprange_trace_duration", + Help: "httprange request tracing duration in seconds for " + + "different connection stages (see Go's httptrace.ClientTrace)", + Buckets: []float64{0.001, 0.005, 0.01, 0.02, 0.05, 0.100, 0.250, + 0.500, 1, 2, 5, 10, 20, 50}, + }, + []string{"request_stage"}, + ) + + // HTTPRangeOpenRequests is the number of open requests made by httprange.Reader + HTTPRangeOpenRequests = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "gitlab_pages_httprange_open_requests", + Help: "The number of open requests made by httprange.Reader", + }) + + // ZipOpened is the number of zip archives that have been opened + ZipOpened = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "gitlab_pages_zip_opened", + Help: "The total number of zip archives that have been opened", + }, + []string{"state"}, + ) + + // ZipServingArchiveCache is the number of zip archive cache hits/misses + ZipServingArchiveCache = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "gitlab_pages_zip_cache_requests", + Help: "The number of zip archives cache hits/misses", + }, + []string{"cache"}, + ) + + // ZipCachedArchives is the number of zip archives currently in the cache + ZipCachedArchives = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "gitlab_pages_zip_cached_archives", + Help: "The number of zip archives currently in the cache", + }, + ) + + // ZipArchiveEntriesCached is the number of files per zip archive currently + // in the cache + ZipArchiveEntriesCached = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "gitlab_pages_zip_archive_entries_cached", + Help: "The number of files per zip archive currently in the cache", + }, + ) + + // ZipOpenedEntriesCount is the number of files per archive total count + // over time + ZipOpenedEntriesCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "gitlab_pages_zip_opened_entries_count", + Help: "The number of files per zip archive total count over time", + }, + ) ) // MustRegister collectors with the Prometheus client @@ -132,7 +205,14 @@ func MustRegister() { DiskServingFileSize, ServingTime, VFSOperations, - ObjectStorageBackendReqTotal, - ObjectStorageBackendReqDuration, + HTTPRangeRequestsTotal, + HTTPRangeRequestDuration, + HTTPRangeTraceDuration, + HTTPRangeOpenRequests, + ZipOpened, + ZipOpenedEntriesCount, + ZipServingArchiveCache, + ZipArchiveEntriesCached, + ZipCachedArchives, ) } |