diff options
author | Jaime Martinez <jmartinez@gitlab.com> | 2020-09-30 08:24:12 +0300 |
---|---|---|
committer | Jaime Martinez <jmartinez@gitlab.com> | 2020-09-30 08:24:12 +0300 |
commit | 031f6a1e63b52bf18db4087da7f678428f0ffdb8 (patch) | |
tree | f6beea5e90337f71dd03c80cd24de25b59cca8ee | |
parent | 9f34b7999ed3b7c1f1cef6faa42d5d42ea3c5569 (diff) |
Use histograms instead of gauges for transport durations
-rw-r--r-- | acceptance_test.go | 12 | ||||
-rw-r--r-- | internal/httprange/transport.go | 36 | ||||
-rw-r--r-- | internal/httptransport/transport.go | 8 | ||||
-rw-r--r-- | internal/httptransport/transport_test.go | 7 | ||||
-rw-r--r-- | internal/vfs/zip/archive.go | 12 | ||||
-rw-r--r-- | internal/vfs/zip/vfs.go | 5 | ||||
-rw-r--r-- | metrics/metrics.go | 12 |
7 files changed, 48 insertions, 44 deletions
diff --git a/acceptance_test.go b/acceptance_test.go index e131d6de..be47c2ab 100644 --- a/acceptance_test.go +++ b/acceptance_test.go @@ -486,16 +486,14 @@ func TestPrometheusMetricsCanBeScraped(t *testing.T) { require.Contains(t, string(body), "gitlab_pages_disk_serving_file_size_bytes_sum") require.Contains(t, string(body), "gitlab_pages_serving_time_seconds_sum") require.Contains(t, string(body), `gitlab_pages_domains_source_api_requests_total{status_code="200"}`) - require.Contains(t, string(body), `gitlab_pages_domains_source_api_call_duration{status_code="200"}`) + require.Contains(t, string(body), `gitlab_pages_domains_source_api_call_duration_bucket`) // object storage require.Contains(t, string(body), `gitlab_pages_object_storage_backend_requests_total{status_code="206"}`) - require.Contains(t, string(body), `gitlab_pages_object_storage_backend_requests_duration{status_code="206"}`) - require.Contains(t, string(body), `gitlab_pages_object_storage_backend_request_responsiveness_ms_count`) - require.Contains(t, string(body), `gitlab_pages_object_storage_open_zip_archives_total`) - require.Contains(t, string(body), `gitlab_pages_object_storage_failed_open_zip_archives_total`) + require.Contains(t, string(body), `gitlab_pages_object_storage_backend_requests_duration_bucket`) + require.Contains(t, string(body), `gitlab_pages_object_storage_backend_httptrace_duration`) + require.Contains(t, string(body), `gitlab_pages_zip_archives_total`) require.Contains(t, string(body), `gitlab_pages_object_storage_files_per_zip_archive`) - require.Contains(t, string(body), `gitlab_pages_object_storage_zip_archive_cache_hit`) - require.Contains(t, string(body), `gitlab_pages_object_storage_zip_archive_cache_miss`) + require.Contains(t, string(body), `gitlab_pages_object_storage_zip_archive_cache`) } func TestDisabledRedirects(t *testing.T) { diff --git a/internal/httprange/transport.go b/internal/httprange/transport.go index f0a8cf84..3d6140af 100644 --- a/internal/httprange/transport.go +++ b/internal/httprange/transport.go @@ -26,61 +26,62 @@ func (tr *tracedTransport) RoundTrip(r *http.Request) (*http.Response, error) { func newTracer(start time.Time) *httptrace.ClientTrace { trace := &httptrace.ClientTrace{ GetConn: func(host string) { - observe("get_connection", start) + httpTraceObserve("httptrace.ClientTrace.GetConn", start) log.WithFields(log.Fields{ "host": host, - }).Traceln("get_connection") + }).Traceln("httptrace.ClientTrace.GetConn") }, GotConn: func(connInfo httptrace.GotConnInfo) { - observe("get_connection", start) + httpTraceObserve("httptrace.ClientTrace.GotConn", start) log.WithFields(log.Fields{ "reused": connInfo.Reused, "was_idle": connInfo.WasIdle, "idle_time_ms": connInfo.IdleTime.Milliseconds(), - }).Traceln("get_connection") + }).Traceln("httptrace.ClientTrace.GotConn") }, PutIdleConn: nil, GotFirstResponseByte: func() { - observe("got_first_response_byte", start) + httpTraceObserve("httptrace.ClientTrace.GotFirstResponseByte", start) }, Got100Continue: nil, Got1xxResponse: nil, DNSStart: func(d httptrace.DNSStartInfo) { - observe("dns_lookup_start", start) + httpTraceObserve("httptrace.ClientTrace.DNSStart", start) }, DNSDone: func(d httptrace.DNSDoneInfo) { - observe("dns_lookup_done", start) + httpTraceObserve("httptrace.ClientTrace.DNSDone", start) - log.WithFields(log.Fields{}).WithError(d.Err).Traceln("dns_lookup_done") + log.WithFields(log.Fields{}).WithError(d.Err). + Traceln("httptrace.ClientTrace.DNSDone") }, ConnectStart: func(net, addr string) { - observe("connect_start", start) + httpTraceObserve("httptrace.ClientTrace.ConnectStart", start) log.WithFields(log.Fields{ "network": net, "address": addr, - }).Traceln("connect_start") + }).Traceln("httptrace.ClientTrace.ConnectStart") }, ConnectDone: func(net string, addr string, err error) { - observe("connect_done", start) + httpTraceObserve("httptrace.ClientTrace.ConnectDone", start) log.WithFields(log.Fields{ "network": net, "address": addr, - }).WithError(err).Traceln("connect_done") + }).WithError(err).Traceln("httptrace.ClientTrace.ConnectDone") }, TLSHandshakeStart: func() { - observe("tls_handshake_start", start) + httpTraceObserve("httptrace.ClientTrace.TLSHandshakeStart", start) }, TLSHandshakeDone: func(connState tls.ConnectionState, err error) { - observe("tls_handshake_done", start) + httpTraceObserve("httptrace.ClientTrace.TLSHandshakeDone", start) log.WithFields(log.Fields{ "version": connState.Version, "connection_resumed": connState.DidResume, - }).WithError(err).Traceln("tls_handshake_done") + }).WithError(err).Traceln("httptrace.ClientTrace.TLSHandshakeDone") }, WroteHeaderField: nil, WroteHeaders: nil, @@ -91,6 +92,7 @@ func newTracer(start time.Time) *httptrace.ClientTrace { return trace } -func observe(label string, start time.Time) { - metrics.ObjectStorageTraceDuration.WithLabelValues(label).Observe(float64(time.Since(start).Milliseconds())) +func httpTraceObserve(label string, start time.Time) { + metrics.ObjectStorageTraceDuration.WithLabelValues(label). + Observe(time.Since(start).Seconds()) } diff --git a/internal/httptransport/transport.go b/internal/httptransport/transport.go index 8f8fa387..304ac1c6 100644 --- a/internal/httptransport/transport.go +++ b/internal/httptransport/transport.go @@ -27,7 +27,7 @@ var ( type meteredRoundTripper struct { next http.RoundTripper name string - durations *prometheus.GaugeVec + durations *prometheus.HistogramVec counter *prometheus.CounterVec } @@ -46,11 +46,11 @@ func newInternalTransport() *http.Transport { // NewTransportWithMetrics will create a custom http.RoundTripper that can be used with an http.Client. // The RoundTripper will report metrics based on the collectors passed. -func NewTransportWithMetrics(name string, gaugeVec *prometheus.GaugeVec, counterVec *prometheus.CounterVec) http.RoundTripper { +func NewTransportWithMetrics(name string, histogramVec *prometheus.HistogramVec, counterVec *prometheus.CounterVec) http.RoundTripper { return &meteredRoundTripper{ next: InternalTransport, name: name, - durations: gaugeVec, + durations: histogramVec, counter: counterVec, } } @@ -93,7 +93,7 @@ func (mrt *meteredRoundTripper) RoundTrip(r *http.Request) (*http.Response, erro mrt.logResponse(r, resp) statusCode := strconv.Itoa(resp.StatusCode) - mrt.durations.WithLabelValues(statusCode).Set(time.Since(start).Seconds()) + mrt.durations.WithLabelValues(statusCode).Observe(time.Since(start).Seconds()) mrt.counter.WithLabelValues(statusCode).Inc() return resp, nil diff --git a/internal/httptransport/transport_test.go b/internal/httptransport/transport_test.go index 60fbbb47..a4105bef 100644 --- a/internal/httptransport/transport_test.go +++ b/internal/httptransport/transport_test.go @@ -43,7 +43,7 @@ func Test_withRoundTripper(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - gaugeVec := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + histVec := prometheus.NewHistogramVec(prometheus.HistogramOpts{ Name: t.Name(), }, []string{"status_code"}) @@ -58,7 +58,7 @@ func Test_withRoundTripper(t *testing.T) { err: tt.err, } - mtr := &meteredRoundTripper{next: next, durations: gaugeVec, counter: counterVec} + mtr := &meteredRoundTripper{next: next, durations: histVec, counter: counterVec} r := httptest.NewRequest("GET", "/", nil) res, err := mtr.RoundTrip(r) @@ -72,9 +72,6 @@ func Test_withRoundTripper(t *testing.T) { require.NotNil(t, res) statusCode := strconv.Itoa(res.StatusCode) - gaugeValue := testutil.ToFloat64(gaugeVec.WithLabelValues(statusCode)) - require.Greater(t, gaugeValue, float64(0)) - counterCount := testutil.ToFloat64(counterVec.WithLabelValues(statusCode)) require.Equal(t, float64(1), counterCount, statusCode) }) diff --git a/internal/vfs/zip/archive.go b/internal/vfs/zip/archive.go index cfcdee76..7a9bcc3d 100644 --- a/internal/vfs/zip/archive.go +++ b/internal/vfs/zip/archive.go @@ -59,7 +59,16 @@ func newArchive(path string, openTimeout time.Duration) *zipArchive { } } -func (a *zipArchive) openArchive(parentCtx context.Context) error { +func (a *zipArchive) openArchive(parentCtx context.Context) (err error) { + defer func() { + // checking named return err value + if err != nil { + metrics.ZipServingOpenArchivesTotal.WithLabelValues("error").Inc() + } else { + metrics.ZipServingOpenArchivesTotal.WithLabelValues("ok").Inc() + } + }() + // return early if openArchive was done already in a concurrent request select { case <-a.done: @@ -130,7 +139,6 @@ func (a *zipArchive) readArchive() { a.archive.File = nil metrics.ZipServingFilesPerArchiveCount.Observe(float64(len(a.files))) - metrics.ZipServingOpenArchivesTotal.Inc() } func (a *zipArchive) findFile(name string) *zip.File { diff --git a/internal/vfs/zip/vfs.go b/internal/vfs/zip/vfs.go index 6d0ba616..985ec3bb 100644 --- a/internal/vfs/zip/vfs.go +++ b/internal/vfs/zip/vfs.go @@ -70,7 +70,7 @@ func (fs *zipVFS) Name() string { func (fs *zipVFS) findOrOpenArchive(ctx context.Context, path string) (*zipArchive, error) { archive, expiry, found := fs.cache.GetWithExpiration(path) if found { - metrics.ZipServingArchiveCache.Inc() + metrics.ZipServingArchiveCache.WithLabelValues("hit").Inc() // TODO: do not refreshed errored archives https://gitlab.com/gitlab-org/gitlab-pages/-/merge_requests/351 if time.Until(expiry) < defaultCacheRefreshInterval { @@ -86,14 +86,13 @@ func (fs *zipVFS) findOrOpenArchive(ctx context.Context, path string) (*zipArchi return nil, errAlreadyCached } - metrics.ZipServingArchiveCacheMiss.Inc() + metrics.ZipServingArchiveCache.WithLabelValues("miss").Inc() } zipArchive := archive.(*zipArchive) err := zipArchive.openArchive(ctx) if err != nil { - metrics.ZipServingFailedOpenArchivesTotal.Inc() return nil, err } diff --git a/metrics/metrics.go b/metrics/metrics.go index b722b9d9..effb717b 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -73,7 +73,7 @@ var ( }, []string{"status_code"}) // DomainsSourceAPICallDuration is the time it takes to get a response from the GitLab API in seconds - DomainsSourceAPICallDuration = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + DomainsSourceAPICallDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Name: "gitlab_pages_domains_source_api_call_duration", Help: "The time (in seconds) it takes to get a response from the GitLab domains API", }, []string{"status_code"}) @@ -111,7 +111,7 @@ var ( // from Object Storage in seconds for zip file servings ObjectStorageBackendReqDuration = prometheus.NewHistogramVec( prometheus.HistogramOpts{ - Name: "gitlab_pages_object_storage_backend_requests_duration_seconds", + Name: "gitlab_pages_object_storage_backend_requests_duration", Help: "The time (in seconds) it takes to get a response from the " + "Object Storage provider for zip file serving", }, @@ -122,9 +122,9 @@ var ( // seconds for different stages of an http request see httptrace.ClientTrace ObjectStorageTraceDuration = prometheus.NewHistogramVec( prometheus.HistogramOpts{ - Name: "gitlab_pages_object_storage_backend_httptrace_duration_seconds", - Help: "Object Storage request tracing duration for different " + - "stages (see Go's httptrace.ClientTrace)", + Name: "gitlab_pages_object_storage_backend_httptrace_duration", + Help: "Object Storage request tracing duration in seconds for " + + "different connection stages (see Go's httptrace.ClientTrace)", Buckets: []float64{0.001, 0.005, 0.01, 0.02, 0.05, 0.100, 0.250, 0.500, 1, 2, 5, 10, 20, 50}, }, @@ -154,7 +154,7 @@ var ( // ZipServingArchiveCache is the number of zip archive cache hits/misses ZipServingArchiveCache = prometheus.NewCounterVec( prometheus.CounterOpts{ - Name: "gitlab_pages_object_storage_zip_archive_cache_hit", + Name: "gitlab_pages_object_storage_zip_archive_cache", Help: "The number of object storage zip archives cache hits", }, []string{"cache"}, |