diff options
author | Jaime Martinez <jmartinez@gitlab.com> | 2021-11-09 12:25:23 +0300 |
---|---|---|
committer | Jaime Martinez <jmartinez@gitlab.com> | 2021-11-09 12:25:23 +0300 |
commit | dccd3723bd00163ba897b2f1b6f280a23a9a1424 (patch) | |
tree | 743ebb9786eb9107a0cd93677ea2f31277afe46c | |
parent | aa897ce9849d35cd7ff1121351f1033e91d0c062 (diff) |
feat: add more zip metricsadd-more-zip-metrics
and log when opening an archive takes longer than expected.
Related to https://gitlab.com/gitlab-org/gitlab/-/issues/344004
Changelog: added
-rw-r--r-- | internal/vfs/zip/archive.go | 52 | ||||
-rw-r--r-- | metrics/metrics.go | 24 | ||||
-rw-r--r-- | test/acceptance/metrics_test.go | 2 |
3 files changed, 71 insertions, 7 deletions
diff --git a/internal/vfs/zip/archive.go b/internal/vfs/zip/archive.go index 588fb76d..d9b33911 100644 --- a/internal/vfs/zip/archive.go +++ b/internal/vfs/zip/archive.go @@ -14,6 +14,7 @@ import ( "sync/atomic" "time" + "github.com/sirupsen/logrus" "gitlab.com/gitlab-org/labkit/log" "gitlab.com/gitlab-org/gitlab-pages/internal/httprange" @@ -90,7 +91,7 @@ func (a *zipArchive) openArchive(parentCtx context.Context, url string) (err err a.once.Do(func() { // read archive once in its own routine with its own timeout // if parentCtx is canceled, readArchive will continue regardless and will be cached in memory - go a.readArchive(url) + go a.readArchive(parentCtx, url) }) // wait for readArchive to be done or return if the parent context is canceled @@ -112,8 +113,20 @@ func (a *zipArchive) openArchive(parentCtx context.Context, url string) (err err // readArchive creates an httprange.Resource that can read the archive's contents and stores a slice of *zip.Files // that can be accessed later when calling any of th vfs.VFS operations -func (a *zipArchive) readArchive(url string) { - defer close(a.done) +func (a *zipArchive) readArchive(parentCtx context.Context, url string) { + start := time.Now() + defer func(start time.Time) { + close(a.done) + + if time.Since(start) > time.Second { + log.ContextLogger(parentCtx). + WithFields(logrus.Fields{ + "read_archive_duration": time.Since(start), + "url": url, + // TODO: adding more fields field would require some extra refactoring + }).Warn("reading archive took longer than 1s") + } + }(start) // readArchive with a timeout separate from openArchive's ctx, cancel := context.WithTimeout(context.Background(), a.openTimeout) @@ -158,6 +171,7 @@ func (a *zipArchive) readArchive(url string) { metrics.ZipOpened.WithLabelValues("ok").Inc() metrics.ZipOpenedEntriesCount.Add(fileCount) metrics.ZipArchiveEntriesCached.Add(fileCount) + metrics.ZipReadArchiveHeaderDuration.Observe(time.Since(start).Seconds()) } // addPathDirectory adds a directory for a given path @@ -190,7 +204,12 @@ func (a *zipArchive) findDirectory(name string) *zip.FileHeader { } // Open finds the file by name inside the zipArchive and returns a reader that can be served by the VFS -func (a *zipArchive) Open(ctx context.Context, name string) (vfs.File, error) { +func (a *zipArchive) Open(ctx context.Context, name string) (f vfs.File, err error) { + start := time.Now() + defer func(start time.Time, err error) { + reportOpenFileDuration(start, "Open", err) + }(start, err) + file := a.findFile(name) if file == nil { if a.findDirectory(name) != nil { @@ -224,7 +243,12 @@ func (a *zipArchive) Open(ctx context.Context, name string) (vfs.File, error) { } // Lstat finds the file by name inside the zipArchive and returns its FileInfo -func (a *zipArchive) Lstat(ctx context.Context, name string) (os.FileInfo, error) { +func (a *zipArchive) Lstat(ctx context.Context, name string) (fi os.FileInfo, err error) { + start := time.Now() + defer func(start time.Time, err error) { + reportOpenFileDuration(start, "Lstat", err) + }(start, err) + file := a.findFile(name) if file != nil { return file.FileInfo(), nil @@ -238,8 +262,13 @@ func (a *zipArchive) Lstat(ctx context.Context, name string) (os.FileInfo, error return nil, os.ErrNotExist } -// ReadLink finds the file by name inside the zipArchive and returns the contents of the symlink -func (a *zipArchive) Readlink(ctx context.Context, name string) (string, error) { +// Readlink finds the file by name inside the zipArchive and returns the contents of the symlink +func (a *zipArchive) Readlink(ctx context.Context, name string) (f string, err error) { + start := time.Now() + defer func(start time.Time, err error) { + reportOpenFileDuration(start, "Readlink", err) + }(start, err) + file := a.findFile(name) if file == nil { if a.findDirectory(name) != nil { @@ -306,3 +335,12 @@ func (a *zipArchive) openStatus() (archiveStatus, error) { return archiveOpening, nil } } + +func reportOpenFileDuration(start time.Time, op string, err error) { + status := "ok" + if err != nil { + status = "error" + } + + metrics.ZipOpenFileDuration.WithLabelValues(status, op).Observe(time.Since(start).Seconds()) +} diff --git a/metrics/metrics.go b/metrics/metrics.go index 680f7a05..44164126 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -121,6 +121,16 @@ var ( []string{"state"}, ) + // ZipReadArchiveHeaderDuration is the time it takes to read an archive's header information into memory + ZipReadArchiveHeaderDuration = prometheus.NewHistogram( + prometheus.HistogramOpts{ + Name: "gitlab_pages_zip_read_archive_header_duration", + Help: "The time it takes to read an archive's header information into memory", + Buckets: []float64{0.001, 0.005, 0.01, 0.02, 0.05, 0.100, 0.250, + 0.500, 1, 2, 5, 10, 20, 50}, + }, + ) + // ZipCacheRequests is the number of cache hits/misses ZipCacheRequests = prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -157,6 +167,18 @@ var ( }, ) + // ZipOpenFileDuration is the time it takes to get a file from Object Storage by + // operation Open, Lstat or Readlink + ZipOpenFileDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "gitlab_pages_zip_open_file_duration", + Help: "The time it takes to open a file from Object Storage before is served", + Buckets: []float64{0.001, 0.005, 0.01, 0.02, 0.05, 0.100, 0.250, + 0.500, 1, 2, 5, 10, 20, 50}, + }, + []string{"status", "op"}, + ) + RejectedRequestsCount = prometheus.NewCounter( prometheus.CounterOpts{ Name: "gitlab_pages_unknown_method_rejected_requests", @@ -239,10 +261,12 @@ func MustRegister() { HTTPRangeTraceDuration, HTTPRangeOpenRequests, ZipOpened, + ZipReadArchiveHeaderDuration, ZipOpenedEntriesCount, ZipCacheRequests, ZipArchiveEntriesCached, ZipCachedEntries, + ZipOpenFileDuration, RejectedRequestsCount, LimitListenerMaxConns, LimitListenerConcurrentConns, diff --git a/test/acceptance/metrics_test.go b/test/acceptance/metrics_test.go index cb4262fb..45e91875 100644 --- a/test/acceptance/metrics_test.go +++ b/test/acceptance/metrics_test.go @@ -48,10 +48,12 @@ func TestPrometheusMetricsCanBeScraped(t *testing.T) { require.Contains(t, string(body), "gitlab_pages_httprange_open_requests") // zip archives require.Contains(t, string(body), "gitlab_pages_zip_opened") + require.Contains(t, string(body), "gitlab_pages_zip_read_archive_header_duration") require.Contains(t, string(body), "gitlab_pages_zip_cache_requests") require.Contains(t, string(body), "gitlab_pages_zip_cached_entries") require.Contains(t, string(body), "gitlab_pages_zip_archive_entries_cached") require.Contains(t, string(body), "gitlab_pages_zip_opened_entries_count") + require.Contains(t, string(body), "gitlab_pages_zip_open_file_duration") // limit_listener require.Contains(t, string(body), "gitlab_pages_limit_listener_max_conns") require.Contains(t, string(body), "gitlab_pages_limit_listener_concurrent_conns") |