diff options
author | Vladimir Shushlin <vshushlin@gitlab.com> | 2022-04-14 06:43:40 +0300 |
---|---|---|
committer | Vladimir Shushlin <vshushlin@gitlab.com> | 2022-04-14 06:43:40 +0300 |
commit | 3eed26c829eb421a7b20748ec3c7a2bf2a189c81 (patch) | |
tree | f517433dbf5a550b655dfb8624c7bbd05853e6f5 | |
parent | 189eeedc91148c9c217374d9292bba740a1f801c (diff) | |
parent | c9063fd0613ad4aac56b02f6feb5f2b1d9b019a3 (diff) |
Merge branch 'fix/cache-ctx-err' into 'master'
fix: handle ctx errors during lookups
See merge request gitlab-org/gitlab-pages!677
-rw-r--r-- | internal/source/gitlab/cache/cache.go | 15 | ||||
-rw-r--r-- | internal/source/gitlab/cache/cache_test.go | 46 | ||||
-rw-r--r-- | internal/source/gitlab/cache/entry.go | 16 | ||||
-rw-r--r-- | internal/source/gitlab/cache/retriever.go | 8 |
4 files changed, 63 insertions, 22 deletions
diff --git a/internal/source/gitlab/cache/cache.go b/internal/source/gitlab/cache/cache.go index 473c464c..7dddb176 100644 --- a/internal/source/gitlab/cache/cache.go +++ b/internal/source/gitlab/cache/cache.go @@ -4,6 +4,8 @@ import ( "context" "fmt" + "gitlab.com/gitlab-org/labkit/correlation" + "gitlab.com/gitlab-org/gitlab-pages/internal/config" "gitlab.com/gitlab-org/gitlab-pages/internal/source/gitlab/api" "gitlab.com/gitlab-org/gitlab-pages/metrics" @@ -95,12 +97,21 @@ func (c *Cache) Resolve(ctx context.Context, domain string) *api.Lookup { func (c *Cache) retrieve(ctx context.Context, entry *Entry) *api.Lookup { // We run the code within an additional func() to run both `e.setResponse` // and `c.retriever.Retrieve` asynchronously. - entry.retrieve.Do(func() { go func() { entry.setResponse(c.retriever.Retrieve(ctx, entry.domain)) }() }) + // We are using a sync.Once so this assumes that setResponse is always called + // the first (and only) time f is called, otherwise future requests will hang. + entry.retrieve.Do(func() { + correlationID := correlation.ExtractFromContext(ctx) + + go func() { + l := c.retriever.Retrieve(correlationID, entry.domain) + entry.setResponse(l) + }() + }) var lookup *api.Lookup select { case <-ctx.Done(): - lookup = &api.Lookup{Name: entry.domain, Error: fmt.Errorf("context done: %w", ctx.Err())} + lookup = &api.Lookup{Name: entry.domain, Error: fmt.Errorf("original context done: %w", ctx.Err())} case <-entry.retrieved: lookup = entry.Lookup() } diff --git a/internal/source/gitlab/cache/cache_test.go b/internal/source/gitlab/cache/cache_test.go index 267c9b29..aad357ae 100644 --- a/internal/source/gitlab/cache/cache_test.go +++ b/internal/source/gitlab/cache/cache_test.go @@ -48,16 +48,8 @@ func (c *clientMock) Status() error { } func withTestCache(config resolverConfig, cacheConfig *config.Cache, block func(*Cache, *clientMock)) { - var chanSize int - - if config.buffered { - chanSize = 1 - } else { - chanSize = 0 - } - resolver := &clientMock{ - domain: make(chan string, chanSize), + domain: make(chan string, config.bufferSize), lookups: make(chan uint64, 100), failure: config.failure, } @@ -91,8 +83,8 @@ func (cache *Cache) withTestEntry(config entryConfig, block func(*Entry)) { } type resolverConfig struct { - buffered bool - failure error + bufferSize int + failure error } type entryConfig struct { @@ -102,8 +94,36 @@ type entryConfig struct { } func TestResolve(t *testing.T) { + t.Run("ctx errors should not be cached", func(t *testing.T) { + cc := &config.Cache{ + CacheExpiry: 10 * time.Minute, + CacheCleanupInterval: 10 * time.Minute, + EntryRefreshTimeout: 10 * time.Minute, + RetrievalTimeout: 1 * time.Second, + MaxRetrievalInterval: 50 * time.Millisecond, + MaxRetrievalRetries: 3, + } + + withTestCache(resolverConfig{bufferSize: 1}, cc, func(cache *Cache, resolver *clientMock) { + require.Equal(t, 0, len(resolver.lookups)) + + // wait for retrieval timeout to expire + lookup := cache.Resolve(context.Background(), "foo.gitlab.com") + require.ErrorIs(t, lookup.Error, context.DeadlineExceeded) + require.Equal(t, "foo.gitlab.com", lookup.Name) + + // future lookups should succeed once the entry has been refreshed. + // refresh happens in a separate goroutine so the first few requests might still fail + require.Eventually(t, func() bool { + resolver.domain <- "foo.gitlab.com" + lookup := cache.Resolve(context.Background(), "foo.gitlab.com") + return lookup.Error == nil + }, 1*time.Second, 10*time.Millisecond) + }) + }) + t.Run("when item is not cached", func(t *testing.T) { - withTestCache(resolverConfig{buffered: true}, nil, func(cache *Cache, resolver *clientMock) { + withTestCache(resolverConfig{bufferSize: 1}, nil, func(cache *Cache, resolver *clientMock) { require.Empty(t, resolver.lookups) resolver.domain <- "my.gitlab.com" @@ -170,7 +190,7 @@ func TestResolve(t *testing.T) { }) t.Run("when item is in long cache only", func(t *testing.T) { - withTestCache(resolverConfig{buffered: false}, nil, func(cache *Cache, resolver *clientMock) { + withTestCache(resolverConfig{}, nil, func(cache *Cache, resolver *clientMock) { cache.withTestEntry(entryConfig{expired: true, retrieved: true}, func(*Entry) { lookup := cache.Resolve(context.Background(), "my.gitlab.com") diff --git a/internal/source/gitlab/cache/entry.go b/internal/source/gitlab/cache/entry.go index 742df9c8..1c752913 100644 --- a/internal/source/gitlab/cache/entry.go +++ b/internal/source/gitlab/cache/entry.go @@ -1,7 +1,9 @@ package cache import ( + "context" "errors" + "net" "os" "sync" "time" @@ -45,7 +47,7 @@ func (e *Entry) IsUpToDate() bool { e.mux.RLock() defer e.mux.RUnlock() - return e.isResolved() && !e.isOutdated() + return e.isResolved() && !e.isOutdated() && !e.timedOut() } // NeedsRefresh return true if the entry has been resolved correctly but it has @@ -54,7 +56,7 @@ func (e *Entry) NeedsRefresh() bool { e.mux.RLock() defer e.mux.RUnlock() - return e.isResolved() && e.isOutdated() + return e.isResolved() && (e.isOutdated() || e.timedOut()) } // Lookup returns a retriever Lookup response. @@ -97,6 +99,16 @@ func (e *Entry) domainExists() bool { return !errors.Is(e.response.Error, domain.ErrDomainDoesNotExist) } +func (e *Entry) timedOut() bool { + err := e.response.Error + var neterr net.Error + if ok := errors.As(err, &neterr); ok && (neterr.Timeout() || neterr.Temporary()) { + return true + } + + return errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) +} + // hasTemporaryError checks currently refreshed entry for errors after resolving the lookup again // and is different to domain.ErrDomainDoesNotExist (this is an edge case to prevent serving // a page right after being deleted). diff --git a/internal/source/gitlab/cache/retriever.go b/internal/source/gitlab/cache/retriever.go index 68299f6c..01ce4a16 100644 --- a/internal/source/gitlab/cache/retriever.go +++ b/internal/source/gitlab/cache/retriever.go @@ -35,11 +35,9 @@ func NewRetriever(client api.Client, retrievalTimeout, maxRetrievalInterval time // Retrieve retrieves a lookup response from external source with timeout and // backoff. It has its own context with timeout. -func (r *Retriever) Retrieve(originalCtx context.Context, domain string) (lookup api.Lookup) { - logMsg := "" +func (r *Retriever) Retrieve(correlationID, domain string) (lookup api.Lookup) { + var logMsg string - // forward correlation_id from originalCtx to the new independent context - correlationID := correlation.ExtractFromContext(originalCtx) ctx := correlation.ContextWithCorrelation(context.Background(), correlationID) ctx, cancel := context.WithTimeout(ctx, r.retrievalTimeout) @@ -48,7 +46,7 @@ func (r *Retriever) Retrieve(originalCtx context.Context, domain string) (lookup select { case <-ctx.Done(): logMsg = "retrieval context done" - lookup = api.Lookup{Error: fmt.Errorf(logMsg+": %w", ctx.Err())} + lookup = api.Lookup{Name: domain, Error: fmt.Errorf(logMsg+": %w", ctx.Err())} case lookup = <-r.resolveWithBackoff(ctx, domain): logMsg = "retrieval response sent" } |