From ecfdfa7f25f82c11870a0c83e9fd7be0fc80ea00 Mon Sep 17 00:00:00 2001 From: feistel <6742251-feistel@users.noreply.gitlab.com> Date: Sat, 29 Jan 2022 19:17:30 +0100 Subject: fix: do not cache responses if there is a ctx error If the retriever timed out or there was a ctx/temporary error we just trigger a refresh on future requests Changelog: fixed --- internal/source/gitlab/cache/cache.go | 24 ++++++++++++++++++++---- internal/source/gitlab/cache/entry.go | 16 ++++++++++++++-- internal/source/gitlab/cache/retriever.go | 13 ++++--------- 3 files changed, 38 insertions(+), 15 deletions(-) (limited to 'internal') diff --git a/internal/source/gitlab/cache/cache.go b/internal/source/gitlab/cache/cache.go index 473c464c..84c35a62 100644 --- a/internal/source/gitlab/cache/cache.go +++ b/internal/source/gitlab/cache/cache.go @@ -4,6 +4,8 @@ import ( "context" "fmt" + "gitlab.com/gitlab-org/labkit/correlation" + "gitlab.com/gitlab-org/gitlab-pages/internal/config" "gitlab.com/gitlab-org/gitlab-pages/internal/source/gitlab/api" "gitlab.com/gitlab-org/gitlab-pages/metrics" @@ -92,15 +94,29 @@ func (c *Cache) Resolve(ctx context.Context, domain string) *api.Lookup { return c.retrieve(ctx, entry) } -func (c *Cache) retrieve(ctx context.Context, entry *Entry) *api.Lookup { +func (c *Cache) retrieve(originalCtx context.Context, entry *Entry) *api.Lookup { // We run the code within an additional func() to run both `e.setResponse` // and `c.retriever.Retrieve` asynchronously. - entry.retrieve.Do(func() { go func() { entry.setResponse(c.retriever.Retrieve(ctx, entry.domain)) }() }) + // We are using a sync.Once so this assumes that setResponse is always called + // the first (and only) time f is called, otherwise future requests will hang. + entry.retrieve.Do(func() { + // forward correlation_id from originalCtx to the new independent context + correlationID := correlation.ExtractFromContext(originalCtx) + retrieverCtx := correlation.ContextWithCorrelation(context.Background(), correlationID) + + retrieverCtx, cancel := context.WithTimeout(retrieverCtx, c.retriever.retrievalTimeout) + + go func() { + l := c.retriever.Retrieve(retrieverCtx, entry.domain) + entry.setResponse(l) + cancel() + }() + }) var lookup *api.Lookup select { - case <-ctx.Done(): - lookup = &api.Lookup{Name: entry.domain, Error: fmt.Errorf("context done: %w", ctx.Err())} + case <-originalCtx.Done(): + lookup = &api.Lookup{Name: entry.domain, Error: fmt.Errorf("original context done: %w", originalCtx.Err())} case <-entry.retrieved: lookup = entry.Lookup() } diff --git a/internal/source/gitlab/cache/entry.go b/internal/source/gitlab/cache/entry.go index 742df9c8..1c752913 100644 --- a/internal/source/gitlab/cache/entry.go +++ b/internal/source/gitlab/cache/entry.go @@ -1,7 +1,9 @@ package cache import ( + "context" "errors" + "net" "os" "sync" "time" @@ -45,7 +47,7 @@ func (e *Entry) IsUpToDate() bool { e.mux.RLock() defer e.mux.RUnlock() - return e.isResolved() && !e.isOutdated() + return e.isResolved() && !e.isOutdated() && !e.timedOut() } // NeedsRefresh return true if the entry has been resolved correctly but it has @@ -54,7 +56,7 @@ func (e *Entry) NeedsRefresh() bool { e.mux.RLock() defer e.mux.RUnlock() - return e.isResolved() && e.isOutdated() + return e.isResolved() && (e.isOutdated() || e.timedOut()) } // Lookup returns a retriever Lookup response. @@ -97,6 +99,16 @@ func (e *Entry) domainExists() bool { return !errors.Is(e.response.Error, domain.ErrDomainDoesNotExist) } +func (e *Entry) timedOut() bool { + err := e.response.Error + var neterr net.Error + if ok := errors.As(err, &neterr); ok && (neterr.Timeout() || neterr.Temporary()) { + return true + } + + return errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) +} + // hasTemporaryError checks currently refreshed entry for errors after resolving the lookup again // and is different to domain.ErrDomainDoesNotExist (this is an edge case to prevent serving // a page right after being deleted). diff --git a/internal/source/gitlab/cache/retriever.go b/internal/source/gitlab/cache/retriever.go index 68299f6c..d6dd0924 100644 --- a/internal/source/gitlab/cache/retriever.go +++ b/internal/source/gitlab/cache/retriever.go @@ -35,20 +35,15 @@ func NewRetriever(client api.Client, retrievalTimeout, maxRetrievalInterval time // Retrieve retrieves a lookup response from external source with timeout and // backoff. It has its own context with timeout. -func (r *Retriever) Retrieve(originalCtx context.Context, domain string) (lookup api.Lookup) { - logMsg := "" +func (r *Retriever) Retrieve(ctx context.Context, domain string) (lookup api.Lookup) { + var logMsg string - // forward correlation_id from originalCtx to the new independent context - correlationID := correlation.ExtractFromContext(originalCtx) - ctx := correlation.ContextWithCorrelation(context.Background(), correlationID) - - ctx, cancel := context.WithTimeout(ctx, r.retrievalTimeout) - defer cancel() + correlationID := correlation.ExtractFromContext(ctx) select { case <-ctx.Done(): logMsg = "retrieval context done" - lookup = api.Lookup{Error: fmt.Errorf(logMsg+": %w", ctx.Err())} + lookup = api.Lookup{Name: domain, Error: fmt.Errorf(logMsg+": %w", ctx.Err())} case lookup = <-r.resolveWithBackoff(ctx, domain): logMsg = "retrieval response sent" } -- cgit v1.2.3