diff options
author | Jaime Martinez <jmartinez@gitlab.com> | 2021-02-16 09:47:27 +0300 |
---|---|---|
committer | Jaime Martinez <jmartinez@gitlab.com> | 2021-03-18 03:20:16 +0300 |
commit | ed2b1232009f50b54278cadae8c1a3659539fa4c (patch) | |
tree | 740436867af5dd6d7a12585bbd9114b839795a65 /internal/source | |
parent | 4e9019261bb583619eadae7275aaafc11a7c95c1 (diff) |
Handle errored lookup refreshes correctly
Diffstat (limited to 'internal/source')
-rw-r--r-- | internal/source/gitlab/cache/cache.go | 4 | ||||
-rw-r--r-- | internal/source/gitlab/cache/entry.go | 45 | ||||
-rw-r--r-- | internal/source/gitlab/cache/entry_test.go | 131 | ||||
-rw-r--r-- | internal/source/gitlab/cache/memstore.go | 4 |
4 files changed, 168 insertions, 16 deletions
diff --git a/internal/source/gitlab/cache/cache.go b/internal/source/gitlab/cache/cache.go index 77291033..71b8e745 100644 --- a/internal/source/gitlab/cache/cache.go +++ b/internal/source/gitlab/cache/cache.go @@ -80,14 +80,14 @@ func (c *Cache) Resolve(ctx context.Context, domain string) *api.Lookup { } if entry.NeedsRefresh() { - entry.Refresh(c.client, c.store) + entry.Refresh(c.store) metrics.DomainsSourceCacheHit.Inc() return entry.Lookup() } metrics.DomainsSourceCacheMiss.Inc() - return entry.Retrieve(ctx, c.client) + return entry.Retrieve(ctx) } // Status calls the client Status to check connectivity with the API diff --git a/internal/source/gitlab/cache/entry.go b/internal/source/gitlab/cache/entry.go index 4ea25485..bf6866bd 100644 --- a/internal/source/gitlab/cache/entry.go +++ b/internal/source/gitlab/cache/entry.go @@ -6,6 +6,7 @@ import ( "sync" "time" + "gitlab.com/gitlab-org/gitlab-pages/internal/domain" "gitlab.com/gitlab-org/gitlab-pages/internal/source/gitlab/api" ) @@ -21,10 +22,11 @@ type Entry struct { retrieved chan struct{} response *api.Lookup refreshTimeout time.Duration + maxDuration time.Duration retriever *Retriever } -func newCacheEntry(domain string, refreshTimeout time.Duration, retriever *Retriever) *Entry { +func newCacheEntry(domain string, refreshTimeout, maxDuration time.Duration, retriever *Retriever) *Entry { return &Entry{ domain: domain, created: time.Now(), @@ -33,6 +35,7 @@ func newCacheEntry(domain string, refreshTimeout time.Duration, retriever *Retri mux: &sync.RWMutex{}, retrieved: make(chan struct{}), refreshTimeout: refreshTimeout, + maxDuration: maxDuration, retriever: retriever, } } @@ -64,7 +67,7 @@ func (e *Entry) Lookup() *api.Lookup { } // Retrieve perform a blocking retrieval of the cache entry response. -func (e *Entry) Retrieve(ctx context.Context, client api.Client) (lookup *api.Lookup) { +func (e *Entry) Retrieve(ctx context.Context) (lookup *api.Lookup) { // We run the code within an additional func() to run both `e.setResponse` // and `e.retrieve.Retrieve` asynchronously. e.retrieve.Do(func() { go func() { e.setResponse(e.retriever.Retrieve(e.domain)) }() }) @@ -80,20 +83,22 @@ func (e *Entry) Retrieve(ctx context.Context, client api.Client) (lookup *api.Lo } // Refresh will update the entry in the store only when it gets resolved. -func (e *Entry) Refresh(client api.Client, store Store) { +func (e *Entry) Refresh(store Store) { e.refresh.Do(func() { - go func() { - entry := newCacheEntry(e.domain, e.refreshTimeout, e.retriever) + go e.refreshFunc(store) + }) +} - entry.Retrieve(context.Background(), client) +func (e *Entry) refreshFunc(store Store) { + entry := newCacheEntry(e.domain, e.refreshTimeout, e.maxDuration, e.retriever) - if entry.response != nil && entry.response.Error != nil { - entry.response = e.response - } + entry.Retrieve(context.Background()) + if e.reuseEntry(entry) { + entry.response = e.response + entry.created = e.created + } - store.ReplaceOrCreate(e.domain, entry) - }() - }) + store.ReplaceOrCreate(e.domain, entry) } func (e *Entry) setResponse(lookup api.Lookup) { @@ -111,3 +116,19 @@ func (e *Entry) isExpired() bool { func (e *Entry) isResolved() bool { return e.response != nil } + +func (e *Entry) maxTimeInCache() bool { + return time.Since(e.created) > e.maxDuration +} + +// reuseEntry as the refreshed entry when there is an error after resolving the lookup again +// and is different to domain.ErrDomainDoesNotExist. This is an edge case to prevent serving +// a page right after being deleted. +// It should only be refreshed as long as it hasn't passed e.maxDuration. +// See https://gitlab.com/gitlab-org/gitlab-pages/-/issues/281. +func (e *Entry) reuseEntry(entry *Entry) bool { + return entry.response != nil && + entry.response.Error != nil && + !errors.Is(entry.response.Error, domain.ErrDomainDoesNotExist) && + !e.maxTimeInCache() +} diff --git a/internal/source/gitlab/cache/entry_test.go b/internal/source/gitlab/cache/entry_test.go index 36aee522..59183785 100644 --- a/internal/source/gitlab/cache/entry_test.go +++ b/internal/source/gitlab/cache/entry_test.go @@ -1,9 +1,14 @@ package cache import ( + "context" + "errors" + "net/http" "testing" "time" + "gitlab.com/gitlab-org/gitlab-pages/internal/domain" + "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitlab-pages/internal/source/gitlab/api" @@ -49,7 +54,7 @@ func TestIsUpToDateAndNeedsRefresh(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - entry := newCacheEntry("my.gitlab.com", testCacheConfig.EntryRefreshTimeout, nil) + entry := newCacheEntry("my.gitlab.com", testCacheConfig.EntryRefreshTimeout, testCacheConfig.CacheExpiry, nil) if tt.resolved { entry.response = &api.Lookup{} } @@ -62,3 +67,127 @@ func TestIsUpToDateAndNeedsRefresh(t *testing.T) { }) } } + +func TestEntryRefresh(t *testing.T) { + client := &lookupMock{ + successCount: 1, + responses: map[string]api.Lookup{ + "test.gitlab.io": api.Lookup{ + Name: "test.gitlab.io", + Domain: &api.VirtualDomain{ + LookupPaths: nil, + }, + }, + "error.gitlab.io": api.Lookup{ + Name: "error.gitlab.io", + Error: errors.New("something went wrong"), + }, + }, + } + cc := &cacheConfig{ + cacheExpiry: 100 * time.Millisecond, + entryRefreshTimeout: time.Millisecond, + retrievalTimeout: 50 * time.Millisecond, + maxRetrievalInterval: time.Millisecond, + maxRetrievalRetries: 1, + } + + store := newMemStore(client, cc) + + t.Run("entry is the same after refreshed lookup has error", func(t *testing.T) { + entry := newCacheEntry("test.gitlab.io", cc.entryRefreshTimeout, cc.cacheExpiry, store.(*memstore).retriever) + + ctx, cancel := context.WithTimeout(context.Background(), cc.retrievalTimeout) + defer cancel() + + lookup := entry.Retrieve(ctx) + require.NoError(t, lookup.Error) + + require.Eventually(t, entry.NeedsRefresh, 2*cc.entryRefreshTimeout, time.Millisecond, "entry should need refresh") + + entry.refreshFunc(store) + + require.True(t, client.failed, "refresh should have failed") + + storedEntry := loadEntry(t, "test.gitlab.io", store) + + require.NoError(t, storedEntry.Lookup().Error, "resolving failed but lookup should still be valid") + require.Equal(t, storedEntry.created.UnixNano(), entry.created.UnixNano(), "refreshed entry should be the same") + require.Equal(t, storedEntry.Lookup(), entry.Lookup(), "lookup should be the same") + }) + + t.Run("entry is different after expiring", func(t *testing.T) { + client.failed = false + + entry := newCacheEntry("error.gitlab.io", cc.entryRefreshTimeout, cc.cacheExpiry, store.(*memstore).retriever) + + ctx, cancel := context.WithTimeout(context.Background(), cc.retrievalTimeout) + defer cancel() + + lookup := entry.Retrieve(ctx) + require.Error(t, lookup.Error) + require.Eventually(t, entry.NeedsRefresh, 2*cc.entryRefreshTimeout, time.Millisecond, "entry should need refresh") + + // wait for entry to expire + time.Sleep(cc.cacheExpiry) + // refreshing the entry after it has expired should create a completely new one + entry.refreshFunc(store) + + require.True(t, client.failed, "refresh should have failed") + + storedEntry := loadEntry(t, "error.gitlab.io", store) + require.NotEqual(t, storedEntry, entry, "stored entry should be different") + require.Greater(t, storedEntry.created.UnixNano(), entry.created.UnixNano(), "") + }) +} + +func loadEntry(t *testing.T, domain string, store Store) *Entry { + t.Helper() + + m := store.(*memstore) + m.mux.RLock() + i, exists := m.store.Get(domain) + m.mux.RUnlock() + require.True(t, exists) + return i.(*Entry) +} + +type lookupMock struct { + currentCount int + successCount int + failed bool + responses map[string]api.Lookup +} + +func (mm *lookupMock) GetLookup(ctx context.Context, domainName string) api.Lookup { + lookup := api.Lookup{ + Name: domainName, + } + + select { + case <-ctx.Done(): + lookup.Error = ctx.Err() + return lookup + default: + lookup, ok := mm.responses[domainName] + if !ok { + lookup.Error = domain.ErrDomainDoesNotExist + return lookup + } + + // return error after mm.successCount + mm.currentCount++ + if mm.currentCount > mm.successCount { + mm.currentCount = 0 + mm.failed = true + + lookup.Error = http.ErrServerClosed + } + + return lookup + } +} + +func (mm *lookupMock) Status() error { + return nil +} diff --git a/internal/source/gitlab/cache/memstore.go b/internal/source/gitlab/cache/memstore.go index 0bf9bb79..4565590e 100644 --- a/internal/source/gitlab/cache/memstore.go +++ b/internal/source/gitlab/cache/memstore.go @@ -15,6 +15,7 @@ type memstore struct { mux *sync.RWMutex retriever *Retriever entryRefreshTimeout time.Duration + entryMaxDuration time.Duration } func newMemStore(client api.Client, cc *config.Cache) Store { @@ -25,6 +26,7 @@ func newMemStore(client api.Client, cc *config.Cache) Store { mux: &sync.RWMutex{}, retriever: retriever, entryRefreshTimeout: cc.EntryRefreshTimeout, + entryMaxDuration: cc.CacheExpiry, } } @@ -46,7 +48,7 @@ func (m *memstore) LoadOrCreate(domain string) *Entry { return entry.(*Entry) } - newEntry := newCacheEntry(domain, m.entryRefreshTimeout, m.retriever) + newEntry := newCacheEntry(domain, m.entryRefreshTimeout, m.entryMaxDuration, m.retriever) m.store.SetDefault(domain, newEntry) return newEntry |