1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
package zip
import (
"context"
"errors"
"net/url"
"time"
"github.com/patrickmn/go-cache"
"gitlab.com/gitlab-org/gitlab-pages/internal/vfs"
"gitlab.com/gitlab-org/gitlab-pages/metrics"
)
const (
// TODO: make these configurable https://gitlab.com/gitlab-org/gitlab-pages/-/issues/464

// How long an opened archive stays in the cache, and how often expired
// entries are cleaned up. Refresh happens when less than
// defaultCacheRefreshInterval of the TTL remains (see findOrOpenArchive).
defaultCacheExpirationInterval = time.Minute
defaultCacheCleanupInterval = time.Minute / 2
defaultCacheRefreshInterval = time.Minute / 2

// we assume that each item costs around 100 bytes
// this gives around 5MB of raw memory needed without acceleration structures
defaultDataOffsetItems = 50000
defaultDataOffsetExpirationInterval = time.Hour

// we assume that each item costs around 200 bytes
// this gives around 2MB of raw memory needed without acceleration structures
defaultReadlinkItems = 10000
defaultReadlinkExpirationInterval = time.Hour
)
var (
// errAlreadyCached is a sentinel returned by findOrOpenArchive when a
// concurrent request won the race to insert the same archive into the
// cache; callers (Root) treat it as "retry the lookup", not as a failure.
errAlreadyCached = errors.New("archive already cached")
)
// zipVFS is a simple cached implementation of the vfs.VFS interface
type zipVFS struct {
// cache maps archive URL path -> *zipArchive with TTL-based expiration;
// concurrent access is mediated by go-cache itself (no extra locks here).
cache *cache.Cache
// dataOffsetCache and readlinkCache are bounded LRU caches shared by all
// archives opened through this VFS.
dataOffsetCache *lruCache
readlinkCache *lruCache
// archiveCount is not referenced in this chunk — presumably an atomic
// counter of live archives maintained elsewhere; verify before relying on it.
archiveCount int64
}
// New creates a zipVFS instance that can be used by a serving request
func New() vfs.VFS {
zipVFS := &zipVFS{
cache: cache.New(defaultCacheExpirationInterval, defaultCacheCleanupInterval),
dataOffsetCache: newLruCache("data-offset", defaultDataOffsetItems, defaultDataOffsetExpirationInterval),
readlinkCache: newLruCache("readlink", defaultReadlinkItems, defaultReadlinkExpirationInterval),
}
zipVFS.cache.OnEvicted(func(s string, i interface{}) {
metrics.ZipCachedEntries.WithLabelValues("archive").Dec()
i.(*zipArchive).onEvicted()
})
return zipVFS
}
// Root opens an archive given a URL path and returns an instance of zipArchive
// that implements the vfs.VFS interface.
// To avoid using locks, the findOrOpenArchive function runs inside of a for
// loop until an archive is either found or created and saved.
// If findOrOpenArchive returns errAlreadyCached, the for loop will continue
// to try and find the cached archive or return if there's an error, for example
// if the context is canceled.
func (fs *zipVFS) Root(ctx context.Context, path string) (vfs.Root, error) {
	urlPath, err := url.Parse(path)
	if err != nil {
		return nil, err
	}

	// hoist the loop-invariant cache key out of the retry loop
	key := urlPath.String()

	// we do it in loop to not use any additional locks
	for {
		root, err := fs.findOrOpenArchive(ctx, key)
		if err == errAlreadyCached {
			// Another request inserted the archive concurrently. Retry the
			// lookup, but stop if the request context is done so a
			// pathological add/evict race cannot spin forever.
			if ctxErr := ctx.Err(); ctxErr != nil {
				return nil, ctxErr
			}
			continue
		}

		return root, err
	}
}
// Name returns the name of this VFS implementation, used to select it
// and to label it in metrics.
func (fs *zipVFS) Name() string {
return "zip"
}
// findOrOpenArchive if found in fs.cache refresh if needed and return it.
// otherwise open the archive and try to save it, if saving fails it's because
// the archive has already been cached (e.g. by another concurrent request).
//
// Concurrency note: cache.Add acts as an atomic insert-if-absent, so this
// function needs no explicit locking; a lost insert race surfaces as
// errAlreadyCached and the caller retries.
func (fs *zipVFS) findOrOpenArchive(ctx context.Context, path string) (*zipArchive, error) {
archive, expiry, found := fs.cache.GetWithExpiration(path)
if found {
metrics.ZipCacheRequests.WithLabelValues("archive", "hit").Inc()
// TODO: do not refreshed errored archives https://gitlab.com/gitlab-org/gitlab-pages/-/merge_requests/351
// Extend the TTL of entries that are close to expiring so archives in
// active use are not evicted and re-opened.
if time.Until(expiry) < defaultCacheRefreshInterval {
// refresh item
fs.cache.SetDefault(path, archive)
}
} else {
archive = newArchive(fs, path, DefaultOpenTimeout)
// if adding the archive to the cache fails it means it's already been added before
// this is done to find concurrent additions.
if fs.cache.Add(path, archive, cache.DefaultExpiration) != nil {
return nil, errAlreadyCached
}
metrics.ZipCacheRequests.WithLabelValues("archive", "miss").Inc()
metrics.ZipCachedEntries.WithLabelValues("archive").Inc()
}
// openArchive is idempotent/once-like for an already-open archive —
// presumably guarded inside zipArchive; it also honors ctx cancellation
// while the archive is being fetched (TODO confirm in zipArchive).
zipArchive := archive.(*zipArchive)
err := zipArchive.openArchive(ctx)
if err != nil {
return nil, err
}
return zipArchive, nil
}
|