diff options
author | Kamil Trzciński <ayufan@ayufan.eu> | 2020-09-30 13:46:43 +0300 |
---|---|---|
committer | Kamil Trzciński <ayufan@ayufan.eu> | 2020-10-13 00:13:32 +0300 |
commit | e41a1b9f32072f253d6b13d40c111aa5c42f1899 (patch) | |
tree | 9d7d086acb9c39037a34a34b9aca314543eaf666 | |
parent | 5f461b42e39419795b2669278398a9a7c6ed2cd9 (diff) |
Try to LRU cache offsets and symlinks
-rw-r--r-- | go.mod | 2 | ||||
-rw-r--r-- | go.sum | 8 | ||||
-rw-r--r-- | internal/vfs/zip/archive.go | 56 | ||||
-rw-r--r-- | internal/vfs/zip/archive_test.go | 9 | ||||
-rw-r--r-- | internal/vfs/zip/vfs.go | 13 |
5 files changed, 65 insertions, 23 deletions
@@ -12,6 +12,8 @@ require ( github.com/gorilla/securecookie v1.1.1 github.com/gorilla/sessions v1.2.0 github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 + github.com/karlseguin/ccache v2.0.3+incompatible + github.com/karlseguin/ccache/v2 v2.0.6 // indirect github.com/karrick/godirwalk v1.10.12 github.com/kr/text v0.2.0 // indirect github.com/namsral/flag v1.7.4-pre @@ -174,6 +174,12 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V github.com/k0kubun/colorstring v0.0.0-20150214042306-9440f1994b88/go.mod h1:3w7q1U84EfirKl04SVQ/s7nPm1ZPhiXd34z40TNz36k= github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 h1:iQTw/8FWTuc7uiaSepXwyf3o52HaUYcV+Tu66S3F5GA= github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8= +github.com/karlseguin/ccache v2.0.3+incompatible h1:j68C9tWOROiOLWTS/kCGg9IcJG+ACqn5+0+t8Oh83UU= +github.com/karlseguin/ccache v2.0.3+incompatible/go.mod h1:CM9tNPzT6EdRh14+jiW8mEF9mkNZuuE51qmgGYUB93w= +github.com/karlseguin/ccache/v2 v2.0.6 h1:jFCLz4bF4EPfuCcvESAgYNClkEb31LV3WzyOwLlFz7w= +github.com/karlseguin/ccache/v2 v2.0.6/go.mod h1:2BDThcfQMf/c0jnZowt16eW405XIqZPavt+HoYEtcxQ= +github.com/karlseguin/expect v1.0.2-0.20190806010014-778a5f0c6003 h1:vJ0Snvo+SLMY72r5J4sEfkuE7AFbixEP2qRbEcum/wA= +github.com/karlseguin/expect v1.0.2-0.20190806010014-778a5f0c6003/go.mod h1:zNBxMY8P21owkeogJELCLeHIt+voOSduHYTFUbwRAV8= github.com/karrick/godirwalk v1.10.12 h1:BqUm+LuJcXjGv1d2mj3gBiQyrQ57a0rYoAmhvJQ7RDU= github.com/karrick/godirwalk v1.10.12/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0LhBygSwrAsHA= github.com/kataras/golog v0.0.9/go.mod h1:12HJgwBIZFNGL0EJnMRhmvGA0PQGx8VFwrZtM4CqbAk= @@ -325,6 +331,8 @@ github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPU github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= github.com/wadey/gocovmerge v0.0.0-20160331181800-b5bfa59ec0ad h1:W0LEBv82YCGEtcmPA3uNZBI33/qF//HAAs3MawDjRa0= github.com/wadey/gocovmerge v0.0.0-20160331181800-b5bfa59ec0ad/go.mod h1:Hy8o65+MXnS6EwGElrSRjUzQDLXreJlzYLlWiHtt8hM= +github.com/wsxiaoys/terminal v0.0.0-20160513160801-0940f3fc43a0 h1:3UeQBvD0TFrlVjOeLOBz+CPAI8dnbqNSVwUwRrkp7vQ= +github.com/wsxiaoys/terminal v0.0.0-20160513160801-0940f3fc43a0/go.mod h1:IXCdmsXIht47RaVFLEdVnh1t+pgYtTAhQGj73kz+2DM= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= diff --git a/internal/vfs/zip/archive.go b/internal/vfs/zip/archive.go index 5fc55b0d..158fb87b 100644 --- a/internal/vfs/zip/archive.go +++ b/internal/vfs/zip/archive.go @@ -8,8 +8,10 @@ import ( "io" "os" "path/filepath" + "strconv" "strings" "sync" + "sync/atomic" "time" log "github.com/sirupsen/logrus" @@ -24,7 +26,9 @@ const ( maxSymlinkSize = 256 // DefaultOpenTimeout to request an archive and read its contents the first time - DefaultOpenTimeout = 30 * time.Second + DefaultOpenTimeout = 30 * time.Second + DataOffsetCacheInterval = 60 * time.Second + ReadLinkCacheInterval = 60 * time.Second ) var ( @@ -36,11 +40,15 @@ var ( // It represents a zip archive saving all its files in memory. // It holds an httprange.Resource that can be read with httprange.RangedReader in chunks. type zipArchive struct { + fs *zipVFS + path string once sync.Once done chan struct{} openTimeout time.Duration + cacheKey string + resource *httprange.Resource reader *httprange.RangedReader archive *zip.Reader @@ -50,12 +58,14 @@ type zipArchive struct { files map[string]*zip.File } -func newArchive(path string, openTimeout time.Duration) *zipArchive { +func newArchive(fs *zipVFS, path string, openTimeout time.Duration) *zipArchive { return &zipArchive{ + fs: fs, path: path, done: make(chan struct{}), files: make(map[string]*zip.File), openTimeout: openTimeout, + cacheKey: strconv.FormatInt(atomic.AddInt64(&fs.archiveCount, 1), 10) + ":", } } @@ -158,12 +168,15 @@ func (a *zipArchive) Open(ctx context.Context, name string) (vfs.File, error) { return nil, os.ErrNotExist } - // TODO: cache dataOffsets of files https://gitlab.com/gitlab-org/gitlab-pages/-/issues/461 - dataOffset, err := file.DataOffset() + item, err := a.fs.dataOffsetCache.Fetch(a.cacheKey+":"+name, DataOffsetCacheInterval, func() (interface{}, error) { + return file.DataOffset() + }) if err != nil { return nil, err } + dataOffset := item.Value().(int64) + // only read from dataOffset up to the size of the compressed file reader := a.reader.SectionReader(ctx, dataOffset, int64(file.CompressedSize64)) @@ -198,28 +211,37 @@ func (a *zipArchive) Readlink(ctx context.Context, name string) (string, error) return "", errNotSymlink } - rc, err := file.Open() - if err != nil { - return "", err - } - defer rc.Close() + item, err := a.fs.readlinkCache.Fetch(a.cacheKey+":"+name, ReadLinkCacheInterval, func() (interface{}, error) { + rc, err := file.Open() + if err != nil { + return nil, err + } + defer rc.Close() + + var symlink [maxSymlinkSize + 1]byte - symlink := make([]byte, maxSymlinkSize+1) + // read up to len(symlink) bytes from the link file + n, err := io.ReadFull(rc, symlink[:]) + if err != nil && err != io.ErrUnexpectedEOF { + // if err == io.ErrUnexpectedEOF the link is smaller than len(symlink) so it's OK to not return it + return nil, err + } - // read up to len(symlink) bytes from the link file - n, err := io.ReadFull(rc, symlink) - if err != nil && err != io.ErrUnexpectedEOF { - // if err == io.ErrUnexpectedEOF the link is smaller than len(symlink) so it's OK to not return it + // cache symlink up to desired size + return string(symlink[:n]), nil + }) + if err != nil { return "", err } + symlink := item.Value().(string) + // return errSymlinkSize if the number of bytes read from the link is too big - if n > maxSymlinkSize { + if len(symlink) > maxSymlinkSize { return "", errSymlinkSize } - // only return the n bytes read from the link - return string(symlink[:n]), nil + return symlink, nil } // onEvicted called by the zipVFS.cache when an archive is removed from the cache diff --git a/internal/vfs/zip/archive_test.go b/internal/vfs/zip/archive_test.go index bb094038..d778eefb 100644 --- a/internal/vfs/zip/archive_test.go +++ b/internal/vfs/zip/archive_test.go @@ -181,7 +181,8 @@ func TestArchiveCanBeReadAfterOpenCtxCanceled(t *testing.T) { testServerURL, cleanup := newZipFileServerURL(t, "group/zip.gitlab.io/public.zip") defer cleanup() - zip := newArchive(testServerURL+"/public.zip", time.Second) + fs := New().(*zipVFS) + zip := newArchive(fs, testServerURL+"/public.zip", time.Second) ctx, cancel := context.WithCancel(context.Background()) cancel() @@ -203,7 +204,8 @@ func TestReadArchiveFails(t *testing.T) { testServerURL, cleanup := newZipFileServerURL(t, "group/zip.gitlab.io/public.zip") defer cleanup() - zip := newArchive(testServerURL+"/unkown.html", time.Second) + fs := New().(*zipVFS) + zip := newArchive(fs, testServerURL+"/unkown.html", time.Second) err := zip.openArchive(context.Background()) require.Error(t, err) @@ -218,7 +220,8 @@ func openZipArchive(t *testing.T) (*zipArchive, func()) { testServerURL, cleanup := newZipFileServerURL(t, "group/zip.gitlab.io/public.zip") - zip := newArchive(testServerURL+"/public.zip", time.Second) + fs := New().(*zipVFS) + zip := newArchive(fs, testServerURL+"/public.zip", time.Second) err := zip.openArchive(context.Background()) require.NoError(t, err) diff --git a/internal/vfs/zip/vfs.go b/internal/vfs/zip/vfs.go index fd0855f7..a99b8771 100644 --- a/internal/vfs/zip/vfs.go +++ b/internal/vfs/zip/vfs.go @@ -6,6 +6,7 @@ import ( "net/url" "time" + "github.com/karlseguin/ccache" "github.com/patrickmn/go-cache" "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" @@ -25,14 +26,20 @@ var ( // zipVFS is a simple cached implementation of the vfs.VFS interface type zipVFS struct { - cache *cache.Cache + cache *cache.Cache + dataOffsetCache *ccache.Cache + readlinkCache *ccache.Cache + + archiveCount int64 } // New creates a zipVFS instance that can be used by a serving request func New() vfs.VFS { zipVFS := &zipVFS{ // TODO: add cache operation callbacks https://gitlab.com/gitlab-org/gitlab-pages/-/issues/465 - cache: cache.New(defaultCacheExpirationInterval, defaultCacheCleanupInterval), + cache: cache.New(defaultCacheExpirationInterval, defaultCacheCleanupInterval), + dataOffsetCache: ccache.New(ccache.Configure().MaxSize(10000).ItemsToPrune(2000)), + readlinkCache: ccache.New(ccache.Configure().MaxSize(1000).ItemsToPrune(2000)), } zipVFS.cache.OnEvicted(func(s string, i interface{}) { @@ -86,7 +93,7 @@ func (fs *zipVFS) findOrOpenArchive(ctx context.Context, path string) (*zipArchi fs.cache.SetDefault(path, archive) } } else { - archive = newArchive(path, DefaultOpenTimeout) + archive = newArchive(fs, path, DefaultOpenTimeout) // if adding the archive to the cache fails it means it's already been added before // this is done to find concurrent additions. |