diff options
author | Vladimir Shushlin <vshushlin@gitlab.com> | 2020-09-21 14:00:00 +0300 |
---|---|---|
committer | Vladimir Shushlin <vshushlin@gitlab.com> | 2020-09-21 14:00:00 +0300 |
commit | 78a56d61856a6d499225dfabdfb3ff1d27eeb8d0 (patch) | |
tree | 6a8e354a80a1b6786b2b05ebe23f63bc3811e313 | |
parent | b58d528f947b9f5440e163386a49f4d581290982 (diff) | |
parent | 92bef3ba5a9ebc9aad6abe661c900e13fdd27580 (diff) |
Merge branch '443-add-zip-package-to-vfs' into 'master'
Add zip package to VFS
See merge request gitlab-org/gitlab-pages!348
-rw-r--r-- | internal/httprange/http_reader.go | 4 | ||||
-rw-r--r-- | internal/serving/disk/reader.go | 13 | ||||
-rw-r--r-- | internal/vfs/file.go | 7 | ||||
-rw-r--r-- | internal/vfs/zip/archive.go | 207 | ||||
-rw-r--r-- | internal/vfs/zip/archive_test.go | 252 | ||||
-rw-r--r-- | internal/vfs/zip/deflate_reader.go | 31 | ||||
-rw-r--r-- | shared/pages/group/zip.gitlab.io/public.zip | bin | 0 -> 2415 bytes |
7 files changed, 511 insertions, 3 deletions
diff --git a/internal/httprange/http_reader.go b/internal/httprange/http_reader.go index 474b589f..4e7db6bc 100644 --- a/internal/httprange/http_reader.go +++ b/internal/httprange/http_reader.go @@ -8,6 +8,7 @@ import ( "time" "gitlab.com/gitlab-org/gitlab-pages/internal/httptransport" + "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" "gitlab.com/gitlab-org/gitlab-pages/metrics" ) @@ -41,6 +42,9 @@ type Reader struct { offset int64 } +// ensure that Reader is seekable +var _ vfs.SeekableFile = &Reader{} + // TODO: make this configurable/take an http client when creating a reader/ranged reader // instead https://gitlab.com/gitlab-org/gitlab-pages/-/issues/457 var httpClient = &http.Client{ diff --git a/internal/serving/disk/reader.go b/internal/serving/disk/reader.go index 350d036b..5b34c556 100644 --- a/internal/serving/disk/reader.go +++ b/internal/serving/disk/reader.go @@ -194,10 +194,19 @@ func (reader *Reader) serveFile(ctx context.Context, w http.ResponseWriter, r *h return err } + w.Header().Set("Content-Type", contentType) + reader.fileSizeMetric.Observe(float64(fi.Size())) - w.Header().Set("Content-Type", contentType) - http.ServeContent(w, r, origPath, fi.ModTime(), file) + // Support vfs.SeekableFile if available (uncompressed files) + if rs, ok := file.(vfs.SeekableFile); ok { + http.ServeContent(w, r, origPath, fi.ModTime(), rs) + } else { + // compressed files will be served by io.Copy + // TODO: Add extra headers https://gitlab.com/gitlab-org/gitlab-pages/-/issues/466 + w.Header().Set("Content-Length", strconv.FormatInt(fi.Size(), 10)) + io.Copy(w, file) + } return nil } diff --git a/internal/vfs/file.go b/internal/vfs/file.go index 5260c847..47f68308 100644 --- a/internal/vfs/file.go +++ b/internal/vfs/file.go @@ -5,6 +5,11 @@ import "io" // File represents an open file, which will typically be the response body of a Pages request. type File interface { io.Reader - io.Seeker io.Closer } + +// SeekableFile represents a seekable file, which will typically be the response body of a Pages request. +type SeekableFile interface { + File + io.Seeker +} diff --git a/internal/vfs/zip/archive.go b/internal/vfs/zip/archive.go new file mode 100644 index 00000000..a9265b08 --- /dev/null +++ b/internal/vfs/zip/archive.go @@ -0,0 +1,207 @@ +package zip + +import ( + "archive/zip" + "context" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "sync" + "time" + + log "github.com/sirupsen/logrus" + + "gitlab.com/gitlab-org/gitlab-pages/internal/httprange" + "gitlab.com/gitlab-org/gitlab-pages/internal/vfs" +) + +const ( + dirPrefix = "public/" + maxSymlinkSize = 256 + + // DefaultOpenTimeout to request an archive and read its contents the first time + DefaultOpenTimeout = 30 * time.Second +) + +var ( + errNotSymlink = errors.New("not a symlink") + errSymlinkSize = errors.New("symlink too long") +) + +// zipArchive implements the vfs.Root interface. +// It represents a zip archive saving all its files in memory. +// It holds an httprange.Resource that can be read with httprange.RangedReader in chunks. +type zipArchive struct { + path string + once sync.Once + done chan struct{} + openTimeout time.Duration + + resource *httprange.Resource + reader *httprange.RangedReader + archive *zip.Reader + err error + + // TODO: add metrics https://gitlab.com/gitlab-org/gitlab-pages/-/issues/423 + files map[string]*zip.File +} + +func newArchive(path string, openTimeout time.Duration) *zipArchive { + return &zipArchive{ + path: path, + done: make(chan struct{}), + files: make(map[string]*zip.File), + openTimeout: openTimeout, + } +} + +func (a *zipArchive) openArchive(parentCtx context.Context) error { + ctx, cancel := context.WithTimeout(parentCtx, a.openTimeout) + defer cancel() + + a.once.Do(func() { + // read archive once in its own routine with its own timeout + // if parentCtx is canceled, readArchive will continue regardless and will be cached in memory + go a.readArchive() + }) + + // wait for readArchive to be done or return if the parent context is canceled + select { + case <-a.done: + return a.err + case <-ctx.Done(): + err := ctx.Err() + switch err { + case context.Canceled: + log.WithError(err).Traceln("open zip archive request canceled") + case context.DeadlineExceeded: + log.WithError(err).Traceln("open zip archive timed out") + } + + return err + } +} + +// readArchive creates an httprange.Resource that can read the archive's contents and stores a slice of *zip.Files +// that can be accessed later when calling any of th vfs.VFS operations +func (a *zipArchive) readArchive() { + defer close(a.done) + + // readArchive with a timeout separate from openArchive's + ctx, cancel := context.WithTimeout(context.Background(), a.openTimeout) + defer cancel() + + a.resource, a.err = httprange.NewResource(ctx, a.path) + if a.err != nil { + return + } + + // load all archive files into memory using a cached ranged reader + a.reader = httprange.NewRangedReader(a.resource) + a.reader.WithCachedReader(func() { + a.archive, a.err = zip.NewReader(a.reader, a.resource.Size) + }) + + if a.archive == nil { + return + } + + // TODO: Improve preprocessing of zip archives https://gitlab.com/gitlab-org/gitlab-pages/-/issues/432 + for _, file := range a.archive.File { + if !strings.HasPrefix(file.Name, dirPrefix) { + continue + } + a.files[file.Name] = file + } + + // recycle memory + a.archive.File = nil +} + +func (a *zipArchive) findFile(name string) *zip.File { + name = filepath.Join(dirPrefix, name) + + if file := a.files[name]; file != nil { + return file + } + + if dir := a.files[name+"/"]; dir != nil { + return dir + } + + return nil +} + +// Open finds the file by name inside the zipArchive and returns a reader that can be served by the VFS +func (a *zipArchive) Open(ctx context.Context, name string) (vfs.File, error) { + file := a.findFile(name) + if file == nil { + return nil, os.ErrNotExist + } + + // TODO: cache dataOffsets of files https://gitlab.com/gitlab-org/gitlab-pages/-/issues/461 + dataOffset, err := file.DataOffset() + if err != nil { + return nil, err + } + + // only read from dataOffset up to the size of the compressed file + reader := a.reader.SectionReader(dataOffset, int64(file.CompressedSize64)) + + switch file.Method { + case zip.Deflate: + return newDeflateReader(reader), nil + case zip.Store: + return reader, nil + default: + return nil, fmt.Errorf("unsupported compression method: %x", file.Method) + } +} + +// Lstat finds the file by name inside the zipArchive and returns its FileInfo +func (a *zipArchive) Lstat(ctx context.Context, name string) (os.FileInfo, error) { + file := a.findFile(name) + if file == nil { + return nil, os.ErrNotExist + } + + return file.FileInfo(), nil +} + +// ReadLink finds the file by name inside the zipArchive and returns the contents of the symlink +func (a *zipArchive) Readlink(ctx context.Context, name string) (string, error) { + file := a.findFile(name) + if file == nil { + return "", os.ErrNotExist + } + + if file.FileInfo().Mode()&os.ModeSymlink != os.ModeSymlink { + return "", errNotSymlink + } + + rc, err := file.Open() + if err != nil { + return "", err + } + defer rc.Close() + + symlink := make([]byte, maxSymlinkSize+1) + + // read up to len(symlink) bytes from the link file + n, err := io.ReadFull(rc, symlink) + if err != nil && err != io.ErrUnexpectedEOF { + // if err == io.ErrUnexpectedEOF the link is smaller than len(symlink) so it's OK to not return it + return "", err + } + + // return errSymlinkSize if the number of bytes read from the link is too big + if n > maxSymlinkSize { + return "", errSymlinkSize + } + + // only return the n bytes read from the link + return string(symlink[:n]), nil +} diff --git a/internal/vfs/zip/archive_test.go b/internal/vfs/zip/archive_test.go new file mode 100644 index 00000000..bb094038 --- /dev/null +++ b/internal/vfs/zip/archive_test.go @@ -0,0 +1,252 @@ +package zip + +import ( + "context" + "io/ioutil" + "net/http" + "net/http/httptest" + "os" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "gitlab.com/gitlab-org/gitlab-pages/internal/testhelpers" +) + +var chdirSet = false + +func TestOpen(t *testing.T) { + zip, cleanup := openZipArchive(t) + defer cleanup() + + tests := map[string]struct { + file string + expectedContent string + expectedErr error + }{ + "file_exists": { + file: "index.html", + expectedContent: "zip.gitlab.io/project/index.html\n", + expectedErr: nil, + }, + "file_exists_in_subdir": { + file: "subdir/hello.html", + expectedContent: "zip.gitlab.io/project/subdir/hello.html\n", + expectedErr: nil, + }, + "file_exists_symlink": { + file: "symlink.html", + expectedContent: "subdir/linked.html", + expectedErr: nil, + }, + "is_dir": { + file: "subdir", + expectedErr: nil, + }, + "file_does_not_exist": { + file: "unknown.html", + expectedErr: os.ErrNotExist, + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + f, err := zip.Open(context.Background(), tt.file) + if tt.expectedErr != nil { + require.EqualError(t, err, tt.expectedErr.Error()) + return + } + + require.NoError(t, err) + + if tt.expectedContent == "" { + // cannot ioutil.ReadAll dirs but zip.Open should not fail + return + } + + data, err := ioutil.ReadAll(f) + require.NoError(t, err) + + require.Equal(t, tt.expectedContent, string(data)) + require.NoError(t, f.Close()) + }) + } +} + +func TestLstat(t *testing.T) { + zip, cleanup := openZipArchive(t) + defer cleanup() + + tests := map[string]struct { + file string + isDir bool + isSymlink bool + expectedErr error + }{ + "file_exists": { + file: "index.html", + }, + "file_exists_in_subdir": { + file: "subdir/hello.html", + }, + "file_exists_symlink": { + file: "symlink.html", + isSymlink: true, + }, + "is_dir": { + file: "subdir", + isDir: true, + }, + "file_does_not_exist": { + file: "unknown.html", + expectedErr: os.ErrNotExist, + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + fi, err := zip.Lstat(context.Background(), tt.file) + if tt.expectedErr != nil { + require.EqualError(t, err, tt.expectedErr.Error()) + return + } + + require.NoError(t, err) + require.Contains(t, tt.file, fi.Name()) + require.Equal(t, tt.isDir, fi.IsDir()) + require.NotEmpty(t, fi.ModTime()) + + if tt.isDir { + require.Zero(t, fi.Size()) + require.True(t, fi.IsDir()) + return + } + + require.NotZero(t, fi.Size()) + + if tt.isSymlink { + require.NotZero(t, fi.Mode()&os.ModeSymlink) + } else { + require.True(t, fi.Mode().IsRegular()) + } + }) + } +} + +func TestReadLink(t *testing.T) { + zip, cleanup := openZipArchive(t) + defer cleanup() + + tests := map[string]struct { + file string + expectedErr error + }{ + "symlink_success": { + file: "symlink.html", + }, + "file": { + file: "index.html", + expectedErr: errNotSymlink, + }, + "dir": { + file: "subdir", + expectedErr: errNotSymlink, + }, + "symlink_too_big": { + file: "bad_symlink.html", + expectedErr: errSymlinkSize, + }, + "file_does_not_exist": { + file: "unknown.html", + expectedErr: os.ErrNotExist, + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + link, err := zip.Readlink(context.Background(), tt.file) + if tt.expectedErr != nil { + require.EqualError(t, err, tt.expectedErr.Error()) + return + } + + require.NoError(t, err) + require.NotEmpty(t, link) + }) + } +} + +func TestArchiveCanBeReadAfterOpenCtxCanceled(t *testing.T) { + testServerURL, cleanup := newZipFileServerURL(t, "group/zip.gitlab.io/public.zip") + defer cleanup() + + zip := newArchive(testServerURL+"/public.zip", time.Second) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + err := zip.openArchive(ctx) + require.EqualError(t, err, context.Canceled.Error()) + + <-zip.done + + file, err := zip.Open(context.Background(), "index.html") + require.NoError(t, err) + data, err := ioutil.ReadAll(file) + require.NoError(t, err) + + require.Equal(t, "zip.gitlab.io/project/index.html\n", string(data)) + require.NoError(t, file.Close()) +} + +func TestReadArchiveFails(t *testing.T) { + testServerURL, cleanup := newZipFileServerURL(t, "group/zip.gitlab.io/public.zip") + defer cleanup() + + zip := newArchive(testServerURL+"/unkown.html", time.Second) + + err := zip.openArchive(context.Background()) + require.Error(t, err) + require.Contains(t, err.Error(), "Not Found") + + _, err = zip.Open(context.Background(), "index.html") + require.EqualError(t, err, os.ErrNotExist.Error()) +} + +func openZipArchive(t *testing.T) (*zipArchive, func()) { + t.Helper() + + testServerURL, cleanup := newZipFileServerURL(t, "group/zip.gitlab.io/public.zip") + + zip := newArchive(testServerURL+"/public.zip", time.Second) + + err := zip.openArchive(context.Background()) + require.NoError(t, err) + + // public/ public/index.html public/404.html public/symlink.html + // public/subdir/ public/subdir/hello.html public/subdir/linked.html + // public/bad_symlink.html public/subdir/2bp3Qzs... + require.NotZero(t, zip.files) + + return zip, func() { + cleanup() + } +} + +func newZipFileServerURL(t *testing.T, zipFilePath string) (string, func()) { + t.Helper() + + chdir := testhelpers.ChdirInPath(t, "../../../shared/pages", &chdirSet) + + m := http.NewServeMux() + m.HandleFunc("/public.zip", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.ServeFile(w, r, zipFilePath) + })) + + testServer := httptest.NewServer(m) + + return testServer.URL, func() { + chdir() + testServer.Close() + } +} diff --git a/internal/vfs/zip/deflate_reader.go b/internal/vfs/zip/deflate_reader.go new file mode 100644 index 00000000..16a2d72e --- /dev/null +++ b/internal/vfs/zip/deflate_reader.go @@ -0,0 +1,31 @@ +package zip + +import ( + "compress/flate" + "io" +) + +// deflateReader wrapper to support reading compressed files. +// Implements the io.ReadCloser interface. +type deflateReader struct { + reader io.ReadCloser + flateReader io.ReadCloser +} + +// Read from flateReader +func (r *deflateReader) Read(p []byte) (n int, err error) { + return r.flateReader.Read(p) +} + +// Close all readers +func (r *deflateReader) Close() error { + r.reader.Close() + return r.flateReader.Close() +} + +func newDeflateReader(r io.ReadCloser) *deflateReader { + return &deflateReader{ + reader: r, + flateReader: flate.NewReader(r), + } +} diff --git a/shared/pages/group/zip.gitlab.io/public.zip b/shared/pages/group/zip.gitlab.io/public.zip Binary files differnew file mode 100644 index 00000000..f1278bce --- /dev/null +++ b/shared/pages/group/zip.gitlab.io/public.zip |