Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-pages.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Vladimir Shushlin <vshushlin@gitlab.com> 2020-09-21 14:00:00 +0300
committer Vladimir Shushlin <vshushlin@gitlab.com> 2020-09-21 14:00:00 +0300
commit 78a56d61856a6d499225dfabdfb3ff1d27eeb8d0 (patch)
tree 6a8e354a80a1b6786b2b05ebe23f63bc3811e313
parent b58d528f947b9f5440e163386a49f4d581290982 (diff)
parent 92bef3ba5a9ebc9aad6abe661c900e13fdd27580 (diff)
Merge branch '443-add-zip-package-to-vfs' into 'master'
Add zip package to VFS. See merge request gitlab-org/gitlab-pages!348
-rw-r--r-- internal/httprange/http_reader.go 4
-rw-r--r-- internal/serving/disk/reader.go 13
-rw-r--r-- internal/vfs/file.go 7
-rw-r--r-- internal/vfs/zip/archive.go 207
-rw-r--r-- internal/vfs/zip/archive_test.go 252
-rw-r--r-- internal/vfs/zip/deflate_reader.go 31
-rw-r--r-- shared/pages/group/zip.gitlab.io/public.zip bin 0 -> 2415 bytes
7 files changed, 511 insertions, 3 deletions
diff --git a/internal/httprange/http_reader.go b/internal/httprange/http_reader.go
index 474b589f..4e7db6bc 100644
--- a/internal/httprange/http_reader.go
+++ b/internal/httprange/http_reader.go
@@ -8,6 +8,7 @@ import (
"time"
"gitlab.com/gitlab-org/gitlab-pages/internal/httptransport"
+ "gitlab.com/gitlab-org/gitlab-pages/internal/vfs"
"gitlab.com/gitlab-org/gitlab-pages/metrics"
)
@@ -41,6 +42,9 @@ type Reader struct {
offset int64
}
+// ensure that Reader is seekable
+var _ vfs.SeekableFile = &Reader{}
+
// TODO: make this configurable/take an http client when creating a reader/ranged reader
// instead https://gitlab.com/gitlab-org/gitlab-pages/-/issues/457
var httpClient = &http.Client{
diff --git a/internal/serving/disk/reader.go b/internal/serving/disk/reader.go
index 350d036b..5b34c556 100644
--- a/internal/serving/disk/reader.go
+++ b/internal/serving/disk/reader.go
@@ -194,10 +194,19 @@ func (reader *Reader) serveFile(ctx context.Context, w http.ResponseWriter, r *h
return err
}
+ w.Header().Set("Content-Type", contentType)
+
reader.fileSizeMetric.Observe(float64(fi.Size()))
- w.Header().Set("Content-Type", contentType)
- http.ServeContent(w, r, origPath, fi.ModTime(), file)
+ // Support vfs.SeekableFile if available (uncompressed files)
+ if rs, ok := file.(vfs.SeekableFile); ok {
+ http.ServeContent(w, r, origPath, fi.ModTime(), rs)
+ } else {
+ // compressed files will be served by io.Copy
+ // TODO: Add extra headers https://gitlab.com/gitlab-org/gitlab-pages/-/issues/466
+ w.Header().Set("Content-Length", strconv.FormatInt(fi.Size(), 10))
+ io.Copy(w, file)
+ }
return nil
}
diff --git a/internal/vfs/file.go b/internal/vfs/file.go
index 5260c847..47f68308 100644
--- a/internal/vfs/file.go
+++ b/internal/vfs/file.go
@@ -5,6 +5,11 @@ import "io"
// File represents an open file, which will typically be the response body of a Pages request.
type File interface {
io.Reader
- io.Seeker
io.Closer
}
+
+// SeekableFile represents a seekable file, which will typically be the response body of a Pages request.
+type SeekableFile interface {
+ File
+ io.Seeker
+}
diff --git a/internal/vfs/zip/archive.go b/internal/vfs/zip/archive.go
new file mode 100644
index 00000000..a9265b08
--- /dev/null
+++ b/internal/vfs/zip/archive.go
@@ -0,0 +1,207 @@
+package zip
+
+import (
+ "archive/zip"
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "strings"
+ "sync"
+ "time"
+
+ log "github.com/sirupsen/logrus"
+
+ "gitlab.com/gitlab-org/gitlab-pages/internal/httprange"
+ "gitlab.com/gitlab-org/gitlab-pages/internal/vfs"
+)
+
+const (
+ dirPrefix = "public/"
+ maxSymlinkSize = 256
+
+ // DefaultOpenTimeout is the default timeout to request an archive and read its contents the first time
+ DefaultOpenTimeout = 30 * time.Second
+)
+
+var (
+ errNotSymlink = errors.New("not a symlink")
+ errSymlinkSize = errors.New("symlink too long")
+)
+
+// zipArchive implements the vfs.Root interface.
+// It represents a zip archive and keeps an index of its file entries (those under "public/") in memory.
+// It holds an httprange.Resource that can be read with httprange.RangedReader in chunks.
+type zipArchive struct {
+ path string
+ once sync.Once
+ done chan struct{}
+ openTimeout time.Duration
+
+ resource *httprange.Resource
+ reader *httprange.RangedReader
+ archive *zip.Reader
+ err error
+
+ // TODO: add metrics https://gitlab.com/gitlab-org/gitlab-pages/-/issues/423
+ files map[string]*zip.File
+}
+
+func newArchive(path string, openTimeout time.Duration) *zipArchive {
+ return &zipArchive{
+ path: path,
+ done: make(chan struct{}),
+ files: make(map[string]*zip.File),
+ openTimeout: openTimeout,
+ }
+}
+
+func (a *zipArchive) openArchive(parentCtx context.Context) error {
+ ctx, cancel := context.WithTimeout(parentCtx, a.openTimeout)
+ defer cancel()
+
+ a.once.Do(func() {
+ // read archive once in its own routine with its own timeout
+ // if parentCtx is canceled, readArchive will continue regardless and will be cached in memory
+ go a.readArchive()
+ })
+
+ // wait for readArchive to be done, or return early if the parent context is canceled or openTimeout elapses
+ select {
+ case <-a.done:
+ return a.err
+ case <-ctx.Done():
+ err := ctx.Err()
+ switch err {
+ case context.Canceled:
+ log.WithError(err).Traceln("open zip archive request canceled")
+ case context.DeadlineExceeded:
+ log.WithError(err).Traceln("open zip archive timed out")
+ }
+
+ return err
+ }
+}
+
+// readArchive creates an httprange.Resource that can read the archive's contents and stores a map of *zip.File
+// entries that can be accessed later when calling any of the vfs.VFS operations
+func (a *zipArchive) readArchive() {
+ defer close(a.done)
+
+ // readArchive with a timeout separate from openArchive's
+ ctx, cancel := context.WithTimeout(context.Background(), a.openTimeout)
+ defer cancel()
+
+ a.resource, a.err = httprange.NewResource(ctx, a.path)
+ if a.err != nil {
+ return
+ }
+
+ // read the archive's directory of file entries using a cached ranged reader
+ a.reader = httprange.NewRangedReader(a.resource)
+ a.reader.WithCachedReader(func() {
+ a.archive, a.err = zip.NewReader(a.reader, a.resource.Size)
+ })
+
+ if a.archive == nil {
+ return
+ }
+
+ // TODO: Improve preprocessing of zip archives https://gitlab.com/gitlab-org/gitlab-pages/-/issues/432
+ for _, file := range a.archive.File {
+ if !strings.HasPrefix(file.Name, dirPrefix) {
+ continue
+ }
+ a.files[file.Name] = file
+ }
+
+ // recycle memory
+ a.archive.File = nil
+}
+
+func (a *zipArchive) findFile(name string) *zip.File {
+ name = filepath.Join(dirPrefix, name)
+
+ if file := a.files[name]; file != nil {
+ return file
+ }
+
+ if dir := a.files[name+"/"]; dir != nil {
+ return dir
+ }
+
+ return nil
+}
+
+// Open finds the file by name inside the zipArchive and returns a reader that can be served by the VFS
+func (a *zipArchive) Open(ctx context.Context, name string) (vfs.File, error) {
+ file := a.findFile(name)
+ if file == nil {
+ return nil, os.ErrNotExist
+ }
+
+ // TODO: cache dataOffsets of files https://gitlab.com/gitlab-org/gitlab-pages/-/issues/461
+ dataOffset, err := file.DataOffset()
+ if err != nil {
+ return nil, err
+ }
+
+ // only read from dataOffset up to the size of the compressed file
+ reader := a.reader.SectionReader(dataOffset, int64(file.CompressedSize64))
+
+ switch file.Method {
+ case zip.Deflate:
+ return newDeflateReader(reader), nil
+ case zip.Store:
+ return reader, nil
+ default:
+ return nil, fmt.Errorf("unsupported compression method: %x", file.Method)
+ }
+}
+
+// Lstat finds the file by name inside the zipArchive and returns its FileInfo
+func (a *zipArchive) Lstat(ctx context.Context, name string) (os.FileInfo, error) {
+ file := a.findFile(name)
+ if file == nil {
+ return nil, os.ErrNotExist
+ }
+
+ return file.FileInfo(), nil
+}
+
+// Readlink finds the file by name inside the zipArchive and returns the contents of the symlink
+func (a *zipArchive) Readlink(ctx context.Context, name string) (string, error) {
+ file := a.findFile(name)
+ if file == nil {
+ return "", os.ErrNotExist
+ }
+
+ if file.FileInfo().Mode()&os.ModeSymlink != os.ModeSymlink {
+ return "", errNotSymlink
+ }
+
+ rc, err := file.Open()
+ if err != nil {
+ return "", err
+ }
+ defer rc.Close()
+
+ symlink := make([]byte, maxSymlinkSize+1)
+
+ // read up to len(symlink) bytes from the link file
+ n, err := io.ReadFull(rc, symlink)
+ if err != nil && err != io.ErrUnexpectedEOF {
+ // if err == io.ErrUnexpectedEOF the link is smaller than len(symlink) so it's OK to not return it
+ return "", err
+ }
+
+ // return errSymlinkSize if the number of bytes read from the link is too big
+ if n > maxSymlinkSize {
+ return "", errSymlinkSize
+ }
+
+ // only return the n bytes read from the link
+ return string(symlink[:n]), nil
+}
diff --git a/internal/vfs/zip/archive_test.go b/internal/vfs/zip/archive_test.go
new file mode 100644
index 00000000..bb094038
--- /dev/null
+++ b/internal/vfs/zip/archive_test.go
@@ -0,0 +1,252 @@
+package zip
+
+import (
+ "context"
+ "io/ioutil"
+ "net/http"
+ "net/http/httptest"
+ "os"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/require"
+
+ "gitlab.com/gitlab-org/gitlab-pages/internal/testhelpers"
+)
+
+var chdirSet = false
+
+func TestOpen(t *testing.T) {
+ zip, cleanup := openZipArchive(t)
+ defer cleanup()
+
+ tests := map[string]struct {
+ file string
+ expectedContent string
+ expectedErr error
+ }{
+ "file_exists": {
+ file: "index.html",
+ expectedContent: "zip.gitlab.io/project/index.html\n",
+ expectedErr: nil,
+ },
+ "file_exists_in_subdir": {
+ file: "subdir/hello.html",
+ expectedContent: "zip.gitlab.io/project/subdir/hello.html\n",
+ expectedErr: nil,
+ },
+ "file_exists_symlink": {
+ file: "symlink.html",
+ expectedContent: "subdir/linked.html",
+ expectedErr: nil,
+ },
+ "is_dir": {
+ file: "subdir",
+ expectedErr: nil,
+ },
+ "file_does_not_exist": {
+ file: "unknown.html",
+ expectedErr: os.ErrNotExist,
+ },
+ }
+
+ for name, tt := range tests {
+ t.Run(name, func(t *testing.T) {
+ f, err := zip.Open(context.Background(), tt.file)
+ if tt.expectedErr != nil {
+ require.EqualError(t, err, tt.expectedErr.Error())
+ return
+ }
+
+ require.NoError(t, err)
+
+ if tt.expectedContent == "" {
+ // cannot ioutil.ReadAll dirs but zip.Open should not fail
+ return
+ }
+
+ data, err := ioutil.ReadAll(f)
+ require.NoError(t, err)
+
+ require.Equal(t, tt.expectedContent, string(data))
+ require.NoError(t, f.Close())
+ })
+ }
+}
+
+func TestLstat(t *testing.T) {
+ zip, cleanup := openZipArchive(t)
+ defer cleanup()
+
+ tests := map[string]struct {
+ file string
+ isDir bool
+ isSymlink bool
+ expectedErr error
+ }{
+ "file_exists": {
+ file: "index.html",
+ },
+ "file_exists_in_subdir": {
+ file: "subdir/hello.html",
+ },
+ "file_exists_symlink": {
+ file: "symlink.html",
+ isSymlink: true,
+ },
+ "is_dir": {
+ file: "subdir",
+ isDir: true,
+ },
+ "file_does_not_exist": {
+ file: "unknown.html",
+ expectedErr: os.ErrNotExist,
+ },
+ }
+
+ for name, tt := range tests {
+ t.Run(name, func(t *testing.T) {
+ fi, err := zip.Lstat(context.Background(), tt.file)
+ if tt.expectedErr != nil {
+ require.EqualError(t, err, tt.expectedErr.Error())
+ return
+ }
+
+ require.NoError(t, err)
+ require.Contains(t, tt.file, fi.Name())
+ require.Equal(t, tt.isDir, fi.IsDir())
+ require.NotEmpty(t, fi.ModTime())
+
+ if tt.isDir {
+ require.Zero(t, fi.Size())
+ require.True(t, fi.IsDir())
+ return
+ }
+
+ require.NotZero(t, fi.Size())
+
+ if tt.isSymlink {
+ require.NotZero(t, fi.Mode()&os.ModeSymlink)
+ } else {
+ require.True(t, fi.Mode().IsRegular())
+ }
+ })
+ }
+}
+
+func TestReadLink(t *testing.T) {
+ zip, cleanup := openZipArchive(t)
+ defer cleanup()
+
+ tests := map[string]struct {
+ file string
+ expectedErr error
+ }{
+ "symlink_success": {
+ file: "symlink.html",
+ },
+ "file": {
+ file: "index.html",
+ expectedErr: errNotSymlink,
+ },
+ "dir": {
+ file: "subdir",
+ expectedErr: errNotSymlink,
+ },
+ "symlink_too_big": {
+ file: "bad_symlink.html",
+ expectedErr: errSymlinkSize,
+ },
+ "file_does_not_exist": {
+ file: "unknown.html",
+ expectedErr: os.ErrNotExist,
+ },
+ }
+
+ for name, tt := range tests {
+ t.Run(name, func(t *testing.T) {
+ link, err := zip.Readlink(context.Background(), tt.file)
+ if tt.expectedErr != nil {
+ require.EqualError(t, err, tt.expectedErr.Error())
+ return
+ }
+
+ require.NoError(t, err)
+ require.NotEmpty(t, link)
+ })
+ }
+}
+
+func TestArchiveCanBeReadAfterOpenCtxCanceled(t *testing.T) {
+ testServerURL, cleanup := newZipFileServerURL(t, "group/zip.gitlab.io/public.zip")
+ defer cleanup()
+
+ zip := newArchive(testServerURL+"/public.zip", time.Second)
+ ctx, cancel := context.WithCancel(context.Background())
+ cancel()
+
+ err := zip.openArchive(ctx)
+ require.EqualError(t, err, context.Canceled.Error())
+
+ <-zip.done
+
+ file, err := zip.Open(context.Background(), "index.html")
+ require.NoError(t, err)
+ data, err := ioutil.ReadAll(file)
+ require.NoError(t, err)
+
+ require.Equal(t, "zip.gitlab.io/project/index.html\n", string(data))
+ require.NoError(t, file.Close())
+}
+
+func TestReadArchiveFails(t *testing.T) {
+ testServerURL, cleanup := newZipFileServerURL(t, "group/zip.gitlab.io/public.zip")
+ defer cleanup()
+
+ zip := newArchive(testServerURL+"/unkown.html", time.Second)
+
+ err := zip.openArchive(context.Background())
+ require.Error(t, err)
+ require.Contains(t, err.Error(), "Not Found")
+
+ _, err = zip.Open(context.Background(), "index.html")
+ require.EqualError(t, err, os.ErrNotExist.Error())
+}
+
+func openZipArchive(t *testing.T) (*zipArchive, func()) {
+ t.Helper()
+
+ testServerURL, cleanup := newZipFileServerURL(t, "group/zip.gitlab.io/public.zip")
+
+ zip := newArchive(testServerURL+"/public.zip", time.Second)
+
+ err := zip.openArchive(context.Background())
+ require.NoError(t, err)
+
+ // public/ public/index.html public/404.html public/symlink.html
+ // public/subdir/ public/subdir/hello.html public/subdir/linked.html
+ // public/bad_symlink.html public/subdir/2bp3Qzs...
+ require.NotZero(t, zip.files)
+
+ return zip, func() {
+ cleanup()
+ }
+}
+
+func newZipFileServerURL(t *testing.T, zipFilePath string) (string, func()) {
+ t.Helper()
+
+ chdir := testhelpers.ChdirInPath(t, "../../../shared/pages", &chdirSet)
+
+ m := http.NewServeMux()
+ m.HandleFunc("/public.zip", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ http.ServeFile(w, r, zipFilePath)
+ }))
+
+ testServer := httptest.NewServer(m)
+
+ return testServer.URL, func() {
+ chdir()
+ testServer.Close()
+ }
+}
diff --git a/internal/vfs/zip/deflate_reader.go b/internal/vfs/zip/deflate_reader.go
new file mode 100644
index 00000000..16a2d72e
--- /dev/null
+++ b/internal/vfs/zip/deflate_reader.go
@@ -0,0 +1,31 @@
+package zip
+
+import (
+ "compress/flate"
+ "io"
+)
+
+// deflateReader is a wrapper that supports reading deflate-compressed files.
+// Implements the io.ReadCloser interface.
+type deflateReader struct {
+ reader io.ReadCloser
+ flateReader io.ReadCloser
+}
+
+// Read from flateReader
+func (r *deflateReader) Read(p []byte) (n int, err error) {
+ return r.flateReader.Read(p)
+}
+
+// Close all readers
+func (r *deflateReader) Close() error {
+ r.reader.Close()
+ return r.flateReader.Close()
+}
+
+func newDeflateReader(r io.ReadCloser) *deflateReader {
+ return &deflateReader{
+ reader: r,
+ flateReader: flate.NewReader(r),
+ }
+}
diff --git a/shared/pages/group/zip.gitlab.io/public.zip b/shared/pages/group/zip.gitlab.io/public.zip
new file mode 100644
index 00000000..f1278bce
--- /dev/null
+++ b/shared/pages/group/zip.gitlab.io/public.zip
Binary files differ