gitlab.com/gitlab-org/gitlab-pages.git
author     Jaime Martinez <jmartinez@gitlab.com>  2020-09-09 09:23:02 +0300
committer  Vladimir Shushlin <v.shushlin@gmail.com>  2020-09-21 13:53:18 +0300
commit     e0dc0e38b1ca181caa2e85df7561da36aef8054e (patch)
tree       62f90b1cdc41917f51e1195dfffc051d676ed5ed /internal
parent     b58d528f947b9f5440e163386a49f4d581290982 (diff)
Add zip package that implements vfs.Root
Adds a zip package capable of reading zip files from an httprange.Resource.
It reads archive file contents into memory per archive that is requested
from the vfs.Root.

WIP: add simple test for archive reader
WIP: fix build
WIP: update archive test
WIP: adding tests for archive
WIP: print more info
WIP: update zip file correct symlink
WIP: use correct file
Add bad symlink file to zip
Update handling long symlinks
update documentation and reorg code
fix up stuff
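For orientation, a minimal sketch of how the new root is exercised from inside the package, mirroring the flow of archive_test.go below. The example function and the URL are illustrative only and are not part of this commit:

    package zip

    import (
        "context"
        "fmt"
        "io/ioutil"
    )

    // exampleUsage is an illustrative sketch (not part of this commit): lazily
    // open the remote archive, then read one entry through the vfs.File
    // returned by Open.
    func exampleUsage(ctx context.Context) error {
        // placeholder URL; in Pages this points at a project's zip artifact
        archive := newArchive("https://example.test/public.zip")

        // fetches the archive over HTTP ranges and indexes entries under public/
        if err := archive.openArchive(ctx); err != nil {
            return err
        }

        f, err := archive.Open(ctx, "index.html")
        if err != nil {
            return err
        }
        defer f.Close()

        data, err := ioutil.ReadAll(f)
        if err != nil {
            return err
        }

        fmt.Printf("read %d bytes\n", len(data))
        return nil
    }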
Diffstat (limited to 'internal')
-rw-r--r--  internal/httprange/http_reader.go    |   4
-rw-r--r--  internal/serving/disk/reader.go      |  11
-rw-r--r--  internal/vfs/file.go                 |   7
-rw-r--r--  internal/vfs/zip/archive.go          | 183
-rw-r--r--  internal/vfs/zip/archive_test.go     | 213
-rw-r--r--  internal/vfs/zip/deflate_reader.go   |  27
6 files changed, 442 insertions(+), 3 deletions(-)
diff --git a/internal/httprange/http_reader.go b/internal/httprange/http_reader.go
index 474b589f..4e7db6bc 100644
--- a/internal/httprange/http_reader.go
+++ b/internal/httprange/http_reader.go
@@ -8,6 +8,7 @@ import (
"time"
"gitlab.com/gitlab-org/gitlab-pages/internal/httptransport"
+ "gitlab.com/gitlab-org/gitlab-pages/internal/vfs"
"gitlab.com/gitlab-org/gitlab-pages/metrics"
)
@@ -41,6 +42,9 @@ type Reader struct {
offset int64
}
+// ensure that Reader is seekable
+var _ vfs.SeekableFile = &Reader{}
+
// TODO: make this configurable/take an http client when creating a reader/ranged reader
// instead https://gitlab.com/gitlab-org/gitlab-pages/-/issues/457
var httpClient = &http.Client{
diff --git a/internal/serving/disk/reader.go b/internal/serving/disk/reader.go
index 350d036b..a0c0ed0d 100644
--- a/internal/serving/disk/reader.go
+++ b/internal/serving/disk/reader.go
@@ -194,10 +194,17 @@ func (reader *Reader) serveFile(ctx context.Context, w http.ResponseWriter, r *h
return err
}
+ w.Header().Set("Content-Type", contentType)
+
reader.fileSizeMetric.Observe(float64(fi.Size()))
- w.Header().Set("Content-Type", contentType)
- http.ServeContent(w, r, origPath, fi.ModTime(), file)
+ // Support SeekableFile if available
+ if rs, ok := file.(vfs.SeekableFile); ok {
+ http.ServeContent(w, r, origPath, fi.ModTime(), rs)
+ } else {
+ w.Header().Set("Content-Length", strconv.FormatInt(fi.Size(), 10))
+ io.Copy(w, file)
+ }
return nil
}
diff --git a/internal/vfs/file.go b/internal/vfs/file.go
index 5260c847..47f68308 100644
--- a/internal/vfs/file.go
+++ b/internal/vfs/file.go
@@ -5,6 +5,11 @@ import "io"
// File represents an open file, which will typically be the response body of a Pages request.
type File interface {
io.Reader
- io.Seeker
io.Closer
}
+
+// SeekableFile represents a seekable file, which will typically be the response body of a Pages request.
+type SeekableFile interface {
+ File
+ io.Seeker
+}
diff --git a/internal/vfs/zip/archive.go b/internal/vfs/zip/archive.go
new file mode 100644
index 00000000..1d64e148
--- /dev/null
+++ b/internal/vfs/zip/archive.go
@@ -0,0 +1,183 @@
+package zip
+
+import (
+ "archive/zip"
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "strings"
+ "sync"
+
+ log "github.com/sirupsen/logrus"
+
+ "gitlab.com/gitlab-org/gitlab-pages/internal/httprange"
+ "gitlab.com/gitlab-org/gitlab-pages/internal/vfs"
+)
+
+const (
+ dirPrefix = "public/"
+ maxSymlinkSize = 256
+)
+
+var (
+ errNotSymlink = errors.New("not a symlink")
+ errSymlinkSize = errors.New("symlink too long")
+)
+
+// zipArchive implements the vfs.Root interface.
+// It represents a zip archive saving all its files in memory.
+// It holds an httprange.Resource that can be read with httprange.RangedReader in chunks.
+type zipArchive struct {
+ path string
+ once sync.Once
+ done chan struct{}
+
+ resource *httprange.Resource
+ reader *httprange.RangedReader
+ archive *zip.Reader
+ err error
+
+ files map[string]*zip.File
+}
+
+func newArchive(path string) *zipArchive {
+ return &zipArchive{
+ path: path,
+ done: make(chan struct{}),
+ files: make(map[string]*zip.File),
+ }
+}
+
+func (a *zipArchive) openArchive(ctx context.Context) error {
+ a.once.Do(func() {
+ a.readArchive(ctx)
+ })
+
+ // wait for readArchive to be done or return when the context is canceled
+ select {
+ case <-a.done:
+ return a.err
+ case <-ctx.Done():
+ err := ctx.Err()
+ log.WithError(err).Traceln("open zip archive timed out")
+
+ return err
+ }
+}
+
+func (a *zipArchive) readArchive(ctx context.Context) {
+ defer close(a.done)
+
+ a.resource, a.err = httprange.NewResource(ctx, a.path)
+ if a.err != nil {
+ return
+ }
+
+ // load all archive files into memory using a cached ranged reader
+ a.reader = httprange.NewRangedReader(a.resource)
+ a.reader.WithCachedReader(func() {
+ a.archive, a.err = zip.NewReader(a.reader, a.resource.Size)
+ })
+
+ if a.archive != nil {
+ for _, file := range a.archive.File {
+ if !strings.HasPrefix(file.Name, dirPrefix) {
+ continue
+ }
+ a.files[file.Name] = file
+ }
+
+ // recycle memory
+ a.archive.File = nil
+ }
+}
+
+func (a *zipArchive) findFile(name string) *zip.File {
+ name = filepath.Join(dirPrefix, name)
+
+ if file := a.files[name]; file != nil {
+ return file
+ }
+
+ if dir := a.files[name+"/"]; dir != nil {
+ return dir
+ }
+
+ return nil
+}
+
+// Open finds the file by name inside the zipArchive and returns a reader that can be served by the VFS
+func (a *zipArchive) Open(ctx context.Context, name string) (vfs.File, error) {
+ file := a.findFile(name)
+ if file == nil {
+ return nil, os.ErrNotExist
+ }
+
+ dataOffset, err := file.DataOffset()
+ if err != nil {
+ return nil, err
+ }
+
+ // only read from dataOffset up to the size of the compressed file
+ reader := a.reader.SectionReader(dataOffset, int64(file.CompressedSize64))
+
+ switch file.Method {
+ case zip.Deflate:
+ return newDeflateReader(reader), nil
+ case zip.Store:
+ return reader, nil
+ default:
+ return nil, fmt.Errorf("unsupported compression method: %x", file.Method)
+ }
+}
+
+// Lstat finds the file by name inside the zipArchive and returns its FileInfo
+func (a *zipArchive) Lstat(ctx context.Context, name string) (os.FileInfo, error) {
+ file := a.findFile(name)
+ if file == nil {
+ return nil, os.ErrNotExist
+ }
+
+ return file.FileInfo(), nil
+}
+
+// Readlink finds the file by name inside the zipArchive and returns the target path of the symlink
+func (a *zipArchive) Readlink(ctx context.Context, name string) (string, error) {
+ file := a.findFile(name)
+ if file == nil {
+ return "", os.ErrNotExist
+ }
+
+ if file.FileInfo().Mode()&os.ModeSymlink != os.ModeSymlink {
+ return "", errNotSymlink
+ }
+
+ rc, err := file.Open()
+ if err != nil {
+ return "", err
+ }
+ defer rc.Close()
+
+ symlink := make([]byte, maxSymlinkSize+1)
+
+ // read up to len(symlink) bytes from the link file
+ n, err := io.ReadFull(rc, symlink)
+ if err != nil && err != io.ErrUnexpectedEOF {
+ // if err == io.ErrUnexpectedEOF, the link is smaller than len(symlink), so it's OK to ignore the error
+ return "", err
+ }
+
+ // return errSymlinkSize if the number of bytes read from the link is too big
+ if n > maxSymlinkSize {
+ return "", errSymlinkSize
+ }
+
+ // only return the n bytes read from the link
+ return string(symlink[:n]), nil
+}
+
+// close is a no-op: everything can be recycled by the GC
+func (a *zipArchive) close() {}
diff --git a/internal/vfs/zip/archive_test.go b/internal/vfs/zip/archive_test.go
new file mode 100644
index 00000000..05eab3b6
--- /dev/null
+++ b/internal/vfs/zip/archive_test.go
@@ -0,0 +1,213 @@
+package zip
+
+import (
+ "context"
+ "io/ioutil"
+ "net/http"
+ "net/http/httptest"
+ "os"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+
+ "gitlab.com/gitlab-org/gitlab-pages/internal/testhelpers"
+)
+
+var chdirSet = false
+
+func TestOpen(t *testing.T) {
+ zip, cleanup := openZipArchive(t)
+ defer cleanup()
+
+ tests := map[string]struct {
+ file string
+ expectedContent string
+ expectedErr error
+ }{
+ "file_exists": {
+ file: "index.html",
+ expectedContent: "zip.gitlab.io/project/index.html\n",
+ expectedErr: nil,
+ },
+ "file_exists_in_subdir": {
+ file: "subdir/hello.html",
+ expectedContent: "zip.gitlab.io/project/subdir/hello.html\n",
+ expectedErr: nil,
+ },
+ "file_exists_symlink": {
+ file: "symlink.html",
+ expectedContent: "subdir/linked.html",
+ expectedErr: nil,
+ },
+ "is_dir": {
+ file: "subdir",
+ expectedContent: "zip.gitlab.io/project/index.html\n",
+ expectedErr: nil,
+ },
+ "file_does_not_exist": {
+ file: "unknown.html",
+ expectedErr: os.ErrNotExist,
+ },
+ }
+
+ for name, tt := range tests {
+ t.Run(name, func(t *testing.T) {
+ f, err := zip.Open(context.Background(), tt.file)
+ if tt.expectedErr != nil {
+ require.EqualError(t, err, tt.expectedErr.Error())
+ return
+ }
+
+ require.NoError(t, err)
+
+ if name == "is_dir" {
+ // directories cannot be read with ioutil.ReadAll, but zip.Open should not fail
+ return
+ }
+
+ data, err := ioutil.ReadAll(f)
+ require.NoError(t, err)
+
+ require.Equal(t, tt.expectedContent, string(data))
+ require.NoError(t, f.Close())
+ })
+ }
+}
+
+func TestLstat(t *testing.T) {
+ zip, cleanup := openZipArchive(t)
+ defer cleanup()
+
+ tests := map[string]struct {
+ file string
+ isDir bool
+ isSymlink bool
+ expectedErr error
+ }{
+ "file_exists": {
+ file: "index.html",
+ },
+ "file_exists_in_subdir": {
+ file: "subdir/hello.html",
+ },
+ "file_exists_symlink": {
+ file: "symlink.html",
+ isSymlink: true,
+ },
+ "is_dir": {
+ file: "subdir",
+ isDir: true,
+ },
+ "file_does_not_exist": {
+ file: "unknown.html",
+ expectedErr: os.ErrNotExist,
+ },
+ }
+
+ for name, tt := range tests {
+ t.Run(name, func(t *testing.T) {
+ fi, err := zip.Lstat(context.Background(), tt.file)
+ if tt.expectedErr != nil {
+ require.EqualError(t, err, tt.expectedErr.Error())
+ return
+ }
+
+ require.NoError(t, err)
+ require.Contains(t, tt.file, fi.Name())
+ require.Equal(t, tt.isDir, fi.IsDir())
+ require.NotEmpty(t, fi.ModTime())
+
+ if tt.isDir {
+ require.Zero(t, fi.Size())
+ require.True(t, fi.IsDir())
+ return
+ }
+
+ require.NotZero(t, fi.Size())
+
+ if tt.isSymlink {
+ require.NotZero(t, fi.Mode()&os.ModeSymlink)
+ } else {
+ require.True(t, fi.Mode().IsRegular())
+ }
+ })
+ }
+}
+
+func TestReadLink(t *testing.T) {
+ zip, cleanup := openZipArchive(t)
+ defer cleanup()
+
+ tests := map[string]struct {
+ file string
+ expectedErr error
+ }{
+ "symlink_success": {
+ file: "symlink.html",
+ },
+ "file": {
+ file: "index.html",
+ expectedErr: errNotSymlink,
+ },
+ "dir": {
+ file: "subdir",
+ expectedErr: errNotSymlink,
+ },
+ "symlink_too_big": {
+ file: "bad_symlink.html",
+ expectedErr: errSymlinkSize,
+ },
+ "file_does_not_exist": {
+ file: "unknown.html",
+ expectedErr: os.ErrNotExist,
+ },
+ }
+
+ for name, tt := range tests {
+ t.Run(name, func(t *testing.T) {
+ link, err := zip.Readlink(context.Background(), tt.file)
+ if tt.expectedErr != nil {
+ require.EqualError(t, err, tt.expectedErr.Error())
+ return
+ }
+
+ require.NoError(t, err)
+ require.NotEmpty(t, link)
+ })
+ }
+}
+func openZipArchive(t *testing.T) (*zipArchive, func()) {
+ t.Helper()
+
+ testServerURL, cleanup := newZipFileServerURL(t, "group/zip.gitlab.io/public.zip")
+
+ zip := newArchive(testServerURL + "/public.zip")
+
+ err := zip.openArchive(context.Background())
+ require.NoError(t, err)
+
+ // public/ public/index.html public/404.html public/symlink.html
+ // public/subdir/ public/subdir/hello.html public/subdir/linked.html
+ // public/bad_symlink.html public/subdir/2bp3Qzs...
+ require.NotZero(t, zip.files)
+
+ return zip, func() {
+ zip.close()
+ cleanup()
+ }
+}
+
+func newZipFileServerURL(t *testing.T, zipFilePath string) (string, func()) {
+ t.Helper()
+
+ chdir := testhelpers.ChdirInPath(t, "../../../shared/pages", &chdirSet)
+
+ testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ http.ServeFile(w, r, zipFilePath)
+ }))
+
+ return testServer.URL, func() {
+ chdir()
+ testServer.Close()
+ }
+}
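The tests above read a committed fixture at shared/pages/group/zip.gitlab.io/public.zip, whose layout is listed in the comment inside openZipArchive. As a hedged sketch only (the real fixture is a file checked into the repository, not generated by this commit), an equivalent archive with representative entries, a symlink, and an over-long symlink could be produced with archive/zip like this:

    package main

    import (
        "archive/zip"
        "os"
        "strings"
    )

    // Illustrative fixture generator, not part of this commit. It writes regular
    // entries under public/, a symlink entry, and a symlink whose target exceeds
    // maxSymlinkSize (256), which is what makes Readlink return errSymlinkSize.
    func main() {
        out, err := os.Create("public.zip")
        if err != nil {
            panic(err)
        }
        defer out.Close()

        w := zip.NewWriter(out)
        defer w.Close()

        // regular entries under public/
        for name, content := range map[string]string{
            "public/index.html":         "zip.gitlab.io/project/index.html\n",
            "public/subdir/hello.html":  "zip.gitlab.io/project/subdir/hello.html\n",
            "public/subdir/linked.html": "zip.gitlab.io/project/subdir/linked.html\n",
        } {
            f, err := w.Create(name)
            if err != nil {
                panic(err)
            }
            f.Write([]byte(content))
        }

        // a symlink entry stores its target path as the file body and sets os.ModeSymlink
        writeSymlink(w, "public/symlink.html", "subdir/linked.html")

        // a target longer than 256 bytes trips the errSymlinkSize check in Readlink
        writeSymlink(w, "public/bad_symlink.html", strings.Repeat("x", 300))
    }

    func writeSymlink(w *zip.Writer, name, target string) {
        hdr := &zip.FileHeader{Name: name, Method: zip.Deflate}
        hdr.SetMode(os.ModeSymlink | 0777)
        f, err := w.CreateHeader(hdr)
        if err != nil {
            panic(err)
        }
        f.Write([]byte(target))
    }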
diff --git a/internal/vfs/zip/deflate_reader.go b/internal/vfs/zip/deflate_reader.go
new file mode 100644
index 00000000..2e55ee5a
--- /dev/null
+++ b/internal/vfs/zip/deflate_reader.go
@@ -0,0 +1,27 @@
+package zip
+
+import (
+ "compress/flate"
+ "io"
+)
+
+type deflateReader struct {
+ R io.ReadCloser
+ D io.ReadCloser
+}
+
+func (r *deflateReader) Read(p []byte) (n int, err error) {
+ return r.D.Read(p)
+}
+
+func (r *deflateReader) Close() error {
+ r.R.Close()
+ return r.D.Close()
+}
+
+func newDeflateReader(r io.ReadCloser) *deflateReader {
+ return &deflateReader{
+ R: r,
+ D: flate.NewReader(r),
+ }
+}