diff options
Diffstat (limited to 'workhorse/internal/zipartifacts')
-rw-r--r-- | workhorse/internal/zipartifacts/.gitignore | 1 | ||||
-rw-r--r-- | workhorse/internal/zipartifacts/entry.go | 13 | ||||
-rw-r--r-- | workhorse/internal/zipartifacts/errors.go | 57 | ||||
-rw-r--r-- | workhorse/internal/zipartifacts/errors_test.go | 32 | ||||
-rw-r--r-- | workhorse/internal/zipartifacts/metadata.go | 117 | ||||
-rw-r--r-- | workhorse/internal/zipartifacts/metadata_test.go | 102 | ||||
-rw-r--r-- | workhorse/internal/zipartifacts/open_archive.go | 138 | ||||
-rw-r--r-- | workhorse/internal/zipartifacts/open_archive_test.go | 68 |
8 files changed, 528 insertions, 0 deletions
diff --git a/workhorse/internal/zipartifacts/.gitignore b/workhorse/internal/zipartifacts/.gitignore new file mode 100644 index 00000000000..ace1063ab02 --- /dev/null +++ b/workhorse/internal/zipartifacts/.gitignore @@ -0,0 +1 @@ +/testdata diff --git a/workhorse/internal/zipartifacts/entry.go b/workhorse/internal/zipartifacts/entry.go new file mode 100644 index 00000000000..527387ceaa1 --- /dev/null +++ b/workhorse/internal/zipartifacts/entry.go @@ -0,0 +1,13 @@ +package zipartifacts + +import ( + "encoding/base64" +) + +func DecodeFileEntry(entry string) (string, error) { + decoded, err := base64.StdEncoding.DecodeString(entry) + if err != nil { + return "", err + } + return string(decoded), nil +} diff --git a/workhorse/internal/zipartifacts/errors.go b/workhorse/internal/zipartifacts/errors.go new file mode 100644 index 00000000000..162816618f8 --- /dev/null +++ b/workhorse/internal/zipartifacts/errors.go @@ -0,0 +1,57 @@ +package zipartifacts + +import ( + "errors" +) + +// These are exit codes used by subprocesses in cmd/gitlab-zip-xxx. We also use +// them to map errors and error messages that we use as label in Prometheus. +const ( + CodeNotZip = 10 + iota + CodeEntryNotFound + CodeArchiveNotFound + CodeLimitsReached + CodeUnknownError +) + +var ( + ErrorCode = map[int]error{ + CodeNotZip: errors.New("zip archive format invalid"), + CodeEntryNotFound: errors.New("zip entry not found"), + CodeArchiveNotFound: errors.New("zip archive not found"), + CodeLimitsReached: errors.New("zip processing limits reached"), + CodeUnknownError: errors.New("zip processing unknown error"), + } + + ErrorLabel = map[int]string{ + CodeNotZip: "archive_invalid", + CodeEntryNotFound: "entry_not_found", + CodeArchiveNotFound: "archive_not_found", + CodeLimitsReached: "limits_reached", + CodeUnknownError: "unknown_error", + } + + ErrBadMetadata = errors.New("zip artifacts metadata invalid") +) + +// ExitCodeByError find an os.Exit code for a corresponding error. +// CodeUnkownError in case it can not be found. +func ExitCodeByError(err error) int { + for c, e := range ErrorCode { + if err == e { + return c + } + } + + return CodeUnknownError +} + +// ErrorLabelByCode returns a Prometheus counter label associated with an exit code. +func ErrorLabelByCode(code int) string { + label, ok := ErrorLabel[code] + if ok { + return label + } + + return ErrorLabel[CodeUnknownError] +} diff --git a/workhorse/internal/zipartifacts/errors_test.go b/workhorse/internal/zipartifacts/errors_test.go new file mode 100644 index 00000000000..6fce160b3bc --- /dev/null +++ b/workhorse/internal/zipartifacts/errors_test.go @@ -0,0 +1,32 @@ +package zipartifacts + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestExitCodeByError(t *testing.T) { + t.Run("when error has been recognized", func(t *testing.T) { + code := ExitCodeByError(ErrorCode[CodeLimitsReached]) + + require.Equal(t, code, CodeLimitsReached) + require.Greater(t, code, 10) + }) + + t.Run("when error is an unknown one", func(t *testing.T) { + code := ExitCodeByError(errors.New("unknown error")) + + require.Equal(t, code, CodeUnknownError) + require.Greater(t, code, 10) + }) +} + +func TestErrorLabels(t *testing.T) { + for code := range ErrorCode { + _, ok := ErrorLabel[code] + + require.True(t, ok) + } +} diff --git a/workhorse/internal/zipartifacts/metadata.go b/workhorse/internal/zipartifacts/metadata.go new file mode 100644 index 00000000000..1ecf52deafb --- /dev/null +++ b/workhorse/internal/zipartifacts/metadata.go @@ -0,0 +1,117 @@ +package zipartifacts + +import ( + "archive/zip" + "compress/gzip" + "encoding/binary" + "encoding/json" + "io" + "path" + "sort" + "strconv" +) + +type metadata struct { + Modified int64 `json:"modified,omitempty"` + Mode string `json:"mode,omitempty"` + CRC uint32 `json:"crc,omitempty"` + Size uint64 `json:"size,omitempty"` + Zipped uint64 `json:"zipped,omitempty"` + Comment string `json:"comment,omitempty"` +} + +const MetadataHeaderPrefix = "\x00\x00\x00&" // length of string below, encoded properly +const MetadataHeader = "GitLab Build Artifacts Metadata 0.0.2\n" + +func newMetadata(file *zip.File) metadata { + if file == nil { + return metadata{} + } + + return metadata{ + //lint:ignore SA1019 Remove this once the minimum supported version is go 1.10 (go 1.9 and down do not support an alternative) + Modified: file.ModTime().Unix(), + Mode: strconv.FormatUint(uint64(file.Mode().Perm()), 8), + CRC: file.CRC32, + Size: file.UncompressedSize64, + Zipped: file.CompressedSize64, + Comment: file.Comment, + } +} + +func (m metadata) writeEncoded(output io.Writer) error { + j, err := json.Marshal(m) + if err != nil { + return err + } + j = append(j, byte('\n')) + return writeBytes(output, j) +} + +func writeZipEntryMetadata(output io.Writer, path string, entry *zip.File) error { + if err := writeString(output, path); err != nil { + return err + } + + if err := newMetadata(entry).writeEncoded(output); err != nil { + return err + } + + return nil +} + +func GenerateZipMetadata(w io.Writer, archive *zip.Reader) error { + output := gzip.NewWriter(w) + defer output.Close() + + if err := writeString(output, MetadataHeader); err != nil { + return err + } + + // Write empty error header that we may need in the future + if err := writeString(output, "{}"); err != nil { + return err + } + + // Create map of files in zip archive + zipMap := make(map[string]*zip.File, len(archive.File)) + + // Add missing entries + for _, entry := range archive.File { + zipMap[entry.Name] = entry + + for d := path.Dir(entry.Name); d != "." && d != "/"; d = path.Dir(d) { + entryDir := d + "/" + if _, ok := zipMap[entryDir]; !ok { + zipMap[entryDir] = nil + } + } + } + + // Sort paths + sortedPaths := make([]string, 0, len(zipMap)) + for path := range zipMap { + sortedPaths = append(sortedPaths, path) + } + sort.Strings(sortedPaths) + + // Write all files + for _, path := range sortedPaths { + if err := writeZipEntryMetadata(output, path, zipMap[path]); err != nil { + return err + } + } + return nil +} + +func writeBytes(output io.Writer, data []byte) error { + err := binary.Write(output, binary.BigEndian, uint32(len(data))) + if err == nil { + _, err = output.Write(data) + } + return err +} + +func writeString(output io.Writer, str string) error { + return writeBytes(output, []byte(str)) +} diff --git a/workhorse/internal/zipartifacts/metadata_test.go b/workhorse/internal/zipartifacts/metadata_test.go new file mode 100644 index 00000000000..0f130ab4c15 --- /dev/null +++ b/workhorse/internal/zipartifacts/metadata_test.go @@ -0,0 +1,102 @@ +package zipartifacts_test + +import ( + "archive/zip" + "bytes" + "compress/gzip" + "context" + "fmt" + "io" + "io/ioutil" + "os" + "testing" + + "github.com/stretchr/testify/require" + + "gitlab.com/gitlab-org/gitlab-workhorse/internal/zipartifacts" +) + +func generateTestArchive(w io.Writer) error { + archive := zip.NewWriter(w) + + // non-POSIX paths are here just to test if we never enter infinite loop + files := []string{"file1", "some/file/dir/", "some/file/dir/file2", "../../test12/test", + "/usr/bin/test", `c:\windows\win32.exe`, `c:/windows/win.dll`, "./f/asd", "/"} + + for _, file := range files { + archiveFile, err := archive.Create(file) + if err != nil { + return err + } + + fmt.Fprint(archiveFile, file) + } + + return archive.Close() +} + +func validateMetadata(r io.Reader) error { + gz, err := gzip.NewReader(r) + if err != nil { + return err + } + + meta, err := ioutil.ReadAll(gz) + if err != nil { + return err + } + + paths := []string{"file1", "some/", "some/file/", "some/file/dir/", "some/file/dir/file2"} + for _, path := range paths { + if !bytes.Contains(meta, []byte(path+"\x00")) { + return fmt.Errorf(fmt.Sprintf("zipartifacts: metadata for path %q not found", path)) + } + } + + return nil +} + +func TestGenerateZipMetadataFromFile(t *testing.T) { + var metaBuffer bytes.Buffer + + f, err := ioutil.TempFile("", "workhorse-metadata.zip-") + if f != nil { + defer os.Remove(f.Name()) + } + require.NoError(t, err) + defer f.Close() + + err = generateTestArchive(f) + require.NoError(t, err) + f.Close() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + archive, err := zipartifacts.OpenArchive(ctx, f.Name()) + require.NoError(t, err, "zipartifacts: OpenArchive failed") + + err = zipartifacts.GenerateZipMetadata(&metaBuffer, archive) + require.NoError(t, err, "zipartifacts: GenerateZipMetadata failed") + + err = validateMetadata(&metaBuffer) + require.NoError(t, err) +} + +func TestErrNotAZip(t *testing.T) { + f, err := ioutil.TempFile("", "workhorse-metadata.zip-") + if f != nil { + defer os.Remove(f.Name()) + } + require.NoError(t, err) + defer f.Close() + + _, err = fmt.Fprint(f, "Not a zip file") + require.NoError(t, err) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + _, err = zipartifacts.OpenArchive(ctx, f.Name()) + require.Equal(t, zipartifacts.ErrorCode[zipartifacts.CodeNotZip], err, "OpenArchive requires a zip file") +} diff --git a/workhorse/internal/zipartifacts/open_archive.go b/workhorse/internal/zipartifacts/open_archive.go new file mode 100644 index 00000000000..30b86b66c49 --- /dev/null +++ b/workhorse/internal/zipartifacts/open_archive.go @@ -0,0 +1,138 @@ +package zipartifacts + +import ( + "archive/zip" + "context" + "fmt" + "io" + "net" + "net/http" + "os" + "strings" + "time" + + "gitlab.com/gitlab-org/gitlab-workhorse/internal/httprs" + + "gitlab.com/gitlab-org/labkit/correlation" + "gitlab.com/gitlab-org/labkit/mask" + "gitlab.com/gitlab-org/labkit/tracing" +) + +var httpClient = &http.Client{ + Transport: tracing.NewRoundTripper(correlation.NewInstrumentedRoundTripper(&http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 10 * time.Second, + }).DialContext, + IdleConnTimeout: 30 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 10 * time.Second, + ResponseHeaderTimeout: 30 * time.Second, + DisableCompression: true, + })), +} + +type archive struct { + reader io.ReaderAt + size int64 +} + +// OpenArchive will open a zip.Reader from a local path or a remote object store URL +// in case of remote url it will make use of ranged requestes to support seeking. +// If the path do not exists error will be ErrArchiveNotFound, +// if the file isn't a zip archive error will be ErrNotAZip +func OpenArchive(ctx context.Context, archivePath string) (*zip.Reader, error) { + archive, err := openArchiveLocation(ctx, archivePath) + if err != nil { + return nil, err + } + + return openZipReader(archive.reader, archive.size) +} + +// OpenArchiveWithReaderFunc opens a zip.Reader from either local path or a +// remote object, similarly to OpenArchive function. The difference is that it +// allows passing a readerFunc that takes a io.ReaderAt that is either going to +// be os.File or a custom reader we use to read from object storage. The +// readerFunc can augment the archive reader and return a type that satisfies +// io.ReaderAt. +func OpenArchiveWithReaderFunc(ctx context.Context, location string, readerFunc func(io.ReaderAt, int64) io.ReaderAt) (*zip.Reader, error) { + archive, err := openArchiveLocation(ctx, location) + if err != nil { + return nil, err + } + + return openZipReader(readerFunc(archive.reader, archive.size), archive.size) +} + +func openArchiveLocation(ctx context.Context, location string) (*archive, error) { + if isURL(location) { + return openHTTPArchive(ctx, location) + } + + return openFileArchive(ctx, location) +} + +func isURL(path string) bool { + return strings.HasPrefix(path, "http://") || strings.HasPrefix(path, "https://") +} + +func openHTTPArchive(ctx context.Context, archivePath string) (*archive, error) { + scrubbedArchivePath := mask.URL(archivePath) + req, err := http.NewRequest(http.MethodGet, archivePath, nil) + if err != nil { + return nil, fmt.Errorf("can't create HTTP GET %q: %v", scrubbedArchivePath, err) + } + req = req.WithContext(ctx) + + resp, err := httpClient.Do(req.WithContext(ctx)) + if err != nil { + return nil, fmt.Errorf("HTTP GET %q: %v", scrubbedArchivePath, err) + } else if resp.StatusCode == http.StatusNotFound { + return nil, ErrorCode[CodeArchiveNotFound] + } else if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP GET %q: %d: %v", scrubbedArchivePath, resp.StatusCode, resp.Status) + } + + rs := httprs.NewHttpReadSeeker(resp, httpClient) + + go func() { + <-ctx.Done() + resp.Body.Close() + rs.Close() + }() + + return &archive{reader: rs, size: resp.ContentLength}, nil +} + +func openFileArchive(ctx context.Context, archivePath string) (*archive, error) { + file, err := os.Open(archivePath) + if err != nil { + if os.IsNotExist(err) { + return nil, ErrorCode[CodeArchiveNotFound] + } + } + + go func() { + <-ctx.Done() + // We close the archive from this goroutine so that we can safely return a *zip.Reader instead of a *zip.ReadCloser + file.Close() + }() + + stat, err := file.Stat() + if err != nil { + return nil, err + } + + return &archive{reader: file, size: stat.Size()}, nil +} + +func openZipReader(archive io.ReaderAt, size int64) (*zip.Reader, error) { + reader, err := zip.NewReader(archive, size) + if err != nil { + return nil, ErrorCode[CodeNotZip] + } + + return reader, nil +} diff --git a/workhorse/internal/zipartifacts/open_archive_test.go b/workhorse/internal/zipartifacts/open_archive_test.go new file mode 100644 index 00000000000..f7624d053d9 --- /dev/null +++ b/workhorse/internal/zipartifacts/open_archive_test.go @@ -0,0 +1,68 @@ +package zipartifacts + +import ( + "archive/zip" + "context" + "fmt" + "io/ioutil" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestOpenHTTPArchive(t *testing.T) { + const ( + zipFile = "test.zip" + entryName = "hello.txt" + contents = "world" + testRoot = "testdata/public" + ) + + require.NoError(t, os.MkdirAll(testRoot, 0755)) + f, err := os.Create(filepath.Join(testRoot, zipFile)) + require.NoError(t, err, "create file") + defer f.Close() + + zw := zip.NewWriter(f) + w, err := zw.Create(entryName) + require.NoError(t, err, "create zip entry") + _, err = fmt.Fprint(w, contents) + require.NoError(t, err, "write zip entry contents") + require.NoError(t, zw.Close(), "close zip writer") + require.NoError(t, f.Close(), "close file") + + srv := httptest.NewServer(http.FileServer(http.Dir(testRoot))) + defer srv.Close() + + zr, err := OpenArchive(context.Background(), srv.URL+"/"+zipFile) + require.NoError(t, err, "call OpenArchive") + require.Len(t, zr.File, 1) + + zf := zr.File[0] + require.Equal(t, entryName, zf.Name, "zip entry name") + + entry, err := zf.Open() + require.NoError(t, err, "get zip entry reader") + defer entry.Close() + + actualContents, err := ioutil.ReadAll(entry) + require.NoError(t, err, "read zip entry contents") + require.Equal(t, contents, string(actualContents), "compare zip entry contents") +} + +func TestOpenHTTPArchiveNotSendingAcceptEncodingHeader(t *testing.T) { + requestHandler := func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, "GET", r.Method) + require.Nil(t, r.Header["Accept-Encoding"]) + w.WriteHeader(http.StatusOK) + } + + srv := httptest.NewServer(http.HandlerFunc(requestHandler)) + defer srv.Close() + + OpenArchive(context.Background(), srv.URL) +} |