diff options
Diffstat (limited to 'workhorse/internal/upload')
-rw-r--r-- | workhorse/internal/upload/artifacts_uploader.go | 31 | ||||
-rw-r--r-- | workhorse/internal/upload/exif.go | 91 | ||||
-rw-r--r-- | workhorse/internal/upload/rewrite.go | 106 | ||||
-rw-r--r-- | workhorse/internal/upload/saved_file_tracker.go | 12 | ||||
-rw-r--r-- | workhorse/internal/upload/uploads.go | 1 |
5 files changed, 122 insertions, 119 deletions
diff --git a/workhorse/internal/upload/artifacts_uploader.go b/workhorse/internal/upload/artifacts_uploader.go index debbb9c24db..a8c944a1d33 100644 --- a/workhorse/internal/upload/artifacts_uploader.go +++ b/workhorse/internal/upload/artifacts_uploader.go @@ -3,6 +3,7 @@ package upload import ( "context" "fmt" + "io" "mime/multipart" "net/http" "os" @@ -16,6 +17,7 @@ import ( "gitlab.com/gitlab-org/gitlab/workhorse/internal/api" "gitlab.com/gitlab-org/gitlab/workhorse/internal/helper" + "gitlab.com/gitlab-org/gitlab/workhorse/internal/lsif_transformer/parser" "gitlab.com/gitlab-org/gitlab/workhorse/internal/upload/destination" "gitlab.com/gitlab-org/gitlab/workhorse/internal/zipartifacts" ) @@ -34,8 +36,9 @@ var zipSubcommandsErrorsCounter = promauto.NewCounterVec( }, []string{"error"}) type artifactsUploadProcessor struct { - opts *destination.UploadOpts - format string + format string + processLSIF bool + tempDir string SavedFileTracker } @@ -43,16 +46,11 @@ type artifactsUploadProcessor struct { // Artifacts is like a Multipart but specific for artifacts upload. func Artifacts(myAPI *api.API, h http.Handler, p Preparer) http.Handler { return myAPI.PreAuthorizeHandler(func(w http.ResponseWriter, r *http.Request, a *api.Response) { - opts, err := p.Prepare(a) - if err != nil { - helper.Fail500(w, r, fmt.Errorf("UploadArtifacts: error preparing file storage options")) - return - } - format := r.URL.Query().Get(ArtifactFormatKey) mg := &artifactsUploadProcessor{ - opts: opts, format: format, + processLSIF: a.ProcessLsif, + tempDir: a.TempPath, SavedFileTracker: SavedFileTracker{Request: r}, } interceptMultipartFiles(w, r, h, mg, &eagerAuthorizer{a}, p) @@ -61,7 +59,7 @@ func Artifacts(myAPI *api.API, h http.Handler, p Preparer) http.Handler { func (a *artifactsUploadProcessor) generateMetadataFromZip(ctx context.Context, file *destination.FileHandler) (*destination.FileHandler, error) { metaOpts := &destination.UploadOpts{ - LocalTempPath: a.opts.LocalTempPath, + LocalTempPath: a.tempDir, } if metaOpts.LocalTempPath == "" { metaOpts.LocalTempPath = os.TempDir() @@ -115,10 +113,10 @@ func (a *artifactsUploadProcessor) generateMetadataFromZip(ctx context.Context, func (a *artifactsUploadProcessor) ProcessFile(ctx context.Context, formName string, file *destination.FileHandler, writer *multipart.Writer) error { // ProcessFile for artifacts requires file form-data field name to eq `file` - if formName != "file" { return fmt.Errorf("invalid form field: %q", formName) } + if a.Count() > 0 { return fmt.Errorf("artifacts request contains more than one file") } @@ -134,7 +132,6 @@ func (a *artifactsUploadProcessor) ProcessFile(ctx context.Context, formName str return nil } - // TODO: can we rely on disk for shipping metadata? Not if we split workhorse and rails in 2 different PODs metadata, err := a.generateMetadataFromZip(ctx, file) if err != nil { return err @@ -156,6 +153,12 @@ func (a *artifactsUploadProcessor) ProcessFile(ctx context.Context, formName str return nil } -func (a *artifactsUploadProcessor) Name() string { - return "artifacts" +func (a *artifactsUploadProcessor) Name() string { return "artifacts" } + +func (a *artifactsUploadProcessor) TransformContents(ctx context.Context, filename string, r io.Reader) (io.ReadCloser, error) { + if a.processLSIF { + return parser.NewParser(ctx, r) + } + + return a.SavedFileTracker.TransformContents(ctx, filename, r) } diff --git a/workhorse/internal/upload/exif.go b/workhorse/internal/upload/exif.go new file mode 100644 index 00000000000..e77afb24502 --- /dev/null +++ b/workhorse/internal/upload/exif.go @@ -0,0 +1,91 @@ +package upload + +import ( + "context" + "io" + "net/http" + "os" + + "gitlab.com/gitlab-org/labkit/log" + "golang.org/x/image/tiff" + + "gitlab.com/gitlab-org/gitlab/workhorse/internal/upload/exif" +) + +func handleExifUpload(ctx context.Context, r io.Reader, filename string, imageType exif.FileType) (io.ReadCloser, error) { + tmpfile, err := os.CreateTemp("", "exifremove") + if err != nil { + return nil, err + } + go func() { + <-ctx.Done() + tmpfile.Close() + }() + if err := os.Remove(tmpfile.Name()); err != nil { + return nil, err + } + + _, err = io.Copy(tmpfile, r) + if err != nil { + return nil, err + } + + if _, err := tmpfile.Seek(0, io.SeekStart); err != nil { + return nil, err + } + + isValidType := false + switch imageType { + case exif.TypeJPEG: + isValidType = isJPEG(tmpfile) + case exif.TypeTIFF: + isValidType = isTIFF(tmpfile) + } + + if _, err := tmpfile.Seek(0, io.SeekStart); err != nil { + return nil, err + } + + if !isValidType { + log.WithContextFields(ctx, log.Fields{ + "filename": filename, + "imageType": imageType, + }).Info("invalid content type, not running exiftool") + + return tmpfile, nil + } + + log.WithContextFields(ctx, log.Fields{ + "filename": filename, + }).Info("running exiftool to remove any metadata") + + cleaner, err := exif.NewCleaner(ctx, tmpfile) + if err != nil { + return nil, err + } + + return cleaner, nil +} + +func isTIFF(r io.Reader) bool { + _, err := tiff.DecodeConfig(r) + if err == nil { + return true + } + + if _, unsupported := err.(tiff.UnsupportedError); unsupported { + return true + } + + return false +} + +func isJPEG(r io.Reader) bool { + // Only the first 512 bytes are used to sniff the content type. + buf, err := io.ReadAll(io.LimitReader(r, 512)) + if err != nil { + return false + } + + return http.DetectContentType(buf) == "image/jpeg" +} diff --git a/workhorse/internal/upload/rewrite.go b/workhorse/internal/upload/rewrite.go index 7b753c8e5d7..d03445923fa 100644 --- a/workhorse/internal/upload/rewrite.go +++ b/workhorse/internal/upload/rewrite.go @@ -9,18 +9,15 @@ import ( "mime/multipart" "net/http" "net/textproto" - "os" "path/filepath" "strings" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "golang.org/x/image/tiff" "gitlab.com/gitlab-org/gitlab/workhorse/internal/log" "gitlab.com/gitlab-org/gitlab/workhorse/internal/api" - "gitlab.com/gitlab-org/gitlab/workhorse/internal/lsif_transformer/parser" "gitlab.com/gitlab-org/gitlab/workhorse/internal/upload/destination" "gitlab.com/gitlab-org/gitlab/workhorse/internal/upload/exif" ) @@ -151,22 +148,11 @@ func (rew *rewriter) handleFilePart(r *http.Request, name string, p *multipart.P return err } - var inputReader io.ReadCloser ctx := r.Context() - if imageType := exif.FileTypeFromSuffix(filename); imageType != exif.TypeUnknown { - inputReader, err = handleExifUpload(ctx, p, filename, imageType) - if err != nil { - return err - } - } else if apiResponse.ProcessLsif { - inputReader, err = handleLsifUpload(ctx, p, opts.LocalTempPath, filename) - if err != nil { - return err - } - } else { - inputReader = io.NopCloser(p) + inputReader, err := rew.filter.TransformContents(ctx, filename, p) + if err != nil { + return err } - defer inputReader.Close() fh, err := destination.Upload(ctx, inputReader, -1, filename, opts) @@ -194,92 +180,6 @@ func (rew *rewriter) handleFilePart(r *http.Request, name string, p *multipart.P return rew.filter.ProcessFile(ctx, name, fh, rew.writer) } -func handleExifUpload(ctx context.Context, r io.Reader, filename string, imageType exif.FileType) (io.ReadCloser, error) { - tmpfile, err := os.CreateTemp("", "exifremove") - if err != nil { - return nil, err - } - go func() { - <-ctx.Done() - tmpfile.Close() - }() - if err := os.Remove(tmpfile.Name()); err != nil { - return nil, err - } - - _, err = io.Copy(tmpfile, r) - if err != nil { - return nil, err - } - - if _, err := tmpfile.Seek(0, io.SeekStart); err != nil { - return nil, err - } - - isValidType := false - switch imageType { - case exif.TypeJPEG: - isValidType = isJPEG(tmpfile) - case exif.TypeTIFF: - isValidType = isTIFF(tmpfile) - } - - if _, err := tmpfile.Seek(0, io.SeekStart); err != nil { - return nil, err - } - - if !isValidType { - log.WithContextFields(ctx, log.Fields{ - "filename": filename, - "imageType": imageType, - }).Info("invalid content type, not running exiftool") - - return tmpfile, nil - } - - log.WithContextFields(ctx, log.Fields{ - "filename": filename, - }).Info("running exiftool to remove any metadata") - - cleaner, err := exif.NewCleaner(ctx, tmpfile) - if err != nil { - return nil, err - } - - return cleaner, nil -} - -func isTIFF(r io.Reader) bool { - _, err := tiff.DecodeConfig(r) - if err == nil { - return true - } - - if _, unsupported := err.(tiff.UnsupportedError); unsupported { - return true - } - - return false -} - -func isJPEG(r io.Reader) bool { - // Only the first 512 bytes are used to sniff the content type. - buf, err := io.ReadAll(io.LimitReader(r, 512)) - if err != nil { - return false - } - - return http.DetectContentType(buf) == "image/jpeg" -} - -func handleLsifUpload(ctx context.Context, reader io.Reader, tempPath, filename string) (io.ReadCloser, error) { - parserConfig := parser.Config{ - TempPath: tempPath, - } - - return parser.NewParser(ctx, reader, parserConfig) -} - func (rew *rewriter) copyPart(ctx context.Context, name string, p *multipart.Part) error { np, err := rew.writer.CreatePart(p.Header) if err != nil { diff --git a/workhorse/internal/upload/saved_file_tracker.go b/workhorse/internal/upload/saved_file_tracker.go index 77758520d94..1fad5343647 100644 --- a/workhorse/internal/upload/saved_file_tracker.go +++ b/workhorse/internal/upload/saved_file_tracker.go @@ -3,11 +3,13 @@ package upload import ( "context" "fmt" + "io" "mime/multipart" "net/http" "gitlab.com/gitlab-org/gitlab/workhorse/internal/secret" "gitlab.com/gitlab-org/gitlab/workhorse/internal/upload/destination" + "gitlab.com/gitlab-org/gitlab/workhorse/internal/upload/exif" ) type SavedFileTracker struct { @@ -54,6 +56,12 @@ func (s *SavedFileTracker) Finalize(_ context.Context) error { return nil } -func (s *SavedFileTracker) Name() string { - return "accelerate" +func (s *SavedFileTracker) Name() string { return "accelerate" } + +func (*SavedFileTracker) TransformContents(ctx context.Context, filename string, r io.Reader) (io.ReadCloser, error) { + if imageType := exif.FileTypeFromSuffix(filename); imageType != exif.TypeUnknown { + return handleExifUpload(ctx, r, filename, imageType) + } + + return io.NopCloser(r), nil } diff --git a/workhorse/internal/upload/uploads.go b/workhorse/internal/upload/uploads.go index cdbd367297e..61b419901a7 100644 --- a/workhorse/internal/upload/uploads.go +++ b/workhorse/internal/upload/uploads.go @@ -36,6 +36,7 @@ type MultipartFormProcessor interface { Finalize(ctx context.Context) error Name() string Count() int + TransformContents(ctx context.Context, filename string, r io.Reader) (io.ReadCloser, error) } // interceptMultipartFiles is the core of the implementation of |