diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-06-10 10:18:00 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-06-10 10:18:00 +0300 |
commit | bdd33f61948e5fa22da595f9b87e7d5032c19c26 (patch) | |
tree | 3ac7c3e02086ec0d716fe49b7adb564fbd358389 | |
parent | ca99cbe1bba2cc1e38f561ad5034fed465824967 (diff) | |
parent | 0603c758d6e3580adbd3d0d69e326d05baa340b9 (diff) |
Merge branch 'pks-lfs-process-filter' into 'master'
gitaly-lfs-smudge: Implement support for running as a long-running filter process
Closes #4153
See merge request gitlab-org/gitaly!4595
-rw-r--r-- | cmd/gitaly-lfs-smudge/lfs_smudge.go | 76 | ||||
-rw-r--r-- | cmd/gitaly-lfs-smudge/lfs_smudge_test.go | 188 | ||||
-rw-r--r-- | cmd/gitaly-lfs-smudge/main.go | 27 | ||||
-rw-r--r-- | cmd/gitaly-lfs-smudge/smudge.go | 320 | ||||
-rw-r--r-- | cmd/gitaly-lfs-smudge/smudge_test.go | 560 | ||||
-rw-r--r-- | internal/git/pktline/pkt_line_test.go | 55 | ||||
-rw-r--r-- | internal/git/pktline/pktline.go | 24 | ||||
-rw-r--r-- | internal/git/smudge/config.go | 34 | ||||
-rw-r--r-- | internal/git/smudge/config_test.go | 2 | ||||
-rw-r--r-- | internal/gitaly/service/repository/archive.go | 60 | ||||
-rw-r--r-- | internal/gitaly/service/repository/archive_test.go | 70 | ||||
-rw-r--r-- | internal/metadata/featureflag/ff_get_archive_lfs_filter_process.go | 5 |
12 files changed, 1108 insertions, 313 deletions
diff --git a/cmd/gitaly-lfs-smudge/lfs_smudge.go b/cmd/gitaly-lfs-smudge/lfs_smudge.go deleted file mode 100644 index d2676e14a..000000000 --- a/cmd/gitaly-lfs-smudge/lfs_smudge.go +++ /dev/null @@ -1,76 +0,0 @@ -package main - -import ( - "context" - "fmt" - "io" - "net/url" - - "github.com/git-lfs/git-lfs/v3/lfs" - "gitlab.com/gitlab-org/gitaly/v15/internal/git/smudge" - "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config/prometheus" - "gitlab.com/gitlab-org/gitaly/v15/internal/gitlab" - "gitlab.com/gitlab-org/labkit/log" - "gitlab.com/gitlab-org/labkit/tracing" -) - -func smudgeContents(cfg smudge.Config, to io.Writer, from io.Reader) (returnedErr error) { - // Since the environment is sanitized at the moment, we're only - // using this to extract the correlation ID. The finished() call - // to clean up the tracing will be a NOP here. - ctx, finished := tracing.ExtractFromEnv(context.Background()) - defer finished() - - output, err := handleSmudge(ctx, cfg, from) - if err != nil { - return fmt.Errorf("smudging contents: %w", err) - } - defer func() { - if err := output.Close(); err != nil && returnedErr == nil { - returnedErr = fmt.Errorf("closing LFS object: %w", err) - } - }() - - if _, err := io.Copy(to, output); err != nil { - return fmt.Errorf("writing smudged contents: %w", err) - } - - return nil -} - -func handleSmudge(ctx context.Context, cfg smudge.Config, from io.Reader) (io.ReadCloser, error) { - logger := log.ContextLogger(ctx) - - ptr, contents, err := lfs.DecodeFrom(from) - if err != nil { - // This isn't a valid LFS pointer. Just copy the existing pointer data. - return io.NopCloser(contents), nil - } - - logger.WithField("oid", ptr.Oid).Debug("decoded LFS OID") - - client, err := gitlab.NewHTTPClient(logger, cfg.Gitlab, cfg.TLS, prometheus.Config{}) - if err != nil { - return io.NopCloser(contents), err - } - - qs := url.Values{} - qs.Set("oid", ptr.Oid) - qs.Set("gl_repository", cfg.GlRepository) - u := url.URL{Path: "/lfs", RawQuery: qs.Encode()} - - response, err := client.Get(ctx, u.String()) - if err != nil { - return io.NopCloser(contents), fmt.Errorf("error loading LFS object: %v", err) - } - - if response.StatusCode == 200 { - return response.Body, nil - } - - if err := response.Body.Close(); err != nil { - logger.WithError(err).Error("closing LFS pointer body: %w", err) - } - - return io.NopCloser(contents), nil -} diff --git a/cmd/gitaly-lfs-smudge/lfs_smudge_test.go b/cmd/gitaly-lfs-smudge/lfs_smudge_test.go deleted file mode 100644 index 477a2131f..000000000 --- a/cmd/gitaly-lfs-smudge/lfs_smudge_test.go +++ /dev/null @@ -1,188 +0,0 @@ -package main - -import ( - "bytes" - "net/http" - "strings" - "testing" - - "github.com/stretchr/testify/require" - "gitlab.com/gitlab-org/gitaly/v15/internal/git/smudge" - "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config" - "gitlab.com/gitlab-org/gitaly/v15/internal/gitlab" -) - -const ( - lfsOid = "3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12aa" - lfsPointer = `version https://git-lfs.github.com/spec/v1 -oid sha256:3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12aa -size 177735 -` - lfsPointerWithCRLF = `version https://git-lfs.github.com/spec/v1 -oid sha256:3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12aa` + "\r\nsize 177735" - invalidLfsPointer = `version https://git-lfs.github.com/spec/v1 -oid sha256:3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12aa&gl_repository=project-51 -size 177735 -` - invalidLfsPointerWithNonHex = `version https://git-lfs.github.com/spec/v1 -oid sha256:3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12z- -size 177735` - glRepository = "project-1" - secretToken = "topsecret" - testData = "hello world" - certPath = "../../internal/gitlab/testdata/certs/server.crt" - keyPath = "../../internal/gitlab/testdata/certs/server.key" -) - -var defaultOptions = gitlab.TestServerOptions{ - SecretToken: secretToken, - LfsBody: testData, - LfsOid: lfsOid, - GlRepository: glRepository, - ClientCACertPath: certPath, - ServerCertPath: certPath, - ServerKeyPath: keyPath, -} - -func TestSuccessfulLfsSmudge(t *testing.T) { - testCases := []struct { - desc string - data string - }{ - { - desc: "regular LFS pointer", - data: lfsPointer, - }, - { - desc: "LFS pointer with CRLF", - data: lfsPointerWithCRLF, - }, - } - - for _, tc := range testCases { - t.Run(tc.desc, func(t *testing.T) { - var b bytes.Buffer - reader := strings.NewReader(tc.data) - - gitlabCfg, cleanup := runTestServer(t, defaultOptions) - defer cleanup() - - cfg := smudge.Config{ - GlRepository: "project-1", - Gitlab: gitlabCfg, - TLS: config.TLS{ - CertPath: certPath, - KeyPath: keyPath, - }, - } - - require.NoError(t, smudgeContents(cfg, &b, reader)) - require.Equal(t, testData, b.String()) - }) - } -} - -func TestUnsuccessfulLfsSmudge(t *testing.T) { - defaultConfig := func(t *testing.T, gitlabCfg config.Gitlab) smudge.Config { - return smudge.Config{ - GlRepository: "project-1", - Gitlab: gitlabCfg, - } - } - - testCases := []struct { - desc string - setupCfg func(*testing.T, config.Gitlab) smudge.Config - data string - expectedError bool - options gitlab.TestServerOptions - expectedGitalyTLS string - }{ - { - desc: "bad LFS pointer", - data: "test data", - setupCfg: defaultConfig, - options: defaultOptions, - expectedError: false, - }, - { - desc: "invalid LFS pointer", - data: invalidLfsPointer, - setupCfg: defaultConfig, - options: defaultOptions, - expectedError: false, - }, - { - desc: "invalid LFS pointer with non-hex characters", - data: invalidLfsPointerWithNonHex, - setupCfg: defaultConfig, - options: defaultOptions, - expectedError: false, - }, - { - desc: "missing GL_REPOSITORY", - data: lfsPointer, - setupCfg: func(t *testing.T, gitlabCfg config.Gitlab) smudge.Config { - cfg := defaultConfig(t, gitlabCfg) - cfg.GlRepository = "" - return cfg - }, - options: defaultOptions, - expectedError: true, - }, - { - desc: "missing GL_INTERNAL_CONFIG", - data: lfsPointer, - setupCfg: func(t *testing.T, gitlabCfg config.Gitlab) smudge.Config { - cfg := defaultConfig(t, gitlabCfg) - cfg.Gitlab = config.Gitlab{} - return cfg - }, - options: defaultOptions, - expectedError: true, - }, - { - desc: "failed HTTP response", - data: lfsPointer, - setupCfg: defaultConfig, - options: gitlab.TestServerOptions{ - SecretToken: secretToken, - LfsBody: testData, - LfsOid: lfsOid, - GlRepository: glRepository, - LfsStatusCode: http.StatusInternalServerError, - }, - expectedError: true, - }, - { - desc: "invalid TLS paths", - data: lfsPointer, - setupCfg: func(t *testing.T, gitlabCfg config.Gitlab) smudge.Config { - cfg := defaultConfig(t, gitlabCfg) - cfg.TLS = config.TLS{CertPath: "fake-path", KeyPath: "not-real"} - return cfg - }, - options: defaultOptions, - expectedError: true, - }, - } - - for _, tc := range testCases { - t.Run(tc.desc, func(t *testing.T) { - gitlabCfg, cleanup := runTestServer(t, tc.options) - defer cleanup() - - cfg := tc.setupCfg(t, gitlabCfg) - - var b bytes.Buffer - err := smudgeContents(cfg, &b, strings.NewReader(tc.data)) - - if tc.expectedError { - require.Error(t, err) - } else { - require.NoError(t, err) - require.Equal(t, tc.data, b.String()) - } - }) - } -} diff --git a/cmd/gitaly-lfs-smudge/main.go b/cmd/gitaly-lfs-smudge/main.go index 9db0fbd55..51a63a6b4 100644 --- a/cmd/gitaly-lfs-smudge/main.go +++ b/cmd/gitaly-lfs-smudge/main.go @@ -1,6 +1,7 @@ package main import ( + "context" "fmt" "io" "os" @@ -10,6 +11,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/helper/env" gitalylog "gitlab.com/gitlab-org/gitaly/v15/internal/log" "gitlab.com/gitlab-org/labkit/log" + "gitlab.com/gitlab-org/labkit/tracing" ) func requireStdin(msg string) { @@ -59,14 +61,31 @@ func initLogging(environment []string) (io.Closer, error) { } func run(environment []string, out io.Writer, in io.Reader) error { + // Since the environment is sanitized at the moment, we're only + // using this to extract the correlation ID. The finished() call + // to clean up the tracing will be a NOP here. + ctx, finished := tracing.ExtractFromEnv(context.Background()) + defer finished() + cfg, err := smudge.ConfigFromEnvironment(environment) if err != nil { return fmt.Errorf("loading configuration: %w", err) } - if err := smudgeContents(cfg, out, in); err != nil { - return fmt.Errorf("running smudge filter: %w", err) - } + switch cfg.DriverType { + case smudge.DriverTypeFilter: + if err := filter(ctx, cfg, out, in); err != nil { + return fmt.Errorf("running smudge filter: %w", err) + } - return nil + return nil + case smudge.DriverTypeProcess: + if err := process(ctx, cfg, out, in); err != nil { + return fmt.Errorf("running smudge process: %w", err) + } + + return nil + default: + return fmt.Errorf("unknown driver type: %v", cfg.DriverType) + } } diff --git a/cmd/gitaly-lfs-smudge/smudge.go b/cmd/gitaly-lfs-smudge/smudge.go new file mode 100644 index 000000000..875a084c2 --- /dev/null +++ b/cmd/gitaly-lfs-smudge/smudge.go @@ -0,0 +1,320 @@ +package main + +import ( + "bufio" + "bytes" + "context" + "errors" + "fmt" + "io" + "net/url" + + "github.com/git-lfs/git-lfs/v3/lfs" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/pktline" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/smudge" + "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config/prometheus" + "gitlab.com/gitlab-org/gitaly/v15/internal/gitlab" + "gitlab.com/gitlab-org/labkit/log" +) + +func filter(ctx context.Context, cfg smudge.Config, to io.Writer, from io.Reader) (returnedErr error) { + client, err := gitlab.NewHTTPClient(log.ContextLogger(ctx), cfg.Gitlab, cfg.TLS, prometheus.Config{}) + if err != nil { + return fmt.Errorf("creating HTTP client: %w", err) + } + + output, err := smudgeOneObject(ctx, cfg, client, from) + if err != nil { + return fmt.Errorf("smudging contents: %w", err) + } + defer func() { + if err := output.Close(); err != nil && returnedErr == nil { + returnedErr = fmt.Errorf("closing LFS object: %w", err) + } + }() + + if _, err := io.Copy(to, output); err != nil { + return fmt.Errorf("writing smudged contents: %w", err) + } + + return nil +} + +// processState encodes a state machine for handling long-running filter processes. +type processState int + +const ( + // processStateAnnounce is the initial state where we expect the client to announce its + // presence. + processStateAnnounce = processState(iota) + // processStateVersions is the state where the client announces all its known versions. + processStateVersions + // processStateCapabilities is the state where we have announced our own supported version + // to the client. The client now starts to send its supported capabilities. + processStateCapabilities + // processStateCommand is the state where the client sends the command it wants us to + // perform. + processStateCommand + // processStateSmudgeMetadata is the state where the client sends metadata of the file + // that should be smudged. + processStateSmudgeMetadata + // processStateSmudgeContent is the state where the client sends the contents of the file + // that should be smudged. + processStateSmudgeContent +) + +func process(ctx context.Context, cfg smudge.Config, to io.Writer, from io.Reader) error { + client, err := gitlab.NewHTTPClient(log.ContextLogger(ctx), cfg.Gitlab, cfg.TLS, prometheus.Config{}) + if err != nil { + return fmt.Errorf("creating HTTP client: %w", err) + } + + scanner := pktline.NewScanner(from) + + writer := bufio.NewWriter(to) + + buf := make([]byte, pktline.MaxPktSize-4) + var content bytes.Buffer + + clientSupportsVersion2 := false + clientSupportsSmudgeCapability := false + + state := processStateAnnounce + for scanner.Scan() { + line := scanner.Bytes() + + var data []byte + if !pktline.IsFlush(line) { + payload, err := pktline.Payload(line) + if err != nil { + return fmt.Errorf("getting payload: %w", err) + } + + data = payload + } + + switch state { + case processStateAnnounce: + if !bytes.Equal(data, []byte("git-filter-client\n")) { + return fmt.Errorf("invalid client %q", string(data)) + } + + state = processStateVersions + case processStateVersions: + // The client will announce one or more supported versions to us. We need to + // collect them all in order to determine whether we do in fact support one + // of the announced versions. + if !pktline.IsFlush(line) { + if !bytes.HasPrefix(data, []byte("version=")) { + return fmt.Errorf("expected version, got %q", string(data)) + } + + // We only support version two of this protocol, so we have to check + // whether that version is announced by the client. + if bytes.Equal(data, []byte("version=2\n")) { + clientSupportsVersion2 = true + } + + break + } + + // We have gotten a flush packet, so the client is done announcing its + // versions. If we haven't seen our version announced then it's time to + // quit. + if !clientSupportsVersion2 { + return fmt.Errorf("client does not support version 2") + } + + // Announce that we're a server and that we're talking version 2 of this + // protocol. + if _, err := pktline.WriteString(writer, "git-filter-server\n"); err != nil { + return fmt.Errorf("announcing server presence: %w", err) + } + + if _, err := pktline.WriteString(writer, "version=2\n"); err != nil { + return fmt.Errorf("announcing server version: %w", err) + } + + if err := pktline.WriteFlush(writer); err != nil { + return fmt.Errorf("flushing announcement: %w", err) + } + + state = processStateCapabilities + case processStateCapabilities: + // Similar as above, the client will now announce all the capabilities it + // supports. We only support the "smudging" capability. + if !pktline.IsFlush(line) { + if !bytes.HasPrefix(data, []byte("capability=")) { + return fmt.Errorf("expected capability, got: %q", string(data)) + } + + // We only support smudging contents. + if bytes.Equal(data, []byte("capability=smudge\n")) { + clientSupportsSmudgeCapability = true + } + + break + } + + // If the client doesn't support smudging then we're done. + if !clientSupportsSmudgeCapability { + return fmt.Errorf("client does not support smudge capability") + } + + // Announce that the only capability we support ourselves is smudging. + if _, err := pktline.WriteString(writer, "capability=smudge\n"); err != nil { + return fmt.Errorf("announcing smudge capability: %w", err) + } + + if err := pktline.WriteFlush(writer); err != nil { + return fmt.Errorf("flushing capability announcement: %w", err) + } + + state = processStateCommand + case processStateCommand: + // We're now in the processing loop where the client may announce one or + // more smudge commands. + if !bytes.Equal(data, []byte("command=smudge\n")) { + return fmt.Errorf("expected smudge command, got %q", string(data)) + } + + state = processStateSmudgeMetadata + case processStateSmudgeMetadata: + // The client sends us various information about the blob like the path + // name or treeish. We don't care about that information, so we just wait + // until we get the flush packet. + if !pktline.IsFlush(line) { + break + } + + content.Reset() + + state = processStateSmudgeContent + case processStateSmudgeContent: + // When we receive a flush packet we know that the client is done sending us + // the clean data. + if pktline.IsFlush(line) { + smudgedReader, err := smudgeOneObject(ctx, cfg, client, &content) + if err != nil { + log.ContextLogger(ctx).WithError(err).Error("failed smudging LFS pointer") + + if _, err := pktline.WriteString(writer, "status=error\n"); err != nil { + return fmt.Errorf("reporting failure: %w", err) + } + + if err := pktline.WriteFlush(writer); err != nil { + return fmt.Errorf("flushing error: %w", err) + } + + state = processStateCommand + break + } + defer smudgedReader.Close() + + if _, err := pktline.WriteString(writer, "status=success\n"); err != nil { + return fmt.Errorf("sending status: %w", err) + } + + if err := pktline.WriteFlush(writer); err != nil { + return fmt.Errorf("flushing status: %w", err) + } + + // Read the smudged results in batches and relay it to the client. + // Because pktlines are limited in size we only ever read at most + // that many bytes. + var isEOF bool + for !isEOF { + n, err := smudgedReader.Read(buf) + if err != nil { + if !errors.Is(err, io.EOF) { + return fmt.Errorf("reading smudged contents: %w", err) + } + + isEOF = true + } + + if n > 0 { + if _, err := pktline.WriteString(writer, string(buf[:n])); err != nil { + return fmt.Errorf("writing smudged contents: %w", err) + } + } + } + smudgedReader.Close() + + // We're done writing the smudged contents to the client, so we need + // to tell the client. + if err := pktline.WriteFlush(writer); err != nil { + return fmt.Errorf("flushing smudged contents: %w", err) + } + + // We now have the opportunity to correct the status in case an + // error happened. For now we don't bother though and just abort the + // whole process in case we failed to read an LFS object, and that's + // why we just flush a second time. + if err := pktline.WriteFlush(writer); err != nil { + return fmt.Errorf("flushing final status: %w", err) + } + + // We are now ready to accept another command. + state = processStateCommand + break + } + + // Write the pktline into our buffer. Ideally, we could avoid slurping the + // whole content into memory first. But unfortunately, this is impossible in + // the context of long-running processes: the server-side _must not_ answer + // to the client before it has received all contents. And in the case we got + // a non-LFS-pointer as input, this means we have to slurp in all of its + // contents so that we can echo it back to the caller. + if _, err := content.Write(data); err != nil { + return fmt.Errorf("could not write clean data: %w", err) + } + } + + if err := writer.Flush(); err != nil { + return fmt.Errorf("could not flush: %w", err) + } + } + + if err := scanner.Err(); err != nil && !errors.Is(err, io.EOF) { + return fmt.Errorf("error scanning input: %w", err) + } + + if state != processStateCommand { + return fmt.Errorf("unexpected termination in state %v", state) + } + + return nil +} + +func smudgeOneObject(ctx context.Context, cfg smudge.Config, gitlabClient *gitlab.HTTPClient, from io.Reader) (io.ReadCloser, error) { + logger := log.ContextLogger(ctx) + + ptr, contents, err := lfs.DecodeFrom(from) + if err != nil { + // This isn't a valid LFS pointer. Just copy the existing pointer data. + return io.NopCloser(contents), nil + } + + logger.WithField("oid", ptr.Oid).Debug("decoded LFS OID") + + qs := url.Values{} + qs.Set("oid", ptr.Oid) + qs.Set("gl_repository", cfg.GlRepository) + u := url.URL{Path: "/lfs", RawQuery: qs.Encode()} + + response, err := gitlabClient.Get(ctx, u.String()) + if err != nil { + return nil, fmt.Errorf("error loading LFS object: %v", err) + } + + if response.StatusCode == 200 { + return response.Body, nil + } + + if err := response.Body.Close(); err != nil { + logger.WithError(err).Error("closing LFS pointer body: %w", err) + } + + return io.NopCloser(contents), nil +} diff --git a/cmd/gitaly-lfs-smudge/smudge_test.go b/cmd/gitaly-lfs-smudge/smudge_test.go new file mode 100644 index 000000000..f3d77ba0c --- /dev/null +++ b/cmd/gitaly-lfs-smudge/smudge_test.go @@ -0,0 +1,560 @@ +package main + +import ( + "bytes" + "fmt" + "net/http" + "strings" + "testing" + + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/pktline" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/smudge" + "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config" + "gitlab.com/gitlab-org/gitaly/v15/internal/gitlab" + "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" +) + +const ( + lfsOid = "3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12aa" + lfsPointer = `version https://git-lfs.github.com/spec/v1 +oid sha256:3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12aa +size 177735 +` + lfsPointerWithCRLF = `version https://git-lfs.github.com/spec/v1 +oid sha256:3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12aa` + "\r\nsize 177735" + invalidLfsPointer = `version https://git-lfs.github.com/spec/v1 +oid sha256:3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12aa&gl_repository=project-51 +size 177735 +` + invalidLfsPointerWithNonHex = `version https://git-lfs.github.com/spec/v1 +oid sha256:3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12z- +size 177735` + glRepository = "project-1" + secretToken = "topsecret" + testData = "hello world" + certPath = "../../internal/gitlab/testdata/certs/server.crt" + keyPath = "../../internal/gitlab/testdata/certs/server.key" +) + +var defaultOptions = gitlab.TestServerOptions{ + SecretToken: secretToken, + LfsBody: testData, + LfsOid: lfsOid, + GlRepository: glRepository, + ClientCACertPath: certPath, + ServerCertPath: certPath, + ServerKeyPath: keyPath, +} + +func TestFilter_successful(t *testing.T) { + testCases := []struct { + desc string + data string + }{ + { + desc: "regular LFS pointer", + data: lfsPointer, + }, + { + desc: "LFS pointer with CRLF", + data: lfsPointerWithCRLF, + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + ctx := testhelper.Context(t) + + var b bytes.Buffer + reader := strings.NewReader(tc.data) + + gitlabCfg, cleanup := runTestServer(t, defaultOptions) + defer cleanup() + + cfg := smudge.Config{ + GlRepository: "project-1", + Gitlab: gitlabCfg, + TLS: config.TLS{ + CertPath: certPath, + KeyPath: keyPath, + }, + } + + require.NoError(t, filter(ctx, cfg, &b, reader)) + require.Equal(t, testData, b.String()) + }) + } +} + +func TestFilter_unsuccessful(t *testing.T) { + defaultConfig := func(t *testing.T, gitlabCfg config.Gitlab) smudge.Config { + return smudge.Config{ + GlRepository: "project-1", + Gitlab: gitlabCfg, + } + } + + testCases := []struct { + desc string + setupCfg func(*testing.T, config.Gitlab) smudge.Config + data string + expectedError bool + options gitlab.TestServerOptions + expectedGitalyTLS string + }{ + { + desc: "bad LFS pointer", + data: "test data", + setupCfg: defaultConfig, + options: defaultOptions, + expectedError: false, + }, + { + desc: "invalid LFS pointer", + data: invalidLfsPointer, + setupCfg: defaultConfig, + options: defaultOptions, + expectedError: false, + }, + { + desc: "invalid LFS pointer with non-hex characters", + data: invalidLfsPointerWithNonHex, + setupCfg: defaultConfig, + options: defaultOptions, + expectedError: false, + }, + { + desc: "missing GL_REPOSITORY", + data: lfsPointer, + setupCfg: func(t *testing.T, gitlabCfg config.Gitlab) smudge.Config { + cfg := defaultConfig(t, gitlabCfg) + cfg.GlRepository = "" + return cfg + }, + options: defaultOptions, + expectedError: true, + }, + { + desc: "missing GL_INTERNAL_CONFIG", + data: lfsPointer, + setupCfg: func(t *testing.T, gitlabCfg config.Gitlab) smudge.Config { + cfg := defaultConfig(t, gitlabCfg) + cfg.Gitlab = config.Gitlab{} + return cfg + }, + options: defaultOptions, + expectedError: true, + }, + { + desc: "failed HTTP response", + data: lfsPointer, + setupCfg: defaultConfig, + options: gitlab.TestServerOptions{ + SecretToken: secretToken, + LfsBody: testData, + LfsOid: lfsOid, + GlRepository: glRepository, + LfsStatusCode: http.StatusInternalServerError, + }, + expectedError: true, + }, + { + desc: "invalid TLS paths", + data: lfsPointer, + setupCfg: func(t *testing.T, gitlabCfg config.Gitlab) smudge.Config { + cfg := defaultConfig(t, gitlabCfg) + cfg.TLS = config.TLS{CertPath: "fake-path", KeyPath: "not-real"} + return cfg + }, + options: defaultOptions, + expectedError: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + ctx := testhelper.Context(t) + + gitlabCfg, cleanup := runTestServer(t, tc.options) + defer cleanup() + + cfg := tc.setupCfg(t, gitlabCfg) + + var b bytes.Buffer + err := filter(ctx, cfg, &b, strings.NewReader(tc.data)) + + if tc.expectedError { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Equal(t, tc.data, b.String()) + } + }) + } +} + +func TestProcess(t *testing.T) { + t.Parallel() + + ctx := testhelper.Context(t) + + gitlabCfg, cleanup := runTestServer(t, defaultOptions) + defer cleanup() + + defaultSmudgeCfg := smudge.Config{ + GlRepository: "project-1", + Gitlab: gitlabCfg, + TLS: config.TLS{ + CertPath: certPath, + KeyPath: keyPath, + }, + DriverType: smudge.DriverTypeProcess, + } + + pkt := func(data string) string { + return fmt.Sprintf("%04x%s", len(data)+4, data) + } + flush := "0000" + + for _, tc := range []struct { + desc string + cfg smudge.Config + input []string + expectedErr error + expectedOutput string + }{ + { + desc: "unsupported client", + cfg: defaultSmudgeCfg, + input: []string{ + pkt("git-foobar-client\n"), + pkt("version=2\n"), + flush, + }, + expectedErr: fmt.Errorf("invalid client %q", "git-foobar-client\n"), + }, + { + desc: "unsupported version", + cfg: defaultSmudgeCfg, + input: []string{ + pkt("git-filter-client\n"), + pkt("version=3\n"), + flush, + }, + expectedErr: fmt.Errorf("client does not support version 2"), + }, + { + desc: "unsupported capability", + cfg: defaultSmudgeCfg, + input: []string{ + pkt("git-filter-client\n"), + pkt("version=2\n"), + flush, + pkt("capability=foobar\n"), + flush, + }, + expectedErr: fmt.Errorf("client does not support smudge capability"), + expectedOutput: strings.Join([]string{ + pkt("git-filter-server\n"), + pkt("version=2\n"), + flush, + }, ""), + }, + { + desc: "unsupported command", + cfg: defaultSmudgeCfg, + input: []string{ + pkt("git-filter-client\n"), + pkt("version=2\n"), + flush, + pkt("capability=smudge\n"), + flush, + pkt("command=clean\n"), + flush, + }, + expectedErr: fmt.Errorf("expected smudge command, got %q", "command=clean\n"), + expectedOutput: strings.Join([]string{ + pkt("git-filter-server\n"), + pkt("version=2\n"), + flush, + pkt("capability=smudge\n"), + flush, + }, ""), + }, + { + desc: "single non-LFS blob", + cfg: defaultSmudgeCfg, + input: []string{ + pkt("git-filter-client\n"), + pkt("version=2\n"), + flush, + pkt("capability=smudge\n"), + flush, + pkt("command=smudge\n"), + pkt("some=metadata\n"), + pkt("more=metadata\n"), + flush, + pkt("something"), + flush, + }, + expectedOutput: strings.Join([]string{ + pkt("git-filter-server\n"), + pkt("version=2\n"), + flush, + pkt("capability=smudge\n"), + flush, + pkt("status=success\n"), + flush, + pkt("something"), + flush, + flush, + }, ""), + }, + { + desc: "single non-LFS blob with multiline contents", + cfg: defaultSmudgeCfg, + input: []string{ + pkt("git-filter-client\n"), + pkt("version=2\n"), + flush, + pkt("capability=smudge\n"), + flush, + pkt("command=smudge\n"), + pkt("some=metadata\n"), + pkt("more=metadata\n"), + flush, + pkt("some"), + pkt("thing"), + flush, + }, + expectedOutput: strings.Join([]string{ + pkt("git-filter-server\n"), + pkt("version=2\n"), + flush, + pkt("capability=smudge\n"), + flush, + pkt("status=success\n"), + flush, + pkt("something"), + flush, + flush, + }, ""), + }, + { + desc: "multiple non-LFS blobs", + cfg: defaultSmudgeCfg, + input: []string{ + pkt("git-filter-client\n"), + pkt("version=2\n"), + flush, + pkt("capability=smudge\n"), + flush, + pkt("command=smudge\n"), + flush, + pkt("first"), + flush, + pkt("command=smudge\n"), + flush, + pkt("second"), + flush, + }, + expectedOutput: strings.Join([]string{ + pkt("git-filter-server\n"), + pkt("version=2\n"), + flush, + pkt("capability=smudge\n"), + flush, + pkt("status=success\n"), + flush, + pkt("first"), + flush, + flush, + pkt("status=success\n"), + flush, + pkt("second"), + flush, + flush, + }, ""), + }, + { + desc: "single LFS blob", + cfg: defaultSmudgeCfg, + input: []string{ + pkt("git-filter-client\n"), + pkt("version=2\n"), + flush, + pkt("capability=smudge\n"), + flush, + pkt("command=smudge\n"), + flush, + pkt(lfsPointer), + flush, + }, + expectedOutput: strings.Join([]string{ + pkt("git-filter-server\n"), + pkt("version=2\n"), + flush, + pkt("capability=smudge\n"), + flush, + pkt("status=success\n"), + flush, + pkt("hello world"), + flush, + flush, + }, ""), + }, + { + desc: "multiple LFS blobs", + cfg: defaultSmudgeCfg, + input: []string{ + pkt("git-filter-client\n"), + pkt("version=2\n"), + flush, + pkt("capability=smudge\n"), + flush, + pkt("command=smudge\n"), + flush, + pkt(lfsPointer), + flush, + pkt("command=smudge\n"), + flush, + pkt(lfsPointer), + flush, + }, + expectedOutput: strings.Join([]string{ + pkt("git-filter-server\n"), + pkt("version=2\n"), + flush, + pkt("capability=smudge\n"), + flush, + pkt("status=success\n"), + flush, + pkt("hello world"), + flush, + flush, + pkt("status=success\n"), + flush, + pkt("hello world"), + flush, + flush, + }, ""), + }, + { + desc: "full-blown session", + cfg: defaultSmudgeCfg, + input: []string{ + pkt("git-filter-client\n"), + pkt("version=1\n"), + pkt("version=2\n"), + pkt("version=99\n"), + flush, + pkt("capability=frobnicate\n"), + pkt("capability=smudge\n"), + pkt("capability=clean\n"), + flush, + // First blob. + pkt("command=smudge\n"), + pkt("unused=metadata\n"), + flush, + pkt("something something"), + flush, + // Second blob. + pkt("command=smudge\n"), + pkt("more=unused=metadata\n"), + flush, + pkt(lfsPointer), + flush, + // Third blob. + pkt("command=smudge\n"), + pkt("this=is a huge binary blob\n"), + flush, + pkt(strings.Repeat("1", pktline.MaxPktSize-4)), + pkt(strings.Repeat("2", pktline.MaxPktSize-4)), + pkt(strings.Repeat("3", pktline.MaxPktSize-4)), + flush, + }, + expectedOutput: strings.Join([]string{ + pkt("git-filter-server\n"), + pkt("version=2\n"), + flush, + pkt("capability=smudge\n"), + flush, + pkt("status=success\n"), + flush, + pkt("something something"), + flush, + flush, + pkt("status=success\n"), + flush, + pkt("hello world"), + flush, + flush, + pkt("status=success\n"), + flush, + // This looks a bit funny, but can be attributed to the fact that we + // use an io.Multireader to read the first 1024 bytes when parsing + // the LFS pointer. + pkt(strings.Repeat("1", 1024)), + pkt(strings.Repeat("1", pktline.MaxPktSize-4-1024) + strings.Repeat("2", 1024)), + pkt(strings.Repeat("2", pktline.MaxPktSize-4-1024) + strings.Repeat("3", 1024)), + pkt(strings.Repeat("3", pktline.MaxPktSize-4-1024)), + flush, + flush, + }, ""), + }, + { + desc: "partial failure", + cfg: defaultSmudgeCfg, + input: []string{ + pkt("git-filter-client\n"), + pkt("version=2\n"), + flush, + pkt("capability=smudge\n"), + flush, + // The first command sends an unknown LFS pointer that should cause + // an error. + pkt("command=smudge\n"), + flush, + pkt(strings.Join([]string{ + "version https://git-lfs.github.com/spec/v1", + "oid sha256:1111111111111111111111111111111111111111111111111111111111111111", + "size 177735", + }, "\n") + "\n"), + flush, + // And the second command sends a known LFS pointer that should + // still be processed as expected, regardless of the initial error. + pkt("command=smudge\n"), + flush, + pkt(lfsPointer), + flush, + }, + expectedOutput: strings.Join([]string{ + pkt("git-filter-server\n"), + pkt("version=2\n"), + flush, + pkt("capability=smudge\n"), + flush, + pkt("status=error\n"), + flush, + pkt("status=success\n"), + flush, + pkt("hello world"), + flush, + flush, + }, ""), + }, + } { + t.Run(tc.desc, func(t *testing.T) { + var inputBuffer bytes.Buffer + for _, input := range tc.input { + _, err := inputBuffer.WriteString(input) + require.NoError(t, err) + } + + var outputBuffer bytes.Buffer + require.Equal(t, tc.expectedErr, process(ctx, tc.cfg, &outputBuffer, &inputBuffer)) + require.Equal(t, tc.expectedOutput, outputBuffer.String()) + }) + } +} diff --git a/internal/git/pktline/pkt_line_test.go b/internal/git/pktline/pkt_line_test.go index e3192a011..ef9daaf78 100644 --- a/internal/git/pktline/pkt_line_test.go +++ b/internal/git/pktline/pkt_line_test.go @@ -332,3 +332,58 @@ func TestEachSidebandPacket(t *testing.T) { }) } } + +func TestPayload(t *testing.T) { + for _, tc := range []struct { + desc string + input string + expectedErr string + expectedPayload []byte + }{ + { + desc: "packet too small", + input: "123", + expectedErr: "packet too small", + }, + { + desc: "invalid length prefix", + input: "something", + expectedErr: "parsing length header \"some\": strconv.ParseUint: parsing \"some\": invalid syntax", + }, + { + desc: "flush packet", + input: "0000", + expectedErr: "flush packets do not have a payload", + }, + { + desc: "packet with trailing bytes", + input: "0005atrailing", + expectedErr: "packet length 13 does not match header length 5", + }, + { + desc: "packet with missing bytes", + input: "0006a", + expectedErr: "packet length 5 does not match header length 6", + }, + { + desc: "empty packet", + input: "0004", + expectedPayload: []byte{}, + }, + { + desc: "packet with data", + input: "0068" + strings.Repeat("x", 100), + expectedPayload: bytes.Repeat([]byte("x"), 100), + }, + } { + t.Run(tc.desc, func(t *testing.T) { + payload, err := Payload([]byte(tc.input)) + if tc.expectedErr == "" { + require.NoError(t, err) + } else { + require.EqualError(t, err, tc.expectedErr) + } + require.Equal(t, tc.expectedPayload, payload) + }) + } +} diff --git a/internal/git/pktline/pktline.go b/internal/git/pktline/pktline.go index e97531fa3..3cdeaf4e9 100644 --- a/internal/git/pktline/pktline.go +++ b/internal/git/pktline/pktline.go @@ -39,6 +39,30 @@ func Data(pkt []byte) []byte { return pkt[4:] } +// Payload returns the pktline's data. It verifies that the length header matches what we expect as +// data. +func Payload(pkt []byte) ([]byte, error) { + if len(pkt) < 4 { + return nil, fmt.Errorf("packet too small") + } + + if IsFlush(pkt) { + return nil, fmt.Errorf("flush packets do not have a payload") + } + + lengthHeader := string(pkt[:4]) + length, err := strconv.ParseUint(lengthHeader, 16, 16) + if err != nil { + return nil, fmt.Errorf("parsing length header %q: %w", lengthHeader, err) + } + + if uint64(len(pkt)) != length { + return nil, fmt.Errorf("packet length %d does not match header length %d", len(pkt), length) + } + + return pkt[4:], nil +} + // IsFlush detects the special flush packet '0000' func IsFlush(pkt []byte) bool { return bytes.Equal(pkt, PktFlush()) diff --git a/internal/git/smudge/config.go b/internal/git/smudge/config.go index 79ca4b1ba..c64c4f643 100644 --- a/internal/git/smudge/config.go +++ b/internal/git/smudge/config.go @@ -4,7 +4,9 @@ import ( "encoding/json" "errors" "fmt" + "path/filepath" + "gitlab.com/gitlab-org/gitaly/v15/internal/git" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config" "gitlab.com/gitlab-org/gitaly/v15/internal/helper/env" ) @@ -13,6 +15,18 @@ import ( // value of this environment variable should be the JSON-encoded `Config` struct. const ConfigEnvironmentKey = "GITALY_LFS_SMUDGE_CONFIG" +// DriverType determines the type of the smudge filter. +type DriverType int + +const ( + // DriverTypeFilter indicates that the smudge filter is to be run once per object. This is + // the current default but will be phased out eventually in favor of DriverTypeProcess. + DriverTypeFilter = DriverType(0) + // DriverTypeProcess is a long-running smudge filter that can process multiple objects in + // one session. See gitattributes(5), "Long Running Filter Process". + DriverTypeProcess = DriverType(1) +) + // Config is the configuration used to run gitaly-lfs-smudge. It must be injected via environment // variables. type Config struct { @@ -24,6 +38,8 @@ type Config struct { Gitlab config.Gitlab `json:"gitlab"` // TLS contains configuration for setting up a TLS-encrypted connection to Rails. TLS config.TLS `json:"tls"` + // DriverType is the type of the smudge driver that should be used. + DriverType DriverType `json:"driver_type"` } // ConfigFromEnvironment loads the Config structure from the set of given environment variables. @@ -77,3 +93,21 @@ func (c Config) Environment() (string, error) { return fmt.Sprintf("%s=%s", ConfigEnvironmentKey, marshalled), nil } + +// GitConfiguration returns the Git configuration required to run the smudge filter. +func (c Config) GitConfiguration(cfg config.Cfg) (git.ConfigPair, error) { + switch c.DriverType { + case DriverTypeFilter: + return git.ConfigPair{ + Key: "filter.lfs.smudge", + Value: filepath.Join(cfg.BinDir, "gitaly-lfs-smudge"), + }, nil + case DriverTypeProcess: + return git.ConfigPair{ + Key: "filter.lfs.process", + Value: filepath.Join(cfg.BinDir, "gitaly-lfs-smudge"), + }, nil + default: + return git.ConfigPair{}, fmt.Errorf("unknown driver type: %v", c.DriverType) + } +} diff --git a/internal/git/smudge/config_test.go b/internal/git/smudge/config_test.go index d988bf16c..e2299b763 100644 --- a/internal/git/smudge/config_test.go +++ b/internal/git/smudge/config_test.go @@ -169,5 +169,5 @@ func TestConfig_Environment(t *testing.T) { env, err := cfg.Environment() require.NoError(t, err) - require.Equal(t, `GITALY_LFS_SMUDGE_CONFIG={"gl_repository":"repo","gitlab":{"url":"https://example.com","relative_url_root":"gitlab","http_settings":{"read_timeout":1,"user":"user","password":"correcthorsebatterystaple","ca_file":"/ca/file","ca_path":"/ca/path","self_signed_cert":true},"secret_file":"/secret/path"},"tls":{"cert_path":"/cert/path","key_path":"/key/path"}}`, env) + require.Equal(t, `GITALY_LFS_SMUDGE_CONFIG={"gl_repository":"repo","gitlab":{"url":"https://example.com","relative_url_root":"gitlab","http_settings":{"read_timeout":1,"user":"user","password":"correcthorsebatterystaple","ca_file":"/ca/file","ca_path":"/ca/path","self_signed_cert":true},"secret_file":"/secret/path"},"tls":{"cert_path":"/cert/path","key_path":"/key/path"},"driver_type":0}`, env) } diff --git a/internal/gitaly/service/repository/archive.go b/internal/gitaly/service/repository/archive.go index f08ee71a7..fd2497ac2 100644 --- a/internal/gitaly/service/repository/archive.go +++ b/internal/gitaly/service/repository/archive.go @@ -8,7 +8,6 @@ import ( "io" "os" "os/exec" - "path/filepath" "strings" "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" @@ -19,21 +18,19 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/storage" "gitlab.com/gitlab-org/gitaly/v15/internal/helper" "gitlab.com/gitlab-org/gitaly/v15/internal/log" + "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" "gitlab.com/gitlab-org/gitaly/v15/streamio" - "gitlab.com/gitlab-org/labkit/correlation" "google.golang.org/protobuf/proto" ) type archiveParams struct { - ctx context.Context writer io.Writer in *gitalypb.GetArchiveRequest compressCmd *exec.Cmd format string archivePath string exclude []string - binDir string loggingDir string } @@ -86,15 +83,13 @@ func (s *server) GetArchive(in *gitalypb.GetArchiveRequest, stream gitalypb.Repo ctxlogrus.Extract(ctx).WithField("request_hash", requestHash(in)).Info("request details") - return s.handleArchive(archiveParams{ - ctx: ctx, + return s.handleArchive(ctx, archiveParams{ writer: writer, in: in, compressCmd: compressCmd, format: format, archivePath: path, exclude: exclude, - binDir: s.binDir, loggingDir: s.loggingCfg.Dir, }) } @@ -177,7 +172,7 @@ func findGetArchivePath(ctx context.Context, f *catfile.TreeEntryFinder, commitI return true, nil } -func (s *server) handleArchive(p archiveParams) error { +func (s *server) handleArchive(ctx context.Context, p archiveParams) error { var args []string pathspecs := make([]string, 0, len(p.exclude)+1) if !p.in.GetElidePath() { @@ -196,31 +191,40 @@ func (s *server) handleArchive(p archiveParams) error { pathspecs = append(pathspecs, ":(exclude)"+exclude) } - smudgeCfg := smudge.Config{ - GlRepository: p.in.GetRepository().GetGlRepository(), - Gitlab: s.cfg.Gitlab, - TLS: s.cfg.TLS, - } + var env []string + var config []git.ConfigPair - smudgeEnv, err := smudgeCfg.Environment() - if err != nil { - return fmt.Errorf("setting up smudge environment: %w", err) - } + if p.in.GetIncludeLfsBlobs() { + smudgeCfg := smudge.Config{ + GlRepository: p.in.GetRepository().GetGlRepository(), + Gitlab: s.cfg.Gitlab, + TLS: s.cfg.TLS, + DriverType: smudge.DriverTypeFilter, + } - env := []string{ - smudgeEnv, - fmt.Sprintf("CORRELATION_ID=%s", correlation.ExtractFromContext(p.ctx)), - fmt.Sprintf("%s=%s", log.GitalyLogDirEnvKey, p.loggingDir), - } + if featureflag.GetArchiveLfsFilterProcess.IsEnabled(ctx) { + smudgeCfg.DriverType = smudge.DriverTypeProcess + } - var config []git.ConfigPair + smudgeEnv, err := smudgeCfg.Environment() + if err != nil { + return fmt.Errorf("setting up smudge environment: %w", err) + } - if p.in.GetIncludeLfsBlobs() { - binary := filepath.Join(p.binDir, "gitaly-lfs-smudge") - config = append(config, git.ConfigPair{Key: "filter.lfs.smudge", Value: binary}) + smudgeGitConfig, err := smudgeCfg.GitConfiguration(s.cfg) + if err != nil { + return fmt.Errorf("setting up smudge gitconfig: %w", err) + } + + env = append( + env, + smudgeEnv, + fmt.Sprintf("%s=%s", log.GitalyLogDirEnvKey, p.loggingDir), + ) + config = append(config, smudgeGitConfig) } - archiveCommand, err := s.gitCmdFactory.New(p.ctx, p.in.GetRepository(), git.SubCmd{ + archiveCommand, err := s.gitCmdFactory.New(ctx, p.in.GetRepository(), git.SubCmd{ Name: "archive", Flags: []git.Option{git.ValueFlag{Name: "--format", Value: p.format}, git.ValueFlag{Name: "--prefix", Value: p.in.GetPrefix() + "/"}}, Args: args, @@ -231,7 +235,7 @@ func (s *server) handleArchive(p archiveParams) error { } if p.compressCmd != nil { - command, err := command.New(p.ctx, p.compressCmd, archiveCommand, p.writer, nil) + command, err := command.New(ctx, p.compressCmd, archiveCommand, p.writer, nil) if err != nil { return err } diff --git a/internal/gitaly/service/repository/archive_test.go b/internal/gitaly/service/repository/archive_test.go index 995189aaf..caa64a00b 100644 --- a/internal/gitaly/service/repository/archive_test.go +++ b/internal/gitaly/service/repository/archive_test.go @@ -3,6 +3,7 @@ package repository import ( "archive/zip" "bytes" + "context" "fmt" "io" "os" @@ -16,6 +17,8 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/git/smudge" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config" "gitlab.com/gitlab-org/gitaly/v15/internal/gitlab" + "gitlab.com/gitlab-org/gitaly/v15/internal/helper/text" + "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testserver" @@ -167,7 +170,12 @@ func TestGetArchiveSuccess(t *testing.T) { } } -func TestGetArchiveWithLfsSuccess(t *testing.T) { +func TestGetArchive_includeLfsBlobs(t *testing.T) { + t.Parallel() + testhelper.NewFeatureSets(featureflag.GetArchiveLfsFilterProcess).Run(t, testGetArchiveIncludeLfsBlobs) +} + +func testGetArchiveIncludeLfsBlobs(t *testing.T, ctx context.Context) { t.Parallel() defaultOptions := gitlab.TestServerOptions{ SecretToken: secretToken, @@ -191,7 +199,6 @@ func TestGetArchiveWithLfsSuccess(t *testing.T) { client := newRepositoryClient(t, cfg, serverSocketPath) cfg.SocketPath = serverSocketPath - ctx := testhelper.Context(t) repo, _ := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{ Seed: gittest.SeedGitLabTest, }) @@ -469,13 +476,17 @@ func TestGetArchivePathInjection(t *testing.T) { require.Zero(t, authorizedKeysFileStat.Size()) } -func TestGetArchiveEnv(t *testing.T) { +func TestGetArchive_environment(t *testing.T) { + t.Parallel() + testhelper.NewFeatureSets(featureflag.GetArchiveLfsFilterProcess).Run(t, testGetArchiveEnvironment) +} + +func testGetArchiveEnvironment(t *testing.T, ctx context.Context) { testhelper.SkipWithPraefect(t, "It's not possible to create repositories through the API with the git command overwritten by the script.") t.Parallel() cfg := testcfg.Build(t) - ctx := testhelper.Context(t) gitCmdFactory := gittest.NewInterceptingCommandFactory(ctx, t, cfg, func(git.ExecutionEnvironment) string { return `#!/bin/sh @@ -497,28 +508,55 @@ func TestGetArchiveEnv(t *testing.T) { correlationID := correlation.SafeRandomID() ctx = correlation.ContextWithCorrelation(ctx, correlationID) - req := &gitalypb.GetArchiveRequest{ - Repository: repo, - CommitId: commitID, - } - smudgeCfg := smudge.Config{ GlRepository: gittest.GlRepository, Gitlab: cfg.Gitlab, TLS: cfg.TLS, + DriverType: smudge.DriverTypeFilter, + } + + if featureflag.GetArchiveLfsFilterProcess.IsEnabled(ctx) { + smudgeCfg.DriverType = smudge.DriverTypeProcess } smudgeEnv, err := smudgeCfg.Environment() require.NoError(t, err) - stream, err := client.GetArchive(ctx, req) - require.NoError(t, err) + for _, tc := range []struct { + desc string + includeLFSBlobs bool + expectedEnv []string + }{ + { + desc: "without LFS blobs", + includeLFSBlobs: false, + expectedEnv: []string{ + "CORRELATION_ID=" + correlationID, + }, + }, + { + desc: "with LFS blobs", + includeLFSBlobs: true, + expectedEnv: []string{ + "CORRELATION_ID=" + correlationID, + smudgeEnv, + "GITALY_LOG_DIR=" + cfg.Logging.Dir, + }, + }, + } { + t.Run(tc.desc, func(t *testing.T) { + stream, err := client.GetArchive(ctx, &gitalypb.GetArchiveRequest{ + Repository: repo, + CommitId: commitID, + IncludeLfsBlobs: tc.includeLFSBlobs, + }) + require.NoError(t, err) - data, err := consumeArchive(stream) - require.NoError(t, err) - require.Contains(t, string(data), "CORRELATION_ID="+correlationID) - require.Contains(t, string(data), "GITALY_LOG_DIR="+cfg.Logging.Dir) - require.Contains(t, string(data), smudgeEnv) + data, err := consumeArchive(stream) + require.NoError(t, err) + require.ElementsMatch(t, tc.expectedEnv, strings.Split(text.ChompBytes(data), "\n")) + }) + } } func compressedFileContents(t *testing.T, format gitalypb.GetArchiveRequest_Format, name string) []byte { diff --git a/internal/metadata/featureflag/ff_get_archive_lfs_filter_process.go b/internal/metadata/featureflag/ff_get_archive_lfs_filter_process.go new file mode 100644 index 000000000..489f42f3a --- /dev/null +++ b/internal/metadata/featureflag/ff_get_archive_lfs_filter_process.go @@ -0,0 +1,5 @@ +package featureflag + +// GetArchiveLfsFilterProcess enables the use of a long-running filter process to smudge LFS +// pointers to their contents. +var GetArchiveLfsFilterProcess = NewFeatureFlag("get_archive_lfs_filter_process", false) |