Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2022-06-10 10:18:00 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2022-06-10 10:18:00 +0300
commitbdd33f61948e5fa22da595f9b87e7d5032c19c26 (patch)
tree3ac7c3e02086ec0d716fe49b7adb564fbd358389
parentca99cbe1bba2cc1e38f561ad5034fed465824967 (diff)
parent0603c758d6e3580adbd3d0d69e326d05baa340b9 (diff)
Merge branch 'pks-lfs-process-filter' into 'master'
gitaly-lfs-smudge: Implement support for running as a long-running filter process Closes #4153 See merge request gitlab-org/gitaly!4595
-rw-r--r--cmd/gitaly-lfs-smudge/lfs_smudge.go76
-rw-r--r--cmd/gitaly-lfs-smudge/lfs_smudge_test.go188
-rw-r--r--cmd/gitaly-lfs-smudge/main.go27
-rw-r--r--cmd/gitaly-lfs-smudge/smudge.go320
-rw-r--r--cmd/gitaly-lfs-smudge/smudge_test.go560
-rw-r--r--internal/git/pktline/pkt_line_test.go55
-rw-r--r--internal/git/pktline/pktline.go24
-rw-r--r--internal/git/smudge/config.go34
-rw-r--r--internal/git/smudge/config_test.go2
-rw-r--r--internal/gitaly/service/repository/archive.go60
-rw-r--r--internal/gitaly/service/repository/archive_test.go70
-rw-r--r--internal/metadata/featureflag/ff_get_archive_lfs_filter_process.go5
12 files changed, 1108 insertions, 313 deletions
diff --git a/cmd/gitaly-lfs-smudge/lfs_smudge.go b/cmd/gitaly-lfs-smudge/lfs_smudge.go
deleted file mode 100644
index d2676e14a..000000000
--- a/cmd/gitaly-lfs-smudge/lfs_smudge.go
+++ /dev/null
@@ -1,76 +0,0 @@
-package main
-
-import (
- "context"
- "fmt"
- "io"
- "net/url"
-
- "github.com/git-lfs/git-lfs/v3/lfs"
- "gitlab.com/gitlab-org/gitaly/v15/internal/git/smudge"
- "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config/prometheus"
- "gitlab.com/gitlab-org/gitaly/v15/internal/gitlab"
- "gitlab.com/gitlab-org/labkit/log"
- "gitlab.com/gitlab-org/labkit/tracing"
-)
-
-func smudgeContents(cfg smudge.Config, to io.Writer, from io.Reader) (returnedErr error) {
- // Since the environment is sanitized at the moment, we're only
- // using this to extract the correlation ID. The finished() call
- // to clean up the tracing will be a NOP here.
- ctx, finished := tracing.ExtractFromEnv(context.Background())
- defer finished()
-
- output, err := handleSmudge(ctx, cfg, from)
- if err != nil {
- return fmt.Errorf("smudging contents: %w", err)
- }
- defer func() {
- if err := output.Close(); err != nil && returnedErr == nil {
- returnedErr = fmt.Errorf("closing LFS object: %w", err)
- }
- }()
-
- if _, err := io.Copy(to, output); err != nil {
- return fmt.Errorf("writing smudged contents: %w", err)
- }
-
- return nil
-}
-
-func handleSmudge(ctx context.Context, cfg smudge.Config, from io.Reader) (io.ReadCloser, error) {
- logger := log.ContextLogger(ctx)
-
- ptr, contents, err := lfs.DecodeFrom(from)
- if err != nil {
- // This isn't a valid LFS pointer. Just copy the existing pointer data.
- return io.NopCloser(contents), nil
- }
-
- logger.WithField("oid", ptr.Oid).Debug("decoded LFS OID")
-
- client, err := gitlab.NewHTTPClient(logger, cfg.Gitlab, cfg.TLS, prometheus.Config{})
- if err != nil {
- return io.NopCloser(contents), err
- }
-
- qs := url.Values{}
- qs.Set("oid", ptr.Oid)
- qs.Set("gl_repository", cfg.GlRepository)
- u := url.URL{Path: "/lfs", RawQuery: qs.Encode()}
-
- response, err := client.Get(ctx, u.String())
- if err != nil {
- return io.NopCloser(contents), fmt.Errorf("error loading LFS object: %v", err)
- }
-
- if response.StatusCode == 200 {
- return response.Body, nil
- }
-
- if err := response.Body.Close(); err != nil {
- logger.WithError(err).Error("closing LFS pointer body: %w", err)
- }
-
- return io.NopCloser(contents), nil
-}
diff --git a/cmd/gitaly-lfs-smudge/lfs_smudge_test.go b/cmd/gitaly-lfs-smudge/lfs_smudge_test.go
deleted file mode 100644
index 477a2131f..000000000
--- a/cmd/gitaly-lfs-smudge/lfs_smudge_test.go
+++ /dev/null
@@ -1,188 +0,0 @@
-package main
-
-import (
- "bytes"
- "net/http"
- "strings"
- "testing"
-
- "github.com/stretchr/testify/require"
- "gitlab.com/gitlab-org/gitaly/v15/internal/git/smudge"
- "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config"
- "gitlab.com/gitlab-org/gitaly/v15/internal/gitlab"
-)
-
-const (
- lfsOid = "3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12aa"
- lfsPointer = `version https://git-lfs.github.com/spec/v1
-oid sha256:3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12aa
-size 177735
-`
- lfsPointerWithCRLF = `version https://git-lfs.github.com/spec/v1
-oid sha256:3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12aa` + "\r\nsize 177735"
- invalidLfsPointer = `version https://git-lfs.github.com/spec/v1
-oid sha256:3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12aa&gl_repository=project-51
-size 177735
-`
- invalidLfsPointerWithNonHex = `version https://git-lfs.github.com/spec/v1
-oid sha256:3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12z-
-size 177735`
- glRepository = "project-1"
- secretToken = "topsecret"
- testData = "hello world"
- certPath = "../../internal/gitlab/testdata/certs/server.crt"
- keyPath = "../../internal/gitlab/testdata/certs/server.key"
-)
-
-var defaultOptions = gitlab.TestServerOptions{
- SecretToken: secretToken,
- LfsBody: testData,
- LfsOid: lfsOid,
- GlRepository: glRepository,
- ClientCACertPath: certPath,
- ServerCertPath: certPath,
- ServerKeyPath: keyPath,
-}
-
-func TestSuccessfulLfsSmudge(t *testing.T) {
- testCases := []struct {
- desc string
- data string
- }{
- {
- desc: "regular LFS pointer",
- data: lfsPointer,
- },
- {
- desc: "LFS pointer with CRLF",
- data: lfsPointerWithCRLF,
- },
- }
-
- for _, tc := range testCases {
- t.Run(tc.desc, func(t *testing.T) {
- var b bytes.Buffer
- reader := strings.NewReader(tc.data)
-
- gitlabCfg, cleanup := runTestServer(t, defaultOptions)
- defer cleanup()
-
- cfg := smudge.Config{
- GlRepository: "project-1",
- Gitlab: gitlabCfg,
- TLS: config.TLS{
- CertPath: certPath,
- KeyPath: keyPath,
- },
- }
-
- require.NoError(t, smudgeContents(cfg, &b, reader))
- require.Equal(t, testData, b.String())
- })
- }
-}
-
-func TestUnsuccessfulLfsSmudge(t *testing.T) {
- defaultConfig := func(t *testing.T, gitlabCfg config.Gitlab) smudge.Config {
- return smudge.Config{
- GlRepository: "project-1",
- Gitlab: gitlabCfg,
- }
- }
-
- testCases := []struct {
- desc string
- setupCfg func(*testing.T, config.Gitlab) smudge.Config
- data string
- expectedError bool
- options gitlab.TestServerOptions
- expectedGitalyTLS string
- }{
- {
- desc: "bad LFS pointer",
- data: "test data",
- setupCfg: defaultConfig,
- options: defaultOptions,
- expectedError: false,
- },
- {
- desc: "invalid LFS pointer",
- data: invalidLfsPointer,
- setupCfg: defaultConfig,
- options: defaultOptions,
- expectedError: false,
- },
- {
- desc: "invalid LFS pointer with non-hex characters",
- data: invalidLfsPointerWithNonHex,
- setupCfg: defaultConfig,
- options: defaultOptions,
- expectedError: false,
- },
- {
- desc: "missing GL_REPOSITORY",
- data: lfsPointer,
- setupCfg: func(t *testing.T, gitlabCfg config.Gitlab) smudge.Config {
- cfg := defaultConfig(t, gitlabCfg)
- cfg.GlRepository = ""
- return cfg
- },
- options: defaultOptions,
- expectedError: true,
- },
- {
- desc: "missing GL_INTERNAL_CONFIG",
- data: lfsPointer,
- setupCfg: func(t *testing.T, gitlabCfg config.Gitlab) smudge.Config {
- cfg := defaultConfig(t, gitlabCfg)
- cfg.Gitlab = config.Gitlab{}
- return cfg
- },
- options: defaultOptions,
- expectedError: true,
- },
- {
- desc: "failed HTTP response",
- data: lfsPointer,
- setupCfg: defaultConfig,
- options: gitlab.TestServerOptions{
- SecretToken: secretToken,
- LfsBody: testData,
- LfsOid: lfsOid,
- GlRepository: glRepository,
- LfsStatusCode: http.StatusInternalServerError,
- },
- expectedError: true,
- },
- {
- desc: "invalid TLS paths",
- data: lfsPointer,
- setupCfg: func(t *testing.T, gitlabCfg config.Gitlab) smudge.Config {
- cfg := defaultConfig(t, gitlabCfg)
- cfg.TLS = config.TLS{CertPath: "fake-path", KeyPath: "not-real"}
- return cfg
- },
- options: defaultOptions,
- expectedError: true,
- },
- }
-
- for _, tc := range testCases {
- t.Run(tc.desc, func(t *testing.T) {
- gitlabCfg, cleanup := runTestServer(t, tc.options)
- defer cleanup()
-
- cfg := tc.setupCfg(t, gitlabCfg)
-
- var b bytes.Buffer
- err := smudgeContents(cfg, &b, strings.NewReader(tc.data))
-
- if tc.expectedError {
- require.Error(t, err)
- } else {
- require.NoError(t, err)
- require.Equal(t, tc.data, b.String())
- }
- })
- }
-}
diff --git a/cmd/gitaly-lfs-smudge/main.go b/cmd/gitaly-lfs-smudge/main.go
index 9db0fbd55..51a63a6b4 100644
--- a/cmd/gitaly-lfs-smudge/main.go
+++ b/cmd/gitaly-lfs-smudge/main.go
@@ -1,6 +1,7 @@
package main
import (
+ "context"
"fmt"
"io"
"os"
@@ -10,6 +11,7 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/helper/env"
gitalylog "gitlab.com/gitlab-org/gitaly/v15/internal/log"
"gitlab.com/gitlab-org/labkit/log"
+ "gitlab.com/gitlab-org/labkit/tracing"
)
func requireStdin(msg string) {
@@ -59,14 +61,31 @@ func initLogging(environment []string) (io.Closer, error) {
}
func run(environment []string, out io.Writer, in io.Reader) error {
+ // Since the environment is sanitized at the moment, we're only
+ // using this to extract the correlation ID. The finished() call
+ // to clean up the tracing will be a NOP here.
+ ctx, finished := tracing.ExtractFromEnv(context.Background())
+ defer finished()
+
cfg, err := smudge.ConfigFromEnvironment(environment)
if err != nil {
return fmt.Errorf("loading configuration: %w", err)
}
- if err := smudgeContents(cfg, out, in); err != nil {
- return fmt.Errorf("running smudge filter: %w", err)
- }
+ switch cfg.DriverType {
+ case smudge.DriverTypeFilter:
+ if err := filter(ctx, cfg, out, in); err != nil {
+ return fmt.Errorf("running smudge filter: %w", err)
+ }
- return nil
+ return nil
+ case smudge.DriverTypeProcess:
+ if err := process(ctx, cfg, out, in); err != nil {
+ return fmt.Errorf("running smudge process: %w", err)
+ }
+
+ return nil
+ default:
+ return fmt.Errorf("unknown driver type: %v", cfg.DriverType)
+ }
}
diff --git a/cmd/gitaly-lfs-smudge/smudge.go b/cmd/gitaly-lfs-smudge/smudge.go
new file mode 100644
index 000000000..875a084c2
--- /dev/null
+++ b/cmd/gitaly-lfs-smudge/smudge.go
@@ -0,0 +1,320 @@
+package main
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "net/url"
+
+ "github.com/git-lfs/git-lfs/v3/lfs"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git/pktline"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git/smudge"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config/prometheus"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/gitlab"
+ "gitlab.com/gitlab-org/labkit/log"
+)
+
+func filter(ctx context.Context, cfg smudge.Config, to io.Writer, from io.Reader) (returnedErr error) {
+ client, err := gitlab.NewHTTPClient(log.ContextLogger(ctx), cfg.Gitlab, cfg.TLS, prometheus.Config{})
+ if err != nil {
+ return fmt.Errorf("creating HTTP client: %w", err)
+ }
+
+ output, err := smudgeOneObject(ctx, cfg, client, from)
+ if err != nil {
+ return fmt.Errorf("smudging contents: %w", err)
+ }
+ defer func() {
+ if err := output.Close(); err != nil && returnedErr == nil {
+ returnedErr = fmt.Errorf("closing LFS object: %w", err)
+ }
+ }()
+
+ if _, err := io.Copy(to, output); err != nil {
+ return fmt.Errorf("writing smudged contents: %w", err)
+ }
+
+ return nil
+}
+
+// processState encodes a state machine for handling long-running filter processes.
+type processState int
+
+const (
+ // processStateAnnounce is the initial state where we expect the client to announce its
+ // presence.
+ processStateAnnounce = processState(iota)
+ // processStateVersions is the state where the client announces all its known versions.
+ processStateVersions
+ // processStateCapabilities is the state where we have announced our own supported version
+ // to the client. The client now starts to send its supported capabilities.
+ processStateCapabilities
+ // processStateCommand is the state where the client sends the command it wants us to
+ // perform.
+ processStateCommand
+ // processStateSmudgeMetadata is the state where the client sends metadata of the file
+ // that should be smudged.
+ processStateSmudgeMetadata
+ // processStateSmudgeContent is the state where the client sends the contents of the file
+ // that should be smudged.
+ processStateSmudgeContent
+)
+
+func process(ctx context.Context, cfg smudge.Config, to io.Writer, from io.Reader) error {
+ client, err := gitlab.NewHTTPClient(log.ContextLogger(ctx), cfg.Gitlab, cfg.TLS, prometheus.Config{})
+ if err != nil {
+ return fmt.Errorf("creating HTTP client: %w", err)
+ }
+
+ scanner := pktline.NewScanner(from)
+
+ writer := bufio.NewWriter(to)
+
+ buf := make([]byte, pktline.MaxPktSize-4)
+ var content bytes.Buffer
+
+ clientSupportsVersion2 := false
+ clientSupportsSmudgeCapability := false
+
+ state := processStateAnnounce
+ for scanner.Scan() {
+ line := scanner.Bytes()
+
+ var data []byte
+ if !pktline.IsFlush(line) {
+ payload, err := pktline.Payload(line)
+ if err != nil {
+ return fmt.Errorf("getting payload: %w", err)
+ }
+
+ data = payload
+ }
+
+ switch state {
+ case processStateAnnounce:
+ if !bytes.Equal(data, []byte("git-filter-client\n")) {
+ return fmt.Errorf("invalid client %q", string(data))
+ }
+
+ state = processStateVersions
+ case processStateVersions:
+ // The client will announce one or more supported versions to us. We need to
+ // collect them all in order to determine whether we do in fact support one
+ // of the announced versions.
+ if !pktline.IsFlush(line) {
+ if !bytes.HasPrefix(data, []byte("version=")) {
+ return fmt.Errorf("expected version, got %q", string(data))
+ }
+
+ // We only support version two of this protocol, so we have to check
+ // whether that version is announced by the client.
+ if bytes.Equal(data, []byte("version=2\n")) {
+ clientSupportsVersion2 = true
+ }
+
+ break
+ }
+
+ // We have gotten a flush packet, so the client is done announcing its
+ // versions. If we haven't seen our version announced then it's time to
+ // quit.
+ if !clientSupportsVersion2 {
+ return fmt.Errorf("client does not support version 2")
+ }
+
+ // Announce that we're a server and that we're talking version 2 of this
+ // protocol.
+ if _, err := pktline.WriteString(writer, "git-filter-server\n"); err != nil {
+ return fmt.Errorf("announcing server presence: %w", err)
+ }
+
+ if _, err := pktline.WriteString(writer, "version=2\n"); err != nil {
+ return fmt.Errorf("announcing server version: %w", err)
+ }
+
+ if err := pktline.WriteFlush(writer); err != nil {
+ return fmt.Errorf("flushing announcement: %w", err)
+ }
+
+ state = processStateCapabilities
+ case processStateCapabilities:
+ // Similar as above, the client will now announce all the capabilities it
+ // supports. We only support the "smudging" capability.
+ if !pktline.IsFlush(line) {
+ if !bytes.HasPrefix(data, []byte("capability=")) {
+ return fmt.Errorf("expected capability, got: %q", string(data))
+ }
+
+ // We only support smudging contents.
+ if bytes.Equal(data, []byte("capability=smudge\n")) {
+ clientSupportsSmudgeCapability = true
+ }
+
+ break
+ }
+
+ // If the client doesn't support smudging then we're done.
+ if !clientSupportsSmudgeCapability {
+ return fmt.Errorf("client does not support smudge capability")
+ }
+
+ // Announce that the only capability we support ourselves is smudging.
+ if _, err := pktline.WriteString(writer, "capability=smudge\n"); err != nil {
+ return fmt.Errorf("announcing smudge capability: %w", err)
+ }
+
+ if err := pktline.WriteFlush(writer); err != nil {
+ return fmt.Errorf("flushing capability announcement: %w", err)
+ }
+
+ state = processStateCommand
+ case processStateCommand:
+ // We're now in the processing loop where the client may announce one or
+ // more smudge commands.
+ if !bytes.Equal(data, []byte("command=smudge\n")) {
+ return fmt.Errorf("expected smudge command, got %q", string(data))
+ }
+
+ state = processStateSmudgeMetadata
+ case processStateSmudgeMetadata:
+ // The client sends us various information about the blob like the path
+ // name or treeish. We don't care about that information, so we just wait
+ // until we get the flush packet.
+ if !pktline.IsFlush(line) {
+ break
+ }
+
+ content.Reset()
+
+ state = processStateSmudgeContent
+ case processStateSmudgeContent:
+ // When we receive a flush packet we know that the client is done sending us
+ // the clean data.
+ if pktline.IsFlush(line) {
+ smudgedReader, err := smudgeOneObject(ctx, cfg, client, &content)
+ if err != nil {
+ log.ContextLogger(ctx).WithError(err).Error("failed smudging LFS pointer")
+
+ if _, err := pktline.WriteString(writer, "status=error\n"); err != nil {
+ return fmt.Errorf("reporting failure: %w", err)
+ }
+
+ if err := pktline.WriteFlush(writer); err != nil {
+ return fmt.Errorf("flushing error: %w", err)
+ }
+
+ state = processStateCommand
+ break
+ }
+ defer smudgedReader.Close()
+
+ if _, err := pktline.WriteString(writer, "status=success\n"); err != nil {
+ return fmt.Errorf("sending status: %w", err)
+ }
+
+ if err := pktline.WriteFlush(writer); err != nil {
+ return fmt.Errorf("flushing status: %w", err)
+ }
+
+ // Read the smudged results in batches and relay it to the client.
+ // Because pktlines are limited in size we only ever read at most
+ // that many bytes.
+ var isEOF bool
+ for !isEOF {
+ n, err := smudgedReader.Read(buf)
+ if err != nil {
+ if !errors.Is(err, io.EOF) {
+ return fmt.Errorf("reading smudged contents: %w", err)
+ }
+
+ isEOF = true
+ }
+
+ if n > 0 {
+ if _, err := pktline.WriteString(writer, string(buf[:n])); err != nil {
+ return fmt.Errorf("writing smudged contents: %w", err)
+ }
+ }
+ }
+ smudgedReader.Close()
+
+ // We're done writing the smudged contents to the client, so we need
+ // to tell the client.
+ if err := pktline.WriteFlush(writer); err != nil {
+ return fmt.Errorf("flushing smudged contents: %w", err)
+ }
+
+ // We now have the opportunity to correct the status in case an
+ // error happened. For now we don't bother though and just abort the
+ // whole process in case we failed to read an LFS object, and that's
+ // why we just flush a second time.
+ if err := pktline.WriteFlush(writer); err != nil {
+ return fmt.Errorf("flushing final status: %w", err)
+ }
+
+ // We are now ready to accept another command.
+ state = processStateCommand
+ break
+ }
+
+ // Write the pktline into our buffer. Ideally, we could avoid slurping the
+ // whole content into memory first. But unfortunately, this is impossible in
+ // the context of long-running processes: the server-side _must not_ answer
+ // to the client before it has received all contents. And in the case we got
+ // a non-LFS-pointer as input, this means we have to slurp in all of its
+ // contents so that we can echo it back to the caller.
+ if _, err := content.Write(data); err != nil {
+ return fmt.Errorf("could not write clean data: %w", err)
+ }
+ }
+
+ if err := writer.Flush(); err != nil {
+ return fmt.Errorf("could not flush: %w", err)
+ }
+ }
+
+ if err := scanner.Err(); err != nil && !errors.Is(err, io.EOF) {
+ return fmt.Errorf("error scanning input: %w", err)
+ }
+
+ if state != processStateCommand {
+ return fmt.Errorf("unexpected termination in state %v", state)
+ }
+
+ return nil
+}
+
+func smudgeOneObject(ctx context.Context, cfg smudge.Config, gitlabClient *gitlab.HTTPClient, from io.Reader) (io.ReadCloser, error) {
+ logger := log.ContextLogger(ctx)
+
+ ptr, contents, err := lfs.DecodeFrom(from)
+ if err != nil {
+ // This isn't a valid LFS pointer. Just copy the existing pointer data.
+ return io.NopCloser(contents), nil
+ }
+
+ logger.WithField("oid", ptr.Oid).Debug("decoded LFS OID")
+
+ qs := url.Values{}
+ qs.Set("oid", ptr.Oid)
+ qs.Set("gl_repository", cfg.GlRepository)
+ u := url.URL{Path: "/lfs", RawQuery: qs.Encode()}
+
+ response, err := gitlabClient.Get(ctx, u.String())
+ if err != nil {
+ return nil, fmt.Errorf("error loading LFS object: %v", err)
+ }
+
+ if response.StatusCode == 200 {
+ return response.Body, nil
+ }
+
+ if err := response.Body.Close(); err != nil {
+ logger.WithError(err).Error("closing LFS pointer body: %w", err)
+ }
+
+ return io.NopCloser(contents), nil
+}
diff --git a/cmd/gitaly-lfs-smudge/smudge_test.go b/cmd/gitaly-lfs-smudge/smudge_test.go
new file mode 100644
index 000000000..f3d77ba0c
--- /dev/null
+++ b/cmd/gitaly-lfs-smudge/smudge_test.go
@@ -0,0 +1,560 @@
+package main
+
+import (
+ "bytes"
+ "fmt"
+ "net/http"
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git/pktline"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git/smudge"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/gitlab"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
+)
+
+const (
+ lfsOid = "3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12aa"
+ lfsPointer = `version https://git-lfs.github.com/spec/v1
+oid sha256:3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12aa
+size 177735
+`
+ lfsPointerWithCRLF = `version https://git-lfs.github.com/spec/v1
+oid sha256:3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12aa` + "\r\nsize 177735"
+ invalidLfsPointer = `version https://git-lfs.github.com/spec/v1
+oid sha256:3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12aa&gl_repository=project-51
+size 177735
+`
+ invalidLfsPointerWithNonHex = `version https://git-lfs.github.com/spec/v1
+oid sha256:3ea5dd307f195f449f0e08234183b82e92c3d5f4cff11c2a6bb014f9e0de12z-
+size 177735`
+ glRepository = "project-1"
+ secretToken = "topsecret"
+ testData = "hello world"
+ certPath = "../../internal/gitlab/testdata/certs/server.crt"
+ keyPath = "../../internal/gitlab/testdata/certs/server.key"
+)
+
+var defaultOptions = gitlab.TestServerOptions{
+ SecretToken: secretToken,
+ LfsBody: testData,
+ LfsOid: lfsOid,
+ GlRepository: glRepository,
+ ClientCACertPath: certPath,
+ ServerCertPath: certPath,
+ ServerKeyPath: keyPath,
+}
+
+func TestFilter_successful(t *testing.T) {
+ testCases := []struct {
+ desc string
+ data string
+ }{
+ {
+ desc: "regular LFS pointer",
+ data: lfsPointer,
+ },
+ {
+ desc: "LFS pointer with CRLF",
+ data: lfsPointerWithCRLF,
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.desc, func(t *testing.T) {
+ ctx := testhelper.Context(t)
+
+ var b bytes.Buffer
+ reader := strings.NewReader(tc.data)
+
+ gitlabCfg, cleanup := runTestServer(t, defaultOptions)
+ defer cleanup()
+
+ cfg := smudge.Config{
+ GlRepository: "project-1",
+ Gitlab: gitlabCfg,
+ TLS: config.TLS{
+ CertPath: certPath,
+ KeyPath: keyPath,
+ },
+ }
+
+ require.NoError(t, filter(ctx, cfg, &b, reader))
+ require.Equal(t, testData, b.String())
+ })
+ }
+}
+
+func TestFilter_unsuccessful(t *testing.T) {
+ defaultConfig := func(t *testing.T, gitlabCfg config.Gitlab) smudge.Config {
+ return smudge.Config{
+ GlRepository: "project-1",
+ Gitlab: gitlabCfg,
+ }
+ }
+
+ testCases := []struct {
+ desc string
+ setupCfg func(*testing.T, config.Gitlab) smudge.Config
+ data string
+ expectedError bool
+ options gitlab.TestServerOptions
+ expectedGitalyTLS string
+ }{
+ {
+ desc: "bad LFS pointer",
+ data: "test data",
+ setupCfg: defaultConfig,
+ options: defaultOptions,
+ expectedError: false,
+ },
+ {
+ desc: "invalid LFS pointer",
+ data: invalidLfsPointer,
+ setupCfg: defaultConfig,
+ options: defaultOptions,
+ expectedError: false,
+ },
+ {
+ desc: "invalid LFS pointer with non-hex characters",
+ data: invalidLfsPointerWithNonHex,
+ setupCfg: defaultConfig,
+ options: defaultOptions,
+ expectedError: false,
+ },
+ {
+ desc: "missing GL_REPOSITORY",
+ data: lfsPointer,
+ setupCfg: func(t *testing.T, gitlabCfg config.Gitlab) smudge.Config {
+ cfg := defaultConfig(t, gitlabCfg)
+ cfg.GlRepository = ""
+ return cfg
+ },
+ options: defaultOptions,
+ expectedError: true,
+ },
+ {
+ desc: "missing GL_INTERNAL_CONFIG",
+ data: lfsPointer,
+ setupCfg: func(t *testing.T, gitlabCfg config.Gitlab) smudge.Config {
+ cfg := defaultConfig(t, gitlabCfg)
+ cfg.Gitlab = config.Gitlab{}
+ return cfg
+ },
+ options: defaultOptions,
+ expectedError: true,
+ },
+ {
+ desc: "failed HTTP response",
+ data: lfsPointer,
+ setupCfg: defaultConfig,
+ options: gitlab.TestServerOptions{
+ SecretToken: secretToken,
+ LfsBody: testData,
+ LfsOid: lfsOid,
+ GlRepository: glRepository,
+ LfsStatusCode: http.StatusInternalServerError,
+ },
+ expectedError: true,
+ },
+ {
+ desc: "invalid TLS paths",
+ data: lfsPointer,
+ setupCfg: func(t *testing.T, gitlabCfg config.Gitlab) smudge.Config {
+ cfg := defaultConfig(t, gitlabCfg)
+ cfg.TLS = config.TLS{CertPath: "fake-path", KeyPath: "not-real"}
+ return cfg
+ },
+ options: defaultOptions,
+ expectedError: true,
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.desc, func(t *testing.T) {
+ ctx := testhelper.Context(t)
+
+ gitlabCfg, cleanup := runTestServer(t, tc.options)
+ defer cleanup()
+
+ cfg := tc.setupCfg(t, gitlabCfg)
+
+ var b bytes.Buffer
+ err := filter(ctx, cfg, &b, strings.NewReader(tc.data))
+
+ if tc.expectedError {
+ require.Error(t, err)
+ } else {
+ require.NoError(t, err)
+ require.Equal(t, tc.data, b.String())
+ }
+ })
+ }
+}
+
+func TestProcess(t *testing.T) {
+ t.Parallel()
+
+ ctx := testhelper.Context(t)
+
+ gitlabCfg, cleanup := runTestServer(t, defaultOptions)
+ defer cleanup()
+
+ defaultSmudgeCfg := smudge.Config{
+ GlRepository: "project-1",
+ Gitlab: gitlabCfg,
+ TLS: config.TLS{
+ CertPath: certPath,
+ KeyPath: keyPath,
+ },
+ DriverType: smudge.DriverTypeProcess,
+ }
+
+ pkt := func(data string) string {
+ return fmt.Sprintf("%04x%s", len(data)+4, data)
+ }
+ flush := "0000"
+
+ for _, tc := range []struct {
+ desc string
+ cfg smudge.Config
+ input []string
+ expectedErr error
+ expectedOutput string
+ }{
+ {
+ desc: "unsupported client",
+ cfg: defaultSmudgeCfg,
+ input: []string{
+ pkt("git-foobar-client\n"),
+ pkt("version=2\n"),
+ flush,
+ },
+ expectedErr: fmt.Errorf("invalid client %q", "git-foobar-client\n"),
+ },
+ {
+ desc: "unsupported version",
+ cfg: defaultSmudgeCfg,
+ input: []string{
+ pkt("git-filter-client\n"),
+ pkt("version=3\n"),
+ flush,
+ },
+ expectedErr: fmt.Errorf("client does not support version 2"),
+ },
+ {
+ desc: "unsupported capability",
+ cfg: defaultSmudgeCfg,
+ input: []string{
+ pkt("git-filter-client\n"),
+ pkt("version=2\n"),
+ flush,
+ pkt("capability=foobar\n"),
+ flush,
+ },
+ expectedErr: fmt.Errorf("client does not support smudge capability"),
+ expectedOutput: strings.Join([]string{
+ pkt("git-filter-server\n"),
+ pkt("version=2\n"),
+ flush,
+ }, ""),
+ },
+ {
+ desc: "unsupported command",
+ cfg: defaultSmudgeCfg,
+ input: []string{
+ pkt("git-filter-client\n"),
+ pkt("version=2\n"),
+ flush,
+ pkt("capability=smudge\n"),
+ flush,
+ pkt("command=clean\n"),
+ flush,
+ },
+ expectedErr: fmt.Errorf("expected smudge command, got %q", "command=clean\n"),
+ expectedOutput: strings.Join([]string{
+ pkt("git-filter-server\n"),
+ pkt("version=2\n"),
+ flush,
+ pkt("capability=smudge\n"),
+ flush,
+ }, ""),
+ },
+ {
+ desc: "single non-LFS blob",
+ cfg: defaultSmudgeCfg,
+ input: []string{
+ pkt("git-filter-client\n"),
+ pkt("version=2\n"),
+ flush,
+ pkt("capability=smudge\n"),
+ flush,
+ pkt("command=smudge\n"),
+ pkt("some=metadata\n"),
+ pkt("more=metadata\n"),
+ flush,
+ pkt("something"),
+ flush,
+ },
+ expectedOutput: strings.Join([]string{
+ pkt("git-filter-server\n"),
+ pkt("version=2\n"),
+ flush,
+ pkt("capability=smudge\n"),
+ flush,
+ pkt("status=success\n"),
+ flush,
+ pkt("something"),
+ flush,
+ flush,
+ }, ""),
+ },
+ {
+ desc: "single non-LFS blob with multiline contents",
+ cfg: defaultSmudgeCfg,
+ input: []string{
+ pkt("git-filter-client\n"),
+ pkt("version=2\n"),
+ flush,
+ pkt("capability=smudge\n"),
+ flush,
+ pkt("command=smudge\n"),
+ pkt("some=metadata\n"),
+ pkt("more=metadata\n"),
+ flush,
+ pkt("some"),
+ pkt("thing"),
+ flush,
+ },
+ expectedOutput: strings.Join([]string{
+ pkt("git-filter-server\n"),
+ pkt("version=2\n"),
+ flush,
+ pkt("capability=smudge\n"),
+ flush,
+ pkt("status=success\n"),
+ flush,
+ pkt("something"),
+ flush,
+ flush,
+ }, ""),
+ },
+ {
+ desc: "multiple non-LFS blobs",
+ cfg: defaultSmudgeCfg,
+ input: []string{
+ pkt("git-filter-client\n"),
+ pkt("version=2\n"),
+ flush,
+ pkt("capability=smudge\n"),
+ flush,
+ pkt("command=smudge\n"),
+ flush,
+ pkt("first"),
+ flush,
+ pkt("command=smudge\n"),
+ flush,
+ pkt("second"),
+ flush,
+ },
+ expectedOutput: strings.Join([]string{
+ pkt("git-filter-server\n"),
+ pkt("version=2\n"),
+ flush,
+ pkt("capability=smudge\n"),
+ flush,
+ pkt("status=success\n"),
+ flush,
+ pkt("first"),
+ flush,
+ flush,
+ pkt("status=success\n"),
+ flush,
+ pkt("second"),
+ flush,
+ flush,
+ }, ""),
+ },
+ {
+ desc: "single LFS blob",
+ cfg: defaultSmudgeCfg,
+ input: []string{
+ pkt("git-filter-client\n"),
+ pkt("version=2\n"),
+ flush,
+ pkt("capability=smudge\n"),
+ flush,
+ pkt("command=smudge\n"),
+ flush,
+ pkt(lfsPointer),
+ flush,
+ },
+ expectedOutput: strings.Join([]string{
+ pkt("git-filter-server\n"),
+ pkt("version=2\n"),
+ flush,
+ pkt("capability=smudge\n"),
+ flush,
+ pkt("status=success\n"),
+ flush,
+ pkt("hello world"),
+ flush,
+ flush,
+ }, ""),
+ },
+ {
+ desc: "multiple LFS blobs",
+ cfg: defaultSmudgeCfg,
+ input: []string{
+ pkt("git-filter-client\n"),
+ pkt("version=2\n"),
+ flush,
+ pkt("capability=smudge\n"),
+ flush,
+ pkt("command=smudge\n"),
+ flush,
+ pkt(lfsPointer),
+ flush,
+ pkt("command=smudge\n"),
+ flush,
+ pkt(lfsPointer),
+ flush,
+ },
+ expectedOutput: strings.Join([]string{
+ pkt("git-filter-server\n"),
+ pkt("version=2\n"),
+ flush,
+ pkt("capability=smudge\n"),
+ flush,
+ pkt("status=success\n"),
+ flush,
+ pkt("hello world"),
+ flush,
+ flush,
+ pkt("status=success\n"),
+ flush,
+ pkt("hello world"),
+ flush,
+ flush,
+ }, ""),
+ },
+ {
+ desc: "full-blown session",
+ cfg: defaultSmudgeCfg,
+ input: []string{
+ pkt("git-filter-client\n"),
+ pkt("version=1\n"),
+ pkt("version=2\n"),
+ pkt("version=99\n"),
+ flush,
+ pkt("capability=frobnicate\n"),
+ pkt("capability=smudge\n"),
+ pkt("capability=clean\n"),
+ flush,
+ // First blob.
+ pkt("command=smudge\n"),
+ pkt("unused=metadata\n"),
+ flush,
+ pkt("something something"),
+ flush,
+ // Second blob.
+ pkt("command=smudge\n"),
+ pkt("more=unused=metadata\n"),
+ flush,
+ pkt(lfsPointer),
+ flush,
+ // Third blob.
+ pkt("command=smudge\n"),
+ pkt("this=is a huge binary blob\n"),
+ flush,
+ pkt(strings.Repeat("1", pktline.MaxPktSize-4)),
+ pkt(strings.Repeat("2", pktline.MaxPktSize-4)),
+ pkt(strings.Repeat("3", pktline.MaxPktSize-4)),
+ flush,
+ },
+ expectedOutput: strings.Join([]string{
+ pkt("git-filter-server\n"),
+ pkt("version=2\n"),
+ flush,
+ pkt("capability=smudge\n"),
+ flush,
+ pkt("status=success\n"),
+ flush,
+ pkt("something something"),
+ flush,
+ flush,
+ pkt("status=success\n"),
+ flush,
+ pkt("hello world"),
+ flush,
+ flush,
+ pkt("status=success\n"),
+ flush,
+ // This looks a bit funny, but can be attributed to the fact that we
+ // use an io.Multireader to read the first 1024 bytes when parsing
+ // the LFS pointer.
+ pkt(strings.Repeat("1", 1024)),
+ pkt(strings.Repeat("1", pktline.MaxPktSize-4-1024) + strings.Repeat("2", 1024)),
+ pkt(strings.Repeat("2", pktline.MaxPktSize-4-1024) + strings.Repeat("3", 1024)),
+ pkt(strings.Repeat("3", pktline.MaxPktSize-4-1024)),
+ flush,
+ flush,
+ }, ""),
+ },
+ {
+ desc: "partial failure",
+ cfg: defaultSmudgeCfg,
+ input: []string{
+ pkt("git-filter-client\n"),
+ pkt("version=2\n"),
+ flush,
+ pkt("capability=smudge\n"),
+ flush,
+ // The first command sends an unknown LFS pointer that should cause
+ // an error.
+ pkt("command=smudge\n"),
+ flush,
+ pkt(strings.Join([]string{
+ "version https://git-lfs.github.com/spec/v1",
+ "oid sha256:1111111111111111111111111111111111111111111111111111111111111111",
+ "size 177735",
+ }, "\n") + "\n"),
+ flush,
+ // And the second command sends a known LFS pointer that should
+ // still be processed as expected, regardless of the initial error.
+ pkt("command=smudge\n"),
+ flush,
+ pkt(lfsPointer),
+ flush,
+ },
+ expectedOutput: strings.Join([]string{
+ pkt("git-filter-server\n"),
+ pkt("version=2\n"),
+ flush,
+ pkt("capability=smudge\n"),
+ flush,
+ pkt("status=error\n"),
+ flush,
+ pkt("status=success\n"),
+ flush,
+ pkt("hello world"),
+ flush,
+ flush,
+ }, ""),
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ var inputBuffer bytes.Buffer
+ for _, input := range tc.input {
+ _, err := inputBuffer.WriteString(input)
+ require.NoError(t, err)
+ }
+
+ var outputBuffer bytes.Buffer
+ require.Equal(t, tc.expectedErr, process(ctx, tc.cfg, &outputBuffer, &inputBuffer))
+ require.Equal(t, tc.expectedOutput, outputBuffer.String())
+ })
+ }
+}
diff --git a/internal/git/pktline/pkt_line_test.go b/internal/git/pktline/pkt_line_test.go
index e3192a011..ef9daaf78 100644
--- a/internal/git/pktline/pkt_line_test.go
+++ b/internal/git/pktline/pkt_line_test.go
@@ -332,3 +332,58 @@ func TestEachSidebandPacket(t *testing.T) {
})
}
}
+
+func TestPayload(t *testing.T) {
+ for _, tc := range []struct {
+ desc string
+ input string
+ expectedErr string
+ expectedPayload []byte
+ }{
+ {
+ desc: "packet too small",
+ input: "123",
+ expectedErr: "packet too small",
+ },
+ {
+ desc: "invalid length prefix",
+ input: "something",
+ expectedErr: "parsing length header \"some\": strconv.ParseUint: parsing \"some\": invalid syntax",
+ },
+ {
+ desc: "flush packet",
+ input: "0000",
+ expectedErr: "flush packets do not have a payload",
+ },
+ {
+ desc: "packet with trailing bytes",
+ input: "0005atrailing",
+ expectedErr: "packet length 13 does not match header length 5",
+ },
+ {
+ desc: "packet with missing bytes",
+ input: "0006a",
+ expectedErr: "packet length 5 does not match header length 6",
+ },
+ {
+ desc: "empty packet",
+ input: "0004",
+ expectedPayload: []byte{},
+ },
+ {
+ desc: "packet with data",
+ input: "0068" + strings.Repeat("x", 100),
+ expectedPayload: bytes.Repeat([]byte("x"), 100),
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ payload, err := Payload([]byte(tc.input))
+ if tc.expectedErr == "" {
+ require.NoError(t, err)
+ } else {
+ require.EqualError(t, err, tc.expectedErr)
+ }
+ require.Equal(t, tc.expectedPayload, payload)
+ })
+ }
+}
diff --git a/internal/git/pktline/pktline.go b/internal/git/pktline/pktline.go
index e97531fa3..3cdeaf4e9 100644
--- a/internal/git/pktline/pktline.go
+++ b/internal/git/pktline/pktline.go
@@ -39,6 +39,30 @@ func Data(pkt []byte) []byte {
return pkt[4:]
}
+// Payload returns the pktline's data. It verifies that the length header matches what we expect as
+// data.
+func Payload(pkt []byte) ([]byte, error) {
+ if len(pkt) < 4 {
+ return nil, fmt.Errorf("packet too small")
+ }
+
+ if IsFlush(pkt) {
+ return nil, fmt.Errorf("flush packets do not have a payload")
+ }
+
+ lengthHeader := string(pkt[:4])
+ length, err := strconv.ParseUint(lengthHeader, 16, 16)
+ if err != nil {
+ return nil, fmt.Errorf("parsing length header %q: %w", lengthHeader, err)
+ }
+
+ if uint64(len(pkt)) != length {
+ return nil, fmt.Errorf("packet length %d does not match header length %d", len(pkt), length)
+ }
+
+ return pkt[4:], nil
+}
+
// IsFlush detects the special flush packet '0000'
func IsFlush(pkt []byte) bool {
return bytes.Equal(pkt, PktFlush())
diff --git a/internal/git/smudge/config.go b/internal/git/smudge/config.go
index 79ca4b1ba..c64c4f643 100644
--- a/internal/git/smudge/config.go
+++ b/internal/git/smudge/config.go
@@ -4,7 +4,9 @@ import (
"encoding/json"
"errors"
"fmt"
+ "path/filepath"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git"
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config"
"gitlab.com/gitlab-org/gitaly/v15/internal/helper/env"
)
@@ -13,6 +15,18 @@ import (
// value of this environment variable should be the JSON-encoded `Config` struct.
const ConfigEnvironmentKey = "GITALY_LFS_SMUDGE_CONFIG"
+// DriverType determines the type of the smudge filter.
+type DriverType int
+
+const (
+ // DriverTypeFilter indicates that the smudge filter is to be run once per object. This is
+ // the current default but will be phased out eventually in favor of DriverTypeProcess.
+ DriverTypeFilter = DriverType(0)
+ // DriverTypeProcess is a long-running smudge filter that can process multiple objects in
+ // one session. See gitattributes(5), "Long Running Filter Process".
+ DriverTypeProcess = DriverType(1)
+)
+
// Config is the configuration used to run gitaly-lfs-smudge. It must be injected via environment
// variables.
type Config struct {
@@ -24,6 +38,8 @@ type Config struct {
Gitlab config.Gitlab `json:"gitlab"`
// TLS contains configuration for setting up a TLS-encrypted connection to Rails.
TLS config.TLS `json:"tls"`
+ // DriverType is the type of the smudge driver that should be used.
+ DriverType DriverType `json:"driver_type"`
}
// ConfigFromEnvironment loads the Config structure from the set of given environment variables.
@@ -77,3 +93,21 @@ func (c Config) Environment() (string, error) {
return fmt.Sprintf("%s=%s", ConfigEnvironmentKey, marshalled), nil
}
+
+// GitConfiguration returns the Git configuration required to run the smudge filter.
+func (c Config) GitConfiguration(cfg config.Cfg) (git.ConfigPair, error) {
+ switch c.DriverType {
+ case DriverTypeFilter:
+ return git.ConfigPair{
+ Key: "filter.lfs.smudge",
+ Value: filepath.Join(cfg.BinDir, "gitaly-lfs-smudge"),
+ }, nil
+ case DriverTypeProcess:
+ return git.ConfigPair{
+ Key: "filter.lfs.process",
+ Value: filepath.Join(cfg.BinDir, "gitaly-lfs-smudge"),
+ }, nil
+ default:
+ return git.ConfigPair{}, fmt.Errorf("unknown driver type: %v", c.DriverType)
+ }
+}
diff --git a/internal/git/smudge/config_test.go b/internal/git/smudge/config_test.go
index d988bf16c..e2299b763 100644
--- a/internal/git/smudge/config_test.go
+++ b/internal/git/smudge/config_test.go
@@ -169,5 +169,5 @@ func TestConfig_Environment(t *testing.T) {
env, err := cfg.Environment()
require.NoError(t, err)
- require.Equal(t, `GITALY_LFS_SMUDGE_CONFIG={"gl_repository":"repo","gitlab":{"url":"https://example.com","relative_url_root":"gitlab","http_settings":{"read_timeout":1,"user":"user","password":"correcthorsebatterystaple","ca_file":"/ca/file","ca_path":"/ca/path","self_signed_cert":true},"secret_file":"/secret/path"},"tls":{"cert_path":"/cert/path","key_path":"/key/path"}}`, env)
+ require.Equal(t, `GITALY_LFS_SMUDGE_CONFIG={"gl_repository":"repo","gitlab":{"url":"https://example.com","relative_url_root":"gitlab","http_settings":{"read_timeout":1,"user":"user","password":"correcthorsebatterystaple","ca_file":"/ca/file","ca_path":"/ca/path","self_signed_cert":true},"secret_file":"/secret/path"},"tls":{"cert_path":"/cert/path","key_path":"/key/path"},"driver_type":0}`, env)
}
diff --git a/internal/gitaly/service/repository/archive.go b/internal/gitaly/service/repository/archive.go
index f08ee71a7..fd2497ac2 100644
--- a/internal/gitaly/service/repository/archive.go
+++ b/internal/gitaly/service/repository/archive.go
@@ -8,7 +8,6 @@ import (
"io"
"os"
"os/exec"
- "path/filepath"
"strings"
"github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus"
@@ -19,21 +18,19 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/storage"
"gitlab.com/gitlab-org/gitaly/v15/internal/helper"
"gitlab.com/gitlab-org/gitaly/v15/internal/log"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag"
"gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb"
"gitlab.com/gitlab-org/gitaly/v15/streamio"
- "gitlab.com/gitlab-org/labkit/correlation"
"google.golang.org/protobuf/proto"
)
type archiveParams struct {
- ctx context.Context
writer io.Writer
in *gitalypb.GetArchiveRequest
compressCmd *exec.Cmd
format string
archivePath string
exclude []string
- binDir string
loggingDir string
}
@@ -86,15 +83,13 @@ func (s *server) GetArchive(in *gitalypb.GetArchiveRequest, stream gitalypb.Repo
ctxlogrus.Extract(ctx).WithField("request_hash", requestHash(in)).Info("request details")
- return s.handleArchive(archiveParams{
- ctx: ctx,
+ return s.handleArchive(ctx, archiveParams{
writer: writer,
in: in,
compressCmd: compressCmd,
format: format,
archivePath: path,
exclude: exclude,
- binDir: s.binDir,
loggingDir: s.loggingCfg.Dir,
})
}
@@ -177,7 +172,7 @@ func findGetArchivePath(ctx context.Context, f *catfile.TreeEntryFinder, commitI
return true, nil
}
-func (s *server) handleArchive(p archiveParams) error {
+func (s *server) handleArchive(ctx context.Context, p archiveParams) error {
var args []string
pathspecs := make([]string, 0, len(p.exclude)+1)
if !p.in.GetElidePath() {
@@ -196,31 +191,40 @@ func (s *server) handleArchive(p archiveParams) error {
pathspecs = append(pathspecs, ":(exclude)"+exclude)
}
- smudgeCfg := smudge.Config{
- GlRepository: p.in.GetRepository().GetGlRepository(),
- Gitlab: s.cfg.Gitlab,
- TLS: s.cfg.TLS,
- }
+ var env []string
+ var config []git.ConfigPair
- smudgeEnv, err := smudgeCfg.Environment()
- if err != nil {
- return fmt.Errorf("setting up smudge environment: %w", err)
- }
+ if p.in.GetIncludeLfsBlobs() {
+ smudgeCfg := smudge.Config{
+ GlRepository: p.in.GetRepository().GetGlRepository(),
+ Gitlab: s.cfg.Gitlab,
+ TLS: s.cfg.TLS,
+ DriverType: smudge.DriverTypeFilter,
+ }
- env := []string{
- smudgeEnv,
- fmt.Sprintf("CORRELATION_ID=%s", correlation.ExtractFromContext(p.ctx)),
- fmt.Sprintf("%s=%s", log.GitalyLogDirEnvKey, p.loggingDir),
- }
+ if featureflag.GetArchiveLfsFilterProcess.IsEnabled(ctx) {
+ smudgeCfg.DriverType = smudge.DriverTypeProcess
+ }
- var config []git.ConfigPair
+ smudgeEnv, err := smudgeCfg.Environment()
+ if err != nil {
+ return fmt.Errorf("setting up smudge environment: %w", err)
+ }
- if p.in.GetIncludeLfsBlobs() {
- binary := filepath.Join(p.binDir, "gitaly-lfs-smudge")
- config = append(config, git.ConfigPair{Key: "filter.lfs.smudge", Value: binary})
+ smudgeGitConfig, err := smudgeCfg.GitConfiguration(s.cfg)
+ if err != nil {
+ return fmt.Errorf("setting up smudge gitconfig: %w", err)
+ }
+
+ env = append(
+ env,
+ smudgeEnv,
+ fmt.Sprintf("%s=%s", log.GitalyLogDirEnvKey, p.loggingDir),
+ )
+ config = append(config, smudgeGitConfig)
}
- archiveCommand, err := s.gitCmdFactory.New(p.ctx, p.in.GetRepository(), git.SubCmd{
+ archiveCommand, err := s.gitCmdFactory.New(ctx, p.in.GetRepository(), git.SubCmd{
Name: "archive",
Flags: []git.Option{git.ValueFlag{Name: "--format", Value: p.format}, git.ValueFlag{Name: "--prefix", Value: p.in.GetPrefix() + "/"}},
Args: args,
@@ -231,7 +235,7 @@ func (s *server) handleArchive(p archiveParams) error {
}
if p.compressCmd != nil {
- command, err := command.New(p.ctx, p.compressCmd, archiveCommand, p.writer, nil)
+ command, err := command.New(ctx, p.compressCmd, archiveCommand, p.writer, nil)
if err != nil {
return err
}
diff --git a/internal/gitaly/service/repository/archive_test.go b/internal/gitaly/service/repository/archive_test.go
index 995189aaf..caa64a00b 100644
--- a/internal/gitaly/service/repository/archive_test.go
+++ b/internal/gitaly/service/repository/archive_test.go
@@ -3,6 +3,7 @@ package repository
import (
"archive/zip"
"bytes"
+ "context"
"fmt"
"io"
"os"
@@ -16,6 +17,8 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/git/smudge"
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config"
"gitlab.com/gitlab-org/gitaly/v15/internal/gitlab"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/helper/text"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testserver"
@@ -167,7 +170,12 @@ func TestGetArchiveSuccess(t *testing.T) {
}
}
-func TestGetArchiveWithLfsSuccess(t *testing.T) {
+func TestGetArchive_includeLfsBlobs(t *testing.T) {
+ t.Parallel()
+ testhelper.NewFeatureSets(featureflag.GetArchiveLfsFilterProcess).Run(t, testGetArchiveIncludeLfsBlobs)
+}
+
+func testGetArchiveIncludeLfsBlobs(t *testing.T, ctx context.Context) {
t.Parallel()
defaultOptions := gitlab.TestServerOptions{
SecretToken: secretToken,
@@ -191,7 +199,6 @@ func TestGetArchiveWithLfsSuccess(t *testing.T) {
client := newRepositoryClient(t, cfg, serverSocketPath)
cfg.SocketPath = serverSocketPath
- ctx := testhelper.Context(t)
repo, _ := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{
Seed: gittest.SeedGitLabTest,
})
@@ -469,13 +476,17 @@ func TestGetArchivePathInjection(t *testing.T) {
require.Zero(t, authorizedKeysFileStat.Size())
}
-func TestGetArchiveEnv(t *testing.T) {
+func TestGetArchive_environment(t *testing.T) {
+ t.Parallel()
+ testhelper.NewFeatureSets(featureflag.GetArchiveLfsFilterProcess).Run(t, testGetArchiveEnvironment)
+}
+
+func testGetArchiveEnvironment(t *testing.T, ctx context.Context) {
testhelper.SkipWithPraefect(t, "It's not possible to create repositories through the API with the git command overwritten by the script.")
t.Parallel()
cfg := testcfg.Build(t)
- ctx := testhelper.Context(t)
gitCmdFactory := gittest.NewInterceptingCommandFactory(ctx, t, cfg, func(git.ExecutionEnvironment) string {
return `#!/bin/sh
@@ -497,28 +508,55 @@ func TestGetArchiveEnv(t *testing.T) {
correlationID := correlation.SafeRandomID()
ctx = correlation.ContextWithCorrelation(ctx, correlationID)
- req := &gitalypb.GetArchiveRequest{
- Repository: repo,
- CommitId: commitID,
- }
-
smudgeCfg := smudge.Config{
GlRepository: gittest.GlRepository,
Gitlab: cfg.Gitlab,
TLS: cfg.TLS,
+ DriverType: smudge.DriverTypeFilter,
+ }
+
+ if featureflag.GetArchiveLfsFilterProcess.IsEnabled(ctx) {
+ smudgeCfg.DriverType = smudge.DriverTypeProcess
}
smudgeEnv, err := smudgeCfg.Environment()
require.NoError(t, err)
- stream, err := client.GetArchive(ctx, req)
- require.NoError(t, err)
+ for _, tc := range []struct {
+ desc string
+ includeLFSBlobs bool
+ expectedEnv []string
+ }{
+ {
+ desc: "without LFS blobs",
+ includeLFSBlobs: false,
+ expectedEnv: []string{
+ "CORRELATION_ID=" + correlationID,
+ },
+ },
+ {
+ desc: "with LFS blobs",
+ includeLFSBlobs: true,
+ expectedEnv: []string{
+ "CORRELATION_ID=" + correlationID,
+ smudgeEnv,
+ "GITALY_LOG_DIR=" + cfg.Logging.Dir,
+ },
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ stream, err := client.GetArchive(ctx, &gitalypb.GetArchiveRequest{
+ Repository: repo,
+ CommitId: commitID,
+ IncludeLfsBlobs: tc.includeLFSBlobs,
+ })
+ require.NoError(t, err)
- data, err := consumeArchive(stream)
- require.NoError(t, err)
- require.Contains(t, string(data), "CORRELATION_ID="+correlationID)
- require.Contains(t, string(data), "GITALY_LOG_DIR="+cfg.Logging.Dir)
- require.Contains(t, string(data), smudgeEnv)
+ data, err := consumeArchive(stream)
+ require.NoError(t, err)
+ require.ElementsMatch(t, tc.expectedEnv, strings.Split(text.ChompBytes(data), "\n"))
+ })
+ }
}
func compressedFileContents(t *testing.T, format gitalypb.GetArchiveRequest_Format, name string) []byte {
diff --git a/internal/metadata/featureflag/ff_get_archive_lfs_filter_process.go b/internal/metadata/featureflag/ff_get_archive_lfs_filter_process.go
new file mode 100644
index 000000000..489f42f3a
--- /dev/null
+++ b/internal/metadata/featureflag/ff_get_archive_lfs_filter_process.go
@@ -0,0 +1,5 @@
+package featureflag
+
+// GetArchiveLfsFilterProcess enables the use of a long-running filter process to smudge LFS
+// pointers to their contents.
+var GetArchiveLfsFilterProcess = NewFeatureFlag("get_archive_lfs_filter_process", false)