Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWill Chandler <wchandler@gitlab.com>2023-09-05 23:57:29 +0300
committerWill Chandler <wchandler@gitlab.com>2023-09-05 23:57:29 +0300
commit2d2653afc63b49a40cbee4dc2d5e4c2c700c1abe (patch)
tree16106dabfa4b665095d3cd499718764ca688d1f8
parentb9064e00593e573847ca1c6bb28f73b1b9331af0 (diff)
parent6a42099a2b2da4cd827daa2ffdb52e23a668a28b (diff)
Merge branch 'ps-go-based-tar-writer' into 'master'
archive: Create tarball with archive/tar package See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/6080 Merged-by: Will Chandler <wchandler@gitlab.com> Approved-by: Patrick Steinhardt <psteinhardt@gitlab.com> Approved-by: Will Chandler <wchandler@gitlab.com> Reviewed-by: Patrick Steinhardt <psteinhardt@gitlab.com> Reviewed-by: Pavlo Strokov <pstrokov@gitlab.com> Reviewed-by: Pavlo Strokov <pavlo.strokov@refurbed.com> Co-authored-by: Strokov Pavlo <strokovpavelsergeevich@gmail.com>
-rw-r--r--cmd/gitaly-hooks/hooks_test.go7
-rw-r--r--internal/archive/archive.go238
-rw-r--r--internal/archive/archive_test.go93
-rw-r--r--internal/archive/testhelper_test.go11
-rw-r--r--internal/git/gittest/command.go18
5 files changed, 359 insertions, 8 deletions
diff --git a/cmd/gitaly-hooks/hooks_test.go b/cmd/gitaly-hooks/hooks_test.go
index 934f1a3ce..25cd7937f 100644
--- a/cmd/gitaly-hooks/hooks_test.go
+++ b/cmd/gitaly-hooks/hooks_test.go
@@ -826,9 +826,10 @@ remote: error executing git hook
var stderr, stdout bytes.Buffer
gittest.ExecOpts(t, cfg, gittest.ExecConfig{
- Env: envForHooks(t, ctx, cfg, repo, glHookValues{GitalyLogDir: logDir}, proxyValues{}),
- Stdout: &stdout,
- Stderr: &stderr,
+ Env: envForHooks(t, ctx, cfg, repo, glHookValues{GitalyLogDir: logDir}, proxyValues{}),
+ Stdout: &stdout,
+ Stderr: &stderr,
+ ExpectedExitCode: 128,
}, args...)
require.Equal(t, "", stdout.String())
diff --git a/internal/archive/archive.go b/internal/archive/archive.go
index 20735edfe..4667f9ae0 100644
--- a/internal/archive/archive.go
+++ b/internal/archive/archive.go
@@ -1,14 +1,43 @@
package archive
import (
+ "archive/tar"
"context"
+ "errors"
"fmt"
"io"
+ "io/fs"
+ "os"
+ "path/filepath"
"runtime"
+ "strings"
"gitlab.com/gitlab-org/gitaly/v16/internal/command"
+ "golang.org/x/exp/slices"
)
+const (
+ // readDirEntriesPageSize is an amount of fs.DirEntry(s) to read
+ // from the opened file descriptor of the directory.
+ readDirEntriesPageSize = 32
+
+ // Below is a set of flags used to decide what to do with the file/directory
+ // found on the disk.
+ // decisionWrite means that the file/directory needs to be added to the archive
+ decisionWrite = iota
+ // decisionSkip means that the file/directory shouldn't be added to the archive
+ decisionSkip
+ // decisionStop means that nothing else left to be added to the archive
+ decisionStop
+
+ // tarFormat is a format to use for the archived files/directories/etc.
+ // The decision is made to use it as it is the latest version of the format.
+ // It allows sparse files, long file names, etc. that older formats doesn't support.
+ tarFormat = tar.FormatPAX
+)
+
+type decider func(string) int
+
// WriteTarball writes a tarball to an `io.Writer` for the provided path
// containing the specified archive members. Members should be specified
// relative to `path`.
@@ -32,3 +61,212 @@ func WriteTarball(ctx context.Context, writer io.Writer, path string, members ..
return nil
}
+
+// writeTarball creates a tar archive by flushing data into writer.
+// Files and folders to be included into archive needs to be provided as members.
+// If entry from members slice points to a folder all content inside it will be archived.
+// The empty folders will be archived as well.
+// It doesn't support sparse files efficiently, see https://github.com/golang/go/issues/22735
+func writeTarball(ctx context.Context, writer io.Writer, path string, members ...string) error {
+ // The function decides what to do with the entry: write, skip or finish archive
+ // creation if all members are added.
+ decide := func(candidate string) int {
+ if len(members) == 0 {
+ return decisionStop
+ }
+
+ idx := slices.Index(members, candidate)
+ if idx < 0 {
+ return decisionSkip
+ }
+
+ members = slices.Delete(members, idx, idx+1)
+ return decisionWrite
+ }
+
+ archWriter := tar.NewWriter(writer)
+ if err := walkDir(ctx, archWriter, path, "", decide); err != nil {
+ return fmt.Errorf("walk dir: %w", err)
+ }
+
+ if err := archWriter.Close(); err != nil {
+ return fmt.Errorf("closing archive writer: %w", err)
+ }
+
+ return nil
+}
+
+func walkDir(ctx context.Context, archWriter *tar.Writer, path, currentPrefix string, decide decider) error {
+ if cancelled(ctx) {
+ return ctx.Err()
+ }
+
+ dir, err := os.Open(path)
+ if err != nil {
+ return fmt.Errorf("open dir: %w", err)
+ }
+ defer func() { _ = dir.Close() }()
+
+ dirStat, err := dir.Stat()
+ if err != nil {
+ return fmt.Errorf("dir description: %w", err)
+ }
+
+ if dirStat.Mode().Type() != fs.ModeDir {
+ return errors.New("is not a dir")
+ }
+
+ for {
+ // Own implementation of the directory walker is used for optimization.
+ // fs.WalkDir() reads all the entries into memory at once and does the sorting.
+ // If there are a lot of files in the nested directories all that info will remain
+ // in the memory while the leaf is reached. In other words if there is 1000 files
+ // in the root and 1000 file in the nested directory of the root in one moment
+ // 2000 entries will remain in the memory for no reason.
+ dirEntries, err := dir.ReadDir(readDirEntriesPageSize)
+ if err != nil {
+ if errors.Is(err, io.EOF) {
+ return nil
+ }
+
+ return fmt.Errorf("chunked dir read: %w", err)
+ }
+
+ for _, dirEntry := range dirEntries {
+ if cancelled(ctx) {
+ return ctx.Err()
+ }
+
+ entryName := dirEntry.Name()
+ entryRelativePath := filepath.Join(currentPrefix, entryName)
+
+ decision := decide(entryRelativePath)
+ switch decision {
+ case decisionStop:
+ // Nothing needs to be added to the archive.
+ return nil
+ case decisionSkip:
+ // Current entry doesn't need to be added, but we need travers it if it is a directory.
+ if dirEntry.IsDir() {
+ if err := walkDir(ctx, archWriter, filepath.Join(path, entryName), entryRelativePath, decide); err != nil {
+ return fmt.Errorf("walk dir: %w", err)
+ }
+ }
+ case decisionWrite:
+ if dirEntry.IsDir() {
+ // As this is a dir we add all its content to the archive.
+ if err := tarDir(ctx, archWriter, filepath.Join(path, entryName), entryRelativePath, func(string) int { return decisionWrite }); err != nil {
+ return fmt.Errorf("write dir: %w", err)
+ }
+ continue
+ }
+
+ if err := tarFile(archWriter, path, entryRelativePath); err != nil {
+ return fmt.Errorf("write file: %w", err)
+ }
+ default:
+ return fmt.Errorf("unhandled decision: %d", decision)
+ }
+ }
+ }
+}
+
+func tarDir(ctx context.Context, archWriter *tar.Writer, path, currentPrefix string, decide decider) error {
+ // The current entry is a directory that needs to be added to the archive.
+ stat, err := os.Lstat(path)
+ if err != nil {
+ return err
+ }
+
+ link := ""
+ if stat.Mode()&fs.ModeSymlink != 0 {
+ link, err = os.Readlink(path)
+ if err != nil {
+ return fmt.Errorf("read link destination: %w", err)
+ }
+ }
+
+ header, err := tar.FileInfoHeader(stat, link)
+ if err != nil {
+ return fmt.Errorf("file info header: %w", err)
+ }
+ header.Name = strings.TrimSuffix(currentPrefix, "/") + "/"
+ header.Format = tarFormat
+
+ if err := archWriter.WriteHeader(header); err != nil {
+ return fmt.Errorf("write file info header: %w", err)
+ }
+
+ if err := walkDir(ctx, archWriter, path, currentPrefix, decide); err != nil {
+ return fmt.Errorf("walk dir: %w", err)
+ }
+
+ return nil
+}
+
+func tarFile(archWriter *tar.Writer, path, entryRelativePath string) error {
+ entryPath := filepath.Join(path, filepath.Base(entryRelativePath))
+
+ entryStat, err := os.Lstat(entryPath)
+ if err != nil {
+ return fmt.Errorf("file/link description: %w", err)
+ }
+
+ if entryStat.Mode()&fs.ModeSymlink != 0 {
+ targetPath, err := os.Readlink(entryPath)
+ if err != nil {
+ return fmt.Errorf("read link destination: %w", err)
+ }
+
+ header := &tar.Header{
+ Typeflag: tar.TypeSymlink,
+ Name: entryRelativePath,
+ Linkname: targetPath,
+ Format: tarFormat,
+ Mode: int64(entryStat.Mode().Perm()),
+ }
+
+ if err := archWriter.WriteHeader(header); err != nil {
+ return fmt.Errorf("write symlink file info header: %w", err)
+ }
+
+ return nil
+ }
+
+ curFile, err := os.Open(entryPath)
+ if err != nil {
+ return fmt.Errorf("open file: %w", err)
+ }
+ defer func() { _ = curFile.Close() }()
+
+ curFileStat, err := curFile.Stat()
+ if err != nil {
+ return fmt.Errorf("file description: %w", err)
+ }
+
+ header, err := tar.FileInfoHeader(curFileStat, curFileStat.Name())
+ if err != nil {
+ return fmt.Errorf("file info header: %w", err)
+ }
+ header.Name = entryRelativePath
+ header.Format = tarFormat
+
+ if err := archWriter.WriteHeader(header); err != nil {
+ return fmt.Errorf("write file info header: %w", err)
+ }
+
+ if _, err := io.Copy(archWriter, curFile); err != nil {
+ return fmt.Errorf("copy file: %w", err)
+ }
+
+ return nil
+}
+
+func cancelled(ctx context.Context) bool {
+ select {
+ case <-ctx.Done():
+ return true
+ default:
+ return false
+ }
+}
diff --git a/internal/archive/archive_test.go b/internal/archive/archive_test.go
new file mode 100644
index 000000000..39c2abbed
--- /dev/null
+++ b/internal/archive/archive_test.go
@@ -0,0 +1,93 @@
+package archive
+
+import (
+ "fmt"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/git/gittest"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/helper/perm"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper/testcfg"
+)
+
+func TestWriteTarball(t *testing.T) {
+ t.Parallel()
+
+ ctx := testhelper.Context(t)
+ tempDir := testhelper.TempDir(t)
+ srcDir := filepath.Join(tempDir, "src")
+
+ // regular file
+ writeFile(t, filepath.Join(srcDir, "a.txt"), []byte("a"))
+ // empty dir
+ require.NoError(t, os.Mkdir(filepath.Join(srcDir, "empty_dir"), perm.PublicDir))
+ // file with long name
+ writeFile(t, filepath.Join(srcDir, strings.Repeat("b", 150)+".txt"), []byte("b"))
+ // regular file that is not expected to be part of the archive (not in the members list)
+ writeFile(t, filepath.Join(srcDir, "excluded.txt"), []byte("excluded"))
+ // folder with multiple files all expected to be archived
+ nestedPath := filepath.Join(srcDir, "nested1")
+ for i := 0; i < readDirEntriesPageSize+1; i++ {
+ writeFile(t, filepath.Join(nestedPath, fmt.Sprintf("%d.txt", i)), []byte{byte(i)})
+ }
+ // nested file that is not expected to be part of the archive
+ writeFile(t, filepath.Join(srcDir, "nested2/nested/nested/c.txt"), []byte("c"))
+ // deeply nested file
+ writeFile(t, filepath.Join(srcDir, "nested2/nested/nested/nested/nested/d.txt"), []byte("d"))
+ // file that is used to create a symbolic link, is not expected to be part of the archive
+ writeFile(t, filepath.Join(srcDir, "nested3/target.txt"), []byte("target"))
+ // link to the file above
+ require.NoError(t, os.Symlink(filepath.Join(srcDir, "nested3/target.txt"), filepath.Join(srcDir, "link.to.target.txt")))
+ // directory that is a target of the symlink should not be archived
+ writeFile(t, filepath.Join(srcDir, "nested4/stub.txt"), []byte("symlinked"))
+ // link to the folder above
+ require.NoError(t, os.Symlink(filepath.Join(srcDir, "nested4"), filepath.Join(srcDir, "link.to.nested4")))
+
+ tarPath := filepath.Join(tempDir, "out.tar")
+ archFile, err := os.Create(tarPath)
+ require.NoError(t, err)
+ defer func() { _ = archFile.Close() }()
+
+ err = writeTarball(
+ ctx,
+ archFile,
+ srcDir,
+ "a.txt",
+ strings.Repeat("b", 150)+".txt",
+ "nested1",
+ "nested2/nested/nested/nested/nested/d.txt",
+ "link.to.target.txt",
+ "link.to.nested4",
+ )
+ require.NoError(t, err)
+ require.NoError(t, archFile.Close())
+
+ cfg := testcfg.Build(t)
+
+ dstDir := filepath.Join(tempDir, "dst")
+ require.NoError(t, os.Mkdir(dstDir, perm.PublicDir))
+ output, err := exec.Command("tar", "-xf", tarPath, "-C", dstDir).CombinedOutput()
+ require.NoErrorf(t, err, "%s", output)
+ diff := gittest.ExecOpts(t, cfg, gittest.ExecConfig{ExpectedExitCode: 1}, "diff", "--no-index", "--name-only", "--exit-code", dstDir, srcDir)
+ expected := strings.Join(
+ []string{
+ filepath.Join(srcDir, "excluded.txt"),
+ filepath.Join(srcDir, "nested2/nested/nested/c.txt"),
+ filepath.Join(srcDir, "nested3/target.txt"),
+ filepath.Join(srcDir, "nested4/stub.txt\n"),
+ },
+ "\n",
+ )
+ require.Equal(t, expected, string(diff))
+}
+
+func writeFile(tb testing.TB, path string, data []byte) {
+ tb.Helper()
+ require.NoError(tb, os.MkdirAll(filepath.Dir(path), perm.PrivateDir))
+ require.NoError(tb, os.WriteFile(path, data, perm.PrivateFile))
+}
diff --git a/internal/archive/testhelper_test.go b/internal/archive/testhelper_test.go
new file mode 100644
index 000000000..b8f9015cb
--- /dev/null
+++ b/internal/archive/testhelper_test.go
@@ -0,0 +1,11 @@
+package archive
+
+import (
+ "testing"
+
+ "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper"
+)
+
+func TestMain(m *testing.M) {
+ testhelper.Run(m)
+}
diff --git a/internal/git/gittest/command.go b/internal/git/gittest/command.go
index 7fd44151b..862c7822b 100644
--- a/internal/git/gittest/command.go
+++ b/internal/git/gittest/command.go
@@ -26,6 +26,9 @@ type ExecConfig struct {
// Env contains environment variables that should be appended to the spawned command's
// environment.
Env []string
+ // ExpectedExitCode is used to check the resulting exit code of the command. This can be used in case a command
+ // is expected to return an error code.
+ ExpectedExitCode int
}
// Exec runs a git command and returns the standard output, or fails.
@@ -59,13 +62,18 @@ func ExecOpts(tb testing.TB, cfg config.Cfg, execCfg ExecConfig, args ...string)
}
func handleExecErr(tb testing.TB, cfg config.Cfg, execCfg ExecConfig, args []string, err error) {
- if execCfg.Stderr == nil {
- tb.Log(cfg.Git.BinPath, args)
- if ee, ok := err.(*exec.ExitError); ok {
- tb.Logf("%s\n", ee.Stderr)
+ var stderr []byte
+ if ee, ok := err.(*exec.ExitError); ok {
+ if execCfg.ExpectedExitCode == ee.ExitCode() {
+ return
}
- tb.Fatal(err)
+ stderr = ee.Stderr
}
+ tb.Log(cfg.Git.BinPath, args)
+ if len(stderr) > 0 {
+ tb.Logf("%s\n", stderr)
+ }
+ tb.Fatal(err)
}
// NewCommand creates a new Git command ready for execution.