Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Patrick Steinhardt <psteinhardt@gitlab.com> 2021-06-10 12:10:39 +0300
committer Patrick Steinhardt <psteinhardt@gitlab.com> 2021-06-14 12:42:19 +0300
commit ca351860ac8fd1e398de26a783a1e7f37e57fe0a (patch)
tree 986db54df776bcdc044a73e7ab48134e34040658
parent f365b27d869b4e96850a10bdd40b775ab37045b6 (diff)
blob: Implement pipeline step for git-rev-list(1)
In order to unify infrastructure when filtering down a list of revisions to a list of objects, we're creating a set of pipeline steps which first enumerate all objects, then extract information about these objects and finally read the whole objects. This pipeline will allow us to flexibly put together steps and filter down the object set at intermediate steps. This commit implements the first pipeline step, which enumerates all objects reachable from a set of revisions via git-rev-list(1).
-rw-r--r-- internal/gitaly/service/blob/pipeline.go 128
-rw-r--r-- internal/gitaly/service/blob/pipeline_test.go 168
2 files changed, 296 insertions, 0 deletions
diff --git a/internal/gitaly/service/blob/pipeline.go b/internal/gitaly/service/blob/pipeline.go
new file mode 100644
index 000000000..78efc6c4a
--- /dev/null
+++ b/internal/gitaly/service/blob/pipeline.go
@@ -0,0 +1,128 @@
+package blob
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "fmt"
+
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo"
+)
+
+// revlistResult is a single item emitted by the revlist pipeline step. Each result carries either
+// an error or one object listed by git-rev-list(1).
+type revlistResult struct {
+ // err is an error which occurred during execution of the pipeline. Results that carry an
+ // error are created with only this field set, so oid and objectName are at their zero
+ // values in that case.
+ err error
+
+ // oid is the object ID of an object printed by git-rev-list(1).
+ oid git.ObjectID
+ // objectName is the name of the object. This is typically the path of the object if it was
+ // traversed via either a tree or a commit. The path depends on the order in which objects
+ // are traversed: if e.g. two different trees refer to the same blob with different names,
+ // the blob's path depends on which of the trees was traversed first. It is left nil when
+ // git-rev-list(1) printed no name (or an empty name) for the object.
+ objectName []byte
+}
+
+// revlistConfig is configuration for the revlist pipeline step. It starts out as the zero value
+// and is filled in by the revlistOption functions handed to revlist.
+type revlistConfig struct {
+ // blobLimit is the blob size limit handed to git-rev-list(1) as `--filter=blob:limit=<n>`.
+ // The filter is only added to the command line when this value is greater than zero.
+ blobLimit int
+}
+
+// revlistOption is an option for the revlist pipeline step. An option is a function which
+// modifies the revlistConfig it is given; revlist applies all options in the order in which they
+// were passed before it spawns git-rev-list(1).
+type revlistOption func(cfg *revlistConfig)
+
+// withBlobLimit returns an option which configures a size limit for blobs. With this option in
+// place, only blobs whose size is smaller than the given limit will be returned by the pipeline
+// step. The limit is simply recorded in the configuration; it is the revlist step itself which
+// translates it into a git-rev-list(1) filter.
+func withBlobLimit(limit int) revlistOption {
+	applyLimit := func(cfg *revlistConfig) {
+		cfg.blobLimit = limit
+	}
+	return applyLimit
+}
+
+// revlist spawns git-rev-list(1) for the given revisions with `--objects` and `--object-names`
+// enabled and streams one revlistResult per printed line through the returned channel. Objects
+// are listed in commit order (`--in-commit-order`). If a positive blob limit has been configured
+// via the options, it is passed on as `--filter=blob:limit=<n>`.
+//
+// Errors are reported in-band: spawning failures, scanner errors and a failing command each
+// produce a final result with only `err` set. The channel is closed once the command has been
+// fully processed, an error has been reported, or the context has been cancelled. Cancelling the
+// context therefore cancels the pipeline step, too.
+func revlist(
+	ctx context.Context,
+	repo *localrepo.Repo,
+	revisions []string,
+	options ...revlistOption,
+) <-chan revlistResult {
+	var cfg revlistConfig
+	for _, apply := range options {
+		apply(&cfg)
+	}
+
+	results := make(chan revlistResult)
+
+	// emit hands a result over to the consumer. It reports true if the context was cancelled
+	// instead, in which case the result has not been delivered and the producer should stop.
+	emit := func(result revlistResult) (cancelled bool) {
+		select {
+		case results <- result:
+			return false
+		case <-ctx.Done():
+			return true
+		}
+	}
+
+	go func() {
+		// Closing the channel signals the consumer that no further results will arrive.
+		defer close(results)
+
+		flags := []git.Option{
+			git.Flag{Name: "--in-commit-order"},
+			git.Flag{Name: "--objects"},
+			git.Flag{Name: "--object-names"},
+		}
+		if cfg.blobLimit > 0 {
+			filter := fmt.Sprintf("--filter=blob:limit=%d", cfg.blobLimit)
+			flags = append(flags, git.Flag{Name: filter})
+		}
+
+		cmd, err := repo.Exec(ctx, git.SubCmd{
+			Name:  "rev-list",
+			Flags: flags,
+			Args:  revisions,
+		})
+		if err != nil {
+			emit(revlistResult{err: err})
+			return
+		}
+
+		scanner := bufio.NewScanner(cmd)
+		for scanner.Scan() {
+			// The scanner reuses its buffer on the next call to `Scan()`, but the result
+			// is consumed asynchronously by the caller. Take a private copy of the line.
+			raw := scanner.Bytes()
+			line := make([]byte, len(raw))
+			copy(line, raw)
+
+			// Each line is "<oid>" optionally followed by a space and the object name.
+			var result revlistResult
+			if sep := bytes.IndexByte(line, ' '); sep < 0 {
+				result.oid = git.ObjectID(line)
+			} else {
+				result.oid = git.ObjectID(line[:sep])
+				if name := line[sep+1:]; len(name) > 0 {
+					result.objectName = name
+				}
+			}
+
+			if emit(result) {
+				return
+			}
+		}
+
+		if err := scanner.Err(); err != nil {
+			emit(revlistResult{err: fmt.Errorf("scanning rev-list output: %w", err)})
+			return
+		}
+
+		// Only after all output has been consumed do we learn whether the command succeeded.
+		if err := cmd.Wait(); err != nil {
+			emit(revlistResult{err: fmt.Errorf("rev-list pipeline command: %w", err)})
+		}
+	}()
+
+	return results
+}
diff --git a/internal/gitaly/service/blob/pipeline_test.go b/internal/gitaly/service/blob/pipeline_test.go
new file mode 100644
index 000000000..5314e00d9
--- /dev/null
+++ b/internal/gitaly/service/blob/pipeline_test.go
@@ -0,0 +1,168 @@
+package blob
+
+import (
+ "errors"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git/localrepo"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/testhelper/testcfg"
+)
+
+// TestRevlist runs the revlist pipeline step against a cloned test repository and verifies, for
+// a table of revision sets and options, that exactly the expected sequence of results (objects
+// or a flattened error) is received from the channel.
+func TestRevlist(t *testing.T) {
+	cfg := testcfg.Build(t)
+
+	repoProto, _, cleanup := gittest.CloneRepoAtStorage(t, cfg, cfg.Storages[0], t.Name())
+	defer cleanup()
+	repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+	testCases := []struct {
+		desc            string
+		revisions       []string
+		options         []revlistOption
+		expectedResults []revlistResult
+	}{
+		{
+			desc:      "single blob",
+			revisions: []string{lfsPointer1},
+			expectedResults: []revlistResult{
+				{oid: lfsPointer1},
+			},
+		},
+		{
+			desc:      "multiple blobs",
+			revisions: []string{lfsPointer1, lfsPointer2, lfsPointer3, lfsPointer4},
+			expectedResults: []revlistResult{
+				{oid: lfsPointer1},
+				{oid: lfsPointer2},
+				{oid: lfsPointer3},
+				{oid: lfsPointer4},
+			},
+		},
+		{
+			desc:      "duplicated blob prints blob once only",
+			revisions: []string{lfsPointer1, lfsPointer1},
+			expectedResults: []revlistResult{
+				{oid: lfsPointer1},
+			},
+		},
+		{
+			desc:      "tree results in object names",
+			revisions: []string{"b95c0fad32f4361845f91d9ce4c1721b52b82793"},
+			expectedResults: []revlistResult{
+				{oid: "b95c0fad32f4361845f91d9ce4c1721b52b82793"},
+				{oid: "93e123ac8a3e6a0b600953d7598af629dec7b735", objectName: []byte("branch-test.txt")},
+			},
+		},
+		{
+			desc:      "revision range",
+			revisions: []string{"^refs/heads/master~", "refs/heads/master"},
+			expectedResults: []revlistResult{
+				{oid: "1e292f8fedd741b75372e19097c76d327140c312"},
+				{oid: "07f8147e8e73aab6c935c296e8cdc5194dee729b"},
+				{oid: "ceb102b8d3f9a95c2eb979213e49f7cc1b23d56e", objectName: []byte("files")},
+				{oid: "2132d150328bd9334cc4e62a16a5d998a7e399b9", objectName: []byte("files/flat")},
+				{oid: "f3942dc8b824a2c9359e518d48e68f84461bd2f7", objectName: []byte("files/flat/path")},
+				{oid: "ea7249055466085d0a6c69951908ef47757e92f4", objectName: []byte("files/flat/path/correct")},
+				{oid: "c1c67abbaf91f624347bb3ae96eabe3a1b742478"},
+			},
+		},
+		{
+			// This revision is a tree object which contains multiple blobs. Git always
+			// prints objects which were passed directly on the command line, so such
+			// blobs cannot be filtered directly. Git v2.32.0 offers the new
+			// `--filter-provided` option to address this, but that fix is deferred to
+			// a later point. To demonstrate that the limit option works, the same
+			// tree is listed twice: here without a limit, and below with one.
+			desc:      "tree with multiple blobs without limit",
+			revisions: []string{"79d5f98270ad677c86a7e1ab2baa922958565135"},
+			expectedResults: []revlistResult{
+				{oid: "79d5f98270ad677c86a7e1ab2baa922958565135"},
+				{oid: "8af7f880ce38649fc49f66e3f38857bfbec3f0b7", objectName: []byte("feature-1.txt")},
+				{oid: "16ca0b267f82cd2f5ca1157dd162dae98745eab8", objectName: []byte("feature-2.txt")},
+				{oid: "0fb47f093f769008049a0b0976ac3fa6d6125033", objectName: []byte("hotfix-1.txt")},
+				{oid: "4ae6c5e14452a35d04156277ae63e8356eb17cae", objectName: []byte("hotfix-2.txt")},
+				{oid: "b988ffed90cb6a9b7f98a3686a933edb3c5d70c0", objectName: []byte("iso8859.txt")},
+				{oid: "570f8e1dfe8149c1d17002712310d43dfeb43159", objectName: []byte("russian.rb")},
+				{oid: "7a17968582c21c9153ec24c6a9d5f33592ad9103", objectName: []byte("test.txt")},
+				{oid: "f3064a3aa9c14277483f690250072e987e2c8356", objectName: []byte("\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88.txt")},
+				{oid: "3a26c18b02e843b459732e7ade7ab9a154a1002b", objectName: []byte("\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88.xls")},
+			},
+		},
+		{
+			// Second run of the same tree, this time with a blob limit: we expect to
+			// get fewer blobs as a result.
+			desc:      "tree with multiple blobs with limit",
+			revisions: []string{"79d5f98270ad677c86a7e1ab2baa922958565135"},
+			options: []revlistOption{
+				withBlobLimit(10),
+			},
+			expectedResults: []revlistResult{
+				{oid: "79d5f98270ad677c86a7e1ab2baa922958565135"},
+				{oid: "0fb47f093f769008049a0b0976ac3fa6d6125033", objectName: []byte("hotfix-1.txt")},
+				{oid: "4ae6c5e14452a35d04156277ae63e8356eb17cae", objectName: []byte("hotfix-2.txt")},
+				{oid: "b988ffed90cb6a9b7f98a3686a933edb3c5d70c0", objectName: []byte("iso8859.txt")},
+			},
+		},
+		{
+			desc:      "invalid revision",
+			revisions: []string{"refs/heads/does-not-exist"},
+			expectedResults: []revlistResult{
+				{err: errors.New("rev-list pipeline command: exit status 128")},
+			},
+		},
+		{
+			desc:      "mixed valid and invalid revision",
+			revisions: []string{lfsPointer1, "refs/heads/does-not-exist"},
+			expectedResults: []revlistResult{
+				{err: errors.New("rev-list pipeline command: exit status 128")},
+			},
+		},
+	}
+
+	for _, testCase := range testCases {
+		t.Run(testCase.desc, func(t *testing.T) {
+			ctx, cancel := testhelper.Context()
+			defer cancel()
+
+			var actual []revlistResult
+			for result := range revlist(ctx, repo, testCase.revisions, testCase.options...) {
+				// Flatten wrapped errors into plain ones so that the expectations do
+				// not need to replicate the complete structure of the error chain.
+				if result.err != nil {
+					result.err = errors.New(result.err.Error())
+				}
+
+				actual = append(actual, result)
+			}
+
+			require.Equal(t, testCase.expectedResults, actual)
+		})
+	}
+}