Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Zeger-Jan van de Weg <git@zjvandeweg.nl> 2019-07-08 17:34:45 +0300
committer Zeger-Jan van de Weg <git@zjvandeweg.nl> 2019-07-08 17:34:45 +0300
commit 006af5ab9eba56bb37373c569584db349586b3bb (patch)
tree d4730739935229b4ff47ed58846227f36924c2bb
parent cbdd69b4ec5c0a3dcac6a717608c0d6c3ddeb6ef (diff)
parent 719be513a581e87cf9fa29ac820ceb43c1fadf35 (diff)
Merge branch 'jv-hashfile' into 'master'
Add hashfile reader See merge request gitlab-org/gitaly!1347
-rw-r--r-- internal/git/gitio/hashfile.go 53
-rw-r--r-- internal/git/gitio/hashfile_test.go 55
-rw-r--r-- internal/git/gitio/trailer.go 80
-rw-r--r-- internal/git/gitio/trailer_test.go 72
4 files changed, 260 insertions, 0 deletions
diff --git a/internal/git/gitio/hashfile.go b/internal/git/gitio/hashfile.go
new file mode 100644
index 000000000..fc090b76b
--- /dev/null
+++ b/internal/git/gitio/hashfile.go
@@ -0,0 +1,53 @@
+package gitio
+
+import (
+ "bytes"
+ "crypto/sha1"
+ "fmt"
+ "hash"
+ "io"
+)
+
+// HashfileReader reads and verifies Git "hashfiles" as defined in
+// https://github.com/git/git/blob/v2.21.0/csum-file.h. Construct one with NewHashfileReader.
+type HashfileReader struct {
+ tr *TrailerReader // wraps the source reader and holds back its trailing checksum bytes
+ tee io.Reader // reads from tr and copies every byte read into sum
+ sum hash.Hash // running digest of the content read so far
+}
+
+// NewHashfileReader wraps r in a reader that yields the content of r
+// without its trailing checksum (the last SHA-1-digest-size bytes). Once
+// the content is exhausted, Read compares the SHA-1 of the bytes it has
+// returned with that trailer and reports a mismatch as an error instead of io.EOF.
+func NewHashfileReader(r io.Reader) *HashfileReader {
+ sum := sha1.New() // digest used to verify the content
+ tr := NewTrailerReader(r, sum.Size()) // the trailer is exactly one digest long
+ return &HashfileReader{
+ tr: tr,
+ tee: io.TeeReader(tr, sum), // every byte handed to the caller is also fed to sum
+ sum: sum,
+ }
+}
+
+func (hr *HashfileReader) Read(p []byte) (int, error) { // Read implements io.Reader over the content of the hashfile, excluding the checksum trailer.
+ n, err := hr.tee.Read(p)
+ if err == io.EOF { // content exhausted: io.EOF is only passed on if the checksum verifies
+ return n, hr.validateChecksum()
+ }
+
+ return n, err // any other result is passed through unchanged
+}
+
+func (hr *HashfileReader) validateChecksum() error { // returns io.EOF when the trailer equals the computed digest, a descriptive error otherwise
+ trailer, err := hr.tr.Trailer()
+ if err != nil { // e.g. the input held fewer bytes than one checksum
+ return err
+ }
+
+ if actualSum := hr.sum.Sum(nil); !bytes.Equal(trailer, actualSum) { // trailer is the expected checksum, actualSum the computed one
+ return fmt.Errorf("hashfile checksum mismatch: expected %x got %x", trailer, actualSum)
+ }
+
+ return io.EOF // success is reported as a regular end of stream
+}
diff --git a/internal/git/gitio/hashfile_test.go b/internal/git/gitio/hashfile_test.go
new file mode 100644
index 000000000..a7a317b91
--- /dev/null
+++ b/internal/git/gitio/hashfile_test.go
@@ -0,0 +1,55 @@
+package gitio
+
+import (
+ "io/ioutil"
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestHashfileReader(t *testing.T) { // table-driven test of content extraction and checksum verification
+ testCases := []struct {
+ desc string // sub-test name
+ in string // raw hashfile bytes: content followed by a 20-byte trailer
+ out string // content expected from the reader (only checked when fail is false)
+ fail bool // whether reading to the end must return an error
+ }{
+ {
+ desc: "simple input",
+ in: "hello\xaa\xf4\xc6\x1d\xdc\xc5\xe8\xa2\xda\xbe\xde\x0f\x3b\x48\x2c\xd9\xae\xa9\x43\x4d", // "hello" followed by its SHA-1 digest
+ out: "hello",
+ },
+ {
+ desc: "empty input",
+ in: "\xda\x39\xa3\xee\x5e\x6b\x4b\x0d\x32\x55\xbf\xef\x95\x60\x18\x90\xaf\xd8\x07\x09", // SHA-1 digest of the empty string, no content
+ out: "",
+ },
+ {
+ desc: "checksum mismatch",
+ in: "hello\xff\xf4\xc6\x1d\xdc\xc5\xe8\xa2\xda\xbe\xde\x0f\x3b\x48\x2c\xd9\xae\xa9\x43\x4d", // same as "simple input" but the first trailer byte is \xff instead of \xaa
+ out: "hello",
+ fail: true,
+ },
+ {
+ desc: "input too short",
+ in: "hello world", // 11 bytes, fewer than the 20-byte trailer
+ out: "",
+ fail: true,
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.desc, func(t *testing.T) {
+ r := NewHashfileReader(strings.NewReader(tc.in))
+ out, err := ioutil.ReadAll(r) // ReadAll treats io.EOF as success, so err is non-nil only when verification or reading failed
+ if tc.fail {
+ require.Error(t, err, "invalid input should cause error")
+ return // the bytes read before the failure are not inspected
+ }
+
+ require.NoError(t, err, "valid input")
+ require.Equal(t, tc.out, string(out), "compare output")
+ })
+ }
+}
diff --git a/internal/git/gitio/trailer.go b/internal/git/gitio/trailer.go
new file mode 100644
index 000000000..fd60ca8df
--- /dev/null
+++ b/internal/git/gitio/trailer.go
@@ -0,0 +1,80 @@
+package gitio
+
+import (
+ "fmt"
+ "io"
+)
+
+// TrailerReader models the behavior of Git hashfiles where the last N
+// bytes of the underlying reader are not part of the content.
+// TrailerReader acts like an io.Reader but will always hold back the
+// last N bytes. Once the underlying reader has reached EOF, the trailer
+// (the last N bytes) can be retrieved with the Trailer() method.
+type TrailerReader struct {
+ r io.Reader // underlying reader
+ start, end int // buf[start:end] holds bytes read from r that have not been returned yet
+ trailerSize int // N: number of trailing bytes that Read never returns
+ buf []byte // fixed-size read buffer allocated by NewTrailerReader
+ atEOF bool // set once r has returned io.EOF
+}
+
+// NewTrailerReader returns a new TrailerReader. The returned
+// TrailerReader will never return the last trailerSize bytes of r; to
+// get to those bytes, first read the TrailerReader to EOF and then call
+// Trailer(). It panics if trailerSize is not smaller than the internal buffer size (8192 bytes).
+func NewTrailerReader(r io.Reader, trailerSize int) *TrailerReader {
+ const bufSize = 8192
+ if trailerSize >= bufSize { // Read only yields bytes beyond the held-back trailer, so the buffer must be larger than it
+ panic("trailerSize too large for TrailerReader")
+ }
+
+ return &TrailerReader{
+ r: r,
+ trailerSize: trailerSize,
+ buf: make([]byte, bufSize),
+ }
+}
+
+// Trailer yields the last trailerSize bytes of the underlying reader of
+// tr. It returns an error if tr has not been read to EOF yet, or if the underlying
+// reader supplied fewer than trailerSize bytes. The returned slice aliases tr's internal buffer.
+func (tr *TrailerReader) Trailer() ([]byte, error) {
+ bufLen := tr.end - tr.start
+ if !tr.atEOF || bufLen > tr.trailerSize { // either r is not exhausted or buffered content is still waiting to be read
+ return nil, fmt.Errorf("cannot get trailer before reader has reached EOF")
+ }
+
+ if bufLen < tr.trailerSize { // r ended before a full trailer was available
+ return nil, fmt.Errorf("not enough bytes to yield trailer")
+ }
+
+ return tr.buf[tr.end-tr.trailerSize : tr.end], nil
+}
+
+func (tr *TrailerReader) Read(p []byte) (int, error) { // Read implements io.Reader; it never yields the last trailerSize bytes of the underlying reader.
+ if bufLen := tr.end - tr.start; !tr.atEOF && bufLen <= tr.trailerSize { // nothing returnable is buffered and r may have more: refill
+ copy(tr.buf, tr.buf[tr.start:tr.end]) // move the unread tail to the front of buf to make room
+ tr.start = 0
+ tr.end = bufLen
+
+ n, err := tr.r.Read(tr.buf[tr.end:])
+ if err != nil {
+ if err != io.EOF {
+ return 0, err // NOTE(review): bytes read together with this error are dropped, since tr.end is not advanced by n
+ }
+ tr.atEOF = true
+ }
+ tr.end += n
+ }
+
+ if tr.end-tr.start <= tr.trailerSize { // everything buffered may still belong to the trailer
+ if tr.atEOF {
+ return 0, io.EOF // only the trailer (or less) is left
+ }
+ return 0, nil // NOTE(review): zero bytes with a nil error, which io.Reader discourages; the caller has to call Read again
+ }
+
+ n := copy(p, tr.buf[tr.start:tr.end-tr.trailerSize]) // hand out buffered bytes except the last trailerSize of them
+ tr.start += n
+ return n, nil
+}
diff --git a/internal/git/gitio/trailer_test.go b/internal/git/gitio/trailer_test.go
new file mode 100644
index 000000000..d01a97119
--- /dev/null
+++ b/internal/git/gitio/trailer_test.go
@@ -0,0 +1,72 @@
+package gitio
+
+import (
+ "io/ioutil"
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestTrailerReaderSuccess(t *testing.T) { // checks that content and trailer are split correctly for inputs of different sizes
+ const trailerLen = 5
+
+ testCases := []struct {
+ desc string // sub-test name
+ in string // full input given to the TrailerReader
+ out string // expected content, i.e. in without its last trailerLen bytes
+ trailer string // expected result of Trailer() after reading to EOF
+ }{
+ {
+ desc: "large input",
+ in: strings.Repeat("hello", 4000) + "world", // 20005 bytes, more than the reader's 8192-byte buffer
+ out: strings.Repeat("hello", 4000),
+ trailer: "world",
+ },
+ {
+ desc: "small input",
+ in: "hello world",
+ out: "hello ",
+ trailer: "world",
+ },
+ {
+ desc: "smallest input",
+ in: "world", // exactly trailerLen bytes: no content at all
+ out: "",
+ trailer: "world",
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.desc, func(t *testing.T) {
+ tr := NewTrailerReader(strings.NewReader(tc.in), trailerLen)
+ require.Len(t, tc.trailer, trailerLen, "test case trailer sanity check")
+
+ out, err := ioutil.ReadAll(tr) // drains tr; the trailer is only available afterwards
+ require.NoError(t, err, "read all")
+ require.Equal(t, tc.out, string(out), "compare output")
+
+ trailer, err := tr.Trailer()
+ require.NoError(t, err, "trailer error")
+ require.Equal(t, tc.trailer, string(trailer), "compare trailer")
+ })
+ }
+}
+
+func TestTrailerReaderFail(t *testing.T) { // checks both error paths of Trailer(): called too early, and input shorter than the trailer
+ const in = "hello world"
+ const trailerLen = 100
+ require.True(t, len(in) < trailerLen, "sanity check")
+
+ tr := NewTrailerReader(strings.NewReader(in), trailerLen)
+
+ _, err := tr.Trailer() // nothing has been read yet
+ require.Error(t, err, "Trailer() should fail when called too early")
+
+ out, err := ioutil.ReadAll(tr) // the whole input is shorter than the trailer, so no content is returned
+ require.NoError(t, err, "read")
+ require.Empty(t, out, "read output")
+
+ _, err = tr.Trailer() // EOF reached, but fewer than trailerLen bytes were available
+ require.Error(t, err, "Trailer() should fail if there is not enough data")
+}