diff options
author | Zeger-Jan van de Weg <git@zjvandeweg.nl> | 2019-07-08 17:34:45 +0300 |
---|---|---|
committer | Zeger-Jan van de Weg <git@zjvandeweg.nl> | 2019-07-08 17:34:45 +0300 |
commit | 006af5ab9eba56bb37373c569584db349586b3bb (patch) | |
tree | d4730739935229b4ff47ed58846227f36924c2bb | |
parent | cbdd69b4ec5c0a3dcac6a717608c0d6c3ddeb6ef (diff) | |
parent | 719be513a581e87cf9fa29ac820ceb43c1fadf35 (diff) |
Merge branch 'jv-hashfile' into 'master'
Add hashfile reader
See merge request gitlab-org/gitaly!1347
-rw-r--r-- | internal/git/gitio/hashfile.go | 53 | ||||
-rw-r--r-- | internal/git/gitio/hashfile_test.go | 55 | ||||
-rw-r--r-- | internal/git/gitio/trailer.go | 80 | ||||
-rw-r--r-- | internal/git/gitio/trailer_test.go | 72 |
4 files changed, 260 insertions, 0 deletions
```diff
diff --git a/internal/git/gitio/hashfile.go b/internal/git/gitio/hashfile.go
new file mode 100644
index 000000000..fc090b76b
--- /dev/null
+++ b/internal/git/gitio/hashfile.go
@@ -0,0 +1,53 @@
+package gitio
+
+import (
+	"bytes"
+	"crypto/sha1"
+	"fmt"
+	"hash"
+	"io"
+)
+
+// HashfileReader reads and verifies Git "hashfiles" as defined in
+// https://github.com/git/git/blob/v2.21.0/csum-file.h.
+type HashfileReader struct {
+	tr  *TrailerReader
+	tee io.Reader
+	sum hash.Hash
+}
+
+// NewHashfileReader wraps r to return a reader that will omit the
+// trailing checksum. When the HashfileReader reaches EOF it will
+// transparently compare the actual checksum, as calculated while
+// reading, to the expected checksum provided by the trailer of r.
+func NewHashfileReader(r io.Reader) *HashfileReader {
+	sum := sha1.New()
+	tr := NewTrailerReader(r, sum.Size())
+	return &HashfileReader{
+		tr:  tr,
+		tee: io.TeeReader(tr, sum),
+		sum: sum,
+	}
+}
+
+func (hr *HashfileReader) Read(p []byte) (int, error) {
+	n, err := hr.tee.Read(p)
+	if err == io.EOF {
+		return n, hr.validateChecksum()
+	}
+
+	return n, err
+}
+
+func (hr *HashfileReader) validateChecksum() error {
+	trailer, err := hr.tr.Trailer()
+	if err != nil {
+		return err
+	}
+
+	if actualSum := hr.sum.Sum(nil); !bytes.Equal(trailer, actualSum) {
+		return fmt.Errorf("hashfile checksum mismatch: expected %x got %x", trailer, actualSum)
+	}
+
+	return io.EOF
+}
diff --git a/internal/git/gitio/hashfile_test.go b/internal/git/gitio/hashfile_test.go
new file mode 100644
index 000000000..a7a317b91
--- /dev/null
+++ b/internal/git/gitio/hashfile_test.go
@@ -0,0 +1,55 @@
+package gitio
+
+import (
+	"io/ioutil"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestHashfileReader(t *testing.T) {
+	testCases := []struct {
+		desc string
+		in   string
+		out  string
+		fail bool
+	}{
+		{
+			desc: "simple input",
+			in:   "hello\xaa\xf4\xc6\x1d\xdc\xc5\xe8\xa2\xda\xbe\xde\x0f\x3b\x48\x2c\xd9\xae\xa9\x43\x4d",
+			out:  "hello",
+		},
+		{
+			desc: "empty input",
+			in:   "\xda\x39\xa3\xee\x5e\x6b\x4b\x0d\x32\x55\xbf\xef\x95\x60\x18\x90\xaf\xd8\x07\x09",
+			out:  "",
+		},
+		{
+			desc: "checksum mismatch",
+			in:   "hello\xff\xf4\xc6\x1d\xdc\xc5\xe8\xa2\xda\xbe\xde\x0f\x3b\x48\x2c\xd9\xae\xa9\x43\x4d",
+			out:  "hello",
+			fail: true,
+		},
+		{
+			desc: "input too short",
+			in:   "hello world",
+			out:  "",
+			fail: true,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.desc, func(t *testing.T) {
+			r := NewHashfileReader(strings.NewReader(tc.in))
+			out, err := ioutil.ReadAll(r)
+			if tc.fail {
+				require.Error(t, err, "invalid input should cause error")
+				return
+			}
+
+			require.NoError(t, err, "valid input")
+			require.Equal(t, tc.out, string(out), "compare output")
+		})
+	}
+}
diff --git a/internal/git/gitio/trailer.go b/internal/git/gitio/trailer.go
new file mode 100644
index 000000000..fd60ca8df
--- /dev/null
+++ b/internal/git/gitio/trailer.go
@@ -0,0 +1,80 @@
+package gitio
+
+import (
+	"fmt"
+	"io"
+)
+
+// TrailerReader models the behavior of Git hashfiles where the last N
+// bytes of the underlying reader are not part of the content.
+// TrailerReader acts like an io.Reader but will always hold back the
+// last N bytes. Once the underlying reader has reached EOF, the trailer
+// (the last N bytes) can be retrieved with the Trailer() method.
+type TrailerReader struct {
+	r           io.Reader
+	start, end  int
+	trailerSize int
+	buf         []byte
+	atEOF       bool
+}
+
+// NewTrailerReader returns a new TrailerReader. The returned
+// TrailerReader will never return the last trailerSize bytes of r; to
+// get to those bytes, first read the TrailerReader to EOF and then call
+// Trailer().
+func NewTrailerReader(r io.Reader, trailerSize int) *TrailerReader {
+	const bufSize = 8192
+	if trailerSize >= bufSize {
+		panic("trailerSize too large for TrailerReader")
+	}
+
+	return &TrailerReader{
+		r:           r,
+		trailerSize: trailerSize,
+		buf:         make([]byte, bufSize),
+	}
+}
+
+// Trailer yields the last trailerSize bytes of the underlying reader of
+// tr. If the underlying reader has not reached EOF yet Trailer will
+// return an error.
+func (tr *TrailerReader) Trailer() ([]byte, error) {
+	bufLen := tr.end - tr.start
+	if !tr.atEOF || bufLen > tr.trailerSize {
+		return nil, fmt.Errorf("cannot get trailer before reader has reached EOF")
+	}
+
+	if bufLen < tr.trailerSize {
+		return nil, fmt.Errorf("not enough bytes to yield trailer")
+	}
+
+	return tr.buf[tr.end-tr.trailerSize : tr.end], nil
+}
+
+func (tr *TrailerReader) Read(p []byte) (int, error) {
+	if bufLen := tr.end - tr.start; !tr.atEOF && bufLen <= tr.trailerSize {
+		copy(tr.buf, tr.buf[tr.start:tr.end])
+		tr.start = 0
+		tr.end = bufLen
+
+		n, err := tr.r.Read(tr.buf[tr.end:])
+		if err != nil {
+			if err != io.EOF {
+				return 0, err
+			}
+			tr.atEOF = true
+		}
+		tr.end += n
+	}
+
+	if tr.end-tr.start <= tr.trailerSize {
+		if tr.atEOF {
+			return 0, io.EOF
+		}
+		return 0, nil
+	}
+
+	n := copy(p, tr.buf[tr.start:tr.end-tr.trailerSize])
+	tr.start += n
+	return n, nil
+}
diff --git a/internal/git/gitio/trailer_test.go b/internal/git/gitio/trailer_test.go
new file mode 100644
index 000000000..d01a97119
--- /dev/null
+++ b/internal/git/gitio/trailer_test.go
@@ -0,0 +1,72 @@
+package gitio
+
+import (
+	"io/ioutil"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestTrailerReaderSuccess(t *testing.T) {
+	const trailerLen = 5
+
+	testCases := []struct {
+		desc    string
+		in      string
+		out     string
+		trailer string
+	}{
+		{
+			desc:    "large input",
+			in:      strings.Repeat("hello", 4000) + "world",
+			out:     strings.Repeat("hello", 4000),
+			trailer: "world",
+		},
+		{
+			desc:    "small input",
+			in:      "hello world",
+			out:     "hello ",
+			trailer: "world",
+		},
+		{
+			desc:    "smallest input",
+			in:      "world",
+			out:     "",
+			trailer: "world",
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.desc, func(t *testing.T) {
+			tr := NewTrailerReader(strings.NewReader(tc.in), trailerLen)
+			require.Len(t, tc.trailer, trailerLen, "test case trailer sanity check")
+
+			out, err := ioutil.ReadAll(tr)
+			require.NoError(t, err, "read all")
+			require.Equal(t, tc.out, string(out), "compare output")
+
+			trailer, err := tr.Trailer()
+			require.NoError(t, err, "trailer error")
+			require.Equal(t, tc.trailer, string(trailer), "compare trailer")
+		})
+	}
+}
+
+func TestTrailerReaderFail(t *testing.T) {
+	const in = "hello world"
+	const trailerLen = 100
+	require.True(t, len(in) < trailerLen, "sanity check")
+
+	tr := NewTrailerReader(strings.NewReader(in), trailerLen)
+
+	_, err := tr.Trailer()
+	require.Error(t, err, "Trailer() should fail when called too early")
+
+	out, err := ioutil.ReadAll(tr)
+	require.NoError(t, err, "read")
+	require.Empty(t, out, "read output")
+
+	_, err = tr.Trailer()
+	require.Error(t, err, "Trailer() should fail if there is not enough data")
+}
```