Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJacob Vosmaer <jacob@gitlab.com>2019-07-01 12:38:00 +0300
committerJacob Vosmaer <jacob@gitlab.com>2019-07-01 12:38:00 +0300
commitc52f057e907f7b59657d672452aee8f6acd2b7c7 (patch)
treef03c7e91af6186b1dc96097b0186a5a84f42ee20
parent2760a2bbd607b8eb957c1c6e18c177a5e2c753ee (diff)
Move more stuff to packfile
-rw-r--r--.gitignore1
-rw-r--r--internal/git/packfile/bitmap.go99
-rw-r--r--internal/git/packfile/index.go259
-rw-r--r--parse-bitmap.go298
4 files changed, 382 insertions, 275 deletions
diff --git a/.gitignore b/.gitignore
index b7fbb0b4c..5d5d76db0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,3 +24,4 @@ git-env
gitaly.pid
/vendor/github.com/libgit2/git2go/vendor
/vendor
+/parse-bitmap
diff --git a/internal/git/packfile/bitmap.go b/internal/git/packfile/bitmap.go
index 971ed5405..6a5d40c55 100644
--- a/internal/git/packfile/bitmap.go
+++ b/internal/git/packfile/bitmap.go
@@ -1,18 +1,106 @@
package packfile
import (
+ "bufio"
"encoding/binary"
+ "encoding/hex"
"fmt"
"io"
"math"
+ "os"
+
+ "gitlab.com/gitlab-org/gitaly/internal/git/gitio"
)
+type Bitmap struct {
+ Commits *EWAH
+ Trees *EWAH
+ Blobs *EWAH
+ Tags *EWAH
+ nBitmapCommits uint32
+}
+
+func (idx *Index) LoadBitmap() error {
+ f, err := os.Open(idx.packBase + ".bitmap")
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+
+ r := bufio.NewReader(gitio.NewHashfileReader(f))
+
+ bmp := &Bitmap{}
+ bmp.nBitmapCommits, err = idx.parseBitmapHeader(r)
+ if err != nil {
+ return err
+ }
+
+ for _, ptr := range []**EWAH{&bmp.Commits, &bmp.Trees, &bmp.Blobs, &bmp.Tags} {
+ *ptr, err = ReadEWAH(r)
+ if err != nil {
+ return err
+ }
+ }
+ for i := uint32(0); i < bmp.nBitmapCommits; i++ {
+ const entryHeaderLen = 6
+ if _, err := r.Discard(entryHeaderLen); err != nil {
+ return err
+ }
+
+ if _, err := ReadEWAH(r); err != nil {
+ return err
+ }
+ }
+
+ // TODO handle hashcache
+
+ if _, err := r.Peek(1); err != io.EOF {
+ return fmt.Errorf("expected EOF, got %v", err)
+ }
+
+ idx.Bitmap = bmp
+ return nil
+}
+
+func (idx *Index) parseBitmapHeader(r io.Reader) (uint32, error) {
+ const headerLen = 32
+ header, err := readN(r, headerLen)
+ if err != nil {
+ return 0, err
+ }
+
+ const sig = "BITM\x00\x01"
+ if actualSig := string(header[:len(sig)]); actualSig != sig {
+ return 0, fmt.Errorf("unexpected signature %q", actualSig)
+ }
+ header = header[len(sig):]
+
+ const flagLen = 2
+ flags := binary.BigEndian.Uint16(header[:flagLen])
+ const minFlags = 1
+ if flags&minFlags < minFlags {
+ return 0, fmt.Errorf("invalid flags %x", flags)
+ }
+ header = header[flagLen:]
+
+ count := binary.BigEndian.Uint32(header[:4])
+ header = header[4:]
+
+ if s := hex.EncodeToString(header); s != idx.ID {
+ return 0, fmt.Errorf("unexpected pack ID in bitmap header: %s", s)
+ }
+
+ return count, nil
+}
+
type EWAH struct {
raw []byte
bits uint32
words uint32
}
+const ewahTrailerLen = 4
+
func ReadEWAH(r io.Reader) (*EWAH, error) {
header := make([]byte, 8)
if _, err := io.ReadFull(r, header); err != nil {
@@ -24,7 +112,7 @@ func ReadEWAH(r io.Reader) (*EWAH, error) {
words: binary.BigEndian.Uint32(header[4:]),
}
- rawSize := int64(e.words)*8 + 4
+ rawSize := int64(e.words)*8 + ewahTrailerLen
if rawSize > math.MaxInt32 {
return nil, fmt.Errorf("EWAH bitmap does not fit in Go slice")
}
@@ -40,8 +128,9 @@ func ReadEWAH(r io.Reader) (*EWAH, error) {
func (e *EWAH) Scan(f func(uint32) error) error {
bit := uint32(0)
- lastByte := int(8 * e.words)
- for cursor := 0; cursor < lastByte; {
+ cursor := 0
+ lastByte := len(e.raw) - ewahTrailerLen
+ for cursor < lastByte && bit < e.bits {
header := binary.BigEndian.Uint64(e.raw[cursor : cursor+8])
cursor += 8
@@ -77,5 +166,9 @@ func (e *EWAH) Scan(f func(uint32) error) error {
}
}
+ if cursor != lastByte {
+ return fmt.Errorf("expected bitmap to use %d bytes, but consumed only %d", lastByte, cursor)
+ }
+
return nil
}
diff --git a/internal/git/packfile/index.go b/internal/git/packfile/index.go
new file mode 100644
index 000000000..426bbac33
--- /dev/null
+++ b/internal/git/packfile/index.go
@@ -0,0 +1,259 @@
+package packfile
+
+import (
+ "bufio"
+ "crypto/sha1"
+ "encoding/binary"
+ "encoding/hex"
+ "fmt"
+ "io"
+ "math"
+ "os"
+ "regexp"
+ "sort"
+ "strconv"
+
+ "gitlab.com/gitlab-org/gitaly/internal/git/gitio"
+)
+
+const sumSize = sha1.Size
+
+var (
+ idxFileRegex = regexp.MustCompile(`\A(.*/pack-)([0-9a-f]{40})\.idx\z`)
+)
+
+type Index struct {
+ ID string
+ packBase string
+ Objects []*Object
+ fanOut [256]int
+ *Bitmap
+}
+
+func ReadIndex(idxPath string) (*Index, error) {
+ reMatches := idxFileRegex.FindStringSubmatch(idxPath)
+ if len(reMatches) == 0 {
+ return nil, fmt.Errorf("invalid idx filename: %q", idxPath)
+ }
+
+ idx := &Index{
+ packBase: reMatches[1] + reMatches[2],
+ ID: reMatches[2],
+ }
+
+ f, err := os.Open(idx.packBase + ".idx")
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+
+ r := bufio.NewReader(gitio.NewHashfileReader(f))
+
+ const sig = "\377tOc\x00\x00\x00\x02"
+ actualSig, err := readN(r, len(sig))
+ if s := string(actualSig); s != sig {
+ return nil, fmt.Errorf("unexpected idx signature %q", s)
+ }
+
+ count, err := idx.nPackObjects()
+ if err != nil {
+ return nil, err
+ }
+
+ // TODO use a data structure other than a Go slice to hold the index
+ // entries? Go slices use int as their index type, and int may not be
+ // able to hold MaxUint32.
+ if count > math.MaxInt32 {
+ return nil, fmt.Errorf("too many objects in to fit in Go slice: %d", count)
+ }
+ idx.Objects = make([]*Object, count)
+
+ for i := range idx.fanOut {
+ n, err := readUint32(r)
+ if err != nil {
+ return nil, err
+ }
+
+ idx.fanOut[i] = int(n) // cast is safe because we know n<=len(idx.Objects)
+ }
+
+ buf := make([]byte, sumSize)
+ for i := 0; i < len(idx.Objects); i++ {
+ if _, err := io.ReadFull(r, buf); err != nil {
+ return nil, err
+ }
+ idx.Objects[i] = &Object{OID: hex.EncodeToString(buf)}
+ }
+
+ // Discard CRC32 values (one for each object)
+ for i := 0; i < len(idx.Objects); i++ {
+ if _, err := r.Discard(4); err != nil {
+ return nil, err
+ }
+ }
+
+ // Read 4-byte offsets
+ has8ByteOffsets := false
+ for i := 0; i < len(idx.Objects); i++ {
+ offset, err := readUint32(r)
+ if err != nil {
+ return nil, err
+ }
+
+ const mask = 1 << 31
+ if offset&mask == mask {
+ has8ByteOffsets = true
+ continue
+ }
+
+ idx.Objects[i].Offset = uint64(offset)
+ }
+
+ if has8ByteOffsets {
+ for i := 0; i < len(idx.Objects); i++ {
+ offset, err := readUint64(r)
+ if err != nil {
+ return nil, err
+ }
+
+ // TODO Not clear if all 8-byte offsets are populated, or only those that
+ // don't fit into 4 bytes.
+ if offset > 0 {
+ idx.Objects[i].Offset = offset
+ }
+ }
+ }
+
+ idxPackID, err := readN(r, sumSize)
+ if err != nil {
+ return nil, err
+ }
+
+ if s := hex.EncodeToString(idxPackID); s != idx.ID {
+ return nil, fmt.Errorf("unexpected pack ID in idx: %s", s)
+ }
+
+ if _, err := r.Peek(1); err != io.EOF {
+ if err == nil {
+ err = fmt.Errorf("unexpected trailing data, expected EOF")
+ }
+ return nil, err
+ }
+
+ return idx, nil
+}
+
+func (idx *Index) GetObject(oid string) (*Object, bool) {
+ if len(oid) < 2 {
+ return nil, false
+ }
+
+ radix64, err := strconv.ParseInt(oid[:2], 16, 0)
+ if err != nil {
+ return nil, false
+ }
+
+ radix := int(radix64)
+ last := idx.fanOut[radix]
+ first := 0
+ if radix > 0 {
+ first = idx.fanOut[radix-1]
+ }
+
+ objRange := idx.Objects[first:last]
+ objIdx := sort.Search(len(objRange), func(i int) bool {
+ return objRange[i].OID >= oid
+ })
+ if objIdx == len(objRange) {
+ return nil, false
+ }
+ obj := objRange[objIdx]
+
+ if obj.OID != oid {
+ return nil, false
+ }
+
+ return obj, true
+}
+
+func (idx *Index) nPackObjects() (uint32, error) {
+ f, err := idx.openPack()
+ if err != nil {
+ return 0, err
+ }
+ defer f.Close()
+
+ const headerLen = 12
+ header, err := readN(f, headerLen)
+ if err != nil {
+ return 0, err
+ }
+
+ const sig = "PACK\x00\x00\x00\x02"
+ if s := string(header[:len(sig)]); s != sig {
+ return 0, fmt.Errorf("unexpected pack signature %q", s)
+ }
+ header = header[len(sig):]
+
+ return binary.BigEndian.Uint32(header), nil
+}
+
+func (idx *Index) openPack() (f *os.File, err error) {
+ packPath := idx.packBase + ".pack"
+ f, err = os.Open(packPath)
+ if err != nil {
+ return nil, err
+ }
+
+ defer func(f *os.File) {
+ if err != nil {
+ f.Close()
+ }
+ }(f) // Bind f early so that we can do "return nil, err".
+
+ if _, err := f.Seek(-sumSize, io.SeekEnd); err != nil {
+ return nil, err
+ }
+
+ sum, err := readN(f, sumSize)
+ if err != nil {
+ return nil, err
+ }
+
+ if s := hex.EncodeToString(sum); s != idx.ID {
+ return nil, fmt.Errorf("unexpected trailing checksum in .pack: %s", s)
+ }
+
+ if _, err := f.Seek(0, io.SeekStart); err != nil {
+ return nil, err
+ }
+
+ return f, nil
+}
+
+func readUint32(r io.Reader) (uint32, error) {
+ buf, err := readN(r, 4)
+ if err != nil {
+ return 0, err
+ }
+
+ return binary.BigEndian.Uint32(buf), nil
+}
+
+func readUint64(r io.Reader) (uint64, error) {
+ buf, err := readN(r, 8)
+ if err != nil {
+ return 0, err
+ }
+
+ return binary.BigEndian.Uint64(buf), nil
+}
+
+func readN(r io.Reader, n int) ([]byte, error) {
+ buf := make([]byte, n)
+ if _, err := io.ReadFull(r, buf); err != nil {
+ return nil, err
+ }
+
+ return buf, nil
+}
diff --git a/parse-bitmap.go b/parse-bitmap.go
index a9773e5ca..9163dac23 100644
--- a/parse-bitmap.go
+++ b/parse-bitmap.go
@@ -2,18 +2,14 @@ package main
import (
"bufio"
- "encoding/binary"
- "encoding/hex"
"flag"
"fmt"
- "io"
"log"
- "math"
"os"
"regexp"
"sort"
+ "time"
- "gitlab.com/gitlab-org/gitaly/internal/git/gitio"
"gitlab.com/gitlab-org/gitaly/internal/git/packfile"
)
@@ -35,65 +31,54 @@ func main() {
}
func _main(packIdx string) error {
- reMatches := idxFileRegex.FindStringSubmatch(packIdx)
- if len(reMatches) == 0 {
- return fmt.Errorf("invalid idx filename: %q", packIdx)
- }
-
- packBase := reMatches[1] + reMatches[2]
- packID := reMatches[2]
-
- log.Print("parsing idx")
- idxObjects, err := readIndex(packBase, packID)
+ start := time.Now()
+ idx, err := packfile.ReadIndex(packIdx)
if err != nil {
return err
}
- log.Printf("found %d objects", len(idxObjects))
+ log.Printf("loaded idx with %d objects in %v", len(idx.Objects), time.Since(start))
- f, err := os.Open(packBase + ".bitmap")
- if err != nil {
- return err
- }
- defer f.Close()
-
- r := bufio.NewReader(gitio.NewHashfileReader(f))
-
- nBitmapCommits, err := parseBitmapHeader(r, packID)
- if err != nil {
+ start = time.Now()
+ if err := idx.LoadBitmap(); err != nil {
return err
}
+ log.Printf("loaded bitmap in %v", time.Since(start))
+ start = time.Now()
// Sort objects by pack offset, because the object type bitmaps use
// packfile order.
- packObjects := make([]*packfile.Object, len(idxObjects))
- copy(packObjects, idxObjects)
+ packObjects := make([]*packfile.Object, len(idx.Objects))
+ copy(packObjects, idx.Objects)
- log.Print("sorting object list")
sort.Sort(packfile.PackfileOrder(packObjects))
-
- // The type bitmaps come in this specific order: commit, tree, blob, tag.
- log.Print("labeling objects")
- for _, t := range []packfile.ObjectType{packfile.TCommit, packfile.TTree, packfile.TBlob, packfile.TTag} {
- ewah, err := packfile.ReadEWAH(r)
- if err != nil {
- return err
- }
-
+ log.Printf("sorted object list in %v", time.Since(start))
+
+ start = time.Now()
+ for _, t := range []struct {
+ objectType packfile.ObjectType
+ ewah *packfile.EWAH
+ }{
+ {packfile.TCommit, idx.Bitmap.Commits},
+ {packfile.TTree, idx.Bitmap.Trees},
+ {packfile.TBlob, idx.Bitmap.Blobs},
+ {packfile.TTag, idx.Bitmap.Tags},
+ } {
setFunc := func(i uint32) error {
obj := packObjects[i]
if obj.Type != packfile.TUnknown {
return fmt.Errorf("type already set for object %v", obj)
}
- obj.Type = t
+ obj.Type = t.objectType
return nil
}
- if err := ewah.Scan(setFunc); err != nil {
+ if err := t.ewah.Scan(setFunc); err != nil {
return err
}
}
+ log.Printf("labeled objects in %v", time.Since(start))
for _, obj := range packObjects {
if obj.Type == packfile.TUnknown {
@@ -101,25 +86,10 @@ func _main(packIdx string) error {
}
}
- for i := uint32(0); i < nBitmapCommits; i++ {
- const entryHeaderLen = 6
- if _, err := r.Discard(entryHeaderLen); err != nil {
- return err
- }
-
- if _, err := packfile.ReadEWAH(r); err != nil {
- return err
- }
- }
-
- if _, err := r.Peek(1); err != io.EOF {
- return fmt.Errorf("expected EOF, got %v", err)
- }
-
out := bufio.NewWriter(os.Stdout)
defer out.Flush()
+ fmt.Fprintf(out, "# pack-%s\n", idx.ID)
- fmt.Fprintf(out, "# pack-%s\n", packID)
for _, o := range packObjects {
if *printMap {
c := ""
@@ -142,219 +112,3 @@ func _main(packIdx string) error {
return nil
}
-
-func parseBitmapHeader(r io.Reader, packID string) (uint32, error) {
- const headerLen = 32
- header, err := readN(r, headerLen)
- if err != nil {
- return 0, err
- }
-
- const sig = "BITM\x00\x01"
- if actualSig := string(header[:len(sig)]); actualSig != sig {
- return 0, fmt.Errorf("unexpected signature %q", actualSig)
- }
- header = header[len(sig):]
-
- const flagLen = 2
- flags := binary.BigEndian.Uint16(header[:flagLen])
- const minFlags = 1
- if flags&minFlags < minFlags {
- return 0, fmt.Errorf("invalid flags %x", flags)
- }
- header = header[flagLen:]
-
- count := binary.BigEndian.Uint32(header[:4])
- header = header[4:]
-
- if s := hex.EncodeToString(header); s != packID {
- return 0, fmt.Errorf("unexpected pack ID in bitmap header: %s", s)
- }
-
- return count, nil
-}
-
-const sumSize = 20
-
-func readIndex(packBase, packID string) ([]*packfile.Object, error) {
- f, err := os.Open(packBase + ".idx")
- if err != nil {
- return nil, err
- }
- defer f.Close()
-
- r := bufio.NewReader(gitio.NewHashfileReader(f))
-
- const sig = "\377tOc\x00\x00\x00\x02"
- actualSig, err := readN(r, len(sig))
- if s := string(actualSig); s != sig {
- return nil, fmt.Errorf("unexpected idx signature %q", s)
- }
-
- count, err := nPackObjects(packBase, packID)
- if err != nil {
- return nil, err
- }
-
- // Fanout is used to speed up random access to the index. We are doing a
- // sequential read so we don't need the fanout table.
- const fanOutLen = 4 * 256
- if _, err := r.Discard(fanOutLen); err != nil {
- return nil, err
- }
-
- // TODO use a data structure other than a Go slice to hold the index
- // entries? Go slices use int as their index type, and int may not be
- // able to hold MaxUint32.
- if count > math.MaxInt32 {
- return nil, fmt.Errorf("too many objects in packfile to fit in Go slice: %d", count)
- }
- objects := make([]*packfile.Object, count)
-
- buf := make([]byte, sumSize)
- for i := 0; i < len(objects); i++ {
- if _, err := io.ReadFull(r, buf); err != nil {
- return nil, err
- }
- objects[i] = &packfile.Object{OID: hex.EncodeToString(buf)}
- }
-
- // Discard CRC32 values (one for each object)
- for i := 0; i < len(objects); i++ {
- if _, err := r.Discard(4); err != nil {
- return nil, err
- }
- }
-
- // Read 4-byte offsets
- has8ByteOffsets := false
- for i := 0; i < len(objects); i++ {
- offset, err := readUint32(r)
- if err != nil {
- return nil, err
- }
-
- const mask = 1 << 31
- if offset&mask == mask {
- has8ByteOffsets = true
- continue
- }
-
- objects[i].Offset = uint64(offset)
- }
-
- if has8ByteOffsets {
- for i := 0; i < len(objects); i++ {
- offset, err := readUint64(r)
- if err != nil {
- return nil, err
- }
-
- // TODO Not clear if all 8-byte offsets are populated, or only those that
- // don't fit into 4 bytes.
- if offset > 0 {
- objects[i].Offset = offset
- }
- }
- }
-
- idxPackID, err := readN(r, sumSize)
- if err != nil {
- return nil, err
- }
-
- if s := hex.EncodeToString(idxPackID); s != packID {
- return nil, fmt.Errorf("unexpected pack ID in idx: %s", s)
- }
-
- if _, err := r.Peek(1); err != io.EOF {
- if err == nil {
- err = fmt.Errorf("unexpected trailing data, expected EOF")
- }
- return nil, err
- }
-
- return objects, nil
-}
-
-func readN(r io.Reader, n int) ([]byte, error) {
- buf := make([]byte, n)
- if _, err := io.ReadFull(r, buf); err != nil {
- return nil, err
- }
-
- return buf, nil
-}
-
-func nPackObjects(packBase, packID string) (uint32, error) {
- f, err := openPack(packBase, packID)
- if err != nil {
- return 0, err
- }
- defer f.Close()
-
- const headerLen = 12
- header, err := readN(f, headerLen)
- if err != nil {
- return 0, err
- }
-
- const sig = "PACK\x00\x00\x00\x02"
- if s := string(header[:len(sig)]); s != sig {
- return 0, fmt.Errorf("unexpected pack signature %q", s)
- }
- header = header[len(sig):]
-
- return binary.BigEndian.Uint32(header), nil
-}
-
-func openPack(packBase, packID string) (f *os.File, err error) {
- packPath := packBase + ".pack"
- f, err = os.Open(packPath)
- if err != nil {
- return nil, err
- }
-
- defer func(f *os.File) {
- if err != nil {
- f.Close()
- }
- }(f) // Bind f early so that we can do "return nil, err".
-
- if _, err := f.Seek(-sumSize, io.SeekEnd); err != nil {
- return nil, err
- }
-
- sum, err := readN(f, sumSize)
- if err != nil {
- return nil, err
- }
-
- if s := hex.EncodeToString(sum); s != packID {
- return nil, fmt.Errorf("unexpected trailing checksum in .pack: %s", s)
- }
-
- if _, err := f.Seek(0, io.SeekStart); err != nil {
- return nil, err
- }
-
- return f, nil
-}
-
-func readUint32(r io.Reader) (uint32, error) {
- buf, err := readN(r, 4)
- if err != nil {
- return 0, err
- }
-
- return binary.BigEndian.Uint32(buf), nil
-}
-
-func readUint64(r io.Reader) (uint64, error) {
- buf, err := readN(r, 8)
- if err != nil {
- return 0, err
- }
-
- return binary.BigEndian.Uint64(buf), nil
-}