diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2023-01-17 20:42:09 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2023-01-19 09:51:09 +0300 |
commit | 7996cf67c5122e9235359f3704f12452ae6df15c (patch) | |
tree | 3cdeee2a43e8ace69562eafec13555623f1e7ed6 | |
parent | d47724f6e9e18fd7c7c73ec68d89ed874c841502 (diff) |
git/stats: Implement reading bitmap info
Git v2.38.0 introduced support for bitmap lookup tables. This extension
allows Git to defer loading bitmaps until they are actually required,
which should speed things up, especially for large repositories that
have many bitmaps.
As a first step towards enabling this extension we'll start to expose
information about bitmaps via our housekeeping machinery so that we can
monitor the rollout and whether repositories get converted to use the
new extension as expected.
Implement logic to read bitmaps in the git/stats package. The logic is
not yet wired up.
-rw-r--r-- | internal/git/stats/repository_info.go | 57 | ||||
-rw-r--r-- | internal/git/stats/repository_info_test.go | 173 |
2 files changed, 230 insertions, 0 deletions
diff --git a/internal/git/stats/repository_info.go b/internal/git/stats/repository_info.go index 363b267bd..0f7554936 100644 --- a/internal/git/stats/repository_info.go +++ b/internal/git/stats/repository_info.go @@ -1,9 +1,12 @@ package stats import ( + "bytes" "context" + "encoding/binary" "errors" "fmt" + "io" "io/fs" "os" "path/filepath" @@ -334,3 +337,57 @@ func readAlternates(repo *localrepo.Repo) ([]string, error) { return alternatePaths, nil } + +// BitmapInfo contains information about a packfile or multi-pack-index bitmap. +type BitmapInfo struct { + // Version is the version of the bitmap. Currently, this is expected to always be 1. + Version uint16 `json:"version"` + // HasHashCache indicates whether the name hash cache extension exists in the bitmap. This + // extension records hashes of the path at which trees or blobs are found at the time of + // writing the packfile so that it becomes possible to quickly find objects stored at the + // same path. This mechanism is fed into the delta compression machinery to make the delta + // heuristics more effective. + HasHashCache bool `json:"has_hash_cache"` + // HasLookupTable indicates whether the lookup table exists in the bitmap. Lookup tables + // allow to defer loading bitmaps until required and thus speed up read-only bitmap + // preparations. + HasLookupTable bool `json:"has_lookup_table"` +} + +// BitmapInfoForPath reads the bitmap at the given path and returns information on that bitmap. +func BitmapInfoForPath(path string) (BitmapInfo, error) { + // The bitmap header is defined in + // https://github.com/git/git/blob/master/Documentation/technical/bitmap-format.txt. 
+ bitmapHeader := []byte{ + 0, 0, 0, 0, // 4-byte signature + 0, 0, // 2-byte version number in network byte order + 0, 0, // 2-byte flags in network byte order + } + + file, err := os.Open(path) + if err != nil { + return BitmapInfo{}, fmt.Errorf("opening bitmap: %w", err) + } + defer file.Close() + + if _, err := io.ReadFull(file, bitmapHeader); err != nil { + return BitmapInfo{}, fmt.Errorf("reading bitmap header: %w", err) + } + + if !bytes.Equal(bitmapHeader[0:4], []byte{'B', 'I', 'T', 'M'}) { + return BitmapInfo{}, fmt.Errorf("invalid bitmap signature: %q", string(bitmapHeader[0:4])) + } + + version := binary.BigEndian.Uint16(bitmapHeader[4:6]) + if version != 1 { + return BitmapInfo{}, fmt.Errorf("unsupported version: %d", version) + } + + flags := binary.BigEndian.Uint16(bitmapHeader[6:8]) + + return BitmapInfo{ + Version: version, + HasHashCache: flags&0x4 == 0x4, + HasLookupTable: flags&0x10 == 0x10, + }, nil +} diff --git a/internal/git/stats/repository_info_test.go b/internal/git/stats/repository_info_test.go index 1c2be2ead..b2eb646e1 100644 --- a/internal/git/stats/repository_info_test.go +++ b/internal/git/stats/repository_info_test.go @@ -2,8 +2,12 @@ package stats import ( "fmt" + "io" + "io/fs" "os" "path/filepath" + "strconv" + "syscall" "testing" "time" @@ -793,6 +797,175 @@ func TestPackfileInfoForRepository(t *testing.T) { }) } +func TestBitmapInfoForPath(t *testing.T) { + t.Parallel() + + ctx := testhelper.Context(t) + cfg := testcfg.Build(t) + + for _, bitmapTypeTC := range []struct { + desc string + repackArgs []string + verifyBitmapName func(*testing.T, string) + }{ + { + desc: "packfile bitmap", + repackArgs: []string{"-Adb"}, + verifyBitmapName: func(t *testing.T, bitmapName string) { + require.Regexp(t, "^pack-.*.bitmap$", bitmapName) + }, + }, + { + desc: "multi-pack-index bitmap", + repackArgs: []string{"-Adb", "--write-midx"}, + verifyBitmapName: func(t *testing.T, bitmapName string) { + require.Regexp(t, 
"^multi-pack-index-.*.bitmap$", bitmapName) + }, + }, + } { + bitmapTypeTC := bitmapTypeTC + + t.Run(bitmapTypeTC.desc, func(t *testing.T) { + t.Parallel() + + for _, tc := range []struct { + desc string + writeHashCache bool + writeLookupTable bool + expectedBitmapInfo BitmapInfo + expectedErr error + }{ + { + desc: "bitmap without any extension", + writeHashCache: false, + writeLookupTable: false, + expectedBitmapInfo: BitmapInfo{ + Version: 1, + }, + }, + { + desc: "bitmap with hash cache", + writeHashCache: true, + writeLookupTable: false, + expectedBitmapInfo: BitmapInfo{ + Version: 1, + HasHashCache: true, + }, + }, + { + desc: "bitmap with lookup table", + writeHashCache: false, + writeLookupTable: true, + expectedBitmapInfo: BitmapInfo{ + Version: 1, + HasLookupTable: true, + }, + }, + { + desc: "bitmap with all extensions", + writeHashCache: true, + writeLookupTable: true, + expectedBitmapInfo: BitmapInfo{ + Version: 1, + HasHashCache: true, + HasLookupTable: true, + }, + }, + } { + tc := tc + + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + + _, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + }) + gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main")) + + gittest.Exec(t, cfg, append([]string{ + "-C", repoPath, + "-c", "pack.writeBitmapHashCache=" + strconv.FormatBool(tc.writeHashCache), + "-c", "pack.writeBitmapLookupTable=" + strconv.FormatBool(tc.writeLookupTable), + "repack", + }, bitmapTypeTC.repackArgs...)...) 
+ + bitmapPaths, err := filepath.Glob(filepath.Join(repoPath, "objects", "pack", "*.bitmap")) + require.NoError(t, err) + require.Len(t, bitmapPaths, 1) + + bitmapPath := bitmapPaths[0] + bitmapTypeTC.verifyBitmapName(t, filepath.Base(bitmapPath)) + + bitmapInfo, err := BitmapInfoForPath(bitmapPath) + require.Equal(t, tc.expectedErr, err) + require.Equal(t, tc.expectedBitmapInfo, bitmapInfo) + }) + } + }) + } + + for _, tc := range []struct { + desc string + setup func(t *testing.T) string + expectedErr error + }{ + { + desc: "nonexistent path", + setup: func(t *testing.T) string { + return "/does/not/exist" + }, + expectedErr: fmt.Errorf("opening bitmap: %w", &fs.PathError{ + Op: "open", + Path: "/does/not/exist", + Err: syscall.ENOENT, + }), + }, + { + desc: "header is too short", + setup: func(t *testing.T) string { + bitmapPath := filepath.Join(testhelper.TempDir(t), "bitmap") + require.NoError(t, os.WriteFile(bitmapPath, []byte{0, 0, 0}, 0o644)) + return bitmapPath + }, + expectedErr: fmt.Errorf("reading bitmap header: %w", io.ErrUnexpectedEOF), + }, + { + desc: "invalid signature", + setup: func(t *testing.T) string { + bitmapPath := filepath.Join(testhelper.TempDir(t), "bitmap") + require.NoError(t, os.WriteFile(bitmapPath, []byte{ + 'B', 'I', 'T', 'O', 0, 0, 0, 0, + }, 0o644)) + return bitmapPath + }, + expectedErr: fmt.Errorf("invalid bitmap signature: %q", "BITO"), + }, + { + desc: "unsupported version", + setup: func(t *testing.T) string { + bitmapPath := filepath.Join(testhelper.TempDir(t), "bitmap") + require.NoError(t, os.WriteFile(bitmapPath, []byte{ + 'B', 'I', 'T', 'M', 0, 2, 0, 0, + }, 0o644)) + return bitmapPath + }, + expectedErr: fmt.Errorf("unsupported version: 2"), + }, + } { + tc := tc + + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + + bitmapPath := tc.setup(t) + + bitmapInfo, err := BitmapInfoForPath(bitmapPath) + require.Equal(t, tc.expectedErr, err) + require.Equal(t, BitmapInfo{}, bitmapInfo) + }) + } +} + func 
hashDependentSize(sha1, sha256 uint64) uint64 { if gittest.DefaultObjectHash.Format == "sha1" { return sha1 |