Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Patrick Steinhardt <psteinhardt@gitlab.com>, 2023-01-17 20:42:09 +0300
committer: Patrick Steinhardt <psteinhardt@gitlab.com>, 2023-01-19 09:51:09 +0300
commit: 7996cf67c5122e9235359f3704f12452ae6df15c (patch)
tree: 3cdeee2a43e8ace69562eafec13555623f1e7ed6
parent: d47724f6e9e18fd7c7c73ec68d89ed874c841502 (diff)
git/stats: Implement reading bitmap info
Git v2.38.0 introduced support for bitmap lookup tables. This extension allows Git to defer loading bitmaps until they are actually required, which should speed up operations, especially in large repositories that have many bitmaps. As a first step towards enabling this extension, we start to expose information about bitmaps via our housekeeping machinery so that we can monitor the rollout and verify that repositories get converted to use the new extension as expected. Implement logic to read bitmap information in the git/stats package. The logic is not yet wired up.
-rw-r--r-- internal/git/stats/repository_info.go | 57
-rw-r--r-- internal/git/stats/repository_info_test.go | 173
2 files changed, 230 insertions, 0 deletions
diff --git a/internal/git/stats/repository_info.go b/internal/git/stats/repository_info.go
index 363b267bd..0f7554936 100644
--- a/internal/git/stats/repository_info.go
+++ b/internal/git/stats/repository_info.go
@@ -1,9 +1,12 @@
package stats
import (
+ "bytes"
"context"
+ "encoding/binary"
"errors"
"fmt"
+ "io"
"io/fs"
"os"
"path/filepath"
@@ -334,3 +337,57 @@ func readAlternates(repo *localrepo.Repo) ([]string, error) {
return alternatePaths, nil
}
+
+// BitmapInfo contains information about a packfile or multi-pack-index bitmap.
+type BitmapInfo struct {
+ // Version is the version of the bitmap. Currently, this is expected to always be 1.
+ Version uint16 `json:"version"`
+ // HasHashCache indicates whether the name hash cache extension exists in the bitmap. This
+ // extension records hashes of the path at which trees or blobs are found at the time of
+ // writing the packfile so that it becomes possible to quickly find objects stored at the
+ // same path. This mechanism is fed into the delta compression machinery to make the delta
+ // heuristics more effective.
+ HasHashCache bool `json:"has_hash_cache"`
+ // HasLookupTable indicates whether the lookup table exists in the bitmap. Lookup tables
+ // allow to defer loading bitmaps until required and thus speed up read-only bitmap
+ // preparations.
+ HasLookupTable bool `json:"has_lookup_table"`
+}
+
+// BitmapInfoForPath reads the bitmap at the given path and returns information on that bitmap.
+func BitmapInfoForPath(path string) (BitmapInfo, error) {
+ // The bitmap header is defined in
+ // https://github.com/git/git/blob/master/Documentation/technical/bitmap-format.txt.
+ bitmapHeader := []byte{
+ 0, 0, 0, 0, // 4-byte signature
+ 0, 0, // 2-byte version number in network byte order
+ 0, 0, // 2-byte flags in network byte order
+ }
+
+ file, err := os.Open(path)
+ if err != nil {
+ return BitmapInfo{}, fmt.Errorf("opening bitmap: %w", err)
+ }
+ defer file.Close()
+
+ if _, err := io.ReadFull(file, bitmapHeader); err != nil {
+ return BitmapInfo{}, fmt.Errorf("reading bitmap header: %w", err)
+ }
+
+ if !bytes.Equal(bitmapHeader[0:4], []byte{'B', 'I', 'T', 'M'}) {
+ return BitmapInfo{}, fmt.Errorf("invalid bitmap signature: %q", string(bitmapHeader[0:4]))
+ }
+
+ version := binary.BigEndian.Uint16(bitmapHeader[4:6])
+ if version != 1 {
+ return BitmapInfo{}, fmt.Errorf("unsupported version: %d", version)
+ }
+
+ flags := binary.BigEndian.Uint16(bitmapHeader[6:8])
+
+ return BitmapInfo{
+ Version: version,
+ HasHashCache: flags&0x4 == 0x4,
+ HasLookupTable: flags&0x10 == 0x10,
+ }, nil
+}
diff --git a/internal/git/stats/repository_info_test.go b/internal/git/stats/repository_info_test.go
index 1c2be2ead..b2eb646e1 100644
--- a/internal/git/stats/repository_info_test.go
+++ b/internal/git/stats/repository_info_test.go
@@ -2,8 +2,12 @@ package stats
import (
"fmt"
+ "io"
+ "io/fs"
"os"
"path/filepath"
+ "strconv"
+ "syscall"
"testing"
"time"
@@ -793,6 +797,175 @@ func TestPackfileInfoForRepository(t *testing.T) {
})
}
+func TestBitmapInfoForPath(t *testing.T) {
+ t.Parallel()
+
+ ctx := testhelper.Context(t)
+ cfg := testcfg.Build(t)
+
+ for _, bitmapTypeTC := range []struct {
+ desc string
+ repackArgs []string
+ verifyBitmapName func(*testing.T, string)
+ }{
+ {
+ desc: "packfile bitmap",
+ repackArgs: []string{"-Adb"},
+ verifyBitmapName: func(t *testing.T, bitmapName string) {
+ require.Regexp(t, "^pack-.*.bitmap$", bitmapName)
+ },
+ },
+ {
+ desc: "multi-pack-index bitmap",
+ repackArgs: []string{"-Adb", "--write-midx"},
+ verifyBitmapName: func(t *testing.T, bitmapName string) {
+ require.Regexp(t, "^multi-pack-index-.*.bitmap$", bitmapName)
+ },
+ },
+ } {
+ bitmapTypeTC := bitmapTypeTC
+
+ t.Run(bitmapTypeTC.desc, func(t *testing.T) {
+ t.Parallel()
+
+ for _, tc := range []struct {
+ desc string
+ writeHashCache bool
+ writeLookupTable bool
+ expectedBitmapInfo BitmapInfo
+ expectedErr error
+ }{
+ {
+ desc: "bitmap without any extension",
+ writeHashCache: false,
+ writeLookupTable: false,
+ expectedBitmapInfo: BitmapInfo{
+ Version: 1,
+ },
+ },
+ {
+ desc: "bitmap with hash cache",
+ writeHashCache: true,
+ writeLookupTable: false,
+ expectedBitmapInfo: BitmapInfo{
+ Version: 1,
+ HasHashCache: true,
+ },
+ },
+ {
+ desc: "bitmap with lookup table",
+ writeHashCache: false,
+ writeLookupTable: true,
+ expectedBitmapInfo: BitmapInfo{
+ Version: 1,
+ HasLookupTable: true,
+ },
+ },
+ {
+ desc: "bitmap with all extensions",
+ writeHashCache: true,
+ writeLookupTable: true,
+ expectedBitmapInfo: BitmapInfo{
+ Version: 1,
+ HasHashCache: true,
+ HasLookupTable: true,
+ },
+ },
+ } {
+ tc := tc
+
+ t.Run(tc.desc, func(t *testing.T) {
+ t.Parallel()
+
+ _, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{
+ SkipCreationViaService: true,
+ })
+ gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main"))
+
+ gittest.Exec(t, cfg, append([]string{
+ "-C", repoPath,
+ "-c", "pack.writeBitmapHashCache=" + strconv.FormatBool(tc.writeHashCache),
+ "-c", "pack.writeBitmapLookupTable=" + strconv.FormatBool(tc.writeLookupTable),
+ "repack",
+ }, bitmapTypeTC.repackArgs...)...)
+
+ bitmapPaths, err := filepath.Glob(filepath.Join(repoPath, "objects", "pack", "*.bitmap"))
+ require.NoError(t, err)
+ require.Len(t, bitmapPaths, 1)
+
+ bitmapPath := bitmapPaths[0]
+ bitmapTypeTC.verifyBitmapName(t, filepath.Base(bitmapPath))
+
+ bitmapInfo, err := BitmapInfoForPath(bitmapPath)
+ require.Equal(t, tc.expectedErr, err)
+ require.Equal(t, tc.expectedBitmapInfo, bitmapInfo)
+ })
+ }
+ })
+ }
+
+ for _, tc := range []struct {
+ desc string
+ setup func(t *testing.T) string
+ expectedErr error
+ }{
+ {
+ desc: "nonexistent path",
+ setup: func(t *testing.T) string {
+ return "/does/not/exist"
+ },
+ expectedErr: fmt.Errorf("opening bitmap: %w", &fs.PathError{
+ Op: "open",
+ Path: "/does/not/exist",
+ Err: syscall.ENOENT,
+ }),
+ },
+ {
+ desc: "header is too short",
+ setup: func(t *testing.T) string {
+ bitmapPath := filepath.Join(testhelper.TempDir(t), "bitmap")
+ require.NoError(t, os.WriteFile(bitmapPath, []byte{0, 0, 0}, 0o644))
+ return bitmapPath
+ },
+ expectedErr: fmt.Errorf("reading bitmap header: %w", io.ErrUnexpectedEOF),
+ },
+ {
+ desc: "invalid signature",
+ setup: func(t *testing.T) string {
+ bitmapPath := filepath.Join(testhelper.TempDir(t), "bitmap")
+ require.NoError(t, os.WriteFile(bitmapPath, []byte{
+ 'B', 'I', 'T', 'O', 0, 0, 0, 0,
+ }, 0o644))
+ return bitmapPath
+ },
+ expectedErr: fmt.Errorf("invalid bitmap signature: %q", "BITO"),
+ },
+ {
+ desc: "unsupported version",
+ setup: func(t *testing.T) string {
+ bitmapPath := filepath.Join(testhelper.TempDir(t), "bitmap")
+ require.NoError(t, os.WriteFile(bitmapPath, []byte{
+ 'B', 'I', 'T', 'M', 0, 2, 0, 0,
+ }, 0o644))
+ return bitmapPath
+ },
+ expectedErr: fmt.Errorf("unsupported version: 2"),
+ },
+ } {
+ tc := tc
+
+ t.Run(tc.desc, func(t *testing.T) {
+ t.Parallel()
+
+ bitmapPath := tc.setup(t)
+
+ bitmapInfo, err := BitmapInfoForPath(bitmapPath)
+ require.Equal(t, tc.expectedErr, err)
+ require.Equal(t, BitmapInfo{}, bitmapInfo)
+ })
+ }
+}
+
func hashDependentSize(sha1, sha256 uint64) uint64 {
if gittest.DefaultObjectHash.Format == "sha1" {
return sha1