diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-10-21 14:42:36 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-10-21 14:42:36 +0300 |
commit | 96269fad786badf28b80b74e66457ae1a1a95210 (patch) | |
tree | 9ee92e90c570fd8dd6247d7c4b4b6a3882f2bc37 | |
parent | c3a57eaa734283ea3c39f7774e5e3550f24891aa (diff) | |
parent | ab42b0455b2417696c296c218c044e7fe7b2095d (diff) |
Merge branch 'pks-git-stats-structured-objects-info' into 'master'
git/stats: Refactor code parsing git-count-objects(1) to use struct
See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/4956
Merged-by: Patrick Steinhardt <psteinhardt@gitlab.com>
Approved-by: karthik nayak <knayak@gitlab.com>
Approved-by: Toon Claes <toon@gitlab.com>
-rw-r--r-- | internal/git/housekeeping/objects_test.go | 9 | ||||
-rw-r--r-- | internal/git/stats/git.go | 115 | ||||
-rw-r--r-- | internal/git/stats/git_test.go | 90 | ||||
-rw-r--r-- | internal/git/stats/objects_info.go | 166 | ||||
-rw-r--r-- | internal/git/stats/objects_info_test.go | 285 | ||||
-rw-r--r-- | internal/git/stats/profile.go | 151 | ||||
-rw-r--r-- | internal/git/stats/profile_test.go | 86 | ||||
-rw-r--r-- | internal/gitaly/service/objectpool/fetch_into_object_pool_test.go | 22 | ||||
-rw-r--r-- | internal/gitaly/service/repository/repack_test.go | 11 |
9 files changed, 463 insertions, 472 deletions
diff --git a/internal/git/housekeeping/objects_test.go b/internal/git/housekeeping/objects_test.go index e5194f1fa..5f6b0ec07 100644 --- a/internal/git/housekeeping/objects_test.go +++ b/internal/git/housekeeping/objects_test.go @@ -3,6 +3,7 @@ package housekeeping import ( + "context" "path/filepath" "testing" @@ -14,10 +15,10 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg" ) -func requireObjectCount(t *testing.T, repoPath string, expectedObjects int64) { +func requireObjectCount(t *testing.T, ctx context.Context, repo *localrepo.Repo, expectedObjects uint64) { t.Helper() - objects, err := stats.UnpackedObjects(repoPath) + objects, err := stats.LooseObjects(ctx, repo) require.NoError(t, err) require.Equal(t, expectedObjects, objects) } @@ -44,12 +45,12 @@ func TestRepackObjects(t *testing.T) { gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main")) - requireObjectCount(t, repoPath, 1) + requireObjectCount(t, ctx, repo, 1) requirePackfileCount(t, repoPath, 0) require.NoError(t, RepackObjects(ctx, repo, RepackObjectsConfig{})) - requireObjectCount(t, repoPath, 0) + requireObjectCount(t, ctx, repo, 0) requirePackfileCount(t, repoPath, 1) require.NoFileExists(t, filepath.Join(repoPath, "info", "refs")) diff --git a/internal/git/stats/git.go b/internal/git/stats/git.go deleted file mode 100644 index ce9794c72..000000000 --- a/internal/git/stats/git.go +++ /dev/null @@ -1,115 +0,0 @@ -package stats - -import ( - "bufio" - "context" - "io" - "strconv" - "strings" - - "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" - "gitlab.com/gitlab-org/gitaly/v15/internal/git" -) - -// LogObjectsInfo read statistics of the git repo objects -// and logs it under 'count-objects' key as structured entry. -func LogObjectsInfo(ctx context.Context, repo git.RepositoryExecutor) { - logger := ctxlogrus.Extract(ctx) - - cmd, err := repo.Exec(ctx, git.SubCmd{ - Name: "count-objects", - Flags: []git.Option{git.Flag{Name: "--verbose"}}, - }) - if err != nil { - logger.WithError(err).Warn("failed on bootstrapping to gather object statistic") - return - } - - stats, err := readObjectInfoStatistic(cmd) - if err != nil { - logger.WithError(err).Warn("failed on reading to gather object statistic") - } - - if err := cmd.Wait(); err != nil { - logger.WithError(err).Warn("failed on waiting to gather object statistic") - return - } - - if len(stats) > 0 { - logger.WithField("count_objects", stats).Info("git repo statistic") - } -} - -/* - readObjectInfoStatistic parses output of 'git count-objects -v' command and represents it as dictionary - -current supported format is: - - count: 12 - packs: 2 - size-garbage: 934 - alternate: /some/path/to/.git/objects - alternate: "/some/other path/to/.git/objects" - -will result in: - - { - "count": 12, - "packs": 2, - "size-garbage": 934, - "alternate": ["/some/path/to/.git/objects", "/some/other path/to/.git/objects"] - } -*/ -func readObjectInfoStatistic(reader io.Reader) (map[string]interface{}, error) { - stats := map[string]interface{}{} - - scanner := bufio.NewScanner(reader) - for scanner.Scan() { - line := scanner.Text() - parts := strings.SplitN(line, ": ", 2) - if len(parts) != 2 { - continue - } - - // one of: count, size, in-pack, packs, size-pack, prune-packable, garbage, size-garbage, alternate (repeatable) - key := parts[0] - rawVal := strings.TrimPrefix(parts[1], ": ") - - switch key { - case "alternate": - addMultiString(stats, key, rawVal) - default: - addInt(stats, key, rawVal) - } - } - - return stats, scanner.Err() -} - -func addMultiString(stats map[string]interface{}, key, rawVal string) { - val := strings.Trim(rawVal, "\" \t\n") - - statVal, found := stats[key] - if !found { - stats[key] = val - return - } - - statAggr, ok := statVal.([]string) // 'alternate' is only repeatable key and it is a string type - if ok { - statAggr = append(statAggr, val) - } else { - delete(stats, key) // remove single string value of 'alternate' to replace it with slice - statAggr = []string{statVal.(string), val} - } - stats[key] = statAggr -} - -func addInt(stats map[string]interface{}, key, rawVal string) { - val, err := strconv.ParseInt(rawVal, 10, 64) - if err != nil { - return - } - - stats[key] = val -} diff --git a/internal/git/stats/git_test.go b/internal/git/stats/git_test.go deleted file mode 100644 index a07e82471..000000000 --- a/internal/git/stats/git_test.go +++ /dev/null @@ -1,90 +0,0 @@ -//go:build !gitaly_test_sha256 - -package stats - -import ( - "os" - "path/filepath" - "strings" - "testing" - - "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" - "github.com/sirupsen/logrus" - "github.com/sirupsen/logrus/hooks/test" - "github.com/stretchr/testify/require" - "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" - "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" - "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config" - "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" - "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg" - "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" -) - -func TestLogObjectInfo(t *testing.T) { - ctx := testhelper.Context(t) - cfg := testcfg.Build(t) - - repo1, repoPath1 := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ - SkipCreationViaService: true, - Seed: gittest.SeedGitLabTest, - }) - repo2, repoPath2 := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ - SkipCreationViaService: true, - Seed: gittest.SeedGitLabTest, - }) - - requireLog := func(entries []*logrus.Entry) map[string]interface{} { - for _, entry := range entries { - if entry.Message == "git repo statistic" { - const key = "count_objects" - data := entry.Data[key] - require.NotNil(t, data, "there is no any information about statistics") - countObjects, ok := data.(map[string]interface{}) - require.True(t, ok) - require.Contains(t, countObjects, "count") - require.Contains(t, countObjects, "size") - require.Contains(t, countObjects, "in-pack") - require.Contains(t, countObjects, "packs") - require.Contains(t, countObjects, "size-pack") - require.Contains(t, countObjects, "garbage") - require.Contains(t, countObjects, "size-garbage") - return countObjects - } - } - return nil - } - - t.Run("shared repo with multiple alternates", func(t *testing.T) { - locator := config.NewLocator(cfg) - storagePath, err := locator.GetStorageByName(repo1.GetStorageName()) - require.NoError(t, err) - - tmpDir, err := os.MkdirTemp(storagePath, "") - require.NoError(t, err) - defer func() { require.NoError(t, os.RemoveAll(tmpDir)) }() - - // clone existing local repo with two alternates - gittest.Exec(t, cfg, "clone", "--shared", repoPath1, "--reference", repoPath1, "--reference", repoPath2, tmpDir) - - logger, hook := test.NewNullLogger() - testCtx := ctxlogrus.ToContext(ctx, logger.WithField("test", "logging")) - - LogObjectsInfo(testCtx, localrepo.NewTestRepo(t, cfg, &gitalypb.Repository{ - StorageName: repo1.StorageName, - RelativePath: filepath.Join(strings.TrimPrefix(tmpDir, storagePath), ".git"), - })) - - countObjects := requireLog(hook.AllEntries()) - require.ElementsMatch(t, []string{repoPath1 + "/objects", repoPath2 + "/objects"}, countObjects["alternate"]) - }) - - t.Run("repo without alternates", func(t *testing.T) { - logger, hook := test.NewNullLogger() - testCtx := ctxlogrus.ToContext(ctx, logger.WithField("test", "logging")) - - LogObjectsInfo(testCtx, localrepo.NewTestRepo(t, cfg, repo2)) - - countObjects := requireLog(hook.AllEntries()) - require.Contains(t, countObjects, "prune-packable") - }) -} diff --git a/internal/git/stats/objects_info.go b/internal/git/stats/objects_info.go new file mode 100644 index 000000000..5ef1c9289 --- /dev/null +++ b/internal/git/stats/objects_info.go @@ -0,0 +1,166 @@ +package stats + +import ( + "bufio" + "context" + "fmt" + "io/fs" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" + "gitlab.com/gitlab-org/gitaly/v15/internal/git" +) + +// HasBitmap returns whether or not the repository contains an object bitmap. +func HasBitmap(repoPath string) (bool, error) { + bitmaps, err := filepath.Glob(filepath.Join(repoPath, "objects", "pack", "*.bitmap")) + if err != nil { + return false, err + } + + return len(bitmaps) > 0, nil +} + +// PackfilesCount returns the number of packfiles a repository has. +func PackfilesCount(repoPath string) (int, error) { + packFiles, err := GetPackfiles(repoPath) + if err != nil { + return 0, err + } + + return len(packFiles), nil +} + +// GetPackfiles returns the FileInfo of packfiles inside a repository. +func GetPackfiles(repoPath string) ([]fs.DirEntry, error) { + files, err := os.ReadDir(filepath.Join(repoPath, "objects/pack/")) + if err != nil { + return nil, err + } + + var packFiles []fs.DirEntry + for _, f := range files { + if filepath.Ext(f.Name()) == ".pack" { + packFiles = append(packFiles, f) + } + } + + return packFiles, nil +} + +// LooseObjects returns the number of loose objects that are not in a packfile. +func LooseObjects(ctx context.Context, repo git.RepositoryExecutor) (uint64, error) { + objectsInfo, err := ObjectsInfoForRepository(ctx, repo) + if err != nil { + return 0, err + } + + return objectsInfo.LooseObjects, nil +} + +// LogObjectsInfo read statistics of the git repo objects +// and logs it under 'objects_info' key as structured entry. +func LogObjectsInfo(ctx context.Context, repo git.RepositoryExecutor) { + logger := ctxlogrus.Extract(ctx) + + objectsInfo, err := ObjectsInfoForRepository(ctx, repo) + if err != nil { + logger.WithError(err).Warn("failed reading objects info") + } else { + logger.WithField("objects_info", objectsInfo).Info("repository objects info") + } +} + +// ObjectsInfo contains information on the object database. +type ObjectsInfo struct { + // LooseObjects is the count of loose objects. + LooseObjects uint64 `json:"loose_objects"` + // LooseObjectsSize is the accumulated on-disk size of all loose objects in KiB. + LooseObjectsSize uint64 `json:"loose_objects_size"` + // PackedObjects is the count of packed objects. + PackedObjects uint64 `json:"packed_objects"` + // Packfiles is the number of packfiles. + Packfiles uint64 `json:"packfiles"` + // PackfilesSize is the accumulated on-disk size of all packfiles in KiB. + PackfilesSize uint64 `json:"packfiles_size"` + // PrunableObjects is the number of objects that exist both as loose and as packed objects. + // The loose objects may be pruned in that case. + PruneableObjects uint64 `json:"prunable_objects"` + // Garbage is the count of files in the object database that are neither a valid loose + // object nor a valid packfile. + Garbage uint64 `json:"garbage"` + // GarbageSize is the accumulated on-disk size of garbage files. + GarbageSize uint64 `json:"garbage_size"` + // Alternates is the list of absolute paths of alternate object databases this repository is + // connected to. + Alternates []string `json:"alternates"` +} + +// ObjectsInfoForRepository computes the ObjectsInfo for a repository. +func ObjectsInfoForRepository(ctx context.Context, repo git.RepositoryExecutor) (ObjectsInfo, error) { + countObjects, err := repo.Exec(ctx, git.SubCmd{ + Name: "count-objects", + Flags: []git.Option{git.Flag{Name: "--verbose"}}, + }) + if err != nil { + return ObjectsInfo{}, fmt.Errorf("running git-count-objects: %w", err) + } + + var info ObjectsInfo + + // The expected format is: + // + // count: 12 + // packs: 2 + // size-garbage: 934 + // alternate: /some/path/to/.git/objects + // alternate: "/some/other path/to/.git/objects" + scanner := bufio.NewScanner(countObjects) + for scanner.Scan() { + line := scanner.Text() + + parts := strings.SplitN(line, ": ", 2) + if len(parts) != 2 { + continue + } + + var err error + switch parts[0] { + case "count": + info.LooseObjects, err = strconv.ParseUint(parts[1], 10, 64) + case "size": + info.LooseObjectsSize, err = strconv.ParseUint(parts[1], 10, 64) + case "in-pack": + info.PackedObjects, err = strconv.ParseUint(parts[1], 10, 64) + case "packs": + info.Packfiles, err = strconv.ParseUint(parts[1], 10, 64) + case "size-pack": + info.PackfilesSize, err = strconv.ParseUint(parts[1], 10, 64) + case "prune-packable": + info.PruneableObjects, err = strconv.ParseUint(parts[1], 10, 64) + case "garbage": + info.Garbage, err = strconv.ParseUint(parts[1], 10, 64) + case "size-garbage": + info.GarbageSize, err = strconv.ParseUint(parts[1], 10, 64) + case "alternate": + info.Alternates = append(info.Alternates, strings.Trim(parts[1], "\" \t\n")) + } + + if err != nil { + return ObjectsInfo{}, fmt.Errorf("parsing %q: %w", parts[0], err) + } + } + + if err := scanner.Err(); err != nil { + return ObjectsInfo{}, fmt.Errorf("scanning object info: %w", err) + } + + if err := countObjects.Wait(); err != nil { + return ObjectsInfo{}, fmt.Errorf("counting objects: %w", err) + } + + return info, nil +} diff --git a/internal/git/stats/objects_info_test.go b/internal/git/stats/objects_info_test.go new file mode 100644 index 000000000..c77c61c61 --- /dev/null +++ b/internal/git/stats/objects_info_test.go @@ -0,0 +1,285 @@ +//go:build !gitaly_test_sha256 + +package stats + +import ( + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" + "github.com/sirupsen/logrus" + "github.com/sirupsen/logrus/hooks/test" + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/v15/internal/git" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" + "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config" + "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" + "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg" + "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" +) + +func TestRepositoryProfile(t *testing.T) { + ctx := testhelper.Context(t) + cfg := testcfg.Build(t) + + repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + }) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + hasBitmap, err := HasBitmap(repoPath) + require.NoError(t, err) + require.False(t, hasBitmap, "repository should not have a bitmap initially") + packfiles, err := GetPackfiles(repoPath) + require.NoError(t, err) + require.Empty(t, packfiles) + packfilesCount, err := PackfilesCount(repoPath) + require.NoError(t, err) + require.Zero(t, packfilesCount) + + blobs := 10 + blobIDs := gittest.WriteBlobs(t, cfg, repoPath, blobs) + + looseObjects, err := LooseObjects(ctx, repo) + require.NoError(t, err) + require.Equal(t, uint64(blobs), looseObjects) + + for _, blobID := range blobIDs { + commitID := gittest.WriteCommit(t, cfg, repoPath, + gittest.WithTreeEntries(gittest.TreeEntry{ + Mode: "100644", Path: "blob", OID: git.ObjectID(blobID), + }), + ) + gittest.Exec(t, cfg, "-C", repoPath, "update-ref", "refs/heads/"+blobID, commitID.String()) + } + + // write a loose object + gittest.WriteBlobs(t, cfg, repoPath, 1) + + gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "-b", "-d") + + looseObjects, err = LooseObjects(ctx, repo) + require.NoError(t, err) + require.Equal(t, uint64(1), looseObjects) + + // write another loose object + blobID := gittest.WriteBlobs(t, cfg, repoPath, 1)[0] + + // due to OS semantics, ensure that the blob has a timestamp that is after the packfile + theFuture := time.Now().Add(10 * time.Minute) + require.NoError(t, os.Chtimes(filepath.Join(repoPath, "objects", blobID[0:2], blobID[2:]), theFuture, theFuture)) + + looseObjects, err = LooseObjects(ctx, repo) + require.NoError(t, err) + require.Equal(t, uint64(2), looseObjects) +} + +func TestLogObjectInfo(t *testing.T) { + ctx := testhelper.Context(t) + cfg := testcfg.Build(t) + + repo1, repoPath1 := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + Seed: gittest.SeedGitLabTest, + }) + repo2, repoPath2 := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + Seed: gittest.SeedGitLabTest, + }) + + requireObjectsInfo := func(entries []*logrus.Entry) ObjectsInfo { + for _, entry := range entries { + if entry.Message == "repository objects info" { + objectsInfo, ok := entry.Data["objects_info"] + require.True(t, ok) + require.IsType(t, ObjectsInfo{}, objectsInfo) + return objectsInfo.(ObjectsInfo) + } + } + + require.FailNow(t, "no objects info log entry found") + return ObjectsInfo{} + } + + t.Run("shared repo with multiple alternates", func(t *testing.T) { + locator := config.NewLocator(cfg) + storagePath, err := locator.GetStorageByName(repo1.GetStorageName()) + require.NoError(t, err) + + tmpDir, err := os.MkdirTemp(storagePath, "") + require.NoError(t, err) + defer func() { require.NoError(t, os.RemoveAll(tmpDir)) }() + + // clone existing local repo with two alternates + gittest.Exec(t, cfg, "clone", "--shared", repoPath1, "--reference", repoPath1, "--reference", repoPath2, tmpDir) + + logger, hook := test.NewNullLogger() + testCtx := ctxlogrus.ToContext(ctx, logger.WithField("test", "logging")) + + LogObjectsInfo(testCtx, localrepo.NewTestRepo(t, cfg, &gitalypb.Repository{ + StorageName: repo1.StorageName, + RelativePath: filepath.Join(strings.TrimPrefix(tmpDir, storagePath), ".git"), + })) + + objectsInfo := requireObjectsInfo(hook.AllEntries()) + require.Equal(t, []string{repoPath1 + "/objects", repoPath2 + "/objects"}, objectsInfo.Alternates) + }) + + t.Run("repo without alternates", func(t *testing.T) { + logger, hook := test.NewNullLogger() + testCtx := ctxlogrus.ToContext(ctx, logger.WithField("test", "logging")) + + LogObjectsInfo(testCtx, localrepo.NewTestRepo(t, cfg, repo2)) + + objectsInfo := requireObjectsInfo(hook.AllEntries()) + require.NotZero(t, objectsInfo.LooseObjects) + require.NotZero(t, objectsInfo.LooseObjectsSize) + require.NotZero(t, objectsInfo.PackedObjects) + require.NotZero(t, objectsInfo.Packfiles) + require.NotZero(t, objectsInfo.PackfilesSize) + require.Nil(t, objectsInfo.Alternates) + }) +} + +func TestObjectsInfoForRepository(t *testing.T) { + t.Parallel() + + ctx := testhelper.Context(t) + cfg := testcfg.Build(t) + + _, alternatePath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + }) + alternatePath = filepath.Join(alternatePath, "objects") + + for _, tc := range []struct { + desc string + setup func(t *testing.T, repoPath string) + expectedErr error + expectedObjectsInfo ObjectsInfo + }{ + { + desc: "empty repository", + setup: func(*testing.T, string) { + }, + }, + { + desc: "single blob", + setup: func(t *testing.T, repoPath string) { + gittest.WriteBlob(t, cfg, repoPath, []byte("x")) + }, + expectedObjectsInfo: ObjectsInfo{ + LooseObjects: 1, + LooseObjectsSize: 4, + }, + }, + { + desc: "single packed blob", + setup: func(t *testing.T, repoPath string) { + blobID := gittest.WriteBlob(t, cfg, repoPath, []byte("x")) + gittest.WriteRef(t, cfg, repoPath, "refs/tags/blob", blobID) + // We use `-d`, which also prunes objects that have been packed. + gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad") + }, + expectedObjectsInfo: ObjectsInfo{ + PackedObjects: 1, + Packfiles: 1, + PackfilesSize: 1, + }, + }, + { + desc: "single pruneable blob", + setup: func(t *testing.T, repoPath string) { + blobID := gittest.WriteBlob(t, cfg, repoPath, []byte("x")) + gittest.WriteRef(t, cfg, repoPath, "refs/tags/blob", blobID) + // This time we don't use `-d`, so the object will exist both in + // loose and packed form. + gittest.Exec(t, cfg, "-C", repoPath, "repack", "-a") + }, + expectedObjectsInfo: ObjectsInfo{ + LooseObjects: 1, + LooseObjectsSize: 4, + PackedObjects: 1, + Packfiles: 1, + PackfilesSize: 1, + PruneableObjects: 1, + }, + }, + { + desc: "garbage", + setup: func(t *testing.T, repoPath string) { + garbagePath := filepath.Join(repoPath, "objects", "pack", "garbage") + require.NoError(t, os.WriteFile(garbagePath, []byte("x"), 0o600)) + }, + expectedObjectsInfo: ObjectsInfo{ + Garbage: 1, + // git-count-objects(1) somehow does not count this file's size, + // which I've verified manually. + GarbageSize: 0, + }, + }, + { + desc: "alternates", + setup: func(t *testing.T, repoPath string) { + infoAlternatesPath := filepath.Join(repoPath, "objects", "info", "alternates") + require.NoError(t, os.WriteFile(infoAlternatesPath, []byte(alternatePath), 0o600)) + }, + expectedObjectsInfo: ObjectsInfo{ + Alternates: []string{ + alternatePath, + }, + }, + }, + { + desc: "all together", + setup: func(t *testing.T, repoPath string) { + infoAlternatesPath := filepath.Join(repoPath, "objects", "info", "alternates") + require.NoError(t, os.WriteFile(infoAlternatesPath, []byte(alternatePath), 0o600)) + + // We write a single packed blob. + blobID := gittest.WriteBlob(t, cfg, repoPath, []byte("x")) + gittest.WriteRef(t, cfg, repoPath, "refs/tags/blob", blobID) + gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad") + + // And two loose ones. + gittest.WriteBlob(t, cfg, repoPath, []byte("1")) + gittest.WriteBlob(t, cfg, repoPath, []byte("2")) + + // And three garbage-files. This is done so we've got unique counts + // everywhere. + for _, file := range []string{"garbage1", "garbage2", "garbage3"} { + garbagePath := filepath.Join(repoPath, "objects", "pack", file) + require.NoError(t, os.WriteFile(garbagePath, []byte("x"), 0o600)) + } + }, + expectedObjectsInfo: ObjectsInfo{ + LooseObjects: 2, + LooseObjectsSize: 8, + PackedObjects: 1, + Packfiles: 1, + PackfilesSize: 1, + Garbage: 3, + Alternates: []string{ + alternatePath, + }, + }, + }, + } { + t.Run(tc.desc, func(t *testing.T) { + repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + }) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + tc.setup(t, repoPath) + + objectsInfo, err := ObjectsInfoForRepository(ctx, repo) + require.Equal(t, tc.expectedErr, err) + require.Equal(t, tc.expectedObjectsInfo, objectsInfo) + }) + } +} diff --git a/internal/git/stats/profile.go b/internal/git/stats/profile.go deleted file mode 100644 index 85c4015f8..000000000 --- a/internal/git/stats/profile.go +++ /dev/null @@ -1,151 +0,0 @@ -package stats - -import ( - "context" - "errors" - "io/fs" - "os" - "path/filepath" - "strconv" - "time" - - "gitlab.com/gitlab-org/gitaly/v15/internal/git" - "gitlab.com/gitlab-org/gitaly/v15/internal/git/repository" -) - -// HasBitmap returns whether or not the repository contains an object bitmap. -func HasBitmap(repoPath string) (bool, error) { - hasBitmap, err := hasBitmap(repoPath) - if err != nil { - return false, err - } - return hasBitmap, nil -} - -// PackfilesCount returns the number of packfiles a repository has. -func PackfilesCount(repoPath string) (int, error) { - packFiles, err := GetPackfiles(repoPath) - if err != nil { - return 0, err - } - - return len(packFiles), nil -} - -// GetPackfiles returns the FileInfo of packfiles inside a repository. -func GetPackfiles(repoPath string) ([]fs.DirEntry, error) { - files, err := os.ReadDir(filepath.Join(repoPath, "objects/pack/")) - if err != nil { - return nil, err - } - - var packFiles []fs.DirEntry - for _, f := range files { - if filepath.Ext(f.Name()) == ".pack" { - packFiles = append(packFiles, f) - } - } - - return packFiles, nil -} - -// UnpackedObjects returns the number of loose objects that have a timestamp later than the newest -// packfile. -func UnpackedObjects(repoPath string) (int64, error) { - unpackedObjects, err := getUnpackedObjects(repoPath) - if err != nil { - return 0, err - } - - return unpackedObjects, nil -} - -// LooseObjects returns the number of loose objects that are not in a packfile. -func LooseObjects(ctx context.Context, gitCmdFactory git.CommandFactory, repository repository.GitRepo) (int64, error) { - cmd, err := gitCmdFactory.New(ctx, repository, git.SubCmd{Name: "count-objects", Flags: []git.Option{git.Flag{Name: "--verbose"}}}) - if err != nil { - return 0, err - } - - objectStats, err := readObjectInfoStatistic(cmd) - if err != nil { - return 0, err - } - - count, ok := objectStats["count"].(int64) - if !ok { - return 0, errors.New("could not get object count") - } - - return count, nil -} - -func hasBitmap(repoPath string) (bool, error) { - bitmaps, err := filepath.Glob(filepath.Join(repoPath, "objects", "pack", "*.bitmap")) - if err != nil { - return false, err - } - - return len(bitmaps) > 0, nil -} - -func getUnpackedObjects(repoPath string) (int64, error) { - objectDir := filepath.Join(repoPath, "objects") - - packFiles, err := filepath.Glob(filepath.Join(objectDir, "pack", "*.pack")) - if err != nil { - return 0, err - } - - var newestPackfileModtime time.Time - - for _, packFilePath := range packFiles { - stat, err := os.Stat(packFilePath) - if err != nil { - return 0, err - } - if stat.ModTime().After(newestPackfileModtime) { - newestPackfileModtime = stat.ModTime() - } - } - - var unpackedObjects int64 - if err = filepath.Walk(objectDir, func(path string, info os.FileInfo, err error) error { - if objectDir == path { - return nil - } - - if info.IsDir() { - if err := skipNonObjectDir(objectDir, path); err != nil { - return err - } - } - - if !info.IsDir() && info.ModTime().After(newestPackfileModtime) { - unpackedObjects++ - } - - return nil - }); err != nil { - return 0, err - } - - return unpackedObjects, nil -} - -func skipNonObjectDir(root, path string) error { - rel, err := filepath.Rel(root, path) - if err != nil { - return err - } - - if len(rel) != 2 { - return filepath.SkipDir - } - - if _, err := strconv.ParseUint(rel, 16, 8); err != nil { - return filepath.SkipDir - } - - return nil -} diff --git a/internal/git/stats/profile_test.go b/internal/git/stats/profile_test.go deleted file mode 100644 index 9d1c5ecf3..000000000 --- a/internal/git/stats/profile_test.go +++ /dev/null @@ -1,86 +0,0 @@ -//go:build !gitaly_test_sha256 - -package stats - -import ( - "os" - "path/filepath" - "testing" - "time" - - "github.com/stretchr/testify/require" - "gitlab.com/gitlab-org/gitaly/v15/internal/git" - "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" - "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" - "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg" -) - -func TestRepositoryProfile(t *testing.T) { - ctx := testhelper.Context(t) - cfg := testcfg.Build(t) - - testRepo, testRepoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ - SkipCreationViaService: true, - }) - - hasBitmap, err := HasBitmap(testRepoPath) - require.NoError(t, err) - require.False(t, hasBitmap, "repository should not have a bitmap initially") - unpackedObjects, err := UnpackedObjects(testRepoPath) - require.NoError(t, err) - require.Zero(t, unpackedObjects) - packfiles, err := GetPackfiles(testRepoPath) - require.NoError(t, err) - require.Empty(t, packfiles) - packfilesCount, err := PackfilesCount(testRepoPath) - require.NoError(t, err) - require.Zero(t, packfilesCount) - - blobs := 10 - blobIDs := gittest.WriteBlobs(t, cfg, testRepoPath, blobs) - - unpackedObjects, err = UnpackedObjects(testRepoPath) - require.NoError(t, err) - require.Equal(t, int64(blobs), unpackedObjects) - - gitCmdFactory := gittest.NewCommandFactory(t, cfg) - looseObjects, err := LooseObjects(ctx, gitCmdFactory, testRepo) - require.NoError(t, err) - require.Equal(t, int64(blobs), looseObjects) - - for _, blobID := range blobIDs { - commitID := gittest.WriteCommit(t, cfg, testRepoPath, - gittest.WithTreeEntries(gittest.TreeEntry{ - Mode: "100644", Path: "blob", OID: git.ObjectID(blobID), - }), - ) - gittest.Exec(t, cfg, "-C", testRepoPath, "update-ref", "refs/heads/"+blobID, commitID.String()) - } - - // write a loose object - gittest.WriteBlobs(t, cfg, testRepoPath, 1) - - gittest.Exec(t, cfg, "-C", testRepoPath, "repack", "-A", "-b", "-d") - - unpackedObjects, err = UnpackedObjects(testRepoPath) - require.NoError(t, err) - require.Zero(t, unpackedObjects) - looseObjects, err = LooseObjects(ctx, gitCmdFactory, testRepo) - require.NoError(t, err) - require.Equal(t, int64(1), looseObjects) - - // write another loose object - blobID := gittest.WriteBlobs(t, cfg, testRepoPath, 1)[0] - - // due to OS semantics, ensure that the blob has a timestamp that is after the packfile - theFuture := time.Now().Add(10 * time.Minute) - require.NoError(t, os.Chtimes(filepath.Join(testRepoPath, "objects", blobID[0:2], blobID[2:]), theFuture, theFuture)) - - unpackedObjects, err = UnpackedObjects(testRepoPath) - require.NoError(t, err) - require.Equal(t, int64(1), unpackedObjects) - - looseObjects, err = LooseObjects(ctx, gitCmdFactory, testRepo) - require.NoError(t, err) - require.Equal(t, int64(2), looseObjects) -} diff --git a/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go b/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go index bb4b28984..389505b76 100644 --- a/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go +++ b/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go @@ -20,6 +20,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/git/catfile" "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v15/internal/git/housekeeping" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/stats" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/transaction" "gitlab.com/gitlab-org/gitaly/v15/internal/metadata" @@ -251,26 +252,9 @@ func TestFetchIntoObjectPool_CollectLogStatistics(t *testing.T) { _, err = client.FetchIntoObjectPool(ctx, req) require.NoError(t, err) - const key = "count_objects" for _, logEntry := range hook.AllEntries() { - if stats, ok := logEntry.Data[key]; ok { - require.IsType(t, map[string]interface{}{}, stats) - - var keys []string - for key := range stats.(map[string]interface{}) { - keys = append(keys, key) - } - - require.ElementsMatch(t, []string{ - "count", - "garbage", - "in-pack", - "packs", - "prune-packable", - "size", - "size-garbage", - "size-pack", - }, keys) + if objectsInfo, ok := logEntry.Data["objects_info"]; ok { + require.IsType(t, stats.ObjectsInfo{}, objectsInfo) return } } diff --git a/internal/gitaly/service/repository/repack_test.go b/internal/gitaly/service/repository/repack_test.go index acb581305..53fd35bdd 100644 --- a/internal/gitaly/service/repository/repack_test.go +++ b/internal/gitaly/service/repository/repack_test.go @@ -222,16 +222,13 @@ func TestRepackFullCollectLogStatistics(t *testing.T) { func mustCountObjectLog(tb testing.TB, entries ...*logrus.Entry) { tb.Helper() - const key = "count_objects" + const key = "objects_info" for _, entry := range entries { - if entry.Message == "git repo statistic" { + if entry.Message == "repository objects info" { require.Contains(tb, entry.Data, "grpc.request.glProjectPath") require.Contains(tb, entry.Data, "grpc.request.glRepository") - require.Contains(tb, entry.Data, key, "statistics not found") - - objectStats, ok := entry.Data[key].(map[string]interface{}) - require.True(tb, ok, "expected count_objects to be a map") - require.Contains(tb, objectStats, "count") + require.Contains(tb, entry.Data, key, "objects info not found") + require.IsType(tb, stats.ObjectsInfo{}, entry.Data[key]) return } } |