Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2022-10-21 14:42:36 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2022-10-21 14:42:36 +0300
commit96269fad786badf28b80b74e66457ae1a1a95210 (patch)
tree9ee92e90c570fd8dd6247d7c4b4b6a3882f2bc37
parentc3a57eaa734283ea3c39f7774e5e3550f24891aa (diff)
parentab42b0455b2417696c296c218c044e7fe7b2095d (diff)
Merge branch 'pks-git-stats-structured-objects-info' into 'master'
git/stats: Refactor code parsing git-count-objects(1) to use struct See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/4956 Merged-by: Patrick Steinhardt <psteinhardt@gitlab.com> Approved-by: karthik nayak <knayak@gitlab.com> Approved-by: Toon Claes <toon@gitlab.com>
-rw-r--r--internal/git/housekeeping/objects_test.go9
-rw-r--r--internal/git/stats/git.go115
-rw-r--r--internal/git/stats/git_test.go90
-rw-r--r--internal/git/stats/objects_info.go166
-rw-r--r--internal/git/stats/objects_info_test.go285
-rw-r--r--internal/git/stats/profile.go151
-rw-r--r--internal/git/stats/profile_test.go86
-rw-r--r--internal/gitaly/service/objectpool/fetch_into_object_pool_test.go22
-rw-r--r--internal/gitaly/service/repository/repack_test.go11
9 files changed, 463 insertions, 472 deletions
diff --git a/internal/git/housekeeping/objects_test.go b/internal/git/housekeeping/objects_test.go
index e5194f1fa..5f6b0ec07 100644
--- a/internal/git/housekeeping/objects_test.go
+++ b/internal/git/housekeeping/objects_test.go
@@ -3,6 +3,7 @@
package housekeeping
import (
+ "context"
"path/filepath"
"testing"
@@ -14,10 +15,10 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg"
)
-func requireObjectCount(t *testing.T, repoPath string, expectedObjects int64) {
+func requireObjectCount(t *testing.T, ctx context.Context, repo *localrepo.Repo, expectedObjects uint64) {
t.Helper()
- objects, err := stats.UnpackedObjects(repoPath)
+ objects, err := stats.LooseObjects(ctx, repo)
require.NoError(t, err)
require.Equal(t, expectedObjects, objects)
}
@@ -44,12 +45,12 @@ func TestRepackObjects(t *testing.T) {
gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main"))
- requireObjectCount(t, repoPath, 1)
+ requireObjectCount(t, ctx, repo, 1)
requirePackfileCount(t, repoPath, 0)
require.NoError(t, RepackObjects(ctx, repo, RepackObjectsConfig{}))
- requireObjectCount(t, repoPath, 0)
+ requireObjectCount(t, ctx, repo, 0)
requirePackfileCount(t, repoPath, 1)
require.NoFileExists(t, filepath.Join(repoPath, "info", "refs"))
diff --git a/internal/git/stats/git.go b/internal/git/stats/git.go
deleted file mode 100644
index ce9794c72..000000000
--- a/internal/git/stats/git.go
+++ /dev/null
@@ -1,115 +0,0 @@
-package stats
-
-import (
- "bufio"
- "context"
- "io"
- "strconv"
- "strings"
-
- "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus"
- "gitlab.com/gitlab-org/gitaly/v15/internal/git"
-)
-
-// LogObjectsInfo read statistics of the git repo objects
-// and logs it under 'count-objects' key as structured entry.
-func LogObjectsInfo(ctx context.Context, repo git.RepositoryExecutor) {
- logger := ctxlogrus.Extract(ctx)
-
- cmd, err := repo.Exec(ctx, git.SubCmd{
- Name: "count-objects",
- Flags: []git.Option{git.Flag{Name: "--verbose"}},
- })
- if err != nil {
- logger.WithError(err).Warn("failed on bootstrapping to gather object statistic")
- return
- }
-
- stats, err := readObjectInfoStatistic(cmd)
- if err != nil {
- logger.WithError(err).Warn("failed on reading to gather object statistic")
- }
-
- if err := cmd.Wait(); err != nil {
- logger.WithError(err).Warn("failed on waiting to gather object statistic")
- return
- }
-
- if len(stats) > 0 {
- logger.WithField("count_objects", stats).Info("git repo statistic")
- }
-}
-
-/*
- readObjectInfoStatistic parses output of 'git count-objects -v' command and represents it as dictionary
-
-current supported format is:
-
- count: 12
- packs: 2
- size-garbage: 934
- alternate: /some/path/to/.git/objects
- alternate: "/some/other path/to/.git/objects"
-
-will result in:
-
- {
- "count": 12,
- "packs": 2,
- "size-garbage": 934,
- "alternate": ["/some/path/to/.git/objects", "/some/other path/to/.git/objects"]
- }
-*/
-func readObjectInfoStatistic(reader io.Reader) (map[string]interface{}, error) {
- stats := map[string]interface{}{}
-
- scanner := bufio.NewScanner(reader)
- for scanner.Scan() {
- line := scanner.Text()
- parts := strings.SplitN(line, ": ", 2)
- if len(parts) != 2 {
- continue
- }
-
- // one of: count, size, in-pack, packs, size-pack, prune-packable, garbage, size-garbage, alternate (repeatable)
- key := parts[0]
- rawVal := strings.TrimPrefix(parts[1], ": ")
-
- switch key {
- case "alternate":
- addMultiString(stats, key, rawVal)
- default:
- addInt(stats, key, rawVal)
- }
- }
-
- return stats, scanner.Err()
-}
-
-func addMultiString(stats map[string]interface{}, key, rawVal string) {
- val := strings.Trim(rawVal, "\" \t\n")
-
- statVal, found := stats[key]
- if !found {
- stats[key] = val
- return
- }
-
- statAggr, ok := statVal.([]string) // 'alternate' is only repeatable key and it is a string type
- if ok {
- statAggr = append(statAggr, val)
- } else {
- delete(stats, key) // remove single string value of 'alternate' to replace it with slice
- statAggr = []string{statVal.(string), val}
- }
- stats[key] = statAggr
-}
-
-func addInt(stats map[string]interface{}, key, rawVal string) {
- val, err := strconv.ParseInt(rawVal, 10, 64)
- if err != nil {
- return
- }
-
- stats[key] = val
-}
diff --git a/internal/git/stats/git_test.go b/internal/git/stats/git_test.go
deleted file mode 100644
index a07e82471..000000000
--- a/internal/git/stats/git_test.go
+++ /dev/null
@@ -1,90 +0,0 @@
-//go:build !gitaly_test_sha256
-
-package stats
-
-import (
- "os"
- "path/filepath"
- "strings"
- "testing"
-
- "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus"
- "github.com/sirupsen/logrus"
- "github.com/sirupsen/logrus/hooks/test"
- "github.com/stretchr/testify/require"
- "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest"
- "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
- "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config"
- "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
- "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg"
- "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb"
-)
-
-func TestLogObjectInfo(t *testing.T) {
- ctx := testhelper.Context(t)
- cfg := testcfg.Build(t)
-
- repo1, repoPath1 := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{
- SkipCreationViaService: true,
- Seed: gittest.SeedGitLabTest,
- })
- repo2, repoPath2 := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{
- SkipCreationViaService: true,
- Seed: gittest.SeedGitLabTest,
- })
-
- requireLog := func(entries []*logrus.Entry) map[string]interface{} {
- for _, entry := range entries {
- if entry.Message == "git repo statistic" {
- const key = "count_objects"
- data := entry.Data[key]
- require.NotNil(t, data, "there is no any information about statistics")
- countObjects, ok := data.(map[string]interface{})
- require.True(t, ok)
- require.Contains(t, countObjects, "count")
- require.Contains(t, countObjects, "size")
- require.Contains(t, countObjects, "in-pack")
- require.Contains(t, countObjects, "packs")
- require.Contains(t, countObjects, "size-pack")
- require.Contains(t, countObjects, "garbage")
- require.Contains(t, countObjects, "size-garbage")
- return countObjects
- }
- }
- return nil
- }
-
- t.Run("shared repo with multiple alternates", func(t *testing.T) {
- locator := config.NewLocator(cfg)
- storagePath, err := locator.GetStorageByName(repo1.GetStorageName())
- require.NoError(t, err)
-
- tmpDir, err := os.MkdirTemp(storagePath, "")
- require.NoError(t, err)
- defer func() { require.NoError(t, os.RemoveAll(tmpDir)) }()
-
- // clone existing local repo with two alternates
- gittest.Exec(t, cfg, "clone", "--shared", repoPath1, "--reference", repoPath1, "--reference", repoPath2, tmpDir)
-
- logger, hook := test.NewNullLogger()
- testCtx := ctxlogrus.ToContext(ctx, logger.WithField("test", "logging"))
-
- LogObjectsInfo(testCtx, localrepo.NewTestRepo(t, cfg, &gitalypb.Repository{
- StorageName: repo1.StorageName,
- RelativePath: filepath.Join(strings.TrimPrefix(tmpDir, storagePath), ".git"),
- }))
-
- countObjects := requireLog(hook.AllEntries())
- require.ElementsMatch(t, []string{repoPath1 + "/objects", repoPath2 + "/objects"}, countObjects["alternate"])
- })
-
- t.Run("repo without alternates", func(t *testing.T) {
- logger, hook := test.NewNullLogger()
- testCtx := ctxlogrus.ToContext(ctx, logger.WithField("test", "logging"))
-
- LogObjectsInfo(testCtx, localrepo.NewTestRepo(t, cfg, repo2))
-
- countObjects := requireLog(hook.AllEntries())
- require.Contains(t, countObjects, "prune-packable")
- })
-}
diff --git a/internal/git/stats/objects_info.go b/internal/git/stats/objects_info.go
new file mode 100644
index 000000000..5ef1c9289
--- /dev/null
+++ b/internal/git/stats/objects_info.go
@@ -0,0 +1,166 @@
+package stats
+
+import (
+ "bufio"
+ "context"
+ "fmt"
+ "io/fs"
+ "os"
+ "path/filepath"
+ "strconv"
+ "strings"
+
+ "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git"
+)
+
+// HasBitmap returns whether or not the repository contains an object bitmap.
+func HasBitmap(repoPath string) (bool, error) {
+ bitmaps, err := filepath.Glob(filepath.Join(repoPath, "objects", "pack", "*.bitmap"))
+ if err != nil {
+ return false, err
+ }
+
+ return len(bitmaps) > 0, nil
+}
+
+// PackfilesCount returns the number of packfiles a repository has.
+func PackfilesCount(repoPath string) (int, error) {
+ packFiles, err := GetPackfiles(repoPath)
+ if err != nil {
+ return 0, err
+ }
+
+ return len(packFiles), nil
+}
+
+// GetPackfiles returns the FileInfo of packfiles inside a repository.
+func GetPackfiles(repoPath string) ([]fs.DirEntry, error) {
+ files, err := os.ReadDir(filepath.Join(repoPath, "objects/pack/"))
+ if err != nil {
+ return nil, err
+ }
+
+ var packFiles []fs.DirEntry
+ for _, f := range files {
+ if filepath.Ext(f.Name()) == ".pack" {
+ packFiles = append(packFiles, f)
+ }
+ }
+
+ return packFiles, nil
+}
+
+// LooseObjects returns the number of loose objects that are not in a packfile.
+func LooseObjects(ctx context.Context, repo git.RepositoryExecutor) (uint64, error) {
+ objectsInfo, err := ObjectsInfoForRepository(ctx, repo)
+ if err != nil {
+ return 0, err
+ }
+
+ return objectsInfo.LooseObjects, nil
+}
+
+// LogObjectsInfo read statistics of the git repo objects
+// and logs it under 'objects_info' key as structured entry.
+func LogObjectsInfo(ctx context.Context, repo git.RepositoryExecutor) {
+ logger := ctxlogrus.Extract(ctx)
+
+ objectsInfo, err := ObjectsInfoForRepository(ctx, repo)
+ if err != nil {
+ logger.WithError(err).Warn("failed reading objects info")
+ } else {
+ logger.WithField("objects_info", objectsInfo).Info("repository objects info")
+ }
+}
+
+// ObjectsInfo contains information on the object database.
+type ObjectsInfo struct {
+ // LooseObjects is the count of loose objects.
+ LooseObjects uint64 `json:"loose_objects"`
+ // LooseObjectsSize is the accumulated on-disk size of all loose objects in KiB.
+ LooseObjectsSize uint64 `json:"loose_objects_size"`
+ // PackedObjects is the count of packed objects.
+ PackedObjects uint64 `json:"packed_objects"`
+ // Packfiles is the number of packfiles.
+ Packfiles uint64 `json:"packfiles"`
+ // PackfilesSize is the accumulated on-disk size of all packfiles in KiB.
+ PackfilesSize uint64 `json:"packfiles_size"`
+ // PrunableObjects is the number of objects that exist both as loose and as packed objects.
+ // The loose objects may be pruned in that case.
+ PruneableObjects uint64 `json:"prunable_objects"`
+ // Garbage is the count of files in the object database that are neither a valid loose
+ // object nor a valid packfile.
+ Garbage uint64 `json:"garbage"`
+ // GarbageSize is the accumulated on-disk size of garbage files.
+ GarbageSize uint64 `json:"garbage_size"`
+ // Alternates is the list of absolute paths of alternate object databases this repository is
+ // connected to.
+ Alternates []string `json:"alternates"`
+}
+
+// ObjectsInfoForRepository computes the ObjectsInfo for a repository.
+func ObjectsInfoForRepository(ctx context.Context, repo git.RepositoryExecutor) (ObjectsInfo, error) {
+ countObjects, err := repo.Exec(ctx, git.SubCmd{
+ Name: "count-objects",
+ Flags: []git.Option{git.Flag{Name: "--verbose"}},
+ })
+ if err != nil {
+ return ObjectsInfo{}, fmt.Errorf("running git-count-objects: %w", err)
+ }
+
+ var info ObjectsInfo
+
+ // The expected format is:
+ //
+ // count: 12
+ // packs: 2
+ // size-garbage: 934
+ // alternate: /some/path/to/.git/objects
+ // alternate: "/some/other path/to/.git/objects"
+ scanner := bufio.NewScanner(countObjects)
+ for scanner.Scan() {
+ line := scanner.Text()
+
+ parts := strings.SplitN(line, ": ", 2)
+ if len(parts) != 2 {
+ continue
+ }
+
+ var err error
+ switch parts[0] {
+ case "count":
+ info.LooseObjects, err = strconv.ParseUint(parts[1], 10, 64)
+ case "size":
+ info.LooseObjectsSize, err = strconv.ParseUint(parts[1], 10, 64)
+ case "in-pack":
+ info.PackedObjects, err = strconv.ParseUint(parts[1], 10, 64)
+ case "packs":
+ info.Packfiles, err = strconv.ParseUint(parts[1], 10, 64)
+ case "size-pack":
+ info.PackfilesSize, err = strconv.ParseUint(parts[1], 10, 64)
+ case "prune-packable":
+ info.PruneableObjects, err = strconv.ParseUint(parts[1], 10, 64)
+ case "garbage":
+ info.Garbage, err = strconv.ParseUint(parts[1], 10, 64)
+ case "size-garbage":
+ info.GarbageSize, err = strconv.ParseUint(parts[1], 10, 64)
+ case "alternate":
+ info.Alternates = append(info.Alternates, strings.Trim(parts[1], "\" \t\n"))
+ }
+
+ if err != nil {
+ return ObjectsInfo{}, fmt.Errorf("parsing %q: %w", parts[0], err)
+ }
+ }
+
+ if err := scanner.Err(); err != nil {
+ return ObjectsInfo{}, fmt.Errorf("scanning object info: %w", err)
+ }
+
+ if err := countObjects.Wait(); err != nil {
+ return ObjectsInfo{}, fmt.Errorf("counting objects: %w", err)
+ }
+
+ return info, nil
+}
diff --git a/internal/git/stats/objects_info_test.go b/internal/git/stats/objects_info_test.go
new file mode 100644
index 000000000..c77c61c61
--- /dev/null
+++ b/internal/git/stats/objects_info_test.go
@@ -0,0 +1,285 @@
+//go:build !gitaly_test_sha256
+
+package stats
+
+import (
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus"
+ "github.com/sirupsen/logrus"
+ "github.com/sirupsen/logrus/hooks/test"
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg"
+ "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb"
+)
+
+func TestRepositoryProfile(t *testing.T) {
+ ctx := testhelper.Context(t)
+ cfg := testcfg.Build(t)
+
+ repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{
+ SkipCreationViaService: true,
+ })
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ hasBitmap, err := HasBitmap(repoPath)
+ require.NoError(t, err)
+ require.False(t, hasBitmap, "repository should not have a bitmap initially")
+ packfiles, err := GetPackfiles(repoPath)
+ require.NoError(t, err)
+ require.Empty(t, packfiles)
+ packfilesCount, err := PackfilesCount(repoPath)
+ require.NoError(t, err)
+ require.Zero(t, packfilesCount)
+
+ blobs := 10
+ blobIDs := gittest.WriteBlobs(t, cfg, repoPath, blobs)
+
+ looseObjects, err := LooseObjects(ctx, repo)
+ require.NoError(t, err)
+ require.Equal(t, uint64(blobs), looseObjects)
+
+ for _, blobID := range blobIDs {
+ commitID := gittest.WriteCommit(t, cfg, repoPath,
+ gittest.WithTreeEntries(gittest.TreeEntry{
+ Mode: "100644", Path: "blob", OID: git.ObjectID(blobID),
+ }),
+ )
+ gittest.Exec(t, cfg, "-C", repoPath, "update-ref", "refs/heads/"+blobID, commitID.String())
+ }
+
+ // write a loose object
+ gittest.WriteBlobs(t, cfg, repoPath, 1)
+
+ gittest.Exec(t, cfg, "-C", repoPath, "repack", "-A", "-b", "-d")
+
+ looseObjects, err = LooseObjects(ctx, repo)
+ require.NoError(t, err)
+ require.Equal(t, uint64(1), looseObjects)
+
+ // write another loose object
+ blobID := gittest.WriteBlobs(t, cfg, repoPath, 1)[0]
+
+ // due to OS semantics, ensure that the blob has a timestamp that is after the packfile
+ theFuture := time.Now().Add(10 * time.Minute)
+ require.NoError(t, os.Chtimes(filepath.Join(repoPath, "objects", blobID[0:2], blobID[2:]), theFuture, theFuture))
+
+ looseObjects, err = LooseObjects(ctx, repo)
+ require.NoError(t, err)
+ require.Equal(t, uint64(2), looseObjects)
+}
+
+func TestLogObjectInfo(t *testing.T) {
+ ctx := testhelper.Context(t)
+ cfg := testcfg.Build(t)
+
+ repo1, repoPath1 := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{
+ SkipCreationViaService: true,
+ Seed: gittest.SeedGitLabTest,
+ })
+ repo2, repoPath2 := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{
+ SkipCreationViaService: true,
+ Seed: gittest.SeedGitLabTest,
+ })
+
+ requireObjectsInfo := func(entries []*logrus.Entry) ObjectsInfo {
+ for _, entry := range entries {
+ if entry.Message == "repository objects info" {
+ objectsInfo, ok := entry.Data["objects_info"]
+ require.True(t, ok)
+ require.IsType(t, ObjectsInfo{}, objectsInfo)
+ return objectsInfo.(ObjectsInfo)
+ }
+ }
+
+ require.FailNow(t, "no objects info log entry found")
+ return ObjectsInfo{}
+ }
+
+ t.Run("shared repo with multiple alternates", func(t *testing.T) {
+ locator := config.NewLocator(cfg)
+ storagePath, err := locator.GetStorageByName(repo1.GetStorageName())
+ require.NoError(t, err)
+
+ tmpDir, err := os.MkdirTemp(storagePath, "")
+ require.NoError(t, err)
+ defer func() { require.NoError(t, os.RemoveAll(tmpDir)) }()
+
+ // clone existing local repo with two alternates
+ gittest.Exec(t, cfg, "clone", "--shared", repoPath1, "--reference", repoPath1, "--reference", repoPath2, tmpDir)
+
+ logger, hook := test.NewNullLogger()
+ testCtx := ctxlogrus.ToContext(ctx, logger.WithField("test", "logging"))
+
+ LogObjectsInfo(testCtx, localrepo.NewTestRepo(t, cfg, &gitalypb.Repository{
+ StorageName: repo1.StorageName,
+ RelativePath: filepath.Join(strings.TrimPrefix(tmpDir, storagePath), ".git"),
+ }))
+
+ objectsInfo := requireObjectsInfo(hook.AllEntries())
+ require.Equal(t, []string{repoPath1 + "/objects", repoPath2 + "/objects"}, objectsInfo.Alternates)
+ })
+
+ t.Run("repo without alternates", func(t *testing.T) {
+ logger, hook := test.NewNullLogger()
+ testCtx := ctxlogrus.ToContext(ctx, logger.WithField("test", "logging"))
+
+ LogObjectsInfo(testCtx, localrepo.NewTestRepo(t, cfg, repo2))
+
+ objectsInfo := requireObjectsInfo(hook.AllEntries())
+ require.NotZero(t, objectsInfo.LooseObjects)
+ require.NotZero(t, objectsInfo.LooseObjectsSize)
+ require.NotZero(t, objectsInfo.PackedObjects)
+ require.NotZero(t, objectsInfo.Packfiles)
+ require.NotZero(t, objectsInfo.PackfilesSize)
+ require.Nil(t, objectsInfo.Alternates)
+ })
+}
+
+func TestObjectsInfoForRepository(t *testing.T) {
+ t.Parallel()
+
+ ctx := testhelper.Context(t)
+ cfg := testcfg.Build(t)
+
+ _, alternatePath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{
+ SkipCreationViaService: true,
+ })
+ alternatePath = filepath.Join(alternatePath, "objects")
+
+ for _, tc := range []struct {
+ desc string
+ setup func(t *testing.T, repoPath string)
+ expectedErr error
+ expectedObjectsInfo ObjectsInfo
+ }{
+ {
+ desc: "empty repository",
+ setup: func(*testing.T, string) {
+ },
+ },
+ {
+ desc: "single blob",
+ setup: func(t *testing.T, repoPath string) {
+ gittest.WriteBlob(t, cfg, repoPath, []byte("x"))
+ },
+ expectedObjectsInfo: ObjectsInfo{
+ LooseObjects: 1,
+ LooseObjectsSize: 4,
+ },
+ },
+ {
+ desc: "single packed blob",
+ setup: func(t *testing.T, repoPath string) {
+ blobID := gittest.WriteBlob(t, cfg, repoPath, []byte("x"))
+ gittest.WriteRef(t, cfg, repoPath, "refs/tags/blob", blobID)
+ // We use `-d`, which also prunes objects that have been packed.
+ gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad")
+ },
+ expectedObjectsInfo: ObjectsInfo{
+ PackedObjects: 1,
+ Packfiles: 1,
+ PackfilesSize: 1,
+ },
+ },
+ {
+ desc: "single pruneable blob",
+ setup: func(t *testing.T, repoPath string) {
+ blobID := gittest.WriteBlob(t, cfg, repoPath, []byte("x"))
+ gittest.WriteRef(t, cfg, repoPath, "refs/tags/blob", blobID)
+ // This time we don't use `-d`, so the object will exist both in
+ // loose and packed form.
+ gittest.Exec(t, cfg, "-C", repoPath, "repack", "-a")
+ },
+ expectedObjectsInfo: ObjectsInfo{
+ LooseObjects: 1,
+ LooseObjectsSize: 4,
+ PackedObjects: 1,
+ Packfiles: 1,
+ PackfilesSize: 1,
+ PruneableObjects: 1,
+ },
+ },
+ {
+ desc: "garbage",
+ setup: func(t *testing.T, repoPath string) {
+ garbagePath := filepath.Join(repoPath, "objects", "pack", "garbage")
+ require.NoError(t, os.WriteFile(garbagePath, []byte("x"), 0o600))
+ },
+ expectedObjectsInfo: ObjectsInfo{
+ Garbage: 1,
+ // git-count-objects(1) somehow does not count this file's size,
+ // which I've verified manually.
+ GarbageSize: 0,
+ },
+ },
+ {
+ desc: "alternates",
+ setup: func(t *testing.T, repoPath string) {
+ infoAlternatesPath := filepath.Join(repoPath, "objects", "info", "alternates")
+ require.NoError(t, os.WriteFile(infoAlternatesPath, []byte(alternatePath), 0o600))
+ },
+ expectedObjectsInfo: ObjectsInfo{
+ Alternates: []string{
+ alternatePath,
+ },
+ },
+ },
+ {
+ desc: "all together",
+ setup: func(t *testing.T, repoPath string) {
+ infoAlternatesPath := filepath.Join(repoPath, "objects", "info", "alternates")
+ require.NoError(t, os.WriteFile(infoAlternatesPath, []byte(alternatePath), 0o600))
+
+ // We write a single packed blob.
+ blobID := gittest.WriteBlob(t, cfg, repoPath, []byte("x"))
+ gittest.WriteRef(t, cfg, repoPath, "refs/tags/blob", blobID)
+ gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad")
+
+ // And two loose ones.
+ gittest.WriteBlob(t, cfg, repoPath, []byte("1"))
+ gittest.WriteBlob(t, cfg, repoPath, []byte("2"))
+
+ // And three garbage-files. This is done so we've got unique counts
+ // everywhere.
+ for _, file := range []string{"garbage1", "garbage2", "garbage3"} {
+ garbagePath := filepath.Join(repoPath, "objects", "pack", file)
+ require.NoError(t, os.WriteFile(garbagePath, []byte("x"), 0o600))
+ }
+ },
+ expectedObjectsInfo: ObjectsInfo{
+ LooseObjects: 2,
+ LooseObjectsSize: 8,
+ PackedObjects: 1,
+ Packfiles: 1,
+ PackfilesSize: 1,
+ Garbage: 3,
+ Alternates: []string{
+ alternatePath,
+ },
+ },
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{
+ SkipCreationViaService: true,
+ })
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ tc.setup(t, repoPath)
+
+ objectsInfo, err := ObjectsInfoForRepository(ctx, repo)
+ require.Equal(t, tc.expectedErr, err)
+ require.Equal(t, tc.expectedObjectsInfo, objectsInfo)
+ })
+ }
+}
diff --git a/internal/git/stats/profile.go b/internal/git/stats/profile.go
deleted file mode 100644
index 85c4015f8..000000000
--- a/internal/git/stats/profile.go
+++ /dev/null
@@ -1,151 +0,0 @@
-package stats
-
-import (
- "context"
- "errors"
- "io/fs"
- "os"
- "path/filepath"
- "strconv"
- "time"
-
- "gitlab.com/gitlab-org/gitaly/v15/internal/git"
- "gitlab.com/gitlab-org/gitaly/v15/internal/git/repository"
-)
-
-// HasBitmap returns whether or not the repository contains an object bitmap.
-func HasBitmap(repoPath string) (bool, error) {
- hasBitmap, err := hasBitmap(repoPath)
- if err != nil {
- return false, err
- }
- return hasBitmap, nil
-}
-
-// PackfilesCount returns the number of packfiles a repository has.
-func PackfilesCount(repoPath string) (int, error) {
- packFiles, err := GetPackfiles(repoPath)
- if err != nil {
- return 0, err
- }
-
- return len(packFiles), nil
-}
-
-// GetPackfiles returns the FileInfo of packfiles inside a repository.
-func GetPackfiles(repoPath string) ([]fs.DirEntry, error) {
- files, err := os.ReadDir(filepath.Join(repoPath, "objects/pack/"))
- if err != nil {
- return nil, err
- }
-
- var packFiles []fs.DirEntry
- for _, f := range files {
- if filepath.Ext(f.Name()) == ".pack" {
- packFiles = append(packFiles, f)
- }
- }
-
- return packFiles, nil
-}
-
-// UnpackedObjects returns the number of loose objects that have a timestamp later than the newest
-// packfile.
-func UnpackedObjects(repoPath string) (int64, error) {
- unpackedObjects, err := getUnpackedObjects(repoPath)
- if err != nil {
- return 0, err
- }
-
- return unpackedObjects, nil
-}
-
-// LooseObjects returns the number of loose objects that are not in a packfile.
-func LooseObjects(ctx context.Context, gitCmdFactory git.CommandFactory, repository repository.GitRepo) (int64, error) {
- cmd, err := gitCmdFactory.New(ctx, repository, git.SubCmd{Name: "count-objects", Flags: []git.Option{git.Flag{Name: "--verbose"}}})
- if err != nil {
- return 0, err
- }
-
- objectStats, err := readObjectInfoStatistic(cmd)
- if err != nil {
- return 0, err
- }
-
- count, ok := objectStats["count"].(int64)
- if !ok {
- return 0, errors.New("could not get object count")
- }
-
- return count, nil
-}
-
-func hasBitmap(repoPath string) (bool, error) {
- bitmaps, err := filepath.Glob(filepath.Join(repoPath, "objects", "pack", "*.bitmap"))
- if err != nil {
- return false, err
- }
-
- return len(bitmaps) > 0, nil
-}
-
-func getUnpackedObjects(repoPath string) (int64, error) {
- objectDir := filepath.Join(repoPath, "objects")
-
- packFiles, err := filepath.Glob(filepath.Join(objectDir, "pack", "*.pack"))
- if err != nil {
- return 0, err
- }
-
- var newestPackfileModtime time.Time
-
- for _, packFilePath := range packFiles {
- stat, err := os.Stat(packFilePath)
- if err != nil {
- return 0, err
- }
- if stat.ModTime().After(newestPackfileModtime) {
- newestPackfileModtime = stat.ModTime()
- }
- }
-
- var unpackedObjects int64
- if err = filepath.Walk(objectDir, func(path string, info os.FileInfo, err error) error {
- if objectDir == path {
- return nil
- }
-
- if info.IsDir() {
- if err := skipNonObjectDir(objectDir, path); err != nil {
- return err
- }
- }
-
- if !info.IsDir() && info.ModTime().After(newestPackfileModtime) {
- unpackedObjects++
- }
-
- return nil
- }); err != nil {
- return 0, err
- }
-
- return unpackedObjects, nil
-}
-
-func skipNonObjectDir(root, path string) error {
- rel, err := filepath.Rel(root, path)
- if err != nil {
- return err
- }
-
- if len(rel) != 2 {
- return filepath.SkipDir
- }
-
- if _, err := strconv.ParseUint(rel, 16, 8); err != nil {
- return filepath.SkipDir
- }
-
- return nil
-}
diff --git a/internal/git/stats/profile_test.go b/internal/git/stats/profile_test.go
deleted file mode 100644
index 9d1c5ecf3..000000000
--- a/internal/git/stats/profile_test.go
+++ /dev/null
@@ -1,86 +0,0 @@
-//go:build !gitaly_test_sha256
-
-package stats
-
-import (
- "os"
- "path/filepath"
- "testing"
- "time"
-
- "github.com/stretchr/testify/require"
- "gitlab.com/gitlab-org/gitaly/v15/internal/git"
- "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest"
- "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
- "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg"
-)
-
-func TestRepositoryProfile(t *testing.T) {
- ctx := testhelper.Context(t)
- cfg := testcfg.Build(t)
-
- testRepo, testRepoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{
- SkipCreationViaService: true,
- })
-
- hasBitmap, err := HasBitmap(testRepoPath)
- require.NoError(t, err)
- require.False(t, hasBitmap, "repository should not have a bitmap initially")
- unpackedObjects, err := UnpackedObjects(testRepoPath)
- require.NoError(t, err)
- require.Zero(t, unpackedObjects)
- packfiles, err := GetPackfiles(testRepoPath)
- require.NoError(t, err)
- require.Empty(t, packfiles)
- packfilesCount, err := PackfilesCount(testRepoPath)
- require.NoError(t, err)
- require.Zero(t, packfilesCount)
-
- blobs := 10
- blobIDs := gittest.WriteBlobs(t, cfg, testRepoPath, blobs)
-
- unpackedObjects, err = UnpackedObjects(testRepoPath)
- require.NoError(t, err)
- require.Equal(t, int64(blobs), unpackedObjects)
-
- gitCmdFactory := gittest.NewCommandFactory(t, cfg)
- looseObjects, err := LooseObjects(ctx, gitCmdFactory, testRepo)
- require.NoError(t, err)
- require.Equal(t, int64(blobs), looseObjects)
-
- for _, blobID := range blobIDs {
- commitID := gittest.WriteCommit(t, cfg, testRepoPath,
- gittest.WithTreeEntries(gittest.TreeEntry{
- Mode: "100644", Path: "blob", OID: git.ObjectID(blobID),
- }),
- )
- gittest.Exec(t, cfg, "-C", testRepoPath, "update-ref", "refs/heads/"+blobID, commitID.String())
- }
-
- // write a loose object
- gittest.WriteBlobs(t, cfg, testRepoPath, 1)
-
- gittest.Exec(t, cfg, "-C", testRepoPath, "repack", "-A", "-b", "-d")
-
- unpackedObjects, err = UnpackedObjects(testRepoPath)
- require.NoError(t, err)
- require.Zero(t, unpackedObjects)
- looseObjects, err = LooseObjects(ctx, gitCmdFactory, testRepo)
- require.NoError(t, err)
- require.Equal(t, int64(1), looseObjects)
-
- // write another loose object
- blobID := gittest.WriteBlobs(t, cfg, testRepoPath, 1)[0]
-
- // due to OS semantics, ensure that the blob has a timestamp that is after the packfile
- theFuture := time.Now().Add(10 * time.Minute)
- require.NoError(t, os.Chtimes(filepath.Join(testRepoPath, "objects", blobID[0:2], blobID[2:]), theFuture, theFuture))
-
- unpackedObjects, err = UnpackedObjects(testRepoPath)
- require.NoError(t, err)
- require.Equal(t, int64(1), unpackedObjects)
-
- looseObjects, err = LooseObjects(ctx, gitCmdFactory, testRepo)
- require.NoError(t, err)
- require.Equal(t, int64(2), looseObjects)
-}
diff --git a/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go b/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go
index bb4b28984..389505b76 100644
--- a/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go
+++ b/internal/gitaly/service/objectpool/fetch_into_object_pool_test.go
@@ -20,6 +20,7 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/git/catfile"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/housekeeping"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git/stats"
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config"
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/transaction"
"gitlab.com/gitlab-org/gitaly/v15/internal/metadata"
@@ -251,26 +252,9 @@ func TestFetchIntoObjectPool_CollectLogStatistics(t *testing.T) {
_, err = client.FetchIntoObjectPool(ctx, req)
require.NoError(t, err)
- const key = "count_objects"
for _, logEntry := range hook.AllEntries() {
- if stats, ok := logEntry.Data[key]; ok {
- require.IsType(t, map[string]interface{}{}, stats)
-
- var keys []string
- for key := range stats.(map[string]interface{}) {
- keys = append(keys, key)
- }
-
- require.ElementsMatch(t, []string{
- "count",
- "garbage",
- "in-pack",
- "packs",
- "prune-packable",
- "size",
- "size-garbage",
- "size-pack",
- }, keys)
+ if objectsInfo, ok := logEntry.Data["objects_info"]; ok {
+ require.IsType(t, stats.ObjectsInfo{}, objectsInfo)
return
}
}
diff --git a/internal/gitaly/service/repository/repack_test.go b/internal/gitaly/service/repository/repack_test.go
index acb581305..53fd35bdd 100644
--- a/internal/gitaly/service/repository/repack_test.go
+++ b/internal/gitaly/service/repository/repack_test.go
@@ -222,16 +222,13 @@ func TestRepackFullCollectLogStatistics(t *testing.T) {
func mustCountObjectLog(tb testing.TB, entries ...*logrus.Entry) {
tb.Helper()
- const key = "count_objects"
+ const key = "objects_info"
for _, entry := range entries {
- if entry.Message == "git repo statistic" {
+ if entry.Message == "repository objects info" {
require.Contains(tb, entry.Data, "grpc.request.glProjectPath")
require.Contains(tb, entry.Data, "grpc.request.glRepository")
- require.Contains(tb, entry.Data, key, "statistics not found")
-
- objectStats, ok := entry.Data[key].(map[string]interface{})
- require.True(tb, ok, "expected count_objects to be a map")
- require.Contains(tb, objectStats, "count")
+ require.Contains(tb, entry.Data, key, "objects info not found")
+ require.IsType(tb, stats.ObjectsInfo{}, entry.Data[key])
return
}
}