diff options
author | Quang-Minh Nguyen <qmnguyen@gitlab.com> | 2022-12-02 07:09:44 +0300 |
---|---|---|
committer | Quang-Minh Nguyen <qmnguyen@gitlab.com> | 2022-12-02 07:09:44 +0300 |
commit | 70f8d1555481539085483e2c7af663e6fb63d39b (patch) | |
tree | 9ada06fd4d4cbc9bfd2dc142f432b4aeafd198ab | |
parent | 4f55944e6bdb1eb2b2b9f9d1aadc654372dedd99 (diff) | |
parent | e5f11a2ed2372718b5653dc45614b5b4cc401780 (diff) |
Merge branch 'pks-git-stats-replace-object-count' into 'master'
git/stats: Get rid of spawning git-count-objects(1)
See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5095
Merged-by: Quang-Minh Nguyen <qmnguyen@gitlab.com>
Approved-by: karthik nayak <knayak@gitlab.com>
Approved-by: Quang-Minh Nguyen <qmnguyen@gitlab.com>
Co-authored-by: Patrick Steinhardt <psteinhardt@gitlab.com>
-rw-r--r-- | internal/git/housekeeping/optimization_strategy.go | 6 | ||||
-rw-r--r-- | internal/git/housekeeping/optimization_strategy_test.go | 3 | ||||
-rw-r--r-- | internal/git/housekeeping/optimize_repository_test.go | 8 | ||||
-rw-r--r-- | internal/git/stats/objects_info.go | 145 | ||||
-rw-r--r-- | internal/git/stats/objects_info_test.go | 84 |
5 files changed, 117 insertions, 129 deletions
diff --git a/internal/git/housekeeping/optimization_strategy.go b/internal/git/housekeeping/optimization_strategy.go index c877e00f5..254f97a57 100644 --- a/internal/git/housekeeping/optimization_strategy.go +++ b/internal/git/housekeeping/optimization_strategy.go @@ -31,10 +31,6 @@ type OptimizationStrategy interface { ShouldWriteCommitGraph() (bool, WriteCommitGraphConfig) } -// CutOffTime is time delta that is used to indicate cutoff wherein an object would be considered -// old. Currently this is set to being 2 weeks (2 * 7days * 24hours). -const CutOffTime = -14 * 24 * time.Hour - // HeuristicalOptimizationStrategy is an optimization strategy that is based on a set of // heuristics. type HeuristicalOptimizationStrategy struct { @@ -97,7 +93,7 @@ func NewHeuristicalOptimizationStrategy(ctx context.Context, repo *localrepo.Rep strategy.packfileCount = packfilesInfo.Count strategy.packfileSize = packfilesInfo.Size - looseObjectsInfo, err := stats.LooseObjectsInfoForRepository(repo, time.Now().Add(CutOffTime)) + looseObjectsInfo, err := stats.LooseObjectsInfoForRepository(repo, time.Now().Add(stats.StaleObjectsGracePeriod)) if err != nil { return strategy, fmt.Errorf("estimating loose object count: %w", err) } diff --git a/internal/git/housekeeping/optimization_strategy_test.go b/internal/git/housekeeping/optimization_strategy_test.go index 9db8a7439..e768307c3 100644 --- a/internal/git/housekeeping/optimization_strategy_test.go +++ b/internal/git/housekeeping/optimization_strategy_test.go @@ -12,6 +12,7 @@ import ( "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/stats" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" @@ -74,7 +75,7 @@ func TestNewHeuristicalOptimizationStrategy_variousParameters(t *testing.T) { // ... and one stale object. staleObjectPath := filepath.Join(shard, "5678") require.NoError(t, os.WriteFile(staleObjectPath, nil, 0o644)) - twoWeeksAgo := time.Now().Add(CutOffTime) + twoWeeksAgo := time.Now().Add(stats.StaleObjectsGracePeriod) require.NoError(t, os.Chtimes(staleObjectPath, twoWeeksAgo, twoWeeksAgo)) return repoProto diff --git a/internal/git/housekeeping/optimize_repository_test.go b/internal/git/housekeeping/optimize_repository_test.go index 0e442fccf..b3668963e 100644 --- a/internal/git/housekeeping/optimize_repository_test.go +++ b/internal/git/housekeeping/optimize_repository_test.go @@ -38,8 +38,8 @@ func TestRepackIfNeeded(t *testing.T) { info, err := stats.ObjectsInfoForRepository(ctx, repo) require.NoError(t, err) - require.Equal(t, expectedPackfiles, info.Packfiles) - require.Equal(t, expectedLooseObjects, info.LooseObjects) + require.Equal(t, expectedPackfiles, info.Packfiles.Count) + require.Equal(t, expectedLooseObjects, info.LooseObjects.Count) } t.Run("no repacking", func(t *testing.T) { @@ -277,7 +277,7 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1 // We set the object's mtime to be almost two weeks ago. Given that // our timeout is at exactly two weeks this shouldn't caused them to // get pruned. - almostTwoWeeksAgo := time.Now().Add(CutOffTime).Add(time.Minute) + almostTwoWeeksAgo := time.Now().Add(stats.StaleObjectsGracePeriod).Add(time.Minute) for i := 0; i < looseObjectLimit+1; i++ { blobPath := filepath.Join(repoPath, "objects", "17", fmt.Sprintf("%d", i)) @@ -309,7 +309,7 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1 // broken, and thus we'll retry to prune them afterwards. require.NoError(t, os.MkdirAll(filepath.Join(repoPath, "objects", "17"), 0o755)) - moreThanTwoWeeksAgo := time.Now().Add(CutOffTime).Add(-time.Minute) + moreThanTwoWeeksAgo := time.Now().Add(stats.StaleObjectsGracePeriod).Add(-time.Minute) for i := 0; i < looseObjectLimit+1; i++ { blobPath := filepath.Join(repoPath, "objects", "17", fmt.Sprintf("%d", i)) diff --git a/internal/git/stats/objects_info.go b/internal/git/stats/objects_info.go index 471746e76..f0822139a 100644 --- a/internal/git/stats/objects_info.go +++ b/internal/git/stats/objects_info.go @@ -1,22 +1,24 @@ package stats import ( - "bufio" "context" "errors" "fmt" "io/fs" "os" "path/filepath" - "strconv" "strings" "time" "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" - "gitlab.com/gitlab-org/gitaly/v15/internal/git" "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" + "gitlab.com/gitlab-org/gitaly/v15/internal/helper/text" ) +// StaleObjectsGracePeriod is time delta that is used to indicate cutoff wherein an object would be +// considered old. Currently this is set to being 2 weeks (2 * 7days * 24hours). +const StaleObjectsGracePeriod = -14 * 24 * time.Hour + // HasBitmap returns whether or not the repository contains an object bitmap. func HasBitmap(repoPath string) (bool, error) { bitmaps, err := filepath.Glob(filepath.Join(repoPath, "objects", "pack", "*.bitmap")) @@ -61,7 +63,7 @@ func LooseObjects(ctx context.Context, repo *localrepo.Repo) (uint64, error) { return 0, err } - return objectsInfo.LooseObjects, nil + return objectsInfo.LooseObjects.Count, nil } // LogObjectsInfo read statistics of the git repo objects @@ -79,32 +81,12 @@ func LogObjectsInfo(ctx context.Context, repo *localrepo.Repo) { // ObjectsInfo contains information on the object database. type ObjectsInfo struct { - // LooseObjects is the count of loose objects. - LooseObjects uint64 `json:"loose_objects"` - // LooseObjectsSize is the accumulated on-disk size of all loose objects in KiB. - LooseObjectsSize uint64 `json:"loose_objects_size"` - // PrunableObjects is the number of objects that exist both as loose and as packed objects. - // The loose objects may be pruned in that case. - PruneableObjects uint64 `json:"prunable_objects"` - - // PackedObjects is the count of packed objects. - PackedObjects uint64 `json:"packed_objects"` - // Packfiles is the number of packfiles. - Packfiles uint64 `json:"packfiles"` - // PackfilesSize is the accumulated on-disk size of all packfiles in KiB. - PackfilesSize uint64 `json:"packfiles_size"` - // PackfileBitmapExists indicates whether the repository has a bitmap for one of its - // packfiles. - PackfileBitmapExists bool `json:"packfile_bitmap_exists"` - + // LooseObjects contains information about loose objects. + LooseObjects LooseObjectsInfo `json:"loose_objects"` + // Packfiles contains information about packfiles. + Packfiles PackfilesInfo `json:"packfiles"` // CommitGraph contains information about the repository's commit-graphs. CommitGraph CommitGraphInfo `json:"commit_graph"` - - // Garbage is the count of files in the object database that are neither a valid loose - // object nor a valid packfile. - Garbage uint64 `json:"garbage"` - // GarbageSize is the accumulated on-disk size of garbage files. - GarbageSize uint64 `json:"garbage_size"` // Alternates is the list of absolute paths of alternate object databases this repository is // connected to. Alternates []string `json:"alternates"` @@ -112,74 +94,22 @@ type ObjectsInfo struct { // ObjectsInfoForRepository computes the ObjectsInfo for a repository. func ObjectsInfoForRepository(ctx context.Context, repo *localrepo.Repo) (ObjectsInfo, error) { + var info ObjectsInfo + var err error + repoPath, err := repo.Path() if err != nil { return ObjectsInfo{}, err } - countObjects, err := repo.Exec(ctx, git.SubCmd{ - Name: "count-objects", - Flags: []git.Option{git.Flag{Name: "--verbose"}}, - }) + info.LooseObjects, err = LooseObjectsInfoForRepository(repo, time.Now().Add(StaleObjectsGracePeriod)) if err != nil { - return ObjectsInfo{}, fmt.Errorf("running git-count-objects: %w", err) - } - - var info ObjectsInfo - - // The expected format is: - // - // count: 12 - // packs: 2 - // size-garbage: 934 - // alternate: /some/path/to/.git/objects - // alternate: "/some/other path/to/.git/objects" - scanner := bufio.NewScanner(countObjects) - for scanner.Scan() { - line := scanner.Text() - - parts := strings.SplitN(line, ": ", 2) - if len(parts) != 2 { - continue - } - - var err error - switch parts[0] { - case "count": - info.LooseObjects, err = strconv.ParseUint(parts[1], 10, 64) - case "size": - info.LooseObjectsSize, err = strconv.ParseUint(parts[1], 10, 64) - case "in-pack": - info.PackedObjects, err = strconv.ParseUint(parts[1], 10, 64) - case "packs": - info.Packfiles, err = strconv.ParseUint(parts[1], 10, 64) - case "size-pack": - info.PackfilesSize, err = strconv.ParseUint(parts[1], 10, 64) - case "prune-packable": - info.PruneableObjects, err = strconv.ParseUint(parts[1], 10, 64) - case "garbage": - info.Garbage, err = strconv.ParseUint(parts[1], 10, 64) - case "size-garbage": - info.GarbageSize, err = strconv.ParseUint(parts[1], 10, 64) - case "alternate": - info.Alternates = append(info.Alternates, strings.Trim(parts[1], "\" \t\n")) - } - - if err != nil { - return ObjectsInfo{}, fmt.Errorf("parsing %q: %w", parts[0], err) - } - } - - if err := scanner.Err(); err != nil { - return ObjectsInfo{}, fmt.Errorf("scanning object info: %w", err) - } - - if err := countObjects.Wait(); err != nil { - return ObjectsInfo{}, fmt.Errorf("counting objects: %w", err) + return ObjectsInfo{}, fmt.Errorf("counting loose objects: %w", err) } - if info.PackfileBitmapExists, err = HasBitmap(repoPath); err != nil { - return ObjectsInfo{}, fmt.Errorf("checking for bitmap: %w", err) + info.Packfiles, err = PackfilesInfoForRepository(repo) + if err != nil { + return ObjectsInfo{}, fmt.Errorf("counting packfiles: %w", err) } info.CommitGraph, err = CommitGraphInfoForRepository(repoPath) @@ -187,6 +117,11 @@ func ObjectsInfoForRepository(ctx context.Context, repo *localrepo.Repo) (Object return ObjectsInfo{}, fmt.Errorf("checking commit-graph info: %w", err) } + info.Alternates, err = readAlternates(repo) + if err != nil { + return ObjectsInfo{}, fmt.Errorf("reading alterantes: %w", err) + } + return info, nil } @@ -278,6 +213,8 @@ type PackfilesInfo struct { GarbageCount uint64 `json:"garbage_count"` // GarbageSize is the total size of all garbage files in bytes. GarbageSize uint64 `json:"garbage_size"` + // HasBitmap indicates whether the packfiles have a bitmap. + HasBitmap bool `json:"has_bitmap"` } // PackfilesInfoForRepository derives various information about packfiles for the given repository. @@ -330,5 +267,37 @@ func PackfilesInfoForRepository(repo *localrepo.Repo) (PackfilesInfo, error) { } } + if info.HasBitmap, err = HasBitmap(repoPath); err != nil { + return PackfilesInfo{}, fmt.Errorf("checking for bitmap: %w", err) + } + return info, nil } + +func readAlternates(repo *localrepo.Repo) ([]string, error) { + repoPath, err := repo.Path() + if err != nil { + return nil, fmt.Errorf("getting repository path: %w", err) + } + + contents, err := os.ReadFile(filepath.Join(repoPath, "objects", "info", "alternates")) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } + + return nil, fmt.Errorf("reading alternates: %w", err) + } + + relativeAlternatePaths := strings.Split(text.ChompBytes(contents), "\n") + alternatePaths := make([]string, 0, len(relativeAlternatePaths)) + for _, relativeAlternatePath := range relativeAlternatePaths { + if filepath.IsAbs(relativeAlternatePath) { + alternatePaths = append(alternatePaths, relativeAlternatePath) + } else { + alternatePaths = append(alternatePaths, filepath.Join(repoPath, "objects", relativeAlternatePath)) + } + } + + return alternatePaths, nil +} diff --git a/internal/git/stats/objects_info_test.go b/internal/git/stats/objects_info_test.go index 20c67addf..d62195b5e 100644 --- a/internal/git/stats/objects_info_test.go +++ b/internal/git/stats/objects_info_test.go @@ -73,7 +73,7 @@ func TestRepositoryProfile(t *testing.T) { looseObjects, err = LooseObjects(ctx, repo) require.NoError(t, err) - require.Equal(t, uint64(2), looseObjects) + require.EqualValues(t, 2, looseObjects) } func TestLogObjectInfo(t *testing.T) { @@ -150,8 +150,10 @@ func TestLogObjectInfo(t *testing.T) { objectsInfo := requireObjectsInfo(hook.AllEntries()) require.Equal(t, ObjectsInfo{ - LooseObjects: 2, - LooseObjectsSize: 8, + LooseObjects: LooseObjectsInfo{ + Count: 2, + Size: hashDependentSize(142, 158), + }, }, objectsInfo) }) } @@ -184,8 +186,10 @@ func TestObjectsInfoForRepository(t *testing.T) { gittest.WriteBlob(t, cfg, repoPath, []byte("x")) }, expectedObjectsInfo: ObjectsInfo{ - LooseObjects: 1, - LooseObjectsSize: 4, + LooseObjects: LooseObjectsInfo{ + Count: 1, + Size: 16, + }, }, }, { @@ -197,10 +201,11 @@ func TestObjectsInfoForRepository(t *testing.T) { gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad") }, expectedObjectsInfo: ObjectsInfo{ - PackedObjects: 1, - Packfiles: 1, - PackfilesSize: 1, - PackfileBitmapExists: true, + Packfiles: PackfilesInfo{ + Count: 1, + Size: hashDependentSize(42, 54), + HasBitmap: true, + }, }, }, { @@ -213,13 +218,15 @@ func TestObjectsInfoForRepository(t *testing.T) { gittest.Exec(t, cfg, "-C", repoPath, "repack", "-a") }, expectedObjectsInfo: ObjectsInfo{ - LooseObjects: 1, - LooseObjectsSize: 4, - PackedObjects: 1, - Packfiles: 1, - PackfilesSize: 1, - PackfileBitmapExists: true, - PruneableObjects: 1, + LooseObjects: LooseObjectsInfo{ + Count: 1, + Size: 16, + }, + Packfiles: PackfilesInfo{ + Count: 1, + Size: hashDependentSize(42, 54), + HasBitmap: true, + }, }, }, { @@ -229,10 +236,10 @@ func TestObjectsInfoForRepository(t *testing.T) { require.NoError(t, os.WriteFile(garbagePath, []byte("x"), 0o600)) }, expectedObjectsInfo: ObjectsInfo{ - Garbage: 1, - // git-count-objects(1) somehow does not count this file's size, - // which I've verified manually. - GarbageSize: 0, + Packfiles: PackfilesInfo{ + GarbageCount: 1, + GarbageSize: 1, + }, }, }, { @@ -254,8 +261,10 @@ func TestObjectsInfoForRepository(t *testing.T) { gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--reachable") }, expectedObjectsInfo: ObjectsInfo{ - LooseObjects: 2, - LooseObjectsSize: 8, + LooseObjects: LooseObjectsInfo{ + Count: 2, + Size: hashDependentSize(142, 158), + }, CommitGraph: CommitGraphInfo{ Exists: true, }, @@ -268,8 +277,10 @@ func TestObjectsInfoForRepository(t *testing.T) { gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--reachable", "--changed-paths") }, expectedObjectsInfo: ObjectsInfo{ - LooseObjects: 2, - LooseObjectsSize: 8, + LooseObjects: LooseObjectsInfo{ + Count: 2, + Size: hashDependentSize(142, 158), + }, CommitGraph: CommitGraphInfo{ Exists: true, HasBloomFilters: true, @@ -299,13 +310,17 @@ func TestObjectsInfoForRepository(t *testing.T) { } }, expectedObjectsInfo: ObjectsInfo{ - LooseObjects: 2, - LooseObjectsSize: 8, - PackedObjects: 1, - Packfiles: 1, - PackfilesSize: 1, - PackfileBitmapExists: true, - Garbage: 3, + LooseObjects: LooseObjectsInfo{ + Count: 2, + Size: 32, + }, + Packfiles: PackfilesInfo{ + Count: 1, + Size: hashDependentSize(42, 54), + GarbageCount: 3, + GarbageSize: 3, + HasBitmap: true, + }, Alternates: []string{ alternatePath, }, @@ -620,3 +635,10 @@ func TestPackfileInfoForRepository(t *testing.T) { }) }) } + +func hashDependentSize(sha1, sha256 uint64) uint64 { + if gittest.DefaultObjectHash.Format == "sha1" { + return sha1 + } + return sha256 +} |