Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2022-11-25 14:32:00 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2022-11-30 17:46:14 +0300
commit19bb255eebdd4a3d433137456ab4951115e9fdbc (patch)
tree801291b1245f048a17455b7210ce0f208774e369
parent5fbec7e59bf37c6696ffb19819aa15f4e4087534 (diff)
git/stats: Report total object size when counting loose objects
As we iterate over the loose object sizes, we can easily account for the total size of loose objects, as we already have that info available anyway. Tracking this number here will eventually allow us to get rid of the external git-count-objects(1) process, which currently does the bookkeeping for us. Convert the function to track the total loose object size. To avoid confusion at callsites and to make this function more extensible, we introduce a new structure `LooseObjectsInfo` that contains all relevant information. To match existing naming in the package, this commit also renames `CountLooseObjects()` to `LooseObjectsInfoForRepository()`.
-rw-r--r--internal/git/housekeeping/optimization_strategy.go6
-rw-r--r--internal/git/stats/objects_info.go29
-rw-r--r--internal/git/stats/objects_info_test.go66
3 files changed, 55 insertions, 46 deletions
diff --git a/internal/git/housekeeping/optimization_strategy.go b/internal/git/housekeeping/optimization_strategy.go
index 8b331b0f3..d2b819681 100644
--- a/internal/git/housekeeping/optimization_strategy.go
+++ b/internal/git/housekeeping/optimization_strategy.go
@@ -97,15 +97,17 @@ func NewHeuristicalOptimizationStrategy(ctx context.Context, repo *localrepo.Rep
strategy.packfileCount = packfilesInfo.Count
strategy.packfileSize = packfilesInfo.Size
- strategy.looseObjectCount, err = stats.CountLooseObjects(repo, time.Now())
+ looseObjectsInfo, err := stats.LooseObjectsInfoForRepository(repo, time.Now())
if err != nil {
return strategy, fmt.Errorf("estimating loose object count: %w", err)
}
+ strategy.looseObjectCount = looseObjectsInfo.Count
- strategy.oldLooseObjectCount, err = stats.CountLooseObjects(repo, time.Now().Add(CutOffTime))
+ oldLooseObjectsInfo, err := stats.LooseObjectsInfoForRepository(repo, time.Now().Add(CutOffTime))
if err != nil {
return strategy, fmt.Errorf("estimating old loose object count: %w", err)
}
+ strategy.oldLooseObjectCount = oldLooseObjectsInfo.Count
strategy.looseRefsCount, strategy.packedRefsSize, err = countLooseAndPackedRefs(ctx, repo)
if err != nil {
diff --git a/internal/git/stats/objects_info.go b/internal/git/stats/objects_info.go
index db9a54d7f..1c5553375 100644
--- a/internal/git/stats/objects_info.go
+++ b/internal/git/stats/objects_info.go
@@ -190,16 +190,24 @@ func ObjectsInfoForRepository(ctx context.Context, repo *localrepo.Repo) (Object
return info, nil
}
-// CountLooseObjects counts the number of loose objects in the repository. If a cutoff date is
-// given, then this function will only take into account objects which are older than the given
-// point in time.
-func CountLooseObjects(repo *localrepo.Repo, cutoffDate time.Time) (uint64, error) {
+// LooseObjectsInfo contains information about loose objects.
+type LooseObjectsInfo struct {
+ // Count is the number of loose objects.
+ Count uint64 `json:"count"`
+ // Size is the total size of all loose objects in bytes.
+ Size uint64 `json:"size"`
+}
+
+// LooseObjectsInfoForRepository derives information about loose objects in the repository. If a
+// cutoff date is given, then this function will only take into account objects which are older than
+// the given point in time.
+func LooseObjectsInfoForRepository(repo *localrepo.Repo, cutoffDate time.Time) (LooseObjectsInfo, error) {
repoPath, err := repo.Path()
if err != nil {
- return 0, fmt.Errorf("getting repository path: %w", err)
+ return LooseObjectsInfo{}, fmt.Errorf("getting repository path: %w", err)
}
- var looseObjects uint64
+ var info LooseObjectsInfo
for i := 0; i <= 0xFF; i++ {
entries, err := os.ReadDir(filepath.Join(repoPath, "objects", fmt.Sprintf("%02x", i)))
if err != nil {
@@ -207,7 +215,7 @@ func CountLooseObjects(repo *localrepo.Repo, cutoffDate time.Time) (uint64, erro
continue
}
- return 0, fmt.Errorf("reading loose object shard: %w", err)
+ return LooseObjectsInfo{}, fmt.Errorf("reading loose object shard: %w", err)
}
for _, entry := range entries {
@@ -221,18 +229,19 @@ func CountLooseObjects(repo *localrepo.Repo, cutoffDate time.Time) (uint64, erro
continue
}
- return 0, fmt.Errorf("reading object info: %w", err)
+ return LooseObjectsInfo{}, fmt.Errorf("reading object info: %w", err)
}
if entryInfo.ModTime().After(cutoffDate) {
continue
}
- looseObjects++
+ info.Count++
+ info.Size += uint64(entryInfo.Size())
}
}
- return looseObjects, nil
+ return info, nil
}
func isValidLooseObjectName(s string) bool {
diff --git a/internal/git/stats/objects_info_test.go b/internal/git/stats/objects_info_test.go
index 1d8873fbe..2308e14ef 100644
--- a/internal/git/stats/objects_info_test.go
+++ b/internal/git/stats/objects_info_test.go
@@ -340,12 +340,15 @@ func TestCountLooseObjects(t *testing.T) {
return localrepo.NewTestRepo(t, cfg, repoProto), repoPath
}
+ requireLooseObjectsInfo := func(t *testing.T, repo *localrepo.Repo, cutoff time.Time, expectedInfo LooseObjectsInfo) {
+ info, err := LooseObjectsInfoForRepository(repo, cutoff)
+ require.NoError(t, err)
+ require.Equal(t, expectedInfo, info)
+ }
+
t.Run("empty repository", func(t *testing.T) {
repo, _ := createRepo(t)
-
- looseObjects, err := CountLooseObjects(repo, time.Now())
- require.NoError(t, err)
- require.Zero(t, looseObjects)
+ requireLooseObjectsInfo(t, repo, time.Now(), LooseObjectsInfo{})
})
t.Run("object in random shard", func(t *testing.T) {
@@ -353,31 +356,27 @@ func TestCountLooseObjects(t *testing.T) {
differentShard := filepath.Join(repoPath, "objects", "a0")
require.NoError(t, os.MkdirAll(differentShard, 0o755))
+ require.NoError(t, os.WriteFile(filepath.Join(differentShard, "123456"), []byte("foobar"), 0o644))
- object, err := os.Create(filepath.Join(differentShard, "123456"))
- require.NoError(t, err)
- testhelper.MustClose(t, object)
-
- looseObjects, err := CountLooseObjects(repo, time.Now())
- require.NoError(t, err)
- require.EqualValues(t, 1, looseObjects)
+ requireLooseObjectsInfo(t, repo, time.Now(), LooseObjectsInfo{
+ Count: 1,
+ Size: 6,
+ })
})
t.Run("objects in multiple shards", func(t *testing.T) {
repo, repoPath := createRepo(t)
- for _, shard := range []string{"00", "17", "32", "ff"} {
+ for i, shard := range []string{"00", "17", "32", "ff"} {
shardPath := filepath.Join(repoPath, "objects", shard)
require.NoError(t, os.MkdirAll(shardPath, 0o755))
-
- object, err := os.Create(filepath.Join(shardPath, "123456"))
- require.NoError(t, err)
- testhelper.MustClose(t, object)
+ require.NoError(t, os.WriteFile(filepath.Join(shardPath, "123456"), make([]byte, i), 0o644))
}
- looseObjects, err := CountLooseObjects(repo, time.Now())
- require.NoError(t, err)
- require.EqualValues(t, 4, looseObjects)
+ requireLooseObjectsInfo(t, repo, time.Now(), LooseObjectsInfo{
+ Count: 4,
+ Size: 6,
+ })
})
t.Run("object in shard with grace period", func(t *testing.T) {
@@ -396,22 +395,21 @@ func TestCountLooseObjects(t *testing.T) {
beforeCutoffDate := cutoffDate.Add(-1 * time.Minute)
for _, objectPath := range objectPaths {
- require.NoError(t, os.WriteFile(objectPath, nil, 0o644))
+ require.NoError(t, os.WriteFile(objectPath, []byte("1"), 0o644))
require.NoError(t, os.Chtimes(objectPath, afterCutoffDate, afterCutoffDate))
}
// Objects are recent, so with the cutoff-date they shouldn't be counted.
- looseObjects, err := CountLooseObjects(repo, cutoffDate)
- require.NoError(t, err)
- require.EqualValues(t, 0, looseObjects)
+ requireLooseObjectsInfo(t, repo, time.Now(), LooseObjectsInfo{})
for i, objectPath := range objectPaths {
// Modifying the object's mtime should cause it to be counted.
require.NoError(t, os.Chtimes(objectPath, beforeCutoffDate, beforeCutoffDate))
- looseObjects, err = CountLooseObjects(repo, cutoffDate)
- require.NoError(t, err)
- require.EqualValues(t, i+1, looseObjects)
+ requireLooseObjectsInfo(t, repo, time.Now(), LooseObjectsInfo{
+ Count: uint64(i) + 1,
+ Size: uint64(i) + 1,
+ })
}
})
@@ -425,9 +423,9 @@ func TestCountLooseObjects(t *testing.T) {
require.NoError(t, os.WriteFile(filepath.Join(shard, objectName), nil, 0o644))
}
- looseObjects, err := CountLooseObjects(repo, time.Now())
- require.NoError(t, err)
- require.EqualValues(t, 1, looseObjects)
+ requireLooseObjectsInfo(t, repo, time.Now(), LooseObjectsInfo{
+ Count: 1,
+ })
})
}
@@ -447,7 +445,7 @@ func BenchmarkCountLooseObjects(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
- _, err := CountLooseObjects(repo, time.Now())
+ _, err := LooseObjectsInfoForRepository(repo, time.Now())
require.NoError(b, err)
}
})
@@ -461,7 +459,7 @@ func BenchmarkCountLooseObjects(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
- _, err := CountLooseObjects(repo, time.Now())
+ _, err := LooseObjectsInfoForRepository(repo, time.Now())
require.NoError(b, err)
}
})
@@ -477,7 +475,7 @@ func BenchmarkCountLooseObjects(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
- _, err := CountLooseObjects(repo, time.Now())
+ _, err := LooseObjectsInfoForRepository(repo, time.Now())
require.NoError(b, err)
}
})
@@ -507,7 +505,7 @@ func BenchmarkCountLooseObjects(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
- _, err := CountLooseObjects(repo, time.Now())
+ _, err := LooseObjectsInfoForRepository(repo, time.Now())
require.NoError(b, err)
}
})
@@ -527,7 +525,7 @@ func BenchmarkCountLooseObjects(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
- _, err := CountLooseObjects(repo, time.Now())
+ _, err := LooseObjectsInfoForRepository(repo, time.Now())
require.NoError(b, err)
}
})