Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Patrick Steinhardt <psteinhardt@gitlab.com> 2022-11-25 14:03:48 +0300
committer Patrick Steinhardt <psteinhardt@gitlab.com> 2022-11-30 17:46:13 +0300
commit 5fbec7e59bf37c6696ffb19819aa15f4e4087534 (patch)
tree a62a0b8c7e1f693cddab68554e82236d05efe49b
parent feee635a4481c884f1aae8093487360772d7c61c (diff)
housekeeping: Move `CountLooseObjects` into `git/stats`
Move `CountLooseObjects` into the `git/stats` package. This will be used so that we can derive information about loose objects without having to spawn git-count-objects(1).
-rw-r--r-- internal/git/housekeeping/optimization_strategy.go 59
-rw-r--r-- internal/git/housekeeping/optimization_strategy_test.go 203
-rw-r--r-- internal/git/stats/objects_info.go 55
-rw-r--r-- internal/git/stats/objects_info_test.go 207
4 files changed, 264 insertions, 260 deletions
diff --git a/internal/git/housekeeping/optimization_strategy.go b/internal/git/housekeeping/optimization_strategy.go
index 8d26aff13..8b331b0f3 100644
--- a/internal/git/housekeeping/optimization_strategy.go
+++ b/internal/git/housekeeping/optimization_strategy.go
@@ -8,7 +8,6 @@ import (
"math"
"os"
"path/filepath"
- "strings"
"time"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
@@ -98,12 +97,12 @@ func NewHeuristicalOptimizationStrategy(ctx context.Context, repo *localrepo.Rep
strategy.packfileCount = packfilesInfo.Count
strategy.packfileSize = packfilesInfo.Size
- strategy.looseObjectCount, err = countLooseObjects(repo, time.Now())
+ strategy.looseObjectCount, err = stats.CountLooseObjects(repo, time.Now())
if err != nil {
return strategy, fmt.Errorf("estimating loose object count: %w", err)
}
- strategy.oldLooseObjectCount, err = countLooseObjects(repo, time.Now().Add(CutOffTime))
+ strategy.oldLooseObjectCount, err = stats.CountLooseObjects(repo, time.Now().Add(CutOffTime))
if err != nil {
return strategy, fmt.Errorf("estimating old loose object count: %w", err)
}
@@ -211,60 +210,6 @@ func (s HeuristicalOptimizationStrategy) ShouldRepackObjects() (bool, RepackObje
return false, RepackObjectsConfig{}
}
-// countLooseObjects counts the number of loose objects in the repository. If a cutoff date is
-// given, then this function will only take into account objects which are older than the given
-// point in time.
-func countLooseObjects(repo *localrepo.Repo, cutoffDate time.Time) (uint64, error) {
- repoPath, err := repo.Path()
- if err != nil {
- return 0, fmt.Errorf("getting repository path: %w", err)
- }
-
- var looseObjects uint64
- for i := 0; i <= 0xFF; i++ {
- entries, err := os.ReadDir(filepath.Join(repoPath, "objects", fmt.Sprintf("%02x", i)))
- if err != nil {
- if errors.Is(err, os.ErrNotExist) {
- continue
- }
-
- return 0, fmt.Errorf("reading loose object shard: %w", err)
- }
-
- for _, entry := range entries {
- if !isValidLooseObjectName(entry.Name()) {
- continue
- }
-
- entryInfo, err := entry.Info()
- if err != nil {
- if errors.Is(err, fs.ErrNotExist) {
- continue
- }
-
- return 0, fmt.Errorf("reading object info: %w", err)
- }
-
- if entryInfo.ModTime().After(cutoffDate) {
- continue
- }
-
- looseObjects++
- }
- }
-
- return looseObjects, nil
-}
-
-func isValidLooseObjectName(s string) bool {
- for _, c := range []byte(s) {
- if strings.IndexByte("0123456789abcdef", c) < 0 {
- return false
- }
- }
- return true
-}
-
// ShouldWriteCommitGraph determines whether we need to write the commit-graph and how it should be
// written.
func (s HeuristicalOptimizationStrategy) ShouldWriteCommitGraph() (bool, WriteCommitGraphConfig) {
diff --git a/internal/git/housekeeping/optimization_strategy_test.go b/internal/git/housekeeping/optimization_strategy_test.go
index e03aa65ce..9db8a7439 100644
--- a/internal/git/housekeeping/optimization_strategy_test.go
+++ b/internal/git/housekeeping/optimization_strategy_test.go
@@ -4,7 +4,6 @@ package housekeeping
import (
"fmt"
- "math"
"os"
"path/filepath"
"testing"
@@ -679,208 +678,6 @@ func TestHeuristicalOptimizationStrategy_NeedsWriteCommitGraph(t *testing.T) {
}
}
-func TestCountLooseObjects(t *testing.T) {
- t.Parallel()
-
- ctx := testhelper.Context(t)
- cfg := testcfg.Build(t)
-
- createRepo := func(t *testing.T) (*localrepo.Repo, string) {
- repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{
- SkipCreationViaService: true,
- })
- return localrepo.NewTestRepo(t, cfg, repoProto), repoPath
- }
-
- t.Run("empty repository", func(t *testing.T) {
- repo, _ := createRepo(t)
-
- looseObjects, err := countLooseObjects(repo, time.Now())
- require.NoError(t, err)
- require.Zero(t, looseObjects)
- })
-
- t.Run("object in random shard", func(t *testing.T) {
- repo, repoPath := createRepo(t)
-
- differentShard := filepath.Join(repoPath, "objects", "a0")
- require.NoError(t, os.MkdirAll(differentShard, 0o755))
-
- object, err := os.Create(filepath.Join(differentShard, "123456"))
- require.NoError(t, err)
- testhelper.MustClose(t, object)
-
- looseObjects, err := countLooseObjects(repo, time.Now())
- require.NoError(t, err)
- require.EqualValues(t, 1, looseObjects)
- })
-
- t.Run("objects in multiple shards", func(t *testing.T) {
- repo, repoPath := createRepo(t)
-
- for _, shard := range []string{"00", "17", "32", "ff"} {
- shardPath := filepath.Join(repoPath, "objects", shard)
- require.NoError(t, os.MkdirAll(shardPath, 0o755))
-
- object, err := os.Create(filepath.Join(shardPath, "123456"))
- require.NoError(t, err)
- testhelper.MustClose(t, object)
- }
-
- looseObjects, err := countLooseObjects(repo, time.Now())
- require.NoError(t, err)
- require.EqualValues(t, 4, looseObjects)
- })
-
- t.Run("object in shard with grace period", func(t *testing.T) {
- repo, repoPath := createRepo(t)
-
- shard := filepath.Join(repoPath, "objects", "17")
- require.NoError(t, os.MkdirAll(shard, 0o755))
-
- objectPaths := []string{
- filepath.Join(shard, "123456"),
- filepath.Join(shard, "654321"),
- }
-
- cutoffDate := time.Now()
- afterCutoffDate := cutoffDate.Add(1 * time.Minute)
- beforeCutoffDate := cutoffDate.Add(-1 * time.Minute)
-
- for _, objectPath := range objectPaths {
- require.NoError(t, os.WriteFile(objectPath, nil, 0o644))
- require.NoError(t, os.Chtimes(objectPath, afterCutoffDate, afterCutoffDate))
- }
-
- // Objects are recent, so with the cutoff-date they shouldn't be counted.
- looseObjects, err := countLooseObjects(repo, cutoffDate)
- require.NoError(t, err)
- require.EqualValues(t, 0, looseObjects)
-
- for i, objectPath := range objectPaths {
- // Modify the object's mtime should cause it to be counted.
- require.NoError(t, os.Chtimes(objectPath, beforeCutoffDate, beforeCutoffDate))
-
- looseObjects, err = countLooseObjects(repo, cutoffDate)
- require.NoError(t, err)
- require.EqualValues(t, i+1, looseObjects)
- }
- })
-
- t.Run("shard with garbage", func(t *testing.T) {
- repo, repoPath := createRepo(t)
-
- shard := filepath.Join(repoPath, "objects", "17")
- require.NoError(t, os.MkdirAll(shard, 0o755))
-
- for _, objectName := range []string{"garbage", "012345"} {
- require.NoError(t, os.WriteFile(filepath.Join(shard, objectName), nil, 0o644))
- }
-
- looseObjects, err := countLooseObjects(repo, time.Now())
- require.NoError(t, err)
- require.EqualValues(t, 1, looseObjects)
- })
-}
-
-func BenchmarkCountLooseObjects(b *testing.B) {
- ctx := testhelper.Context(b)
- cfg := testcfg.Build(b)
-
- createRepo := func(b *testing.B) (*localrepo.Repo, string) {
- repoProto, repoPath := gittest.CreateRepository(b, ctx, cfg, gittest.CreateRepositoryConfig{
- SkipCreationViaService: true,
- })
- return localrepo.NewTestRepo(b, cfg, repoProto), repoPath
- }
-
- b.Run("empty repository", func(b *testing.B) {
- repo, _ := createRepo(b)
-
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- _, err := countLooseObjects(repo, time.Now())
- require.NoError(b, err)
- }
- })
-
- b.Run("repository with single object", func(b *testing.B) {
- repo, repoPath := createRepo(b)
-
- objectPath := filepath.Join(repoPath, "objects", "17", "12345")
- require.NoError(b, os.Mkdir(filepath.Dir(objectPath), 0o755))
- require.NoError(b, os.WriteFile(objectPath, nil, 0o644))
-
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- _, err := countLooseObjects(repo, time.Now())
- require.NoError(b, err)
- }
- })
-
- b.Run("repository with single object in each shard", func(b *testing.B) {
- repo, repoPath := createRepo(b)
-
- for i := 0; i < 256; i++ {
- objectPath := filepath.Join(repoPath, "objects", fmt.Sprintf("%02x", i), "12345")
- require.NoError(b, os.Mkdir(filepath.Dir(objectPath), 0o755))
- require.NoError(b, os.WriteFile(objectPath, nil, 0o644))
- }
-
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- _, err := countLooseObjects(repo, time.Now())
- require.NoError(b, err)
- }
- })
-
- b.Run("repository hitting loose object limit", func(b *testing.B) {
- repo, repoPath := createRepo(b)
-
- // Usually we shouldn't have a lot more than `looseObjectCount` objects in the
- // repository because we'd repack as soon as we hit that limit. So this benchmark
- // case tries to estimate the usual upper limit for loose objects we'd typically
- // have.
- looseObjectCount := int(math.Ceil(looseObjectLimit / 256))
-
- for i := 0; i < 256; i++ {
- shardPath := filepath.Join(repoPath, "objects", fmt.Sprintf("%02x", i))
- require.NoError(b, os.Mkdir(shardPath, 0o755))
-
- for j := 0; j < looseObjectCount; j++ {
- objectPath := filepath.Join(shardPath, fmt.Sprintf("%d", j))
- require.NoError(b, os.WriteFile(objectPath, nil, 0o644))
- }
- }
-
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- _, err := countLooseObjects(repo, time.Now())
- require.NoError(b, err)
- }
- })
-
- b.Run("repository with lots of objects", func(b *testing.B) {
- repo, repoPath := createRepo(b)
-
- for i := 0; i < 256; i++ {
- shardPath := filepath.Join(repoPath, "objects", fmt.Sprintf("%02x", i))
- require.NoError(b, os.Mkdir(shardPath, 0o755))
-
- for j := 0; j < 1000; j++ {
- objectPath := filepath.Join(shardPath, fmt.Sprintf("%d", j))
- require.NoError(b, os.WriteFile(objectPath, nil, 0o644))
- }
- }
-
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- _, err := countLooseObjects(repo, time.Now())
- require.NoError(b, err)
- }
- })
-}
-
func TestNewEagerOptimizationStrategy(t *testing.T) {
t.Parallel()
diff --git a/internal/git/stats/objects_info.go b/internal/git/stats/objects_info.go
index eedd8316d..db9a54d7f 100644
--- a/internal/git/stats/objects_info.go
+++ b/internal/git/stats/objects_info.go
@@ -10,6 +10,7 @@ import (
"path/filepath"
"strconv"
"strings"
+ "time"
"github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus"
"gitlab.com/gitlab-org/gitaly/v15/internal/git"
@@ -189,6 +190,60 @@ func ObjectsInfoForRepository(ctx context.Context, repo *localrepo.Repo) (Object
return info, nil
}
+// CountLooseObjects counts the number of loose objects in the repository. If a cutoff date is
+// given, then this function will only take into account objects which are older than the given
+// point in time.
+func CountLooseObjects(repo *localrepo.Repo, cutoffDate time.Time) (uint64, error) {
+ repoPath, err := repo.Path()
+ if err != nil {
+ return 0, fmt.Errorf("getting repository path: %w", err)
+ }
+
+ var looseObjects uint64
+ for i := 0; i <= 0xFF; i++ {
+ entries, err := os.ReadDir(filepath.Join(repoPath, "objects", fmt.Sprintf("%02x", i)))
+ if err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ continue
+ }
+
+ return 0, fmt.Errorf("reading loose object shard: %w", err)
+ }
+
+ for _, entry := range entries {
+ if !isValidLooseObjectName(entry.Name()) {
+ continue
+ }
+
+ entryInfo, err := entry.Info()
+ if err != nil {
+ if errors.Is(err, fs.ErrNotExist) {
+ continue
+ }
+
+ return 0, fmt.Errorf("reading object info: %w", err)
+ }
+
+ if entryInfo.ModTime().After(cutoffDate) {
+ continue
+ }
+
+ looseObjects++
+ }
+ }
+
+ return looseObjects, nil
+}
+
+func isValidLooseObjectName(s string) bool {
+ for _, c := range []byte(s) {
+ if strings.IndexByte("0123456789abcdef", c) < 0 {
+ return false
+ }
+ }
+ return true
+}
+
// PackfilesInfo contains information about packfiles.
type PackfilesInfo struct {
// Count is the number of loose objects, including stale ones.
diff --git a/internal/git/stats/objects_info_test.go b/internal/git/stats/objects_info_test.go
index 85404477b..1d8873fbe 100644
--- a/internal/git/stats/objects_info_test.go
+++ b/internal/git/stats/objects_info_test.go
@@ -1,6 +1,7 @@
package stats
import (
+ "fmt"
"os"
"path/filepath"
"testing"
@@ -326,6 +327,212 @@ func TestObjectsInfoForRepository(t *testing.T) {
}
}
+func TestCountLooseObjects(t *testing.T) {
+ t.Parallel()
+
+ ctx := testhelper.Context(t)
+ cfg := testcfg.Build(t)
+
+ createRepo := func(t *testing.T) (*localrepo.Repo, string) {
+ repoProto, repoPath := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{
+ SkipCreationViaService: true,
+ })
+ return localrepo.NewTestRepo(t, cfg, repoProto), repoPath
+ }
+
+ t.Run("empty repository", func(t *testing.T) {
+ repo, _ := createRepo(t)
+
+ looseObjects, err := CountLooseObjects(repo, time.Now())
+ require.NoError(t, err)
+ require.Zero(t, looseObjects)
+ })
+
+ t.Run("object in random shard", func(t *testing.T) {
+ repo, repoPath := createRepo(t)
+
+ differentShard := filepath.Join(repoPath, "objects", "a0")
+ require.NoError(t, os.MkdirAll(differentShard, 0o755))
+
+ object, err := os.Create(filepath.Join(differentShard, "123456"))
+ require.NoError(t, err)
+ testhelper.MustClose(t, object)
+
+ looseObjects, err := CountLooseObjects(repo, time.Now())
+ require.NoError(t, err)
+ require.EqualValues(t, 1, looseObjects)
+ })
+
+ t.Run("objects in multiple shards", func(t *testing.T) {
+ repo, repoPath := createRepo(t)
+
+ for _, shard := range []string{"00", "17", "32", "ff"} {
+ shardPath := filepath.Join(repoPath, "objects", shard)
+ require.NoError(t, os.MkdirAll(shardPath, 0o755))
+
+ object, err := os.Create(filepath.Join(shardPath, "123456"))
+ require.NoError(t, err)
+ testhelper.MustClose(t, object)
+ }
+
+ looseObjects, err := CountLooseObjects(repo, time.Now())
+ require.NoError(t, err)
+ require.EqualValues(t, 4, looseObjects)
+ })
+
+ t.Run("object in shard with grace period", func(t *testing.T) {
+ repo, repoPath := createRepo(t)
+
+ shard := filepath.Join(repoPath, "objects", "17")
+ require.NoError(t, os.MkdirAll(shard, 0o755))
+
+ objectPaths := []string{
+ filepath.Join(shard, "123456"),
+ filepath.Join(shard, "654321"),
+ }
+
+ cutoffDate := time.Now()
+ afterCutoffDate := cutoffDate.Add(1 * time.Minute)
+ beforeCutoffDate := cutoffDate.Add(-1 * time.Minute)
+
+ for _, objectPath := range objectPaths {
+ require.NoError(t, os.WriteFile(objectPath, nil, 0o644))
+ require.NoError(t, os.Chtimes(objectPath, afterCutoffDate, afterCutoffDate))
+ }
+
+ // Objects are recent, so with the cutoff-date they shouldn't be counted.
+ looseObjects, err := CountLooseObjects(repo, cutoffDate)
+ require.NoError(t, err)
+ require.EqualValues(t, 0, looseObjects)
+
+ for i, objectPath := range objectPaths {
+ // Modifying the object's mtime should cause it to be counted.
+ require.NoError(t, os.Chtimes(objectPath, beforeCutoffDate, beforeCutoffDate))
+
+ looseObjects, err = CountLooseObjects(repo, cutoffDate)
+ require.NoError(t, err)
+ require.EqualValues(t, i+1, looseObjects)
+ }
+ })
+
+ t.Run("shard with garbage", func(t *testing.T) {
+ repo, repoPath := createRepo(t)
+
+ shard := filepath.Join(repoPath, "objects", "17")
+ require.NoError(t, os.MkdirAll(shard, 0o755))
+
+ for _, objectName := range []string{"garbage", "012345"} {
+ require.NoError(t, os.WriteFile(filepath.Join(shard, objectName), nil, 0o644))
+ }
+
+ looseObjects, err := CountLooseObjects(repo, time.Now())
+ require.NoError(t, err)
+ require.EqualValues(t, 1, looseObjects)
+ })
+}
+
+func BenchmarkCountLooseObjects(b *testing.B) {
+ ctx := testhelper.Context(b)
+ cfg := testcfg.Build(b)
+
+ createRepo := func(b *testing.B) (*localrepo.Repo, string) {
+ repoProto, repoPath := gittest.CreateRepository(b, ctx, cfg, gittest.CreateRepositoryConfig{
+ SkipCreationViaService: true,
+ })
+ return localrepo.NewTestRepo(b, cfg, repoProto), repoPath
+ }
+
+ b.Run("empty repository", func(b *testing.B) {
+ repo, _ := createRepo(b)
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, err := CountLooseObjects(repo, time.Now())
+ require.NoError(b, err)
+ }
+ })
+
+ b.Run("repository with single object", func(b *testing.B) {
+ repo, repoPath := createRepo(b)
+
+ objectPath := filepath.Join(repoPath, "objects", "17", "12345")
+ require.NoError(b, os.Mkdir(filepath.Dir(objectPath), 0o755))
+ require.NoError(b, os.WriteFile(objectPath, nil, 0o644))
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, err := CountLooseObjects(repo, time.Now())
+ require.NoError(b, err)
+ }
+ })
+
+ b.Run("repository with single object in each shard", func(b *testing.B) {
+ repo, repoPath := createRepo(b)
+
+ for i := 0; i < 256; i++ {
+ objectPath := filepath.Join(repoPath, "objects", fmt.Sprintf("%02x", i), "12345")
+ require.NoError(b, os.Mkdir(filepath.Dir(objectPath), 0o755))
+ require.NoError(b, os.WriteFile(objectPath, nil, 0o644))
+ }
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, err := CountLooseObjects(repo, time.Now())
+ require.NoError(b, err)
+ }
+ })
+
+ b.Run("repository hitting loose object limit", func(b *testing.B) {
+ repo, repoPath := createRepo(b)
+
+ // Usually we shouldn't have a lot more than `looseObjectCount` objects in the
+ // repository because we'd repack as soon as we hit that limit. So this benchmark
+ // case tries to estimate the usual upper limit for loose objects we'd typically
+ // have.
+ //
+ // Note that we should ideally just use `housekeeping.looseObjectLimit` here to
+ // derive that value. But due to a cyclic dependency that's not possible, so we
+ // just use a hard-coded value instead.
+ looseObjectCount := 5
+
+ for i := 0; i < 256; i++ {
+ shardPath := filepath.Join(repoPath, "objects", fmt.Sprintf("%02x", i))
+ require.NoError(b, os.Mkdir(shardPath, 0o755))
+
+ for j := 0; j < looseObjectCount; j++ {
+ objectPath := filepath.Join(shardPath, fmt.Sprintf("%d", j))
+ require.NoError(b, os.WriteFile(objectPath, nil, 0o644))
+ }
+ }
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, err := CountLooseObjects(repo, time.Now())
+ require.NoError(b, err)
+ }
+ })
+
+ b.Run("repository with lots of objects", func(b *testing.B) {
+ repo, repoPath := createRepo(b)
+
+ for i := 0; i < 256; i++ {
+ shardPath := filepath.Join(repoPath, "objects", fmt.Sprintf("%02x", i))
+ require.NoError(b, os.Mkdir(shardPath, 0o755))
+
+ for j := 0; j < 1000; j++ {
+ objectPath := filepath.Join(shardPath, fmt.Sprintf("%d", j))
+ require.NoError(b, os.WriteFile(objectPath, nil, 0o644))
+ }
+ }
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, err := CountLooseObjects(repo, time.Now())
+ require.NoError(b, err)
+ }
+ })
+}
+
func TestPackfileInfoForRepository(t *testing.T) {
t.Parallel()