Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorQuang-Minh Nguyen <qmnguyen@gitlab.com>2022-12-02 07:09:44 +0300
committerQuang-Minh Nguyen <qmnguyen@gitlab.com>2022-12-02 07:09:44 +0300
commit70f8d1555481539085483e2c7af663e6fb63d39b (patch)
tree9ada06fd4d4cbc9bfd2dc142f432b4aeafd198ab
parent4f55944e6bdb1eb2b2b9f9d1aadc654372dedd99 (diff)
parente5f11a2ed2372718b5653dc45614b5b4cc401780 (diff)
Merge branch 'pks-git-stats-replace-object-count' into 'master'
git/stats: Get rid of spawning git-count-objects(1) See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5095 Merged-by: Quang-Minh Nguyen <qmnguyen@gitlab.com> Approved-by: karthik nayak <knayak@gitlab.com> Approved-by: Quang-Minh Nguyen <qmnguyen@gitlab.com> Co-authored-by: Patrick Steinhardt <psteinhardt@gitlab.com>
-rw-r--r--internal/git/housekeeping/optimization_strategy.go6
-rw-r--r--internal/git/housekeeping/optimization_strategy_test.go3
-rw-r--r--internal/git/housekeeping/optimize_repository_test.go8
-rw-r--r--internal/git/stats/objects_info.go145
-rw-r--r--internal/git/stats/objects_info_test.go84
5 files changed, 117 insertions, 129 deletions
diff --git a/internal/git/housekeeping/optimization_strategy.go b/internal/git/housekeeping/optimization_strategy.go
index c877e00f5..254f97a57 100644
--- a/internal/git/housekeeping/optimization_strategy.go
+++ b/internal/git/housekeeping/optimization_strategy.go
@@ -31,10 +31,6 @@ type OptimizationStrategy interface {
ShouldWriteCommitGraph() (bool, WriteCommitGraphConfig)
}
-// CutOffTime is time delta that is used to indicate cutoff wherein an object would be considered
-// old. Currently this is set to being 2 weeks (2 * 7days * 24hours).
-const CutOffTime = -14 * 24 * time.Hour
-
// HeuristicalOptimizationStrategy is an optimization strategy that is based on a set of
// heuristics.
type HeuristicalOptimizationStrategy struct {
@@ -97,7 +93,7 @@ func NewHeuristicalOptimizationStrategy(ctx context.Context, repo *localrepo.Rep
strategy.packfileCount = packfilesInfo.Count
strategy.packfileSize = packfilesInfo.Size
- looseObjectsInfo, err := stats.LooseObjectsInfoForRepository(repo, time.Now().Add(CutOffTime))
+ looseObjectsInfo, err := stats.LooseObjectsInfoForRepository(repo, time.Now().Add(stats.StaleObjectsGracePeriod))
if err != nil {
return strategy, fmt.Errorf("estimating loose object count: %w", err)
}
diff --git a/internal/git/housekeeping/optimization_strategy_test.go b/internal/git/housekeeping/optimization_strategy_test.go
index 9db8a7439..e768307c3 100644
--- a/internal/git/housekeeping/optimization_strategy_test.go
+++ b/internal/git/housekeeping/optimization_strategy_test.go
@@ -12,6 +12,7 @@ import (
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git/stats"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg"
"gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb"
@@ -74,7 +75,7 @@ func TestNewHeuristicalOptimizationStrategy_variousParameters(t *testing.T) {
// ... and one stale object.
staleObjectPath := filepath.Join(shard, "5678")
require.NoError(t, os.WriteFile(staleObjectPath, nil, 0o644))
- twoWeeksAgo := time.Now().Add(CutOffTime)
+ twoWeeksAgo := time.Now().Add(stats.StaleObjectsGracePeriod)
require.NoError(t, os.Chtimes(staleObjectPath, twoWeeksAgo, twoWeeksAgo))
return repoProto
diff --git a/internal/git/housekeeping/optimize_repository_test.go b/internal/git/housekeeping/optimize_repository_test.go
index 0e442fccf..b3668963e 100644
--- a/internal/git/housekeeping/optimize_repository_test.go
+++ b/internal/git/housekeeping/optimize_repository_test.go
@@ -38,8 +38,8 @@ func TestRepackIfNeeded(t *testing.T) {
info, err := stats.ObjectsInfoForRepository(ctx, repo)
require.NoError(t, err)
- require.Equal(t, expectedPackfiles, info.Packfiles)
- require.Equal(t, expectedLooseObjects, info.LooseObjects)
+ require.Equal(t, expectedPackfiles, info.Packfiles.Count)
+ require.Equal(t, expectedLooseObjects, info.LooseObjects.Count)
}
t.Run("no repacking", func(t *testing.T) {
@@ -277,7 +277,7 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1
// We set the object's mtime to be almost two weeks ago. Given that
// our timeout is at exactly two weeks this shouldn't caused them to
// get pruned.
- almostTwoWeeksAgo := time.Now().Add(CutOffTime).Add(time.Minute)
+ almostTwoWeeksAgo := time.Now().Add(stats.StaleObjectsGracePeriod).Add(time.Minute)
for i := 0; i < looseObjectLimit+1; i++ {
blobPath := filepath.Join(repoPath, "objects", "17", fmt.Sprintf("%d", i))
@@ -309,7 +309,7 @@ gitaly_housekeeping_tasks_total{housekeeping_task="total", status="success"} 1
// broken, and thus we'll retry to prune them afterwards.
require.NoError(t, os.MkdirAll(filepath.Join(repoPath, "objects", "17"), 0o755))
- moreThanTwoWeeksAgo := time.Now().Add(CutOffTime).Add(-time.Minute)
+ moreThanTwoWeeksAgo := time.Now().Add(stats.StaleObjectsGracePeriod).Add(-time.Minute)
for i := 0; i < looseObjectLimit+1; i++ {
blobPath := filepath.Join(repoPath, "objects", "17", fmt.Sprintf("%d", i))
diff --git a/internal/git/stats/objects_info.go b/internal/git/stats/objects_info.go
index 471746e76..f0822139a 100644
--- a/internal/git/stats/objects_info.go
+++ b/internal/git/stats/objects_info.go
@@ -1,22 +1,24 @@
package stats
import (
- "bufio"
"context"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
- "strconv"
"strings"
"time"
"github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus"
- "gitlab.com/gitlab-org/gitaly/v15/internal/git"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/helper/text"
)
+// StaleObjectsGracePeriod is time delta that is used to indicate cutoff wherein an object would be
+// considered old. Currently this is set to being 2 weeks (2 * 7days * 24hours).
+const StaleObjectsGracePeriod = -14 * 24 * time.Hour
+
// HasBitmap returns whether or not the repository contains an object bitmap.
func HasBitmap(repoPath string) (bool, error) {
bitmaps, err := filepath.Glob(filepath.Join(repoPath, "objects", "pack", "*.bitmap"))
@@ -61,7 +63,7 @@ func LooseObjects(ctx context.Context, repo *localrepo.Repo) (uint64, error) {
return 0, err
}
- return objectsInfo.LooseObjects, nil
+ return objectsInfo.LooseObjects.Count, nil
}
// LogObjectsInfo read statistics of the git repo objects
@@ -79,32 +81,12 @@ func LogObjectsInfo(ctx context.Context, repo *localrepo.Repo) {
// ObjectsInfo contains information on the object database.
type ObjectsInfo struct {
- // LooseObjects is the count of loose objects.
- LooseObjects uint64 `json:"loose_objects"`
- // LooseObjectsSize is the accumulated on-disk size of all loose objects in KiB.
- LooseObjectsSize uint64 `json:"loose_objects_size"`
- // PrunableObjects is the number of objects that exist both as loose and as packed objects.
- // The loose objects may be pruned in that case.
- PruneableObjects uint64 `json:"prunable_objects"`
-
- // PackedObjects is the count of packed objects.
- PackedObjects uint64 `json:"packed_objects"`
- // Packfiles is the number of packfiles.
- Packfiles uint64 `json:"packfiles"`
- // PackfilesSize is the accumulated on-disk size of all packfiles in KiB.
- PackfilesSize uint64 `json:"packfiles_size"`
- // PackfileBitmapExists indicates whether the repository has a bitmap for one of its
- // packfiles.
- PackfileBitmapExists bool `json:"packfile_bitmap_exists"`
-
+ // LooseObjects contains information about loose objects.
+ LooseObjects LooseObjectsInfo `json:"loose_objects"`
+ // Packfiles contains information about packfiles.
+ Packfiles PackfilesInfo `json:"packfiles"`
// CommitGraph contains information about the repository's commit-graphs.
CommitGraph CommitGraphInfo `json:"commit_graph"`
-
- // Garbage is the count of files in the object database that are neither a valid loose
- // object nor a valid packfile.
- Garbage uint64 `json:"garbage"`
- // GarbageSize is the accumulated on-disk size of garbage files.
- GarbageSize uint64 `json:"garbage_size"`
// Alternates is the list of absolute paths of alternate object databases this repository is
// connected to.
Alternates []string `json:"alternates"`
@@ -112,74 +94,22 @@ type ObjectsInfo struct {
// ObjectsInfoForRepository computes the ObjectsInfo for a repository.
func ObjectsInfoForRepository(ctx context.Context, repo *localrepo.Repo) (ObjectsInfo, error) {
+ var info ObjectsInfo
+ var err error
+
repoPath, err := repo.Path()
if err != nil {
return ObjectsInfo{}, err
}
- countObjects, err := repo.Exec(ctx, git.SubCmd{
- Name: "count-objects",
- Flags: []git.Option{git.Flag{Name: "--verbose"}},
- })
+ info.LooseObjects, err = LooseObjectsInfoForRepository(repo, time.Now().Add(StaleObjectsGracePeriod))
if err != nil {
- return ObjectsInfo{}, fmt.Errorf("running git-count-objects: %w", err)
- }
-
- var info ObjectsInfo
-
- // The expected format is:
- //
- // count: 12
- // packs: 2
- // size-garbage: 934
- // alternate: /some/path/to/.git/objects
- // alternate: "/some/other path/to/.git/objects"
- scanner := bufio.NewScanner(countObjects)
- for scanner.Scan() {
- line := scanner.Text()
-
- parts := strings.SplitN(line, ": ", 2)
- if len(parts) != 2 {
- continue
- }
-
- var err error
- switch parts[0] {
- case "count":
- info.LooseObjects, err = strconv.ParseUint(parts[1], 10, 64)
- case "size":
- info.LooseObjectsSize, err = strconv.ParseUint(parts[1], 10, 64)
- case "in-pack":
- info.PackedObjects, err = strconv.ParseUint(parts[1], 10, 64)
- case "packs":
- info.Packfiles, err = strconv.ParseUint(parts[1], 10, 64)
- case "size-pack":
- info.PackfilesSize, err = strconv.ParseUint(parts[1], 10, 64)
- case "prune-packable":
- info.PruneableObjects, err = strconv.ParseUint(parts[1], 10, 64)
- case "garbage":
- info.Garbage, err = strconv.ParseUint(parts[1], 10, 64)
- case "size-garbage":
- info.GarbageSize, err = strconv.ParseUint(parts[1], 10, 64)
- case "alternate":
- info.Alternates = append(info.Alternates, strings.Trim(parts[1], "\" \t\n"))
- }
-
- if err != nil {
- return ObjectsInfo{}, fmt.Errorf("parsing %q: %w", parts[0], err)
- }
- }
-
- if err := scanner.Err(); err != nil {
- return ObjectsInfo{}, fmt.Errorf("scanning object info: %w", err)
- }
-
- if err := countObjects.Wait(); err != nil {
- return ObjectsInfo{}, fmt.Errorf("counting objects: %w", err)
+ return ObjectsInfo{}, fmt.Errorf("counting loose objects: %w", err)
}
- if info.PackfileBitmapExists, err = HasBitmap(repoPath); err != nil {
- return ObjectsInfo{}, fmt.Errorf("checking for bitmap: %w", err)
+ info.Packfiles, err = PackfilesInfoForRepository(repo)
+ if err != nil {
+ return ObjectsInfo{}, fmt.Errorf("counting packfiles: %w", err)
}
info.CommitGraph, err = CommitGraphInfoForRepository(repoPath)
@@ -187,6 +117,11 @@ func ObjectsInfoForRepository(ctx context.Context, repo *localrepo.Repo) (Object
return ObjectsInfo{}, fmt.Errorf("checking commit-graph info: %w", err)
}
+ info.Alternates, err = readAlternates(repo)
+ if err != nil {
+ return ObjectsInfo{}, fmt.Errorf("reading alterantes: %w", err)
+ }
+
return info, nil
}
@@ -278,6 +213,8 @@ type PackfilesInfo struct {
GarbageCount uint64 `json:"garbage_count"`
// GarbageSize is the total size of all garbage files in bytes.
GarbageSize uint64 `json:"garbage_size"`
+ // HasBitmap indicates whether the packfiles have a bitmap.
+ HasBitmap bool `json:"has_bitmap"`
}
// PackfilesInfoForRepository derives various information about packfiles for the given repository.
@@ -330,5 +267,37 @@ func PackfilesInfoForRepository(repo *localrepo.Repo) (PackfilesInfo, error) {
}
}
+ if info.HasBitmap, err = HasBitmap(repoPath); err != nil {
+ return PackfilesInfo{}, fmt.Errorf("checking for bitmap: %w", err)
+ }
+
return info, nil
}
+
+func readAlternates(repo *localrepo.Repo) ([]string, error) {
+ repoPath, err := repo.Path()
+ if err != nil {
+ return nil, fmt.Errorf("getting repository path: %w", err)
+ }
+
+ contents, err := os.ReadFile(filepath.Join(repoPath, "objects", "info", "alternates"))
+ if err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ return nil, nil
+ }
+
+ return nil, fmt.Errorf("reading alternates: %w", err)
+ }
+
+ relativeAlternatePaths := strings.Split(text.ChompBytes(contents), "\n")
+ alternatePaths := make([]string, 0, len(relativeAlternatePaths))
+ for _, relativeAlternatePath := range relativeAlternatePaths {
+ if filepath.IsAbs(relativeAlternatePath) {
+ alternatePaths = append(alternatePaths, relativeAlternatePath)
+ } else {
+ alternatePaths = append(alternatePaths, filepath.Join(repoPath, "objects", relativeAlternatePath))
+ }
+ }
+
+ return alternatePaths, nil
+}
diff --git a/internal/git/stats/objects_info_test.go b/internal/git/stats/objects_info_test.go
index 20c67addf..d62195b5e 100644
--- a/internal/git/stats/objects_info_test.go
+++ b/internal/git/stats/objects_info_test.go
@@ -73,7 +73,7 @@ func TestRepositoryProfile(t *testing.T) {
looseObjects, err = LooseObjects(ctx, repo)
require.NoError(t, err)
- require.Equal(t, uint64(2), looseObjects)
+ require.EqualValues(t, 2, looseObjects)
}
func TestLogObjectInfo(t *testing.T) {
@@ -150,8 +150,10 @@ func TestLogObjectInfo(t *testing.T) {
objectsInfo := requireObjectsInfo(hook.AllEntries())
require.Equal(t, ObjectsInfo{
- LooseObjects: 2,
- LooseObjectsSize: 8,
+ LooseObjects: LooseObjectsInfo{
+ Count: 2,
+ Size: hashDependentSize(142, 158),
+ },
}, objectsInfo)
})
}
@@ -184,8 +186,10 @@ func TestObjectsInfoForRepository(t *testing.T) {
gittest.WriteBlob(t, cfg, repoPath, []byte("x"))
},
expectedObjectsInfo: ObjectsInfo{
- LooseObjects: 1,
- LooseObjectsSize: 4,
+ LooseObjects: LooseObjectsInfo{
+ Count: 1,
+ Size: 16,
+ },
},
},
{
@@ -197,10 +201,11 @@ func TestObjectsInfoForRepository(t *testing.T) {
gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad")
},
expectedObjectsInfo: ObjectsInfo{
- PackedObjects: 1,
- Packfiles: 1,
- PackfilesSize: 1,
- PackfileBitmapExists: true,
+ Packfiles: PackfilesInfo{
+ Count: 1,
+ Size: hashDependentSize(42, 54),
+ HasBitmap: true,
+ },
},
},
{
@@ -213,13 +218,15 @@ func TestObjectsInfoForRepository(t *testing.T) {
gittest.Exec(t, cfg, "-C", repoPath, "repack", "-a")
},
expectedObjectsInfo: ObjectsInfo{
- LooseObjects: 1,
- LooseObjectsSize: 4,
- PackedObjects: 1,
- Packfiles: 1,
- PackfilesSize: 1,
- PackfileBitmapExists: true,
- PruneableObjects: 1,
+ LooseObjects: LooseObjectsInfo{
+ Count: 1,
+ Size: 16,
+ },
+ Packfiles: PackfilesInfo{
+ Count: 1,
+ Size: hashDependentSize(42, 54),
+ HasBitmap: true,
+ },
},
},
{
@@ -229,10 +236,10 @@ func TestObjectsInfoForRepository(t *testing.T) {
require.NoError(t, os.WriteFile(garbagePath, []byte("x"), 0o600))
},
expectedObjectsInfo: ObjectsInfo{
- Garbage: 1,
- // git-count-objects(1) somehow does not count this file's size,
- // which I've verified manually.
- GarbageSize: 0,
+ Packfiles: PackfilesInfo{
+ GarbageCount: 1,
+ GarbageSize: 1,
+ },
},
},
{
@@ -254,8 +261,10 @@ func TestObjectsInfoForRepository(t *testing.T) {
gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--reachable")
},
expectedObjectsInfo: ObjectsInfo{
- LooseObjects: 2,
- LooseObjectsSize: 8,
+ LooseObjects: LooseObjectsInfo{
+ Count: 2,
+ Size: hashDependentSize(142, 158),
+ },
CommitGraph: CommitGraphInfo{
Exists: true,
},
@@ -268,8 +277,10 @@ func TestObjectsInfoForRepository(t *testing.T) {
gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write", "--reachable", "--changed-paths")
},
expectedObjectsInfo: ObjectsInfo{
- LooseObjects: 2,
- LooseObjectsSize: 8,
+ LooseObjects: LooseObjectsInfo{
+ Count: 2,
+ Size: hashDependentSize(142, 158),
+ },
CommitGraph: CommitGraphInfo{
Exists: true,
HasBloomFilters: true,
@@ -299,13 +310,17 @@ func TestObjectsInfoForRepository(t *testing.T) {
}
},
expectedObjectsInfo: ObjectsInfo{
- LooseObjects: 2,
- LooseObjectsSize: 8,
- PackedObjects: 1,
- Packfiles: 1,
- PackfilesSize: 1,
- PackfileBitmapExists: true,
- Garbage: 3,
+ LooseObjects: LooseObjectsInfo{
+ Count: 2,
+ Size: 32,
+ },
+ Packfiles: PackfilesInfo{
+ Count: 1,
+ Size: hashDependentSize(42, 54),
+ GarbageCount: 3,
+ GarbageSize: 3,
+ HasBitmap: true,
+ },
Alternates: []string{
alternatePath,
},
@@ -620,3 +635,10 @@ func TestPackfileInfoForRepository(t *testing.T) {
})
})
}
+
+func hashDependentSize(sha1, sha256 uint64) uint64 {
+ if gittest.DefaultObjectHash.Format == "sha1" {
+ return sha1
+ }
+ return sha256
+}