diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-11-25 16:04:53 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-11-30 09:59:56 +0300 |
commit | ce7419fcd3a5bf56d4ff702fb1ddf59bd23c36f7 (patch) | |
tree | 37bbd1688e8bcd578665163fc762c98e8f4ada75 | |
parent | 5da0d1ed70c82d4ee932336aae20d82df9e6a5f1 (diff) |
git/stats: Report garbage in packfile directory
Start reporting garbage files in packfile directories: any entry whose name
does not start with "pack-" is now counted as garbage. This brings us one step
closer to abandoning the external git-count-objects(1) process.
-rw-r--r-- | internal/git/stats/objects_info.go | 25 | ||||
-rw-r--r-- | internal/git/stats/objects_info_test.go | 6 |
2 files changed, 25 insertions, 6 deletions
diff --git a/internal/git/stats/objects_info.go b/internal/git/stats/objects_info.go index 22f4bccb5..13b8d838c 100644 --- a/internal/git/stats/objects_info.go +++ b/internal/git/stats/objects_info.go @@ -173,6 +173,10 @@ type PackfilesInfo struct { Count uint64 `json:"count"` // Size is the total size of all loose objects in bytes, including stale ones. Size uint64 `json:"size"` + // GarbageCount is the number of garbage files. + GarbageCount uint64 `json:"garbage_count"` + // GarbageSize is the total size of all garbage files in bytes. + GarbageSize uint64 `json:"garbage_size"` } // PackfilesInfoForRepository derives various information about packfiles for the given repository. @@ -193,10 +197,6 @@ func PackfilesInfoForRepository(repo *localrepo.Repo) (PackfilesInfo, error) { var info PackfilesInfo for _, entry := range entries { - if !strings.HasSuffix(entry.Name(), ".pack") { - continue - } - entryInfo, err := entry.Info() if err != nil { if errors.Is(err, os.ErrNotExist) { @@ -206,6 +206,23 @@ func PackfilesInfoForRepository(repo *localrepo.Repo) (PackfilesInfo, error) { return PackfilesInfo{}, fmt.Errorf("getting packfile info: %w", err) } + // We're overly lenient here and only verify for known prefixes. This would already + // catch things like temporary packfiles, but it wouldn't catch other bogus files. + // This is on purpose though because Git has grown more and more metadata-style file + // formats, and we don't want to copy the list here. 
+ if !strings.HasPrefix(entry.Name(), "pack-") { + info.GarbageCount++ + if entryInfo.Size() > 0 { + info.GarbageSize += uint64(entryInfo.Size()) + } + + continue + } + + if !strings.HasSuffix(entry.Name(), ".pack") { + continue + } + info.Count++ if entryInfo.Size() > 0 { info.Size += uint64(entryInfo.Size()) diff --git a/internal/git/stats/objects_info_test.go b/internal/git/stats/objects_info_test.go index 4ab97d0a7..b70fd683a 100644 --- a/internal/git/stats/objects_info_test.go +++ b/internal/git/stats/objects_info_test.go @@ -363,8 +363,10 @@ func TestPackfileInfoForRepository(t *testing.T) { } requirePackfilesInfo(t, repo, PackfilesInfo{ - Count: 2, - Size: 2, + Count: 2, + Size: 2, + GarbageCount: 1, + GarbageSize: 1, }) }) } |