diff options
author | John Cai <jcai@gitlab.com> | 2020-03-21 03:25:18 +0300 |
---|---|---|
committer | John Cai <jcai@gitlab.com> | 2020-03-25 20:38:45 +0300 |
commit | ba5bab48f0e402ae08c9207a73022fb0908fe01a (patch) | |
tree | 2c48249fe0df6c324d75373abba3a9f375946934 | |
parent | 1d8a0091862afee5776465ed9a018b542c418482 (diff) |
Add repository profile
It is useful to know some basic stats of a repository, such as the number
of packfiles, the number of loose objects, the number of loose objects
newer than the newest packfile, and whether or not the repository has a
bitmap.
-rw-r--r-- | changelogs/unreleased/jc-repo-profile.yml | 5 | ||||
-rw-r--r-- | internal/git/stats/git.go (renamed from internal/stats/git.go) | 0 | ||||
-rw-r--r-- | internal/git/stats/git_test.go (renamed from internal/stats/git_test.go) | 0 | ||||
-rw-r--r-- | internal/git/stats/profile.go | 158 | ||||
-rw-r--r-- | internal/git/stats/profile_test.go | 78 | ||||
-rw-r--r-- | internal/service/objectpool/fetch_into_object_pool.go | 2 | ||||
-rw-r--r-- | internal/service/repository/gc.go | 2 | ||||
-rw-r--r-- | internal/service/repository/repack.go | 2 |
8 files changed, 244 insertions, 3 deletions
diff --git a/changelogs/unreleased/jc-repo-profile.yml b/changelogs/unreleased/jc-repo-profile.yml new file mode 100644 index 000000000..fac484ffb --- /dev/null +++ b/changelogs/unreleased/jc-repo-profile.yml @@ -0,0 +1,5 @@ +--- +title: Add repository profile +merge_request: 1959 +author: +type: added diff --git a/internal/stats/git.go b/internal/git/stats/git.go index cc8abe59b..cc8abe59b 100644 --- a/internal/stats/git.go +++ b/internal/git/stats/git.go diff --git a/internal/stats/git_test.go b/internal/git/stats/git_test.go index 0de62d3c6..0de62d3c6 100644 --- a/internal/stats/git_test.go +++ b/internal/git/stats/git_test.go diff --git a/internal/git/stats/profile.go b/internal/git/stats/profile.go new file mode 100644 index 000000000..9984bd179 --- /dev/null +++ b/internal/git/stats/profile.go @@ -0,0 +1,158 @@ +package stats + +import ( + "context" + "errors" + "os" + "path/filepath" + "strconv" + "time" + + "gitlab.com/gitlab-org/gitaly/internal/git" + "gitlab.com/gitlab-org/gitaly/internal/git/repository" + "gitlab.com/gitlab-org/gitaly/internal/helper" +) + +// RepositoryProfile contains information about a git repository. +type RepositoryProfile struct { + hasBitmap bool + packfiles int64 + unpackedObjects int64 + looseObjects int64 +} + +// HasBitmap returns whether or not the repository contains an object bitmap. +func (r *RepositoryProfile) HasBitmap() bool { + return r.hasBitmap +} + +// Packfiles returns the number of packfiles a repository has. +func (r *RepositoryProfile) Packfiles() int64 { + return r.packfiles +} + +// UnpackedObjects returns the number of loose objects that have a timestamp later than the newest +// packfile. +func (r *RepositoryProfile) UnpackedObjects() int64 { + return r.unpackedObjects +} + +// LooseObjects returns the number of loose objects that are not in a packfile. 
+func (r *RepositoryProfile) LooseObjects() int64 { + return r.looseObjects +} + +// GetProfile returns a RepositoryProfile given a context and a repository.GitRepo +func GetProfile(ctx context.Context, repo repository.GitRepo) (*RepositoryProfile, error) { + repoPath, err := helper.GetRepoPath(repo) + if err != nil { + return nil, err + } + + cmd, err := git.SafeCmd(ctx, repo, nil, git.SubCmd{Name: "count-objects", Flags: []git.Option{git.Flag{Name: "--verbose"}}}) + if err != nil { + return nil, err + } + + objectStats, err := readObjectInfoStatistic(cmd) + if err != nil { + return nil, err + } + + count, ok := objectStats["count"].(int64) + if !ok { + return nil, errors.New("could not get object count") + } + + packs, ok := objectStats["packs"].(int64) + if !ok { + return nil, errors.New("could not get packfile count") + } + + unpackedObjects, err := getUnpackedObjects(repoPath) + if err != nil { + return nil, err + } + + hasBitmap, err := hasBitmap(repoPath) + if err != nil { + return nil, err + } + + return &RepositoryProfile{ + hasBitmap: hasBitmap, + packfiles: packs, + unpackedObjects: unpackedObjects, + looseObjects: count, + }, nil +} + +func hasBitmap(repoPath string) (bool, error) { + bitmap, err := filepath.Glob(filepath.Join(repoPath, "objects", "pack", "*.bitmap")) + if err != nil { + return false, err + } + + return len(bitmap) > 0, nil +} + +func getUnpackedObjects(repoPath string) (int64, error) { + objectDir := filepath.Join(repoPath, "objects") + + packFiles, err := filepath.Glob(filepath.Join(objectDir, "pack", "*.pack")) + if err != nil { + return 0, err + } + + var newestPackfileModtime time.Time + + for _, packFilePath := range packFiles { + stat, err := os.Stat(packFilePath) + if err != nil { + return 0, err + } + if stat.ModTime().After(newestPackfileModtime) { + newestPackfileModtime = stat.ModTime() + } + } + + var unpackedObjects int64 + if err = filepath.Walk(objectDir, func(path string, info os.FileInfo, err error) error { + if 
objectDir == path { + return nil + } + + if info.IsDir() { + if err := skipNonObjectDir(objectDir, path); err != nil { + return err + } + } + + if !info.IsDir() && info.ModTime().After(newestPackfileModtime) { + unpackedObjects++ + } + + return nil + }); err != nil { + return 0, err + } + + return unpackedObjects, nil +} + +func skipNonObjectDir(root, path string) error { + rel, err := filepath.Rel(root, path) + if err != nil { + return err + } + + if len(rel) != 2 { + return filepath.SkipDir + } + + if _, err := strconv.ParseUint(rel, 16, 8); err != nil { + return filepath.SkipDir + } + + return nil +} diff --git a/internal/git/stats/profile_test.go b/internal/git/stats/profile_test.go new file mode 100644 index 000000000..76a21094a --- /dev/null +++ b/internal/git/stats/profile_test.go @@ -0,0 +1,78 @@ +package stats + +import ( + "bytes" + "os" + "path/filepath" + "strconv" + "testing" + "time" + + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/internal/helper/text" + "gitlab.com/gitlab-org/gitaly/internal/testhelper" +) + +func TestRepositoryProfile(t *testing.T) { + testRepo, testRepoPath, cleanup := testhelper.InitBareRepo(t) + defer cleanup() + + ctx, cancel := testhelper.Context() + defer cancel() + + profile, err := GetProfile(ctx, testRepo) + require.NoError(t, err) + + require.False(t, profile.HasBitmap(), "repository should not have a bitmap initially") + require.Zero(t, profile.UnpackedObjects()) + require.Zero(t, profile.Packfiles()) + + blobs := 10 + blobIDs := writeBlobs(t, testRepoPath, blobs) + + profile, err = GetProfile(ctx, testRepo) + require.NoError(t, err) + require.Equal(t, int64(blobs), profile.UnpackedObjects()) + require.Equal(t, int64(blobs), profile.LooseObjects()) + + for _, blobID := range blobIDs { + commitID := testhelper.CommitBlobWithName(t, testRepoPath, blobID, blobID, "adding another blob....") + testhelper.MustRunCommand(t, nil, "git", "-C", testRepoPath, "update-ref", "refs/heads/"+blobID, commitID) + 
} + + // write a loose object + writeBlobs(t, testRepoPath, 1) + + testhelper.MustRunCommand(t, nil, "git", "-C", testRepoPath, "repack", "-A", "-b", "-d") + + profile, err = GetProfile(ctx, testRepo) + require.NoError(t, err) + require.Zero(t, profile.UnpackedObjects()) + require.Equal(t, int64(1), profile.LooseObjects()) + + // let a ms elapse for the OS to recognize the blobs have been written after the packfile + time.Sleep(1 * time.Millisecond) + + // write another loose object + blobID := writeBlobs(t, testRepoPath, 1)[0] + + // due to OS semantics, ensure that the blob has a timestamp that is after the packfile + theFuture := time.Now().Add(10 * time.Minute) + require.NoError(t, os.Chtimes(filepath.Join(testRepoPath, "objects", blobID[0:2], blobID[2:]), theFuture, theFuture)) + + profile, err = GetProfile(ctx, testRepo) + require.NoError(t, err) + require.Equal(t, int64(1), profile.UnpackedObjects()) + require.Equal(t, int64(2), profile.LooseObjects()) +} + +func writeBlobs(t *testing.T, testRepoPath string, n int) []string { + var blobIDs []string + for i := 0; i < n; i++ { + var stdin bytes.Buffer + stdin.Write([]byte(strconv.Itoa(time.Now().Nanosecond()))) + blobIDs = append(blobIDs, text.ChompBytes(testhelper.MustRunCommand(t, &stdin, "git", "-C", testRepoPath, "hash-object", "-w", "--stdin"))) + } + + return blobIDs +} diff --git a/internal/service/objectpool/fetch_into_object_pool.go b/internal/service/objectpool/fetch_into_object_pool.go index c4e84929a..ad8d80854 100644 --- a/internal/service/objectpool/fetch_into_object_pool.go +++ b/internal/service/objectpool/fetch_into_object_pool.go @@ -6,8 +6,8 @@ import ( "fmt" "gitlab.com/gitlab-org/gitaly/internal/git/objectpool" + "gitlab.com/gitlab-org/gitaly/internal/git/stats" "gitlab.com/gitlab-org/gitaly/internal/helper" - "gitlab.com/gitlab-org/gitaly/internal/stats" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" ) diff --git a/internal/service/repository/gc.go b/internal/service/repository/gc.go 
index 5ed3b3116..49577a01a 100644 --- a/internal/service/repository/gc.go +++ b/internal/service/repository/gc.go @@ -12,9 +12,9 @@ import ( log "github.com/sirupsen/logrus" "gitlab.com/gitlab-org/gitaly/internal/git" "gitlab.com/gitlab-org/gitaly/internal/git/catfile" + "gitlab.com/gitlab-org/gitaly/internal/git/stats" "gitlab.com/gitlab-org/gitaly/internal/helper" "gitlab.com/gitlab-org/gitaly/internal/helper/housekeeping" - "gitlab.com/gitlab-org/gitaly/internal/stats" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" "google.golang.org/grpc/status" ) diff --git a/internal/service/repository/repack.go b/internal/service/repository/repack.go index a0add6443..4d7e0dc95 100644 --- a/internal/service/repository/repack.go +++ b/internal/service/repository/repack.go @@ -7,7 +7,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "gitlab.com/gitlab-org/gitaly/internal/git" "gitlab.com/gitlab-org/gitaly/internal/git/repository" - "gitlab.com/gitlab-org/gitaly/internal/stats" + "gitlab.com/gitlab-org/gitaly/internal/git/stats" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" |