diff options
author | Pavlo Strokov <pstrokov@gitlab.com> | 2020-02-06 19:41:42 +0300 |
---|---|---|
committer | Pavlo Strokov <pstrokov@gitlab.com> | 2020-02-06 19:41:42 +0300 |
commit | 6970aa09c6f4ffae7eba4f53df2afbc0a5eca4ac (patch) | |
tree | 96d318a2e2fbb2477cca81835a18e75d240b8920 | |
parent | cf6c950c53214f8331d8585bcb4af0e8fc9fe215 (diff) |
Log repo pack / ref statistics after repack
To get more visibility on the state of the repositories
after each repack we log statistics about objects.
The source of it is standard 'git count-objects' command.
It could be found by 'git repo statistic' message under
'count-objects' key in the logs.
Closes: https://gitlab.com/gitlab-org/gitaly/issues/2059
-rw-r--r-- | changelogs/unreleased/ps-log-pack-stats.yml | 5 | ||||
-rw-r--r-- | internal/service/objectpool/fetch_into_object_pool.go | 3 | ||||
-rw-r--r-- | internal/service/objectpool/fetch_into_object_pool_test.go | 57 | ||||
-rw-r--r-- | internal/service/repository/gc.go | 6 | ||||
-rw-r--r-- | internal/service/repository/gc_test.go | 33 | ||||
-rw-r--r-- | internal/service/repository/repack.go | 3 | ||||
-rw-r--r-- | internal/service/repository/repack_test.go | 82 | ||||
-rw-r--r-- | internal/stats/git.go | 112 | ||||
-rw-r--r-- | internal/stats/git_test.go | 78 | ||||
-rw-r--r-- | internal/testhelper/test_hook.go | 7 |
10 files changed, 383 insertions, 3 deletions
diff --git a/changelogs/unreleased/ps-log-pack-stats.yml b/changelogs/unreleased/ps-log-pack-stats.yml new file mode 100644 index 000000000..1284573e3 --- /dev/null +++ b/changelogs/unreleased/ps-log-pack-stats.yml @@ -0,0 +1,5 @@ +--- +title: Logging of repository objects statistics after repack +merge_request: 1800 +author: +type: added diff --git a/internal/service/objectpool/fetch_into_object_pool.go b/internal/service/objectpool/fetch_into_object_pool.go index 2e50e81f7..c4e84929a 100644 --- a/internal/service/objectpool/fetch_into_object_pool.go +++ b/internal/service/objectpool/fetch_into_object_pool.go @@ -7,6 +7,7 @@ import ( "gitlab.com/gitlab-org/gitaly/internal/git/objectpool" "gitlab.com/gitlab-org/gitaly/internal/helper" + "gitlab.com/gitlab-org/gitaly/internal/stats" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" ) @@ -24,6 +25,8 @@ func (s *server) FetchIntoObjectPool(ctx context.Context, req *gitalypb.FetchInt return nil, helper.ErrInternal(err) } + stats.LogObjectsInfo(ctx, req.ObjectPool.Repository) + return &gitalypb.FetchIntoObjectPoolResponse{}, nil } diff --git a/internal/service/objectpool/fetch_into_object_pool_test.go b/internal/service/objectpool/fetch_into_object_pool_test.go index b3ae1c97e..c2e409e6b 100644 --- a/internal/service/objectpool/fetch_into_object_pool_test.go +++ b/internal/service/objectpool/fetch_into_object_pool_test.go @@ -1,15 +1,21 @@ package objectpool import ( + "bytes" + "encoding/json" "path" "path/filepath" + "strings" "testing" + "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" + "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/internal/git/objectpool" "gitlab.com/gitlab-org/gitaly/internal/testhelper" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" + "gitlab.com/gitlab-org/labkit/log" "google.golang.org/grpc/codes" ) @@ -58,6 +64,57 @@ func TestFetchIntoObjectPool_Success(t *testing.T) { require.True(t, pool.IsValid(), "ensure that pool is valid") } +func TestFetchIntoObjectPool_CollectLogStatistics(t *testing.T) { + defer func(tl func(tb testing.TB) *logrus.Logger) { + testhelper.NewTestLogger = tl + }(testhelper.NewTestLogger) + + logBuffer := &bytes.Buffer{} + testhelper.NewTestLogger = func(tb testing.TB) *logrus.Logger { + return &logrus.Logger{Out: logBuffer, Formatter: new(logrus.JSONFormatter), Level: logrus.InfoLevel} + } + + ctx, cancel := testhelper.Context() + defer cancel() + ctx = ctxlogrus.ToContext(ctx, log.WithField("test", "logging")) + + server, serverSocketPath := runObjectPoolServer(t) + defer server.Stop() + + client, conn := newObjectPoolClient(t, serverSocketPath) + defer conn.Close() + + testRepo, _, cleanupFn := testhelper.NewTestRepo(t) + defer cleanupFn() + + pool, err := objectpool.NewObjectPool("default", testhelper.NewTestObjectPoolName(t)) + require.NoError(t, err) + defer pool.Remove(ctx) + + req := &gitalypb.FetchIntoObjectPoolRequest{ + ObjectPool: pool.ToProto(), + Origin: testRepo, + Repack: true, + } + + _, err = client.FetchIntoObjectPool(ctx, req) + require.NoError(t, err) + + msgs := strings.Split(logBuffer.String(), "\n") + const key = "count_objects" + for _, msg := range msgs { + if strings.Contains(msg, key) { + var out map[string]interface{} + require.NoError(t, json.NewDecoder(strings.NewReader(msg)).Decode(&out)) + require.Contains(t, out, key, "there is no any information about statistics") + countObjects := out[key].(map[string]interface{}) + assert.Contains(t, countObjects, "count") + return + } + } + require.FailNow(t, "no info about statistics") +} + func TestFetchIntoObjectPool_Failure(t *testing.T) { server, serverSocketPath := runObjectPoolServer(t) defer server.Stop() diff --git a/internal/service/repository/gc.go b/internal/service/repository/gc.go index 7add2b529..5ed3b3116 100644 --- a/internal/service/repository/gc.go +++ b/internal/service/repository/gc.go @@ -14,6 +14,7 @@ import ( "gitlab.com/gitlab-org/gitaly/internal/git/catfile" "gitlab.com/gitlab-org/gitaly/internal/helper" "gitlab.com/gitlab-org/gitaly/internal/helper/housekeeping" + "gitlab.com/gitlab-org/gitaly/internal/stats" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" "google.golang.org/grpc/status" ) @@ -24,7 +25,8 @@ func (server) GarbageCollect(ctx context.Context, in *gitalypb.GarbageCollectReq "WriteBitmaps": in.GetCreateBitmap(), }).Debug("GarbageCollect") - repoPath, err := helper.GetRepoPath(in.GetRepository()) + repo := in.GetRepository() + repoPath, err := helper.GetRepoPath(repo) if err != nil { return nil, err } @@ -51,6 +53,8 @@ func (server) GarbageCollect(ctx context.Context, in *gitalypb.GarbageCollectReq ctxlogger.WithError(err).Warn("Post gc housekeeping failed") } + stats.LogObjectsInfo(ctx, repo) + return &gitalypb.GarbageCollectResponse{}, nil } diff --git a/internal/service/repository/gc_test.go b/internal/service/repository/gc_test.go index b5bdb0c6f..9c50e8e84 100644 --- a/internal/service/repository/gc_test.go +++ b/internal/service/repository/gc_test.go @@ -1,6 +1,7 @@ package repository import ( + "bytes" "context" "fmt" "io/ioutil" @@ -10,12 +11,15 @@ import ( "testing" "time" + "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" + "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/internal/helper/text" "gitlab.com/gitlab-org/gitaly/internal/testhelper" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" + "gitlab.com/gitlab-org/labkit/log" "google.golang.org/grpc/codes" ) @@ -119,6 +123,35 @@ func TestGarbageCollectSuccess(t *testing.T) { } } +func TestGarbageCollectLogStatistics(t *testing.T) { + defer func(tl func(tb testing.TB) *logrus.Logger) { + testhelper.NewTestLogger = tl + }(testhelper.NewTestLogger) + + logBuffer := &bytes.Buffer{} + testhelper.NewTestLogger = func(tb testing.TB) *logrus.Logger { + return &logrus.Logger{Out: logBuffer, Formatter: new(logrus.JSONFormatter), Level: logrus.InfoLevel} + } + + ctx, cancel := testhelper.Context() + defer cancel() + ctx = ctxlogrus.ToContext(ctx, log.WithField("test", "logging")) + + server, serverSocketPath := runRepoServer(t) + defer server.Stop() + + client, conn := newRepositoryClient(t, serverSocketPath) + defer conn.Close() + + testRepo, _, cleanupFn := testhelper.NewTestRepo(t) + defer cleanupFn() + + _, err := client.GarbageCollect(ctx, &gitalypb.GarbageCollectRequest{Repository: testRepo}) + require.NoError(t, err) + + mustCountObjectLog(t, logBuffer.String()) +} + func TestGarbageCollectDeletesRefsLocks(t *testing.T) { server, serverSocketPath := runRepoServer(t) defer server.Stop() diff --git a/internal/service/repository/repack.go b/internal/service/repository/repack.go index ca23c039e..23897af74 100644 --- a/internal/service/repository/repack.go +++ b/internal/service/repository/repack.go @@ -9,6 +9,7 @@ import ( "gitlab.com/gitlab-org/gitaly/internal/git/objectpool" "gitlab.com/gitlab-org/gitaly/internal/git/repository" "gitlab.com/gitlab-org/gitaly/internal/metadata/featureflag" + "gitlab.com/gitlab-org/gitaly/internal/stats" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -66,6 +67,8 @@ func repackCommand(ctx context.Context, repo repository.GitRepo, bitmap bool, ar return status.Errorf(codes.Internal, err.Error()) } + stats.LogObjectsInfo(ctx, repo) + return nil } diff --git a/internal/service/repository/repack_test.go b/internal/service/repository/repack_test.go index a05bb756d..166b963a4 100644 --- a/internal/service/repository/repack_test.go +++ b/internal/service/repository/repack_test.go @@ -1,18 +1,24 @@ package repository import ( + "bytes" "context" + "encoding/json" "os/exec" "path" "path/filepath" + "strings" "testing" "time" + "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" + "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/internal/testhelper" "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" + "gitlab.com/gitlab-org/labkit/log" "google.golang.org/grpc/codes" ) @@ -43,6 +49,35 @@ func TestRepackIncrementalSuccess(t *testing.T) { assertModTimeAfter(t, testTime, packPath) } +func TestRepackIncrementalCollectLogStatistics(t *testing.T) { + defer func(tl func(tb testing.TB) *logrus.Logger) { + testhelper.NewTestLogger = tl + }(testhelper.NewTestLogger) + + logBuffer := &bytes.Buffer{} + testhelper.NewTestLogger = func(tb testing.TB) *logrus.Logger { + return &logrus.Logger{Out: logBuffer, Formatter: new(logrus.JSONFormatter), Level: logrus.InfoLevel} + } + + ctx, cancel := testhelper.Context() + defer cancel() + ctx = ctxlogrus.ToContext(ctx, log.WithField("test", "logging")) + + server, serverSocketPath := runRepoServer(t) + defer server.Stop() + + client, conn := newRepositoryClient(t, serverSocketPath) + defer conn.Close() + + testRepo, _, cleanupFn := testhelper.NewTestRepo(t) + defer cleanupFn() + + _, err := client.RepackIncremental(ctx, &gitalypb.RepackIncrementalRequest{Repository: testRepo}) + assert.NoError(t, err) + + mustCountObjectLog(t, logBuffer.String()) +} + func TestRepackLocal(t *testing.T) { server, serverSocketPath := runRepoServer(t) defer server.Stop() @@ -163,6 +198,53 @@ func TestRepackFullSuccess(t *testing.T) { } } +func TestRepackFullCollectLogStatistics(t *testing.T) { + defer func(tl func(tb testing.TB) *logrus.Logger) { + testhelper.NewTestLogger = tl + }(testhelper.NewTestLogger) + + logBuffer := &bytes.Buffer{} + testhelper.NewTestLogger = func(tb testing.TB) *logrus.Logger { + return &logrus.Logger{Out: logBuffer, Formatter: new(logrus.JSONFormatter), Level: logrus.InfoLevel} + } + + ctx, cancel := testhelper.Context() + defer cancel() + ctx = ctxlogrus.ToContext(ctx, log.WithField("test", "logging")) + + server, serverSocketPath := runRepoServer(t) + defer server.Stop() + + client, conn := newRepositoryClient(t, serverSocketPath) + defer conn.Close() + + testRepo, _, cleanupFn := testhelper.NewTestRepo(t) + defer cleanupFn() + + _, err := client.RepackFull(ctx, &gitalypb.RepackFullRequest{Repository: testRepo}) + require.NoError(t, err) + + mustCountObjectLog(t, logBuffer.String()) +} + +func mustCountObjectLog(t testing.TB, logData string) { + msgs := strings.Split(logData, "\n") + const key = "count_objects" + for _, msg := range msgs { + if strings.Contains(msg, key) { + var out map[string]interface{} + require.NoError(t, json.NewDecoder(strings.NewReader(msg)).Decode(&out)) + require.Contains(t, out, "grpc.request.glProjectPath") + require.Contains(t, out, "grpc.request.glRepository") + require.Contains(t, out, key, "there is no any information about statistics") + countObjects := out[key].(map[string]interface{}) + require.Contains(t, countObjects, "count") + return + } + } + require.FailNow(t, "no info about statistics") +} + func doBitmapsContainHashCache(t *testing.T, bitmapPaths []string) { // for each bitmap file, check the 2-byte flag as documented in // https://github.com/git/git/blob/master/Documentation/technical/bitmap-format.txt diff --git a/internal/stats/git.go b/internal/stats/git.go new file mode 100644 index 000000000..cc8abe59b --- /dev/null +++ b/internal/stats/git.go @@ -0,0 +1,112 @@ +package stats + +import ( + "bufio" + "context" + "io" + "strconv" + "strings" + + "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" + "gitlab.com/gitlab-org/gitaly/internal/git" + "gitlab.com/gitlab-org/gitaly/internal/git/repository" +) + +// LogObjectsInfo read statistics of the git repo objects +// and logs it under 'count-objects' key as structured entry. +func LogObjectsInfo(ctx context.Context, repo repository.GitRepo) { + logger := ctxlogrus.Extract(ctx) + + cmd, err := git.SafeCmd(ctx, repo, nil, git.SubCmd{ + Name: "count-objects", + Flags: []git.Option{git.Flag{Name: "--verbose"}}, + }) + + if err != nil { + logger.WithError(err).Warn("failed on bootstrapping to gather object statistic") + return + } + + stats, err := readObjectInfoStatistic(cmd) + if err != nil { + logger.WithError(err).Warn("failed on reading to gather object statistic") + } + + if err := cmd.Wait(); err != nil { + logger.WithError(err).Warn("failed on waiting to gather object statistic") + return + } + + if len(stats) > 0 { + logger.WithField("count_objects", stats).Info("git repo statistic") + } +} + +/* readObjectInfoStatistic parses output of 'git count-objects -v' command and represents it as dictionary +current supported format is: + count: 12 + packs: 2 + size-garbage: 934 + alternate: /some/path/to/.git/objects + alternate: "/some/other path/to/.git/objects" +will result in: + { + "count": 12, + "packs": 2, + "size-garbage": 934, + "alternate": ["/some/path/to/.git/objects", "/some/other path/to/.git/objects"] + } +*/ +func readObjectInfoStatistic(reader io.Reader) (map[string]interface{}, error) { + stats := map[string]interface{}{} + + scanner := bufio.NewScanner(reader) + for scanner.Scan() { + line := scanner.Text() + parts := strings.SplitN(line, ": ", 2) + if len(parts) != 2 { + continue + } + + // one of: count, size, in-pack, packs, size-pack, prune-packable, garbage, size-garbage, alternate (repeatable) + key := parts[0] + rawVal := strings.TrimPrefix(parts[1], ": ") + + switch key { + case "alternate": + addMultiString(stats, key, rawVal) + default: + addInt(stats, key, rawVal) + } + } + + return stats, scanner.Err() +} + +func addMultiString(stats map[string]interface{}, key, rawVal string) { + val := strings.Trim(rawVal, "\" \t\n") + + statVal, found := stats[key] + if !found { + stats[key] = val + return + } + + statAggr, ok := statVal.([]string) // 'alternate' is only repeatable key and it is a string type + if ok { + statAggr = append(statAggr, val) + } else { + delete(stats, key) // remove single string value of 'alternate' to replace it with slice + statAggr = []string{statVal.(string), val} + } + stats[key] = statAggr +} + +func addInt(stats map[string]interface{}, key, rawVal string) { + val, err := strconv.ParseInt(rawVal, 10, 64) + if err != nil { + return + } + + stats[key] = val +} diff --git a/internal/stats/git_test.go b/internal/stats/git_test.go new file mode 100644 index 000000000..0de62d3c6 --- /dev/null +++ b/internal/stats/git_test.go @@ -0,0 +1,78 @@ +package stats + +import ( + "bytes" + "encoding/json" + "io/ioutil" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/internal/helper" + "gitlab.com/gitlab-org/gitaly/internal/testhelper" + "gitlab.com/gitlab-org/gitaly/proto/go/gitalypb" +) + +func TestLogObjectInfo(t *testing.T) { + repo1, repoPath1, cleanup1 := testhelper.NewTestRepo(t) + defer cleanup1() + + repo2, repoPath2, cleanup2 := testhelper.NewTestRepo(t) + defer cleanup2() + + ctx, cancel := testhelper.Context() + defer cancel() + + logBuffer := &bytes.Buffer{} + log := &logrus.Logger{Out: logBuffer, Formatter: new(logrus.JSONFormatter), Level: logrus.InfoLevel} + testCtx := ctxlogrus.ToContext(ctx, log.WithField("test", "logging")) + + requireLog := func(msg string) map[string]interface{} { + var out map[string]interface{} + require.NoError(t, json.NewDecoder(strings.NewReader(msg)).Decode(&out)) + const key = "count_objects" + require.Contains(t, out, key, "there is no any information about statistics") + countObjects := out[key].(map[string]interface{}) + require.Contains(t, countObjects, "count") + require.Contains(t, countObjects, "size") + require.Contains(t, countObjects, "in-pack") + require.Contains(t, countObjects, "packs") + require.Contains(t, countObjects, "size-pack") + require.Contains(t, countObjects, "garbage") + require.Contains(t, countObjects, "size-garbage") + return countObjects + } + + t.Run("shared repo with multiple alternates", func(t *testing.T) { + storagePath, err := helper.GetStorageByName(repo1.StorageName) + require.NoError(t, err) + + tmpDir, err := ioutil.TempDir(storagePath, "") + require.NoError(t, err) + defer func() { require.NoError(t, os.RemoveAll(tmpDir)) }() + + // clone existing local repo with two alternates + testhelper.MustRunCommand(t, nil, "git", "clone", "--shared", repoPath1, "--reference", repoPath1, "--reference", repoPath2, tmpDir) + + logBuffer.Reset() + LogObjectsInfo(testCtx, &gitalypb.Repository{ + StorageName: repo1.StorageName, + RelativePath: filepath.Join(strings.TrimPrefix(tmpDir, storagePath), ".git"), + }) + + countObjects := requireLog(logBuffer.String()) + require.ElementsMatch(t, []string{repoPath1 + "/objects", repoPath2 + "/objects"}, countObjects["alternate"]) + }) + + t.Run("repo without alternates", func(t *testing.T) { + logBuffer.Reset() + LogObjectsInfo(testCtx, repo2) + + countObjects := requireLog(logBuffer.String()) + require.Contains(t, countObjects, "prune-packable") + }) +} diff --git a/internal/testhelper/test_hook.go b/internal/testhelper/test_hook.go index 448dd2c5b..7da7baffe 100644 --- a/internal/testhelper/test_hook.go +++ b/internal/testhelper/test_hook.go @@ -7,8 +7,11 @@ import ( log "github.com/sirupsen/logrus" ) -// NewTestLogger created a logrus hook -func NewTestLogger(tb testing.TB) *log.Logger { +// NewTestLogger creates logger that should be used in the tests. +var NewTestLogger = DiscardTestLogger + +// DiscardTestLogger created a logrus hook that discards everything. +func DiscardTestLogger(tb testing.TB) *log.Logger { logger := log.New() logger.Out = ioutil.Discard |