diff options
author | Toon Claes <toon@gitlab.com> | 2022-11-04 15:28:56 +0300 |
---|---|---|
committer | Toon Claes <toon@gitlab.com> | 2022-11-04 16:18:52 +0300 |
commit | ca607e6f265f39c2ae04aac46a9502a357fb738f (patch) | |
tree | c08996ae69a158258a52928ea83a9b6cd3c21e79 | |
parent | 12468228b52767dd5a66c76d7cbdb422495afc43 (diff) |
linguist: Add file count
Next to the number of bytes per language, also provide the number of
files for each language.
Issue: https://gitlab.com/gitlab-org/gitaly/-/issues/4293
Changelog: added
-rw-r--r-- | internal/gitaly/linguist/language_stats.go | 33 | ||||
-rw-r--r-- | internal/gitaly/linguist/language_stats_test.go | 30 | ||||
-rw-r--r-- | internal/gitaly/linguist/linguist.go | 16 | ||||
-rw-r--r-- | internal/gitaly/linguist/linguist_test.go | 110 | ||||
-rw-r--r-- | internal/gitaly/service/commit/languages.go | 11 | ||||
-rw-r--r-- | internal/gitaly/service/commit/languages_test.go | 32 |
6 files changed, 121 insertions, 111 deletions
diff --git a/internal/gitaly/linguist/language_stats.go b/internal/gitaly/linguist/language_stats.go index 5c6cfcfd8..986b80d92 100644 --- a/internal/gitaly/linguist/language_stats.go +++ b/internal/gitaly/linguist/language_stats.go @@ -19,6 +19,9 @@ const ( languageStatsVersion = "v3:gitaly" ) +// byteCountPerLanguage represents a counter value (bytes) per language. +type byteCountPerLanguage map[string]uint64 + // languageStats takes care of accumulating and caching language statistics for // a repository. type languageStats struct { @@ -31,16 +34,16 @@ type languageStats struct { m *sync.Mutex // Totals contains the total statistics for the CommitID - Totals ByteCountPerLanguage `json:"totals"` + Totals byteCountPerLanguage `json:"totals"` // ByFile contains the statistics for a single file, where the filename // is its key. - ByFile map[string]ByteCountPerLanguage `json:"by_file"` + ByFile map[string]byteCountPerLanguage `json:"by_file"` } func newLanguageStats() languageStats { return languageStats{ - Totals: ByteCountPerLanguage{}, - ByFile: make(map[string]ByteCountPerLanguage), + Totals: byteCountPerLanguage{}, + ByFile: make(map[string]byteCountPerLanguage), m: &sync.Mutex{}, } } @@ -92,7 +95,7 @@ func (c *languageStats) add(filename, language string, size uint64) { } } - c.ByFile[filename] = ByteCountPerLanguage{language: size} + c.ByFile[filename] = byteCountPerLanguage{language: size} if size > 0 { c.Totals[language] += size } @@ -164,3 +167,23 @@ func (c *languageStats) save(repo *localrepo.Repo, commitID string) error { return nil } + +func (c *languageStats) allCounts() CountPerLanguage { + counts := make(CountPerLanguage, len(c.Totals)) + + for _, bc := range c.ByFile { + for l, b := range bc { + if b <= 0 { + continue + } + + cl := counts[l] + cl.ByteCount += b + cl.FileCount++ + + counts[l] = cl + } + } + + return counts +} diff --git a/internal/gitaly/linguist/language_stats_test.go b/internal/gitaly/linguist/language_stats_test.go index 74f66a568..69a2c3b4f 100644 --- a/internal/gitaly/linguist/language_stats_test.go +++ b/internal/gitaly/linguist/language_stats_test.go @@ -4,9 +4,11 @@ import ( "compress/zlib" "encoding/json" "fmt" + "math/rand" "os" "path/filepath" "testing" + "time" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" @@ -43,7 +45,7 @@ func TestInitLanguageStats(t *testing.T) { stats.Totals["C"] = 555 require.NoError(t, stats.save(repo, "badcafe")) - require.Equal(t, ByteCountPerLanguage{"C": 555}, stats.Totals) + require.Equal(t, byteCountPerLanguage{"C": 555}, stats.Totals) }, }, { @@ -113,7 +115,7 @@ func TestLanguageStats_add(t *testing.T) { require.Equal(t, uint64(100), s.Totals["Go"]) require.Len(t, s.ByFile, 1) - require.Equal(t, ByteCountPerLanguage{"Go": 100}, s.ByFile["main.go"]) + require.Equal(t, byteCountPerLanguage{"Go": 100}, s.ByFile["main.go"]) }, }, { @@ -124,8 +126,8 @@ func TestLanguageStats_add(t *testing.T) { require.Equal(t, uint64(180), s.Totals["Go"]) require.Len(t, s.ByFile, 2) - require.Equal(t, ByteCountPerLanguage{"Go": 100}, s.ByFile["main.go"]) - require.Equal(t, ByteCountPerLanguage{"Go": 80}, s.ByFile["main_test.go"]) + require.Equal(t, byteCountPerLanguage{"Go": 100}, s.ByFile["main.go"]) + require.Equal(t, byteCountPerLanguage{"Go": 80}, s.ByFile["main_test.go"]) }, }, { @@ -137,8 +139,8 @@ func TestLanguageStats_add(t *testing.T) { require.Equal(t, uint64(60), s.Totals["Go"]) require.Equal(t, uint64(30), s.Totals["Make"]) require.Len(t, s.ByFile, 2) - require.Equal(t, ByteCountPerLanguage{"Go": 60}, s.ByFile["main.go"]) - require.Equal(t, ByteCountPerLanguage{"Make": 30}, s.ByFile["Makefile"]) + require.Equal(t, byteCountPerLanguage{"Go": 60}, s.ByFile["main.go"]) + require.Equal(t, byteCountPerLanguage{"Make": 30}, s.ByFile["Makefile"]) }, }, { @@ -149,7 +151,7 @@ func TestLanguageStats_add(t *testing.T) { require.Equal(t, uint64(30), s.Totals["Go"]) require.Len(t, s.ByFile, 1) - require.Equal(t, ByteCountPerLanguage{"Go": 30}, s.ByFile["main.go"]) + require.Equal(t, byteCountPerLanguage{"Go": 30}, s.ByFile["main.go"]) }, }, } { @@ -184,7 +186,7 @@ func TestLanguageStats_drop(t *testing.T) { require.Equal(t, uint64(20), s.Totals["Go"]) require.Len(t, s.ByFile, 1) - require.Equal(t, ByteCountPerLanguage{"Go": 20}, s.ByFile["main_test.go"]) + require.Equal(t, byteCountPerLanguage{"Go": 20}, s.ByFile["main_test.go"]) }, }, { @@ -194,8 +196,8 @@ func TestLanguageStats_drop(t *testing.T) { require.Equal(t, uint64(100), s.Totals["Go"]) require.Len(t, s.ByFile, 2) - require.Equal(t, ByteCountPerLanguage{"Go": 80}, s.ByFile["main.go"]) - require.Equal(t, ByteCountPerLanguage{"Go": 20}, s.ByFile["main_test.go"]) + require.Equal(t, byteCountPerLanguage{"Go": 80}, s.ByFile["main.go"]) + require.Equal(t, byteCountPerLanguage{"Go": 20}, s.ByFile["main_test.go"]) }, }, { @@ -213,8 +215,8 @@ func TestLanguageStats_drop(t *testing.T) { require.NoError(t, err) s.Totals["Go"] = 100 - s.ByFile["main.go"] = ByteCountPerLanguage{"Go": 80} - s.ByFile["main_test.go"] = ByteCountPerLanguage{"Go": 20} + s.ByFile["main.go"] = byteCountPerLanguage{"Go": 80} + s.ByFile["main_test.go"] = byteCountPerLanguage{"Go": 20} tc.run(t, s) }) @@ -235,8 +237,8 @@ func TestLanguageStats_save(t *testing.T) { require.NoError(t, err) s.Totals["Go"] = 100 - s.ByFile["main.go"] = ByteCountPerLanguage{"Go": 80} - s.ByFile["main_test.go"] = ByteCountPerLanguage{"Go": 20} + s.ByFile["main.go"] = byteCountPerLanguage{"Go": 80} + s.ByFile["main_test.go"] = byteCountPerLanguage{"Go": 20} err = s.save(repo, "buzz") require.NoError(t, err) diff --git a/internal/gitaly/linguist/linguist.go b/internal/gitaly/linguist/linguist.go index 0d139101a..3356f7742 100644 --- a/internal/gitaly/linguist/linguist.go +++ b/internal/gitaly/linguist/linguist.go @@ -17,8 +17,14 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config" ) -// ByteCountPerLanguage represents a counter value (bytes) per language. -type ByteCountPerLanguage map[string]uint64 +// Count holds both the byte and file count for one language. +type Count struct { + ByteCount uint64 + FileCount uint32 +} + +// CountPerLanguage represents a byte and file count per language. +type CountPerLanguage map[string]Count // Instance is a holder of the defined in the system language settings. type Instance struct { @@ -48,13 +54,13 @@ func Color(language string) string { } // Stats returns the repository's language stats as reported by 'git-linguist'. -func (inst *Instance) Stats(ctx context.Context, commitID string) (ByteCountPerLanguage, error) { +func (inst *Instance) Stats(ctx context.Context, commitID string) (CountPerLanguage, error) { stats, err := initLanguageStats(inst.repo) if err != nil { ctxlogrus.Extract(ctx).WithError(err).Info("linguist load from cache") } if stats.CommitID == commitID { - return stats.Totals, nil + return stats.allCounts(), nil } objectReader, cancel, err := inst.catfileCache.ObjectReader(ctx, inst.repo) @@ -180,7 +186,7 @@ func (inst *Instance) Stats(ctx context.Context, commitID string) (ByteCountPerL return nil, fmt.Errorf("linguist language stats save: %w", err) } - return stats.Totals, nil + return stats.allCounts(), nil } func (inst *Instance) needsFullRecalculation(ctx context.Context, cachedID, commitID string) (bool, error) { diff --git a/internal/gitaly/linguist/linguist_test.go b/internal/gitaly/linguist/linguist_test.go index 3a67ce07e..951c00a74 100644 --- a/internal/gitaly/linguist/linguist_test.go +++ b/internal/gitaly/linguist/linguist_test.go @@ -34,7 +34,7 @@ func TestInstance_Stats(t *testing.T) { for _, tc := range []struct { desc string setup func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) - expectedStats ByteCountPerLanguage + expectedStats CountPerLanguage expectedErr string }{ { @@ -53,11 +53,11 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{ - "CoffeeScript": 107, - "HTML": 349, - "JavaScript": 1014, - "Ruby": 2943, + expectedStats: CountPerLanguage{ + "CoffeeScript": {ByteCount: 107, FileCount: 1}, + "HTML": {ByteCount: 349, FileCount: 1}, + "JavaScript": {ByteCount: 1014, FileCount: 1}, + "Ruby": {ByteCount: 2943, FileCount: 1}, }, }, { @@ -79,8 +79,8 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{ - "C": 85, + expectedStats: CountPerLanguage{ + "C": {ByteCount: 85, FileCount: 1}, }, }, { @@ -103,9 +103,9 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{ - "C": 85, - "Ruby": 403, + expectedStats: CountPerLanguage{ + "C": {ByteCount: 85, FileCount: 1}, + "Ruby": {ByteCount: 403, FileCount: 1}, }, }, { @@ -125,8 +125,8 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{ - "C": 85, + expectedStats: CountPerLanguage{ + "C": {ByteCount: 85, FileCount: 1}, }, }, { @@ -147,9 +147,9 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{ - "C": 85, - "Ruby": 500, + expectedStats: CountPerLanguage{ + "C": {ByteCount: 85, FileCount: 1}, + "Ruby": {ByteCount: 500, FileCount: 1}, }, }, { @@ -169,8 +169,8 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{ - "C": 85, + expectedStats: CountPerLanguage{ + "C": {ByteCount: 85, FileCount: 1}, }, }, { @@ -191,9 +191,9 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{ - "C": 85, - "Swift": 500, + expectedStats: CountPerLanguage{ + "C": {ByteCount: 85, FileCount: 1}, + "Swift": {ByteCount: 500, FileCount: 1}, }, }, { @@ -211,8 +211,8 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{ - "C": 85, + expectedStats: CountPerLanguage{ + "C": {ByteCount: 85, FileCount: 1}, }, }, { @@ -236,10 +236,10 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{ - "C": 85, - "JSON": 234, - "Markdown": 553, + expectedStats: CountPerLanguage{ + "C": {ByteCount: 85, FileCount: 1}, + "JSON": {ByteCount: 234, FileCount: 1}, + "Markdown": {ByteCount: 553, FileCount: 1}, }, }, { @@ -267,8 +267,8 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{ - "Ruby": 403, + expectedStats: CountPerLanguage{ + "Ruby": {ByteCount: 403, FileCount: 1}, }, }, { @@ -293,9 +293,9 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{ - "KiCad Layout": 500, - "XML": 120, + expectedStats: CountPerLanguage{ + "KiCad Layout": {ByteCount: 500, FileCount: 1}, + "XML": {ByteCount: 120, FileCount: 1}, }, }, { @@ -329,8 +329,8 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{ - "Ruby": 175, + expectedStats: CountPerLanguage{ + "Ruby": {ByteCount: 175, FileCount: 2}, }, }, { @@ -349,7 +349,7 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{}, + expectedStats: CountPerLanguage{}, }, { desc: "preexisting cache", @@ -377,11 +377,11 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{ - "CoffeeScript": 107, - "HTML": 349, - "JavaScript": 1014, - "Ruby": 2943, + expectedStats: CountPerLanguage{ + "CoffeeScript": {ByteCount: 107, FileCount: 1}, + "HTML": {ByteCount: 349, FileCount: 1}, + "JavaScript": {ByteCount: 1014, FileCount: 1}, + "Ruby": {ByteCount: 2943, FileCount: 1}, }, }, { @@ -412,9 +412,9 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{ - "HTML": 349, - "JavaScript": 1014, + expectedStats: CountPerLanguage{ + "HTML": {ByteCount: 349, FileCount: 1}, + "JavaScript": {ByteCount: 1014, FileCount: 1}, }, }, { @@ -443,7 +443,7 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{}, + expectedStats: CountPerLanguage{}, }, { desc: "corrupted cache", @@ -463,11 +463,11 @@ func TestInstance_Stats(t *testing.T) { return repoProto, repoPath, commitID }, - expectedStats: ByteCountPerLanguage{ - "CoffeeScript": 107, - "HTML": 349, - "JavaScript": 1014, - "Ruby": 2943, + expectedStats: CountPerLanguage{ + "CoffeeScript": {ByteCount: 107, FileCount: 1}, + "HTML": {ByteCount: 349, FileCount: 1}, + "JavaScript": {ByteCount: 1014, FileCount: 1}, + "Ruby": {ByteCount: 2943, FileCount: 1}, }, }, { @@ -490,14 +490,14 @@ func TestInstance_Stats(t *testing.T) { stats, err := New(cfg, catfileCache, repo).Stats(ctx, oldCommitID.String()) require.NoError(t, err) require.FileExists(t, filepath.Join(repoPath, languageStatsFilename)) - require.Equal(t, ByteCountPerLanguage{ - "Ruby": 19, + require.Equal(t, CountPerLanguage{ + "Ruby": {ByteCount: 19, FileCount: 1}, }, stats) return repoProto, repoPath, newCommitID }, - expectedStats: ByteCountPerLanguage{ - "Go": 12, + expectedStats: CountPerLanguage{ + "Go": {ByteCount: 12, FileCount: 1}, }, }, { @@ -621,8 +621,8 @@ func BenchmarkInstance_Stats(b *testing.B) { linguist := New(cfg, catfileCache, repo) - var scratchStat ByteCountPerLanguage - var incStats ByteCountPerLanguage + var scratchStat CountPerLanguage + var incStats CountPerLanguage b.Run("from scratch", func(b *testing.B) { for i := 0; i < b.N; i++ { diff --git a/internal/gitaly/service/commit/languages.go b/internal/gitaly/service/commit/languages.go index be2116cc9..cec725a23 100644 --- a/internal/gitaly/service/commit/languages.go +++ b/internal/gitaly/service/commit/languages.go @@ -51,7 +51,7 @@ func (s *server) CommitLanguages(ctx context.Context, req *gitalypb.CommitLangua total := uint64(0) for _, count := range stats { - total += count + total += count.ByteCount } if total == 0 { @@ -60,10 +60,11 @@ func (s *server) CommitLanguages(ctx context.Context, req *gitalypb.CommitLangua for lang, count := range stats { l := &gitalypb.CommitLanguagesResponse_Language{ - Name: lang, - Share: float32(100*count) / float32(total), - Color: linguist.Color(lang), - Bytes: stats[lang], + Name: lang, + Share: float32(100*count.ByteCount) / float32(total), + Color: linguist.Color(lang), + Bytes: count.ByteCount, + FileCount: count.FileCount, } resp.Languages = append(resp.Languages, l) } diff --git a/internal/gitaly/service/commit/languages_test.go b/internal/gitaly/service/commit/languages_test.go index 63b3a22ad..088cbd850 100644 --- a/internal/gitaly/service/commit/languages_test.go +++ b/internal/gitaly/service/commit/languages_test.go @@ -36,38 +36,16 @@ func TestLanguages(t *testing.T) { require.NoError(t, err) expectedLanguages := []*gitalypb.CommitLanguagesResponse_Language{ - {Name: "Ruby", Share: 65.28394, Color: "#701516", Bytes: 2943}, - {Name: "JavaScript", Share: 22.493345, Color: "#f1e05a", Bytes: 1014}, - {Name: "HTML", Share: 7.741792, Color: "#e34c26", Bytes: 349}, - {Name: "CoffeeScript", Share: 2.373558, Color: "#244776", Bytes: 107}, - {Name: "Modula-2", Share: 2.1073646, Color: "#10253f", Bytes: 95}, + {Name: "Ruby", Share: 65.28394, Color: "#701516", Bytes: 2943, FileCount: 4}, + {Name: "JavaScript", Share: 22.493345, Color: "#f1e05a", Bytes: 1014, FileCount: 1}, + {Name: "HTML", Share: 7.741792, Color: "#e34c26", Bytes: 349, FileCount: 1}, + {Name: "CoffeeScript", Share: 2.373558, Color: "#244776", Bytes: 107, FileCount: 1}, + {Name: "Modula-2", Share: 2.1073646, Color: "#10253f", Bytes: 95, FileCount: 1}, } testhelper.ProtoEqual(t, expectedLanguages, resp.Languages) } -func TestFileCountIsZeroWhenFeatureIsDisabled(t *testing.T) { - t.Parallel() - - ctx := testhelper.Context(t) - _, repo, _, client := setupCommitServiceWithRepo(t, ctx) - - request := &gitalypb.CommitLanguagesRequest{ - Repository: repo, - Revision: []byte("cb19058ecc02d01f8e4290b7e79cafd16a8839b6"), - } - - resp, err := client.CommitLanguages(ctx, request) - require.NoError(t, err) - - require.NotZero(t, len(resp.Languages), "number of languages in response") - - for i := range resp.Languages { - actualLanguage := resp.Languages[i] - require.Equal(t, uint32(0), actualLanguage.FileCount) - } -} - func TestLanguagesEmptyRevision(t *testing.T) { t.Parallel() |