Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Toon Claes <toon@gitlab.com> 2022-11-04 15:28:56 +0300
committer: Toon Claes <toon@gitlab.com> 2022-11-04 16:18:52 +0300
commit: ca607e6f265f39c2ae04aac46a9502a357fb738f (patch)
tree: c08996ae69a158258a52928ea83a9b6cd3c21e79
parent: 12468228b52767dd5a66c76d7cbdb422495afc43 (diff)
linguist: Add file count
Next to the number of bytes per language, also provide the number of files for each language.

Issue: https://gitlab.com/gitlab-org/gitaly/-/issues/4293
Changelog: added
-rw-r--r-- internal/gitaly/linguist/language_stats.go 33
-rw-r--r-- internal/gitaly/linguist/language_stats_test.go 30
-rw-r--r-- internal/gitaly/linguist/linguist.go 16
-rw-r--r-- internal/gitaly/linguist/linguist_test.go 110
-rw-r--r-- internal/gitaly/service/commit/languages.go 11
-rw-r--r-- internal/gitaly/service/commit/languages_test.go 32
6 files changed, 121 insertions, 111 deletions
diff --git a/internal/gitaly/linguist/language_stats.go b/internal/gitaly/linguist/language_stats.go
index 5c6cfcfd8..986b80d92 100644
--- a/internal/gitaly/linguist/language_stats.go
+++ b/internal/gitaly/linguist/language_stats.go
@@ -19,6 +19,9 @@ const (
languageStatsVersion = "v3:gitaly"
)
+// byteCountPerLanguage represents a counter value (bytes) per language.
+type byteCountPerLanguage map[string]uint64
+
// languageStats takes care of accumulating and caching language statistics for
// a repository.
type languageStats struct {
@@ -31,16 +34,16 @@ type languageStats struct {
m *sync.Mutex
// Totals contains the total statistics for the CommitID
- Totals ByteCountPerLanguage `json:"totals"`
+ Totals byteCountPerLanguage `json:"totals"`
// ByFile contains the statistics for a single file, where the filename
// is its key.
- ByFile map[string]ByteCountPerLanguage `json:"by_file"`
+ ByFile map[string]byteCountPerLanguage `json:"by_file"`
}
func newLanguageStats() languageStats {
return languageStats{
- Totals: ByteCountPerLanguage{},
- ByFile: make(map[string]ByteCountPerLanguage),
+ Totals: byteCountPerLanguage{},
+ ByFile: make(map[string]byteCountPerLanguage),
m: &sync.Mutex{},
}
}
@@ -92,7 +95,7 @@ func (c *languageStats) add(filename, language string, size uint64) {
}
}
- c.ByFile[filename] = ByteCountPerLanguage{language: size}
+ c.ByFile[filename] = byteCountPerLanguage{language: size}
if size > 0 {
c.Totals[language] += size
}
@@ -164,3 +167,23 @@ func (c *languageStats) save(repo *localrepo.Repo, commitID string) error {
return nil
}
+
+func (c *languageStats) allCounts() CountPerLanguage {
+ counts := make(CountPerLanguage, len(c.Totals))
+
+ for _, bc := range c.ByFile {
+ for l, b := range bc {
+ if b <= 0 {
+ continue
+ }
+
+ cl := counts[l]
+ cl.ByteCount += b
+ cl.FileCount++
+
+ counts[l] = cl
+ }
+ }
+
+ return counts
+}
diff --git a/internal/gitaly/linguist/language_stats_test.go b/internal/gitaly/linguist/language_stats_test.go
index 74f66a568..69a2c3b4f 100644
--- a/internal/gitaly/linguist/language_stats_test.go
+++ b/internal/gitaly/linguist/language_stats_test.go
@@ -4,9 +4,11 @@ import (
"compress/zlib"
"encoding/json"
"fmt"
+ "math/rand"
"os"
"path/filepath"
"testing"
+ "time"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest"
@@ -43,7 +45,7 @@ func TestInitLanguageStats(t *testing.T) {
stats.Totals["C"] = 555
require.NoError(t, stats.save(repo, "badcafe"))
- require.Equal(t, ByteCountPerLanguage{"C": 555}, stats.Totals)
+ require.Equal(t, byteCountPerLanguage{"C": 555}, stats.Totals)
},
},
{
@@ -113,7 +115,7 @@ func TestLanguageStats_add(t *testing.T) {
require.Equal(t, uint64(100), s.Totals["Go"])
require.Len(t, s.ByFile, 1)
- require.Equal(t, ByteCountPerLanguage{"Go": 100}, s.ByFile["main.go"])
+ require.Equal(t, byteCountPerLanguage{"Go": 100}, s.ByFile["main.go"])
},
},
{
@@ -124,8 +126,8 @@ func TestLanguageStats_add(t *testing.T) {
require.Equal(t, uint64(180), s.Totals["Go"])
require.Len(t, s.ByFile, 2)
- require.Equal(t, ByteCountPerLanguage{"Go": 100}, s.ByFile["main.go"])
- require.Equal(t, ByteCountPerLanguage{"Go": 80}, s.ByFile["main_test.go"])
+ require.Equal(t, byteCountPerLanguage{"Go": 100}, s.ByFile["main.go"])
+ require.Equal(t, byteCountPerLanguage{"Go": 80}, s.ByFile["main_test.go"])
},
},
{
@@ -137,8 +139,8 @@ func TestLanguageStats_add(t *testing.T) {
require.Equal(t, uint64(60), s.Totals["Go"])
require.Equal(t, uint64(30), s.Totals["Make"])
require.Len(t, s.ByFile, 2)
- require.Equal(t, ByteCountPerLanguage{"Go": 60}, s.ByFile["main.go"])
- require.Equal(t, ByteCountPerLanguage{"Make": 30}, s.ByFile["Makefile"])
+ require.Equal(t, byteCountPerLanguage{"Go": 60}, s.ByFile["main.go"])
+ require.Equal(t, byteCountPerLanguage{"Make": 30}, s.ByFile["Makefile"])
},
},
{
@@ -149,7 +151,7 @@ func TestLanguageStats_add(t *testing.T) {
require.Equal(t, uint64(30), s.Totals["Go"])
require.Len(t, s.ByFile, 1)
- require.Equal(t, ByteCountPerLanguage{"Go": 30}, s.ByFile["main.go"])
+ require.Equal(t, byteCountPerLanguage{"Go": 30}, s.ByFile["main.go"])
},
},
} {
@@ -184,7 +186,7 @@ func TestLanguageStats_drop(t *testing.T) {
require.Equal(t, uint64(20), s.Totals["Go"])
require.Len(t, s.ByFile, 1)
- require.Equal(t, ByteCountPerLanguage{"Go": 20}, s.ByFile["main_test.go"])
+ require.Equal(t, byteCountPerLanguage{"Go": 20}, s.ByFile["main_test.go"])
},
},
{
@@ -194,8 +196,8 @@ func TestLanguageStats_drop(t *testing.T) {
require.Equal(t, uint64(100), s.Totals["Go"])
require.Len(t, s.ByFile, 2)
- require.Equal(t, ByteCountPerLanguage{"Go": 80}, s.ByFile["main.go"])
- require.Equal(t, ByteCountPerLanguage{"Go": 20}, s.ByFile["main_test.go"])
+ require.Equal(t, byteCountPerLanguage{"Go": 80}, s.ByFile["main.go"])
+ require.Equal(t, byteCountPerLanguage{"Go": 20}, s.ByFile["main_test.go"])
},
},
{
@@ -213,8 +215,8 @@ func TestLanguageStats_drop(t *testing.T) {
require.NoError(t, err)
s.Totals["Go"] = 100
- s.ByFile["main.go"] = ByteCountPerLanguage{"Go": 80}
- s.ByFile["main_test.go"] = ByteCountPerLanguage{"Go": 20}
+ s.ByFile["main.go"] = byteCountPerLanguage{"Go": 80}
+ s.ByFile["main_test.go"] = byteCountPerLanguage{"Go": 20}
tc.run(t, s)
})
@@ -235,8 +237,8 @@ func TestLanguageStats_save(t *testing.T) {
require.NoError(t, err)
s.Totals["Go"] = 100
- s.ByFile["main.go"] = ByteCountPerLanguage{"Go": 80}
- s.ByFile["main_test.go"] = ByteCountPerLanguage{"Go": 20}
+ s.ByFile["main.go"] = byteCountPerLanguage{"Go": 80}
+ s.ByFile["main_test.go"] = byteCountPerLanguage{"Go": 20}
err = s.save(repo, "buzz")
require.NoError(t, err)
diff --git a/internal/gitaly/linguist/linguist.go b/internal/gitaly/linguist/linguist.go
index 0d139101a..3356f7742 100644
--- a/internal/gitaly/linguist/linguist.go
+++ b/internal/gitaly/linguist/linguist.go
@@ -17,8 +17,14 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config"
)
-// ByteCountPerLanguage represents a counter value (bytes) per language.
-type ByteCountPerLanguage map[string]uint64
+// Count holds both the byte and file count for one language.
+type Count struct {
+ ByteCount uint64
+ FileCount uint32
+}
+
+// CountPerLanguage represents a byte and file count per language.
+type CountPerLanguage map[string]Count
// Instance is a holder of the defined in the system language settings.
type Instance struct {
@@ -48,13 +54,13 @@ func Color(language string) string {
}
// Stats returns the repository's language stats as reported by 'git-linguist'.
-func (inst *Instance) Stats(ctx context.Context, commitID string) (ByteCountPerLanguage, error) {
+func (inst *Instance) Stats(ctx context.Context, commitID string) (CountPerLanguage, error) {
stats, err := initLanguageStats(inst.repo)
if err != nil {
ctxlogrus.Extract(ctx).WithError(err).Info("linguist load from cache")
}
if stats.CommitID == commitID {
- return stats.Totals, nil
+ return stats.allCounts(), nil
}
objectReader, cancel, err := inst.catfileCache.ObjectReader(ctx, inst.repo)
@@ -180,7 +186,7 @@ func (inst *Instance) Stats(ctx context.Context, commitID string) (ByteCountPerL
return nil, fmt.Errorf("linguist language stats save: %w", err)
}
- return stats.Totals, nil
+ return stats.allCounts(), nil
}
func (inst *Instance) needsFullRecalculation(ctx context.Context, cachedID, commitID string) (bool, error) {
diff --git a/internal/gitaly/linguist/linguist_test.go b/internal/gitaly/linguist/linguist_test.go
index 3a67ce07e..951c00a74 100644
--- a/internal/gitaly/linguist/linguist_test.go
+++ b/internal/gitaly/linguist/linguist_test.go
@@ -34,7 +34,7 @@ func TestInstance_Stats(t *testing.T) {
for _, tc := range []struct {
desc string
setup func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID)
- expectedStats ByteCountPerLanguage
+ expectedStats CountPerLanguage
expectedErr string
}{
{
@@ -53,11 +53,11 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{
- "CoffeeScript": 107,
- "HTML": 349,
- "JavaScript": 1014,
- "Ruby": 2943,
+ expectedStats: CountPerLanguage{
+ "CoffeeScript": {ByteCount: 107, FileCount: 1},
+ "HTML": {ByteCount: 349, FileCount: 1},
+ "JavaScript": {ByteCount: 1014, FileCount: 1},
+ "Ruby": {ByteCount: 2943, FileCount: 1},
},
},
{
@@ -79,8 +79,8 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{
- "C": 85,
+ expectedStats: CountPerLanguage{
+ "C": {ByteCount: 85, FileCount: 1},
},
},
{
@@ -103,9 +103,9 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{
- "C": 85,
- "Ruby": 403,
+ expectedStats: CountPerLanguage{
+ "C": {ByteCount: 85, FileCount: 1},
+ "Ruby": {ByteCount: 403, FileCount: 1},
},
},
{
@@ -125,8 +125,8 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{
- "C": 85,
+ expectedStats: CountPerLanguage{
+ "C": {ByteCount: 85, FileCount: 1},
},
},
{
@@ -147,9 +147,9 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{
- "C": 85,
- "Ruby": 500,
+ expectedStats: CountPerLanguage{
+ "C": {ByteCount: 85, FileCount: 1},
+ "Ruby": {ByteCount: 500, FileCount: 1},
},
},
{
@@ -169,8 +169,8 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{
- "C": 85,
+ expectedStats: CountPerLanguage{
+ "C": {ByteCount: 85, FileCount: 1},
},
},
{
@@ -191,9 +191,9 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{
- "C": 85,
- "Swift": 500,
+ expectedStats: CountPerLanguage{
+ "C": {ByteCount: 85, FileCount: 1},
+ "Swift": {ByteCount: 500, FileCount: 1},
},
},
{
@@ -211,8 +211,8 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{
- "C": 85,
+ expectedStats: CountPerLanguage{
+ "C": {ByteCount: 85, FileCount: 1},
},
},
{
@@ -236,10 +236,10 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{
- "C": 85,
- "JSON": 234,
- "Markdown": 553,
+ expectedStats: CountPerLanguage{
+ "C": {ByteCount: 85, FileCount: 1},
+ "JSON": {ByteCount: 234, FileCount: 1},
+ "Markdown": {ByteCount: 553, FileCount: 1},
},
},
{
@@ -267,8 +267,8 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{
- "Ruby": 403,
+ expectedStats: CountPerLanguage{
+ "Ruby": {ByteCount: 403, FileCount: 1},
},
},
{
@@ -293,9 +293,9 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{
- "KiCad Layout": 500,
- "XML": 120,
+ expectedStats: CountPerLanguage{
+ "KiCad Layout": {ByteCount: 500, FileCount: 1},
+ "XML": {ByteCount: 120, FileCount: 1},
},
},
{
@@ -329,8 +329,8 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{
- "Ruby": 175,
+ expectedStats: CountPerLanguage{
+ "Ruby": {ByteCount: 175, FileCount: 2},
},
},
{
@@ -349,7 +349,7 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{},
+ expectedStats: CountPerLanguage{},
},
{
desc: "preexisting cache",
@@ -377,11 +377,11 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{
- "CoffeeScript": 107,
- "HTML": 349,
- "JavaScript": 1014,
- "Ruby": 2943,
+ expectedStats: CountPerLanguage{
+ "CoffeeScript": {ByteCount: 107, FileCount: 1},
+ "HTML": {ByteCount: 349, FileCount: 1},
+ "JavaScript": {ByteCount: 1014, FileCount: 1},
+ "Ruby": {ByteCount: 2943, FileCount: 1},
},
},
{
@@ -412,9 +412,9 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{
- "HTML": 349,
- "JavaScript": 1014,
+ expectedStats: CountPerLanguage{
+ "HTML": {ByteCount: 349, FileCount: 1},
+ "JavaScript": {ByteCount: 1014, FileCount: 1},
},
},
{
@@ -443,7 +443,7 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{},
+ expectedStats: CountPerLanguage{},
},
{
desc: "corrupted cache",
@@ -463,11 +463,11 @@ func TestInstance_Stats(t *testing.T) {
return repoProto, repoPath, commitID
},
- expectedStats: ByteCountPerLanguage{
- "CoffeeScript": 107,
- "HTML": 349,
- "JavaScript": 1014,
- "Ruby": 2943,
+ expectedStats: CountPerLanguage{
+ "CoffeeScript": {ByteCount: 107, FileCount: 1},
+ "HTML": {ByteCount: 349, FileCount: 1},
+ "JavaScript": {ByteCount: 1014, FileCount: 1},
+ "Ruby": {ByteCount: 2943, FileCount: 1},
},
},
{
@@ -490,14 +490,14 @@ func TestInstance_Stats(t *testing.T) {
stats, err := New(cfg, catfileCache, repo).Stats(ctx, oldCommitID.String())
require.NoError(t, err)
require.FileExists(t, filepath.Join(repoPath, languageStatsFilename))
- require.Equal(t, ByteCountPerLanguage{
- "Ruby": 19,
+ require.Equal(t, CountPerLanguage{
+ "Ruby": {ByteCount: 19, FileCount: 1},
}, stats)
return repoProto, repoPath, newCommitID
},
- expectedStats: ByteCountPerLanguage{
- "Go": 12,
+ expectedStats: CountPerLanguage{
+ "Go": {ByteCount: 12, FileCount: 1},
},
},
{
@@ -621,8 +621,8 @@ func BenchmarkInstance_Stats(b *testing.B) {
linguist := New(cfg, catfileCache, repo)
- var scratchStat ByteCountPerLanguage
- var incStats ByteCountPerLanguage
+ var scratchStat CountPerLanguage
+ var incStats CountPerLanguage
b.Run("from scratch", func(b *testing.B) {
for i := 0; i < b.N; i++ {
diff --git a/internal/gitaly/service/commit/languages.go b/internal/gitaly/service/commit/languages.go
index be2116cc9..cec725a23 100644
--- a/internal/gitaly/service/commit/languages.go
+++ b/internal/gitaly/service/commit/languages.go
@@ -51,7 +51,7 @@ func (s *server) CommitLanguages(ctx context.Context, req *gitalypb.CommitLangua
total := uint64(0)
for _, count := range stats {
- total += count
+ total += count.ByteCount
}
if total == 0 {
@@ -60,10 +60,11 @@ func (s *server) CommitLanguages(ctx context.Context, req *gitalypb.CommitLangua
for lang, count := range stats {
l := &gitalypb.CommitLanguagesResponse_Language{
- Name: lang,
- Share: float32(100*count) / float32(total),
- Color: linguist.Color(lang),
- Bytes: stats[lang],
+ Name: lang,
+ Share: float32(100*count.ByteCount) / float32(total),
+ Color: linguist.Color(lang),
+ Bytes: count.ByteCount,
+ FileCount: count.FileCount,
}
resp.Languages = append(resp.Languages, l)
}
diff --git a/internal/gitaly/service/commit/languages_test.go b/internal/gitaly/service/commit/languages_test.go
index 63b3a22ad..088cbd850 100644
--- a/internal/gitaly/service/commit/languages_test.go
+++ b/internal/gitaly/service/commit/languages_test.go
@@ -36,38 +36,16 @@ func TestLanguages(t *testing.T) {
require.NoError(t, err)
expectedLanguages := []*gitalypb.CommitLanguagesResponse_Language{
- {Name: "Ruby", Share: 65.28394, Color: "#701516", Bytes: 2943},
- {Name: "JavaScript", Share: 22.493345, Color: "#f1e05a", Bytes: 1014},
- {Name: "HTML", Share: 7.741792, Color: "#e34c26", Bytes: 349},
- {Name: "CoffeeScript", Share: 2.373558, Color: "#244776", Bytes: 107},
- {Name: "Modula-2", Share: 2.1073646, Color: "#10253f", Bytes: 95},
+ {Name: "Ruby", Share: 65.28394, Color: "#701516", Bytes: 2943, FileCount: 4},
+ {Name: "JavaScript", Share: 22.493345, Color: "#f1e05a", Bytes: 1014, FileCount: 1},
+ {Name: "HTML", Share: 7.741792, Color: "#e34c26", Bytes: 349, FileCount: 1},
+ {Name: "CoffeeScript", Share: 2.373558, Color: "#244776", Bytes: 107, FileCount: 1},
+ {Name: "Modula-2", Share: 2.1073646, Color: "#10253f", Bytes: 95, FileCount: 1},
}
testhelper.ProtoEqual(t, expectedLanguages, resp.Languages)
}
-func TestFileCountIsZeroWhenFeatureIsDisabled(t *testing.T) {
- t.Parallel()
-
- ctx := testhelper.Context(t)
- _, repo, _, client := setupCommitServiceWithRepo(t, ctx)
-
- request := &gitalypb.CommitLanguagesRequest{
- Repository: repo,
- Revision: []byte("cb19058ecc02d01f8e4290b7e79cafd16a8839b6"),
- }
-
- resp, err := client.CommitLanguages(ctx, request)
- require.NoError(t, err)
-
- require.NotZero(t, len(resp.Languages), "number of languages in response")
-
- for i := range resp.Languages {
- actualLanguage := resp.Languages[i]
- require.Equal(t, uint32(0), actualLanguage.FileCount)
- }
-}
-
func TestLanguagesEmptyRevision(t *testing.T) {
t.Parallel()