Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorToon Claes <toon@gitlab.com>2022-08-09 13:19:59 +0300
committerToon Claes <toon@gitlab.com>2022-08-09 13:19:59 +0300
commitbb2ca12ef31223e8dd7cc0a97b05a5c66c199c42 (patch)
tree94aad8edb1feb2cc29091e3d162eb63bc05e7876
parent70f16fabb74266808876edb2a3d3d90b992a06f9 (diff)
parentd470b8f48b5626ec1bcc13094070ac5052dbd54e (diff)
Merge branch 'toon-linguist-no-zeroes' into 'master'
linguist: Ensure empty files are omitted in totals Closes #4416 See merge request gitlab-org/gitaly!4791
-rw-r--r--internal/git/gittest/tree.go2
-rw-r--r--internal/gitaly/linguist/language_stats.go6
-rw-r--r--internal/gitaly/linguist/language_stats_test.go98
-rw-r--r--internal/gitaly/linguist/linguist_test.go95
4 files changed, 120 insertions, 81 deletions
diff --git a/internal/git/gittest/tree.go b/internal/git/gittest/tree.go
index 361767b49..8485d32c1 100644
--- a/internal/git/gittest/tree.go
+++ b/internal/git/gittest/tree.go
@@ -94,7 +94,7 @@ func WriteTree(t testing.TB, cfg config.Cfg, repoPath string, entries []TreeEntr
t.Fatalf("invalid entry type %q", entry.Mode)
}
- require.True(t, len(entry.OID) > 0 || len(entry.Content) > 0,
+ require.False(t, len(entry.OID) > 0 && len(entry.Content) > 0,
"entry cannot have both OID and content")
require.False(t, len(entry.OID) == 0 && len(entry.Content) == 0,
"entry must have either an OID or content")
diff --git a/internal/gitaly/linguist/language_stats.go b/internal/gitaly/linguist/language_stats.go
index c90452090..e65a90c9a 100644
--- a/internal/gitaly/linguist/language_stats.go
+++ b/internal/gitaly/linguist/language_stats.go
@@ -16,7 +16,7 @@ const (
// a cached version of the language statistics. The name is
// intentionally different from what the linguist gem uses.
languageStatsFilename = "gitaly-language.stats"
- languageStatsVersion = "v1:gitaly"
+ languageStatsVersion = "v2:gitaly"
)
// languageStats takes care of accumulating and caching language statistics for
@@ -89,7 +89,9 @@ func (c *languageStats) add(filename, language string, size uint64) {
}
c.ByFile[filename] = ByteCountPerLanguage{language: size}
- c.Totals[language] += size
+ if size > 0 {
+ c.Totals[language] += size
+ }
}
// drop statistics for the given files
diff --git a/internal/gitaly/linguist/language_stats_test.go b/internal/gitaly/linguist/language_stats_test.go
index a17ceeb78..a64f8ad90 100644
--- a/internal/gitaly/linguist/language_stats_test.go
+++ b/internal/gitaly/linguist/language_stats_test.go
@@ -3,6 +3,9 @@
package linguist
import (
+ "compress/zlib"
+ "encoding/json"
+ "fmt"
"os"
"path/filepath"
"testing"
@@ -17,41 +20,80 @@ func TestNewLanguageStats(t *testing.T) {
t.Parallel()
cfg := testcfg.Build(t)
- repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
- repo := localrepo.NewTestRepo(t, cfg, repoProto)
-
- t.Run("non-existing cache", func(t *testing.T) {
- s, err := newLanguageStats(repo)
- require.NoError(t, err)
- require.Empty(t, s.Totals)
- require.Empty(t, s.ByFile)
- })
-
- t.Run("pre-existing cache", func(t *testing.T) {
- s, err := newLanguageStats(repo)
- require.NoError(t, err)
- s.Totals["C"] = 555
- require.NoError(t, s.save(repo, "badcafe"))
-
- require.Equal(t, ByteCountPerLanguage{"C": 555}, s.Totals)
- })
+ for _, tc := range []struct {
+ desc string
+ run func(*testing.T, *localrepo.Repo, string)
+ }{
+ {
+ desc: "non-existing cache",
+ run: func(t *testing.T, repo *localrepo.Repo, repoPath string) {
+ stats, err := newLanguageStats(repo)
+ require.NoError(t, err)
+ require.Empty(t, stats.Totals)
+ require.Empty(t, stats.ByFile)
+ },
+ },
+ {
+ desc: "pre-existing cache",
+ run: func(t *testing.T, repo *localrepo.Repo, repoPath string) {
+ stats, err := newLanguageStats(repo)
+ require.NoError(t, err)
- t.Run("corrupt cache", func(t *testing.T) {
- require.NoError(t, os.WriteFile(filepath.Join(repoPath, languageStatsFilename), []byte("garbage"), 0o644))
+ stats.Totals["C"] = 555
+ require.NoError(t, stats.save(repo, "badcafe"))
- s, err := newLanguageStats(repo)
- require.Errorf(t, err, "new language stats zlib reader: invalid header")
- require.Empty(t, s.Totals)
- require.Empty(t, s.ByFile)
- })
+ require.Equal(t, ByteCountPerLanguage{"C": 555}, stats.Totals)
+ },
+ },
+ {
+ desc: "corrupt cache",
+ run: func(t *testing.T, repo *localrepo.Repo, repoPath string) {
+ require.NoError(t, os.WriteFile(filepath.Join(repoPath, languageStatsFilename), []byte("garbage"), 0o644))
+
+ stats, err := newLanguageStats(repo)
+ require.Errorf(t, err, "new language stats zlib reader: invalid header")
+ require.Empty(t, stats.Totals)
+ require.Empty(t, stats.ByFile)
+ },
+ },
+ {
+ desc: "incorrect version cache",
+ run: func(t *testing.T, repo *localrepo.Repo, repoPath string) {
+ stats, err := newLanguageStats(repo)
+ require.NoError(t, err)
+
+ stats.Totals["C"] = 555
+ stats.Version = "faulty"
+
+ // Copy save() behavior, but with a faulty version
+ file, err := os.OpenFile(filepath.Join(repoPath, languageStatsFilename), os.O_WRONLY|os.O_CREATE, 0o755)
+ require.NoError(t, err)
+ w := zlib.NewWriter(file)
+ require.NoError(t, json.NewEncoder(w).Encode(stats))
+ require.NoError(t, w.Close())
+ require.NoError(t, file.Sync())
+ require.NoError(t, file.Close())
+
+ newStats, err := newLanguageStats(repo)
+ require.Error(t, err, fmt.Errorf("new language stats version mismatch %s vs %s", languageStatsVersion, "faulty"))
+ require.Empty(t, newStats.Totals)
+ },
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+ tc.run(t, repo, repoPath)
+ })
+ }
}
func TestLanguageStats_add(t *testing.T) {
t.Parallel()
cfg := testcfg.Build(t)
- repoProto, _ := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+ repoProto, _ := gittest.InitRepo(t, cfg, cfg.Storages[0])
repo := localrepo.NewTestRepo(t, cfg, repoProto)
for _, tc := range []struct {
@@ -118,7 +160,7 @@ func TestLanguageStats_drop(t *testing.T) {
t.Parallel()
cfg := testcfg.Build(t)
- repoProto, _ := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+ repoProto, _ := gittest.InitRepo(t, cfg, cfg.Storages[0])
repo := localrepo.NewTestRepo(t, cfg, repoProto)
for _, tc := range []struct {
@@ -173,7 +215,7 @@ func TestLanguageStats_save(t *testing.T) {
t.Parallel()
cfg := testcfg.Build(t)
- repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
repo := localrepo.NewTestRepo(t, cfg, repoProto)
s, err := newLanguageStats(repo)
diff --git a/internal/gitaly/linguist/linguist_test.go b/internal/gitaly/linguist/linguist_test.go
index 82eb7bb95..cd9d68f63 100644
--- a/internal/gitaly/linguist/linguist_test.go
+++ b/internal/gitaly/linguist/linguist_test.go
@@ -7,12 +7,11 @@ import (
"encoding/json"
"os"
"path/filepath"
+ "strings"
"testing"
"github.com/go-enry/go-enry/v2"
enrydata "github.com/go-enry/go-enry/v2/data"
- "github.com/sirupsen/logrus"
- "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v15/internal/git"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/catfile"
@@ -95,8 +94,6 @@ func testInstanceStats(t *testing.T, ctx context.Context) {
catfileCache := catfile.NewCache(cfg)
t.Cleanup(catfileCache.Stop)
- commitID := git.ObjectID("1e292f8fedd741b75372e19097c76d327140c312")
-
languageStatsFilename := filenameForCache(ctx)
for _, tc := range []struct {
@@ -108,10 +105,17 @@ func testInstanceStats(t *testing.T, ctx context.Context) {
{
desc: "successful",
setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) {
- repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries(
+ gittest.TreeEntry{Path: "webpack.coffee", Mode: "100644", Content: strings.Repeat("a", 107)},
+ gittest.TreeEntry{Path: "show_user.html", Mode: "100644", Content: strings.Repeat("a", 349)},
+ gittest.TreeEntry{Path: "api.javascript", Mode: "100644", Content: strings.Repeat("a", 1014)},
+ gittest.TreeEntry{Path: "application.rb", Mode: "100644", Content: strings.Repeat("a", 2943)},
+ ))
+
return repoProto, repoPath, commitID
},
- expectedStats: map[string]uint64{
+ expectedStats: ByteCountPerLanguage{
"CoffeeScript": 107,
"HTML": 349,
"JavaScript": 1014,
@@ -119,9 +123,30 @@ func testInstanceStats(t *testing.T, ctx context.Context) {
},
},
{
+ desc: "empty code files",
+ setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) {
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ emptyBlob := gittest.WriteBlob(t, cfg, repoPath, []byte{})
+ commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries(
+ gittest.TreeEntry{Path: "README.md", Mode: "100644", Content: "Hello world!"},
+ gittest.TreeEntry{Path: "index.html", Mode: "100644", OID: emptyBlob},
+ gittest.TreeEntry{Path: "app.js", Mode: "100644", OID: emptyBlob},
+ ))
+
+ return repoProto, repoPath, commitID
+ },
+ expectedStats: ByteCountPerLanguage{},
+ },
+ {
desc: "preexisting cache",
setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) {
- repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries(
+ gittest.TreeEntry{Path: "webpack.coffee", Mode: "100644", Content: strings.Repeat("a", 107)},
+ gittest.TreeEntry{Path: "show_user.html", Mode: "100644", Content: strings.Repeat("a", 349)},
+ gittest.TreeEntry{Path: "api.javascript", Mode: "100644", Content: strings.Repeat("a", 1014)},
+ gittest.TreeEntry{Path: "application.rb", Mode: "100644", Content: strings.Repeat("a", 2943)},
+ ))
repo := localrepo.NewTestRepo(t, cfg, repoProto)
// We simply run the linguist once before so that it can already
@@ -135,7 +160,7 @@ func testInstanceStats(t *testing.T, ctx context.Context) {
return repoProto, repoPath, commitID
},
- expectedStats: map[string]uint64{
+ expectedStats: ByteCountPerLanguage{
"CoffeeScript": 107,
"HTML": 349,
"JavaScript": 1014,
@@ -145,13 +170,19 @@ func testInstanceStats(t *testing.T, ctx context.Context) {
{
desc: "corrupted cache",
setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) {
- repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+ repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries(
+ gittest.TreeEntry{Path: "webpack.coffee", Mode: "100644", Content: strings.Repeat("a", 107)},
+ gittest.TreeEntry{Path: "show_user.html", Mode: "100644", Content: strings.Repeat("a", 349)},
+ gittest.TreeEntry{Path: "api.javascript", Mode: "100644", Content: strings.Repeat("a", 1014)},
+ gittest.TreeEntry{Path: "application.rb", Mode: "100644", Content: strings.Repeat("a", 2943)},
+ ))
require.NoError(t, os.WriteFile(filepath.Join(repoPath, languageStatsFilename), []byte("garbage"), 0o644))
return repoProto, repoPath, commitID
},
- expectedStats: map[string]uint64{
+ expectedStats: ByteCountPerLanguage{
"CoffeeScript": 107,
"HTML": 349,
"JavaScript": 1014,
@@ -182,7 +213,7 @@ func testInstanceStats(t *testing.T, ctx context.Context) {
return repoProto, repoPath, newCommitID
},
- expectedStats: map[string]uint64{
+ expectedStats: ByteCountPerLanguage{
"Go": 12,
},
},
@@ -192,7 +223,7 @@ func testInstanceStats(t *testing.T, ctx context.Context) {
repoPath := filepath.Join(testhelper.TempDir(t), "nonexistent")
repoProto := &gitalypb.Repository{StorageName: cfg.Storages[0].Name, RelativePath: "nonexistent"}
- return repoProto, repoPath, commitID
+ return repoProto, repoPath, git.ObjectID("b1bb1d1b0b1d1b00")
},
expectedErr: "GetRepoPath: not a git repository",
},
@@ -200,7 +231,8 @@ func testInstanceStats(t *testing.T, ctx context.Context) {
desc: "missing commit",
setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) {
repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
- return repoProto, repoPath, commitID
+
+ return repoProto, repoPath, git.ObjectID("b1bb1d1b0b1d1b00")
},
expectedErr: "linguist",
},
@@ -244,43 +276,6 @@ func TestInstance_Stats_unmarshalJSONError(t *testing.T) {
require.False(t, ok, "expected the error not be a json Syntax Error")
}
-func TestInstance_Stats_incremental(t *testing.T) {
- t.Parallel()
-
- cfg := testcfg.Build(t)
- logger, hook := test.NewNullLogger()
- ctx := testhelper.Context(t, testhelper.ContextWithLogger(logrus.NewEntry(logger)))
- ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.GoLanguageStats, true)
-
- gitCmdFactory := gittest.NewCommandFactory(t, cfg)
-
- linguist, err := New(cfg, gitCmdFactory)
- require.NoError(t, err)
-
- catfileCache := catfile.NewCache(cfg)
- t.Cleanup(catfileCache.Stop)
-
- repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
- repo := localrepo.NewTestRepo(t, cfg, repoProto)
-
- cleanStats, err := linguist.Stats(ctx, repo, "1e292f8fedd741b75372e19097c76d327140c312", catfileCache)
- require.NoError(t, err)
- require.Len(t, hook.AllEntries(), 0)
- require.NoError(t, os.Remove(filepath.Join(repoPath, languageStatsFilename)))
-
- _, err = linguist.Stats(ctx, repo, "cfe32cf61b73a0d5e9f13e774abde7ff789b1660", catfileCache)
- require.NoError(t, err)
- require.Len(t, hook.AllEntries(), 0)
- require.FileExists(t, filepath.Join(repoPath, languageStatsFilename))
-
- incStats, err := linguist.Stats(ctx, repo, "1e292f8fedd741b75372e19097c76d327140c312", catfileCache)
- require.NoError(t, err)
- require.Len(t, hook.AllEntries(), 0)
- require.FileExists(t, filepath.Join(repoPath, languageStatsFilename))
-
- require.Equal(t, cleanStats, incStats)
-}
-
func TestNew(t *testing.T) {
cfg := testcfg.Build(t, testcfg.WithRealLinguist())