diff options
author | Toon Claes <toon@gitlab.com> | 2022-08-09 13:19:59 +0300 |
---|---|---|
committer | Toon Claes <toon@gitlab.com> | 2022-08-09 13:19:59 +0300 |
commit | bb2ca12ef31223e8dd7cc0a97b05a5c66c199c42 (patch) | |
tree | 94aad8edb1feb2cc29091e3d162eb63bc05e7876 | |
parent | 70f16fabb74266808876edb2a3d3d90b992a06f9 (diff) | |
parent | d470b8f48b5626ec1bcc13094070ac5052dbd54e (diff) |
Merge branch 'toon-linguist-no-zeroes' into 'master'
linguist: Ensure empty files are omitted in totals
Closes #4416
See merge request gitlab-org/gitaly!4791
-rw-r--r-- | internal/git/gittest/tree.go | 2 | ||||
-rw-r--r-- | internal/gitaly/linguist/language_stats.go | 6 | ||||
-rw-r--r-- | internal/gitaly/linguist/language_stats_test.go | 98 | ||||
-rw-r--r-- | internal/gitaly/linguist/linguist_test.go | 95 |
4 files changed, 120 insertions, 81 deletions
diff --git a/internal/git/gittest/tree.go b/internal/git/gittest/tree.go index 361767b49..8485d32c1 100644 --- a/internal/git/gittest/tree.go +++ b/internal/git/gittest/tree.go @@ -94,7 +94,7 @@ func WriteTree(t testing.TB, cfg config.Cfg, repoPath string, entries []TreeEntr t.Fatalf("invalid entry type %q", entry.Mode) } - require.True(t, len(entry.OID) > 0 || len(entry.Content) > 0, + require.False(t, len(entry.OID) > 0 && len(entry.Content) > 0, "entry cannot have both OID and content") require.False(t, len(entry.OID) == 0 && len(entry.Content) == 0, "entry must have either an OID or content") diff --git a/internal/gitaly/linguist/language_stats.go b/internal/gitaly/linguist/language_stats.go index c90452090..e65a90c9a 100644 --- a/internal/gitaly/linguist/language_stats.go +++ b/internal/gitaly/linguist/language_stats.go @@ -16,7 +16,7 @@ const ( // a cached version of the language statistics. The name is // intentionally different from what the linguist gem uses. languageStatsFilename = "gitaly-language.stats" - languageStatsVersion = "v1:gitaly" + languageStatsVersion = "v2:gitaly" ) // languageStats takes care of accumulating and caching language statistics for @@ -89,7 +89,9 @@ func (c *languageStats) add(filename, language string, size uint64) { } c.ByFile[filename] = ByteCountPerLanguage{language: size} - c.Totals[language] += size + if size > 0 { + c.Totals[language] += size + } } // drop statistics for the given files diff --git a/internal/gitaly/linguist/language_stats_test.go b/internal/gitaly/linguist/language_stats_test.go index a17ceeb78..a64f8ad90 100644 --- a/internal/gitaly/linguist/language_stats_test.go +++ b/internal/gitaly/linguist/language_stats_test.go @@ -3,6 +3,9 @@ package linguist import ( + "compress/zlib" + "encoding/json" + "fmt" "os" "path/filepath" "testing" @@ -17,41 +20,80 @@ func TestNewLanguageStats(t *testing.T) { t.Parallel() cfg := testcfg.Build(t) - repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0]) - repo := localrepo.NewTestRepo(t, cfg, repoProto) - - t.Run("non-existing cache", func(t *testing.T) { - s, err := newLanguageStats(repo) - require.NoError(t, err) - require.Empty(t, s.Totals) - require.Empty(t, s.ByFile) - }) - - t.Run("pre-existing cache", func(t *testing.T) { - s, err := newLanguageStats(repo) - require.NoError(t, err) - s.Totals["C"] = 555 - require.NoError(t, s.save(repo, "badcafe")) - - require.Equal(t, ByteCountPerLanguage{"C": 555}, s.Totals) - }) + for _, tc := range []struct { + desc string + run func(*testing.T, *localrepo.Repo, string) + }{ + { + desc: "non-existing cache", + run: func(t *testing.T, repo *localrepo.Repo, repoPath string) { + stats, err := newLanguageStats(repo) + require.NoError(t, err) + require.Empty(t, stats.Totals) + require.Empty(t, stats.ByFile) + }, + }, + { + desc: "pre-existing cache", + run: func(t *testing.T, repo *localrepo.Repo, repoPath string) { + stats, err := newLanguageStats(repo) + require.NoError(t, err) - t.Run("corrupt cache", func(t *testing.T) { - require.NoError(t, os.WriteFile(filepath.Join(repoPath, languageStatsFilename), []byte("garbage"), 0o644)) + stats.Totals["C"] = 555 + require.NoError(t, stats.save(repo, "badcafe")) - s, err := newLanguageStats(repo) - require.Errorf(t, err, "new language stats zlib reader: invalid header") - require.Empty(t, s.Totals) - require.Empty(t, s.ByFile) - }) + require.Equal(t, ByteCountPerLanguage{"C": 555}, stats.Totals) + }, + }, + { + desc: "corrupt cache", + run: func(t *testing.T, repo *localrepo.Repo, repoPath string) { + require.NoError(t, os.WriteFile(filepath.Join(repoPath, languageStatsFilename), []byte("garbage"), 0o644)) + + stats, err := newLanguageStats(repo) + require.Errorf(t, err, "new language stats zlib reader: invalid header") + require.Empty(t, stats.Totals) + require.Empty(t, stats.ByFile) + }, + }, + { + desc: "incorrect version cache", + run: func(t *testing.T, repo *localrepo.Repo, repoPath string) { + stats, err := newLanguageStats(repo) + require.NoError(t, err) + + stats.Totals["C"] = 555 + stats.Version = "faulty" + + // Copy save() behavior, but with a faulty version + file, err := os.OpenFile(filepath.Join(repoPath, languageStatsFilename), os.O_WRONLY|os.O_CREATE, 0o755) + require.NoError(t, err) + w := zlib.NewWriter(file) + require.NoError(t, json.NewEncoder(w).Encode(stats)) + require.NoError(t, w.Close()) + require.NoError(t, file.Sync()) + require.NoError(t, file.Close()) + + newStats, err := newLanguageStats(repo) + require.Error(t, err, fmt.Errorf("new language stats version mismatch %s vs %s", languageStatsVersion, "faulty")) + require.Empty(t, newStats.Totals) + }, + }, + } { + t.Run(tc.desc, func(t *testing.T) { + repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0]) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + tc.run(t, repo, repoPath) + }) + } } func TestLanguageStats_add(t *testing.T) { t.Parallel() cfg := testcfg.Build(t) - repoProto, _ := gittest.CloneRepo(t, cfg, cfg.Storages[0]) + repoProto, _ := gittest.InitRepo(t, cfg, cfg.Storages[0]) repo := localrepo.NewTestRepo(t, cfg, repoProto) for _, tc := range []struct { @@ -118,7 +160,7 @@ func TestLanguageStats_drop(t *testing.T) { t.Parallel() cfg := testcfg.Build(t) - repoProto, _ := gittest.CloneRepo(t, cfg, cfg.Storages[0]) + repoProto, _ := gittest.InitRepo(t, cfg, cfg.Storages[0]) repo := localrepo.NewTestRepo(t, cfg, repoProto) for _, tc := range []struct { @@ -173,7 +215,7 @@ func TestLanguageStats_save(t *testing.T) { t.Parallel() cfg := testcfg.Build(t) - repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0]) + repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0]) repo := localrepo.NewTestRepo(t, cfg, repoProto) s, err := newLanguageStats(repo) diff --git a/internal/gitaly/linguist/linguist_test.go b/internal/gitaly/linguist/linguist_test.go index 82eb7bb95..cd9d68f63 100644 --- a/internal/gitaly/linguist/linguist_test.go +++ b/internal/gitaly/linguist/linguist_test.go @@ -7,12 +7,11 @@ import ( "encoding/json" "os" "path/filepath" + "strings" "testing" "github.com/go-enry/go-enry/v2" enrydata "github.com/go-enry/go-enry/v2/data" - "github.com/sirupsen/logrus" - "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v15/internal/git" "gitlab.com/gitlab-org/gitaly/v15/internal/git/catfile" @@ -95,8 +94,6 @@ func testInstanceStats(t *testing.T, ctx context.Context) { catfileCache := catfile.NewCache(cfg) t.Cleanup(catfileCache.Stop) - commitID := git.ObjectID("1e292f8fedd741b75372e19097c76d327140c312") - languageStatsFilename := filenameForCache(ctx) for _, tc := range []struct { @@ -108,10 +105,17 @@ func testInstanceStats(t *testing.T, ctx context.Context) { { desc: "successful", setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) { - repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0]) + repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0]) + commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries( + gittest.TreeEntry{Path: "webpack.coffee", Mode: "100644", Content: strings.Repeat("a", 107)}, + gittest.TreeEntry{Path: "show_user.html", Mode: "100644", Content: strings.Repeat("a", 349)}, + gittest.TreeEntry{Path: "api.javascript", Mode: "100644", Content: strings.Repeat("a", 1014)}, + gittest.TreeEntry{Path: "application.rb", Mode: "100644", Content: strings.Repeat("a", 2943)}, + )) + return repoProto, repoPath, commitID }, - expectedStats: map[string]uint64{ + expectedStats: ByteCountPerLanguage{ "CoffeeScript": 107, "HTML": 349, "JavaScript": 1014, @@ -119,9 +123,30 @@ func testInstanceStats(t *testing.T, ctx context.Context) { }, }, { + desc: "empty code files", + setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) { + repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0]) + emptyBlob := gittest.WriteBlob(t, cfg, repoPath, []byte{}) + commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries( + gittest.TreeEntry{Path: "README.md", Mode: "100644", Content: "Hello world!"}, + gittest.TreeEntry{Path: "index.html", Mode: "100644", OID: emptyBlob}, + gittest.TreeEntry{Path: "app.js", Mode: "100644", OID: emptyBlob}, + )) + + return repoProto, repoPath, commitID + }, + expectedStats: ByteCountPerLanguage{}, + }, + { desc: "preexisting cache", setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) { - repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0]) + repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0]) + commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries( + gittest.TreeEntry{Path: "webpack.coffee", Mode: "100644", Content: strings.Repeat("a", 107)}, + gittest.TreeEntry{Path: "show_user.html", Mode: "100644", Content: strings.Repeat("a", 349)}, + gittest.TreeEntry{Path: "api.javascript", Mode: "100644", Content: strings.Repeat("a", 1014)}, + gittest.TreeEntry{Path: "application.rb", Mode: "100644", Content: strings.Repeat("a", 2943)}, + )) repo := localrepo.NewTestRepo(t, cfg, repoProto) // We simply run the linguist once before so that it can already @@ -135,7 +160,7 @@ func testInstanceStats(t *testing.T, ctx context.Context) { return repoProto, repoPath, commitID }, - expectedStats: map[string]uint64{ + expectedStats: ByteCountPerLanguage{ "CoffeeScript": 107, "HTML": 349, "JavaScript": 1014, @@ -145,13 +170,19 @@ func testInstanceStats(t *testing.T, ctx context.Context) { { desc: "corrupted cache", setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) { - repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0]) + repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0]) + commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries( + gittest.TreeEntry{Path: "webpack.coffee", Mode: "100644", Content: strings.Repeat("a", 107)}, + gittest.TreeEntry{Path: "show_user.html", Mode: "100644", Content: strings.Repeat("a", 349)}, + gittest.TreeEntry{Path: "api.javascript", Mode: "100644", Content: strings.Repeat("a", 1014)}, + gittest.TreeEntry{Path: "application.rb", Mode: "100644", Content: strings.Repeat("a", 2943)}, + )) require.NoError(t, os.WriteFile(filepath.Join(repoPath, languageStatsFilename), []byte("garbage"), 0o644)) return repoProto, repoPath, commitID }, - expectedStats: map[string]uint64{ + expectedStats: ByteCountPerLanguage{ "CoffeeScript": 107, "HTML": 349, "JavaScript": 1014, @@ -182,7 +213,7 @@ func testInstanceStats(t *testing.T, ctx context.Context) { return repoProto, repoPath, newCommitID }, - expectedStats: map[string]uint64{ + expectedStats: ByteCountPerLanguage{ "Go": 12, }, }, @@ -192,7 +223,7 @@ func testInstanceStats(t *testing.T, ctx context.Context) { repoPath := filepath.Join(testhelper.TempDir(t), "nonexistent") repoProto := &gitalypb.Repository{StorageName: cfg.Storages[0].Name, RelativePath: "nonexistent"} - return repoProto, repoPath, commitID + return repoProto, repoPath, git.ObjectID("b1bb1d1b0b1d1b00") }, expectedErr: "GetRepoPath: not a git repository", }, @@ -200,7 +231,8 @@ func testInstanceStats(t *testing.T, ctx context.Context) { desc: "missing commit", setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) { repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0]) - return repoProto, repoPath, commitID + + return repoProto, repoPath, git.ObjectID("b1bb1d1b0b1d1b00") }, expectedErr: "linguist", }, @@ -244,43 +276,6 @@ func TestInstance_Stats_unmarshalJSONError(t *testing.T) { require.False(t, ok, "expected the error not be a json Syntax Error") } -func TestInstance_Stats_incremental(t *testing.T) { - t.Parallel() - - cfg := testcfg.Build(t) - logger, hook := test.NewNullLogger() - ctx := testhelper.Context(t, testhelper.ContextWithLogger(logrus.NewEntry(logger))) - ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.GoLanguageStats, true) - - gitCmdFactory := gittest.NewCommandFactory(t, cfg) - - linguist, err := New(cfg, gitCmdFactory) - require.NoError(t, err) - - catfileCache := catfile.NewCache(cfg) - t.Cleanup(catfileCache.Stop) - - repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0]) - repo := localrepo.NewTestRepo(t, cfg, repoProto) - - cleanStats, err := linguist.Stats(ctx, repo, "1e292f8fedd741b75372e19097c76d327140c312", catfileCache) - require.NoError(t, err) - require.Len(t, hook.AllEntries(), 0) - require.NoError(t, os.Remove(filepath.Join(repoPath, languageStatsFilename))) - - _, err = linguist.Stats(ctx, repo, "cfe32cf61b73a0d5e9f13e774abde7ff789b1660", catfileCache) - require.NoError(t, err) - require.Len(t, hook.AllEntries(), 0) - require.FileExists(t, filepath.Join(repoPath, languageStatsFilename)) - - incStats, err := linguist.Stats(ctx, repo, "1e292f8fedd741b75372e19097c76d327140c312", catfileCache) - require.NoError(t, err) - require.Len(t, hook.AllEntries(), 0) - require.FileExists(t, filepath.Join(repoPath, languageStatsFilename)) - - require.Equal(t, cleanStats, incStats) -} - func TestNew(t *testing.T) { cfg := testcfg.Build(t, testcfg.WithRealLinguist()) |