Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Toon Claes <toon@gitlab.com> 2022-09-29 17:30:43 +0300
committer Toon Claes <toon@gitlab.com> 2022-10-03 16:26:16 +0300
commit bad57b12f2234d3d1246dce658282d34a7b8f186 (patch)
tree 5f06145bc0e9cac79cf458c0fa6fa5f22899377b
parent aa1e5f03017c4343fbdc3a2d527321ccd8a54f12 (diff)
linguist: Exclude some files from stats
Files that are either documentation, vendored, or generated should be excluded from the stats.
-rw-r--r-- internal/gitaly/linguist/file_instance.go 41
-rw-r--r-- internal/gitaly/linguist/linguist.go 16
-rw-r--r-- internal/gitaly/linguist/linguist_test.go 84
3 files changed, 131 insertions, 10 deletions
diff --git a/internal/gitaly/linguist/file_instance.go b/internal/gitaly/linguist/file_instance.go
index 5c01da23e..9b5484bf5 100644
--- a/internal/gitaly/linguist/file_instance.go
+++ b/internal/gitaly/linguist/file_instance.go
@@ -18,6 +18,35 @@ func newFileInstance(filename string) fileInstance {
}
}
+func (f fileInstance) isDocumentation() bool {
+ return enry.IsDocumentation(f.filename)
+}
+
+func (f fileInstance) isVendored() bool {
+ return enry.IsVendor(f.filename)
+}
+
+func (f fileInstance) isGenerated(content []byte) bool {
+ return enry.IsGenerated(f.filename, content)
+}
+
+func (f fileInstance) getLanguage(content []byte) string {
+ return enry.GetLanguage(f.filename, content)
+}
+
+func (f fileInstance) isIgnoredLanguage(lang string) bool {
+ // Ignore anything that's neither markup nor a programming language,
+ // similar to what the linguist gem does:
+ // https://github.com/github/linguist/blob/v7.20.0/lib/linguist/blob_helper.rb#L378-L387
+ return enry.GetLanguageType(lang) != enry.Programming &&
+ enry.GetLanguageType(lang) != enry.Markup
+}
+
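+// IsExcluded returns whether the file is documentation or vendored, judged by its filename alone, and should thus be excluded from the stats.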
+func (f fileInstance) IsExcluded() bool {
+ return f.isDocumentation() || f.isVendored()
+}
+
// DetermineStats determines the size and language of the given file. The
// language will be an empty string when the stats should be omitted from the
// count.
@@ -28,13 +57,13 @@ func (f fileInstance) DetermineStats(object gitpipe.CatfileObjectResult) (string
return "", 0, fmt.Errorf("determine stats read blob: %w", err)
}
- lang := enry.GetLanguage(f.filename, content)
+ if f.isGenerated(content) {
+ return "", 0, nil
+ }
+
+ lang := f.getLanguage(content)
- // Ignore anything that's neither markup nor a programming language,
- // similar to what the linguist gem does:
- // https://github.com/github/linguist/blob/v7.20.0/lib/linguist/blob_helper.rb#L378-L387
- if enry.GetLanguageType(lang) != enry.Programming &&
- enry.GetLanguageType(lang) != enry.Markup {
+ if f.isIgnoredLanguage(lang) {
return "", 0, nil
}
diff --git a/internal/gitaly/linguist/linguist.go b/internal/gitaly/linguist/linguist.go
index c17cad698..6152797cc 100644
--- a/internal/gitaly/linguist/linguist.go
+++ b/internal/gitaly/linguist/linguist.go
@@ -122,20 +122,28 @@ func (inst *Instance) enryStats(ctx context.Context, commitID string) (ByteCount
var revlistIt gitpipe.RevisionIterator
if stats.CommitID == "" {
+ skipFunc := func(result *gitpipe.RevisionResult) bool {
+ // Skip files that are an excluded filetype based on filename.
+ return newFileInstance(string(result.ObjectName)).IsExcluded()
+ }
+
// No existing stats cached, so get all the files for the commit
// using git-ls-tree(1).
revlistIt = gitpipe.LsTree(ctx, inst.repo,
commitID,
gitpipe.LsTreeWithRecursive(),
gitpipe.LsTreeWithBlobFilter(),
+ gitpipe.LsTreeWithSkip(skipFunc),
)
} else {
// Stats are cached for one commit, so get the git-diff-tree(1)
// between that commit and the one we're calculating stats for.
- skipDeleted := func(result *gitpipe.RevisionResult) bool {
- // Skip files that are deleted.
- if git.ObjectHashSHA1.IsZeroOID(result.OID) {
+ skipFunc := func(result *gitpipe.RevisionResult) bool {
+ // Skip files that are deleted, or
+ // an excluded filetype based on filename.
+ if git.ObjectHashSHA1.IsZeroOID(result.OID) ||
+ newFileInstance(string(result.ObjectName)).IsExcluded() {
// It's a little bit of a hack to use this skip
// function, but for every file that's deleted,
// remove the stats.
@@ -149,7 +157,7 @@ func (inst *Instance) enryStats(ctx context.Context, commitID string) (ByteCount
stats.CommitID, commitID,
gitpipe.DiffTreeWithRecursive(),
gitpipe.DiffTreeWithIgnoreSubmodules(),
- gitpipe.DiffTreeWithSkip(skipDeleted),
+ gitpipe.DiffTreeWithSkip(skipFunc),
)
}
diff --git a/internal/gitaly/linguist/linguist_test.go b/internal/gitaly/linguist/linguist_test.go
index 79f0c3edb..8461b2e56 100644
--- a/internal/gitaly/linguist/linguist_test.go
+++ b/internal/gitaly/linguist/linguist_test.go
@@ -69,6 +69,90 @@ func testInstanceStats(t *testing.T, ctx context.Context) {
},
},
{
+ desc: "documentation is ignored",
+ setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) {
+ repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{
+ SkipCreationViaService: true,
+ })
+
+ docTree := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{
+ {Path: "readme.md", Mode: "100644", Content: strings.Repeat("a", 500)},
+ {Path: "index.html", Mode: "100644", Content: strings.Repeat("a", 120)},
+ {Path: "formatter.rb", Mode: "100644", Content: strings.Repeat("a", 403)},
+ })
+ commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries(
+ gittest.TreeEntry{Path: "docs", Mode: "040000", OID: docTree},
+ gittest.TreeEntry{Path: "main.c", Mode: "100644", Content: strings.Repeat("a", 85)},
+ ))
+
+ return repoProto, repoPath, commitID
+ },
+ expectedStats: ByteCountPerLanguage{
+ "C": 85,
+ },
+ },
+ {
+ desc: "vendor is ignored",
+ setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) {
+ repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{
+ SkipCreationViaService: true,
+ })
+
+ vendorTree := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{
+ {Path: "app.rb", Mode: "100644", Content: strings.Repeat("a", 500)},
+ })
+ commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries(
+ gittest.TreeEntry{Path: "vendor", Mode: "040000", OID: vendorTree},
+ gittest.TreeEntry{Path: "main.c", Mode: "100644", Content: strings.Repeat("a", 85)},
+ ))
+
+ return repoProto, repoPath, commitID
+ },
+ expectedStats: ByteCountPerLanguage{
+ "C": 85,
+ },
+ },
+ {
+ desc: "generated is ignored",
+ setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) {
+ repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{
+ SkipCreationViaService: true,
+ })
+
+ podsTree := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{
+ {Path: "app.swift", Mode: "100644", Content: strings.Repeat("a", 500)},
+ })
+ commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries(
+ gittest.TreeEntry{Path: "Pods", Mode: "040000", OID: podsTree},
+ gittest.TreeEntry{Path: "main.c", Mode: "100644", Content: strings.Repeat("a", 85)},
+ ))
+
+ return repoProto, repoPath, commitID
+ },
+ expectedStats: ByteCountPerLanguage{
+ "C": 85,
+ },
+ },
+ {
+ desc: "undetectable languages are ignored",
+ setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) {
+ repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{
+ SkipCreationViaService: true,
+ })
+
+ commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries(
+ gittest.TreeEntry{Path: "config.json", Mode: "100644", Content: strings.Repeat("a", 234)},
+ gittest.TreeEntry{Path: "manual.md", Mode: "100644", Content: strings.Repeat("a", 553)},
+ gittest.TreeEntry{Path: "main.c", Mode: "100644", Content: strings.Repeat("a", 85)},
+ ))
+
+ return repoProto, repoPath, commitID
+ },
+ expectedStats: ByteCountPerLanguage{
+ "C": 85,
+ },
+ },
+ {
desc: "empty code files",
setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) {
repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{