diff options
author | Toon Claes <toon@gitlab.com> | 2022-09-21 07:48:52 +0300 |
---|---|---|
committer | Toon Claes <toon@gitlab.com> | 2022-10-03 16:26:17 +0300 |
commit | d02d4e697f6464fdc91561282ea51fe8e4d5f4de (patch) | |
tree | 24db865633c3a34b02c0fa6fde83b66b14d5ef31 | |
parent | 9328317e6e61d6f479a14871eb5360dea2f732a9 (diff) |
linguist: Recalculate when .gitattributes changed
When the user has modified .gitattributes since the last time the stats
were calculated, the numbers might be completely different for files
that aren't modified. To overcome any miscalculation in this case, just
do a full recalculation when .gitattributes was modified.
-rw-r--r-- | internal/gitaly/linguist/linguist.go | 34 | ||||
-rw-r--r-- | internal/gitaly/linguist/linguist_test.go | 33 |
2 files changed, 64 insertions, 3 deletions
diff --git a/internal/gitaly/linguist/linguist.go b/internal/gitaly/linguist/linguist.go index af758e8f3..f5f11b067 100644 --- a/internal/gitaly/linguist/linguist.go +++ b/internal/gitaly/linguist/linguist.go @@ -128,14 +128,21 @@ func (inst *Instance) enryStats(ctx context.Context, commitID string) (ByteCount var revlistIt gitpipe.RevisionIterator - if stats.CommitID == "" { + full, err := inst.needsFullRecalculation(ctx, stats.CommitID, commitID) + if err != nil { + return nil, fmt.Errorf("linguist cannot determine full recalculation: %w", err) + } + + if full { + stats = newLanguageStats() + skipFunc := func(result *gitpipe.RevisionResult) bool { // Skip files that are an excluded filetype based on filename. return newFileInstance(string(result.ObjectName), attrMatcher).IsExcluded() } - // No existing stats cached, so get all the files for the commit - // using git-ls-tree(1). + // Full recalculation is needed, so get all the files for the + // commit using git-ls-tree(1). revlistIt = gitpipe.LsTree(ctx, inst.repo, commitID, gitpipe.LsTreeWithRecursive(), @@ -234,3 +241,24 @@ func (inst *Instance) newAttrMatcher(ctx context.Context, objectReader catfile.O return gitattributes.NewMatcher(attrs), nil } + +func (inst *Instance) needsFullRecalculation(ctx context.Context, cachedID, commitID string) (bool, error) { + if cachedID == "" { + return true, nil + } + + err := inst.repo.ExecAndWait(ctx, git.SubCmd{ + Name: "diff", + Flags: []git.Option{git.Flag{Name: "--quiet"}}, + Args: []string{fmt.Sprintf("%v..%v", cachedID, commitID)}, + PostSepArgs: []string{".gitattributes"}, + }) + if err == nil { + return false, nil + } + if code, ok := command.ExitStatus(err); ok && code == 1 { + return true, nil + } + + return true, fmt.Errorf("git diff .gitattributes: %w", err) +} diff --git a/internal/gitaly/linguist/linguist_test.go b/internal/gitaly/linguist/linguist_test.go index c03f96fba..5d0ae925e 100644 --- a/internal/gitaly/linguist/linguist_test.go +++ 
b/internal/gitaly/linguist/linguist_test.go @@ -393,6 +393,39 @@ func testInstanceStats(t *testing.T, ctx context.Context) { }, }, { + desc: "preexisting cache with .gitattributes modified", + setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) { + repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + }) + + commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries( + gittest.TreeEntry{Path: "webpack.coffee", Mode: "100644", Content: strings.Repeat("a", 107)}, + gittest.TreeEntry{Path: "show_user.html", Mode: "100644", Content: strings.Repeat("a", 349)}, + gittest.TreeEntry{Path: "api.javascript", Mode: "100644", Content: strings.Repeat("a", 1014)}, + gittest.TreeEntry{Path: ".gitattributes", Mode: "100644", Content: "*.html linguist-vendored"}, + )) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + _, err := New(cfg, catfileCache, repo).Stats(ctx, commitID.String()) + require.NoError(t, err) + require.FileExists(t, filepath.Join(repoPath, languageStatsFilename)) + + commitID = gittest.WriteCommit(t, cfg, repoPath, gittest.WithTreeEntries( + gittest.TreeEntry{Path: "webpack.coffee", Mode: "100644", Content: strings.Repeat("a", 107)}, + gittest.TreeEntry{Path: "show_user.html", Mode: "100644", Content: strings.Repeat("a", 349)}, + gittest.TreeEntry{Path: "api.javascript", Mode: "100644", Content: strings.Repeat("a", 1014)}, + gittest.TreeEntry{Path: ".gitattributes", Mode: "100644", Content: "*.coffee linguist-vendored"}, + )) + + return repoProto, repoPath, commitID + }, + expectedStats: ByteCountPerLanguage{ + "HTML": 349, + "JavaScript": 1014, + }, + }, + { desc: "corrupted cache", setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) { repoProto, repoPath := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{ |