From efd9a598f50e03f05620b56f2e010600128f3b1c Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Fri, 3 Jun 2022 17:10:22 +0200 Subject: linguist: Implement Stats in pure Go This change adds an alternative implementation of linguist.Stats using go-enry as a pure Go solution. The code is behind a default disabled feature flag 'go_language_stats'. Issue: https://gitlab.com/gitlab-org/gitaly/-/issues/2571 Changelog: performance --- NOTICE | 204 +++++++++++++++++++++ go.mod | 2 + go.sum | 4 + internal/gitaly/linguist/linguist.go | 103 +++++++++++ internal/gitaly/linguist/linguist_test.go | 77 +++++++- internal/gitaly/service/commit/languages_test.go | 32 +++- .../metadata/featureflag/ff_go_language_stats.go | 10 + 7 files changed, 418 insertions(+), 14 deletions(-) create mode 100644 internal/metadata/featureflag/ff_go_language_stats.go diff --git a/NOTICE b/NOTICE index 7879c98f4..d963932ff 100644 --- a/NOTICE +++ b/NOTICE @@ -7669,6 +7669,210 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +LICENSE - github.com/go-enry/go-enry/v2 + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LICENSE.md - github.com/go-enry/go-license-detector/v4/licensedb Apache License diff --git a/go.mod b/go.mod index bb98d4040..0b7441847 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/containerd/cgroups v0.0.0-20201118023556-2819c83ced99 github.com/getsentry/sentry-go v0.13.0 github.com/git-lfs/git-lfs/v3 v3.2.0 + github.com/go-enry/go-enry/v2 v2.8.2 github.com/go-enry/go-license-detector/v4 v4.3.0 github.com/google/go-cmp v0.5.8 github.com/google/uuid v1.3.0 @@ -97,6 +98,7 @@ require ( github.com/git-lfs/go-netrc v0.0.0-20210914205454-f0c862dd687a // indirect github.com/git-lfs/pktline v0.0.0-20210330133718-06e9096e2825 // indirect github.com/git-lfs/wildmatch/v2 v2.0.1 // indirect + github.com/go-enry/go-oniguruma v1.2.1 // indirect github.com/go-git/gcfg v1.5.0 // indirect github.com/go-git/go-billy/v5 v5.1.0 // indirect github.com/go-git/go-git/v5 v5.3.0 // indirect diff --git a/go.sum b/go.sum index e36c5f944..4c2afd61c 100644 --- a/go.sum +++ b/go.sum @@ -386,8 +386,12 @@ github.com/git-lfs/wildmatch/v2 v2.0.1/go.mod h1:EVqonpk9mXbREP3N8UkwoWdrF249uHp github.com/gliderlabs/ssh v0.2.2 h1:6zsha5zo/TWhRhwqCD3+EarCAgZ2yN28ipRnGPnwkI0= github.com/gliderlabs/ssh v0.2.2/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98= +github.com/go-enry/go-enry/v2 v2.8.2 h1:uiGmC+3K8sVd/6DOe2AOJEOihJdqda83nPyJNtMR8RI= +github.com/go-enry/go-enry/v2 v2.8.2/go.mod h1:GVzIiAytiS5uT/QiuakK7TF1u4xDab87Y8V5EJRpsIQ= github.com/go-enry/go-license-detector/v4 v4.3.0 h1:OFlQAVNw5FlKUjX4OuW8JOabu8MQHjTKDb9pdeNYMUw= github.com/go-enry/go-license-detector/v4 v4.3.0/go.mod h1:HaM4wdNxSlz/9Gw0uVOKSQS5JVFqf2Pk8xUPEn6bldI= +github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo= +github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4= github.com/go-errors/errors v1.0.1 h1:LUHzmkK3GUKUrL/1gfBUxAHzcev3apQlezX/+O7ma6w= github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q= github.com/go-git/gcfg v1.5.0 h1:Q5ViNfGF8zFgyJWPqYwA7qGFoMTEiBmdlkcfRmpIMa4= diff --git a/internal/gitaly/linguist/linguist.go b/internal/gitaly/linguist/linguist.go index 5e09431e3..2a1a92c60 100644 --- a/internal/gitaly/linguist/linguist.go +++ b/internal/gitaly/linguist/linguist.go @@ -10,12 +10,16 @@ import ( "os/exec" "path/filepath" + "github.com/go-enry/go-enry/v2" + "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus" "gitlab.com/gitlab-org/gitaly/v15/internal/command" "gitlab.com/gitlab-org/gitaly/v15/internal/git" "gitlab.com/gitlab-org/gitaly/v15/internal/git/catfile" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/gitpipe" "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config" "gitlab.com/gitlab-org/gitaly/v15/internal/helper/env" + "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" ) // Language is used to parse Linguist's language.json file. @@ -56,6 +60,10 @@ func New(cfg config.Cfg, gitCmdFactory git.CommandFactory) (*Instance, error) { // Stats returns the repository's language stats as reported by 'git-linguist'. func (inst *Instance) Stats(ctx context.Context, repo *localrepo.Repo, commitID string, catfileCache catfile.Cache) (ByteCountPerLanguage, error) { + if featureflag.GoLanguageStats.IsEnabled(ctx) { + return inst.enryStats(ctx, repo, commitID, catfileCache) + } + repoPath, err := repo.Path() if err != nil { return nil, fmt.Errorf("get repo path: %w", err) @@ -152,3 +160,98 @@ func openLanguagesJSON(cfg config.Cfg) (io.ReadCloser, error) { return os.Open(filepath.Join(linguistPathSymlink.Name(), "lib", "linguist", "languages.json")) } + +func (inst *Instance) enryStats(ctx context.Context, repo *localrepo.Repo, commitID string, catfileCache catfile.Cache) (ByteCountPerLanguage, error) { + stats, err := newLanguageStats(repo) + if err != nil { + ctxlogrus.Extract(ctx).WithError(err).Info("linguist load from cache") + } + if stats.CommitID == commitID { + return stats.Totals, nil + } + + objectReader, cancel, err := catfileCache.ObjectReader(ctx, repo) + if err != nil { + return nil, fmt.Errorf("create object reader: %w", err) + } + defer cancel() + + var revlistIt gitpipe.RevisionIterator + + if stats.CommitID == "" { + // No existing stats cached, so get all the files for the commit + // using git-ls-tree(1). + revlistIt = gitpipe.LsTree(ctx, repo, + commitID, + gitpipe.LsTreeWithRecursive(), + gitpipe.LsTreeWithBlobFilter(), + ) + } else { + // Stats are cached for one commit, so get the git-diff-tree(1) + // between that commit and the one we're calculating stats for. + + skipDeleted := func(result *gitpipe.RevisionResult) bool { + // Skip files that are deleted. + if result.OID.IsZeroOID() { + // It's a little bit of a hack to use this skip + // function, but for every file that's deleted, + // remove the stats. + stats.drop(string(result.ObjectName)) + return true + } + return false + } + + revlistIt = gitpipe.DiffTree(ctx, repo, + stats.CommitID, commitID, + gitpipe.DiffTreeWithRecursive(), + gitpipe.DiffTreeWithIgnoreSubmodules(), + gitpipe.DiffTreeWithSkip(skipDeleted), + ) + } + + objectIt, err := gitpipe.CatfileObject(ctx, objectReader, revlistIt) + if err != nil { + return nil, fmt.Errorf("linguist gitpipe: %w", err) + } + + for objectIt.Next() { + object := objectIt.Result() + filename := string(object.ObjectName) + + // Read arbitrary number of bytes considered enough to determine language + content, err := io.ReadAll(io.LimitReader(object, 2048)) + if err != nil { + return nil, fmt.Errorf("linguist read blob: %w", err) + } + + if _, err := io.Copy(io.Discard, object); err != nil { + return nil, fmt.Errorf("linguist discard excess blob: %w", err) + } + + lang := enry.GetLanguage(filename, content) + + // Ignore anything that's neither markup nor a programming language, + // similar to what the linguist gem does: + // https://github.com/github/linguist/blob/v7.20.0/lib/linguist/blob_helper.rb#L378-L387 + if enry.GetLanguageType(lang) != enry.Programming && + enry.GetLanguageType(lang) != enry.Markup { + // The file might have been included in the stats before + stats.drop(filename) + + continue + } + + stats.add(filename, lang, uint64(object.Object.ObjectSize())) + } + + if err := objectIt.Err(); err != nil { + return nil, fmt.Errorf("linguist object iterator: %w", err) + } + + if err := stats.save(repo, commitID); err != nil { + return nil, fmt.Errorf("linguist language stats save: %w", err) + } + + return stats.Totals, nil +} diff --git a/internal/gitaly/linguist/linguist_test.go b/internal/gitaly/linguist/linguist_test.go index 1bc5eff43..6ef0734b0 100644 --- a/internal/gitaly/linguist/linguist_test.go +++ b/internal/gitaly/linguist/linguist_test.go @@ -1,17 +1,21 @@ package linguist import ( + "context" "encoding/json" "os" "path/filepath" "testing" + "github.com/sirupsen/logrus" + "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v15/internal/git" "gitlab.com/gitlab-org/gitaly/v15/internal/git/catfile" "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config" + "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" @@ -22,7 +26,11 @@ func TestMain(m *testing.M) { } func TestInstance_Stats(t *testing.T) { - ctx := testhelper.Context(t) + testhelper.NewFeatureSets(featureflag.GoLanguageStats). + Run(t, testInstanceStats) +} + +func testInstanceStats(t *testing.T, ctx context.Context) { cfg := testcfg.Build(t) gitCmdFactory := gittest.NewCommandFactory(t, cfg) @@ -30,10 +38,12 @@ func TestInstance_Stats(t *testing.T) { require.NoError(t, err) catfileCache := catfile.NewCache(cfg) - defer catfileCache.Stop() + t.Cleanup(catfileCache.Stop) commitID := git.ObjectID("1e292f8fedd741b75372e19097c76d327140c312") + languageStatsFilename := filenameForCache(ctx) + for _, tc := range []struct { desc string setup func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) @@ -63,7 +73,10 @@ func TestInstance_Stats(t *testing.T) { // write the cache. _, err := linguist.Stats(ctx, repo, commitID.String(), catfileCache) require.NoError(t, err) - require.FileExists(t, filepath.Join(repoPath, "language-stats.cache")) + require.FileExists(t, filepath.Join(repoPath, languageStatsFilename)) + + // Make sure it isn't able to generate stats from scratch + require.NoError(t, os.RemoveAll(filepath.Join(repoPath, "objects", "pack"))) return repoProto, repoPath, commitID }, @@ -79,7 +92,7 @@ func TestInstance_Stats(t *testing.T) { setup: func(t *testing.T) (*gitalypb.Repository, string, git.ObjectID) { repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0]) - require.NoError(t, os.WriteFile(filepath.Join(repoPath, "language-stats.cache"), []byte("garbage"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(repoPath, languageStatsFilename), []byte("garbage"), 0o644)) return repoProto, repoPath, commitID }, @@ -107,7 +120,7 @@ func TestInstance_Stats(t *testing.T) { // linguist knows to update the cache. stats, err := linguist.Stats(ctx, repo, oldCommitID.String(), catfileCache) require.NoError(t, err) - require.FileExists(t, filepath.Join(repoPath, "language-stats.cache")) + require.FileExists(t, filepath.Join(repoPath, languageStatsFilename)) require.Equal(t, ByteCountPerLanguage{ "Ruby": 19, }, stats) @@ -134,7 +147,7 @@ func TestInstance_Stats(t *testing.T) { repoProto, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0]) return repoProto, repoPath, commitID }, - expectedErr: "waiting for linguist: exit status 1", + expectedErr: "linguist", }, } { t.Run(tc.desc, func(t *testing.T) { @@ -145,7 +158,7 @@ func TestInstance_Stats(t *testing.T) { if tc.expectedErr == "" { require.NoError(t, err) require.Equal(t, tc.expectedStats, stats) - require.FileExists(t, filepath.Join(repoPath, "language-stats.cache")) + require.FileExists(t, filepath.Join(repoPath, languageStatsFilename)) } else { require.Contains(t, err.Error(), tc.expectedErr) } @@ -155,12 +168,12 @@ func TestInstance_Stats(t *testing.T) { func TestInstance_Stats_unmarshalJSONError(t *testing.T) { cfg := testcfg.Build(t) - ctx := testhelper.Context(t) + ctx := featureflag.ContextWithFeatureFlag(testhelper.Context(t), featureflag.GoLanguageStats, false) gitCmdFactory := gittest.NewCommandFactory(t, cfg) invalidRepo := &gitalypb.Repository{StorageName: "fake", RelativePath: "path"} catfileCache := catfile.NewCache(cfg) - defer catfileCache.Stop() + t.Cleanup(catfileCache.Stop) repo := localrepo.New(config.NewLocator(cfg), gitCmdFactory, catfileCache, invalidRepo) @@ -176,6 +189,43 @@ func TestInstance_Stats_unmarshalJSONError(t *testing.T) { require.False(t, ok, "expected the error not be a json Syntax Error") } +func TestInstance_Stats_incremental(t *testing.T) { + t.Parallel() + + cfg := testcfg.Build(t) + logger, hook := test.NewNullLogger() + ctx := testhelper.Context(t, testhelper.ContextWithLogger(logrus.NewEntry(logger))) + ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.GoLanguageStats, true) + + gitCmdFactory := gittest.NewCommandFactory(t, cfg) + + linguist, err := New(cfg, gitCmdFactory) + require.NoError(t, err) + + catfileCache := catfile.NewCache(cfg) + t.Cleanup(catfileCache.Stop) + + repoProto, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0]) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + cleanStats, err := linguist.Stats(ctx, repo, "1e292f8fedd741b75372e19097c76d327140c312", catfileCache) + require.NoError(t, err) + require.Len(t, hook.AllEntries(), 0) + require.NoError(t, os.Remove(filepath.Join(repoPath, languageStatsFilename))) + + _, err = linguist.Stats(ctx, repo, "cfe32cf61b73a0d5e9f13e774abde7ff789b1660", catfileCache) + require.NoError(t, err) + require.Len(t, hook.AllEntries(), 0) + require.FileExists(t, filepath.Join(repoPath, languageStatsFilename)) + + incStats, err := linguist.Stats(ctx, repo, "1e292f8fedd741b75372e19097c76d327140c312", catfileCache) + require.NoError(t, err) + require.Len(t, hook.AllEntries(), 0) + require.FileExists(t, filepath.Join(repoPath, languageStatsFilename)) + + require.Equal(t, cleanStats, incStats) +} + func TestNew(t *testing.T) { cfg := testcfg.Build(t, testcfg.WithRealLinguist()) @@ -196,3 +246,12 @@ func TestNew_loadLanguagesCustomPath(t *testing.T) { require.Equal(t, "foo color", ling.Color("FooBar")) } + +// filenameForCache returns the filename where the cache is stored, depending on +// the feature flag. +func filenameForCache(ctx context.Context) string { + if featureflag.GoLanguageStats.IsDisabled(ctx) { + return "language-stats.cache" + } + return languageStatsFilename +} diff --git a/internal/gitaly/service/commit/languages_test.go b/internal/gitaly/service/commit/languages_test.go index 0f0a2ae41..a36fea35b 100644 --- a/internal/gitaly/service/commit/languages_test.go +++ b/internal/gitaly/service/commit/languages_test.go @@ -1,10 +1,12 @@ package commit import ( + "context" "testing" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" + "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" @@ -12,12 +14,16 @@ import ( ) func TestLanguages(t *testing.T) { + testhelper.NewFeatureSets(featureflag.GoLanguageStats). + Run(t, testLanguagesFeatured) +} + +func testLanguagesFeatured(t *testing.T, ctx context.Context) { t.Parallel() cfg := testcfg.Build(t, testcfg.WithRealLinguist()) cfg.SocketPath = startTestServices(t, cfg) - ctx := testhelper.Context(t) repo, _ := gittest.CreateRepository(ctx, t, cfg, gittest.CreateRepositoryConfig{ Seed: gittest.SeedGitLabTest, }) @@ -46,9 +52,13 @@ func TestLanguages(t *testing.T) { } func TestFileCountIsZeroWhenFeatureIsDisabled(t *testing.T) { + testhelper.NewFeatureSets(featureflag.GoLanguageStats). + Run(t, testFileCountIsZeroWhenFeatureIsDisabled) +} + +func testFileCountIsZeroWhenFeatureIsDisabled(t *testing.T, ctx context.Context) { t.Parallel() - ctx := testhelper.Context(t) _, repo, _, client := setupCommitServiceWithRepo(ctx, t) request := &gitalypb.CommitLanguagesRequest{ @@ -68,9 +78,13 @@ func TestFileCountIsZeroWhenFeatureIsDisabled(t *testing.T) { } func TestLanguagesEmptyRevision(t *testing.T) { + testhelper.NewFeatureSets(featureflag.GoLanguageStats). + Run(t, testLanguagesEmptyRevisionFeatured) +} + +func testLanguagesEmptyRevisionFeatured(t *testing.T, ctx context.Context) { t.Parallel() - ctx := testhelper.Context(t) _, repo, _, client := setupCommitServiceWithRepo(ctx, t) request := &gitalypb.CommitLanguagesRequest{ @@ -91,9 +105,13 @@ func TestLanguagesEmptyRevision(t *testing.T) { } func TestInvalidCommitLanguagesRequestRevision(t *testing.T) { + testhelper.NewFeatureSets(featureflag.GoLanguageStats). + Run(t, testInvalidCommitLanguagesRequestRevisionFeatured) +} + +func testInvalidCommitLanguagesRequestRevisionFeatured(t *testing.T, ctx context.Context) { t.Parallel() - ctx := testhelper.Context(t) _, repo, _, client := setupCommitServiceWithRepo(ctx, t) _, err := client.CommitLanguages(ctx, &gitalypb.CommitLanguagesRequest{ @@ -104,9 +122,13 @@ func TestInvalidCommitLanguagesRequestRevision(t *testing.T) { } func TestAmbiguousRefCommitLanguagesRequestRevision(t *testing.T) { + testhelper.NewFeatureSets(featureflag.GoLanguageStats). + Run(t, testAmbiguousRefCommitLanguagesRequestRevisionFeatured) +} + +func testAmbiguousRefCommitLanguagesRequestRevisionFeatured(t *testing.T, ctx context.Context) { t.Parallel() - ctx := testhelper.Context(t) _, repo, _, client := setupCommitServiceWithRepo(ctx, t) // gitlab-test repo has both a branch and a tag named 'v1.1.0' diff --git a/internal/metadata/featureflag/ff_go_language_stats.go b/internal/metadata/featureflag/ff_go_language_stats.go new file mode 100644 index 000000000..cf4626a06 --- /dev/null +++ b/internal/metadata/featureflag/ff_go_language_stats.go @@ -0,0 +1,10 @@ +package featureflag + +// GoLanguageStats flag enables getting CommitLanguages statistics written in +// Go. +var GoLanguageStats = NewFeatureFlag( + "go_language_stats", + "v15.2.0", + "https://gitlab.com/gitlab-org/gitaly/-/issues/4254", + false, +) -- cgit v1.2.3