From 2b9dd911b9da49236dc975133510b51d1b2522c6 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Tue, 8 Nov 2022 19:34:05 +0100 Subject: repository: Add benchmark for FindLicense To get a sense of the performance of FindLicense, add this benchmark. goos: linux goarch: amd64 pkg: gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/service/repository cpu: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz BenchmarkFindLicense BenchmarkFindLicense/read_object_cached=false BenchmarkFindLicense/read_object_cached=false/gitlab-org/gitlab.git license_test.go:380: git commands for loop: 6 license_test.go:380: git commands for loop: 6 license_test.go:380: git commands for loop: 6 BenchmarkFindLicense/read_object_cached=false/gitlab-org/gitlab.git-8 20 50347475 ns/op BenchmarkFindLicense/read_object_cached=false/stress.git license_test.go:380: git commands for loop: 113 BenchmarkFindLicense/read_object_cached=false/stress.git-8 1 1797435782 ns/op BenchmarkFindLicense/read_object_cached=true BenchmarkFindLicense/read_object_cached=true/gitlab-org/gitlab.git license_test.go:380: git commands for loop: 4 license_test.go:380: git commands for loop: 3 license_test.go:380: git commands for loop: 3 license_test.go:380: git commands for loop: 3 BenchmarkFindLicense/read_object_cached=true/gitlab-org/gitlab.git-8 48 23122846 ns/op BenchmarkFindLicense/read_object_cached=true/stress.git license_test.go:380: git commands for loop: 4 license_test.go:380: git commands for loop: 3 BenchmarkFindLicense/read_object_cached=true/stress.git-8 2 610536118 ns/op This benchmark includes the number of git commands for each loop. It is visible that in the benchmarks where the catfile cache is used, a significantly lower number of git commands is spawned. 
--- internal/gitaly/service/repository/license_test.go | 119 +++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/internal/gitaly/service/repository/license_test.go b/internal/gitaly/service/repository/license_test.go index 2748b4bd2..cffc9db0b 100644 --- a/internal/gitaly/service/repository/license_test.go +++ b/internal/gitaly/service/repository/license_test.go @@ -7,15 +7,20 @@ import ( "os" "testing" + "github.com/go-enry/go-license-detector/v4/licensedb" "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/catfile" "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/rubyserver" "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg" + "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testserver" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" + "gitlab.com/gitlab-org/labkit/correlation" "google.golang.org/grpc/codes" + "google.golang.org/grpc/metadata" "google.golang.org/grpc/status" ) @@ -282,3 +287,117 @@ func TestFindLicense_validate(t *testing.T) { msg := testhelper.GitalyOrPraefectMessage("empty Repository", "repo scoped: empty Repository") testhelper.RequireGrpcError(t, status.Error(codes.InvalidArgument, msg), err) } + +func BenchmarkFindLicense(b *testing.B) { + cfg := testcfg.Build(b) + ctx := testhelper.Context(b) + ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.GoFindLicense, true) + + gitCmdFactory := gittest.NewCountingCommandFactory(b, cfg) + + client, serverSocketPath := runRepositoryService( + b, + cfg, + nil, + testserver.WithGitCommandFactory(gitCmdFactory), + testserver.WithDisablePraefect(), + ) + cfg.SocketPath = serverSocketPath + + // Warm up the license database + licensedb.Preload() + + repoGitLab, _ := gittest.CreateRepository(b, 
ctx, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + Seed: "benchmark.git", + }) + + repoStress, repoStressPath := gittest.CreateRepository(b, ctx, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + }) + + // Based on https://github.com/go-enry/go-license-detector/blob/18a439e5437cd46905b074ac24c27cbb6cac4347/licensedb/internal/investigation.go#L28-L38 + fileNames := []string{ + "licence", + "lisence", //nolint:misspell + "lisense", //nolint:misspell + "license", + "licences", + "lisences", + "lisenses", + "licenses", + "legal", + "copyleft", + "copyright", + "copying", + "unlicense", + "gpl-v1", + "gpl-v2", + "gpl-v3", + "lgpl-v1", + "lgpl-v2", + "lgpl-v3", + "bsd", + "mit", + "apache", + } + fileExtensions := []string{ + "", + ".md", + ".rst", + ".html", + ".txt", + } + + treeEntries := make([]gittest.TreeEntry, 0, len(fileNames)*len(fileExtensions)) + + for _, name := range fileNames { + for _, ext := range fileExtensions { + treeEntries = append(treeEntries, + gittest.TreeEntry{ + Mode: "100644", + Path: name + ext, + Content: mitLicense + "\n" + name, // grain of salt + }) + } + } + + gittest.WriteCommit(b, cfg, repoStressPath, gittest.WithBranch("main"), + gittest.WithTreeEntries(treeEntries...)) + gittest.Exec(b, cfg, "-C", repoStressPath, "symbolic-ref", "HEAD", "refs/heads/main") + + testhelper.NewFeatureSets(featureflag.ReadObjectCached).Bench(b, func(b *testing.B, ctx context.Context) { + ctx = featureflag.ContextWithFeatureFlag(ctx, featureflag.GoFindLicense, true) + + ctx = correlation.ContextWithCorrelation(ctx, "1") + ctx = testhelper.MergeOutgoingMetadata(ctx, + metadata.Pairs(catfile.SessionIDField, "1"), + ) + + for _, tc := range []struct { + desc string + repo *gitalypb.Repository + }{ + { + desc: "gitlab-org/gitlab.git", + repo: repoGitLab, + }, + { + desc: "stress.git", + repo: repoStress, + }, + } { + b.Run(tc.desc, func(b *testing.B) { + gitCmdFactory.ResetCount() + + for i := 0; i < b.N; i++ { + 
resp, err := client.FindLicense(ctx, &gitalypb.FindLicenseRequest{Repository: tc.repo}) + require.NoError(b, err) + require.Equal(b, "mit", resp.GetLicenseShortName()) + } + + b.Logf("git commands for loop: %d\n", gitCmdFactory.Count()/uint64(b.N)) + }) + } + }) +} -- cgit v1.2.3