diff options
author | Toon Claes <toon@gitlab.com> | 2022-06-03 18:10:48 +0300 |
---|---|---|
committer | Toon Claes <toon@gitlab.com> | 2022-07-08 11:24:17 +0300 |
commit | 181a7d79de8c1fd0385baf7d114cbad6c417d639 (patch) | |
tree | 4016f719a4cce1f8883b6dfbed0d75b825073f28 | |
parent | efd9a598f50e03f05620b56f2e010600128f3b1c (diff) |
linguist: Benchmark the Go implementation
The main reason we wrote the Go implementation for linguist is getting
rid of Ruby. But we don't want this implementation to be slower. So to
compare the performance, this change adds a benchmark of both
implementations.
These are the results running it on my computer:
$ go test -run=^$ -bench=. -benchtime=4x ./internal/gitaly/linguist
goos: linux
goarch: amd64
pkg: gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/linguist
cpu: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz
BenchmarkInstance_Stats/go_language_stats=false/from_scratch-8 4 56371415917 ns/op
BenchmarkInstance_Stats/go_language_stats=false/incremental-8 4 25310716778 ns/op
BenchmarkInstance_Stats/go_language_stats=true/from_scratch-8 4 3149285756 ns/op
BenchmarkInstance_Stats/go_language_stats=true/incremental-8 4 1402539266 ns/op
Getting the stats from scratch drops from roughly 56s to 3.1s, which is
impressive. Getting stats incrementally drops from about 25s to 1.4s,
which is also pretty nice.
On the topic of cache size, these are the file sizes for both
implementations:
* Ruby: 473993 bytes
* Golang: 436335 bytes
These are comparable in size and it's a relief to see it's not
significantly larger in the new implementation.
-rw-r--r-- | internal/gitaly/linguist/linguist_test.go | 51 |
1 files changed, 51 insertions, 0 deletions
diff --git a/internal/gitaly/linguist/linguist_test.go b/internal/gitaly/linguist/linguist_test.go index 6ef0734b0..199300a5d 100644 --- a/internal/gitaly/linguist/linguist_test.go +++ b/internal/gitaly/linguist/linguist_test.go @@ -255,3 +255,54 @@ func filenameForCache(ctx context.Context) string { } return languageStatsFilename } + +func BenchmarkInstance_Stats(b *testing.B) { + testhelper.NewFeatureSets(featureflag.GoLanguageStats). + Bench(b, benchmarkInstanceStats) +} + +func benchmarkInstanceStats(b *testing.B, ctx context.Context) { + cfg := testcfg.Build(b) + gitCmdFactory := gittest.NewCommandFactory(b, cfg) + languageStatsFilename := filenameForCache(ctx) + + linguist, err := New(cfg, gitCmdFactory) + require.NoError(b, err) + + catfileCache := catfile.NewCache(cfg) + b.Cleanup(catfileCache.Stop) + + repoProto, repoPath := gittest.CloneRepo(b, cfg, cfg.Storages[0], gittest.CloneRepoOpts{ + SourceRepo: "benchmark.git", + }) + repo := localrepo.NewTestRepo(b, cfg, repoProto) + + var scratchStat ByteCountPerLanguage + var incStats ByteCountPerLanguage + + b.Run("from scratch", func(b *testing.B) { + for i := 0; i < b.N; i++ { + b.StopTimer() + require.NoError(b, os.RemoveAll(filepath.Join(repoPath, languageStatsFilename))) + b.StartTimer() + + scratchStat, err = linguist.Stats(ctx, repo, "f5dfdd0057cd6bffc6259a5c8533dde5bf6a9d37", catfileCache) + require.NoError(b, err) + } + }) + + b.Run("incremental", func(b *testing.B) { + for i := 0; i < b.N; i++ { + b.StopTimer() + require.NoError(b, os.RemoveAll(filepath.Join(repoPath, languageStatsFilename))) + // a commit about 3 months older than the next + _, err = linguist.Stats(ctx, repo, "3c813b292d25a9b2ffda70e7f609f623bfc0cb37", catfileCache) + b.StartTimer() + + incStats, err = linguist.Stats(ctx, repo, "f5dfdd0057cd6bffc6259a5c8533dde5bf6a9d37", catfileCache) + require.NoError(b, err) + } + }) + + require.Equal(b, scratchStat, incStats) +} |