diff options
author | Zeger-Jan van de Weg <git@zjvandeweg.nl> | 2018-06-04 15:58:22 +0300 |
---|---|---|
committer | Zeger-Jan van de Weg <git@zjvandeweg.nl> | 2018-07-16 18:02:12 +0300 |
commit | 8dde19c4b8e6c56cb221267581474638ce3a55fd (patch) | |
tree | df44f5a9317a84a58f6f1dab3f1ab9e405bf7990 | |
parent | b4c53bc4704a07f85b9006d6f2b50ca80d377fd8 (diff) |
Cache linguist results allowing incremental detection
Prior to this change, Gitaly didn't cache any results from linguist,
meaning that large repositories would be scanned fully, each time a user
requested the Charts page. This made it fairly impractical, and resource
intensive. Furthermore, this might allow for other features that scan
more often.
-rw-r--r-- | changelogs/unreleased/zj-linguist-ruby-sidecar.yml | 5 | ||||
-rw-r--r-- | internal/service/commit/testhelper_test.go | 2 | ||||
-rw-r--r-- | ruby/lib/gitaly_server.rb | 2 | ||||
-rw-r--r-- | ruby/lib/gitaly_server/commit_service.rb | 20 | ||||
-rw-r--r-- | ruby/lib/gitlab/linguist/cache.rb | 50 | ||||
-rw-r--r-- | ruby/lib/gitlab/linguist/repository_languages.rb | 24 | ||||
-rw-r--r-- | ruby/spec/lib/gitlab/linguist/cache_spec.rb | 37 |
7 files changed, 124 insertions, 16 deletions
diff --git a/changelogs/unreleased/zj-linguist-ruby-sidecar.yml b/changelogs/unreleased/zj-linguist-ruby-sidecar.yml new file mode 100644 index 000000000..bae12a105 --- /dev/null +++ b/changelogs/unreleased/zj-linguist-ruby-sidecar.yml @@ -0,0 +1,5 @@ +--- +title: Use caching for linguist results +merge_request: 751 +author: +type: performance diff --git a/internal/service/commit/testhelper_test.go b/internal/service/commit/testhelper_test.go index 0f0480347..f7f455143 100644 --- a/internal/service/commit/testhelper_test.go +++ b/internal/service/commit/testhelper_test.go @@ -17,8 +17,6 @@ import ( pb "gitlab.com/gitlab-org/gitaly-proto/go" ) -var () - func TestMain(m *testing.M) { os.Exit(testMain(m)) } diff --git a/ruby/lib/gitaly_server.rb b/ruby/lib/gitaly_server.rb index a03f0dab6..1866d183f 100644 --- a/ruby/lib/gitaly_server.rb +++ b/ruby/lib/gitaly_server.rb @@ -1,6 +1,8 @@ require 'gitaly' require_relative 'gitlab/git.rb' +require_relative 'gitlab/linguist/repository_languages.rb' +require_relative 'gitlab/linguist/cache.rb' require_relative 'gitaly_server/client.rb' require_relative 'gitaly_server/utils.rb' diff --git a/ruby/lib/gitaly_server/commit_service.rb b/ruby/lib/gitaly_server/commit_service.rb index 26c836649..dea435581 100644 --- a/ruby/lib/gitaly_server/commit_service.rb +++ b/ruby/lib/gitaly_server/commit_service.rb @@ -167,21 +167,17 @@ module GitalyServer commit = Gitlab::Git::Commit.find(repo, request.revision) raise GRPC::InvalidArgument, 'revision could not be resolved' unless commit - languages = Linguist::Repository.new(repo.rugged, commit.id) - .languages - .sort_by { |_k, v| v } - .reverse + languages = + Gitlab::Linguist::RepositoryLanguages.new(repo, commit) + .detect - total_bytes = languages.sum(&:last) - return Gitaly::CommitLanguagesResponse.new(languages: []) if total_bytes == 0 + total_bytes = languages.sum(&:last).to_f languages.map! 
do |name, bytes| - warn "#{bytes} of a total of #{total_bytes}" if name == 'Ruby' - Gitaly::CommitLanguagesResponse::Language.new( name: name.to_s, - share: ((bytes.to_f / total_bytes.to_f) * 100).round, - color: linguist_color(name) + color: ::Linguist::Language.find_by_name(name)&.color || "##{Digest::SHA256.hexdigest(name)[0..5]}", + share: ((bytes / total_bytes) * 100).round, ) end @@ -220,9 +216,5 @@ module GitalyServer yield nil, chunk end end - - def linguist_color(language) - Linguist::Language.find_by_name(language)&.color || "##{Digest::SHA256.hexdigest(language)[0..5]}" - end end end diff --git a/ruby/lib/gitlab/linguist/cache.rb b/ruby/lib/gitlab/linguist/cache.rb new file mode 100644 index 000000000..292e281f7 --- /dev/null +++ b/ruby/lib/gitlab/linguist/cache.rb @@ -0,0 +1,50 @@ +module Gitlab + module Linguist + class Cache + OLD_STATS_KEY = 'old_stats'.freeze + OLD_COMMIT_OID_KEY = 'old_commit_oid'.freeze + + def initialize(repo_path) + @path = repo_path + end + + def write(linguist, commit_oid) + return if old_commit_oid == commit_oid + + FileUtils.mkdir_p(linguist_cache_directory) unless Dir.exist?(linguist_cache_directory) + + new_cache = { OLD_STATS_KEY => linguist.cache, OLD_COMMIT_OID_KEY => commit_oid } + + File.write(cache_path, Marshal.dump(new_cache)) + end + + def old_stats + cache[OLD_STATS_KEY] + end + + def old_commit_oid + cache[OLD_COMMIT_OID_KEY] + end + + private + + def cache + @cache ||= if File.exist?(cache_path) + Marshal.load(File.binread(cache_path)) + else + {} + end + rescue ArgumentError + @cache = {} + end + + def cache_path + File.join(linguist_cache_directory, 'linguist-cache') + end + + def linguist_cache_directory + File.join(@path, 'gitaly') + end + end + end +end diff --git a/ruby/lib/gitlab/linguist/repository_languages.rb b/ruby/lib/gitlab/linguist/repository_languages.rb new file mode 100644 index 000000000..8ddbb32a2 --- /dev/null +++ b/ruby/lib/gitlab/linguist/repository_languages.rb @@ -0,0 +1,24 @@ +module 
Gitlab + module Linguist + class RepositoryLanguages + def initialize(repo, commit) + @repo = repo + @commit = commit + @cache = Gitlab::Linguist::Cache.new(repo.path) + end + + def detect + linguist = ::Linguist::Repository.incremental(@repo.rugged, @commit.id, @cache.old_commit_oid, @cache.old_stats) + + languages = linguist + .languages + .sort_by { |_k, v| v } + .reverse + + @cache.write(linguist, @commit.id) + + languages + end + end + end +end diff --git a/ruby/spec/lib/gitlab/linguist/cache_spec.rb b/ruby/spec/lib/gitlab/linguist/cache_spec.rb new file mode 100644 index 000000000..d5f0d997f --- /dev/null +++ b/ruby/spec/lib/gitlab/linguist/cache_spec.rb @@ -0,0 +1,37 @@ +require 'spec_helper' + +describe Gitlab::Linguist::Cache do + include TestRepo + + let(:repository) { gitlab_git_from_gitaly(new_mutable_test_repo) } + let(:old_stats) { [{ 'foo.rb' => 'Ruby'}, { 'bar.go' => 'Go' }] } + let(:linguist) { double('linguist', cache: old_stats) } + + subject { described_class.new(repository.path) } + + describe '#write' do + it 'writes the cache in the Gitaly cache directory' do + subject.write(linguist, '0' * 40) + + expect(File.exist?(File.join(repository.path, 'gitaly', 'linguist-cache'))).to be(true) + end + end + + describe 'old_stats' do + context 'when there is no cache yet' do + it 'returns nil' do + expect(subject.old_stats).to be_nil + end + + end + + context 'when the cache has been written' do + before do + subject.write(linguist, '0' * 40) + + expect(subject.old_stats).not_to be_nil + expect(subject.old_stats).to eq('0' * 40) + end + end + end +end |