Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Zeger-Jan van de Weg <git@zjvandeweg.nl> 2018-06-04 15:58:22 +0300
committer: Zeger-Jan van de Weg <git@zjvandeweg.nl> 2018-07-16 18:02:12 +0300
commit: 8dde19c4b8e6c56cb221267581474638ce3a55fd (patch)
tree: df44f5a9317a84a58f6f1dab3f1ab9e405bf7990
parent: b4c53bc4704a07f85b9006d6f2b50ca80d377fd8 (diff)
Cache linguist results allowing incremental detection
Prior to this change, Gitaly didn't cache any results from linguist, meaning that large repositories would be scanned fully each time a user requested the Charts page. This made it fairly impractical and resource intensive. Furthermore, this might allow for other features that scan more often.
-rw-r--r-- changelogs/unreleased/zj-linguist-ruby-sidecar.yml 5
-rw-r--r-- internal/service/commit/testhelper_test.go 2
-rw-r--r-- ruby/lib/gitaly_server.rb 2
-rw-r--r-- ruby/lib/gitaly_server/commit_service.rb 20
-rw-r--r-- ruby/lib/gitlab/linguist/cache.rb 50
-rw-r--r-- ruby/lib/gitlab/linguist/repository_languages.rb 24
-rw-r--r-- ruby/spec/lib/gitlab/linguist/cache_spec.rb 37
7 files changed, 124 insertions, 16 deletions
diff --git a/changelogs/unreleased/zj-linguist-ruby-sidecar.yml b/changelogs/unreleased/zj-linguist-ruby-sidecar.yml
new file mode 100644
index 000000000..bae12a105
--- /dev/null
+++ b/changelogs/unreleased/zj-linguist-ruby-sidecar.yml
@@ -0,0 +1,5 @@
+---
+title: Use caching for linguist results
+merge_request: 751
+author:
+type: performance
diff --git a/internal/service/commit/testhelper_test.go b/internal/service/commit/testhelper_test.go
index 0f0480347..f7f455143 100644
--- a/internal/service/commit/testhelper_test.go
+++ b/internal/service/commit/testhelper_test.go
@@ -17,8 +17,6 @@ import (
pb "gitlab.com/gitlab-org/gitaly-proto/go"
)
-var ()
-
func TestMain(m *testing.M) {
os.Exit(testMain(m))
}
diff --git a/ruby/lib/gitaly_server.rb b/ruby/lib/gitaly_server.rb
index a03f0dab6..1866d183f 100644
--- a/ruby/lib/gitaly_server.rb
+++ b/ruby/lib/gitaly_server.rb
@@ -1,6 +1,8 @@
require 'gitaly'
require_relative 'gitlab/git.rb'
+require_relative 'gitlab/linguist/repository_languages.rb'
+require_relative 'gitlab/linguist/cache.rb'
require_relative 'gitaly_server/client.rb'
require_relative 'gitaly_server/utils.rb'
diff --git a/ruby/lib/gitaly_server/commit_service.rb b/ruby/lib/gitaly_server/commit_service.rb
index 26c836649..dea435581 100644
--- a/ruby/lib/gitaly_server/commit_service.rb
+++ b/ruby/lib/gitaly_server/commit_service.rb
@@ -167,21 +167,17 @@ module GitalyServer
commit = Gitlab::Git::Commit.find(repo, request.revision)
raise GRPC::InvalidArgument, 'revision could not be resolved' unless commit
- languages = Linguist::Repository.new(repo.rugged, commit.id)
- .languages
- .sort_by { |_k, v| v }
- .reverse
+ languages =
+ Gitlab::Linguist::RepositoryLanguages.new(repo, commit)
+ .detect
- total_bytes = languages.sum(&:last)
- return Gitaly::CommitLanguagesResponse.new(languages: []) if total_bytes == 0
+ total_bytes = languages.sum(&:last).to_f
languages.map! do |name, bytes|
- warn "#{bytes} of a total of #{total_bytes}" if name == 'Ruby'
-
Gitaly::CommitLanguagesResponse::Language.new(
name: name.to_s,
- share: ((bytes.to_f / total_bytes.to_f) * 100).round,
- color: linguist_color(name)
+ color: ::Linguist::Language.find_by_name(name)&.color || "##{Digest::SHA256.hexdigest(name)[0..5]}",
+ share: ((bytes / total_bytes) * 100).round,
)
end
@@ -220,9 +216,5 @@ module GitalyServer
yield nil, chunk
end
end
-
- def linguist_color(language)
- Linguist::Language.find_by_name(language)&.color || "##{Digest::SHA256.hexdigest(language)[0..5]}"
- end
end
end
diff --git a/ruby/lib/gitlab/linguist/cache.rb b/ruby/lib/gitlab/linguist/cache.rb
new file mode 100644
index 000000000..292e281f7
--- /dev/null
+++ b/ruby/lib/gitlab/linguist/cache.rb
@@ -0,0 +1,50 @@
+module Gitlab
+ module Linguist
+ class Cache
+ OLD_STATS_KEY = 'old_stats'.freeze
+ OLD_COMMIT_OID_KEY = 'old_commit_oid'.freeze
+
+ def initialize(repo_path)
+ @path = repo_path
+ end
+
+ def write(linguist, commit_oid)
+ return if old_commit_oid == commit_oid
+
+ FileUtils.mkdir_p(linguist_cache_directory) unless Dir.exist?(linguist_cache_directory)
+
+ new_cache = { OLD_STATS_KEY => linguist.cache, OLD_COMMIT_OID_KEY => commit_oid }
+
+ File.write(cache_path, Marshal.dump(new_cache))
+ end
+
+ def old_stats
+ cache[OLD_STATS_KEY]
+ end
+
+ def old_commit_oid
+ cache[OLD_COMMIT_OID_KEY]
+ end
+
+ private
+
+ def cache
+ @cache ||= if File.exist?(cache_path)
+ Marshal.load(File.binread(cache_path))
+ else
+ {}
+ end
+ rescue ArgumentError
+ @cache = {}
+ end
+
+ def cache_path
+ File.join(linguist_cache_directory, 'linguist-cache')
+ end
+
+ def linguist_cache_directory
+ File.join(@path, 'gitaly')
+ end
+ end
+ end
+end
diff --git a/ruby/lib/gitlab/linguist/repository_languages.rb b/ruby/lib/gitlab/linguist/repository_languages.rb
new file mode 100644
index 000000000..8ddbb32a2
--- /dev/null
+++ b/ruby/lib/gitlab/linguist/repository_languages.rb
@@ -0,0 +1,24 @@
+module Gitlab
+ module Linguist
+ class RepositoryLanguages
+ def initialize(repo, commit)
+ @repo = repo
+ @commit = commit
+ @cache = Gitlab::Linguist::Cache.new(repo.path)
+ end
+
+ def detect
+ linguist = ::Linguist::Repository.incremental(@repo.rugged, @commit.id, @cache.old_commit_oid, @cache.old_stats)
+
+ languages = linguist
+ .languages
+ .sort_by { |_k, v| v }
+ .reverse
+
+ @cache.write(linguist, @commit.id)
+
+ languages
+ end
+ end
+ end
+end
diff --git a/ruby/spec/lib/gitlab/linguist/cache_spec.rb b/ruby/spec/lib/gitlab/linguist/cache_spec.rb
new file mode 100644
index 000000000..d5f0d997f
--- /dev/null
+++ b/ruby/spec/lib/gitlab/linguist/cache_spec.rb
@@ -0,0 +1,37 @@
+require 'spec_helper'
+
+describe Gitlab::Linguist::Cache do
+ include TestRepo
+
+ let(:repository) { gitlab_git_from_gitaly(new_mutable_test_repo) }
+ let(:old_stats) { [{ 'foo.rb' => 'Ruby'}, { 'bar.go' => 'Go' }] }
+ let(:linguist) { double('linguist', cache: old_stats) }
+
+ subject { described_class.new(repository.path) }
+
+ describe '#write' do
+ it 'writes the cache in the Gitaly cache directory' do
+ subject.write(linguist, '0' * 40)
+
+ expect(File.exist?(File.join(repository.path, 'gitaly', 'linguist-cache'))).to be(true)
+ end
+ end
+
+ describe 'old_stats' do
+ context 'when there is no cache yet' do
+ it 'returns nil' do
+ expect(subject.old_stats).to be_nil
+ end
+
+ end
+
+ context 'when the cache has been written' do
+ before do
+ subject.write(linguist, '0' * 40)
+
+ expect(subject.old_stats).not_to be_nil
+ expect(subject.old_stats).to eq('0' * 40)
+ end
+ end
+ end
+end