Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Patrick Steinhardt <psteinhardt@gitlab.com>, 2022-05-20 09:05:52 +0300
committer: Patrick Steinhardt <psteinhardt@gitlab.com>, 2022-05-23 10:23:12 +0300
commit: 41fe00cab752de8701a0bc4f86866a30b9157d45 (patch)
tree: f3b24549ba18b6334a63f302ddfc6e87d61d0826
parent: 2c70420ae66af132e431dbe9402c496edcea54da (diff)
linguist: Implement wrapper to ignore gitconfig in Rugged
We're using the `git-linguist` binary to derive programming-language statistics for a repository. This binary is using Rugged to read a given commit and analyze all blobs referenced by the root tree so that it can return accumulated byte counts for every language. Unfortunately, `git-linguist` reads in gitconfig files by default with no escape hatch, which sabotages our efforts to make the Gitaly codebase gitconfig-clean. We are thus forced to implement our own wrapper script around the Linguist Gem that allows us to ignore the gitconfig in Rugged. Do so and implement a new `gitaly-linguist` binary that mostly mirrors what `git-linguist` is doing. This allows us to override the Rugged search path and point it to `/dev/null` so that it won't read any gitconfig files at all. Note that we do not use e.g. Gitaly's own gitconfig as computed by the Git command factory. Ultimately, the Linguist Gem does not read any Git configuration that would change its behaviour, so it would be overkill to do so. Changelog: changed
-rw-r--r-- internal/gitaly/linguist/linguist.go | 12
-rw-r--r-- internal/gitaly/linguist/linguist_test.go | 125
-rwxr-xr-x ruby/bin/gitaly-linguist | 86
3 files changed, 209 insertions, 14 deletions
diff --git a/internal/gitaly/linguist/linguist.go b/internal/gitaly/linguist/linguist.go
index 7976ddfb5..39ac845c0 100644
--- a/internal/gitaly/linguist/linguist.go
+++ b/internal/gitaly/linguist/linguist.go
@@ -54,7 +54,7 @@ func New(cfg config.Cfg, gitCmdFactory git.CommandFactory) (*Instance, error) {
// Stats returns the repository's language stats as reported by 'git-linguist'.
func (inst *Instance) Stats(ctx context.Context, repoPath string, commitID string) (ByteCountPerLanguage, error) {
- cmd, err := inst.startGitLinguist(ctx, repoPath, commitID, "stats")
+ cmd, err := inst.startGitLinguist(ctx, repoPath, commitID)
if err != nil {
return nil, fmt.Errorf("starting linguist: %w", err)
}
@@ -86,7 +86,7 @@ func (inst *Instance) Color(language string) string {
return fmt.Sprintf("#%x", colorSha[0:3])
}
-func (inst *Instance) startGitLinguist(ctx context.Context, repoPath string, commitID string, linguistCommand string) (*command.Command, error) {
+func (inst *Instance) startGitLinguist(ctx context.Context, repoPath string, commitID string) (*command.Command, error) {
bundle, err := exec.LookPath("bundle")
if err != nil {
return nil, fmt.Errorf("finding bundle executable: %w", err)
@@ -95,11 +95,9 @@ func (inst *Instance) startGitLinguist(ctx context.Context, repoPath string, com
args := []string{
bundle,
"exec",
- "bin/ruby-cd",
- repoPath,
- "git-linguist",
+ "bin/gitaly-linguist",
+ "--repository=" + repoPath,
"--commit=" + commitID,
- linguistCommand,
}
gitExecEnv := inst.gitCmdFactory.GetExecutionEnvironment(ctx)
@@ -128,7 +126,7 @@ func (inst *Instance) startGitLinguist(ctx context.Context, repoPath string, com
}
internalCmd.SetMetricsCmd("git-linguist")
- internalCmd.SetMetricsSubCmd(linguistCommand)
+ internalCmd.SetMetricsSubCmd("stats")
return internalCmd, nil
}
diff --git a/internal/gitaly/linguist/linguist_test.go b/internal/gitaly/linguist/linguist_test.go
index edc7a801c..24cacf81e 100644
--- a/internal/gitaly/linguist/linguist_test.go
+++ b/internal/gitaly/linguist/linguist_test.go
@@ -2,10 +2,12 @@ package linguist
import (
"encoding/json"
+ "os"
"path/filepath"
"testing"
"github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest"
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/config"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
@@ -16,17 +18,126 @@ func TestMain(m *testing.M) {
testhelper.Run(m)
}
-func TestInstance_Stats_successful(t *testing.T) {
+func TestInstance_Stats(t *testing.T) {
ctx := testhelper.Context(t)
+ cfg := testcfg.Build(t)
- cfg, _, repoPath := testcfg.BuildWithRepo(t)
-
- ling, err := New(cfg, gittest.NewCommandFactory(t, cfg))
+ linguist, err := New(cfg, gittest.NewCommandFactory(t, cfg))
require.NoError(t, err)
- counts, err := ling.Stats(ctx, repoPath, "1e292f8fedd741b75372e19097c76d327140c312")
- require.NoError(t, err)
- require.Equal(t, uint64(2943), counts["Ruby"])
+ commitID := git.ObjectID("1e292f8fedd741b75372e19097c76d327140c312")
+
+ for _, tc := range []struct {
+ desc string
+ setup func(t *testing.T) (string, git.ObjectID)
+ expectedStats ByteCountPerLanguage
+ expectedErr string
+ }{
+ {
+ desc: "successful",
+ setup: func(t *testing.T) (string, git.ObjectID) {
+ _, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+ return repoPath, commitID
+ },
+ expectedStats: map[string]uint64{
+ "CoffeeScript": 107,
+ "HTML": 349,
+ "JavaScript": 1014,
+ "Ruby": 2943,
+ },
+ },
+ {
+ desc: "preexisting cache",
+ setup: func(t *testing.T) (string, git.ObjectID) {
+ _, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+
+ // We simply run the linguist once before so that it can already
+ // write the cache.
+ _, err := linguist.Stats(ctx, repoPath, commitID.String())
+ require.NoError(t, err)
+ require.FileExists(t, filepath.Join(repoPath, "language-stats.cache"))
+
+ return repoPath, commitID
+ },
+ expectedStats: map[string]uint64{
+ "CoffeeScript": 107,
+ "HTML": 349,
+ "JavaScript": 1014,
+ "Ruby": 2943,
+ },
+ },
+ {
+ desc: "corrupted cache",
+ setup: func(t *testing.T) (string, git.ObjectID) {
+ _, repoPath := gittest.CloneRepo(t, cfg, cfg.Storages[0])
+
+ require.NoError(t, os.WriteFile(filepath.Join(repoPath, "language-stats.cache"), []byte("garbage"), 0o644))
+
+ return repoPath, commitID
+ },
+ expectedStats: map[string]uint64{
+ "CoffeeScript": 107,
+ "HTML": 349,
+ "JavaScript": 1014,
+ "Ruby": 2943,
+ },
+ },
+ {
+ desc: "old cache",
+ setup: func(t *testing.T) (string, git.ObjectID) {
+ _, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+
+ oldCommitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(), gittest.WithTreeEntries(
+ gittest.TreeEntry{Path: "main.rb", Content: "require 'fileutils'", Mode: "100644"},
+ ))
+ newCommitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(oldCommitID), gittest.WithTreeEntries(
+ gittest.TreeEntry{Path: "main.go", Content: "package main", Mode: "100644"},
+ ))
+
+ // Precreate the cache with the old commit. This ensures that
+ // linguist knows to update the cache.
+ stats, err := linguist.Stats(ctx, repoPath, oldCommitID.String())
+ require.NoError(t, err)
+ require.FileExists(t, filepath.Join(repoPath, "language-stats.cache"))
+ require.Equal(t, ByteCountPerLanguage{
+ "Ruby": 19,
+ }, stats)
+
+ return repoPath, newCommitID
+ },
+ expectedStats: map[string]uint64{
+ "Go": 12,
+ },
+ },
+ {
+ desc: "missing repository",
+ setup: func(t *testing.T) (string, git.ObjectID) {
+ return filepath.Join(testhelper.TempDir(t), "nonexistent"), commitID
+ },
+ expectedErr: "waiting for linguist: exit status 1",
+ },
+ {
+ desc: "missing commit",
+ setup: func(t *testing.T) (string, git.ObjectID) {
+ _, repoPath := gittest.InitRepo(t, cfg, cfg.Storages[0])
+ return repoPath, commitID
+ },
+ expectedErr: "waiting for linguist: exit status 1",
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ repoPath, objectID := tc.setup(t)
+
+ stats, err := linguist.Stats(ctx, repoPath, objectID.String())
+ if tc.expectedErr == "" {
+ require.NoError(t, err)
+ require.Equal(t, tc.expectedStats, stats)
+ require.FileExists(t, filepath.Join(repoPath, "language-stats.cache"))
+ } else {
+ require.EqualError(t, err, tc.expectedErr)
+ }
+ })
+ }
}
func TestInstance_Stats_unmarshalJSONError(t *testing.T) {
diff --git a/ruby/bin/gitaly-linguist b/ruby/bin/gitaly-linguist
new file mode 100755
index 000000000..f06e29b35
--- /dev/null
+++ b/ruby/bin/gitaly-linguist
@@ -0,0 +1,86 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+require 'json'
+require 'linguist'
+require 'optparse'
+require 'rugged'
+require 'tempfile'
+require 'zlib'
+
+LANGUAGE_STATS_CACHE = 'language-stats.cache'
+LANGUAGE_STATS_CACHE_VERSION = "v3:#{Linguist::VERSION}"
+
+def gitaly_linguist(args)
+ repository_path = nil
+ commit = nil
+
+ parser = OptionParser.new do |opts|
+ opts.on("-rREPOSITORY", "--repository=REPOSITORY", "Repository to scan") { |r| repository_path = r }
+ opts.on("-cCOMMIT", "--commit=COMMIT", "Commit to scan") { |c| commit = c }
+ opts.on("-h", "--help", "Prints this help") do
+ puts opts
+ exit
+ end
+ end
+
+ parser.parse!(args)
+
+ raise OptionParser::MissingArgument, 'repository' if repository_path.nil?
+ raise OptionParser::MissingArgument, 'commit' if commit.nil?
+
+ Rugged::Settings['search_path_system'] = '/dev/null'
+ Rugged::Settings['search_path_global'] = '/dev/null'
+ Rugged::Settings['search_path_xdg'] = '/dev/null'
+
+ repository = Rugged::Repository.bare(repository_path)
+ project = Linguist::Repository.new(repository, commit)
+
+ if (cache = load_cache(repository_path))
+ old_commit_oid, old_stats = cache
+
+ project.load_existing_stats(old_commit_oid, old_stats)
+ end
+
+ puts JSON.dump(project.languages)
+
+ write_cache(repository_path, commit, project.cache)
+end
+
+def cache_file(repo_path)
+ File.join(repo_path, LANGUAGE_STATS_CACHE)
+end
+
+def load_cache(repo_path)
+ cached_data = File.open(cache_file(repo_path), "rb") do |f|
+ Zlib::Inflate.inflate(f.read)
+ end
+
+ # rubocop:disable Security/MarshalLoad
+ #
+ # While this is ugly, it's the same as we previously did in git-linguist. So
+ # for backwards-compatibility reasons we can't change this.
+ version, commit, stats = Marshal.load(cached_data)
+ # rubocop:enable Security/MarshalLoad
+
+ if version == LANGUAGE_STATS_CACHE_VERSION && commit && stats
+ [commit, stats]
+ end
+rescue SystemCallError, ::Zlib::DataError, ::Zlib::BufError, TypeError
+ nil
+end
+
+def write_cache(repo_path, commit, stats)
+ cache = [LANGUAGE_STATS_CACHE_VERSION, commit, stats]
+
+ Tempfile.open('cache_file', repo_path) do |f|
+ marshal = Marshal.dump(cache)
+ f.write(Zlib::Deflate.deflate(marshal))
+ f.close
+ File.rename(f.path, cache_file(repo_path))
+ end
+
+ FileUtils.chmod 0o644, cache_file(repo_path)
+end
+
+gitaly_linguist(ARGV)