diff options
author | Toon Claes <toon@gitlab.com> | 2022-09-26 17:34:35 +0300 |
---|---|---|
committer | Toon Claes <toon@gitlab.com> | 2022-10-03 16:26:16 +0300 |
commit | 6f08751ecdc3d76f559554dea894a1c5ee6c55d1 (patch) | |
tree | 71afe03a5c530b868116306560b8db920a3ecc04 | |
parent | 85200970d13d6b5407eabbfdcd8a24433089a780 (diff) |
linguist: Move individual file handling into separate file
As an extra step to prepare for more features, this change moves the
logic that determines the language of a single file into a separate
file and struct.
-rw-r--r-- | internal/gitaly/linguist/file_instance.go | 42 | ||||
-rw-r--r-- | internal/gitaly/linguist/linguist.go | 25 |
2 files changed, 43 insertions, 24 deletions
diff --git a/internal/gitaly/linguist/file_instance.go b/internal/gitaly/linguist/file_instance.go new file mode 100644 index 000000000..5c01da23e --- /dev/null +++ b/internal/gitaly/linguist/file_instance.go @@ -0,0 +1,42 @@ +package linguist + +import ( + "fmt" + "io" + + "github.com/go-enry/go-enry/v2" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/gitpipe" +) + +type fileInstance struct { + filename string +} + +func newFileInstance(filename string) fileInstance { + return fileInstance{ + filename: filename, + } +} + +// DetermineStats determines the size and language of the given file. The +// language will be an empty string when the stats should be omitted from the +// count. +func (f fileInstance) DetermineStats(object gitpipe.CatfileObjectResult) (string, uint64, error) { + // Read arbitrary number of bytes considered enough to determine language + content, err := io.ReadAll(io.LimitReader(object, 2048)) + if err != nil { + return "", 0, fmt.Errorf("determine stats read blob: %w", err) + } + + lang := enry.GetLanguage(f.filename, content) + + // Ignore anything that's neither markup nor a programming language, + // similar to what the linguist gem does: + // https://github.com/github/linguist/blob/v7.20.0/lib/linguist/blob_helper.rb#L378-L387 + if enry.GetLanguageType(lang) != enry.Programming && + enry.GetLanguageType(lang) != enry.Markup { + return "", 0, nil + } + + return lang, uint64(object.Object.ObjectSize()), nil +} diff --git a/internal/gitaly/linguist/linguist.go b/internal/gitaly/linguist/linguist.go index 0eef5e4f4..c17cad698 100644 --- a/internal/gitaly/linguist/linguist.go +++ b/internal/gitaly/linguist/linguist.go @@ -162,7 +162,7 @@ func (inst *Instance) enryStats(ctx context.Context, commitID string) (ByteCount object := objectIt.Result() filename := string(object.ObjectName) - lang, size, err := inst.determineStats(filename, object) + lang, size, err := newFileInstance(filename).DetermineStats(object) if err != nil { return nil, 
fmt.Errorf("linguist determine stats: %w", err) } @@ -191,26 +191,3 @@ func (inst *Instance) enryStats(ctx context.Context, commitID string) (ByteCount return stats.Totals, nil } - -// determineStats determines the size and language of the given file. The -// language will be an empty string when the stats should be omitted from the -// count. -func (inst *Instance) determineStats(filename string, object gitpipe.CatfileObjectResult) (string, uint64, error) { - // Read arbitrary number of bytes considered enough to determine language - content, err := io.ReadAll(io.LimitReader(object, 2048)) - if err != nil { - return "", 0, fmt.Errorf("determine stats read blob: %w", err) - } - - lang := enry.GetLanguage(filename, content) - - // Ignore anything that's neither markup nor a programming language, - // similar to what the linguist gem does: - // https://github.com/github/linguist/blob/v7.20.0/lib/linguist/blob_helper.rb#L378-L387 - if enry.GetLanguageType(lang) != enry.Programming && - enry.GetLanguageType(lang) != enry.Markup { - return "", 0, nil - } - - return lang, uint64(object.Object.ObjectSize()), nil -} |