Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to 'workhorse/internal/lsif_transformer')
-rw-r--r-- workhorse/internal/lsif_transformer/parser/code_hover.go 28
-rw-r--r-- workhorse/internal/lsif_transformer/parser/code_hover_test.go 48
2 files changed, 76 insertions, 0 deletions
diff --git a/workhorse/internal/lsif_transformer/parser/code_hover.go b/workhorse/internal/lsif_transformer/parser/code_hover.go
index 25550cce29e..ab3ab291432 100644
--- a/workhorse/internal/lsif_transformer/parser/code_hover.go
+++ b/workhorse/internal/lsif_transformer/parser/code_hover.go
@@ -28,6 +28,16 @@ type truncatableString struct {
Truncated bool
}
+// supportedLexerLanguages is used for a fast lookup to ensure the language
+// is supported by the lexer library.
+var supportedLexerLanguages = map[string]struct{}{}
+
+func init() {
+ for _, name := range lexers.Names(true) {
+ supportedLexerLanguages[name] = struct{}{}
+ }
+}
+
func (ts *truncatableString) UnmarshalText(b []byte) error {
s := 0
for i := 0; s < len(b); i++ {
@@ -93,6 +103,24 @@ func newCodeHover(content json.RawMessage) (*codeHover, error) {
}
func (c *codeHover) setTokens() {
+ // fastpath: bail early if no language specified
+ if c.Language == "" {
+ return
+ }
+
+ // fastpath: lexers.Get() will first match against indexed languages by
+ // name and alias, and then fall back to a very slow filepath match. We
+ // avoid this slow path by first checking against languages we know to
+ // be within the index, and bailing if not found.
+ //
+ // The name is deliberately not case-folded up front: these two lookups
+ // (exact, then lower-cased) mirror the behaviour of lexers.Get().
+ if _, ok := supportedLexerLanguages[c.Language]; !ok {
+ if _, ok := supportedLexerLanguages[strings.ToLower(c.Language)]; !ok {
+ return
+ }
+ }
+
lexer := lexers.Get(c.Language)
if lexer == nil {
return
diff --git a/workhorse/internal/lsif_transformer/parser/code_hover_test.go b/workhorse/internal/lsif_transformer/parser/code_hover_test.go
index c09636b2f76..7dc9e126ae7 100644
--- a/workhorse/internal/lsif_transformer/parser/code_hover_test.go
+++ b/workhorse/internal/lsif_transformer/parser/code_hover_test.go
@@ -56,6 +56,14 @@ func TestHighlight(t *testing.T) {
},
},
{
+ name: "ruby by file extension",
+ language: "rb",
+ value: `print hello`,
+ want: [][]token{
+ {{Value: "print hello"}},
+ },
+ },
+ {
name: "unknown/malicious language is passed",
language: "<lang> alert(1); </lang>",
value: `def a;\nend`,
@@ -116,3 +124,43 @@ func TestTruncatingMultiByteChars(t *testing.T) {
symbolSize := 3
require.Equal(t, value[0:maxValueSize*symbolSize], c.TruncatedValue.Value)
}
+
+func BenchmarkHighlight(b *testing.B) {
+ type entry struct {
+ Language string `json:"language"`
+ Value string `json:"value"`
+ }
+
+ tests := []entry{
+ {
+ Language: "go",
+ Value: "func main()",
+ },
+ {
+ Language: "ruby",
+ Value: "def read(line)",
+ },
+ {
+ Language: "",
+ Value: "<html><head>foobar</head></html>",
+ },
+ {
+ Language: "zzz",
+ Value: "def read(line)",
+ },
+ }
+
+ for _, tc := range tests {
+ b.Run("lang:"+tc.Language, func(b *testing.B) {
+ raw, err := json.Marshal(tc)
+ require.NoError(b, err)
+
+ b.ResetTimer()
+
+ for n := 0; n < b.N; n++ {
+ _, err := newCodeHovers(raw)
+ require.NoError(b, err)
+ }
+ })
+ }
+}