diff options
Diffstat (limited to 'workhorse/internal/lsif_transformer/parser')
-rw-r--r-- | workhorse/internal/lsif_transformer/parser/code_hover.go | 28 | ||||
-rw-r--r-- | workhorse/internal/lsif_transformer/parser/code_hover_test.go | 48 |
2 files changed, 76 insertions, 0 deletions
diff --git a/workhorse/internal/lsif_transformer/parser/code_hover.go b/workhorse/internal/lsif_transformer/parser/code_hover.go index 25550cce29e..ab3ab291432 100644 --- a/workhorse/internal/lsif_transformer/parser/code_hover.go +++ b/workhorse/internal/lsif_transformer/parser/code_hover.go @@ -28,6 +28,16 @@ type truncatableString struct { Truncated bool } +// supportedLexerLanguages is used for a fast lookup to ensure the language +// is supported by the lexer library. +var supportedLexerLanguages = map[string]struct{}{} + +func init() { + for _, name := range lexers.Names(true) { + supportedLexerLanguages[name] = struct{}{} + } +} + func (ts *truncatableString) UnmarshalText(b []byte) error { s := 0 for i := 0; s < len(b); i++ { @@ -93,6 +103,24 @@ func newCodeHover(content json.RawMessage) (*codeHover, error) { } func (c *codeHover) setTokens() { + // fastpath: bail early if no language specified + if c.Language == "" { + return + } + + // fastpath: lexer.Get() will first match against indexed languages by + // name and alias, and then fallback to a very slow filepath match. We + // avoid this slow path by first checking against languages we know to + // be within the index, and bailing if not found. + // + // Not case-folding immediately is done intentionally. These two lookups + // mirror the behaviour of lexer.Get(). + if _, ok := supportedLexerLanguages[c.Language]; !ok { + if _, ok := supportedLexerLanguages[strings.ToLower(c.Language)]; !ok { + return + } + } + lexer := lexers.Get(c.Language) if lexer == nil { return diff --git a/workhorse/internal/lsif_transformer/parser/code_hover_test.go b/workhorse/internal/lsif_transformer/parser/code_hover_test.go index c09636b2f76..7dc9e126ae7 100644 --- a/workhorse/internal/lsif_transformer/parser/code_hover_test.go +++ b/workhorse/internal/lsif_transformer/parser/code_hover_test.go @@ -56,6 +56,14 @@ func TestHighlight(t *testing.T) { }, }, { + name: "ruby by file extension", + language: "rb", + value: `print hello`, + want: [][]token{ + {{Value: "print hello"}}, + }, + }, + { name: "unknown/malicious language is passed", language: "<lang> alert(1); </lang>", value: `def a;\nend`, @@ -116,3 +124,43 @@ func TestTruncatingMultiByteChars(t *testing.T) { symbolSize := 3 require.Equal(t, value[0:maxValueSize*symbolSize], c.TruncatedValue.Value) } + +func BenchmarkHighlight(b *testing.B) { + type entry struct { + Language string `json:"language"` + Value string `json:"value"` + } + + tests := []entry{ + { + Language: "go", + Value: "func main()", + }, + { + Language: "ruby", + Value: "def read(line)", + }, + { + Language: "", + Value: "<html><head>foobar</head></html>", + }, + { + Language: "zzz", + Value: "def read(line)", + }, + } + + for _, tc := range tests { + b.Run("lang:"+tc.Language, func(b *testing.B) { + raw, err := json.Marshal(tc) + require.NoError(b, err) + + b.ResetTimer() + + for n := 0; n < b.N; n++ { + _, err := newCodeHovers(raw) + require.NoError(b, err) + } + }) + } +} |