github.com/stevedonovan/Penlight.git
author    Thijs Schreijer <thijs@thijsschreijer.nl>  2018-12-23 16:39:26 +0300
committer Thijs Schreijer <thijs@thijsschreijer.nl>  2018-12-23 16:39:26 +0300
commit    fffa8441c970a7cd3cba5507c4cc536df2fc35cf (patch)
tree      f495cf4a402a144bd76bf6fe007081f46632c342
parent    ee381b7c2a582162e85c1920d6b3562ea92a7d17 (diff)
lexer: read ahead in file mode for multi-line patterns (branch lexer-fix)
fixes #271
-rw-r--r--   docs_topics/06-data.md    3
-rw-r--r--   lua/pl/lexer.lua         29
-rw-r--r--   tests/test-lexer.lua      4
3 files changed, 27 insertions, 9 deletions
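
The problem this commit fixes: when lexer.scan() is given a file-like object it reads the
input one line at a time, so tokens that span lines (Lua long comments and long strings,
C block comments) could never match in file mode. A minimal sketch of the scenario, not
part of the patch, assuming pl.stringio.open as a stand-in for a real file handle (the
updated test's open() helper presumably does something similar):

    -- Sketch: tokenize a multi-line Lua comment from a file-like source.
    -- Before this commit the scanner only saw one line at a time, so the
    -- long-comment pattern never matched when scanning a file.
    local lexer    = require 'pl.lexer'
    local stringio = require 'pl.stringio'

    local src = "--[==[ a block comment\nspanning two lines ]==]\nlocal x = 1\n"

    -- filter out only whitespace so the comment token stays visible
    for t, v in lexer.lua(stringio.open(src), {space=true}) do
        print(t, v)   -- expect a single 'comment' token covering both lines
    end
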
diff --git a/docs_topics/06-data.md b/docs_topics/06-data.md
index 8c759d7..36a0257 100644
--- a/docs_topics/06-data.md
+++ b/docs_topics/06-data.md
@@ -664,8 +664,7 @@ A lexical scanner is useful where you have highly-structured data which is not
nicely delimited by newlines. For example, here is a snippet of a in-house file
format which it was my task to maintain:
- points
-(818344.1,-20389.7,-0.1),(818337.9,-20389.3,-0.1),(818332.5,-20387.8,-0.1)
+ points (818344.1,-20389.7,-0.1),(818337.9,-20389.3,-0.1),(818332.5,-20387.8,-0.1)
,(818327.4,-20388,-0.1),(818322,-20387.7,-0.1),(818316.3,-20388.6,-0.1)
,(818309.7,-20389.4,-0.1),(818303.5,-20390.6,-0.1),(818295.8,-20388.3,-0.1)
,(818290.5,-20386.9,-0.1),(818285.2,-20386.1,-0.1),(818279.3,-20383.6,-0.1)
diff --git a/lua/pl/lexer.lua b/lua/pl/lexer.lua
index 68a0874..23024c6 100644
--- a/lua/pl/lexer.lua
+++ b/lua/pl/lexer.lua
@@ -100,7 +100,7 @@ local function wsdump (tok)
return yield("space",tok)
end
-local function pdump (tok)
+local function pdump(tok)
return yield('prepro',tok)
end
@@ -214,8 +214,23 @@ function lexer.scan(s,matches,filter,options)
for _,m in ipairs(matches) do
local pat = m[1]
local fun = m[2]
+ local pat_full = m[3]
local findres = {strfind(s,pat,idx)}
local i1, i2 = findres[1], findres[2]
+ if i1 and pat_full then
+ -- a multi-line pattern
+ findres = {strfind(s,pat_full,idx)}
+ i1, i2 = findres[1], findres[2]
+ while not i1 do -- read lines until we have a full pattern
+ if not next_line then break end
+ line_nr = line_nr + 1
+ s = s .. next_line .. '\n'
+ next_line = file:read()
+ sz = #s
+ findres = {strfind(s,pat_full,idx)}
+ i1, i2 = findres[1], findres[2]
+ end
+ end
if i1 then
local tok = strsub(s,i1,i2)
idx = i2 + 1
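
The hunk above gives each entry in the matches table an optional third element: a "full"
pattern that scan() keeps retrying, appending further lines read from the file, once the
short first-element pattern has matched the opening of a multi-line token. A hedged sketch
of a user-defined rule built on that shape (the triple-quote rule and token names here are
illustrative, not from Penlight):

    -- Illustrative only: a custom rule for a token that may span lines.
    -- Element 1 matches the opening delimiter on the current line, element 2
    -- is the handler, element 3 is the full pattern retried during read-ahead.
    local lexer    = require 'pl.lexer'
    local stringio = require 'pl.stringio'
    local yield    = coroutine.yield

    local matches = {
        {'^"""', function(tok) return yield('longstring', tok) end, '^""".-"""'},
        {'^%s+', function(tok) return yield('space', tok) end},
        {'^%w+', function(tok) return yield('word', tok) end},
    }

    for t, v in lexer.scan(stringio.open('"""first\nsecond""" done\n'), matches) do
        print(t, v)   -- 'longstring' should cover both lines after the read-ahead
    end
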
@@ -325,9 +340,12 @@ function lexer.lua(s,filter,options)
{STRING1,sdump},
{STRING2,sdump},
{STRING3,sdump},
- {'^%-%-%[(=*)%[.-%]%1%]',cdump},
- {'^%-%-.-\n',cdump},
- {'^%[(=*)%[.-%]%1%]',sdump_l},
+ {'^%-%-%[(=*)%[',cdump,'^%-%-%[(=*)%[.-%]%1%]'},
+ --{'^%-%-%[(=*)%[.-%]%1%]',cdump},
+ {'^%-%-[^%[].-\n',cdump},
+ {'^%-%-\n',cdump},
+ {'^%[(=*)%[',sdump_l, '^%[(=*)%[.-%]%1%]'},
+ --{'^%[(=*)%[.-%]%1%]',sdump_l},
{'^==',tdump},
{'^~=',tdump},
{'^<=',tdump},
@@ -379,7 +397,8 @@ function lexer.cpp(s,filter,options)
{STRING2,sdump},
{STRING3,sdump},
{'^//.-\n',cdump},
- {'^/%*.-%*/',cdump},
+ {'^/%*',cdump,'^/%*.-%*/'},
+ --{'^/%*.-%*/',cdump},
{'^==',tdump},
{'^!=',tdump},
{'^<=',tdump},
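
The C/C++ scanner gets the same treatment: the /* ... */ rule is split into an opening
pattern plus a full pattern, so block comments spanning lines work in file mode too. A
brief sketch under the same assumptions as above (pl.stringio as the file stand-in):

    -- Sketch: a C block comment spread over two lines, scanned in file mode.
    local lexer    = require 'pl.lexer'
    local stringio = require 'pl.stringio'

    local src = "/* spans\ntwo lines */ int x;\n"
    for t, v in lexer.cpp(stringio.open(src), {space=true}) do
        print(t, v)   -- expect one 'comment' token covering both lines
    end
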
diff --git a/tests/test-lexer.lua b/tests/test-lexer.lua
index 20a86a1..807e3f5 100644
--- a/tests/test-lexer.lua
+++ b/tests/test-lexer.lua
@@ -14,9 +14,9 @@ local function test_scan(str, filter, options, expected_tokens, lang)
end
asserteq(copy2(lexer[lang](str, matches, filter, options)), expected_tokens)
- if lang == 'scan' then
+ --if lang == 'scan' then
asserteq(copy2(lexer[lang](open(str), matches, filter, options)), expected_tokens)
- end
+ --end
end
local s = '20 = hello'