github.com/stevedonovan/Penlight.git
author    Thijs Schreijer <thijs@thijsschreijer.nl>  2018-12-23 16:39:26 +0300
committer Thijs Schreijer <thijs@thijsschreijer.nl>  2018-12-23 16:39:26 +0300
commit    fffa8441c970a7cd3cba5507c4cc536df2fc35cf (patch)
tree      f495cf4a402a144bd76bf6fe007081f46632c342
parent    ee381b7c2a582162e85c1920d6b3562ea92a7d17 (diff)
lexer: read ahead in file mode for multi-line patterns (branch lexer-fix)
fixes #271
-rw-r--r--   docs_topics/06-data.md    3
-rw-r--r--   lua/pl/lexer.lua         29
-rw-r--r--   tests/test-lexer.lua      4
3 files changed, 27 insertions, 9 deletions
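
The problem this commit fixes: when lexer.scan() is given a file-like object it reads the
input one line at a time, so tokens that span lines (Lua long comments and long strings,
C block comments) could never match in file mode. A minimal sketch of the scenario, not
part of the patch, assuming pl.stringio.open as a stand-in for a real file handle (the
updated test's open() helper presumably does something similar):

    -- Sketch: tokenize a multi-line Lua comment from a file-like source.
    -- Before this commit the scanner only saw one line at a time, so the
    -- long-comment pattern never matched when scanning a file.
    local lexer    = require 'pl.lexer'
    local stringio = require 'pl.stringio'

    local src = "--[==[ a block comment\nspanning two lines ]==]\nlocal x = 1\n"

    -- filter out only whitespace so the comment token stays visible
    for t, v in lexer.lua(stringio.open(src), {space=true}) do
        print(t, v)   -- expect a single 'comment' token covering both lines
    end
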
diff --git a/docs_topics/06-data.md b/docs_topics/06-data.md
index 8c759d7..36a0257 100644
--- a/docs_topics/06-data.md
+++ b/docs_topics/06-data.md
@@ -664,8 +664,7 @@ A lexical scanner is useful where you have highly-structured data which is not
nicely delimited by newlines. For example, here is a snippet of a in-house file
format which it was my task to maintain:
- points
-(818344.1,-20389.7,-0.1),(818337.9,-20389.3,-0.1),(818332.5,-20387.8,-0.1)
+ points (818344.1,-20389.7,-0.1),(818337.9,-20389.3,-0.1),(818332.5,-20387.8,-0.1)
,(818327.4,-20388,-0.1),(818322,-20387.7,-0.1),(818316.3,-20388.6,-0.1)
,(818309.7,-20389.4,-0.1),(818303.5,-20390.6,-0.1),(818295.8,-20388.3,-0.1)
,(818290.5,-20386.9,-0.1),(818285.2,-20386.1,-0.1),(818279.3,-20383.6,-0.1)
diff --git a/lua/pl/lexer.lua b/lua/pl/lexer.lua
index 68a0874..23024c6 100644
--- a/lua/pl/lexer.lua
+++ b/lua/pl/lexer.lua
@@ -100,7 +100,7 @@ local function wsdump (tok)
return yield("space",tok)
end
-local function pdump (tok)
+local function pdump(tok)
return yield('prepro',tok)
end
@@ -214,8 +214,23 @@ function lexer.scan(s,matches,filter,options)
for _,m in ipairs(matches) do
local pat = m[1]
local fun = m[2]
+ local pat_full = m[3]
local findres = {strfind(s,pat,idx)}
local i1, i2 = findres[1], findres[2]
+ if i1 and pat_full then
+ -- a multi-line pattern
+ findres = {strfind(s,pat_full,idx)}
+ i1, i2 = findres[1], findres[2]
+ while not i1 do -- read lines until we have a full pattern
+ if not next_line then break end
+ line_nr = line_nr + 1
+ s = s .. next_line .. '\n'
+ next_line = file:read()
+ sz = #s
+ findres = {strfind(s,pat_full,idx)}
+ i1, i2 = findres[1], findres[2]
+ end
+ end
if i1 then
local tok = strsub(s,i1,i2)
idx = i2 + 1
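
The hunk above gives each entry in the matches table an optional third element: a "full"
pattern that scan() keeps retrying, appending further lines read from the file, once the
short first-element pattern has matched the opening of a multi-line token. A hedged sketch
of a user-defined rule built on that shape (the triple-quote rule and token names here are
illustrative, not from Penlight):

    -- Illustrative only: a custom rule for a token that may span lines.
    -- Element 1 matches the opening delimiter on the current line, element 2
    -- is the handler, element 3 is the full pattern retried during read-ahead.
    local lexer    = require 'pl.lexer'
    local stringio = require 'pl.stringio'
    local yield    = coroutine.yield

    local matches = {
        {'^"""', function(tok) return yield('longstring', tok) end, '^""".-"""'},
        {'^%s+', function(tok) return yield('space', tok) end},
        {'^%w+', function(tok) return yield('word', tok) end},
    }

    for t, v in lexer.scan(stringio.open('"""first\nsecond""" done\n'), matches) do
        print(t, v)   -- 'longstring' should cover both lines after the read-ahead
    end
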
@@ -325,9 +340,12 @@ function lexer.lua(s,filter,options)
{STRING1,sdump},
{STRING2,sdump},
{STRING3,sdump},
- {'^%-%-%[(=*)%[.-%]%1%]',cdump},
- {'^%-%-.-\n',cdump},
- {'^%[(=*)%[.-%]%1%]',sdump_l},
+ {'^%-%-%[(=*)%[',cdump,'^%-%-%[(=*)%[.-%]%1%]'},
+ --{'^%-%-%[(=*)%[.-%]%1%]',cdump},
+ {'^%-%-[^%[].-\n',cdump},
+ {'^%-%-\n',cdump},
+ {'^%[(=*)%[',sdump_l, '^%[(=*)%[.-%]%1%]'},
+ --{'^%[(=*)%[.-%]%1%]',sdump_l},
{'^==',tdump},
{'^~=',tdump},
{'^<=',tdump},
@@ -379,7 +397,8 @@ function lexer.cpp(s,filter,options)
{STRING2,sdump},
{STRING3,sdump},
{'^//.-\n',cdump},
- {'^/%*.-%*/',cdump},
+ {'^/%*',cdump,'^/%*.-%*/'},
+ --{'^/%*.-%*/',cdump},
{'^==',tdump},
{'^!=',tdump},
{'^<=',tdump},
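
The C/C++ scanner gets the same treatment: the /* ... */ rule is split into an opening
pattern plus a full pattern, so block comments spanning lines work in file mode too. A
brief sketch under the same assumptions as above (pl.stringio as the file stand-in):

    -- Sketch: a C block comment spread over two lines, scanned in file mode.
    local lexer    = require 'pl.lexer'
    local stringio = require 'pl.stringio'

    local src = "/* spans\ntwo lines */ int x;\n"
    for t, v in lexer.cpp(stringio.open(src), {space=true}) do
        print(t, v)   -- expect one 'comment' token covering both lines
    end
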
diff --git a/tests/test-lexer.lua b/tests/test-lexer.lua
index 20a86a1..807e3f5 100644
--- a/tests/test-lexer.lua
+++ b/tests/test-lexer.lua
@@ -14,9 +14,9 @@ local function test_scan(str, filter, options, expected_tokens, lang)
end
asserteq(copy2(lexer[lang](str, matches, filter, options)), expected_tokens)
- if lang == 'scan' then
+ --if lang == 'scan' then
asserteq(copy2(lexer[lang](open(str), matches, filter, options)), expected_tokens)
- end
+ --end
end
local s = '20 = hello'