github.com/windirstat/premake-4.x-stable.git
author    Oliver Schneider <oliver@assarbad.net>  2014-01-10 05:58:31 +0400
committer Oliver Schneider <oliver@assarbad.net>  2014-01-10 05:58:31 +0400
commit    15523cfc399dcb4dd87cdf78bb554094de04a58c (patch)
tree      b4f6e01b4a1406bce06edb6fc7d91933a5fffb17 /scripts
parent    9cd341021453186b1ae26f1ec10559a4e74a8503 (diff)
Adding vanilla LuaSrcDiet 0.12.1
--HG-- branch : WDS-build
Diffstat (limited to 'scripts')
-rw-r--r--  scripts/luasrcdiet/COPYRIGHT            45
-rw-r--r--  scripts/luasrcdiet/LuaSrcDiet.lua     4598
-rw-r--r--  scripts/luasrcdiet/README.LuaSrcDiet   140
3 files changed, 4783 insertions, 0 deletions
diff --git a/scripts/luasrcdiet/COPYRIGHT b/scripts/luasrcdiet/COPYRIGHT
new file mode 100644
index 0000000..4afe0e3
--- /dev/null
+++ b/scripts/luasrcdiet/COPYRIGHT
@@ -0,0 +1,45 @@
+LuaSrcDiet License
+------------------
+
+LuaSrcDiet is licensed under the terms of the MIT license reproduced
+below. This means that LuaSrcDiet is free software and can be used for
+both academic and commercial purposes at absolutely no cost.
+
+Think of LuaSrcDiet as a compiler or a text filter; whatever is
+processed by LuaSrcDiet is not affected by its license. It does not add
+anything new to your source code; it only transforms code that already
+exists.
+
+Hence, there is no need to tag this license onto Lua programs that are
+only processed. Given the liberal terms of this kind of license, the
+primary purpose is just to claim authorship of LuaSrcDiet.
+
+Parts of LuaSrcDiet are based on Lua 5 code. See the file COPYRIGHT_Lua51
+(Lua 5.1.4) for Lua 5's license.
+
+===============================================================================
+
+Copyright (C) 2005-2008,2011 Kein-Hong Man <keinhong@gmail.com>
+Lua 5.1.4 Copyright (C) 1994-2008 Lua.org, PUC-Rio.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+===============================================================================
+
+(end of COPYRIGHT)
diff --git a/scripts/luasrcdiet/LuaSrcDiet.lua b/scripts/luasrcdiet/LuaSrcDiet.lua
new file mode 100644
index 0000000..c87cee2
--- /dev/null
+++ b/scripts/luasrcdiet/LuaSrcDiet.lua
@@ -0,0 +1,4598 @@
+#!/usr/bin/env lua
+--[[--------------------------------------------------------------------
+
+ LuaSrcDiet
+ Compresses Lua source code by removing unnecessary characters.
+ For Lua 5.1.x source code.
+
+ Copyright (c) 2008,2011,2012 Kein-Hong Man <keinhong@gmail.com>
+ The COPYRIGHT file describes the conditions
+ under which this software may be distributed.
+
+----------------------------------------------------------------------]]
+
+--[[--------------------------------------------------------------------
+-- NOTES:
+-- * Remember to update version and date information below (MSG_TITLE)
+-- * TODO: passing data tables around is a horrific mess
+-- * TODO: implement pcall() to properly handle lexer etc. errors
+-- * TODO: need some automatic testing for a semblance of sanity
+-- * TODO: the plugin module is highly experimental and unstable
+----------------------------------------------------------------------]]
+
+-- standard libraries, functions
+local string = string
+local math = math
+local table = table
+local require = require
+local print = print
+local sub = string.sub
+local gmatch = string.gmatch
+local match = string.match
+
+-- modules incorporated as preload functions follow
+local preload = package.preload
+local base = _G
+
+local plugin_info = {
+ html = "html generates an HTML file for checking globals",
+ sloc = "sloc calculates SLOC for given source file",
+}
+
+local p_embedded = {
+ 'html',
+ 'sloc',
+}
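+
+-- Illustrative note (an assumption added for clarity, not part of the
+-- original LuaSrcDiet source): storing a function in package.preload
+-- registers it as that module's loader, so a later require() call runs
+-- the stored function instead of searching the filesystem. A minimal
+-- sketch:
+--   package.preload.demo = function() module "demo" end
+--   local demo = require "demo"  -- invokes the stored loader once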
+
+-- preload function for module llex
+preload.llex =
+function()
+--start of inserted module
+module "llex"
+
+local string = base.require "string"
+local find = string.find
+local match = string.match
+local sub = string.sub
+
+----------------------------------------------------------------------
+-- initialize keyword list, variables
+----------------------------------------------------------------------
+
+local kw = {}
+for v in string.gmatch([[
+and break do else elseif end false for function if in
+local nil not or repeat return then true until while]], "%S+") do
+ kw[v] = true
+end
+
+-- see init() for module variables (externally visible):
+-- tok, seminfo, tokln
+
+local z, -- source stream
+ sourceid, -- name of source
+ I, -- position of lexer
+ buff, -- buffer for strings
+ ln -- line number
+
+----------------------------------------------------------------------
+-- add information to token listing
+----------------------------------------------------------------------
+
+local function addtoken(token, info)
+ local i = #tok + 1
+ tok[i] = token
+ seminfo[i] = info
+ tokln[i] = ln
+end
+
+----------------------------------------------------------------------
+-- handles line number incrementation and end-of-line characters
+----------------------------------------------------------------------
+
+local function inclinenumber(i, is_tok)
+ local sub = sub
+ local old = sub(z, i, i)
+ i = i + 1 -- skip '\n' or '\r'
+ local c = sub(z, i, i)
+ if (c == "\n" or c == "\r") and (c ~= old) then
+ i = i + 1 -- skip '\n\r' or '\r\n'
+ old = old..c
+ end
+ if is_tok then addtoken("TK_EOL", old) end
+ ln = ln + 1
+ I = i
+ return i
+end
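+
+-- e.g. (illustrative): for z = "a\r\nb" with i at the '\r', old = "\r",
+-- the following '\n' differs from it, so both characters are consumed
+-- as a single "\r\n" line break; one TK_EOL token is added (when
+-- is_tok is set) and ln is incremented exactly once.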
+
+----------------------------------------------------------------------
+-- initialize lexer for given source _z and source name _sourceid
+----------------------------------------------------------------------
+
+function init(_z, _sourceid)
+ z = _z -- source
+ sourceid = _sourceid -- name of source
+ I = 1 -- lexer's position in source
+ ln = 1 -- line number
+ tok = {} -- lexed token list*
+ seminfo = {} -- lexed semantic information list*
+ tokln = {} -- line numbers for messages*
+ -- (*) externally visible thru' module
+ --------------------------------------------------------------------
+ -- initial processing (shbang handling)
+ --------------------------------------------------------------------
+ local p, _, q, r = find(z, "^(#[^\r\n]*)(\r?\n?)")
+ if p then -- skip first line
+ I = I + #q
+ addtoken("TK_COMMENT", q)
+ if #r > 0 then inclinenumber(I, true) end
+ end
+end
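+
+-- e.g. (illustrative): a source beginning "#!/usr/bin/env lua\n" gets
+-- an initial TK_COMMENT token holding the shbang line, followed by a
+-- TK_EOL token for its terminating newline.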
+
+----------------------------------------------------------------------
+-- returns a chunk name or id, no truncation for long names
+----------------------------------------------------------------------
+
+function chunkid()
+ if sourceid and match(sourceid, "^[=@]") then
+ return sub(sourceid, 2) -- remove first char
+ end
+ return "[string]"
+end
+
+----------------------------------------------------------------------
+-- formats error message and throws error
+-- * a simplified version, does not report what token was responsible
+----------------------------------------------------------------------
+
+function errorline(s, line)
+ local e = error or base.error
+ e(string.format("%s:%d: %s", chunkid(), line or ln, s))
+end
+local errorline = errorline
+
+------------------------------------------------------------------------
+-- count separators ("=") in a long string delimiter
+------------------------------------------------------------------------
+
+local function skip_sep(i)
+ local sub = sub
+ local s = sub(z, i, i)
+ i = i + 1
+ local count = #match(z, "=*", i)
+ i = i + count
+ I = i
+ return (sub(z, i, i) == s) and count or (-count) - 1
+end
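+
+-- e.g. (illustrative): for z = "[==[" and i = 1, s = "[", two "=" are
+-- counted and the character at the new position is "[" again, so 2 is
+-- returned; for z = "[=x" the final test fails and the result is
+-- -(1) - 1 = -2, signalling that this is not a long-bracket opener.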
+
+----------------------------------------------------------------------
+-- reads a long string or long comment
+----------------------------------------------------------------------
+
+local function read_long_string(is_str, sep)
+ local i = I + 1 -- skip 2nd '['
+ local sub = sub
+ local c = sub(z, i, i)
+ if c == "\r" or c == "\n" then -- string starts with a newline?
+ i = inclinenumber(i) -- skip it
+ end
+ while true do
+ local p, q, r = find(z, "([\r\n%]])", i) -- (long range match)
+ if not p then
+ errorline(is_str and "unfinished long string" or
+ "unfinished long comment")
+ end
+ i = p
+ if r == "]" then -- delimiter test
+ if skip_sep(i) == sep then
+ buff = sub(z, buff, I)
+ I = I + 1 -- skip 2nd ']'
+ return buff
+ end
+ i = I
+ else -- newline
+ buff = buff.."\n"
+ i = inclinenumber(i)
+ end
+ end--while
+end
+
+----------------------------------------------------------------------
+-- reads a string
+----------------------------------------------------------------------
+
+local function read_string(del)
+ local i = I
+ local find = find
+ local sub = sub
+ while true do
+ local p, q, r = find(z, "([\n\r\\\"\'])", i) -- (long range match)
+ if p then
+ if r == "\n" or r == "\r" then
+ errorline("unfinished string")
+ end
+ i = p
+ if r == "\\" then -- handle escapes
+ i = i + 1
+ r = sub(z, i, i)
+ if r == "" then break end -- (EOZ error)
+ p = find("abfnrtv\n\r", r, 1, true)
+ ------------------------------------------------------
+ if p then -- special escapes
+ if p > 7 then
+ i = inclinenumber(i)
+ else
+ i = i + 1
+ end
+ ------------------------------------------------------
+ elseif find(r, "%D") then -- other non-digits
+ i = i + 1
+ ------------------------------------------------------
+ else -- \xxx sequence
+ local p, q, s = find(z, "^(%d%d?%d?)", i)
+ i = q + 1
+ if s + 1 > 256 then -- UCHAR_MAX
+ errorline("escape sequence too large")
+ end
+ ------------------------------------------------------
+ end--if p
+ else
+ i = i + 1
+ if r == del then -- ending delimiter
+ I = i
+ return sub(z, buff, i - 1) -- return string
+ end
+ end--if r
+ else
+ break -- (error)
+ end--if p
+ end--while
+ errorline("unfinished string")
+end
+
+------------------------------------------------------------------------
+-- main lexer function
+------------------------------------------------------------------------
+
+function llex()
+ local find = find
+ local match = match
+ while true do--outer
+ local i = I
+ -- inner loop allows break to be used to nicely section tests
+ while true do--inner
+ ----------------------------------------------------------------
+ local p, _, r = find(z, "^([_%a][_%w]*)", i)
+ if p then
+ I = i + #r
+ if kw[r] then
+ addtoken("TK_KEYWORD", r) -- reserved word (keyword)
+ else
+ addtoken("TK_NAME", r) -- identifier
+ end
+ break -- (continue)
+ end
+ ----------------------------------------------------------------
+ local p, _, r = find(z, "^(%.?)%d", i)
+ if p then -- numeral
+ if r == "." then i = i + 1 end
+ local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i)
+ i = q + 1
+ if #r == 1 then -- optional exponent
+ if match(z, "^[%+%-]", i) then -- optional sign
+ i = i + 1
+ end
+ end
+ local _, q = find(z, "^[_%w]*", i)
+ I = q + 1
+ local v = sub(z, p, q) -- string equivalent
+ if not base.tonumber(v) then -- handles hex test also
+ errorline("malformed number")
+ end
+ addtoken("TK_NUMBER", v)
+ break -- (continue)
+ end
+ ----------------------------------------------------------------
+ local p, q, r, t = find(z, "^((%s)[ \t\v\f]*)", i)
+ if p then
+ if t == "\n" or t == "\r" then -- newline
+ inclinenumber(i, true)
+ else
+ I = q + 1 -- whitespace
+ addtoken("TK_SPACE", r)
+ end
+ break -- (continue)
+ end
+ ----------------------------------------------------------------
+ local r = match(z, "^%p", i)
+ if r then
+ buff = i
+ local p = find("-[\"\'.=<>~", r, 1, true)
+ if p then
+ -- two-level if block for punctuation/symbols
+ --------------------------------------------------------
+ if p <= 2 then
+ if p == 1 then -- minus
+ local c = match(z, "^%-%-(%[?)", i)
+ if c then
+ i = i + 2
+ local sep = -1
+ if c == "[" then
+ sep = skip_sep(i)
+ end
+ if sep >= 0 then -- long comment
+ addtoken("TK_LCOMMENT", read_long_string(false, sep))
+ else -- short comment
+ I = find(z, "[\n\r]", i) or (#z + 1)
+ addtoken("TK_COMMENT", sub(z, buff, I - 1))
+ end
+ break -- (continue)
+ end
+ -- (fall through for "-")
+ else -- [ or long string
+ local sep = skip_sep(i)
+ if sep >= 0 then
+ addtoken("TK_LSTRING", read_long_string(true, sep))
+ elseif sep == -1 then
+ addtoken("TK_OP", "[")
+ else
+ errorline("invalid long string delimiter")
+ end
+ break -- (continue)
+ end
+ --------------------------------------------------------
+ elseif p <= 5 then
+ if p < 5 then -- strings
+ I = i + 1
+ addtoken("TK_STRING", read_string(r))
+ break -- (continue)
+ end
+ r = match(z, "^%.%.?%.?", i) -- .|..|... dots
+ -- (fall through)
+ --------------------------------------------------------
+ else -- relational
+ r = match(z, "^%p=?", i)
+ -- (fall through)
+ end
+ end
+ I = i + #r
+ addtoken("TK_OP", r) -- for other symbols, fall through
+ break -- (continue)
+ end
+ ----------------------------------------------------------------
+ local r = sub(z, i, i)
+ if r ~= "" then
+ I = i + 1
+ addtoken("TK_OP", r) -- other single-char tokens
+ break
+ end
+ addtoken("TK_EOS", "") -- end of stream,
+ return -- exit here
+ ----------------------------------------------------------------
+ end--while inner
+ end--while outer
+end
+--end of inserted module
+end
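+
+-- Illustrative usage of the llex module (a sketch, assuming the
+-- preload mechanism above; not part of the original source):
+--   local llex = require "llex"
+--   llex.init("local x = 1", "=sample")
+--   llex.llex()
+--   -- llex.tok now holds {"TK_KEYWORD","TK_SPACE","TK_NAME",
+--   --  "TK_SPACE","TK_OP","TK_SPACE","TK_NUMBER","TK_EOS"},
+--   -- with parallel entries in llex.seminfo and llex.tokln.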
+
+-- preload function for module lparser
+preload.lparser =
+function()
+--start of inserted module
+module "lparser"
+
+local string = base.require "string"
+
+--[[--------------------------------------------------------------------
+-- variable and data structure initialization
+----------------------------------------------------------------------]]
+
+----------------------------------------------------------------------
+-- initialization: main variables
+----------------------------------------------------------------------
+
+local toklist, -- grammar-only token tables (token table,
+ seminfolist, -- semantic information table, line number
+ toklnlist, -- table, cross-reference table)
+ xreflist,
+ tpos, -- token position
+
+ line, -- start line # for error messages
+ lastln, -- last line # for ambiguous syntax chk
+ tok, seminfo, ln, xref, -- token, semantic info, line
+ nameref, -- proper position of <name> token
+ fs, -- current function state
+ top_fs, -- top-level function state
+
+ globalinfo, -- global variable information table
+ globallookup, -- global variable name lookup table
+ localinfo, -- local variable information table
+ ilocalinfo, -- inactive locals (prior to activation)
+ ilocalrefs, -- corresponding references to activate
+ statinfo -- statements labeled by type
+
+-- forward references for local functions
+local explist1, expr, block, exp1, body, chunk
+
+----------------------------------------------------------------------
+-- initialization: data structures
+----------------------------------------------------------------------
+
+local gmatch = string.gmatch
+
+local block_follow = {} -- lookahead check in chunk(), returnstat()
+for v in gmatch("else elseif end until <eof>", "%S+") do
+ block_follow[v] = true
+end
+
+local binopr_left = {} -- binary operators, left priority
+local binopr_right = {} -- binary operators, right priority
+for op, lt, rt in gmatch([[
+{+ 6 6}{- 6 6}{* 7 7}{/ 7 7}{% 7 7}
+{^ 10 9}{.. 5 4}
+{~= 3 3}{== 3 3}
+{< 3 3}{<= 3 3}{> 3 3}{>= 3 3}
+{and 2 2}{or 1 1}
+]], "{(%S+)%s(%d+)%s(%d+)}") do
+ binopr_left[op] = lt + 0
+ binopr_right[op] = rt + 0
+end
+
+local unopr = { ["not"] = true, ["-"] = true,
+ ["#"] = true, } -- unary operators
+local UNARY_PRIORITY = 8 -- priority for unary operators
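+
+-- Illustrative reading of the tables above (not in the original
+-- source): binopr_left[".."] is 5 but binopr_right[".."] is 4, so a
+-- second ".." met while parsing the right operand binds tighter and
+-- concatenation is right-associative: a..b..c groups as a..(b..c).
+-- The same left > right trick makes "^" right-associative.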
+
+--[[--------------------------------------------------------------------
+-- support functions
+----------------------------------------------------------------------]]
+
+----------------------------------------------------------------------
+-- formats error message and throws error (duplicated from llex)
+-- * a simplified version, does not report what token was responsible
+----------------------------------------------------------------------
+
+local function errorline(s, line)
+ local e = error or base.error
+ e(string.format("(source):%d: %s", line or ln, s))
+end
+
+----------------------------------------------------------------------
+-- handles incoming token, semantic information pairs
+-- * NOTE: 'nextt' is named 'next' originally
+----------------------------------------------------------------------
+
+-- reads in next token
+local function nextt()
+ lastln = toklnlist[tpos]
+ tok, seminfo, ln, xref
+ = toklist[tpos], seminfolist[tpos], toklnlist[tpos], xreflist[tpos]
+ tpos = tpos + 1
+end
+
+-- peek at next token (single lookahead for table constructor)
+local function lookahead()
+ return toklist[tpos]
+end
+
+----------------------------------------------------------------------
+-- throws a syntax error; also used when an expected token is not there
+----------------------------------------------------------------------
+
+local function syntaxerror(msg)
+ local tok = tok
+ if tok ~= "<number>" and tok ~= "<string>" then
+ if tok == "<name>" then tok = seminfo end
+ tok = "'"..tok.."'"
+ end
+ errorline(msg.." near "..tok)
+end
+
+local function error_expected(token)
+ syntaxerror("'"..token.."' expected")
+end
+
+----------------------------------------------------------------------
+-- tests for a token, returns outcome
+-- * return value changed to boolean
+----------------------------------------------------------------------
+
+local function testnext(c)
+ if tok == c then nextt(); return true end
+end
+
+----------------------------------------------------------------------
+-- check for existence of a token, throws error if not found
+----------------------------------------------------------------------
+
+local function check(c)
+ if tok ~= c then error_expected(c) end
+end
+
+----------------------------------------------------------------------
+-- verify existence of a token, then skip it
+----------------------------------------------------------------------
+
+local function checknext(c)
+ check(c); nextt()
+end
+
+----------------------------------------------------------------------
+-- throws error if condition not matched
+----------------------------------------------------------------------
+
+local function check_condition(c, msg)
+ if not c then syntaxerror(msg) end
+end
+
+----------------------------------------------------------------------
+-- verifies token conditions are met or else throw error
+----------------------------------------------------------------------
+
+local function check_match(what, who, where)
+ if not testnext(what) then
+ if where == ln then
+ error_expected(what)
+ else
+ syntaxerror("'"..what.."' expected (to close '"..who.."' at line "..where..")")
+ end
+ end
+end
+
+----------------------------------------------------------------------
+-- expect that token is a name, return the name
+----------------------------------------------------------------------
+
+local function str_checkname()
+ check("<name>")
+ local ts = seminfo
+ nameref = xref
+ nextt()
+ return ts
+end
+
+----------------------------------------------------------------------
+-- adds given string s in string pool, sets e as VK
+----------------------------------------------------------------------
+
+local function codestring(e, s)
+ e.k = "VK"
+end
+
+----------------------------------------------------------------------
+-- consume a name token, adds it to string pool
+----------------------------------------------------------------------
+
+local function checkname(e)
+ codestring(e, str_checkname())
+end
+
+--[[--------------------------------------------------------------------
+-- variable (global|local|upvalue) handling
+-- * to track locals and globals, variable management code needed
+-- * entry point is singlevar() for variable lookups
+-- * lookup tables (bl.locallist) are maintained awkwardly in the basic
+-- block data structures, PLUS the function data structure (this is
+-- an inelegant hack, since bl is nil for the top level of a function)
+----------------------------------------------------------------------]]
+
+----------------------------------------------------------------------
+-- register a local variable, create local variable object, set in
+-- to-activate variable list
+-- * used in new_localvarliteral(), parlist(), fornum(), forlist(),
+-- localfunc(), localstat()
+----------------------------------------------------------------------
+
+local function new_localvar(name, special)
+ local bl = fs.bl
+ local locallist
+ -- locate locallist in current block object or function root object
+ if bl then
+ locallist = bl.locallist
+ else
+ locallist = fs.locallist
+ end
+ -- build local variable information object and set localinfo
+ local id = #localinfo + 1
+ localinfo[id] = { -- new local variable object
+ name = name, -- local variable name
+ xref = { nameref }, -- xref, first value is declaration
+ decl = nameref, -- location of declaration, = xref[1]
+ }
+ if special then -- "self" must not be changed
+ localinfo[id].isself = true
+ end
+ -- this can override a local with the same name in the same scope
+ -- but first, keep it inactive until it gets activated
+ local i = #ilocalinfo + 1
+ ilocalinfo[i] = id
+ ilocalrefs[i] = locallist
+end
+
+----------------------------------------------------------------------
+-- actually activate the variables so that they are visible
+-- * remember Lua semantics, e.g. RHS is evaluated first, then LHS
+-- * used in parlist(), forbody(), localfunc(), localstat(), body()
+----------------------------------------------------------------------
+
+local function adjustlocalvars(nvars)
+ local sz = #ilocalinfo
+ -- i goes from left to right, in order of local allocation, because
+ -- of something like: local a,a,a = 1,2,3 which gives a = 3
+ while nvars > 0 do
+ nvars = nvars - 1
+ local i = sz - nvars
+ local id = ilocalinfo[i] -- local's id
+ local obj = localinfo[id]
+ local name = obj.name -- name of local
+ obj.act = xref -- set activation location
+ ilocalinfo[i] = nil
+ local locallist = ilocalrefs[i] -- ref to lookup table to update
+ ilocalrefs[i] = nil
+ local existing = locallist[name] -- if existing, remove old first!
+ if existing then -- do not overlap, set special
+ obj = localinfo[existing] -- form of rem, as -id
+ obj.rem = -id
+ end
+ locallist[name] = id -- activate, now visible to Lua
+ end
+end
+
+----------------------------------------------------------------------
+-- remove (deactivate) variables in current scope (before scope exits)
+-- * zap entire locallist tables since we are not allocating registers
+-- * used in leaveblock(), close_func()
+----------------------------------------------------------------------
+
+local function removevars()
+ local bl = fs.bl
+ local locallist
+ -- locate locallist in current block object or function root object
+ if bl then
+ locallist = bl.locallist
+ else
+ locallist = fs.locallist
+ end
+ -- enumerate the local list at current scope and deactivate 'em
+ for name, id in base.pairs(locallist) do
+ local obj = localinfo[id]
+ obj.rem = xref -- set deactivation location
+ end
+end
+
+----------------------------------------------------------------------
+-- creates a new local variable given a name
+-- * skips internal locals (those starting with '('), so internal
+-- locals never need a corresponding adjustlocalvars() call
+-- * special is true for "self" which must not be optimized
+-- * used in fornum(), forlist(), parlist(), body()
+----------------------------------------------------------------------
+
+local function new_localvarliteral(name, special)
+ if string.sub(name, 1, 1) == "(" then -- can skip internal locals
+ return
+ end
+ new_localvar(name, special)
+end
+
+----------------------------------------------------------------------
+-- search the local variable namespace of the given fs for a match
+-- * returns localinfo index
+-- * used only in singlevaraux()
+----------------------------------------------------------------------
+
+local function searchvar(fs, n)
+ local bl = fs.bl
+ local locallist
+ if bl then
+ locallist = bl.locallist
+ while locallist do
+ if locallist[n] then return locallist[n] end -- found
+ bl = bl.prev
+ locallist = bl and bl.locallist
+ end
+ end
+ locallist = fs.locallist
+ return locallist[n] or -1 -- found or not found (-1)
+end
+
+----------------------------------------------------------------------
+-- handle locals, globals and upvalues and related processing
+-- * search mechanism is recursive, calls itself to search parents
+-- * used only in singlevar()
+----------------------------------------------------------------------
+
+local function singlevaraux(fs, n, var)
+ if fs == nil then -- no more levels?
+ var.k = "VGLOBAL" -- default is global variable
+ return "VGLOBAL"
+ else
+ local v = searchvar(fs, n) -- look up at current level
+ if v >= 0 then
+ var.k = "VLOCAL"
+ var.id = v
+ -- codegen may need to deal with upvalue here
+ return "VLOCAL"
+ else -- not found at current level; try upper one
+ if singlevaraux(fs.prev, n, var) == "VGLOBAL" then
+ return "VGLOBAL"
+ end
+ -- else was LOCAL or UPVAL, handle here
+ var.k = "VUPVAL" -- upvalue in this level
+ return "VUPVAL"
+ end--if v
+ end--if fs
+end
+
+----------------------------------------------------------------------
+-- consume a name token, creates a variable (global|local|upvalue)
+-- * used in prefixexp(), funcname()
+----------------------------------------------------------------------
+
+local function singlevar(v)
+ local name = str_checkname()
+ singlevaraux(fs, name, v)
+ ------------------------------------------------------------------
+ -- variable tracking
+ ------------------------------------------------------------------
+ if v.k == "VGLOBAL" then
+ -- if global being accessed, keep track of it by creating an object
+ local id = globallookup[name]
+ if not id then
+ id = #globalinfo + 1
+ globalinfo[id] = { -- new global variable object
+ name = name, -- global variable name
+ xref = { nameref }, -- xref, first value is declaration
+ }
+ globallookup[name] = id -- remember it
+ else
+ local obj = globalinfo[id].xref
+ obj[#obj + 1] = nameref -- add xref
+ end
+ else
+ -- local/upvalue is being accessed, keep track of it
+ local id = v.id
+ local obj = localinfo[id].xref
+ obj[#obj + 1] = nameref -- add xref
+ end
+end
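+
+-- e.g. (illustrative): parsing "print(x) print(x)" creates a single
+-- globalinfo object for "print" whose xref table records both
+-- occurrences, with globallookup["print"] mapping the name back to
+-- that object's index; "x" is tracked the same way unless a local or
+-- upvalue "x" is in scope, in which case localinfo gets the xref.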
+
+--[[--------------------------------------------------------------------
+-- state management functions with open/close pairs
+----------------------------------------------------------------------]]
+
+----------------------------------------------------------------------
+-- enters a code unit, initializes elements
+----------------------------------------------------------------------
+
+local function enterblock(isbreakable)
+ local bl = {} -- per-block state
+ bl.isbreakable = isbreakable
+ bl.prev = fs.bl
+ bl.locallist = {}
+ fs.bl = bl
+end
+
+----------------------------------------------------------------------
+-- leaves a code unit, close any upvalues
+----------------------------------------------------------------------
+
+local function leaveblock()
+ local bl = fs.bl
+ removevars()
+ fs.bl = bl.prev
+end
+
+----------------------------------------------------------------------
+-- opening of a function
+-- * top_fs is only for anchoring the top fs, so that parser() can
+-- return it to the caller function along with useful output
+-- * used in parser() and body()
+----------------------------------------------------------------------
+
+local function open_func()
+ local new_fs -- per-function state
+ if not fs then -- top_fs is created early
+ new_fs = top_fs
+ else
+ new_fs = {}
+ end
+ new_fs.prev = fs -- linked list of function states
+ new_fs.bl = nil
+ new_fs.locallist = {}
+ fs = new_fs
+end
+
+----------------------------------------------------------------------
+-- closing of a function
+-- * used in parser() and body()
+----------------------------------------------------------------------
+
+local function close_func()
+ removevars()
+ fs = fs.prev
+end
+
+--[[--------------------------------------------------------------------
+-- other parsing functions
+-- * for table constructor, parameter list, argument list
+----------------------------------------------------------------------]]
+
+----------------------------------------------------------------------
+-- parse a function name suffix, for function call specifications
+-- * used in primaryexp(), funcname()
+----------------------------------------------------------------------
+
+local function field(v)
+ -- field -> ['.' | ':'] NAME
+ local key = {}
+ nextt() -- skip the dot or colon
+ checkname(key)
+ v.k = "VINDEXED"
+end
+
+----------------------------------------------------------------------
+-- parse a table indexing suffix, for constructors, expressions
+-- * used in recfield(), primaryexp()
+----------------------------------------------------------------------
+
+local function yindex(v)
+ -- index -> '[' expr ']'
+ nextt() -- skip the '['
+ expr(v)
+ checknext("]")
+end
+
+----------------------------------------------------------------------
+-- parse a table record (hash) field
+-- * used in constructor()
+----------------------------------------------------------------------
+
+local function recfield(cc)
+ -- recfield -> (NAME | '['exp1']') = exp1
+ local key, val = {}, {}
+ if tok == "<name>" then
+ checkname(key)
+ else-- tok == '['
+ yindex(key)
+ end
+ checknext("=")
+ expr(val)
+end
+
+----------------------------------------------------------------------
+-- emit a set list instruction if enough elements (LFIELDS_PER_FLUSH)
+-- * note: retained in this skeleton because it modifies cc.v.k
+-- * used in constructor()
+----------------------------------------------------------------------
+
+local function closelistfield(cc)
+ if cc.v.k == "VVOID" then return end -- there is no list item
+ cc.v.k = "VVOID"
+end
+
+----------------------------------------------------------------------
+-- parse a table list (array) field
+-- * used in constructor()
+----------------------------------------------------------------------
+
+local function listfield(cc)
+ expr(cc.v)
+end
+
+----------------------------------------------------------------------
+-- parse a table constructor
+-- * used in funcargs(), simpleexp()
+----------------------------------------------------------------------
+
+local function constructor(t)
+ -- constructor -> '{' [ field { fieldsep field } [ fieldsep ] ] '}'
+ -- field -> recfield | listfield
+ -- fieldsep -> ',' | ';'
+ local line = ln
+ local cc = {}
+ cc.v = {}
+ cc.t = t
+ t.k = "VRELOCABLE"
+ cc.v.k = "VVOID"
+ checknext("{")
+ repeat
+ if tok == "}" then break end
+ -- closelistfield(cc) here
+ local c = tok
+ if c == "<name>" then -- may be listfields or recfields
+ if lookahead() ~= "=" then -- look ahead: expression?
+ listfield(cc)
+ else
+ recfield(cc)
+ end
+ elseif c == "[" then -- constructor_item -> recfield
+ recfield(cc)
+ else -- constructor_part -> listfield
+ listfield(cc)
+ end
+ until not testnext(",") and not testnext(";")
+ check_match("}", "{", line)
+ -- lastlistfield(cc) here
+end
+
+----------------------------------------------------------------------
+-- parse the arguments (parameters) of a function declaration
+-- * used in body()
+----------------------------------------------------------------------
+
+local function parlist()
+ -- parlist -> [ param { ',' param } ]
+ local nparams = 0
+ if tok ~= ")" then -- is 'parlist' not empty?
+ repeat
+ local c = tok
+ if c == "<name>" then -- param -> NAME
+ new_localvar(str_checkname())
+ nparams = nparams + 1
+ elseif c == "..." then
+ nextt()
+ fs.is_vararg = true
+ else
+ syntaxerror("<name> or '...' expected")
+ end
+ until fs.is_vararg or not testnext(",")
+ end--if
+ adjustlocalvars(nparams)
+end
+
+----------------------------------------------------------------------
+-- parse the parameters of a function call
+-- * contrast with parlist(), used in function declarations
+-- * used in primaryexp()
+----------------------------------------------------------------------
+
+local function funcargs(f)
+ local args = {}
+ local line = ln
+ local c = tok
+ if c == "(" then -- funcargs -> '(' [ explist1 ] ')'
+ if line ~= lastln then
+ syntaxerror("ambiguous syntax (function call x new statement)")
+ end
+ nextt()
+ if tok == ")" then -- arg list is empty?
+ args.k = "VVOID"
+ else
+ explist1(args)
+ end
+ check_match(")", "(", line)
+ elseif c == "{" then -- funcargs -> constructor
+ constructor(args)
+ elseif c == "<string>" then -- funcargs -> STRING
+ codestring(args, seminfo)
+ nextt() -- must use 'seminfo' before 'next'
+ else
+ syntaxerror("function arguments expected")
+ return
+ end--if c
+ f.k = "VCALL"
+end
+
+--[[--------------------------------------------------------------------
+-- mostly expression functions
+----------------------------------------------------------------------]]
+
+----------------------------------------------------------------------
+-- parses an expression in parentheses or a single variable
+-- * used in primaryexp()
+----------------------------------------------------------------------
+
+local function prefixexp(v)
+ -- prefixexp -> NAME | '(' expr ')'
+ local c = tok
+ if c == "(" then
+ local line = ln
+ nextt()
+ expr(v)
+ check_match(")", "(", line)
+ elseif c == "<name>" then
+ singlevar(v)
+ else
+ syntaxerror("unexpected symbol")
+ end--if c
+end
+
+----------------------------------------------------------------------
+-- parses a prefixexp (an expression in parentheses or a single
+-- variable) or a function call specification
+-- * used in simpleexp(), assignment(), expr_stat()
+----------------------------------------------------------------------
+
+local function primaryexp(v)
+ -- primaryexp ->
+ -- prefixexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs }
+ prefixexp(v)
+ while true do
+ local c = tok
+ if c == "." then -- field
+ field(v)
+ elseif c == "[" then -- '[' exp1 ']'
+ local key = {}
+ yindex(key)
+ elseif c == ":" then -- ':' NAME funcargs
+ local key = {}
+ nextt()
+ checkname(key)
+ funcargs(v)
+ elseif c == "(" or c == "<string>" or c == "{" then -- funcargs
+ funcargs(v)
+ else
+ return
+ end--if c
+ end--while
+end
+
+----------------------------------------------------------------------
+-- parses general expression types, constants handled here
+-- * used in subexpr()
+----------------------------------------------------------------------
+
+local function simpleexp(v)
+ -- simpleexp -> NUMBER | STRING | NIL | TRUE | FALSE | ... |
+ -- constructor | FUNCTION body | primaryexp
+ local c = tok
+ if c == "<number>" then
+ v.k = "VKNUM"
+ elseif c == "<string>" then
+ codestring(v, seminfo)
+ elseif c == "nil" then
+ v.k = "VNIL"
+ elseif c == "true" then
+ v.k = "VTRUE"
+ elseif c == "false" then
+ v.k = "VFALSE"
+ elseif c == "..." then -- vararg
+ check_condition(fs.is_vararg == true,
+ "cannot use '...' outside a vararg function");
+ v.k = "VVARARG"
+ elseif c == "{" then -- constructor
+ constructor(v)
+ return
+ elseif c == "function" then
+ nextt()
+ body(v, false, ln)
+ return
+ else
+ primaryexp(v)
+ return
+ end--if c
+ nextt()
+end
+
+------------------------------------------------------------------------
+-- Parse subexpressions. Includes handling of unary operators and binary
+-- operators. A subexpr is given the rhs priority level of the operator
+-- immediately left of it, if any (limit is 0 if none), and if a binop
+-- is found, limit is compared with the lhs priority level of the binop
+-- in order to determine which executes first.
+-- * recursively called
+-- * used in expr()
+------------------------------------------------------------------------
+
+local function subexpr(v, limit)
+ -- subexpr -> (simpleexp | unop subexpr) { binop subexpr }
+ -- * where 'binop' is any binary operator with a priority
+ -- higher than 'limit'
+ local op = tok
+ local uop = unopr[op]
+ if uop then
+ nextt()
+ subexpr(v, UNARY_PRIORITY)
+ else
+ simpleexp(v)
+ end
+ -- expand while operators have priorities higher than 'limit'
+ op = tok
+ local binop = binopr_left[op]
+ while binop and binop > limit do
+ local v2 = {}
+ nextt()
+ -- read sub-expression with higher priority
+ local nextop = subexpr(v2, binopr_right[op])
+ op = nextop
+ binop = binopr_left[op]
+ end
+ return op -- return first untreated operator
+end
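+
+-- Worked example (illustrative): for "a + b * c", expr() calls
+-- subexpr(v, 0). After "a" is read, "+" has left priority 6 > 0, so
+-- the right operand is parsed recursively with limit
+-- binopr_right["+"] = 6; inside that call "*" has left priority 7 > 6
+-- and is consumed there, so the expression groups as a + (b * c).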
+
+----------------------------------------------------------------------
+-- Expression parsing starts here. Function subexpr is entered with the
+-- left operator (which is non-existent) priority of 0, which is lower
+-- than all actual operators. Expr information is returned in parm v.
+-- * used in cond(), explist1(), index(), recfield(), listfield(),
+-- prefixexp(), while_stat(), exp1()
+----------------------------------------------------------------------
+
+-- this is a forward-referenced local
+function expr(v)
+ -- expr -> subexpr
+ subexpr(v, 0)
+end
+
+--[[--------------------------------------------------------------------
+-- third level parsing functions
+----------------------------------------------------------------------]]
+
+------------------------------------------------------------------------
+-- parse a variable assignment sequence
+-- * recursively called
+-- * used in expr_stat()
+------------------------------------------------------------------------
+
+local function assignment(v)
+ local e = {}
+ local c = v.v.k
+ check_condition(c == "VLOCAL" or c == "VUPVAL" or c == "VGLOBAL"
+ or c == "VINDEXED", "syntax error")
+ if testnext(",") then -- assignment -> ',' primaryexp assignment
+ local nv = {} -- expdesc
+ nv.v = {}
+ primaryexp(nv.v)
+ -- lparser.c deals with some register usage conflict here
+ assignment(nv)
+ else -- assignment -> '=' explist1
+ checknext("=")
+ explist1(e)
+ return -- avoid default
+ end
+ e.k = "VNONRELOC"
+end
+
+----------------------------------------------------------------------
+-- parse a for loop body for both versions of the for loop
+-- * used in fornum(), forlist()
+----------------------------------------------------------------------
+
+local function forbody(nvars, isnum)
+ -- forbody -> DO block
+ checknext("do")
+ enterblock(false) -- scope for declared variables
+ adjustlocalvars(nvars)
+ block()
+ leaveblock() -- end of scope for declared variables
+end
+
+----------------------------------------------------------------------
+-- parse a numerical for loop, calls forbody()
+-- * used in for_stat()
+----------------------------------------------------------------------
+
+local function fornum(varname)
+ -- fornum -> NAME = exp1, exp1 [, exp1] DO body
+ local line = line
+ new_localvarliteral("(for index)")
+ new_localvarliteral("(for limit)")
+ new_localvarliteral("(for step)")
+ new_localvar(varname)
+ checknext("=")
+ exp1() -- initial value
+ checknext(",")
+ exp1() -- limit
+ if testnext(",") then
+ exp1() -- optional step
+ else
+ -- default step = 1
+ end
+ forbody(1, true)
+end
+
+----------------------------------------------------------------------
+-- parse a generic for loop, calls forbody()
+-- * used in for_stat()
+----------------------------------------------------------------------
+
+local function forlist(indexname)
+ -- forlist -> NAME {, NAME} IN explist1 DO body
+ local e = {}
+ -- create control variables
+ new_localvarliteral("(for generator)")
+ new_localvarliteral("(for state)")
+ new_localvarliteral("(for control)")
+ -- create declared variables
+ new_localvar(indexname)
+ local nvars = 1
+ while testnext(",") do
+ new_localvar(str_checkname())
+ nvars = nvars + 1
+ end
+ checknext("in")
+ local line = line
+ explist1(e)
+ forbody(nvars, false)
+end
+
+----------------------------------------------------------------------
+-- parse a function name specification
+-- * used in func_stat()
+----------------------------------------------------------------------
+
+local function funcname(v)
+ -- funcname -> NAME {field} [':' NAME]
+ local needself = false
+ singlevar(v)
+ while tok == "." do
+ field(v)
+ end
+ if tok == ":" then
+ needself = true
+ field(v)
+ end
+ return needself
+end
+
+----------------------------------------------------------------------
+-- parse the single expressions needed in numerical for loops
+-- * used in fornum()
+----------------------------------------------------------------------
+
+-- this is a forward-referenced local
+function exp1()
+ -- exp1 -> expr
+ local e = {}
+ expr(e)
+end
+
+----------------------------------------------------------------------
+-- parse condition in a repeat statement or an if control structure
+-- * used in repeat_stat(), test_then_block()
+----------------------------------------------------------------------
+
+local function cond()
+ -- cond -> expr
+ local v = {}
+ expr(v) -- read condition
+end
+
+----------------------------------------------------------------------
+-- parse part of an if control structure, including the condition
+-- * used in if_stat()
+----------------------------------------------------------------------
+
+local function test_then_block()
+ -- test_then_block -> [IF | ELSEIF] cond THEN block
+ nextt() -- skip IF or ELSEIF
+ cond()
+ checknext("then")
+ block() -- 'then' part
+end
+
+----------------------------------------------------------------------
+-- parse a local function statement
+-- * used in local_stat()
+----------------------------------------------------------------------
+
+local function localfunc()
+ -- localfunc -> NAME body
+ local v, b = {}
+ new_localvar(str_checkname())
+ v.k = "VLOCAL"
+ adjustlocalvars(1)
+ body(b, false, ln)
+end
+
+----------------------------------------------------------------------
+-- parse a local variable declaration statement
+-- * used in local_stat()
+----------------------------------------------------------------------
+
+local function localstat()
+ -- localstat -> NAME {',' NAME} ['=' explist1]
+ local nvars = 0
+ local e = {}
+ repeat
+ new_localvar(str_checkname())
+ nvars = nvars + 1
+ until not testnext(",")
+ if testnext("=") then
+ explist1(e)
+ else
+ e.k = "VVOID"
+ end
+ adjustlocalvars(nvars)
+end
+
+----------------------------------------------------------------------
+-- parse a list of comma-separated expressions
+-- * used in return_stat(), localstat(), funcargs(), assignment(),
+-- forlist()
+----------------------------------------------------------------------
+
+-- this is a forward-referenced local
+function explist1(e)
+ -- explist1 -> expr { ',' expr }
+ expr(e)
+ while testnext(",") do
+ expr(e)
+ end
+end
+
+----------------------------------------------------------------------
+-- parse function declaration body
+-- * used in simpleexp(), localfunc(), func_stat()
+----------------------------------------------------------------------
+
+-- this is a forward-referenced local
+function body(e, needself, line)
+ -- body -> '(' parlist ')' chunk END
+ open_func()
+ checknext("(")
+ if needself then
+ new_localvarliteral("self", true)
+ adjustlocalvars(1)
+ end
+ parlist()
+ checknext(")")
+ chunk()
+ check_match("end", "function", line)
+ close_func()
+end
+
+----------------------------------------------------------------------
+-- parse a code block or unit
+-- * used in do_stat(), while_stat(), forbody(), test_then_block(),
+-- if_stat()
+----------------------------------------------------------------------
+
+-- this is a forward-referenced local
+function block()
+ -- block -> chunk
+ enterblock(false)
+ chunk()
+ leaveblock()
+end
+
+--[[--------------------------------------------------------------------
+-- second level parsing functions, all with '_stat' suffix
+-- * since they are called via a table lookup, they cannot be local
+-- functions (a lookup table of local functions might be smaller...)
+-- * stat() -> *_stat()
+----------------------------------------------------------------------]]
+
+----------------------------------------------------------------------
+-- initial parsing for a for loop, calls fornum() or forlist()
+-- * removed 'line' parameter (used to set debug information only)
+-- * used in stat()
+----------------------------------------------------------------------
+
+local function for_stat()
+ -- stat -> for_stat -> FOR (fornum | forlist) END
+ local line = line
+ enterblock(true) -- scope for loop and control variables
+ nextt() -- skip 'for'
+ local varname = str_checkname() -- first variable name
+ local c = tok
+ if c == "=" then
+ fornum(varname)
+ elseif c == "," or c == "in" then
+ forlist(varname)
+ else
+ syntaxerror("'=' or 'in' expected")
+ end
+ check_match("end", "for", line)
+ leaveblock() -- loop scope (`break' jumps to this point)
+end
+
+----------------------------------------------------------------------
+-- parse a while-do control structure, body processed by block()
+-- * used in stat()
+----------------------------------------------------------------------
+
+local function while_stat()
+ -- stat -> while_stat -> WHILE cond DO block END
+ local line = line
+ nextt() -- skip WHILE
+ cond() -- parse condition
+ enterblock(true)
+ checknext("do")
+ block()
+ check_match("end", "while", line)
+ leaveblock()
+end
+
+----------------------------------------------------------------------
+-- parse a repeat-until control structure, body parsed by chunk()
+-- * originally, repeatstat() calls breakstat() too if there is an
+-- upvalue in the scope block; nothing is actually lexed, it is
+-- actually the common code in breakstat() for closing of upvalues
+-- * used in stat()
+----------------------------------------------------------------------
+
+local function repeat_stat()
+ -- stat -> repeat_stat -> REPEAT block UNTIL cond
+ local line = line
+ enterblock(true) -- loop block
+ enterblock(false) -- scope block
+ nextt() -- skip REPEAT
+ chunk()
+ check_match("until", "repeat", line)
+ cond()
+ -- close upvalues at scope level below
+ leaveblock() -- finish scope
+ leaveblock() -- finish loop
+end
+
+----------------------------------------------------------------------
+-- parse an if control structure
+-- * used in stat()
+----------------------------------------------------------------------
+
+local function if_stat()
+ -- stat -> if_stat -> IF cond THEN block
+ -- {ELSEIF cond THEN block} [ELSE block] END
+ local line = line
+ local v = {}
+ test_then_block() -- IF cond THEN block
+ while tok == "elseif" do
+ test_then_block() -- ELSEIF cond THEN block
+ end
+ if tok == "else" then
+ nextt() -- skip ELSE
+ block() -- 'else' part
+ end
+ check_match("end", "if", line)
+end
+
+----------------------------------------------------------------------
+-- parse a return statement
+-- * used in stat()
+----------------------------------------------------------------------
+
+local function return_stat()
+ -- stat -> return_stat -> RETURN explist
+ local e = {}
+ nextt() -- skip RETURN
+ local c = tok
+ if block_follow[c] or c == ";" then
+ -- return no values
+ else
+ explist1(e) -- optional return values
+ end
+end
+
+----------------------------------------------------------------------
+-- parse a break statement
+-- * used in stat()
+----------------------------------------------------------------------
+
+local function break_stat()
+ -- stat -> break_stat -> BREAK
+ local bl = fs.bl
+ nextt() -- skip BREAK
+ while bl and not bl.isbreakable do -- find a breakable block
+ bl = bl.prev
+ end
+ if not bl then
+ syntaxerror("no loop to break")
+ end
+end
+
+----------------------------------------------------------------------
+-- parse a function call with no returns or an assignment statement
+-- * the struct with .prev is used for name searching in lparser.c,
+-- so it is retained for now; present in assignment() also
+-- * used in stat()
+----------------------------------------------------------------------
+
+local function expr_stat()
+ local id = tpos - 1
+ -- stat -> expr_stat -> func | assignment
+ local v = {}
+ v.v = {}
+ primaryexp(v.v)
+ if v.v.k == "VCALL" then -- stat -> func
+ -- call statement uses no results
+ statinfo[id] = "call"
+ else -- stat -> assignment
+ v.prev = nil
+ assignment(v)
+ statinfo[id] = "assign"
+ end
+end
+
+----------------------------------------------------------------------
+-- parse a function statement
+-- * used in stat()
+----------------------------------------------------------------------
+
+local function function_stat()
+ -- stat -> function_stat -> FUNCTION funcname body
+ local line = line
+ local v, b = {}, {}
+ nextt() -- skip FUNCTION
+ local needself = funcname(v)
+ body(b, needself, line)
+end
+
+----------------------------------------------------------------------
+-- parse a simple block enclosed by a DO..END pair
+-- * used in stat()
+----------------------------------------------------------------------
+
+local function do_stat()
+ -- stat -> do_stat -> DO block END
+ local line = line
+ nextt() -- skip DO
+ block()
+ check_match("end", "do", line)
+end
+
+----------------------------------------------------------------------
+-- parse a statement starting with LOCAL
+-- * used in stat()
+----------------------------------------------------------------------
+
+local function local_stat()
+ -- stat -> local_stat -> LOCAL FUNCTION localfunc
+ -- -> LOCAL localstat
+ nextt() -- skip LOCAL
+ if testnext("function") then -- local function?
+ localfunc()
+ else
+ localstat()
+ end
+end
+
+--[[--------------------------------------------------------------------
+-- main functions, top level parsing functions
+-- * accessible functions are: init(lexer), parser()
+-- * [entry] -> parser() -> chunk() -> stat()
+----------------------------------------------------------------------]]
+
+----------------------------------------------------------------------
+-- initial parsing for statements, calls '_stat' suffixed functions
+-- * used in chunk()
+----------------------------------------------------------------------
+
+local stat_call = { -- lookup for calls in stat()
+ ["if"] = if_stat,
+ ["while"] = while_stat,
+ ["do"] = do_stat,
+ ["for"] = for_stat,
+ ["repeat"] = repeat_stat,
+ ["function"] = function_stat,
+ ["local"] = local_stat,
+ ["return"] = return_stat,
+ ["break"] = break_stat,
+}
+
+local function stat()
+ -- stat -> if_stat while_stat do_stat for_stat repeat_stat
+ -- function_stat local_stat return_stat break_stat
+ -- expr_stat
+ line = ln -- may be needed for error messages
+ local c = tok
+ local fn = stat_call[c]
+ -- handles: if while do for repeat function local return break
+ if fn then
+ statinfo[tpos - 1] = c
+ fn()
+ -- return or break must be last statement
+ if c == "return" or c == "break" then return true end
+ else
+ expr_stat()
+ end
+ return false
+end
+
+----------------------------------------------------------------------
+-- parse a chunk, which consists of a bunch of statements
+-- * used in parser(), body(), block(), repeat_stat()
+----------------------------------------------------------------------
+
+-- this is a forward-referenced local
+function chunk()
+ -- chunk -> { stat [';'] }
+ local islast = false
+ while not islast and not block_follow[tok] do
+ islast = stat()
+ testnext(";")
+ end
+end
+
+----------------------------------------------------------------------
+-- performs parsing, returns parsed data structure
+----------------------------------------------------------------------
+
+function parser()
+ open_func()
+ fs.is_vararg = true -- main func. is always vararg
+ nextt() -- read first token
+ chunk()
+ check("<eof>")
+ close_func()
+ return { -- return everything
+ globalinfo = globalinfo,
+ localinfo = localinfo,
+ statinfo = statinfo,
+ toklist = toklist,
+ seminfolist = seminfolist,
+ toklnlist = toklnlist,
+ xreflist = xreflist,
+ }
+end
+
+----------------------------------------------------------------------
+-- initialization function
+----------------------------------------------------------------------
+
+function init(tokorig, seminfoorig, toklnorig)
+ tpos = 1 -- token position
+ top_fs = {} -- reset top level function state
+ ------------------------------------------------------------------
+ -- set up grammar-only token tables; impedance-matching...
+-- note that constants returned by the lexer are source-level, so
+ -- for now, fake(!) constant tokens (TK_NUMBER|TK_STRING|TK_LSTRING)
+ ------------------------------------------------------------------
+ local j = 1
+ toklist, seminfolist, toklnlist, xreflist = {}, {}, {}, {}
+ for i = 1, #tokorig do
+ local tok = tokorig[i]
+ local yep = true
+ if tok == "TK_KEYWORD" or tok == "TK_OP" then
+ tok = seminfoorig[i]
+ elseif tok == "TK_NAME" then
+ tok = "<name>"
+ seminfolist[j] = seminfoorig[i]
+ elseif tok == "TK_NUMBER" then
+ tok = "<number>"
+ seminfolist[j] = 0 -- fake!
+ elseif tok == "TK_STRING" or tok == "TK_LSTRING" then
+ tok = "<string>"
+ seminfolist[j] = "" -- fake!
+ elseif tok == "TK_EOS" then
+ tok = "<eof>"
+ else
+ -- non-grammar tokens; ignore them
+ yep = false
+ end
+ if yep then -- set rest of the information
+ toklist[j] = tok
+ toklnlist[j] = toklnorig[i]
+ xreflist[j] = i
+ j = j + 1
+ end
+ end--for
+ ------------------------------------------------------------------
+ -- initialize data structures for variable tracking
+ ------------------------------------------------------------------
+ globalinfo, globallookup, localinfo = {}, {}, {}
+ ilocalinfo, ilocalrefs = {}, {}
+ statinfo = {} -- experimental
+end
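+
+-- e.g. (illustrative): lexer output {"TK_NAME","TK_SPACE","TK_NUMBER"}
+-- with seminfo {"x"," ","42"} becomes toklist {"<name>","<number>"}
+-- and seminfolist {"x",0} (the numeric value is faked as 0); the
+-- TK_SPACE token is dropped and xreflist {1,3} points back at the
+-- original token stream.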
+--end of inserted module
+end
+
+-- preload function for module optlex
+preload.optlex =
+function()
+--start of inserted module
+module "optlex"
+
+local string = base.require "string"
+local match = string.match
+local sub = string.sub
+local find = string.find
+local rep = string.rep
+local print
+
+------------------------------------------------------------------------
+-- variables and data structures
+------------------------------------------------------------------------
+
+-- error function, can override by setting own function into module
+error = base.error
+
+warn = {} -- table for warning flags
+
+local stoks, sinfos, stoklns -- source lists
+
+local is_realtoken = { -- significant (grammar) tokens
+ TK_KEYWORD = true,
+ TK_NAME = true,
+ TK_NUMBER = true,
+ TK_STRING = true,
+ TK_LSTRING = true,
+ TK_OP = true,
+ TK_EOS = true,
+}
+local is_faketoken = { -- whitespace (non-grammar) tokens
+ TK_COMMENT = true,
+ TK_LCOMMENT = true,
+ TK_EOL = true,
+ TK_SPACE = true,
+}
+
+local opt_details -- for extra information
+
+------------------------------------------------------------------------
+-- true if current token is at the start of a line
+-- * skips over deleted tokens via recursion
+------------------------------------------------------------------------
+
+local function atlinestart(i)
+ local tok = stoks[i - 1]
+ if i <= 1 or tok == "TK_EOL" then
+ return true
+ elseif tok == "" then
+ return atlinestart(i - 1)
+ end
+ return false
+end
+
+------------------------------------------------------------------------
+-- true if current token is at the end of a line
+-- * skips over deleted tokens via recursion
+------------------------------------------------------------------------
+
+local function atlineend(i)
+ local tok = stoks[i + 1]
+ if i >= #stoks or tok == "TK_EOL" or tok == "TK_EOS" then
+ return true
+ elseif tok == "" then
+ return atlineend(i + 1)
+ end
+ return false
+end
+
+------------------------------------------------------------------------
+-- counts comment EOLs inside a long comment
+-- * in order to keep line numbering, EOLs need to be reinserted
+------------------------------------------------------------------------
+
+local function commenteols(lcomment)
+ local sep = #match(lcomment, "^%-%-%[=*%[")
+ local z = sub(lcomment, sep + 1, -(sep - 1)) -- remove delims
+ local i, c = 1, 0
+ while true do
+ local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
+ if not p then break end -- if no matches, done
+ i = p + 1
+ c = c + 1
+ if #s > 0 and r ~= s then -- skip CRLF or LFCR
+ i = i + 1
+ end
+ end
+ return c
+end
+
+------------------------------------------------------------------------
+-- compares two tokens (i, j) and returns the whitespace required
+-- * see documentation for a reference table of interactions
+-- * only two grammar/real tokens are being considered
+-- * if "", no separation is needed
+-- * if " ", then at least one whitespace (or EOL) is required
+-- * NOTE: this doesn't work at the start or the end or for EOS!
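+-- * e.g. a name followed by "=" needs nothing ("x="), a keyword
+-- followed by a name needs a space ("and x"), and ".." followed by
+-- "." needs a space, since "..." would lex as a single token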
+------------------------------------------------------------------------
+
+local function checkpair(i, j)
+ local match = match
+ local t1, t2 = stoks[i], stoks[j]
+ --------------------------------------------------------------------
+ if t1 == "TK_STRING" or t1 == "TK_LSTRING" or
+ t2 == "TK_STRING" or t2 == "TK_LSTRING" then
+ return ""
+ --------------------------------------------------------------------
+ elseif t1 == "TK_OP" or t2 == "TK_OP" then
+ if (t1 == "TK_OP" and (t2 == "TK_KEYWORD" or t2 == "TK_NAME")) or
+ (t2 == "TK_OP" and (t1 == "TK_KEYWORD" or t1 == "TK_NAME")) then
+ return ""
+ end
+ if t1 == "TK_OP" and t2 == "TK_OP" then
+ -- for TK_OP/TK_OP pairs, see notes in technotes.txt
+ local op, op2 = sinfos[i], sinfos[j]
+ if (match(op, "^%.%.?$") and match(op2, "^%.")) or
+ (match(op, "^[~=<>]$") and op2 == "=") or
+ (op == "[" and (op2 == "[" or op2 == "=")) then
+ return " "
+ end
+ return ""
+ end
+ -- "TK_OP" + "TK_NUMBER" case
+ local op = sinfos[i]
+ if t2 == "TK_OP" then op = sinfos[j] end
+ if match(op, "^%.%.?%.?$") then
+ return " "
+ end
+ return ""
+ --------------------------------------------------------------------
+ else-- "TK_KEYWORD" | "TK_NAME" | "TK_NUMBER" then
+ return " "
+ --------------------------------------------------------------------
+ end
+end
+
+------------------------------------------------------------------------
+-- repack tokens, removing deletions caused by optimization process
+------------------------------------------------------------------------
+
+local function repack_tokens()
+ local dtoks, dinfos, dtoklns = {}, {}, {}
+ local j = 1
+ for i = 1, #stoks do
+ local tok = stoks[i]
+ if tok ~= "" then
+ dtoks[j], dinfos[j], dtoklns[j] = tok, sinfos[i], stoklns[i]
+ j = j + 1
+ end
+ end
+ stoks, sinfos, stoklns = dtoks, dinfos, dtoklns
+end
+
+------------------------------------------------------------------------
+-- number optimization
+-- * optimization using string formatting functions is one way of doing
+-- this, but here, we consider all cases and handle them separately
+-- (possibly an idiotic approach...)
+-- * the scientific notation generated is not in canonical form; this
+-- may or may not be a bad thing
+-- * note: intermediate portions need to fit into a normal number range
+-- * optimizations can be divided based on number patterns:
+-- * hexadecimal:
+-- (1) no need to remove leading zeros, just skip to (2)
+-- (2) convert to integer if size equal or smaller
+-- * change if equal size -> lose the 'x' to reduce entropy
+-- (3) number is then processed as an integer
+-- (4) note: does not make 0[xX] consistent
+-- * integer:
+-- (1) note: includes anything with trailing ".", ".0", ...
+-- (2) remove useless fractional part, if present, e.g. 123.000
+-- (3) remove leading zeros, e.g. 000123
+-- (4) switch to scientific if shorter, e.g. 123000 -> 123e3
+-- * with fraction:
+-- (1) split into digits dot digits
+-- (2) if no integer portion, take as zero (can omit later)
+-- (3) handle degenerate .000 case, after which the fractional part
+-- must be non-zero (if zero, it's matched as an integer)
+-- (4) remove trailing zeros for fractional portion
+-- (5) p.q where p > 0 and q > 0 cannot be shortened any more
+-- (6) otherwise p == 0 and the form is .q, e.g. .000123
+-- (7) if scientific shorter, convert, e.g. .000123 -> 123e-6
+-- * scientific:
+-- (1) split into (digits dot digits) [eE] ([+-] digits)
+-- (2) if significand has ".", shift it out so it becomes an integer
+-- (3) if significand is zero, just use zero
+-- (4) remove leading zeros for significand
+-- (5) shift out trailing zeros for significand
+-- (6) examine exponent and determine which format is best:
+-- integer, with fraction, scientific
+------------------------------------------------------------------------
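+-- some worked examples of the rules above:
+-- 0x3F -> 63 (hexadecimal to shorter integer)
+-- 000123 -> 123 (leading zeros removed)
+-- 123.000 -> 123 (useless fractional part removed)
+-- 1230000 -> 123e4 (scientific notation is shorter)
+-- .000123 -> 123e-6 (fraction converted to scientific)
+-- 1.23e3 -> 1230 (scientific converted to integer)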
+
+local function do_number(i)
+ local before = sinfos[i] -- 'before'
+ local z = before -- working representation
+ local y -- 'after', if better
+ --------------------------------------------------------------------
+ if match(z, "^0[xX]") then -- hexadecimal number
+ local v = base.tostring(base.tonumber(z))
+ if #v <= #z then
+ z = v -- change to integer, AND continue
+ else
+ return -- no change; stick to hex
+ end
+ end
+ --------------------------------------------------------------------
+ if match(z, "^%d+%.?0*$") then -- integer or has useless frac
+ z = match(z, "^(%d+)%.?0*$") -- int portion only
+ if z + 0 > 0 then
+ z = match(z, "^0*([1-9]%d*)$") -- remove leading zeros
+ local v = #match(z, "0*$")
+ local nv = base.tostring(v)
+ if v > #nv + 1 then -- scientific is shorter
+ z = sub(z, 1, #z - v).."e"..nv
+ end
+ y = z
+ else
+ y = "0" -- basic zero
+ end
+ --------------------------------------------------------------------
+ elseif not match(z, "[eE]") then -- number with fraction part
+ local p, q = match(z, "^(%d*)%.(%d+)$") -- split
+ if p == "" then p = 0 end -- int part zero
+ if q + 0 == 0 and p == 0 then
+ y = "0" -- degenerate .000 case
+ else
+ -- now, q > 0 holds and p is a number
+ local v = #match(q, "0*$") -- remove trailing zeros
+ if v > 0 then
+ q = sub(q, 1, #q - v)
+ end
+ -- if p > 0, nothing else we can do to simplify p.q case
+ if p + 0 > 0 then
+ y = p.."."..q
+ else
+ y = "."..q -- tentative, e.g. .000123
+ local v = #match(q, "^0*") -- # leading spaces
+ local w = #q - v -- # significant digits
+ local nv = base.tostring(#q)
+ -- e.g. compare 123e-6 versus .000123
+ if w + 2 + #nv < 1 + #q then
+ y = sub(q, -w).."e-"..nv
+ end
+ end
+ end
+ --------------------------------------------------------------------
+ else -- scientific number
+ local sig, ex = match(z, "^([^eE]+)[eE]([%+%-]?%d+)$")
+ ex = base.tonumber(ex)
+ -- if got ".", shift out fractional portion of significand
+ local p, q = match(sig, "^(%d*)%.(%d*)$")
+ if p then
+ ex = ex - #q
+ sig = p..q
+ end
+ if sig + 0 == 0 then
+ y = "0" -- basic zero
+ else
+ local v = #match(sig, "^0*") -- remove leading zeros
+ sig = sub(sig, v + 1)
+ v = #match(sig, "0*$") -- shift out trailing zeros
+ if v > 0 then
+ sig = sub(sig, 1, #sig - v)
+ ex = ex + v
+ end
+ -- examine exponent and determine which format is best
+ local nex = base.tostring(ex)
+ if ex == 0 then -- it's just an integer
+ y = sig
+ elseif ex > 0 and (ex <= 1 + #nex) then -- integer form is not longer
+ y = sig..rep("0", ex)
+ elseif ex < 0 and (ex >= -#sig) then -- fraction, e.g. .123
+ v = #sig + ex
+ y = sub(sig, 1, v).."."..sub(sig, v + 1)
+ elseif ex < 0 and (#nex >= -ex - #sig) then
+ -- e.g. compare 1234e-5 versus .01234
+ -- gives: #sig + 1 + #nex >= 1 + (-ex - #sig) + #sig
+ -- -> #nex >= -ex - #sig
+ v = -ex - #sig
+ y = "."..rep("0", v)..sig
+ else -- non-canonical scientific representation
+ y = sig.."e"..ex
+ end
+ end--if sig
+ end
+ --------------------------------------------------------------------
+ if y and y ~= sinfos[i] then
+ if opt_details then
+ print("<number> (line "..stoklns[i]..") "..sinfos[i].." -> "..y)
+ opt_details = opt_details + 1
+ end
+ sinfos[i] = y
+ end
+end
+
+------------------------------------------------------------------------
+-- string optimization
+-- * note: works on well-formed strings only!
+-- * optimizations on characters can be summarized as follows:
+-- \a\b\f\n\r\t\v -- no change
+-- \\ -- no change
+-- \"\' -- depends on delim, other can remove \
+-- \[\] -- remove \
+-- \<char> -- general escape, remove \
+-- \<eol> -- normalize the EOL only
+-- \ddd -- if \a\b\f\n\r\t\v, change to latter
+-- if other < ascii 32, keep ddd but zap leading zeros
+-- but cannot have following digits
+-- if >= ascii 32, translate it into the literal, then also
+-- do escapes for \\,\",\' cases
+-- <other> -- no change
+-- * switch delimiters if string becomes shorter
+------------------------------------------------------------------------
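+-- some worked examples of the rules above:
+-- '\q' -> 'q' (general escape, backslash dropped)
+-- "\'" -> "'" (quote is not the delimiter, backslash dropped)
+-- '\65' -> 'A' (>= ASCII 32, escape becomes the literal)
+-- '\001' -> '\1' (< ASCII 32, leading zeros dropped)
+-- "a\"b" -> 'a"b' (switching delimiters is shorter)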
+
+local function do_string(I)
+ local info = sinfos[I]
+ local delim = sub(info, 1, 1) -- delimiter used
+ local ndelim = (delim == "'") and '"' or "'" -- opposite " <-> '
+ local z = sub(info, 2, -2) -- actual string
+ local i = 1
+ local c_delim, c_ndelim = 0, 0 -- counts of delim and ndelim occurrences
+ --------------------------------------------------------------------
+ while i <= #z do
+ local c = sub(z, i, i)
+ ----------------------------------------------------------------
+ if c == "\\" then -- escaped stuff
+ local j = i + 1
+ local d = sub(z, j, j)
+ local p = find("abfnrtv\\\n\r\"\'0123456789", d, 1, true)
+ ------------------------------------------------------------
+ if not p then -- \<char> -- remove \
+ z = sub(z, 1, i - 1)..sub(z, j)
+ i = i + 1
+ ------------------------------------------------------------
+ elseif p <= 8 then -- \a\b\f\n\r\t\v\\
+ i = i + 2 -- no change
+ ------------------------------------------------------------
+ elseif p <= 10 then -- \<eol> -- normalize EOL
+ local eol = sub(z, j, j + 1)
+ if eol == "\r\n" or eol == "\n\r" then
+ z = sub(z, 1, i).."\n"..sub(z, j + 2)
+ elseif p == 10 then -- \r case
+ z = sub(z, 1, i).."\n"..sub(z, j + 1)
+ end
+ i = i + 2
+ ------------------------------------------------------------
+ elseif p <= 12 then -- \"\' -- remove \ for ndelim
+ if d == delim then
+ c_delim = c_delim + 1
+ i = i + 2
+ else
+ c_ndelim = c_ndelim + 1
+ z = sub(z, 1, i - 1)..sub(z, j)
+ i = i + 1
+ end
+ ------------------------------------------------------------
+ else -- \ddd -- various steps
+ local s = match(z, "^(%d%d?%d?)", j)
+ j = i + 1 + #s -- skip to location
+ local cv = s + 0
+ local cc = string.char(cv)
+ local p = find("\a\b\f\n\r\t\v", cc, 1, true)
+ if p then -- special escapes
+ s = "\\"..sub("abfnrtv", p, p)
+ elseif cv < 32 then -- normalized \ddd
+ if match(sub(z, j, j), "%d") then
+ -- if a digit follows, \ddd cannot be shortened
+ s = "\\"..s
+ else
+ s = "\\"..cv
+ end
+ elseif cc == delim then -- \<delim>
+ s = "\\"..cc
+ c_delim = c_delim + 1
+ elseif cc == "\\" then -- \\
+ s = "\\\\"
+ else -- literal character
+ s = cc
+ if cc == ndelim then
+ c_ndelim = c_ndelim + 1
+ end
+ end
+ z = sub(z, 1, i - 1)..s..sub(z, j)
+ i = i + #s
+ ------------------------------------------------------------
+ end--if p
+ ----------------------------------------------------------------
+ else-- c ~= "\\" -- <other> -- no change
+ i = i + 1
+ if c == ndelim then -- count ndelim, for switching delimiters
+ c_ndelim = c_ndelim + 1
+ end
+ ----------------------------------------------------------------
+ end--if c
+ end--while
+ --------------------------------------------------------------------
+ -- switching delimiters, a long-winded derivation:
+ -- (1) delim takes 2+2*c_delim bytes, ndelim takes c_ndelim bytes
+ -- (2) delim becomes c_delim bytes, ndelim becomes 2+2*c_ndelim bytes
+ -- simplifying the condition (1)>(2) --> c_delim > c_ndelim
+ if c_delim > c_ndelim then
+ i = 1
+ while i <= #z do
+ local p, q, r = find(z, "([\'\"])", i)
+ if not p then break end
+ if r == delim then -- \<delim> -> <delim>
+ z = sub(z, 1, p - 2)..sub(z, p)
+ i = p
+ else-- r == ndelim -- <ndelim> -> \<ndelim>
+ z = sub(z, 1, p - 1).."\\"..sub(z, p)
+ i = p + 2
+ end
+ end--while
+ delim = ndelim -- actually change delimiters
+ end
+ --------------------------------------------------------------------
+ z = delim..z..delim
+ if z ~= sinfos[I] then
+ if opt_details then
+ print("<string> (line "..stoklns[I]..") "..sinfos[I].." -> "..z)
+ opt_details = opt_details + 1
+ end
+ sinfos[I] = z
+ end
+end
+
+------------------------------------------------------------------------
+-- long string optimization
+-- * note: warning flagged if trailing whitespace found, not trimmed
+-- * remove first optional newline
+-- * normalize embedded newlines
+-- * reduce '=' separators in delimiters if possible
+------------------------------------------------------------------------
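+-- e.g. [==[hello]==] -> [[hello]], since the body contains no "]]"
+-- and the '=' separators can therefore be dropped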
+
+local function do_lstring(I)
+ local info = sinfos[I]
+ local delim1 = match(info, "^%[=*%[") -- cut out delimiters
+ local sep = #delim1
+ local delim2 = sub(info, -sep, -1)
+ local z = sub(info, sep + 1, -(sep + 1)) -- lstring without delims
+ local y = ""
+ local i = 1
+ --------------------------------------------------------------------
+ while true do
+ local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
+ -- deal with a single line
+ local ln
+ if not p then
+ ln = sub(z, i)
+ elseif p >= i then
+ ln = sub(z, i, p - 1)
+ end
+ if ln ~= "" then
+ -- flag a warning if there are trailing spaces, won't optimize!
+ if match(ln, "%s+$") then
+ warn.LSTRING = "trailing whitespace in long string near line "..stoklns[I]
+ end
+ y = y..ln
+ end
+ if not p then -- done if no more EOLs
+ break
+ end
+ -- deal with line endings, normalize them
+ i = p + 1
+ if p then
+ if #s > 0 and r ~= s then -- skip CRLF or LFCR
+ i = i + 1
+ end
+ -- skip first newline, which can be safely deleted
+ if p ~= 1 then
+ y = y.."\n"
+ end
+ end
+ end--while
+ --------------------------------------------------------------------
+ -- handle possible deletion of one or more '=' separators
+ if sep >= 3 then
+ local chk, okay = sep - 1
+ -- loop to test the ending delimiter with fewer '=' chars, down to zero
+ while chk >= 2 do
+ local delim = "%]"..rep("=", chk - 2).."%]"
+ if not match(y, delim) then okay = chk end
+ chk = chk - 1
+ end
+ if okay then -- change delimiters
+ sep = rep("=", okay - 2)
+ delim1, delim2 = "["..sep.."[", "]"..sep.."]"
+ end
+ end
+ --------------------------------------------------------------------
+ sinfos[I] = delim1..y..delim2
+end
+
+------------------------------------------------------------------------
+-- long comment optimization
+-- * note: does not remove first optional newline
+-- * trim trailing whitespace
+-- * normalize embedded newlines
+-- * reduce '=' separators in delimiters if possible
+------------------------------------------------------------------------
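+-- e.g. --[==[note]==] -> --[[note]], by the same separator reduction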
+
+local function do_lcomment(I)
+ local info = sinfos[I]
+ local delim1 = match(info, "^%-%-%[=*%[") -- cut out delimiters
+ local sep = #delim1
+ local delim2 = sub(info, -(sep - 2), -1)
+ local z = sub(info, sep + 1, -(sep - 1)) -- comment without delims
+ local y = ""
+ local i = 1
+ --------------------------------------------------------------------
+ while true do
+ local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
+ -- deal with a single line, extract and check trailing whitespace
+ local ln
+ if not p then
+ ln = sub(z, i)
+ elseif p >= i then
+ ln = sub(z, i, p - 1)
+ end
+ if ln ~= "" then
+ -- trim trailing whitespace if non-empty line
+ local ws = match(ln, "%s*$")
+ if #ws > 0 then ln = sub(ln, 1, -(#ws + 1)) end
+ y = y..ln
+ end
+ if not p then -- done if no more EOLs
+ break
+ end
+ -- deal with line endings, normalize them
+ i = p + 1
+ if p then
+ if #s > 0 and r ~= s then -- skip CRLF or LFCR
+ i = i + 1
+ end
+ y = y.."\n"
+ end
+ end--while
+ --------------------------------------------------------------------
+ -- handle possible deletion of one or more '=' separators
+ sep = sep - 2
+ if sep >= 3 then
+ local chk, okay = sep - 1
+ -- loop to test the ending delimiter with fewer '=' chars, down to zero
+ while chk >= 2 do
+ local delim = "%]"..rep("=", chk - 2).."%]"
+ if not match(y, delim) then okay = chk end
+ chk = chk - 1
+ end
+ if okay then -- change delimiters
+ sep = rep("=", okay - 2)
+ delim1, delim2 = "--["..sep.."[", "]"..sep.."]"
+ end
+ end
+ --------------------------------------------------------------------
+ sinfos[I] = delim1..y..delim2
+end
+
+------------------------------------------------------------------------
+-- short comment optimization
+-- * trim trailing whitespace
+------------------------------------------------------------------------
+
+local function do_comment(i)
+ local info = sinfos[i]
+ local ws = match(info, "%s*$") -- just look from end of string
+ if #ws > 0 then
+ info = sub(info, 1, -(#ws + 1)) -- trim trailing whitespace
+ end
+ sinfos[i] = info
+end
+
+------------------------------------------------------------------------
+-- returns true if string found in long comment
+-- * this is a feature to keep copyright or license texts
+------------------------------------------------------------------------
+
+local function keep_lcomment(opt_keep, info)
+ if not opt_keep then return false end -- option not set
+ local delim1 = match(info, "^%-%-%[=*%[") -- cut out delimiters
+ local sep = #delim1
+ local delim2 = sub(info, -sep, -1)
+ local z = sub(info, sep + 1, -(sep - 1)) -- comment without delims
+ if find(z, opt_keep, 1, true) then -- try to match
+ return true
+ end
+end
+
+------------------------------------------------------------------------
+-- main entry point
+-- * currently, lexer processing has 2 passes
+-- * processing is done on a line-oriented basis, which is easier to
+-- grok due to the next point...
+-- * since there are various options that can be enabled or disabled,
+-- processing is a little messy or convoluted
+------------------------------------------------------------------------
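+-- intended usage from the driver, as a sketch (the caller's variable
+-- names are assumed here):
+-- toklist, seminfolist, toklnlist
+-- = optlex.optimize(option, toklist, seminfolist, toklnlist)
+-- the input lists are repacked during processing, so callers should
+-- use the returned lists rather than the originals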
+
+function optimize(option, toklist, semlist, toklnlist)
+ --------------------------------------------------------------------
+ -- set option flags
+ --------------------------------------------------------------------
+ local opt_comments = option["opt-comments"]
+ local opt_whitespace = option["opt-whitespace"]
+ local opt_emptylines = option["opt-emptylines"]
+ local opt_eols = option["opt-eols"]
+ local opt_strings = option["opt-strings"]
+ local opt_numbers = option["opt-numbers"]
+ local opt_x = option["opt-experimental"]
+ local opt_keep = option.KEEP
+ opt_details = option.DETAILS and 0 -- upvalues for details display
+ print = print or base.print
+ if opt_eols then -- forced settings, otherwise won't work properly
+ opt_comments = true
+ opt_whitespace = true
+ opt_emptylines = true
+ elseif opt_x then
+ opt_whitespace = true
+ end
+ --------------------------------------------------------------------
+ -- variable initialization
+ --------------------------------------------------------------------
+ stoks, sinfos, stoklns -- set source lists
+ = toklist, semlist, toklnlist
+ local i = 1 -- token position
+ local tok, info -- current token
+ local prev -- position of last grammar token
+ -- on same line (for TK_SPACE stuff)
+ --------------------------------------------------------------------
+ -- changes a token, info pair
+ --------------------------------------------------------------------
+ local function settoken(tok, info, I)
+ I = I or i
+ stoks[I] = tok or ""
+ sinfos[I] = info or ""
+ end
+ --------------------------------------------------------------------
+ -- experimental optimization for ';' operator
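+ -- * e.g. "a=1; b=2" becomes "a=1 b=2": the ';' turns into a space
+ -- and whitespace optimization then shrinks it to a single space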
+ --------------------------------------------------------------------
+ if opt_x then
+ while true do
+ tok, info = stoks[i], sinfos[i]
+ if tok == "TK_EOS" then -- end of stream/pass
+ break
+ elseif tok == "TK_OP" and info == ";" then
+ -- ';' operator found, since it is entirely optional, set it
+ -- as a space to let whitespace optimization do the rest
+ settoken("TK_SPACE", " ")
+ end
+ i = i + 1
+ end
+ repack_tokens()
+ end
+ --------------------------------------------------------------------
+ -- processing loop (PASS 1)
+ --------------------------------------------------------------------
+ i = 1
+ while true do
+ tok, info = stoks[i], sinfos[i]
+ ----------------------------------------------------------------
+ local atstart = atlinestart(i) -- set line begin flag
+ if atstart then prev = nil end
+ ----------------------------------------------------------------
+ if tok == "TK_EOS" then -- end of stream/pass
+ break
+ ----------------------------------------------------------------
+ elseif tok == "TK_KEYWORD" or -- keywords, identifiers,
+ tok == "TK_NAME" or -- operators
+ tok == "TK_OP" then
+ -- TK_KEYWORD and TK_OP can't be optimized without a big
+ -- optimization framework; it would be more of an optimizing
+ -- compiler, not a source code compressor
+ -- TK_NAMEs that are locals need the parser to analyze/optimize
+ prev = i
+ ----------------------------------------------------------------
+ elseif tok == "TK_NUMBER" then -- numbers
+ if opt_numbers then
+ do_number(i) -- optimize
+ end
+ prev = i
+ ----------------------------------------------------------------
+ elseif tok == "TK_STRING" or -- strings, long strings
+ tok == "TK_LSTRING" then
+ if opt_strings then
+ if tok == "TK_STRING" then
+ do_string(i) -- optimize
+ else
+ do_lstring(i) -- optimize
+ end
+ end
+ prev = i
+ ----------------------------------------------------------------
+ elseif tok == "TK_COMMENT" then -- short comments
+ if opt_comments then
+ if i == 1 and sub(info, 1, 1) == "#" then
+ -- keep shbang comment, trim whitespace
+ do_comment(i)
+ else
+ -- safe to delete, as a TK_EOL (or TK_EOS) always follows
+ settoken() -- remove entirely
+ end
+ elseif opt_whitespace then -- trim whitespace only
+ do_comment(i)
+ end
+ ----------------------------------------------------------------
+ elseif tok == "TK_LCOMMENT" then -- long comments
+ if keep_lcomment(opt_keep, info) then
+ ------------------------------------------------------------
+ -- if --keep, we keep a long comment if <msg> is found;
+ -- this is a feature to keep copyright or license texts
+ if opt_whitespace then -- trim whitespace only
+ do_lcomment(i)
+ end
+ prev = i
+ elseif opt_comments then
+ local eols = commenteols(info)
+ ------------------------------------------------------------
+ -- prepare the opt_emptylines case first: if a disposable token
+ -- follows, the current one is safe to dump, else keep a space;
+ -- it is implied that the operation is safe for '-', because
+ -- current is a TK_LCOMMENT, and must be separate from a '-'
+ if is_faketoken[stoks[i + 1]] then
+ settoken() -- remove entirely
+ tok = ""
+ else
+ settoken("TK_SPACE", " ")
+ end
+ ------------------------------------------------------------
+ -- if there are embedded EOLs to keep and opt_emptylines is
+ -- disabled, then switch the token into one or more EOLs
+ if not opt_emptylines and eols > 0 then
+ settoken("TK_EOL", rep("\n", eols))
+ end
+ ------------------------------------------------------------
+ -- if optimizing whitespaces, force reinterpretation of the
+ -- token to give a chance for the space to be optimized away
+ if opt_whitespace and tok ~= "" then
+ i = i - 1 -- to reinterpret
+ end
+ ------------------------------------------------------------
+ else -- disabled case
+ if opt_whitespace then -- trim whitespace only
+ do_lcomment(i)
+ end
+ prev = i
+ end
+ ----------------------------------------------------------------
+ elseif tok == "TK_EOL" then -- line endings
+ if atstart and opt_emptylines then
+ settoken() -- remove entirely
+ elseif info == "\r\n" or info == "\n\r" then
+ -- normalize the rest of the EOLs for CRLF/LFCR only
+ -- (note that TK_LCOMMENT can change into several EOLs)
+ settoken("TK_EOL", "\n")
+ end
+ ----------------------------------------------------------------
+ elseif tok == "TK_SPACE" then -- whitespace
+ if opt_whitespace then
+ if atstart or atlineend(i) then
+ -- delete leading and trailing whitespace
+ settoken() -- remove entirely
+ else
+ ------------------------------------------------------------
+ -- at this point, since leading whitespace has been removed,
+ -- there should be either a real token or a TK_LCOMMENT
+ -- prior to hitting this whitespace; the TK_LCOMMENT case
+ -- only happens if opt_comments is disabled; so prev ~= nil
+ local ptok = stoks[prev]
+ if ptok == "TK_LCOMMENT" then
+ -- previous TK_LCOMMENT can abut with anything
+ settoken() -- remove entirely
+ else
+ -- prev must be a grammar token; consecutive TK_SPACE
+ -- tokens are impossible when optimizing whitespace
+ local ntok = stoks[i + 1]
+ if is_faketoken[ntok] then
+ -- handle special case where a '-' cannot abut with
+ -- either a short comment or a long comment
+ if (ntok == "TK_COMMENT" or ntok == "TK_LCOMMENT") and
+ ptok == "TK_OP" and sinfos[prev] == "-" then
+ -- keep token
+ else
+ settoken() -- remove entirely
+ end
+ else--is_realtoken
+ -- check a pair of grammar tokens, if can abut, then
+ -- delete space token entirely, otherwise keep one space
+ local s = checkpair(prev, i + 1)
+ if s == "" then
+ settoken() -- remove entirely
+ else
+ settoken("TK_SPACE", " ")
+ end
+ end
+ end
+ ------------------------------------------------------------
+ end
+ end
+ ----------------------------------------------------------------
+ else
+ error("unidentified token encountered")
+ end
+ ----------------------------------------------------------------
+ i = i + 1
+ end--while
+ repack_tokens()
+ --------------------------------------------------------------------
+ -- processing loop (PASS 2)
+ --------------------------------------------------------------------
+ if opt_eols then
+ i = 1
+ -- aggressive EOL removal only works with most non-grammar tokens
+ -- optimized away because it is a rather simple scheme -- basically
+ -- it just checks 'real' token pairs around EOLs
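+ -- e.g. an EOL between "f()" and "g()" can go, as "f()g()" lexes the
+ -- same, but one between "local a" and "local b" must remain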
+ if stoks[1] == "TK_COMMENT" then
+ -- a comment still remaining first must be the shbang; skip the whole line
+ i = 3
+ end
+ while true do
+ tok, info = stoks[i], sinfos[i]
+ --------------------------------------------------------------
+ if tok == "TK_EOS" then -- end of stream/pass
+ break
+ --------------------------------------------------------------
+ elseif tok == "TK_EOL" then -- consider each TK_EOL
+ local t1, t2 = stoks[i - 1], stoks[i + 1]
+ if is_realtoken[t1] and is_realtoken[t2] then -- sanity check
+ local s = checkpair(i - 1, i + 1)
+ if s == "" or t2 == "TK_EOS" then
+ settoken() -- remove entirely
+ end
+ end
+ end--if tok
+ --------------------------------------------------------------
+ i = i + 1
+ end--while
+ repack_tokens()
+ end
+ --------------------------------------------------------------------
+ if opt_details and opt_details > 0 then print() end -- spacing
+ return stoks, sinfos, stoklns
+end
+--end of inserted module
+end
+
+-- preload function for module optparser
+preload.optparser =
+function()
+--start of inserted module
+module "optparser"
+
+local string = base.require "string"
+local table = base.require "table"
+
+----------------------------------------------------------------------
+-- Letter frequencies for reducing symbol entropy (fixed version)
+-- * Might help a wee bit when the output file is compressed
+-- * See Wikipedia: http://en.wikipedia.org/wiki/Letter_frequencies
+-- * We use letter frequencies according to a Linotype keyboard, plus
+-- the underscore, and both lower case and upper case letters.
+-- * The arrangement below (LC, underscore, %d, UC) is arbitrary.
+-- * This is certainly not optimal, but is quick-and-dirty and the
+-- process has no significant overhead
+----------------------------------------------------------------------
+
+local LETTERS = "etaoinshrdlucmfwypvbgkqjxz_ETAOINSHRDLUCMFWYPVBGKQJXZ"
+local ALPHANUM = "etaoinshrdlucmfwypvbgkqjxz_0123456789ETAOINSHRDLUCMFWYPVBGKQJXZ"
+
+-- names or identifiers that must be skipped
+-- * the first two lines are for keywords
+local SKIP_NAME = {}
+for v in string.gmatch([[
+and break do else elseif end false for function if in
+local nil not or repeat return then true until while
+self]], "%S+") do
+ SKIP_NAME[v] = true
+end
+
+------------------------------------------------------------------------
+-- variables and data structures
+------------------------------------------------------------------------
+
+local toklist, seminfolist, -- token lists (lexer output)
+ tokpar, seminfopar, xrefpar, -- token lists (parser output)
+ globalinfo, localinfo, -- variable information tables
+ statinfo, -- statement type table
+ globaluniq, localuniq, -- unique name tables
+ var_new, -- index of new variable names
+ varlist -- list of output variables
+
+----------------------------------------------------------------------
+-- preprocess information table to get lists of unique names
+----------------------------------------------------------------------
+
+local function preprocess(infotable)
+ local uniqtable = {}
+ for i = 1, #infotable do -- enumerate info table
+ local obj = infotable[i]
+ local name = obj.name
+ --------------------------------------------------------------------
+ if not uniqtable[name] then -- not found, start an entry
+ uniqtable[name] = {
+ decl = 0, token = 0, size = 0,
+ }
+ end
+ --------------------------------------------------------------------
+ local uniq = uniqtable[name] -- count declarations, tokens, size
+ uniq.decl = uniq.decl + 1
+ local xref = obj.xref
+ local xcount = #xref
+ uniq.token = uniq.token + xcount
+ uniq.size = uniq.size + xcount * #name
+ --------------------------------------------------------------------
+ if obj.decl then -- if local table, create first,last pairs
+ obj.id = i
+ obj.xcount = xcount
+ if xcount > 1 then -- if ==1, means local never accessed
+ obj.first = xref[2]
+ obj.last = xref[xcount]
+ end
+ --------------------------------------------------------------------
+ else -- if global table, add a back ref
+ uniq.id = i
+ end
+ --------------------------------------------------------------------
+ end--for
+ return uniqtable
+end
+
+----------------------------------------------------------------------
+-- calculate actual symbol frequencies, in order to reduce entropy
+-- * this may help further reduce the size of compressed sources
+-- * note that since parser optimizations run before lexer
+-- optimizations, the frequency table is not exact!
+-- * yes, this will miss --keep block comments too...
+----------------------------------------------------------------------
+
+local function recalc_for_entropy(option)
+ local byte = string.byte
+ local char = string.char
+ -- table of token classes to accept in calculating symbol frequency
+ local ACCEPT = {
+ TK_KEYWORD = true, TK_NAME = true, TK_NUMBER = true,
+ TK_STRING = true, TK_LSTRING = true,
+ }
+ if not option["opt-comments"] then
+ ACCEPT.TK_COMMENT = true
+ ACCEPT.TK_LCOMMENT = true
+ end
+ --------------------------------------------------------------------
+ -- create a new table and remove any original locals by filtering
+ --------------------------------------------------------------------
+ local filtered = {}
+ for i = 1, #toklist do
+ filtered[i] = seminfolist[i]
+ end
+ for i = 1, #localinfo do -- enumerate local info table
+ local obj = localinfo[i]
+ local xref = obj.xref
+ for j = 1, obj.xcount do
+ local p = xref[j]
+ filtered[p] = "" -- remove locals
+ end
+ end
+ --------------------------------------------------------------------
+ local freq = {} -- reset symbol frequency table
+ for i = 0, 255 do freq[i] = 0 end
+ for i = 1, #toklist do -- gather symbol frequency
+ local tok, info = toklist[i], filtered[i]
+ if ACCEPT[tok] then
+ for j = 1, #info do
+ local c = byte(info, j)
+ freq[c] = freq[c] + 1
+ end
+ end--if
+ end--for
+ --------------------------------------------------------------------
+ -- function to re-sort symbols according to actual frequencies
+ --------------------------------------------------------------------
+ local function resort(symbols)
+ local symlist = {}
+ for i = 1, #symbols do -- prepare table to sort
+ local c = byte(symbols, i)
+ symlist[i] = { c = c, freq = freq[c], }
+ end
+ table.sort(symlist, -- sort selected symbols
+ function(v1, v2)
+ return v1.freq > v2.freq
+ end
+ )
+ local charlist = {} -- reconstitute the string
+ for i = 1, #symlist do
+ charlist[i] = char(symlist[i].c)
+ end
+ return table.concat(charlist)
+ end
+ --------------------------------------------------------------------
+ LETTERS = resort(LETTERS) -- change letter arrangement
+ ALPHANUM = resort(ALPHANUM)
+end
+
+----------------------------------------------------------------------
+-- returns a string containing a new local variable name to use, and
+-- a flag indicating whether it collides with a global variable
+-- * trapping keywords and other names like 'self' is done elsewhere
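+-- * allocation follows LETTERS order: "e", "t", "a", ... for the
+-- first 53 single-char names, then two-char names "ee", "te", "ae",
+-- ... where the first character cycles fastest, and so on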
+----------------------------------------------------------------------
+
+local function new_var_name()
+ local var
+ local cletters, calphanum = #LETTERS, #ALPHANUM
+ local v = var_new
+ if v < cletters then -- single char
+ v = v + 1
+ var = string.sub(LETTERS, v, v)
+ else -- longer names
+ local range, sz = cletters, 1 -- calculate # chars fit
+ repeat
+ v = v - range
+ range = range * calphanum
+ sz = sz + 1
+ until range > v
+ local n = v % cletters -- left side cycles faster
+ v = (v - n) / cletters -- do first char first
+ n = n + 1
+ var = string.sub(LETTERS, n, n)
+ while sz > 1 do
+ local m = v % calphanum
+ v = (v - m) / calphanum
+ m = m + 1
+ var = var..string.sub(ALPHANUM, m, m)
+ sz = sz - 1
+ end
+ end
+ var_new = var_new + 1
+ return var, globaluniq[var] ~= nil
+end
+
+----------------------------------------------------------------------
+-- calculate and print some statistics
+-- * probably better in main source, put here for now
+----------------------------------------------------------------------
+
+local function stats_summary(globaluniq, localuniq, afteruniq, option)
+ local print = print or base.print
+ local fmt = string.format
+ local opt_details = option.DETAILS
+ if option.QUIET then return end
+ local uniq_g, uniq_li, uniq_lo, uniq_ti, uniq_to, -- stats needed
+ decl_g, decl_li, decl_lo, decl_ti, decl_to,
+ token_g, token_li, token_lo, token_ti, token_to,
+ size_g, size_li, size_lo, size_ti, size_to
+ = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ local function avg(c, l) -- safe average function
+ if c == 0 then return 0 end
+ return l / c
+ end
+ --------------------------------------------------------------------
+ -- collect statistics (note: globals do not have declarations!)
+ --------------------------------------------------------------------
+ for name, uniq in base.pairs(globaluniq) do
+ uniq_g = uniq_g + 1
+ token_g = token_g + uniq.token
+ size_g = size_g + uniq.size
+ end
+ for name, uniq in base.pairs(localuniq) do
+ uniq_li = uniq_li + 1
+ decl_li = decl_li + uniq.decl
+ token_li = token_li + uniq.token
+ size_li = size_li + uniq.size
+ end
+ for name, uniq in base.pairs(afteruniq) do
+ uniq_lo = uniq_lo + 1
+ decl_lo = decl_lo + uniq.decl
+ token_lo = token_lo + uniq.token
+ size_lo = size_lo + uniq.size
+ end
+ uniq_ti = uniq_g + uniq_li
+ decl_ti = decl_g + decl_li
+ token_ti = token_g + token_li
+ size_ti = size_g + size_li
+ uniq_to = uniq_g + uniq_lo
+ decl_to = decl_g + decl_lo
+ token_to = token_g + token_lo
+ size_to = size_g + size_lo
+ --------------------------------------------------------------------
+ -- detailed stats: global list
+ --------------------------------------------------------------------
+ if opt_details then
+ local sorted = {} -- sort table of unique global names by size
+ for name, uniq in base.pairs(globaluniq) do
+ uniq.name = name
+ sorted[#sorted + 1] = uniq
+ end
+ table.sort(sorted,
+ function(v1, v2)
+ return v1.size > v2.size
+ end
+ )
+ local tabf1, tabf2 = "%8s%8s%10s %s", "%8d%8d%10.2f %s"
+ local hl = string.rep("-", 44)
+ print("*** global variable list (sorted by size) ***\n"..hl)
+ print(fmt(tabf1, "Token", "Input", "Input", "Global"))
+ print(fmt(tabf1, "Count", "Bytes", "Average", "Name"))
+ print(hl)
+ for i = 1, #sorted do
+ local uniq = sorted[i]
+ print(fmt(tabf2, uniq.token, uniq.size, avg(uniq.token, uniq.size), uniq.name))
+ end
+ print(hl)
+ print(fmt(tabf2, token_g, size_g, avg(token_g, size_g), "TOTAL"))
+ print(hl.."\n")
+ --------------------------------------------------------------------
+ -- detailed stats: local list
+ --------------------------------------------------------------------
+ local tabf1, tabf2 = "%8s%8s%8s%10s%8s%10s %s", "%8d%8d%8d%10.2f%8d%10.2f %s"
+ local hl = string.rep("-", 70)
+ print("*** local variable list (sorted by allocation order) ***\n"..hl)
+ print(fmt(tabf1, "Decl.", "Token", "Input", "Input", "Output", "Output", "Global"))
+ print(fmt(tabf1, "Count", "Count", "Bytes", "Average", "Bytes", "Average", "Name"))
+ print(hl)
+ for i = 1, #varlist do -- iterate according to order assigned
+ local name = varlist[i]
+ local uniq = afteruniq[name]
+ local old_t, old_s = 0, 0
+ for j = 1, #localinfo do -- find corresponding old names and calculate
+ local obj = localinfo[j]
+ if obj.name == name then
+ old_t = old_t + obj.xcount
+ old_s = old_s + obj.xcount * #obj.oldname
+ end
+ end
+ print(fmt(tabf2, uniq.decl, uniq.token, old_s, avg(old_t, old_s),
+ uniq.size, avg(uniq.token, uniq.size), name))
+ end
+ print(hl)
+ print(fmt(tabf2, decl_lo, token_lo, size_li, avg(token_li, size_li),
+ size_lo, avg(token_lo, size_lo), "TOTAL"))
+ print(hl.."\n")
+ end--if opt_details
+ --------------------------------------------------------------------
+ -- display output
+ --------------------------------------------------------------------
+ local tabf1, tabf2 = "%-16s%8s%8s%8s%8s%10s", "%-16s%8d%8d%8d%8d%10.2f"
+ local hl = string.rep("-", 58)
+ print("*** local variable optimization summary ***\n"..hl)
+ print(fmt(tabf1, "Variable", "Unique", "Decl.", "Token", "Size", "Average"))
+ print(fmt(tabf1, "Types", "Names", "Count", "Count", "Bytes", "Bytes"))
+ print(hl)
+ print(fmt(tabf2, "Global", uniq_g, decl_g, token_g, size_g, avg(token_g, size_g)))
+ print(hl)
+ print(fmt(tabf2, "Local (in)", uniq_li, decl_li, token_li, size_li, avg(token_li, size_li)))
+ print(fmt(tabf2, "TOTAL (in)", uniq_ti, decl_ti, token_ti, size_ti, avg(token_ti, size_ti)))
+ print(hl)
+ print(fmt(tabf2, "Local (out)", uniq_lo, decl_lo, token_lo, size_lo, avg(token_lo, size_lo)))
+ print(fmt(tabf2, "TOTAL (out)", uniq_to, decl_to, token_to, size_to, avg(token_to, size_to)))
+ print(hl.."\n")
+end
+
+----------------------------------------------------------------------
+-- experimental optimization for f("string") statements
+-- * safe to delete parentheses without adding whitespace, as both
+-- kinds of strings can abut with anything else
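+-- * e.g. print("hello") becomes print"hello"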
+----------------------------------------------------------------------
+
+local function optimize_func1()
+ ------------------------------------------------------------------
+ local function is_strcall(j) -- find f("string") pattern
+ local t1 = tokpar[j + 1] or ""
+ local t2 = tokpar[j + 2] or ""
+ local t3 = tokpar[j + 3] or ""
+ if t1 == "(" and t2 == "<string>" and t3 == ")" then
+ return true
+ end
+ end
+ ------------------------------------------------------------------
+ local del_list = {} -- scan for function pattern,
+ local i = 1 -- tokens to be deleted are marked
+ while i <= #tokpar do
+ local id = statinfo[i]
+ if id == "call" and is_strcall(i) then -- found & mark ()
+ del_list[i + 1] = true -- '('
+ del_list[i + 3] = true -- ')'
+ i = i + 3
+ end
+ i = i + 1
+ end
+ ------------------------------------------------------------------
+ -- delete a token and adjust all relevant tables
+ -- * currently invalidates globalinfo and localinfo (not updated),
+ -- so any other optimization is done after processing locals
+ -- (of course, we can also lex the source data again...)
+ -- * faster one-pass token deletion
+ ------------------------------------------------------------------
+ local i, dst, idend = 1, 1, #tokpar
+ local del_list2 = {}
+ while dst <= idend do -- process parser tables
+ if del_list[i] then -- found a token to delete?
+ del_list2[xrefpar[i]] = true
+ i = i + 1
+ end
+ if i > dst then
+ if i <= idend then -- shift table items lower
+ tokpar[dst] = tokpar[i]
+ seminfopar[dst] = seminfopar[i]
+ xrefpar[dst] = xrefpar[i] - (i - dst)
+ statinfo[dst] = statinfo[i]
+ else -- nil out excess entries
+ tokpar[dst] = nil
+ seminfopar[dst] = nil
+ xrefpar[dst] = nil
+ statinfo[dst] = nil
+ end
+ end
+ i = i + 1
+ dst = dst + 1
+ end
+ local i, dst, idend = 1, 1, #toklist
+ while dst <= idend do -- process lexer tables
+ if del_list2[i] then -- found a token to delete?
+ i = i + 1
+ end
+ if i > dst then
+ if i <= idend then -- shift table items lower
+ toklist[dst] = toklist[i]
+ seminfolist[dst] = seminfolist[i]
+ else -- nil out excess entries
+ toklist[dst] = nil
+ seminfolist[dst] = nil
+ end
+ end
+ i = i + 1
+ dst = dst + 1
+ end
+end
+
+----------------------------------------------------------------------
+-- local variable optimization
+----------------------------------------------------------------------
+
+local function optimize_locals(option)
+ var_new = 0 -- reset variable name allocator
+ varlist = {}
+ ------------------------------------------------------------------
+ -- preprocess global/local tables, handle entropy reduction
+ ------------------------------------------------------------------
+ globaluniq = preprocess(globalinfo)
+ localuniq = preprocess(localinfo)
+ if option["opt-entropy"] then -- for entropy improvement
+ recalc_for_entropy(option)
+ end
+ ------------------------------------------------------------------
+ -- build initial declared object table, then sort according to
+ -- token count; this might help assign more tokens to the most
+ -- common variable names such as 'e', thus possibly reducing entropy
+ -- * an object knows its localinfo index via its 'id' field
+ -- * special handling for "self" special local (parameter) here
+ ------------------------------------------------------------------
+ local object = {}
+ for i = 1, #localinfo do
+ object[i] = localinfo[i]
+ end
+ table.sort(object, -- sort largest first
+ function(v1, v2)
+ return v1.xcount > v2.xcount
+ end
+ )
+ ------------------------------------------------------------------
+ -- the special "self" function parameters must be preserved
+ -- * the allocator below will never use "self", so it is safe to
+ -- keep those implicit declarations as-is
+ ------------------------------------------------------------------
+ local temp, j, gotself = {}, 1, false
+ for i = 1, #object do
+ local obj = object[i]
+ if not obj.isself then
+ temp[j] = obj
+ j = j + 1
+ else
+ gotself = true
+ end
+ end
+ object = temp
+ ------------------------------------------------------------------
+ -- a simple first-come first-served heuristic name allocator,
+ -- note that this is in no way optimal...
+ -- * each object is a local variable declaration plus existence
+ -- * the aim is to assign short names to as many tokens as possible,
+ -- so the following tries to maximize name reuse
+ -- * note that we preserve sort order
+ ------------------------------------------------------------------
+ local nobject = #object
+ while nobject > 0 do
+ local varname, gcollide
+ repeat
+ varname, gcollide = new_var_name() -- collect a variable name
+ until not SKIP_NAME[varname] -- skip all special names
+ varlist[#varlist + 1] = varname -- keep a list
+ local oleft = nobject
+ ------------------------------------------------------------------
+ -- if variable name collides with an existing global, the name
+ -- cannot be used by a local when the name is accessed as a global
+ -- while the local is alive (between 'act' and 'rem'), so
+ -- we drop objects that collide with the corresponding global
+ ------------------------------------------------------------------
+ if gcollide then
+ -- find the xref table of the global
+ local gref = globalinfo[globaluniq[varname].id].xref
+ local ngref = #gref
+ -- enumerate for all current objects; all are valid at this point
+ for i = 1, nobject do
+ local obj = object[i]
+ local act, rem = obj.act, obj.rem -- 'live' range of local
+ -- if rem < 0, it is a -id to a local that had the same name
+ -- so follow rem to extend it; does this make sense?
+ while rem < 0 do
+ rem = localinfo[-rem].rem
+ end
+ local drop
+ for j = 1, ngref do
+ local p = gref[j]
+ if p >= act and p <= rem then drop = true end -- in range?
+ end
+ if drop then
+ obj.skip = true
+ oleft = oleft - 1
+ end
+ end--for
+ end--if gcollide
+ ------------------------------------------------------------------
+ -- now the first unassigned local (since it's sorted) will be the
+ -- one with the most tokens to rename, so we set this one and then
+ -- eliminate all others that collide, then any locals that are
+ -- left can reuse the same variable name; this is repeated until
+ -- all local declarations that can use this name are assigned
+ -- * the criteria for local-local reuse/collision is:
+ -- A is the local with a name already assigned
+ -- B is the unassigned local under consideration
+ -- => anytime A is accessed, it cannot be when B is 'live'
+ -- => to speed up things, we have first/last accesses noted
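+ -- * e.g. if all of A's accesses fall before B's 'act' or after
+ -- B's 'rem', the two never overlap and can share one name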
+ ------------------------------------------------------------------
+ while oleft > 0 do
+ local i = 1
+ while object[i].skip do -- scan for first object
+ i = i + 1
+ end
+ ------------------------------------------------------------------
+ -- first object is free for assignment of the variable name
+ -- [first,last] gives the access range for collision checking
+ ------------------------------------------------------------------
+ oleft = oleft - 1
+ local obja = object[i]
+ i = i + 1
+ obja.newname = varname
+ obja.skip = true
+ obja.done = true
+ local first, last = obja.first, obja.last
+ local xref = obja.xref
+ ------------------------------------------------------------------
+ -- then, scan all the rest and drop those colliding
+ -- if A was never accessed then it'll never collide with anything
+ -- otherwise trivial skip if:
+ -- * B was activated after A's last access (last < act)
+ -- * B was removed before A's first access (first > rem)
+ -- if not, see detailed skip below...
+ ------------------------------------------------------------------
+ if first and oleft > 0 then -- must have at least 1 access
+ local scanleft = oleft
+ while scanleft > 0 do
+ while object[i].skip do -- next valid object
+ i = i + 1
+ end
+ scanleft = scanleft - 1
+ local objb = object[i]
+ i = i + 1
+ local act, rem = objb.act, objb.rem -- live range of B
+ -- if rem < 0, extend range of rem thru' following local
+ while rem < 0 do
+ rem = localinfo[-rem].rem
+ end
+ --------------------------------------------------------
+ if not(last < act or first > rem) then -- possible collision
+ --------------------------------------------------------
+ -- B is activated later than A or at the same statement,
+ -- this means for no collision, A cannot be accessed when B
+ -- is alive, since B overrides A (or is a peer)
+ --------------------------------------------------------
+ if act >= obja.act then
+ for j = 1, obja.xcount do -- ... then check every access
+ local p = xref[j]
+ if p >= act and p <= rem then -- A accessed when B live!
+ oleft = oleft - 1
+ objb.skip = true
+ break
+ end
+ end--for
+ --------------------------------------------------------
+ -- A is activated later than B, this means for no collision,
+ -- A's access is okay since it overrides B, but B's last
+ -- access needs to be earlier than A's activation time
+ --------------------------------------------------------
+ else
+ if objb.last and objb.last >= obja.act then
+ oleft = oleft - 1
+ objb.skip = true
+ end
+ end
+ end
+ --------------------------------------------------------
+ if oleft == 0 then break end
+ end
+ end--if first
+ ------------------------------------------------------------------
+ end--while
+ ------------------------------------------------------------------
+ -- after assigning all possible locals to one variable name, the
+ -- unassigned locals/objects have the skip field reset and the table
+ -- is compacted, to hopefully reduce iteration time
+ ------------------------------------------------------------------
+ local temp, j = {}, 1
+ for i = 1, nobject do
+ local obj = object[i]
+ if not obj.done then
+ obj.skip = false
+ temp[j] = obj
+ j = j + 1
+ end
+ end
+ object = temp -- new compacted object table
+ nobject = #object -- objects left to process
+ ------------------------------------------------------------------
+ end--while
+ ------------------------------------------------------------------
+ -- after assigning all locals with new variable names, we can
+ -- patch in the new names, and reprocess to get 'after' stats
+ ------------------------------------------------------------------
+ for i = 1, #localinfo do -- enumerate all locals
+ local obj = localinfo[i]
+ local xref = obj.xref
+ if obj.newname then -- if got new name, patch it in
+ for j = 1, obj.xcount do
+ local p = xref[j] -- xrefs indexes the token list
+ seminfolist[p] = obj.newname
+ end
+ obj.name, obj.oldname -- adjust names
+ = obj.newname, obj.name
+ else
+ obj.oldname = obj.name -- for cases like 'self'
+ end
+ end
+ ------------------------------------------------------------------
+ -- deal with statistics output
+ ------------------------------------------------------------------
+ if gotself then -- add 'self' to end of list
+ varlist[#varlist + 1] = "self"
+ end
+ local afteruniq = preprocess(localinfo)
+ stats_summary(globaluniq, localuniq, afteruniq, option)
+end
+
+
+----------------------------------------------------------------------
+-- main entry point
+----------------------------------------------------------------------
+
+function optimize(option, _toklist, _seminfolist, xinfo)
+ -- set tables
+ toklist, seminfolist -- from lexer
+ = _toklist, _seminfolist
+ tokpar, seminfopar, xrefpar -- from parser
+ = xinfo.toklist, xinfo.seminfolist, xinfo.xreflist
+ globalinfo, localinfo, statinfo -- from parser
+ = xinfo.globalinfo, xinfo.localinfo, xinfo.statinfo
+ ------------------------------------------------------------------
+ -- optimize locals
+ ------------------------------------------------------------------
+ if option["opt-locals"] then
+ optimize_locals(option)
+ end
+ ------------------------------------------------------------------
+ -- other optimizations
+ ------------------------------------------------------------------
+ if option["opt-experimental"] then -- experimental
+ optimize_func1()
+ -- WARNING globalinfo and localinfo now invalidated!
+ end
+end
+--end of inserted module
+end
+
+-- preload function for module equiv
+preload.equiv =
+function()
+--start of inserted module
+module "equiv"
+
+local string = base.require "string"
+local loadstring = base.loadstring
+local sub = string.sub
+local match = string.match
+local dump = string.dump
+local byte = string.byte
+
+--[[--------------------------------------------------------------------
+-- variable and data initialization
+----------------------------------------------------------------------]]
+
+local is_realtoken = { -- significant (grammar) tokens
+ TK_KEYWORD = true,
+ TK_NAME = true,
+ TK_NUMBER = true,
+ TK_STRING = true,
+ TK_LSTRING = true,
+ TK_OP = true,
+ TK_EOS = true,
+}
+
+local option, llex, warn
+
+--[[--------------------------------------------------------------------
+-- functions
+----------------------------------------------------------------------]]
+
+------------------------------------------------------------------------
+-- initialization function
+------------------------------------------------------------------------
+
+function init(_option, _llex, _warn)
+ option = _option
+ llex = _llex
+ warn = _warn
+end
+
+------------------------------------------------------------------------
+-- function to build lists containing a 'normal' lexer stream
+------------------------------------------------------------------------
+
+local function build_stream(s)
+ llex.init(s)
+ llex.llex()
+ local stok, sseminfo -- source list (with whitespace elements)
+ = llex.tok, llex.seminfo
+ local tok, seminfo -- processed list (real elements only)
+ = {}, {}
+ for i = 1, #stok do
+ local t = stok[i]
+ if is_realtoken[t] then
+ tok[#tok + 1] = t
+ seminfo[#seminfo + 1] = sseminfo[i]
+ end
+ end--for
+ return tok, seminfo
+end
+
+------------------------------------------------------------------------
+-- test source (lexer stream) equivalence
+------------------------------------------------------------------------
+
+function source(z, dat)
+ --------------------------------------------------------------------
+ -- function to return a dumped string for seminfo compares
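+ -- (e.g. "\65" and "A", or the numbers 10 and 1e1, dump to identical
+ -- chunks, so differently-written but equal constants compare equal)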
+ --------------------------------------------------------------------
+ local function dumpsem(s)
+ local sf = loadstring("return "..s, "z")
+ if sf then
+ return dump(sf)
+ end
+ end
+ --------------------------------------------------------------------
+ -- mark and optionally report non-equivalence
+ --------------------------------------------------------------------
+ local function bork(msg)
+ if option.DETAILS then base.print("SRCEQUIV: "..msg) end
+ warn.SRC_EQUIV = true
+ end
+ --------------------------------------------------------------------
+ -- get lexer streams for both source strings, compare
+ --------------------------------------------------------------------
+ local tok1, seminfo1 = build_stream(z) -- original
+ local tok2, seminfo2 = build_stream(dat) -- compressed
+ --------------------------------------------------------------------
+ -- compare shbang lines ignoring EOL
+ --------------------------------------------------------------------
+ local sh1 = match(z, "^(#[^\r\n]*)")
+ local sh2 = match(dat, "^(#[^\r\n]*)")
+ if sh1 or sh2 then
+ if not sh1 or not sh2 or sh1 ~= sh2 then
+ bork("shbang lines different")
+ end
+ end
+ --------------------------------------------------------------------
+ -- compare by simple count
+ --------------------------------------------------------------------
+ if #tok1 ~= #tok2 then
+ bork("count "..#tok1.." "..#tok2)
+ return
+ end
+ --------------------------------------------------------------------
+ -- compare each element the best we can
+ --------------------------------------------------------------------
+ for i = 1, #tok1 do
+ local t1, t2 = tok1[i], tok2[i]
+ local s1, s2 = seminfo1[i], seminfo2[i]
+ if t1 ~= t2 then -- by type
+ bork("type ["..i.."] "..t1.." "..t2)
+ break
+ end
+ if t1 == "TK_KEYWORD" or t1 == "TK_NAME" or t1 == "TK_OP" then
+ if t1 == "TK_NAME" and option["opt-locals"] then
+ -- can't compare identifiers of locals that are optimized
+ elseif s1 ~= s2 then -- by semantic info (simple)
+ bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2)
+ break
+ end
+ elseif t1 == "TK_EOS" then
+ -- no seminfo to compare
+ else-- "TK_NUMBER" or "TK_STRING" or "TK_LSTRING"
+ -- compare 'binary' form, so dump a function
+ local s1b,s2b = dumpsem(s1), dumpsem(s2)
+ if not s1b or not s2b or s1b ~= s2b then
+ bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2)
+ break
+ end
+ end
+ end--for
+ --------------------------------------------------------------------
+ -- successful comparison if end is reached with no borks
+ --------------------------------------------------------------------
+end
+
+------------------------------------------------------------------------
+-- test binary chunk equivalence
+------------------------------------------------------------------------
+
+function binary(z, dat)
+ local TNIL = 0
+ local TBOOLEAN = 1
+ local TNUMBER = 3
+ local TSTRING = 4
+ --------------------------------------------------------------------
+ -- mark and optionally report non-equivalence
+ --------------------------------------------------------------------
+ local function bork(msg)
+ if option.DETAILS then base.print("BINEQUIV: "..msg) end
+ warn.BIN_EQUIV = true
+ end
+ --------------------------------------------------------------------
+ -- function to remove shbang line so that loadstring runs
+ --------------------------------------------------------------------
+ local function zap_shbang(s)
+ local shbang = match(s, "^(#[^\r\n]*\r?\n?)")
+ if shbang then -- cut out shbang
+ s = sub(s, #shbang + 1)
+ end
+ return s
+ end
+ --------------------------------------------------------------------
+ -- attempt to compile, then dump to get binary chunk string
+ --------------------------------------------------------------------
+ local cz = loadstring(zap_shbang(z), "z")
+ if not cz then
+ bork("failed to compile original sources for binary chunk comparison")
+ return
+ end
+ local cdat = loadstring(zap_shbang(dat), "z")
+ if not cdat then
+ bork("failed to compile compressed result for binary chunk comparison")
+ return
+ end
+ -- if loadstring() works, dump assuming string.dump() is error-free
+ local c1 = { i = 1, dat = dump(cz) }
+ c1.len = #c1.dat
+ local c2 = { i = 1, dat = dump(cdat) }
+ c2.len = #c2.dat
+ --------------------------------------------------------------------
+ -- support functions to handle binary chunk reading
+ --------------------------------------------------------------------
+ local endian,
+ sz_int, sz_sizet, -- sizes of data types
+ sz_inst, sz_number,
+ getint, getsizet
+ --------------------------------------------------------------------
+ local function ensure(c, sz) -- check if bytes exist
+ if c.i + sz - 1 > c.len then return end
+ return true
+ end
+ --------------------------------------------------------------------
+ local function skip(c, sz) -- skip some bytes
+ if not sz then sz = 1 end
+ c.i = c.i + sz
+ end
+ --------------------------------------------------------------------
+ local function getbyte(c) -- return a byte value
+ local i = c.i
+ if i > c.len then return end
+ local d = sub(c.dat, i, i)
+ c.i = i + 1
+ return byte(d)
+ end
+ --------------------------------------------------------------------
+ local function getint_l(c) -- return an int value (little-endian)
+ local n, scale = 0, 1
+ if not ensure(c, sz_int) then return end
+ for j = 1, sz_int do
+ n = n + scale * getbyte(c)
+ scale = scale * 256
+ end
+ return n
+ end
+ --------------------------------------------------------------------
+ local function getint_b(c) -- return an int value (big-endian)
+ local n = 0
+ if not ensure(c, sz_int) then return end
+ for j = 1, sz_int do
+ n = n * 256 + getbyte(c)
+ end
+ return n
+ end
+ --------------------------------------------------------------------
+ local function getsizet_l(c) -- return a size_t value (little-endian)
+ local n, scale = 0, 1
+ if not ensure(c, sz_sizet) then return end
+ for j = 1, sz_sizet do
+ n = n + scale * getbyte(c)
+ scale = scale * 256
+ end
+ return n
+ end
+ --------------------------------------------------------------------
+ local function getsizet_b(c) -- return a size_t value (big-endian)
+ local n = 0
+ if not ensure(c, sz_sizet) then return end
+ for j = 1, sz_sizet do
+ n = n * 256 + getbyte(c)
+ end
+ return n
+ end
+ --------------------------------------------------------------------
+ local function getblock(c, sz) -- return a block (as a string)
+ local i = c.i
+ local j = i + sz - 1
+ if j > c.len then return end
+ local d = sub(c.dat, i, j)
+ c.i = i + sz
+ return d
+ end
+ --------------------------------------------------------------------
+ local function getstring(c) -- return a string
+ local n = getsizet(c)
+ if not n then return end
+ if n == 0 then return "" end
+ return getblock(c, n)
+ end
+ --------------------------------------------------------------------
+ local function goodbyte(c1, c2) -- compare byte value
+ local b1, b2 = getbyte(c1), getbyte(c2)
+ if not b1 or not b2 or b1 ~= b2 then
+ return
+ end
+ return b1
+ end
+ --------------------------------------------------------------------
+  local function badbyte(c1, c2) -- true if byte values differ or are missing
+ local b = goodbyte(c1, c2)
+ if not b then return true end
+ end
+ --------------------------------------------------------------------
+ local function goodint(c1, c2) -- compare int value
+ local i1, i2 = getint(c1), getint(c2)
+ if not i1 or not i2 or i1 ~= i2 then
+ return
+ end
+ return i1
+ end
+ --------------------------------------------------------------------
+ -- recursively-called function to compare function prototypes
+ --------------------------------------------------------------------
+ local function getfunc(c1, c2)
+ -- source name (ignored)
+ if not getstring(c1) or not getstring(c2) then
+ bork("bad source name"); return
+ end
+ -- linedefined (ignored)
+ if not getint(c1) or not getint(c2) then
+ bork("bad linedefined"); return
+ end
+ -- lastlinedefined (ignored)
+ if not getint(c1) or not getint(c2) then
+ bork("bad lastlinedefined"); return
+ end
+    if not (ensure(c1, 4) and ensure(c2, 4)) then
+      bork("prototype header broken"); return
+    end
+ -- nups (compared)
+ if badbyte(c1, c2) then
+ bork("bad nups"); return
+ end
+ -- numparams (compared)
+ if badbyte(c1, c2) then
+ bork("bad numparams"); return
+ end
+ -- is_vararg (compared)
+ if badbyte(c1, c2) then
+ bork("bad is_vararg"); return
+ end
+ -- maxstacksize (compared)
+ if badbyte(c1, c2) then
+ bork("bad maxstacksize"); return
+ end
+ -- code (compared)
+ local ncode = goodint(c1, c2)
+ if not ncode then
+ bork("bad ncode"); return
+ end
+ local code1 = getblock(c1, ncode * sz_inst)
+ local code2 = getblock(c2, ncode * sz_inst)
+ if not code1 or not code2 or code1 ~= code2 then
+ bork("bad code block"); return
+ end
+ -- constants (compared)
+ local nconst = goodint(c1, c2)
+ if not nconst then
+ bork("bad nconst"); return
+ end
+ for i = 1, nconst do
+ local ctype = goodbyte(c1, c2)
+ if not ctype then
+ bork("bad const type"); return
+ end
+ if ctype == TBOOLEAN then
+ if badbyte(c1, c2) then
+ bork("bad boolean value"); return
+ end
+ elseif ctype == TNUMBER then
+ local num1 = getblock(c1, sz_number)
+ local num2 = getblock(c2, sz_number)
+ if not num1 or not num2 or num1 ~= num2 then
+ bork("bad number value"); return
+ end
+ elseif ctype == TSTRING then
+ local str1 = getstring(c1)
+ local str2 = getstring(c2)
+ if not str1 or not str2 or str1 ~= str2 then
+ bork("bad string value"); return
+ end
+ end
+ end
+ -- prototypes (compared recursively)
+ local nproto = goodint(c1, c2)
+ if not nproto then
+ bork("bad nproto"); return
+ end
+ for i = 1, nproto do
+ if not getfunc(c1, c2) then
+ bork("bad function prototype"); return
+ end
+ end
+ -- debug information (ignored)
+ -- lineinfo (ignored)
+ local sizelineinfo1 = getint(c1)
+ if not sizelineinfo1 then
+ bork("bad sizelineinfo1"); return
+ end
+ local sizelineinfo2 = getint(c2)
+ if not sizelineinfo2 then
+ bork("bad sizelineinfo2"); return
+ end
+ if not getblock(c1, sizelineinfo1 * sz_int) then
+ bork("bad lineinfo1"); return
+ end
+ if not getblock(c2, sizelineinfo2 * sz_int) then
+ bork("bad lineinfo2"); return
+ end
+ -- locvars (ignored)
+ local sizelocvars1 = getint(c1)
+ if not sizelocvars1 then
+ bork("bad sizelocvars1"); return
+ end
+ local sizelocvars2 = getint(c2)
+ if not sizelocvars2 then
+ bork("bad sizelocvars2"); return
+ end
+ for i = 1, sizelocvars1 do
+ if not getstring(c1) or not getint(c1) or not getint(c1) then
+ bork("bad locvars1"); return
+ end
+ end
+ for i = 1, sizelocvars2 do
+ if not getstring(c2) or not getint(c2) or not getint(c2) then
+ bork("bad locvars2"); return
+ end
+ end
+ -- upvalues (ignored)
+ local sizeupvalues1 = getint(c1)
+ if not sizeupvalues1 then
+ bork("bad sizeupvalues1"); return
+ end
+ local sizeupvalues2 = getint(c2)
+ if not sizeupvalues2 then
+ bork("bad sizeupvalues2"); return
+ end
+ for i = 1, sizeupvalues1 do
+ if not getstring(c1) then bork("bad upvalues1"); return end
+ end
+ for i = 1, sizeupvalues2 do
+ if not getstring(c2) then bork("bad upvalues2"); return end
+ end
+ return true
+ end
+ --------------------------------------------------------------------
+ -- parse binary chunks to verify equivalence
+ -- * for headers, handle sizes to allow a degree of flexibility
+ -- * assume a valid binary chunk is generated, since it was not
+ -- generated via external means
+ --------------------------------------------------------------------
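+  -- for reference (an assumption about a typical x86 build of Lua 5.1,
+  -- not something this code requires), the 12-byte header is:
+  --   "\27Lua" signature, 0x51 version, 0 format, 1 endianness,
+  --   4 sizeof(int), 4 sizeof(size_t), 4 sizeof(Instruction),
+  --   8 sizeof(lua_Number), 0 integral flag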
+  if not (ensure(c1, 12) and ensure(c2, 12)) then
+    bork("header broken"); return
+  end
+ skip(c1, 6) -- skip signature(4), version, format
+ endian = getbyte(c1) -- 1 = little endian
+ sz_int = getbyte(c1) -- get data type sizes
+ sz_sizet = getbyte(c1)
+ sz_inst = getbyte(c1)
+ sz_number = getbyte(c1)
+ skip(c1) -- skip integral flag
+ skip(c2, 12) -- skip other header (assume similar)
+  if endian == 1 then -- select readers for endian-sensitive data
+ getint = getint_l
+ getsizet = getsizet_l
+ else
+ getint = getint_b
+ getsizet = getsizet_b
+ end
+ getfunc(c1, c2) -- get prototype at root
+ if c1.i ~= c1.len + 1 then
+ bork("inconsistent binary chunk1"); return
+ elseif c2.i ~= c2.len + 1 then
+ bork("inconsistent binary chunk2"); return
+ end
+ --------------------------------------------------------------------
+ -- successful comparison if end is reached with no borks
+ --------------------------------------------------------------------
+end
+--end of inserted module
+end
+
+-- preload function for module plugin/html
+preload["plugin/html"] =
+function()
+--start of inserted module
+module "plugin/html"
+
+local string = base.require "string"
+local table = base.require "table"
+local io = base.require "io"
+
+------------------------------------------------------------------------
+-- constants and configuration
+------------------------------------------------------------------------
+
+local HTML_EXT = ".html"
+local ENTITIES = {
+ ["&"] = "&amp;", ["<"] = "&lt;", [">"] = "&gt;",
+ ["'"] = "&apos;", ["\""] = "&quot;",
+}
+
+-- simple headers and footers
+local HEADER = [[
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+<title>%s</title>
+<meta name="Generator" content="LuaSrcDiet">
+<style type="text/css">
+%s</style>
+</head>
+<body>
+<pre class="code">
+]]
+local FOOTER = [[
+</pre>
+</body>
+</html>
+]]
+-- for more, please see wikimain.css from the Lua wiki site
+local STYLESHEET = [[
+BODY {
+ background: white;
+ color: navy;
+}
+pre.code { color: black; }
+span.comment { color: #00a000; }
+span.string { color: #009090; }
+span.keyword { color: black; font-weight: bold; }
+span.number { color: #993399; }
+span.operator { }
+span.name { }
+span.global { color: #ff0000; font-weight: bold; }
+span.local { color: #0000ff; font-weight: bold; }
+]]
+
+------------------------------------------------------------------------
+-- option handling, plays nice with --quiet option
+------------------------------------------------------------------------
+
+local option -- local reference to list of options
+local srcfl, destfl -- filenames
+local toklist, seminfolist, toklnlist -- token data
+
+local function print(...) -- handle quiet option
+ if option.QUIET then return end
+ base.print(...)
+end
+
+------------------------------------------------------------------------
+-- initialization
+------------------------------------------------------------------------
+
+function init(_option, _srcfl, _destfl)
+ option = _option
+ srcfl = _srcfl
+ local extb, exte = string.find(srcfl, "%.[^%.%\\%/]*$")
+ local basename, extension = srcfl, ""
+ if extb and extb > 1 then
+ basename = string.sub(srcfl, 1, extb - 1)
+ extension = string.sub(srcfl, extb, exte)
+ end
+ destfl = basename..HTML_EXT
+ if option.OUTPUT_FILE then
+ destfl = option.OUTPUT_FILE
+ end
+ if srcfl == destfl then
+ base.error("output filename identical to input filename")
+ end
+end
+
+------------------------------------------------------------------------
+-- message display, post-load processing
+------------------------------------------------------------------------
+
+function post_load(z)
+ print([[
+HTML plugin module for LuaSrcDiet
+]])
+ print("Exporting: "..srcfl.." -> "..destfl.."\n")
+end
+
+------------------------------------------------------------------------
+-- post-lexing processing, can work on lexer table output
+------------------------------------------------------------------------
+
+function post_lex(_toklist, _seminfolist, _toklnlist)
+ toklist, seminfolist, toklnlist
+ = _toklist, _seminfolist, _toklnlist
+end
+
+------------------------------------------------------------------------
+-- escape the usual suspects for HTML/XML
+------------------------------------------------------------------------
+
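+-- advancing i by #c skips over any replacement text, so the '&' inside
+-- an inserted "&lt;" is not itself escaped again; for example (an
+-- illustrative call), do_entities('a<b & c') returns 'a&lt;b &amp; c'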
+local function do_entities(z)
+ local i = 1
+ while i <= #z do
+ local c = string.sub(z, i, i)
+ local d = ENTITIES[c]
+ if d then
+ c = d
+ z = string.sub(z, 1, i - 1)..c..string.sub(z, i + 1)
+ end
+ i = i + #c
+ end--while
+ return z
+end
+
+------------------------------------------------------------------------
+-- save source code to file
+------------------------------------------------------------------------
+
+local function save_file(fname, dat)
+ local OUTF = io.open(fname, "wb")
+ if not OUTF then base.error("cannot open \""..fname.."\" for writing") end
+ local status = OUTF:write(dat)
+ if not status then base.error("cannot write to \""..fname.."\"") end
+ OUTF:close()
+end
+
+------------------------------------------------------------------------
+-- post-parsing processing, gives globalinfo, localinfo
+------------------------------------------------------------------------
+
+function post_parse(globalinfo, localinfo)
+ local html = {}
+ local function add(s) -- html helpers
+ html[#html + 1] = s
+ end
+ local function span(class, s)
+ add('<span class="'..class..'">'..s..'</span>')
+ end
+ ----------------------------------------------------------------------
+ for i = 1, #globalinfo do -- mark global identifiers as TK_GLOBAL
+ local obj = globalinfo[i]
+ local xref = obj.xref
+ for j = 1, #xref do
+ local p = xref[j]
+ toklist[p] = "TK_GLOBAL"
+ end
+ end--for
+ ----------------------------------------------------------------------
+ for i = 1, #localinfo do -- mark local identifiers as TK_LOCAL
+ local obj = localinfo[i]
+ local xref = obj.xref
+ for j = 1, #xref do
+ local p = xref[j]
+ toklist[p] = "TK_LOCAL"
+ end
+ end--for
+ ----------------------------------------------------------------------
+ add(string.format(HEADER, -- header and leading stuff
+ do_entities(srcfl),
+ STYLESHEET))
+ for i = 1, #toklist do -- enumerate token list
+ local tok, info = toklist[i], seminfolist[i]
+ if tok == "TK_KEYWORD" then
+ span("keyword", info)
+ elseif tok == "TK_STRING" or tok == "TK_LSTRING" then
+ span("string", do_entities(info))
+ elseif tok == "TK_COMMENT" or tok == "TK_LCOMMENT" then
+ span("comment", do_entities(info))
+ elseif tok == "TK_GLOBAL" then
+ span("global", info)
+ elseif tok == "TK_LOCAL" then
+ span("local", info)
+ elseif tok == "TK_NAME" then
+ span("name", info)
+ elseif tok == "TK_NUMBER" then
+ span("number", info)
+ elseif tok == "TK_OP" then
+ span("operator", do_entities(info))
+ elseif tok ~= "TK_EOS" then -- TK_EOL, TK_SPACE
+ add(info)
+ end
+ end--for
+ add(FOOTER)
+ save_file(destfl, table.concat(html))
+ option.EXIT = true
+end
+--end of inserted module
+end
+
+-- preload function for module plugin/sloc
+preload["plugin/sloc"] =
+function()
+--start of inserted module
+module "plugin/sloc"
+
+local string = base.require "string"
+local table = base.require "table"
+
+------------------------------------------------------------------------
+-- initialization
+------------------------------------------------------------------------
+
+local option -- local reference to list of options
+local srcfl -- source file name
+
+function init(_option, _srcfl, _destfl)
+ option = _option
+ option.QUIET = true
+ srcfl = _srcfl
+end
+
+------------------------------------------------------------------------
+-- splits a block into a table of lines (minus EOLs)
+------------------------------------------------------------------------
+
+local function split(blk)
+ local lines = {}
+ local i, nblk = 1, #blk
+ while i <= nblk do
+ local p, q, r, s = string.find(blk, "([\r\n])([\r\n]?)", i)
+ if not p then
+ p = nblk + 1
+ end
+ lines[#lines + 1] = string.sub(blk, i, p - 1)
+ i = p + 1
+ if p < nblk and q > p and r ~= s then -- handle Lua-style CRLF, LFCR
+ i = i + 1
+ end
+ end
+ return lines
+end
+
+------------------------------------------------------------------------
+-- post-lexing processing, can work on lexer table output
+------------------------------------------------------------------------
+
+function post_lex(toklist, seminfolist, toklnlist)
+ local lnow, sloc = 0, 0
+ local function chk(ln) -- if a new line, count it as an SLOC
+ if ln > lnow then -- new line # must be > old line #
+ sloc = sloc + 1; lnow = ln
+ end
+ end
+ for i = 1, #toklist do -- enumerate over all tokens
+ local tok, info, ln
+ = toklist[i], seminfolist[i], toklnlist[i]
+ --------------------------------------------------------------------
+ if tok == "TK_KEYWORD" or tok == "TK_NAME" or -- significant
+ tok == "TK_NUMBER" or tok == "TK_OP" then
+ chk(ln)
+ --------------------------------------------------------------------
+ -- Both TK_STRING and TK_LSTRING may be multi-line, hence, a loop
+ -- is needed in order to mark off lines one-by-one. Since llex.lua
+ -- currently returns the line number of the last part of the string,
+ -- we must subtract in order to get the starting line number.
+ --------------------------------------------------------------------
+ elseif tok == "TK_STRING" then -- possible multi-line
+ local t = split(info)
+ ln = ln - #t + 1
+ for j = 1, #t do
+ chk(ln); ln = ln + 1
+ end
+ --------------------------------------------------------------------
+ elseif tok == "TK_LSTRING" then -- possible multi-line
+ local t = split(info)
+ ln = ln - #t + 1
+ for j = 1, #t do
+ if t[j] ~= "" then chk(ln) end
+ ln = ln + 1
+ end
+ --------------------------------------------------------------------
+ -- other tokens are comments or whitespace and are ignored
+ --------------------------------------------------------------------
+ end
+ end--for
+ base.print(srcfl..": "..sloc) -- display result
+ option.EXIT = true
+end
+--end of inserted module
+end
+
+-- support modules
+local llex = require "llex"
+local lparser = require "lparser"
+local optlex = require "optlex"
+local optparser = require "optparser"
+local equiv = require "equiv"
+local plugin
+
+--[[--------------------------------------------------------------------
+-- messages and textual data
+----------------------------------------------------------------------]]
+
+local MSG_TITLE = [[
+LuaSrcDiet: Puts your Lua 5.1 source code on a diet
+Version 0.12.1 (20120407) Copyright (c) 2012 Kein-Hong Man
+The COPYRIGHT file describes the conditions under which this
+software may be distributed.
+]]
+
+local MSG_USAGE = [[
+usage: LuaSrcDiet [options] [filenames]
+
+example:
+ >LuaSrcDiet myscript.lua -o myscript_.lua
+
+options:
+ -v, --version prints version information
+ -h, --help prints usage information
+ -o <file> specify file name to write output
+ -s <suffix> suffix for output files (default '_')
+ --keep <msg> keep block comment with <msg> inside
+ --plugin <module> run <module> in plugin/ directory
+ - stop handling arguments
+
+ (optimization levels)
+ --none all optimizations off (normalizes EOLs only)
+ --basic lexer-based optimizations only
+ --maximum maximize reduction of source
+
+ (informational)
+ --quiet process files quietly
+ --read-only read file and print token stats only
+ --dump-lexer dump raw tokens from lexer to stdout
+ --dump-parser dump variable tracking tables from parser
+ --details extra info (strings, numbers, locals)
+
+features (to disable, insert 'no' prefix like --noopt-comments):
+%s
+default settings:
+%s]]
+
+------------------------------------------------------------------------
+-- optimization options, for ease of switching on and off
+-- * positive to enable optimization, negative (no) to disable
+-- * these options should follow --opt-* and --noopt-* style for now
+------------------------------------------------------------------------
+
+local OPTION = [[
+--opt-comments,'remove comments and block comments'
+--opt-whitespace,'remove whitespace excluding EOLs'
+--opt-emptylines,'remove empty lines'
+--opt-eols,'all above, plus remove unnecessary EOLs'
+--opt-strings,'optimize strings and long strings'
+--opt-numbers,'optimize numbers'
+--opt-locals,'optimize local variable names'
+--opt-entropy,'tries to reduce symbol entropy of locals'
+--opt-srcequiv,'insist on source (lexer stream) equivalence'
+--opt-binequiv,'insist on binary chunk equivalence'
+--opt-experimental,'apply experimental optimizations'
+]]
+
+-- preset configuration
+local DEFAULT_CONFIG = [[
+ --opt-comments --opt-whitespace --opt-emptylines
+ --opt-numbers --opt-locals
+ --opt-srcequiv --opt-binequiv
+]]
+-- override configurations
+-- * MUST explicitly enable/disable everything for
+-- total option replacement
+local BASIC_CONFIG = [[
+ --opt-comments --opt-whitespace --opt-emptylines
+ --noopt-eols --noopt-strings --noopt-numbers
+ --noopt-locals --noopt-entropy
+ --opt-srcequiv --opt-binequiv
+]]
+local MAXIMUM_CONFIG = [[
+ --opt-comments --opt-whitespace --opt-emptylines
+ --opt-eols --opt-strings --opt-numbers
+ --opt-locals --opt-entropy
+ --opt-srcequiv --opt-binequiv
+]]
+local NONE_CONFIG = [[
+ --noopt-comments --noopt-whitespace --noopt-emptylines
+ --noopt-eols --noopt-strings --noopt-numbers
+ --noopt-locals --noopt-entropy
+ --opt-srcequiv --opt-binequiv
+]]
+
+local DEFAULT_SUFFIX = "_" -- default suffix for file renaming
+local PLUGIN_SUFFIX = "plugin/" -- relative location of plugins
+
+--[[--------------------------------------------------------------------
+-- startup and initialize option list handling
+----------------------------------------------------------------------]]
+
+-- simple error message handler; change to error if traceback wanted
+local function die(msg)
+ print("LuaSrcDiet (error): "..msg); os.exit(1)
+end
+--die = error--DEBUG
+
+if not string.find(_VERSION, "5.1", 1, 1) then -- sanity check
+  die("requires Lua 5.1 to run")
+end
+
+------------------------------------------------------------------------
+-- prepares text for the list of optimizations, prepares the lookup table
+------------------------------------------------------------------------
+
+local MSG_OPTIONS = ""
+do
+ local WIDTH = 24
+ local o = {}
+ for op, desc in gmatch(OPTION, "%s*([^,]+),'([^']+)'") do
+ local msg = " "..op
+ msg = msg..string.rep(" ", WIDTH - #msg)..desc.."\n"
+ MSG_OPTIONS = MSG_OPTIONS..msg
+ o[op] = true
+ o["--no"..sub(op, 3)] = true
+ end
+ OPTION = o -- replace OPTION with lookup table
+end
+
+MSG_USAGE = string.format(MSG_USAGE, MSG_OPTIONS, DEFAULT_CONFIG)
+
+if p_embedded then -- embedded plugins
+ local EMBED_INFO = "\nembedded plugins:\n"
+ for i = 1, #p_embedded do
+ local p = p_embedded[i]
+ EMBED_INFO = EMBED_INFO.." "..plugin_info[p].."\n"
+ end
+ MSG_USAGE = MSG_USAGE..EMBED_INFO
+end
+
+------------------------------------------------------------------------
+-- global variable initialization, option set handling
+------------------------------------------------------------------------
+
+local suffix = DEFAULT_SUFFIX -- file suffix
+local option = {} -- program options
+local stat_c, stat_l -- statistics tables
+
+-- function to set option lookup table based on a text list of options
+-- note: additional forced settings for --opt-eols are done in optlex.lua
+local function set_options(CONFIG)
+ for op in gmatch(CONFIG, "(%-%-%S+)") do
+ if sub(op, 3, 4) == "no" and -- handle negative options
+ OPTION["--"..sub(op, 5)] then
+ option[sub(op, 5)] = false
+ else
+ option[sub(op, 3)] = true
+ end
+ end
+end
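+-- e.g. set_options("--opt-comments --noopt-locals") sets
+-- option["opt-comments"] = true and option["opt-locals"] = false
+-- (illustrative call, showing how 'no' prefixes turn options off)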
+
+--[[--------------------------------------------------------------------
+-- support functions
+----------------------------------------------------------------------]]
+
+-- list of token types; parser-significant types are the ones up to
+-- TTYPE_GRAMMAR, the rest are not used by parsers; arranged for stats display
+local TTYPES = {
+ "TK_KEYWORD", "TK_NAME", "TK_NUMBER", -- grammar
+ "TK_STRING", "TK_LSTRING", "TK_OP",
+ "TK_EOS",
+ "TK_COMMENT", "TK_LCOMMENT", -- non-grammar
+ "TK_EOL", "TK_SPACE",
+}
+local TTYPE_GRAMMAR = 7
+
+local EOLTYPES = { -- EOL names for token dump
+ ["\n"] = "LF", ["\r"] = "CR",
+ ["\n\r"] = "LFCR", ["\r\n"] = "CRLF",
+}
+
+------------------------------------------------------------------------
+-- read source code from file
+------------------------------------------------------------------------
+
+local function load_file(fname)
+ local INF = io.open(fname, "rb")
+ if not INF then die('cannot open "'..fname..'" for reading') end
+ local dat = INF:read("*a")
+ if not dat then die('cannot read from "'..fname..'"') end
+ INF:close()
+ return dat
+end
+
+------------------------------------------------------------------------
+-- save source code to file
+------------------------------------------------------------------------
+
+local function save_file(fname, dat)
+ local OUTF = io.open(fname, "wb")
+ if not OUTF then die('cannot open "'..fname..'" for writing') end
+ local status = OUTF:write(dat)
+ if not status then die('cannot write to "'..fname..'"') end
+ OUTF:close()
+end
+
+------------------------------------------------------------------------
+-- functions to deal with statistics
+------------------------------------------------------------------------
+
+-- initialize statistics table
+local function stat_init()
+ stat_c, stat_l = {}, {}
+ for i = 1, #TTYPES do
+ local ttype = TTYPES[i]
+ stat_c[ttype], stat_l[ttype] = 0, 0
+ end
+end
+
+-- add a token to statistics table
+local function stat_add(tok, seminfo)
+ stat_c[tok] = stat_c[tok] + 1
+ stat_l[tok] = stat_l[tok] + #seminfo
+end
+
+-- do totals for statistics table, return average table
+local function stat_calc()
+ local function avg(c, l) -- safe average function
+ if c == 0 then return 0 end
+ return l / c
+ end
+ local stat_a = {}
+ local c, l = 0, 0
+ for i = 1, TTYPE_GRAMMAR do -- total grammar tokens
+ local ttype = TTYPES[i]
+ c = c + stat_c[ttype]; l = l + stat_l[ttype]
+ end
+ stat_c.TOTAL_TOK, stat_l.TOTAL_TOK = c, l
+ stat_a.TOTAL_TOK = avg(c, l)
+ c, l = 0, 0
+ for i = 1, #TTYPES do -- total all tokens
+ local ttype = TTYPES[i]
+ c = c + stat_c[ttype]; l = l + stat_l[ttype]
+ stat_a[ttype] = avg(stat_c[ttype], stat_l[ttype])
+ end
+ stat_c.TOTAL_ALL, stat_l.TOTAL_ALL = c, l
+ stat_a.TOTAL_ALL = avg(c, l)
+ return stat_a
+end
+
+--[[--------------------------------------------------------------------
+-- main tasks
+----------------------------------------------------------------------]]
+
+------------------------------------------------------------------------
+-- a simple token dumper, minimal translation of seminfo data
+------------------------------------------------------------------------
+
+local function dump_tokens(srcfl)
+ --------------------------------------------------------------------
+ -- load file and process source input into tokens
+ --------------------------------------------------------------------
+ local z = load_file(srcfl)
+ llex.init(z)
+ llex.llex()
+ local toklist, seminfolist = llex.tok, llex.seminfo
+ --------------------------------------------------------------------
+ -- display output
+ --------------------------------------------------------------------
+ for i = 1, #toklist do
+ local tok, seminfo = toklist[i], seminfolist[i]
+ if tok == "TK_OP" and string.byte(seminfo) < 32 then
+      seminfo = "("..string.byte(seminfo)..")"
+ elseif tok == "TK_EOL" then
+ seminfo = EOLTYPES[seminfo]
+ else
+ seminfo = "'"..seminfo.."'"
+ end
+ print(tok.." "..seminfo)
+ end--for
+end
+
+----------------------------------------------------------------------
+-- parser dump; dump globalinfo and localinfo tables
+----------------------------------------------------------------------
+
+local function dump_parser(srcfl)
+ local print = print
+ --------------------------------------------------------------------
+ -- load file and process source input into tokens
+ --------------------------------------------------------------------
+ local z = load_file(srcfl)
+ llex.init(z)
+ llex.llex()
+ local toklist, seminfolist, toklnlist
+ = llex.tok, llex.seminfo, llex.tokln
+ --------------------------------------------------------------------
+ -- do parser optimization here
+ --------------------------------------------------------------------
+ lparser.init(toklist, seminfolist, toklnlist)
+ local xinfo = lparser.parser()
+ local globalinfo, localinfo =
+ xinfo.globalinfo, xinfo.localinfo
+ --------------------------------------------------------------------
+ -- display output
+ --------------------------------------------------------------------
+ local hl = string.rep("-", 72)
+ print("*** Local/Global Variable Tracker Tables ***")
+ print(hl.."\n GLOBALS\n"..hl)
+ -- global tables have a list of xref numbers only
+ for i = 1, #globalinfo do
+ local obj = globalinfo[i]
+ local msg = "("..i..") '"..obj.name.."' -> "
+ local xref = obj.xref
+ for j = 1, #xref do msg = msg..xref[j].." " end
+ print(msg)
+ end
+  -- local tables have xref numbers plus a few specially-named
+  -- numbers: decl (declaration xref), act (activation xref),
+  -- rem (removal xref)
+ print(hl.."\n LOCALS (decl=declared act=activated rem=removed)\n"..hl)
+ for i = 1, #localinfo do
+ local obj = localinfo[i]
+ local msg = "("..i..") '"..obj.name.."' decl:"..obj.decl..
+ " act:"..obj.act.." rem:"..obj.rem
+ if obj.isself then
+ msg = msg.." isself"
+ end
+ msg = msg.." -> "
+ local xref = obj.xref
+ for j = 1, #xref do msg = msg..xref[j].." " end
+ print(msg)
+ end
+ print(hl.."\n")
+end
+
+------------------------------------------------------------------------
+-- reads source file(s) and reports some statistics
+------------------------------------------------------------------------
+
+local function read_only(srcfl)
+ local print = print
+ --------------------------------------------------------------------
+ -- load file and process source input into tokens
+ --------------------------------------------------------------------
+ local z = load_file(srcfl)
+ llex.init(z)
+ llex.llex()
+ local toklist, seminfolist = llex.tok, llex.seminfo
+ print(MSG_TITLE)
+ print("Statistics for: "..srcfl.."\n")
+ --------------------------------------------------------------------
+ -- collect statistics
+ --------------------------------------------------------------------
+ stat_init()
+ for i = 1, #toklist do
+ local tok, seminfo = toklist[i], seminfolist[i]
+ stat_add(tok, seminfo)
+ end--for
+ local stat_a = stat_calc()
+ --------------------------------------------------------------------
+ -- display output
+ --------------------------------------------------------------------
+ local fmt = string.format
+ local function figures(tt)
+ return stat_c[tt], stat_l[tt], stat_a[tt]
+ end
+ local tabf1, tabf2 = "%-16s%8s%8s%10s", "%-16s%8d%8d%10.2f"
+ local hl = string.rep("-", 42)
+ print(fmt(tabf1, "Lexical", "Input", "Input", "Input"))
+ print(fmt(tabf1, "Elements", "Count", "Bytes", "Average"))
+ print(hl)
+ for i = 1, #TTYPES do
+ local ttype = TTYPES[i]
+ print(fmt(tabf2, ttype, figures(ttype)))
+ if ttype == "TK_EOS" then print(hl) end
+ end
+ print(hl)
+ print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
+ print(hl)
+ print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
+ print(hl.."\n")
+end
+
+------------------------------------------------------------------------
+-- process source file(s), write output and report some statistics
+------------------------------------------------------------------------
+
+local function process_file(srcfl, destfl)
+ local function print(...) -- handle quiet option
+ if option.QUIET then return end
+ _G.print(...)
+ end
+ if plugin and plugin.init then -- plugin init
+ option.EXIT = false
+ plugin.init(option, srcfl, destfl)
+ if option.EXIT then return end
+ end
+ print(MSG_TITLE) -- title message
+ --------------------------------------------------------------------
+ -- load file and process source input into tokens
+ --------------------------------------------------------------------
+ local z = load_file(srcfl)
+ if plugin and plugin.post_load then -- plugin post-load
+ z = plugin.post_load(z) or z
+ if option.EXIT then return end
+ end
+ llex.init(z)
+ llex.llex()
+ local toklist, seminfolist, toklnlist
+ = llex.tok, llex.seminfo, llex.tokln
+ if plugin and plugin.post_lex then -- plugin post-lex
+ plugin.post_lex(toklist, seminfolist, toklnlist)
+ if option.EXIT then return end
+ end
+ --------------------------------------------------------------------
+ -- collect 'before' statistics
+ --------------------------------------------------------------------
+ stat_init()
+ for i = 1, #toklist do
+ local tok, seminfo = toklist[i], seminfolist[i]
+ stat_add(tok, seminfo)
+ end--for
+ local stat1_a = stat_calc()
+ local stat1_c, stat1_l = stat_c, stat_l
+ --------------------------------------------------------------------
+ -- do parser optimization here
+ --------------------------------------------------------------------
+ optparser.print = print -- hack
+ lparser.init(toklist, seminfolist, toklnlist)
+ local xinfo = lparser.parser()
+ if plugin and plugin.post_parse then -- plugin post-parse
+ plugin.post_parse(xinfo.globalinfo, xinfo.localinfo)
+ if option.EXIT then return end
+ end
+ optparser.optimize(option, toklist, seminfolist, xinfo)
+ if plugin and plugin.post_optparse then -- plugin post-optparse
+ plugin.post_optparse()
+ if option.EXIT then return end
+ end
+ --------------------------------------------------------------------
+ -- do lexer optimization here, save output file
+ --------------------------------------------------------------------
+ local warn = optlex.warn -- use this as a general warning lookup
+ optlex.print = print -- hack
+ toklist, seminfolist, toklnlist
+ = optlex.optimize(option, toklist, seminfolist, toklnlist)
+ if plugin and plugin.post_optlex then -- plugin post-optlex
+ plugin.post_optlex(toklist, seminfolist, toklnlist)
+ if option.EXIT then return end
+ end
+ local dat = table.concat(seminfolist)
+  -- depending on the options selected, embedded EOLs in long strings and
+  -- long comments may not have been translated to \n, so tack on a warning
+ if string.find(dat, "\r\n", 1, 1) or
+ string.find(dat, "\n\r", 1, 1) then
+ warn.MIXEDEOL = true
+ end
+ --------------------------------------------------------------------
+ -- test source and binary chunk equivalence
+ --------------------------------------------------------------------
+ equiv.init(option, llex, warn)
+ equiv.source(z, dat)
+ equiv.binary(z, dat)
+ local smsg = "before and after lexer streams are NOT equivalent!"
+ local bmsg = "before and after binary chunks are NOT equivalent!"
+ -- for reporting, die if option was selected, else just warn
+ if warn.SRC_EQUIV then
+ if option["opt-srcequiv"] then die(smsg) end
+ else
+ print("*** SRCEQUIV: token streams are sort of equivalent")
+ if option["opt-locals"] then
+ print("(but no identifier comparisons since --opt-locals enabled)")
+ end
+ print()
+ end
+ if warn.BIN_EQUIV then
+ if option["opt-binequiv"] then die(bmsg) end
+ else
+ print("*** BINEQUIV: binary chunks are sort of equivalent")
+ print()
+ end
+ --------------------------------------------------------------------
+ -- save optimized source stream to output file
+ --------------------------------------------------------------------
+ save_file(destfl, dat)
+ --------------------------------------------------------------------
+ -- collect 'after' statistics
+ --------------------------------------------------------------------
+ stat_init()
+ for i = 1, #toklist do
+ local tok, seminfo = toklist[i], seminfolist[i]
+ stat_add(tok, seminfo)
+ end--for
+ local stat_a = stat_calc()
+ --------------------------------------------------------------------
+ -- display output
+ --------------------------------------------------------------------
+ print("Statistics for: "..srcfl.." -> "..destfl.."\n")
+ local fmt = string.format
+ local function figures(tt)
+ return stat1_c[tt], stat1_l[tt], stat1_a[tt],
+ stat_c[tt], stat_l[tt], stat_a[tt]
+ end
+ local tabf1, tabf2 = "%-16s%8s%8s%10s%8s%8s%10s",
+ "%-16s%8d%8d%10.2f%8d%8d%10.2f"
+ local hl = string.rep("-", 68)
+ print("*** lexer-based optimizations summary ***\n"..hl)
+ print(fmt(tabf1, "Lexical",
+ "Input", "Input", "Input",
+ "Output", "Output", "Output"))
+ print(fmt(tabf1, "Elements",
+ "Count", "Bytes", "Average",
+ "Count", "Bytes", "Average"))
+ print(hl)
+ for i = 1, #TTYPES do
+ local ttype = TTYPES[i]
+ print(fmt(tabf2, ttype, figures(ttype)))
+ if ttype == "TK_EOS" then print(hl) end
+ end
+ print(hl)
+ print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
+ print(hl)
+ print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
+ print(hl)
+ --------------------------------------------------------------------
+ -- report warning flags from optimizing process
+ --------------------------------------------------------------------
+  if warn.LSTRING then
+    print("* WARNING: "..warn.LSTRING)
+  end
+  if warn.MIXEDEOL then
+    print("* WARNING: output still contains some CRLF or LFCR line endings")
+  end
+  if warn.SRC_EQUIV then
+    print("* WARNING: "..smsg)
+  end
+  if warn.BIN_EQUIV then
+    print("* WARNING: "..bmsg)
+  end
+ print()
+end
+
+--[[--------------------------------------------------------------------
+-- main functions
+----------------------------------------------------------------------]]
+
+local arg = {...} -- program arguments
+local fspec = {}
+set_options(DEFAULT_CONFIG) -- set to default options at beginning
+
+------------------------------------------------------------------------
+-- per-file handling, ship off to tasks
+------------------------------------------------------------------------
+
+local function do_files(fspec)
+ for i = 1, #fspec do
+ local srcfl = fspec[i]
+ local destfl
+ ------------------------------------------------------------------
+ -- find and replace extension for filenames
+ ------------------------------------------------------------------
+ local extb, exte = string.find(srcfl, "%.[^%.%\\%/]*$")
+ local basename, extension = srcfl, ""
+ if extb and extb > 1 then
+ basename = sub(srcfl, 1, extb - 1)
+ extension = sub(srcfl, extb, exte)
+ end
+ destfl = basename..suffix..extension
+ if #fspec == 1 and option.OUTPUT_FILE then
+ destfl = option.OUTPUT_FILE
+ end
+ if srcfl == destfl then
+ die("output filename identical to input filename")
+ end
+ ------------------------------------------------------------------
+ -- perform requested operations
+ ------------------------------------------------------------------
+ if option.DUMP_LEXER then
+ dump_tokens(srcfl)
+ elseif option.DUMP_PARSER then
+ dump_parser(srcfl)
+ elseif option.READ_ONLY then
+ read_only(srcfl)
+ else
+ process_file(srcfl, destfl)
+ end
+ end--for
+end
+
+------------------------------------------------------------------------
+-- main function (entry point is after this definition)
+------------------------------------------------------------------------
+
+local function main()
+ local argn, i = #arg, 1
+ if argn == 0 then
+ option.HELP = true
+ end
+ --------------------------------------------------------------------
+ -- handle arguments
+ --------------------------------------------------------------------
+ while i <= argn do
+ local o, p = arg[i], arg[i + 1]
+ local dash = match(o, "^%-%-?")
+ if dash == "-" then -- single-dash options
+ if o == "-h" then
+ option.HELP = true; break
+ elseif o == "-v" then
+ option.VERSION = true; break
+ elseif o == "-s" then
+ if not p then die("-s option needs suffix specification") end
+ suffix = p
+ i = i + 1
+ elseif o == "-o" then
+ if not p then die("-o option needs a file name") end
+ option.OUTPUT_FILE = p
+ i = i + 1
+ elseif o == "-" then
+ break -- ignore rest of args
+ else
+ die("unrecognized option "..o)
+ end
+ elseif dash == "--" then -- double-dash options
+ if o == "--help" then
+ option.HELP = true; break
+ elseif o == "--version" then
+ option.VERSION = true; break
+ elseif o == "--keep" then
+ if not p then die("--keep option needs a string to match for") end
+ option.KEEP = p
+ i = i + 1
+ elseif o == "--plugin" then
+ if not p then die("--plugin option needs a module name") end
+ if option.PLUGIN then die("only one plugin can be specified") end
+ option.PLUGIN = p
+ plugin = require(PLUGIN_SUFFIX..p)
+ i = i + 1
+ elseif o == "--quiet" then
+ option.QUIET = true
+ elseif o == "--read-only" then
+ option.READ_ONLY = true
+ elseif o == "--basic" then
+ set_options(BASIC_CONFIG)
+ elseif o == "--maximum" then
+ set_options(MAXIMUM_CONFIG)
+ elseif o == "--none" then
+ set_options(NONE_CONFIG)
+ elseif o == "--dump-lexer" then
+ option.DUMP_LEXER = true
+ elseif o == "--dump-parser" then
+ option.DUMP_PARSER = true
+ elseif o == "--details" then
+ option.DETAILS = true
+ elseif OPTION[o] then -- lookup optimization options
+ set_options(o)
+ else
+ die("unrecognized option "..o)
+ end
+ else
+ fspec[#fspec + 1] = o -- potential filename
+ end
+ i = i + 1
+ end--while
+ if option.HELP then
+ print(MSG_TITLE..MSG_USAGE); return true
+ elseif option.VERSION then
+ print(MSG_TITLE); return true
+ end
+ if #fspec > 0 then
+ if #fspec > 1 and option.OUTPUT_FILE then
+ die("with -o, only one source file can be specified")
+ end
+ do_files(fspec)
+ return true
+ else
+ die("nothing to do!")
+ end
+end
+
+-- entry point -> main() -> do_files()
+if not main() then
+ die("Please run with option -h or --help for usage information")
+end
+
+-- end of script
diff --git a/scripts/luasrcdiet/README.LuaSrcDiet b/scripts/luasrcdiet/README.LuaSrcDiet
new file mode 100644
index 0000000..950ceff
--- /dev/null
+++ b/scripts/luasrcdiet/README.LuaSrcDiet
@@ -0,0 +1,140 @@
+
+ LuaSrcDiet
+ Compresses Lua source code by removing unnecessary characters.
+
+ Copyright (c) 2005-2008,2011,2012 Kein-Hong Man <keinhong@gmail.com>
+ The COPYRIGHT file describes the conditions
+ under which this software may be distributed.
+
+ http://code.google.com/p/luasrcdiet/
+
+========================================================================
+
+WHAT'S NEW IN VERSION 0.12.1
+----------------------------
+
+* Fixed a long comment glitch when using the --keep option. (Two
+ extra characters were duplicated before the ending brackets.)
+
+* Faster function call syntax sugar optimization using a one-pass
+ token deletion loop.
+
+WHAT'S NEW IN VERSION 0.12.0
+----------------------------
+
+* Added single-file versions of LuaSrcDiet in various sizes. This was
+  first done by other projects that packaged LuaSrcDiet, e.g. eLua.
+
+* BUG FIX: in string optimization of the "\ddd" escape mechanism,
+  "\00101" was incorrectly optimized to "\101".
+
+* --opt-srcequiv: Source equivalence checking. Tries hard to compare
+ 'before' and 'after' lexer token streams for equivalence.
+
+* --opt-binequiv: Binary chunk equivalence checking. Tries hard to
+ compare 'before' and 'after' binary chunks for equivalence.
+
+* When using --opt-eols, the last EOL character is now removed.
+
+* --opt-experimental: Turns on a few experimental optimizations:
+ (a) ';' operator removal (deleted or turned into whitespace).
+ (b) f("string") f('string') f([[string]]) calls are turned
+ into their syntactic sugar equivalents, e.g. f"string"
+
+* Plugins are now embedded into single-file versions.
+
+* First release of completed documentation files.
+
+* New Makefile and numerous minor updates.
+
+* Old code for Lua 5.0 removed.
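+
+As a small illustration of item (b) under --opt-experimental above
+(hypothetical input, not taken from the documentation):
+
+  f("hello") f('hi') f([[block]])   -->  f"hello" f'hi' f[[block]]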
+
+BUGS
+----
+
+* Nothing in my list. See below for limitations...
+
+INCOMPLETE SUPPORT
+------------------
+
+* Locals optimization does NOT understand implicit 'arg' locals in
+ vararg functions (see option LUA_COMPAT_VARARG in the Lua sources).
+
+* NO support in lexer for decimal points other than '.'.
+
+* NO support in lexer for Lua 5.0.x nested long strings.
+
+EXPERIMENTAL SOFTWARE
+---------------------
+
+LuaSrcDiet is "experimental software". For LuaSrcDiet, this means that
+it was coded for one user -- the coder. Although I may be able to help
+LuaSrcDiet users, there should not be any expectation of 'support'.
+
+Don't hook this thing up to nuclear missiles.
+
+I don't have the time for steady maintenance or for building up and
+cultivating a user base, so developers are welcome to fork LuaSrcDiet or
+incorporate it into their own software, as long as authorship
+attribution for LuaSrcDiet source code is maintained. If, say, LuaSrcDiet
+is called as a separate program, then it is simply an aggregation of
+separate software, and each program should stick to its own license.
+
+Programs you process using LuaSrcDiet are of course not affected at all
+by LuaSrcDiet's license; it's just a text filter. See COPYRIGHT. If you
+insist on extreme COPYRIGHT views, then better delete this whole thing
+right away, then gouge your eyes out. :-p
+
+OLDER STUFF
+-----------
+
+There has been some slash-and-burn going on. I'm inclined to move
+forward, and not spend time maintaining older stuff forever. If you
+still need the older stuff, it can be found in:
+
+* Lua 5.0.x old versions: last seen in version 0.11.2.
+
+* Lua 5.1.x old codebase: last seen in version 0.11.2.
+
+FUTURE PLANS
+------------
+
+Lua 5.1.x releases for LuaSrcDiet will pretty much stagnate at 0.12.1
+after implementation of a couple more experimental optimizations, and
+effort will be shifted towards something for Lua 5.2.x. The timeline for
+this is indeterminate.
+
+========================================================================
+
+USING LUASRCDIET
+
+Now is a good time to take a look at the documentation. Start with
+LuaSrcDiet.html in the doc directory.
+
+LuaSrcDiet is now packaged as a single-file Lua script for maximum
+convenience. Just drop it in and splice something into your Makefile.
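+For example, a typical invocation might look like this (the file names
+here are only placeholders):
+
+  lua LuaSrcDiet.lua --maximum src/myscript.lua -o build/myscript.lua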
+
+New source stream and binary chunk equivalence checking minimizes the
+possibility of LuaSrcDiet borking your stuff.
+
+========================================================================
+
+ACKNOWLEDGEMENTS
+
+Coded using SciTE. Developed mostly under Cygwin with a generic Lua
+5.1.4 binary.
+
+========================================================================
+
+FEEDBACK
+
+Feedback and contributions are welcome. Your name will be acknowledged,
+as long as you are willing to comply with COPYRIGHT. If your material is
+self-contained, you can retain a copyright notice for that material in
+your own name, as long as you use the same Lua 5/MIT-style copyright.
+
+Enjoy!
+
+Kein-Hong Man (esq.)
+Kuala Lumpur
+Malaysia 20120407