Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stevedonovan/Penlight.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/lua
diff options
context:
space:
mode:
author Thijs Schreijer <thijs@thijsschreijer.nl> 2021-12-18 00:56:55 +0300
committer Thijs Schreijer <thijs@thijsschreijer.nl> 2022-01-10 14:25:49 +0300
commit b749d881b353d076291df826c36f7fc86baf4202 (patch)
tree 0e2498df565c080eaad49c0f84f2337050bc50f3 /lua
parent 76d789463c28ebf84f806252d00518818e9adc79 (diff)
chore(docs) update xml module
refactoring some code, and a lot of doc updates
Diffstat (limited to 'lua')
-rw-r--r-- lua/pl/xml.lua 1326
1 files changed, 846 insertions, 480 deletions
diff --git a/lua/pl/xml.lua b/lua/pl/xml.lua
index 860e6b3..f91107d 100644
--- a/lua/pl/xml.lua
+++ b/lua/pl/xml.lua
@@ -30,505 +30,870 @@
-- @module pl.xml
local utils = require 'pl.utils'
-local split = utils.split;
-local t_insert = table.insert;
-local t_concat = table.concat;
-local t_remove = table.remove;
-local s_match = string.match;
-local tostring = tostring;
-local setmetatable = setmetatable;
-local getmetatable = getmetatable;
-local pairs = pairs;
-local ipairs = ipairs;
-local type = type;
-local next = next;
-local print = print;
-local unpack = utils.unpack;
-local s_gsub = string.gsub;
-local s_find = string.find;
-local pcall,require,io = pcall,require,io
+local split = utils.split
+local t_insert = table.insert
+local t_concat = table.concat
+local t_remove = table.remove
+local s_match = string.match
+local tostring = tostring
+local setmetatable = setmetatable
+local getmetatable = getmetatable
+local pairs = pairs
+local ipairs = ipairs
+local type = type
+local next = next
+local print = print
+local unpack = utils.unpack
+local s_gsub = string.gsub
+local s_sub = string.sub
+local s_find = string.find
+local pcall = pcall
+local require = require
+
local _M = {}
local Doc = { __type = "doc" };
Doc.__index = Doc;
+
+local function is_text(s) return type(s) == 'string' end
+local function is_tag(d) return type(d) == 'table' and is_text(d.tag) end
+
+
+
--- create a new document node.
--- @param tag the tag name
--- @param attr optional attributes (table of name-value pairs)
+-- @tparam string tag the tag name
+-- @tparam[opt={}] table attr attributes (table of name-value pairs)
+-- @return the Node object
+-- @see xml.elem
+-- @usage
+-- local doc = xml.new("main", { hello = "world", answer = "42" })
+-- print(doc) --> <main hello='world' answer='42'/>
function _M.new(tag, attr)
- local doc = { tag = tag, attr = attr or {}, last_add = {}};
- return setmetatable(doc, Doc);
+ if type(tag) ~= "string" then
+ error("expected 'tag' to be a string value, got: " .. type(tag), 2)
+ end
+ attr = attr or {}
+ if type(attr) ~= "table" then
+ error("expected 'attr' to be a table value, got: " .. type(attr), 2)
+ end
+
+ local doc = { tag = tag, attr = attr, last_add = {}};
+ return setmetatable(doc, Doc);
end
---- parse an XML document. By default, this uses lxp.lom.parse, but
--- falls back to basic_parse, or if use_basic is true
--- @param text_or_file file or string representation
+
+--- parse an XML document. By default, this uses lxp.lom.parse, but
+-- falls back to basic_parse, or if `use_basic` is truthy
+-- @param text_or_filename file or string representation
-- @param is_file whether text_or_filename is a file name or not
-- @param use_basic do a basic parse
-- @return a parsed LOM document with the document metatables set
-- @return nil, error the error can either be a file error or a parse error
-function _M.parse(text_or_file, is_file, use_basic)
- local parser,status,lom
- if use_basic then
- parser = _M.basic_parse
+function _M.parse(text_or_filename, is_file, use_basic)
+ local parser,status,lom
+ if use_basic then
+ parser = _M.basic_parse
+ else
+ status,lom = pcall(require,'lxp.lom')
+ if not status then
+ parser = _M.basic_parse
else
- status,lom = pcall(require,'lxp.lom')
- if not status then
- parser = _M.basic_parse
- else
- parser = lom.parse
- end
+ parser = lom.parse
end
+ end
- if is_file then
- local f,err = io.open(text_or_file)
- if not f then return nil,err end
- text_or_file = f:read '*a'
- f:close()
+ if is_file then
+ local text_or_filename, err = utils.readfile(text_or_filename)
+ if not text_or_filename then
+ return nil, err
end
+ end
+
+ local doc, err = parser(text_or_filename)
+ if not doc then
+ return nil, err
+ end
+
+ if lom then
+ _M.walk(doc, false, function(_, d)
+ setmetatable(d, Doc)
+ end)
+ end
+ return doc
+end
+
- local doc,err = parser(text_or_file)
- if not doc then
- return nil,err
+--- Create a Node with a set of children (text or Nodes) and attributes.
+-- @tparam string tag a tag name
+-- @tparam table|string items either a single child (text or Node), or a table where the hash
+-- part is the attributes and the list part is the children (text or Nodes).
+-- @return the new Node
+-- @see xml.new
+-- @see xml.tags
+-- @usage
+-- local doc = xml.elem("top", "hello world") -- <top>hello world</top>
+-- local doc = xml.elem("main", xml.new("child")) -- <main><child/></main>
+-- local doc = xml.elem("main", { "this ", "is ", "nice" }) -- <main>this is nice</main>
+-- local doc = xml.elem("main", { xml.new "this",
+-- xml.new "is",
+-- xml.new "nice" }) -- <main><this/><is/><nice/></main>
+-- local doc = xml.elem("main", { hello = "world" }) -- <main hello='world'/>
+-- local doc = xml.elem("main", {
+-- "prefix",
+-- xml.elem("child", { "this ", "is ", "nice"}),
+-- "postfix",
+-- attrib = "value"
+-- }) -- <main attrib='value'>prefix<child>this is nice</child>postfix</main>
+function _M.elem(tag, items)
+ local s = _M.new(tag)
+ if is_text(items) then items = {items} end
+ if is_tag(items) then
+ t_insert(s,items)
+ elseif type(items) == 'table' then
+ for k,v in pairs(items) do
+ if is_text(k) then
+ s.attr[k] = v
+ t_insert(s.attr,k)
+ else
+ s[k] = v
+ end
end
+ end
+ return s
+end
- if lom then
- _M.walk(doc,false,function(_,d)
- setmetatable(d,Doc)
- end)
+
+--- given a list of names, return a number of element constructors.
+-- If passing a comma-separated string, then whitespace surrounding the values
+-- will be stripped.
+--
+-- The returned constructor functions are a shortcut to `xml.elem` where you
+-- no longer provide the tag-name, but only the `items` table.
+-- @tparam string|table list a list of names, or a comma-separated string.
+-- @return (multiple) constructor functions; `function(items)`. For the `items`
+-- parameter see `xml.elem`.
+-- @see xml.elem
+-- @usage
+-- local new_parent, new_child = xml.tags 'mom, kid'
+-- doc = new_parent {new_child 'Bob', new_child 'Annie'}
+-- -- <mom><kid>Bob</kid><kid>Annie</kid></mom>
+function _M.tags(list)
+ local ctors = {}
+ if is_text(list) then
+ list = split(list:match("^%s*(.-)%s*$"),'%s*,%s*')
+ end
+ for i,tag in ipairs(list) do
+ local function ctor(items)
+ return _M.elem(tag,items)
end
- return doc
+ ctors[i] = ctor
+ end
+ return unpack(ctors)
end
----- convenient function to add a document node, This updates the last inserted position.
--- @param tag a tag name
--- @param attrs optional set of attributes (name-string pairs)
+
+--- Adds a document Node, at current position.
+-- This updates the last inserted position to the new Node.
+-- @tparam string tag the tag name
+-- @tparam[opt={}] table attrs attributes (table of name-value pairs)
+-- @return the current node (`self`)
+-- @usage
+-- local doc = xml.new("main")
+-- doc:addtag("penlight", { hello = "world"})
+-- doc:addtag("expat") -- added to 'penlight' since position moved
+-- print(doc) --> <main><penlight hello='world'><expat/></penlight></main>
function Doc:addtag(tag, attrs)
- local s = _M.new(tag, attrs);
- (self.last_add[#self.last_add] or self):add_direct_child(s);
- t_insert(self.last_add, s);
- return self;
+ local s = _M.new(tag, attrs)
+ self:add_child(s)
+ t_insert(self.last_add, s)
+ return self
end
---- convenient function to add a text node. This updates the last inserted position.
--- @param text a string
+
+--- Adds a text node, at current position.
+-- @tparam string text a string
+-- @return the current node (`self`)
+-- @usage
+-- local doc = xml.new("main")
+-- doc:text("penlight")
+-- doc:text("expat")
+-- print(doc) --> <main>penlightexpat</main>
function Doc:text(text)
- (self.last_add[#self.last_add] or self):add_direct_child(text);
- return self;
+ self:add_child(text)
+ return self
end
----- go up one level in a document
+
+--- Moves current position up one level.
+-- @return the current node (`self`)
function Doc:up()
- t_remove(self.last_add);
- return self;
+ t_remove(self.last_add)
+ return self
end
+
+--- Resets current position to top level.
+-- Resets to the `self` node.
+-- @return the current node (`self`)
function Doc:reset()
- local last_add = self.last_add;
- for i = 1,#last_add do
- last_add[i] = nil;
- end
- return self;
+ local last_add = self.last_add
+ for i = 1,#last_add do
+ last_add[i] = nil
+ end
+ return self
end
---- append a child to a document directly.
+
+--- Append a child to the current Node (ignoring current position).
-- @param child a child node (either text or a document)
+-- @return the current node (`self`)
+-- @usage
+-- local doc = xml.new("main")
+-- doc:add_direct_child("dog")
+-- doc:add_direct_child(xml.new("child"))
+-- doc:add_direct_child("cat")
+-- print(doc) --> <main>dog<child/>cat</main>
function Doc:add_direct_child(child)
- t_insert(self, child);
+ t_insert(self, child)
+ return self
end
---- append a child to a document at the last element added
+
+--- Append a child at the current position (without changing position).
-- @param child a child node (either text or a document)
+-- @return the current node (`self`)
+-- @usage
+-- local doc = xml.new("main")
+-- doc:addtag("one")
+-- doc:add_child(xml.new("item1"))
+-- doc:add_child(xml.new("item2"))
+-- doc:add_child(xml.new("item3"))
+-- print(doc) --> <main><one><item1/><item2/><item3/></one></main>
function Doc:add_child(child)
- (self.last_add[#self.last_add] or self):add_direct_child(child);
- return self;
+ (self.last_add[#self.last_add] or self):add_direct_child(child)
+ return self
end
+
--accessing attributes: useful not to have to expose implementation (attr)
--but also can allow attr to be nil in any future optimizations
---- set attributes of a document node.
--- @param t a table containing attribute/value pairs
-function Doc:set_attribs (t)
- for k,v in pairs(t) do
- self.attr[k] = v
- end
+
+--- Set attributes of a document node.
+-- Will add/overwrite values, but will not remove existing ones.
+-- Operates on the Node itself, will not take position into account.
+-- @tparam table t a table containing attribute/value pairs
+-- @return the current node (`self`)
+function Doc:set_attribs(t)
+ -- TODO: keep array part in sync
+ for k,v in pairs(t) do
+ self.attr[k] = v
+ end
+ return self
end
---- set a single attribute of a document node.
+
+--- Set a single attribute of a document node.
+-- Operates on the Node itself, will not take position into account.
-- @param a attribute
--- @param v its value
+-- @param v its value, pass in `nil` to delete the attribute
+-- @return the current node (`self`)
function Doc:set_attrib(a,v)
- self.attr[a] = v
+ -- TODO: keep array part in sync
+ self.attr[a] = v
+ return self
end
---- access the attributes of a document node.
+
+--- Gets the attributes of a document node.
+-- Operates on the Node itself, will not take position into account.
+-- @return table with attributes (attribute/value pairs)
function Doc:get_attribs()
- return self.attr
+ return self.attr
end
-local function is_text(s) return type(s) == 'string' end
-
---- function to create an element with a given tag name and a set of children.
--- @param tag a tag name
--- @param items either text or a table where the hash part is the attributes and the list part is the children.
-function _M.elem(tag,items)
- local s = _M.new(tag)
- if is_text(items) then items = {items} end
- if _M.is_tag(items) then
- t_insert(s,items)
- elseif type(items) == 'table' then
- for k,v in pairs(items) do
- if is_text(k) then
- s.attr[k] = v
- t_insert(s.attr,k)
- else
- s[k] = v
- end
- end
- end
- return s
-end
---- given a list of names, return a number of element constructors.
--- @param list a list of names, or a comma-separated string.
--- @usage local parent,children = doc.tags 'parent,children' <br>
--- doc = parent {child 'one', child 'two'}
-function _M.tags(list)
- local ctors = {}
- if is_text(list) then list = split(list,'%s*,%s*') end
- for _,tag in ipairs(list) do
- local ctor = function(items) return _M.elem(tag,items) end
- t_insert(ctors,ctor)
- end
- return unpack(ctors)
-end
-local templ_cache = {}
+local template_cache do
+ local templ_cache = {}
-local function template_cache (templ)
+ -- @param templ a template, a string being valid xml to be parsed, or a Node object
+ function template_cache(templ)
if is_text(templ) then
- if templ_cache[templ] then
- templ = templ_cache[templ]
- else
- local str,err = templ
- templ,err = _M.parse(str,false,true)
- if not templ then return nil,err end
- templ_cache[str] = templ
+ if templ_cache[templ] then
+ -- cache hit
+ return templ_cache[templ]
+
+ else
+ -- parse and cache
+ local ptempl, err = _M.parse(templ,false,true)
+ if not ptempl then
+ return nil, err
end
- elseif not _M.is_tag(templ) then
- return nil, "template is not a document"
+ templ_cache[templ] = ptempl
+ return ptempl
+ end
end
- return templ
+
+ if is_tag(templ) then
+ return templ
+ end
+
+ return nil, "template is not a document"
+ end
end
-local function is_data(data)
+
+do
+ local function is_data(data)
return #data == 0 or type(data[1]) ~= 'table'
-end
+ end
-local function prepare_data(data)
+
+ local function prepare_data(data)
-- a hack for ensuring that $1 maps to first element of data, etc.
-- Either this or could change the gsub call just below.
for i,v in ipairs(data) do
- data[tostring(i)] = v
+ data[tostring(i)] = v
+ end
+ end
+
+ --- create a substituted copy of a document,
+ -- @param template may be a document or a string representation which will be parsed and cached
+ -- @param data a table of name-value pairs or a list of such tables
+ -- @return an XML document
+ function Doc.subst(template, data)
+ if type(data) ~= 'table' or not next(data) then
+ return nil, "data must be a non-empty table"
end
-end
---- create a substituted copy of a document,
--- @param templ may be a document or a string representation which will be parsed and cached
--- @param data a table of name-value pairs or a list of such tables
--- @return an XML document
-function Doc.subst(templ, data)
- local err
- if type(data) ~= 'table' or not next(data) then return nil, "data must be a non-empty table" end
if is_data(data) then
- prepare_data(data)
+ prepare_data(data)
end
- templ,err = template_cache(templ)
- if err then return nil, err end
+
+ local templ, err = template_cache(template)
+ if err then
+ return nil, err
+ end
+
local function _subst(item)
- return _M.clone(templ,function(s)
- return s:gsub('%$(%w+)',item)
- end)
+ return _M.clone(templ, function(s)
+ return s:gsub('%$(%w+)', item)
+ end)
+ end
+
+ if is_data(data) then
+ return _subst(data)
end
- if is_data(data) then return _subst(data) end
+
local list = {}
- for _,item in ipairs(data) do
- prepare_data(item)
- t_insert(list,_subst(item))
+ for _, item in ipairs(data) do
+ prepare_data(item)
+ t_insert(list, _subst(item))
end
+
if data.tag then
- list = _M.elem(data.tag,list)
+ list = _M.elem(data.tag,list)
end
return list
+ end
end
---- get the first child with a given tag name.
+--- Return the first child with a given tag name (non-recursive).
-- @param tag the tag name
+-- @return the child Node found or `nil` if not found
function Doc:child_with_name(tag)
- for _, child in ipairs(self) do
- if child.tag == tag then return child; end
+ for _, child in ipairs(self) do
+ if child.tag == tag then
+ return child
end
+ end
end
-local _children_with_name
-function _children_with_name(self,tag,list,recurse)
- for _, child in ipairs(self) do if type(child) == 'table' then
- if child.tag == tag then t_insert(list,child) end
- if recurse then _children_with_name(child,tag,list,recurse) end
- end end
-end
---- get all elements in a document that have a given tag.
--- @param tag a tag name
--- @param dont_recurse optionally only return the immediate children with this tag name
--- @return a list of elements
-function Doc:get_elements_with_name(tag,dont_recurse)
+do
+ -- @param self document node to traverse
+ -- @param tag tag-name to look for
+ -- @param list array table to add the matching ones to
+ -- @param recurse if truthy, recursively search the node
+ local function _children_with_name(self, tag, list, recurse)
+ -- TODO: protect against recursion
+ for _, child in ipairs(self) do
+ if type(child) == 'table' then
+ if child.tag == tag then
+ t_insert(list, child)
+ end
+ if recurse then
+ _children_with_name(child, tag, list, recurse)
+ end
+ end
+ end
+ end
+
+ --- Returns all elements in a document that have a given tag.
+ -- @tparam string tag a tag name
+ -- @tparam[opt=false] boolean dont_recurse optionally only return the immediate children with this tag name
+ -- @return a list of elements found, list will be empty if none was found.
+ function Doc:get_elements_with_name(tag, dont_recurse)
local res = {}
- _children_with_name(self,tag,res,not dont_recurse)
+ _children_with_name(self, tag, res, not dont_recurse)
return res
+ end
end
--- iterate over all children of a document node, including text nodes.
+
+
+--- Iterator over all children of a document node, including text nodes.
+-- This function is not recursive, so returns only direct child nodes.
+-- @return iterator that returns a single Node per iteration.
function Doc:children()
- local i = 0;
- return function (a)
- i = i + 1
- return a[i];
- end, self, i;
+ local i = 0;
+ return function (a)
+ i = i + 1
+ return a[i];
+ end, self, i;
end
--- return the first child element of a node, if it exists.
+
+--- Return the first child element of a node, if it exists.
+-- This will skip text nodes.
+-- @return first child Node or `nil` if there is none.
function Doc:first_childtag()
- if #self == 0 then return end
- for _,t in ipairs(self) do
- if type(t) == 'table' then return t end
+ if #self == 0 then
+ return
+ end
+ for _, t in ipairs(self) do
+ if is_tag(t) then
+ return t
end
+ end
end
+
+--- Iterator that matches tag names, and a namespace (non-recursive).
+-- @tparam[opt=nil] string tag tag names to return. Returns all tags if not provided.
+-- @tparam[opt=nil] string xmlns the namespace value ('xmlns' attribute) to return. If not
+-- provided will match all namespaces.
+-- @return iterator that returns a single Node per iteration.
function Doc:matching_tags(tag, xmlns)
- xmlns = xmlns or self.attr.xmlns;
- local tags = self;
- local start_i, max_i, v = 1, #tags;
- return function ()
- for i=start_i,max_i do
- v = tags[i];
- if (not tag or v.tag == tag)
- and (not xmlns or xmlns == v.attr.xmlns) then
- start_i = i+1;
- return v;
- end
- end
- end, tags, start_i;
+ -- TODO: this doesn't make sense??? namespaces are not "xmlns", as matched below
+ -- but "xmlns:name"... so should be a string-prefix match if anything...
+ xmlns = xmlns or self.attr.xmlns;
+ local tags = self
+ local next_i = 1
+ local max_i = #tags
+ local node
+ return function ()
+ for i = next_i, max_i do
+ node = tags[i];
+ if (not tag or node.tag == tag) and
+ (not xmlns or xmlns == node.attr.xmlns) then
+ next_i = i + 1
+ return node
+ end
+ end
+ end, tags, next_i
end
---- iterate over all child elements of a document node.
+
+--- Iterator over all child tags of a document node. This will skip over
+-- text nodes.
+-- @return iterator that returns a single Node per iteration.
function Doc:childtags()
- local i = 0;
- return function (a)
- local v
- repeat
- i = i + 1
- v = self[i]
- if v and type(v) == 'table' then return v; end
- until not v
- end, self[1], i;
-end
-
---- visit child element of a node and call a function, possibility modifying the document.
--- @param callback a function passed the node (text or element). If it returns nil, that node will be removed.
--- If it returns a value, that will replace the current node.
-function Doc:maptags(callback)
- local is_tag = _M.is_tag
- local i = 1;
- while i <= #self do
- if is_tag(self[i]) then
- local ret = callback(self[i]);
- if ret == nil then
- t_remove(self, i);
- else
- self[i] = ret;
- i = i + 1;
- end
- else
- i = i + 1
+ local i = 0;
+ return function (a)
+ local v
+ repeat
+ i = i + 1
+ v = self[i]
+ if v and type(v) == 'table' then
+ return v
end
+ until not v
+ end, self[1], i;
+end
+
+
+--- Visit child Nodes of a node and call a function, possibly modifying the document.
+-- Text elements will be skipped.
+-- This is not recursive, so only direct children will be passed.
+-- @tparam function callback a function with signature `function(node)`, passed the node.
+-- The element will be updated with the returned value, or deleted if it returns `nil`.
+function Doc:maptags(callback)
+ local i = 1;
+
+ while i <= #self do
+ if is_tag(self[i]) then
+ local ret = callback(self[i]);
+ if ret == nil then
+ -- remove it
+ t_remove(self, i);
+
+ else
+ -- update it
+ self[i] = ret;
+ i = i + 1;
+ end
+ else
+ i = i + 1
end
- return self;
+ end
+
+ return self;
end
-local xml_escape
+
do
- local escape_table = { ["'"] = "&apos;", ["\""] = "&quot;", ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;" };
- function xml_escape(str) return (s_gsub(str, "['&<>\"]", escape_table)); end
- _M.xml_escape = xml_escape;
+ local escape_table = {
+ ["'"] = "&apos;",
+ ['"'] = "&quot;",
+ ["<"] = "&lt;",
+ [">"] = "&gt;",
+ ["&"] = "&amp;",
+ }
+
+ --- Escapes a string for safe use in xml.
+ -- Handles quotes(single+double), less-than, greater-than, and ampersand.
+ -- @tparam string str string value to escape
+ -- @return escaped string
+ -- @usage
+ -- local esc = xml.xml_escape([["'<>&]]) --> "&quot;&apos;&lt;&gt;&amp;"
+ function _M.xml_escape(str)
+ return (s_gsub(str, "['&<>\"]", escape_table))
+ end
end
+local xml_escape = _M.xml_escape
+
+do
+ local escape_table = {
+ quot = '"',
+ apos = "'",
+ lt = "<",
+ gt = ">",
+ amp = "&",
+ }
+
+ --- Unescapes a string from xml.
+ -- Handles quotes(single+double), less-than, greater-than, and ampersand.
+ -- @tparam string str string value to unescape
+ -- @return unescaped string
+ -- @usage
+ -- local unesc = xml.xml_unescape("&quot;&apos;&lt;&gt;&amp;") --> [["'<>&]]
+ function _M.xml_unescape(str)
+ return (str:gsub( "&(%a+);", escape_table))
+ end
+end
+local xml_unescape = _M.xml_unescape
-- pretty printing
-- if indent, then put each new tag on its own line
-- if attr_indent, put each new attribute on its own line
-local function _dostring(t, buf, self, xml_escape, parentns, idn, indent, attr_indent)
- local nsid = 0;
- local tag = t.tag
- local lf,alf = ""," "
- if indent then lf = '\n'..idn end
- if attr_indent then alf = '\n'..idn..attr_indent end
- t_insert(buf, lf.."<"..tag);
- local function write_attr(k,v)
- if s_find(k, "\1", 1, true) then
- local ns, attrk = s_match(k, "^([^\1]*)\1?(.*)$");
- nsid = nsid + 1;
- t_insert(buf, " xmlns:ns"..nsid.."='"..xml_escape(ns).."' ".."ns"..nsid..":"..attrk.."='"..xml_escape(v).."'");
- elseif not(k == "xmlns" and v == parentns) then
- t_insert(buf, alf..k.."='"..xml_escape(v).."'");
- end
+local function _dostring(t, buf, parentns, block_indent, tag_indent, attr_indent)
+ local nsid = 0
+ local tag = t.tag
+
+ local lf = ""
+ if tag_indent then
+ lf = '\n'..block_indent
+ end
+
+ local alf = " "
+ if attr_indent then
+ alf = '\n'..block_indent..attr_indent
+ end
+
+ t_insert(buf, lf.."<"..tag)
+
+ local function write_attr(k,v)
+ if s_find(k, "\1", 1, true) then
+ nsid = nsid + 1
+ local ns, attrk = s_match(k, "^([^\1]*)\1?(.*)$")
+ t_insert(buf, " xmlns:ns"..nsid.."='"..xml_escape(ns).."' ".."ns"..nsid..":"..attrk.."='"..xml_escape(v).."'")
+
+ elseif not (k == "xmlns" and v == parentns) then
+ t_insert(buf, alf..k.."='"..xml_escape(v).."'");
end
- -- it's useful for testing to have predictable attribute ordering, if available
- if #t.attr > 0 then
- for _,k in ipairs(t.attr) do
- write_attr(k,t.attr[k])
- end
- else
- for k, v in pairs(t.attr) do
- write_attr(k,v)
- end
+ end
+
+ -- it's useful for testing to have predictable attribute ordering, if available
+ if #t.attr > 0 then
+ -- TODO: the key-value list is leading, what if they are not in-sync
+ for _,k in ipairs(t.attr) do
+ write_attr(k,t.attr[k])
+ end
+ else
+ for k, v in pairs(t.attr) do
+ write_attr(k,v)
+ end
+ end
+
+ local len = #t
+ local has_children
+
+ if len == 0 then
+ t_insert(buf, attr_indent and '\n'..block_indent.."/>" or "/>")
+
+ else
+ t_insert(buf, ">");
+
+ for n = 1, len do
+ local child = t[n]
+
+ if child.tag then
+ has_children = true
+ _dostring(child, buf, t.attr.xmlns, block_indent and block_indent..tag_indent, tag_indent, attr_indent)
+
+ else
+ -- text element
+ t_insert(buf, xml_escape(child))
+ end
end
- local len,has_children = #t;
- if len == 0 then
- local out = "/>"
- if attr_indent then out = '\n'..idn..out end
- t_insert(buf, out);
+
+ t_insert(buf, (has_children and lf or '').."</"..tag..">");
+ end
+end
+
+--- Function to pretty-print an XML document.
+-- @param doc an XML document
+-- @tparam[opt] string|int b_ind an initial block-indent (required when `t_ind` is set)
+-- @tparam[opt] string|int t_ind a tag-indent for each level (required when `a_ind` is set)
+-- @tparam[opt] string|int a_ind if given, indent each attribute pair and put on a separate line
+-- @tparam[opt] string|bool xml_preface force prefacing with default or custom <?xml...>, if truthy then `&lt;?xml version='1.0'?&gt;` will be used as default.
+-- @return a string representation
+-- @see Doc:tostring
+function _M.tostring(doc, b_ind, t_ind, a_ind, xml_preface)
+ local buf = {}
+
+ if type(b_ind) == "number" then b_ind = (" "):rep(b_ind) end
+ if type(t_ind) == "number" then t_ind = (" "):rep(t_ind) end
+ if type(a_ind) == "number" then a_ind = (" "):rep(a_ind) end
+
+ if xml_preface then
+ if type(xml_preface) == "string" then
+ buf[1] = xml_preface
else
- t_insert(buf, ">");
- for n=1,len do
- local child = t[n];
- if child.tag then
- self(child, buf, self, xml_escape, t.attr.xmlns,idn and idn..indent, indent, attr_indent );
- has_children = true
- else -- text element
- t_insert(buf, xml_escape(child));
- end
- end
- t_insert(buf, (has_children and lf or '').."</"..tag..">");
- end
-end
-
----- pretty-print an XML document
---- @param t an XML document
---- @param idn an initial indent (indents are all strings)
---- @param indent an indent for each level
---- @param attr_indent if given, indent each attribute pair and put on a separate line
---- @param xml force prefacing with default or custom <?xml...>
---- @return a string representation
-function _M.tostring(t,idn,indent, attr_indent, xml)
- local buf = {};
- if xml then
- if type(xml) == "string" then
- buf[1] = xml
- else
- buf[1] = "<?xml version='1.0'?>"
- end
+ buf[1] = "<?xml version='1.0'?>"
end
- _dostring(t, buf, _dostring, xml_escape, nil,idn,indent, attr_indent);
- return t_concat(buf);
+ end
+
+ _dostring(doc, buf, nil, b_ind, t_ind, a_ind, xml_preface)
+
+ return t_concat(buf)
end
+
Doc.__tostring = _M.tostring
---- get the full text value of an element
+
+--- Method to pretty-print an XML document.
+-- Invokes `xml.tostring`.
+-- @tparam[opt] string|int b_ind an initial indent (required when `t_ind` is set)
+-- @tparam[opt] string|int t_ind an indent for each level (required when `a_ind` is set)
+-- @tparam[opt] string|int a_ind if given, indent each attribute pair and put on a separate line
+-- @tparam[opt="&lt;?xml version='1.0'?&gt;"] string xml_preface force prefacing with default or custom <?xml...>
+-- @return a string representation
+-- @see xml.tostring
+function Doc:tostring(b_ind, t_ind, a_ind, xml_preface)
+ return _M.tostring(self, b_ind, t_ind, a_ind, xml_preface)
+end
+
+
+--- get the full text value of an element.
+-- @return a single string with all text elements concatenated
+-- @usage
+-- local doc = xml.new("main")
+-- doc:text("one")
+-- doc:add_child(xml.elem "two")
+-- doc:text("three")
+--
+-- local t = doc:get_text() --> "onethree"
function Doc:get_text()
- local res = {}
- for i,el in ipairs(self) do
- if is_text(el) then t_insert(res,el) end
- end
- return t_concat(res);
+ local res = {}
+ for i,el in ipairs(self) do
+ if is_text(el) then t_insert(res,el) end
+ end
+ return t_concat(res);
end
---- make a copy of a document
--- @param doc the original document
--- @param strsubst an optional function for handling string copying which could do substitution, etc.
-function _M.clone(doc, strsubst)
- local lookup_table = {};
- local function _copy(object,kind,parent)
- if type(object) ~= "table" then
- if strsubst and is_text(object) then return strsubst(object,kind,parent)
- else return object
- end
- elseif lookup_table[object] then
- return lookup_table[object]
- end
- local new_table = {};
- lookup_table[object] = new_table
- local tag = object.tag
- new_table.tag = _copy(tag,'*TAG',parent)
- if object.attr then
- local res = {}
- for attr,value in pairs(object.attr) do
- res[attr] = _copy(value,attr,object)
- end
- new_table.attr = res
- end
- for index = 1,#object do
- local v = _copy(object[index],'*TEXT',object)
- t_insert(new_table,v)
+
+do
+ local function _copy(object, kind, parent, strsubst, lookup_table)
+ if type(object) ~= "table" then
+ if strsubst and is_text(object) then
+ return strsubst(object, kind, parent)
+ else
+ return object
+ end
+ end
+
+ if lookup_table[object] then
+ error("recursion detected")
+ end
+ lookup_table[object] = true
+
+ local new_table = {}
+ lookup_table[object] = new_table
+
+ local tag = object.tag
+ new_table.tag = _copy(tag, '*TAG', parent, strsubst, lookup_table)
+
+ if object.attr then
+ local res = {}
+ for attr, value in pairs(object.attr) do
+ if type(attr) == "string" then
+ res[attr] = _copy(value, attr, object, strsubst, lookup_table)
end
- return setmetatable(new_table, getmetatable(object))
+ end
+ new_table.attr = res
+ end
+
+ for index = 1, #object do
+ local v = _copy(object[index], '*TEXT', object, strsubst, lookup_table)
+ t_insert(new_table,v)
end
- return _copy(doc)
+ return setmetatable(new_table, getmetatable(object))
+ end
+
+ --- Returns a copy of a document.
+ -- The `strsubst` parameter is a callback with signature `function(object, kind, parent)`.
+ --
+ -- Param `kind` has the following values, and parameters:
+ --
+ -- - `"*TAG"`: `object` is the tag-name, `parent` is the Node object. Returns the new tag name.
+ --
+ -- - `"*TEXT"`: `object` is the text-element, `parent` is the Node object. Returns the new text value.
+ --
+ -- - other strings not prefixed with `*`: `kind` is the attribute name, `object` is the
+ -- attribute value, `parent` is the Node object. Returns the new attribute value.
+ --
+ -- @tparam Node|string doc a Node object or string (text node)
+ -- @tparam[opt] function strsubst an optional function for handling string copying
+ -- which could do substitution, etc.
+ -- @return copy of the document
+ -- @see Doc:filter
+ function _M.clone(doc, strsubst)
+ return _copy(doc, nil, nil, strsubst, {})
+ end
end
+
+--- Returns a copy of a document.
+-- This is the method version of `xml.clone`.
+-- @see xml.clone
+-- @name Doc:filter
+-- @tparam[opt] function strsubst an optional function for handling string copying
Doc.filter = _M.clone -- also available as method
---- compare two documents.
--- @param t1 any value
--- @param t2 any value
-function _M.compare(t1,t2)
+do
+ local function _compare(t1, t2, recurse_check)
+
local ty1 = type(t1)
local ty2 = type(t2)
- if ty1 ~= ty2 then return false, 'type mismatch' end
+
+ if ty1 ~= ty2 then
+ return false, 'type mismatch'
+ end
+
if ty1 == 'string' then
- return t1 == t2 and true or 'text '..t1..' ~= text '..t2
+ if t1 == t2 then
+ return true
+ else
+ return false, 'text '..t1..' ~= text '..t2
+ end
+ end
+
+ if ty1 ~= 'table' or ty2 ~= 'table' then
+ return false, 'not a document'
end
- if ty1 ~= 'table' or ty2 ~= 'table' then return false, 'not a document' end
- if t1.tag ~= t2.tag then return false, 'tag '..t1.tag..' ~= tag '..t2.tag end
- if #t1 ~= #t2 then return false, 'size '..#t1..' ~= size '..#t2..' for tag '..t1.tag end
+
+ if recurse_check[t1] then
+ return false, "recursive document"
+ end
+ recurse_check[t1] = true
+
+ if t1.tag ~= t2.tag then
+ return false, 'tag '..t1.tag..' ~= tag '..t2.tag
+ end
+
+ if #t1 ~= #t2 then
+ return false, 'size '..#t1..' ~= size '..#t2..' for tag '..t1.tag
+ end
+
-- compare attributes
for k,v in pairs(t1.attr) do
- if t2.attr[k] ~= v then return false, 'mismatch attrib' end
+ local t2_value = t2.attr[k]
+ if type(k) == "string" then
+ if t2_value ~= v then return false, 'mismatch attrib' end
+ else
+ if t2_value ~= nil and t2_value ~= v then return false, "mismatch attrib order" end
+ end
end
for k,v in pairs(t2.attr) do
- if t1.attr[k] ~= v then return false, 'mismatch attrib' end
+ local t1_value = t1.attr[k]
+ if type(k) == "string" then
+ if t1_value ~= v then return false, 'mismatch attrib' end
+ else
+ if t1_value ~= nil and t1_value ~= v then return false, "mismatch attrib order" end
+ end
end
+
-- compare children
- for i = 1,#t1 do
- local yes,err = _M.compare(t1[i],t2[i])
- if not yes then return err end
+ for i = 1, #t1 do
+ local ok, err = _compare(t1[i], t2[i], recurse_check)
+ if not ok then
+ return ok, err
+ end
end
return true
+ end
+
+ --- Compare two documents or elements.
+ -- Equality is based on tag, child nodes (text and tags), attributes and order
+ -- of those (order only fails if both are given, and not equal).
+ -- @tparam Node|string t1 a Node object or string (text node)
+ -- @tparam Node|string t2 a Node object or string (text node)
+ -- @treturn boolean `true` when the Nodes are equal, otherwise `false` followed by a string describing the mismatch.
+ function _M.compare(t1,t2)
+ return _compare(t1, t2, {})
+ end
end
+
--- is this value a document element?
-- @param d any value
-function _M.is_tag(d)
- return type(d) == 'table' and is_text(d.tag)
-end
+-- @treturn boolean `true` if it is a `table` with property `tag` being a string value.
+-- @name is_tag
+_M.is_tag = is_tag
---- call the desired function recursively over the document.
--- @param doc the document
--- @param depth_first visit child notes first, then the current node
--- @param operation a function which will receive the current tag name and current node.
-function _M.walk (doc, depth_first, operation)
- if not depth_first then operation(doc.tag,doc) end
+
+do
+ local function _walk(doc, depth_first, operation, recurse_check)
+ if not depth_first then operation(doc.tag, doc) end
for _,d in ipairs(doc) do
- if _M.is_tag(d) then
- _M.walk(d,depth_first,operation)
- end
+ if is_tag(d) then
+ assert(not recurse_check[d], "recursion detected")
+ recurse_check[d] = true
+ _walk(d, depth_first, operation, recurse_check)
+ end
end
- if depth_first then operation(doc.tag,doc) end
+ if depth_first then operation(doc.tag, doc) end
+ end
+
+ --- Calls a function recursively over Nodes in the document.
+ -- Will only call on tags, it will skip text nodes.
+ -- The function signature for `operation` is `function(tag_name, Node)`.
+ -- @tparam Node doc the Node object to start walking from
+ -- @tparam boolean depth_first visit child nodes first, then the current node
+ -- @tparam function operation a function which will receive the current tag name and current node.
+ function _M.walk(doc, depth_first, operation)
+ return _walk(doc, depth_first, operation, {})
+ end
end
+
local html_empty_elements = { --lists all HTML empty (void) elements
br = true,
img = true,
@@ -546,13 +911,10 @@ local html_empty_elements = { --lists all HTML empty (void) elements
embed = true,
}
-local escapes = { quot = "\"", apos = "'", lt = "<", gt = ">", amp = "&" }
-local function unescape(str) return (str:gsub( "&(%a+);", escapes)); end
-
--- Parse a well-formed HTML file as a string.
-- Tags are case-insenstive, DOCTYPE is ignored, and empty elements can be .. empty.
-- @param s the HTML
-function _M.parsehtml (s)
+function _M.parsehtml(s)
return _M.basic_parse(s,false,true)
end
@@ -560,9 +922,7 @@ end
-- @param s the XML document to be parsed.
-- @param all_text if true, preserves all whitespace. Otherwise only text containing non-whitespace is included.
-- @param html if true, uses relaxed HTML rules for parsing
-function _M.basic_parse(s,all_text,html)
- local t_insert,t_remove = table.insert,table.remove
- local s_find,s_sub = string.find,string.sub
+function _M.basic_parse(s, all_text, html)
local stack = {}
local top = {}
@@ -570,12 +930,12 @@ function _M.basic_parse(s,all_text,html)
local arg = {}
s:gsub("([%w:%-_]+)%s*=%s*([\"'])(.-)%2", function (w, _, a)
if html then w = w:lower() end
- arg[w] = unescape(a)
+ arg[w] = xml_unescape(a)
end)
if html then
s:gsub("([%w:%-_]+)%s*=%s*([^\"']+)%s*", function (w, a)
w = w:lower()
- arg[w] = unescape(a)
+ arg[w] = xml_unescape(a)
end)
end
return arg
@@ -610,7 +970,7 @@ function _M.basic_parse(s,all_text,html)
if html_empty_elements[label] then empty = "/" end
end
if all_text or not s_find(text, "^%s*$") then
- t_insert(top, unescape(text))
+ t_insert(top, xml_unescape(text))
end
if empty == "/" then -- empty element tag
t_insert(top, setmetatable({tag=label, attr=parseargs(xarg), empty=1},Doc))
@@ -633,7 +993,7 @@ function _M.basic_parse(s,all_text,html)
end
local text = s_sub(s, i)
if all_text or not s_find(text, "^%s*$") then
- t_insert(stack[#stack], unescape(text))
+ t_insert(stack[#stack], xml_unescape(text))
end
if #stack > 1 then
error("unclosed "..stack[#stack].tag)
@@ -642,145 +1002,151 @@ function _M.basic_parse(s,all_text,html)
return is_text(res[1]) and res[2] or res[1]
end
-local function empty(attr) return not attr or not next(attr) end
-local function is_element(d) return type(d) == 'table' and d.tag ~= nil end
+do
+ local match do
--- returns the key,value pair from a table if it has exactly one entry
-local function has_one_element(t)
- local key,value = next(t)
- if next(t,key) ~= nil then return false end
- return key,value
-end
+ local function empty(attr) return not attr or not next(attr) end
-local function append_capture(res,tbl)
- if not empty(tbl) then -- no point in capturing empty tables...
- local key
- if tbl._ then -- if $_ was set then it is meant as the top-level key for the captured table
- key = tbl._
- tbl._ = nil
- if empty(tbl) then return end
- end
- -- a table with only one pair {[0]=value} shall be reduced to that value
- local numkey,val = has_one_element(tbl)
- if numkey == 0 then tbl = val end
- if key then
- res[key] = tbl
- else -- otherwise, we append the captured table
- t_insert(res,tbl)
- end
+ local append_capture do
+ -- returns the key,value pair from a table if it has exactly one entry
+ local function has_one_element(t)
+ local key,value = next(t)
+ if next(t,key) ~= nil then return false end
+ return key,value
+ end
+
+ function append_capture(res,tbl)
+ if not empty(tbl) then -- no point in capturing empty tables...
+ local key
+ if tbl._ then -- if $_ was set then it is meant as the top-level key for the captured table
+ key = tbl._
+ tbl._ = nil
+ if empty(tbl) then return end
+ end
+ -- a table with only one pair {[0]=value} shall be reduced to that value
+ local numkey,val = has_one_element(tbl)
+ if numkey == 0 then tbl = val end
+ if key then
+ res[key] = tbl
+ else -- otherwise, we append the captured table
+ t_insert(res,tbl)
+ end
+ end
+ end
end
-end
-local function make_number(pat)
- if pat:find '^%d+$' then -- $1 etc means use this as an array location
- pat = tonumber(pat)
+ local function make_number(pat)
+ if pat:find '^%d+$' then -- $1 etc means use this as an array location
+ pat = tonumber(pat)
+ end
+ return pat
end
- return pat
-end
-local function capture_attrib(res,pat,value)
- pat = make_number(pat:sub(2))
- res[pat] = value
- return true
-end
+ local function capture_attrib(res,pat,value)
+ pat = make_number(pat:sub(2))
+ res[pat] = value
+ return true
+ end
-local match
-function match(d,pat,res,keep_going)
- local ret = true
- if d == nil then d = '' end --return false end
- -- attribute string matching is straight equality, except if the pattern is a $ capture,
- -- which always succeeds.
- if is_text(d) then
- if not is_text(pat) then return false end
- if _M.debug then print(d,pat) end
- if pat:find '^%$' then
- return capture_attrib(res,pat,d)
+ function match(d,pat,res,keep_going)
+ local ret = true
+ if d == nil then d = '' end --return false end
+ -- attribute string matching is straight equality, except if the pattern is a $ capture,
+ -- which always succeeds.
+ if is_text(d) then
+ if not is_text(pat) then return false end
+ if _M.debug then print(d,pat) end
+ if pat:find '^%$' then
+ return capture_attrib(res,pat,d)
+ else
+ return d == pat
+ end
else
- return d == pat
- end
- else
- if _M.debug then print(d.tag,pat.tag) end
- -- this is an element node. For a match to succeed, the attributes must
- -- match as well.
- -- a tagname in the pattern ending with '-' is a wildcard and matches like an attribute
- local tagpat = pat.tag:match '^(.-)%-$'
- if tagpat then
- tagpat = make_number(tagpat)
- res[tagpat] = d.tag
- end
- if d.tag == pat.tag or tagpat then
-
- if not empty(pat.attr) then
- if empty(d.attr) then ret = false
- else
- for prop,pval in pairs(pat.attr) do
- local dval = d.attr[prop]
- if not match(dval,pval,res) then ret = false; break end
- end
- end
+ if _M.debug then print(d.tag,pat.tag) end
+ -- this is an element node. For a match to succeed, the attributes must
+ -- match as well.
+ -- a tagname in the pattern ending with '-' is a wildcard and matches like an attribute
+ local tagpat = pat.tag:match '^(.-)%-$'
+ if tagpat then
+ tagpat = make_number(tagpat)
+ res[tagpat] = d.tag
end
- -- the pattern may have child nodes. We match partially, so that {P1,P2} shall match {X,P1,X,X,P2,..}
- if ret and #pat > 0 then
- local i,j = 1,1
- local function next_elem()
- j = j + 1 -- next child element of data
- if is_text(d[j]) then j = j + 1 end
- return j <= #d
- end
- repeat
- local p = pat[i]
- -- repeated {{<...>}} patterns shall match one or more elements
- -- so e.g. {P+} will match {X,X,P,P,X,P,X,X,X}
- if is_element(p) and p.repeated then
- local found
- repeat
- local tbl = {}
- ret = match(d[j],p,tbl,false)
- if ret then
- found = false --true
- append_capture(res,tbl)
- end
- until not next_elem() or (found and not ret)
- i = i + 1
+ if d.tag == pat.tag or tagpat then
+
+ if not empty(pat.attr) then
+ if empty(d.attr) then ret = false
else
- ret = match(d[j],p,res,false)
- if ret then i = i + 1 end
+ for prop,pval in pairs(pat.attr) do
+ local dval = d.attr[prop]
+ if not match(dval,pval,res) then ret = false; break end
+ end
+ end
+ end
+ -- the pattern may have child nodes. We match partially, so that {P1,P2} shall match {X,P1,X,X,P2,..}
+ if ret and #pat > 0 then
+ local i,j = 1,1
+ local function next_elem()
+ j = j + 1 -- next child element of data
+ if is_text(d[j]) then j = j + 1 end
+ return j <= #d
end
- until not next_elem() or i > #pat -- run out of elements or patterns to match
- -- if every element in our pattern matched ok, then it's been a successful match
- if i > #pat then return true end
+ repeat
+ local p = pat[i]
+ -- repeated {{<...>}} patterns shall match one or more elements
+ -- so e.g. {P+} will match {X,X,P,P,X,P,X,X,X}
+ if is_tag(p) and p.repeated then
+ local found
+ repeat
+ local tbl = {}
+ ret = match(d[j],p,tbl,false)
+ if ret then
+ found = false --true
+ append_capture(res,tbl)
+ end
+ until not next_elem() or (found and not ret)
+ i = i + 1
+ else
+ ret = match(d[j],p,res,false)
+ if ret then i = i + 1 end
+ end
+ until not next_elem() or i > #pat -- run out of elements or patterns to match
+ -- if every element in our pattern matched ok, then it's been a successful match
+ if i > #pat then return true end
+ end
+ if ret then return true end
+ else
+ ret = false
end
- if ret then return true end
- else
- ret = false
- end
- -- keep going anyway - look at the children!
- if keep_going then
- for child in d:childtags() do
- ret = match(child,pat,res,keep_going)
- if ret then break end
+ -- keep going anyway - look at the children!
+ if keep_going then
+ for child in d:childtags() do
+ ret = match(child,pat,res,keep_going)
+ if ret then break end
+ end
end
end
+ return ret
end
- return ret
-end
+ end
-function Doc:match(pat)
- local err
- pat,err = template_cache(pat)
- if not pat then return nil, err end
- _M.walk(pat,false,function(_,d)
- if is_text(d[1]) and is_element(d[2]) and is_text(d[3]) and
- d[1]:find '%s*{{' and d[3]:find '}}%s*' then
- t_remove(d,1)
- t_remove(d,2)
- d[1].repeated = true
- end
- end)
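+ --- Matches this document against a template pattern; returns the table of captures plus the match result, or `nil` and an error message when `template_cache` rejects the pattern.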
+ function Doc:match(pat)
+ local err
+ pat,err = template_cache(pat)
+ if not pat then return nil, err end
+ _M.walk(pat,false,function(_,d)
+ if is_text(d[1]) and is_tag(d[2]) and is_text(d[3]) and
+ d[1]:find '%s*{{' and d[3]:find '}}%s*' then
+ t_remove(d,1)
+ t_remove(d,2)
+ d[1].repeated = true
+ end
+ end)
- local res = {}
- local ret = match(self,pat,res,true)
- return res,ret
+ local res = {}
+ local ret = match(self,pat,res,true)
+ return res,ret
+ end
end