Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stevedonovan/Penlight.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThijs Schreijer <thijs@thijsschreijer.nl>2022-01-05 21:14:01 +0300
committerThijs Schreijer <thijs@thijsschreijer.nl>2022-01-10 13:59:11 +0300
commit6031edda456663f6e61fa80b1b56885a818f1202 (patch)
tree3bfd018cf8270a240f3dea2fdd53f049ed774180
parentfef9a3492c8766c2c53fe4a5544f29f1a7e9a339 (diff)
feat(stringx) add the functions from `pl.text`
this is a step towards deprecating `pl.text`
-rw-r--r--lua/pl/stringx.lua318
-rw-r--r--spec/stringx_spec.lua311
2 files changed, 626 insertions, 3 deletions
diff --git a/lua/pl/stringx.lua b/lua/pl/stringx.lua
index 8ba6763..75c61c7 100644
--- a/lua/pl/stringx.lua
+++ b/lua/pl/stringx.lua
@@ -6,9 +6,10 @@
--
-- See @{03-strings.md|the Guide}
--
--- Dependencies: `pl.utils`
+-- Dependencies: `pl.utils`, `pl.types`
-- @module pl.stringx
local utils = require 'pl.utils'
+local is_callable = require 'pl.types'.is_callable
local string = string
local find = string.find
local type,setmetatable,ipairs = type,setmetatable,ipairs
@@ -19,10 +20,13 @@ local sub = string.sub
local reverse = string.reverse
local concat = table.concat
local append = table.insert
+local remove = table.remove
local escape = utils.escape
local ceil, max = math.ceil, math.max
local assert_arg,usplit = utils.assert_arg,utils.split
local lstrip
+local unpack = utils.unpack
+local pack = utils.pack
local function assert_string (n,s)
assert_arg(n,s,'string')
@@ -420,8 +424,8 @@ function stringx.strip(s,chrs)
return _strip(s,true,true,chrs)
end
---- Partioning Strings
--- @section partioning
+--- Partitioning Strings
+-- @section partitioning
--- split a string using a pattern. Note that at least one value will be returned!
-- @string s the string
@@ -488,6 +492,250 @@ function stringx.at(s,idx)
return sub(s,idx,idx)
end
+
+--- Text handling
+-- @section text
+
+
+--- Prefix every line of a (multiline) string with an indent.
+-- The text is split on newlines, `n` copies of `ch` are prepended to each
+-- line, and the lines are joined again with a newline appended at the end.
+-- @tparam string s the (multiline) string
+-- @tparam integer n the size of the indent
+-- @tparam[opt=' '] string ch the character to use when indenting
+-- @return indented string
+function stringx.indent (s,n,ch)
+    assert_arg(1,s,'string')
+    assert_arg(2,n,'number')
+    local pad = string.rep(ch or ' ',n)
+    local out = usplit(s,'\n')
+    for idx = 1, #out do
+        out[idx] = pad..out[idx]
+    end
+    return concat(out,'\n')..'\n'
+end
+
+
+--- dedent a multiline string by removing any initial indent.
+-- useful when working with [[..]] strings.
+-- The indent that is removed is the smallest one found on any line that has
+-- non-whitespace content. Lines without such content are ignored when
+-- determining the indent; if they are shorter than the indent they become
+-- empty lines. When no line has any content at all (the input is only
+-- whitespace) every line becomes empty.
+-- The result always ends with a newline.
+-- @tparam string s the (multiline) string
+-- @return a string with initial indent zero.
+-- @usage
+-- local s = dedent [[
+--          One
+--
+--        Two
+--
+--      Three
+-- ]]
+-- assert(s == [[
+--     One
+--
+--   Two
+--
+-- Three
+-- ]])
+function stringx.dedent (s)
+    assert_arg(1,s,'string')
+    local lst = usplit(s,'\n')
+
+    -- position of the left-most non-whitespace character over all lines;
+    -- stays nil when no line has any content
+    local ind_size
+    for i = 1, #lst do
+        local _, i2 = lst[i]:find('^%s*[^%s]')
+        if i2 and (not ind_size or i2 < ind_size) then
+            ind_size = i2
+        end
+    end
+
+    for i = 1, #lst do
+        local line = lst[i]
+        if ind_size and ind_size <= #line then
+            lst[i] = line:sub(ind_size)
+        else
+            -- nothing left after the indent. Handling this explicitly avoids
+            -- passing a non-integer (math.huge) to string.sub, which raises
+            -- "number has no integer representation" on Lua 5.3+.
+            lst[i] = ''
+        end
+    end
+    return concat(lst,'\n')..'\n'
+end
+
+
+
+do
+    -- Builds one output line from `words`, starting at index `first`.
+    -- A word longer than `size` always gets a line of its own: when
+    -- `breaklong` is falsy the whole word is emitted (overflowing `size`),
+    -- otherwise only its first `size` characters are emitted and the
+    -- remainder is stored back in `words[first]` for the next call.
+    -- Returns the line and the index of the first word not yet consumed.
+    local buildline = function(words, first, size, breaklong)
+        local word = words[first]
+        if #word > size then
+            if not breaklong then
+                return word, first + 1
+            end
+            words[first] = word:sub(size + 1, -1)
+            return word:sub(1, size), first
+        end
+
+        local line, len, idx = {}, 0, first
+        -- `len` is the length of the line so far plus the single space a
+        -- following word would need in front of it
+        while words[idx] and len + #words[idx] <= size do
+            line[#line+1] = words[idx]
+            len = len + #words[idx] + 1
+            idx = idx + 1
+        end
+        return concat(line, " "), idx
+    end
+
+    --- format a paragraph into lines so that they fit into a line width.
+    -- It will not break long words by default, so lines can be over the length
+    -- to that extent.
+    -- Line breaks in the input are treated as spaces, leading/trailing
+    -- whitespace is dropped and runs of whitespace count as one separator.
+    -- @tparam string s the string to format
+    -- @tparam[opt=70] integer width the margin width
+    -- @tparam[opt=false] boolean breaklong if truthy, words longer than the width given will be forced split.
+    -- @return a list of lines (List object), use `fill` to return a string instead of a `List`.
+    -- @raise if `breaklong` is truthy and `width` is less than 1
+    -- @see pl.List
+    -- @see fill
+    stringx.wrap = function(s, width, breaklong)
+        assert_arg(1,s,'string')
+        width = width or 70
+        assert_arg(2,width,'number')
+        if breaklong and width < 1 then
+            -- chopping a word into pieces of less than one character would
+            -- never make progress
+            error("width must be at least 1 when breaklong is set", 2)
+        end
+        s = s:gsub('\n',' ') -- remove line breaks
+        s = stringx.strip(s) -- remove leading/trailing whitespace
+        if s == "" then
+            return makelist({ "" })
+        end
+        -- split on runs of whitespace so that consecutive spaces do not
+        -- produce empty words (which could end up as empty output lines)
+        local words = usplit(s, "%s+")
+        local out = {}
+        local idx = 1
+        while words[idx] do
+            out[#out+1], idx = buildline(words, idx, width, breaklong)
+        end
+        return makelist(out)
+    end
+end
+
+--- format a paragraph so that it fits into a line width.
+-- The text is wrapped with `wrap`; the resulting lines are joined with
+-- newlines and a final newline is appended.
+-- @tparam string s the string to format
+-- @tparam[opt=70] integer width the margin width
+-- @tparam[opt=false] boolean breaklong if truthy, words longer than the width given will be forced split.
+-- @return a string, use `wrap` to return a list of lines instead of a string.
+-- @see wrap
+function stringx.fill (s,width,breaklong)
+    local wrapped = stringx.wrap(s,width,breaklong)
+    return concat(wrapped,'\n') .. '\n'
+end
+
+--- Template
+-- @section Template
+
+
+-- Expands `${name}` and `$name` placeholders in `s`.
+-- When `is_callable(tbl)` holds, `tbl` itself is handed to `gsub` as the
+-- replacement function; otherwise `tbl` is indexed with the placeholder name.
+-- A name whose lookup yields nil/false raises an error, unless `safe` is
+-- truthy, in which case the bare name (without the `$`) is used instead.
+-- The braced form is expanded first; the `$name` pass then runs over the
+-- result of that first pass.
+local function _substitute(s,tbl,safe)
+    local lookup = tbl
+    if not is_callable(tbl) then
+        lookup = function(name)
+            local value = tbl[name]
+            if value then
+                return value
+            end
+            if safe then
+                return name
+            end
+            error("not present in table "..name)
+        end
+    end
+    local braced = gsub(s,'%${([%w_]+)}',lookup)
+    return (gsub(braced,'%$([%w_]+)',lookup))
+end
+
+
+
+local Template = {}
+stringx.Template = Template
+Template.__index = Template
+setmetatable(Template, {
+    -- calling the class table itself constructs an instance
+    __call = function(obj,tmpl)
+        return Template.new(tmpl)
+    end
+})
+
+--- Creates a new Template instance.
+-- This is a shortcut to `Template.new(tmpl)`.
+-- @tparam string tmpl the template string
+-- @function Template
+-- @treturn Template
+function Template.new(tmpl)
+    assert_arg(1,tmpl,'string')
+    local res = {}
+    res.tmpl = tmpl
+    setmetatable(res,Template)
+    return res
+end
+
+--- substitute values into a template, throwing an error.
+-- This will throw an error if no name is found.
+-- @tparam table tbl a table of name-value pairs.
+-- @return string with place holders substituted
+function Template:substitute(tbl)
+    assert_arg(1,tbl,'table')
+    return _substitute(self.tmpl,tbl,false)
+end
+
+--- substitute values into a template.
+-- This version does not throw on unknown names; an unknown `$name` or
+-- `${name}` is replaced by the bare name (without the `$`).
+-- @tparam table tbl a table of name-value pairs.
+-- @return string with place holders substituted
+function Template:safe_substitute(tbl)
+    assert_arg(1,tbl,'table')
+    return _substitute(self.tmpl,tbl,true)
+end
+
+--- substitute values into a template, preserving indentation. <br>
+-- If the value is a multiline string _or_ a template, it will insert
+-- the lines at the correct indentation. <br>
+-- Furthermore, if a template, then that template will be substituted
+-- using the same table.
+-- Only the `$name` placeholder form is handled here. A multiline value gets
+-- a newline appended, and the result as a whole always ends with a newline.
+-- This will throw an error if a name is not found.
+-- @tparam table tbl a table of name-value pairs.
+-- @return string with place holders substituted
+function Template:indent_substitute(tbl)
+    assert_arg(1,tbl,'table')
+    if not self.strings then
+        -- split the template once and cache the lines on the instance
+        self.strings = usplit(self.tmpl,'\n')
+    end
+
+    -- the idea is to substitute line by line, grabbing any spaces as
+    -- well as the $var. If the value to be substituted contains newlines,
+    -- then we split that into lines and adjust the indent before inserting.
+    local function subst(line)
+        return line:gsub('(%s*)%$([%w_]+)',function(sp,f)
+            local subtmpl
+            local s = tbl[f]
+            if not s then error("not present in table "..f) end
+            if getmetatable(s) == Template then
+                subtmpl = s
+                s = s.tmpl
+            else
+                s = tostring(s)
+            end
+            if s:find '\n' then
+                local lines = usplit(s, '\n')
+                for i = 1, #lines do
+                    lines[i] = sp..lines[i]
+                end
+                s = concat(lines, '\n') .. '\n'
+            else
+                -- the pattern consumed the whitespace in front of the
+                -- placeholder, so it has to be put back for single-line
+                -- values as well; otherwise "a $b" would turn into "a<value>"
+                -- glued together without the separating space/indent
+                s = sp..s
+            end
+            if subtmpl then
+                -- nested template: expand its own placeholders with the same table
+                return _substitute(s, tbl)
+            else
+                return s
+            end
+        end)
+    end
+
+    local lines = {}
+    for i, line in ipairs(self.strings) do
+        -- parentheses drop the substitution count that gsub also returns
+        lines[i] = (subst(line))
+    end
+    return concat(lines,'\n')..'\n'
+end
+
+
+
--- Miscelaneous
-- @section misc
@@ -598,6 +846,70 @@ do
end
end
+
+--- Python-style formatting operator.
+-- Calling `stringx.format_operator()` overloads the % operator for strings to give
+-- Python/Ruby style formatted output.
+-- This is extended to also do template-like substitution for map-like data.
+--
+-- Note this goes further than the original, and will allow these cases:
+--
+-- 1. a single value
+-- 2. a list of values
+-- 3. a map of var=value pairs
+-- 4. a function, as in gsub
+--
+-- For the last two cases, it uses $-variable substitution.
+--
+-- When called, this function will monkey-patch the global `string` metatable by
+-- adding a `__mod` method.
+--
+-- See <a href="http://lua-users.org/wiki/StringInterpolation">the lua-users wiki</a>
+--
+-- @usage
+-- require 'pl.stringx'.format_operator()
+-- local out1 = '%s = %5.3f' % {'PI',math.pi}                   --> 'PI = 3.142'
+-- local out2 = '$name = $value' % {name='dog',value='Pluto'}   --> 'dog = Pluto'
+function stringx.format_operator()
+
+    local format = string.format
+
+    -- a more forgiving version of string.format, which applies
+    -- tostring() to any value with a %s format.
+    local function formatx (fmt,...)
+        local args = pack(...)
+        local i = 1
+        -- walk the conversion specs (flags, width, precision, conversion
+        -- character) so that `i` is the index of the argument each one consumes
+        for conv in fmt:gmatch('%%[-+ #0]*%d*%.?%d*(.)') do
+            -- '%%' is a literal percent sign and consumes no argument
+            if conv ~= '%' then
+                if conv == 's' and type(args[i]) ~= 'string' then
+                    args[i] = tostring(args[i])
+                end
+                i = i + 1
+            end
+        end
+        return format(fmt,unpack(args))
+    end
+
+    local function basic_subst(s,t)
+        return (s:gsub('%$([%w_]+)',t))
+    end
+
+    getmetatable("").__mod = function(a, b)
+        if b == nil then
+            return a
+        elseif type(b) == "table" and getmetatable(b) == nil then
+            if #b == 0 then -- assume a map-like table
+                return _substitute(a,b,true)
+            else
+                return formatx(a,unpack(b))
+            end
+        elseif type(b) == 'function' then
+            return basic_subst(a,b)
+        else
+            -- any other single value, including tables that have a metatable
+            return formatx(a,b)
+        end
+    end
+end
+
+--- import the stringx functions into the global string (meta)table
function stringx.import()
utils.import(stringx,string)
end
diff --git a/spec/stringx_spec.lua b/spec/stringx_spec.lua
index acd800c..1738bde 100644
--- a/spec/stringx_spec.lua
+++ b/spec/stringx_spec.lua
@@ -312,6 +312,273 @@ describe("stringx", function()
end)
+
+ describe("indent()", function()
+
+ it("adds an indent", function()
+ local t = "a whole lot\nof love"
+
+ assert.equal([[
+ a whole lot
+ of love
+]], stringx.indent(t, 4))
+
+ assert.equal([[
+**easy
+**
+**enough!
+]], stringx.indent("easy\n\nenough!", 2 ,'*'))
+ end)
+
+ it("appends a newline if not present", function()
+ assert.equal(" hello\n world\n", stringx.indent("hello\nworld", 2))
+ assert.equal(" hello\n world\n", stringx.indent("hello\nworld\n", 2))
+ end)
+
+ end)
+
+
+
+ describe("dedent()", function()
+
+ it("removes prefixed whitespace", function()
+ assert.equal([[
+one
+two
+three
+]], stringx.dedent [[
+ one
+ two
+ three
+]])
+ end)
+
+ it("removes prefixed whitespace, retains structure", function()
+ assert.equal([[
+ one
+
+ two
+
+three
+]], stringx.dedent [[
+ one
+
+ two
+
+ three
+]])
+ end)
+
+ it("appends a newline if not present", function()
+ assert.equal("hello\nworld\n", stringx.dedent(" hello\n world"))
+ assert.equal("hello\nworld\n", stringx.dedent(" hello\n world\n"))
+ end)
+
+ end)
+
+
+
+
+ describe("fill()/wrap()", function()
+
+ it("wraps width over limit", function()
+ assert.same({
+ "abc",
+ "def"
+ }, stringx.wrap("abc def", 2))
+ end)
+
+ it("wraps width at limit", function()
+ assert.same({
+ "abc",
+ "def"
+ }, stringx.wrap("abc def", 3))
+ assert.same({
+ "a c",
+ "d f"
+ }, stringx.wrap("a c d f", 3))
+ end)
+
+ it("wraps single letters", function()
+ assert.same({"a"}, stringx.wrap("a"))
+ end)
+
+ it("wraps empty strings", function()
+ assert.same({""}, stringx.wrap(""))
+ assert.same({""}, stringx.wrap(" "))
+ end)
+
+ it("handles leading/trailing whitespace", function()
+ assert.same({"hello"}, stringx.wrap(" hello ", 10))
+ assert.same({"hello"}, stringx.wrap(" hello ", 2))
+ assert.same({"he", "ll", "o"}, stringx.wrap(" hello ", 2, true))
+ end)
+
+ it("handles line-breaks", function()
+ assert.same({"Hello", "Dolly"}, stringx.wrap("Hello\nDolly", 10))
+ assert.same({"Hello Dolly"}, stringx.wrap("Hello\nDolly", 20))
+ end)
+
+ it("doesn't split on accented characters", function()
+ assert.same({"àbcdéfghîj"}, stringx.wrap("àbcdéfghîj"))
+ end)
+
+ it("word-wraps a text", function()
+ -- local binstring = require("luassert.formatters.binarystring")
+ -- assert:add_formatter(binstring)
+ assert.equal([[
+It is often said of
+Lua that it does not
+include batteries.
+That is because the
+goal of Lua is to
+produce a lean
+expressive language
+that will be used on
+all sorts of
+machines, (some of
+which don't even
+have hierarchical
+filesystems). The
+Lua language is the
+equivalent of an
+operating system
+kernel; the creators
+of Lua do not see it
+as their
+responsibility to
+create a full
+software ecosystem
+around the language.
+That is the role of
+the community.
+]], stringx.fill("It is often said of Lua that it does not include batteries. That is because the goal of Lua is to produce a lean expressive language that will be used on all sorts of machines, (some of which don't even have hierarchical filesystems). The Lua language is the equivalent of an operating system kernel; the creators of Lua do not see it as their responsibility to create a full software ecosystem around the language. That is the role of the community.", 20))
+ end)
+
+
+ it("generic wrap test", function()
+ local t = [[
+hello "world" 'this' -is- a bb ccc dddd test... but wouldn't it pass??? final. word-that-can-be-broken
+]]
+
+ assert.same({
+ "hello",
+ '"world"',
+ "'this'",
+ "-is-",
+ "a",
+ "bb",
+ "ccc",
+ "dddd",
+ "test...",
+ "but",
+ "wouldn't",
+ "it",
+ "pass???",
+ "final.",
+ "word-that-can-be-broken",
+ }, stringx.wrap(t, 3))
+ end)
+
+ it("generic wrap test, with overflow breaking", function()
+ local t = [[
+hello "world" 'this' -is- a bb ccc dddd test... but wouldn't it pass??? final. word-that-can-be-broken
+]]
+
+ assert.same({
+ "hel",
+ "lo",
+ '"wo',
+ 'rld',
+ '"',
+ "'th",
+ "is'",
+ "-is",
+ "- a",
+ "bb",
+ "ccc",
+ "ddd",
+ "d",
+ "tes",
+ "t..",
+ ".",
+ "but",
+ "wou",
+ "ldn",
+ "'t",
+ "it",
+ "pas",
+ "s??",
+ "?",
+ "fin",
+ "al.",
+ "wor",
+ "d-t",
+ "hat",
+ "-ca",
+ "n-b",
+ "e-b",
+ "rok",
+ "en",
+ }, stringx.wrap(t, 3, true))
+ end)
+
+ end)
+
+
+
+ describe("Template", function()
+
+ local Template = stringx.Template
+
+
+ it("substitute() replaces placeholders", function()
+ local t1 = Template [[
+while true do
+ $contents
+end
+]]
+
+ assert.equal([[
+while true do
+ print "hello"
+end
+]], t1:substitute {contents = 'print "hello"'})
+ end)
+
+
+ it("substitute() replaces multiple placeholders", function ()
+ local template = Template("${here} is the $answer")
+ local out = template:substitute({ here = 'one', answer = 'two' })
+ assert.is.equal('one is the two', out)
+ end)
+
+
+ it("indent_substitute() indents replaced multi-lines", function()
+ local t1 = Template [[
+while true do
+ $contents
+end
+]]
+
+ assert.equal(
+"while true do\n"..
+" for i = 1,10 do\n"..
+" gotcha(i)\n"..
+" end\n"..
+"\n"..
+"end\n"
+, t1:indent_substitute {contents = [[
+for i = 1,10 do
+ gotcha(i)
+end
+]]})
+ end)
+
+ end)
+
+
+
it("lines()", function()
local function merge(it, ...)
assert(select('#', ...) == 0)
@@ -425,5 +692,49 @@ describe("stringx", function()
assert_str_round_trip( '""')
end)
+
+
+ describe("format_operator()", function()
+
+ setup(function()
+ stringx.format_operator()
+ end)
+
+
+ it("handles plain substitutions", function()
+ assert.equal('[home]', '[%s]' % 'home')
+ assert.equal('fred = 42', '%s = %d' % {'fred',42})
+ end)
+
+
+ it("invokes tostring on %s formats", function()
+ -- mostly works like string.format, except that %s forces use of tostring()
+ -- rather than throwing an error
+ local List = require 'pl.List'
+ assert.equal('TBL:{1,2,3}', 'TBL:%s' % List{1,2,3})
+ end)
+
+
+ it("replaces '$field' references", function()
+ -- table with keys and format with $
+ assert.equal('<1>', '<$one>' % {one=1})
+ end)
+
+
+ it("accepts replacement functions", function()
+ local function subst(k)
+ if k == 'A' then
+ return 'ay'
+ elseif k == 'B' then
+ return 'bee'
+ else
+ return '?'
+ end
+ end
+ assert.equal('ay & bee', '$A & $B' % subst)
+ end)
+
+ end)
+
end)