Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stevedonovan/Penlight.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Thijs Schreijer <thijs@thijsschreijer.nl> 2022-01-04 11:48:43 +0300
committer: Thijs Schreijer <thijs@thijsschreijer.nl> 2022-01-05 16:23:14 +0300
commit: 838a27aa13469f9173c74e0ad5e3a16c1469f3e4 (patch)
tree: ce5aefc891a7bff0f331750088caa73bafaff6db
parent: b08b18afa129c3c0f924fb139568d09c67f93db7 (diff)
feat(text) wrap/fill can break long words, honours width
-rw-r--r-- CHANGELOG.md 6
-rw-r--r-- lua/pl/text.lua 81
-rw-r--r-- spec/text_spec.lua 136
3 files changed, 166 insertions, 57 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 152c6c9..fe6d7ed 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,12 @@ see [CONTRIBUTING.md](CONTRIBUTING.md#release-instructions-for-a-new-version) fo
also other functions now take a range. [#404](https://github.com/lunarmodules/Penlight/pull/404)
- fix: `lapp` enums allow [patterns magic characters](https://www.lua.org/pil/20.2.html)
[#393](https://github.com/lunarmodules/Penlight/pull/393)
+ - fix: `text.wrap` and `text.fill` numerous fixes for handling whitespace,
+ accented characters, honouring width, etc.
+ [#400](https://github.com/lunarmodules/Penlight/pull/400)
+ - feat: `text.wrap` and `text.fill` have a new parameter to forcefully break words
+ longer than the width given.
+ [#400](https://github.com/lunarmodules/Penlight/pull/400)
## 1.11.0 (2021-08-18)
diff --git a/lua/pl/text.lua b/lua/pl/text.lua
index 92ac62b..eaf1cee 100644
--- a/lua/pl/text.lua
+++ b/lua/pl/text.lua
@@ -10,7 +10,7 @@
-- @module pl.text
local gsub = string.gsub
-local concat,append = table.concat,table.insert
+local concat, t_remove = table.concat, table.remove
local utils = require 'pl.utils'
local bind1,usplit,assert_arg = utils.bind1,utils.split,utils.assert_arg
local is_callable = require 'pl.types'.is_callable
@@ -88,39 +88,68 @@ function text.dedent (s)
return concat(lst,'\n')..'\n'
end
---- format a paragraph into lines so that they fit into a line width.
--- It will not break long words, so lines can be over the length
--- to that extent.
--- @tparam string s the string to format
--- @tparam[opt=70] integer width the margin width
--- @return a list of lines (List object), use `fill` to return a string instead of a `List`.
--- @see pl.List
--- @see fill
-function text.wrap (s,width)
- assert_arg(1,s,'string')
- width = width or 70
- s = s:gsub('\n',' ')
- local i,nxt = 1
- local lines,line = {}
- repeat
- nxt = i+width
- if s:find("%S",nxt) then -- inside a word
- nxt = s:find('%s',nxt) -- so find word boundary
+
+do
+ --- Take words off the front of `words` to assemble one output line.
+ -- Mutates `words`: consumed entries are removed, and when `breaklong` is
+ -- truthy an over-long first word is replaced by its unconsumed remainder.
+ -- Lengths are taken with `#`, so they are byte counts, not character counts.
+ -- @tparam table words array of words still to be placed; must not be empty
+ -- @tparam integer size the maximum line width
+ -- @param breaklong if truthy, a first word longer than `size` is cut after
+ -- `size` bytes; otherwise it is emitted whole and overflows the width
+ -- @return the assembled line, passed through `strip`
+ -- @return the same `words` table, minus whatever was consumed
+ local buildline = function(words, size, breaklong)
+ local line = {}
+ if #words[1] > size then
+ -- word longer than line
+ if not breaklong then
+ line[1] = words[1]
+ t_remove(words, 1)
+ else
+ line[1] = words[1]:sub(1, size)
+ words[1] = words[1]:sub(size + 1, -1)
+ end
+ else
+ local len = 0
+ -- `len` is the width used so far, counting one separator after each word.
+ -- The nil check has to guard the whole test: the earlier
+ -- `a and b or (len == 0 and #words[1] == size)` form could take the length
+ -- of nil once the list was exhausted, and its extra clause was redundant
+ -- anyway (with len == 0, a word of exactly `size` already passes `<=`).
+ while words[1] and (len + #words[1] <= size) do
+ if words[1] ~= "" then
+ line[#line+1] = words[1]
+ len = len + #words[1] + 1
end
- line = s:sub(i,nxt)
- i = i + #line
- append(lines,strip(line))
- until i > (#s - 1)
- return makelist(lines)
+ t_remove(words, 1)
+ end
+ end
+ return strip(concat(line, " ")), words
+ end
+
+ --- format a paragraph into lines so that they fit into a line width.
+ -- It will not break long words by default, so lines can be over the length
+ -- to that extent.
+ -- Line breaks in `s` are treated as spaces, and leading/trailing whitespace is dropped.
+ -- @tparam string s the string to format
+ -- @tparam[opt=70] integer width the margin width
+ -- @tparam[opt=false] boolean breaklong if truthy, words longer than the width given will be forced split.
+ -- @return a list of lines (List object), use `fill` to return a string instead of a `List`.
+ -- An empty or all-whitespace `s` yields a list holding a single empty string.
+ -- @raise if `s` is not a string, or if `breaklong` is set and `width` is less than 1
+ -- @see pl.List
+ -- @see fill
+ text.wrap = function(s, width, breaklong)
+ assert_arg(1,s,'string')
+ width = width or 70
+ if breaklong and width < 1 then
+ -- a chunk of zero (or negative) length never shortens the word, so
+ -- the loop below would never finish
+ error("argument 2: width must be at least 1 when breaking long words", 2)
+ end
+ s = s:gsub('\n',' ') -- remove line breaks
+ s = strip(s) -- remove leading/trailing whitespace
+ if s == "" then
+ return makelist({ "" }) -- same List type as the non-empty case
+ end
+ local out = {}
+ local words = split(s, "%s")
+ while words[1] do
+ out[#out+1], words = buildline(words, width, breaklong)
+ end
+ return makelist(out)
+ end
end
--- format a paragraph so that it fits into a line width.
-- @tparam string s the string to format
-- @tparam[opt=70] integer width the margin width
+-- @tparam[opt=false] boolean breaklong if truthy, words longer than the width given will be forced split.
+-- The result is the lines from `wrap` joined with newlines, followed by one trailing newline.
-- @return a string, use `wrap` to return a list of lines instead of a string.
-- @see wrap
-function text.fill (s,width)
- return concat(text.wrap(s,width),'\n') .. '\n'
+function text.fill (s,width,breaklong)
+ return concat(text.wrap(s,width,breaklong),'\n') .. '\n'
end
diff --git a/spec/text_spec.lua b/spec/text_spec.lua
index 8182302..b69bf66 100644
--- a/spec/text_spec.lua
+++ b/spec/text_spec.lua
@@ -165,34 +165,6 @@ three
describe("fill()/wrap()", function()
- it("word-wraps a text", function()
- assert.equal([[
-It is often said of Lua
-that it does not include
-batteries. That is because
-the goal of Lua is to
-produce a lean expressive
-language that will be
-used on all sorts of
-machines, (some of which
-don't even have hierarchical
-filesystems). The Lua
-language is the equivalent
-of an operating system
-kernel; the creators
-of Lua do not see it
-as their responsibility
-to create a full software
-ecosystem around the
-language. That is the
-role of the community.
-]], text.fill("It is often said of Lua that it does not include batteries. That is because the goal of Lua is to produce a lean expressive language that will be used on all sorts of machines, (some of which don't even have hierarchical filesystems). The Lua language is the equivalent of an operating system kernel; the creators of Lua do not see it as their responsibility to create a full software ecosystem around the language. That is the role of the community.", 20))
- end)
-
- it("wraps single letters", function()
- assert.same({"a"}, text.wrap("a"))
- end)
-
it("wraps width over limit", function()
assert.same({
"abc",
@@ -205,15 +177,72 @@ role of the community.
"abc",
"def"
}, text.wrap("abc def", 3))
+ assert.same({
+ "a c",
+ "d f"
+ }, text.wrap("a c d f", 3))
+ end)
+
+ it("wraps single letters", function()
+ assert.same({"a"}, text.wrap("a"))
+ end)
+
+ it("wraps empty strings", function()
+ assert.same({""}, text.wrap(""))
+ assert.same({""}, text.wrap(" "))
+ end)
+
+ it("handles leading/trailing whitespace", function()
+ assert.same({"hello"}, text.wrap(" hello ", 10))
+ assert.same({"hello"}, text.wrap(" hello ", 2))
+ assert.same({"he", "ll", "o"}, text.wrap(" hello ", 2, true))
+ end)
+
+ it("handles line-breaks", function()
+ assert.same({"Hello", "Dolly"}, text.wrap("Hello\nDolly", 10))
+ assert.same({"Hello Dolly"}, text.wrap("Hello\nDolly", 20))
end)
it("doesn't split on accented characters", function()
assert.same({"àbcdéfghîj"}, text.wrap("àbcdéfghîj"))
end)
+ it("word-wraps a text", function()
+ local binstring = require("luassert.formatters.binarystring")
+ assert:add_formatter(binstring)
+ assert.equal([[
+It is often said of
+Lua that it does not
+include batteries.
+That is because the
+goal of Lua is to
+produce a lean
+expressive language
+that will be used on
+all sorts of
+machines, (some of
+which don't even
+have hierarchical
+filesystems). The
+Lua language is the
+equivalent of an
+operating system
+kernel; the creators
+of Lua do not see it
+as their
+responsibility to
+create a full
+software ecosystem
+around the language.
+That is the role of
+the community.
+]], text.fill("It is often said of Lua that it does not include batteries. That is because the goal of Lua is to produce a lean expressive language that will be used on all sorts of machines, (some of which don't even have hierarchical filesystems). The Lua language is the equivalent of an operating system kernel; the creators of Lua do not see it as their responsibility to create a full software ecosystem around the language. That is the role of the community.", 20))
+ end)
+
+
it("generic wrap test", function()
local t = [[
-hello "world" 'this' -is- a bb ccc dddd test... but wouldn't it pass??? final. word-that-can-be-broken
+hello "world" 'this' -is- a bb ccc dddd test... but wouldn't it pass??? final. word-that-can-be-broken
]]
assert.same({
@@ -221,18 +250,63 @@ hello "world" 'this' -is- a bb ccc dddd test... but wouldn't it pass??? final. w
'"world"',
"'this'",
"-is-",
- "a bb",
+ "a",
+ "bb",
"ccc",
"dddd",
"test...",
"but",
"wouldn't",
- "it pass???",
+ "it",
+ "pass???",
"final.",
"word-that-can-be-broken",
}, text.wrap(t, 3))
end)
+ it("generic wrap test, with overflow breaking", function()
+ local t = [[
+hello "world" 'this' -is- a bb ccc dddd test... but wouldn't it pass??? final. word-that-can-be-broken
+]]
+
+ assert.same({
+ "hel",
+ "lo",
+ '"wo',
+ 'rld',
+ '"',
+ "'th",
+ "is'",
+ "-is",
+ "- a",
+ "bb",
+ "ccc",
+ "ddd",
+ "d",
+ "tes",
+ "t..",
+ ".",
+ "but",
+ "wou",
+ "ldn",
+ "'t",
+ "it",
+ "pas",
+ "s??",
+ "?",
+ "fin",
+ "al.",
+ "wor",
+ "d-t",
+ "hat",
+ "-ca",
+ "n-b",
+ "e-b",
+ "rok",
+ "en",
+ }, text.wrap(t, 3, true))
+ end)
+
end)
end)