diff options
author | dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> | 2021-12-09 16:10:44 +0300 |
---|---|---|
committer | Richard Steinmetz <richard@steinmetz.cloud> | 2021-12-10 19:24:07 +0300 |
commit | cd9b01d1474a158922cc6534d2aa6e0f6467098c (patch) | |
tree | 24bd16be5eff3fe327412bceae59cf826a574f90 /src | |
parent | 71dca9e3e9bd375880b9ca8e2d8acbd6e18fc903 (diff) |
Bump html-to-text from 5.1.1 to 8.1.0
Bumps [html-to-text](https://github.com/html-to-text/node-html-to-text) from 5.1.1 to 8.1.0.
- [Release notes](https://github.com/html-to-text/node-html-to-text/releases)
- [Changelog](https://github.com/html-to-text/node-html-to-text/blob/master/CHANGELOG.md)
- [Commits](https://github.com/html-to-text/node-html-to-text/compare/5.1.1...8.1.0)
---
updated-dependencies:
- dependency-name: html-to-text
  dependency-type: direct:production
  update-type: version-update:semver-major
...
Signed-off-by: dependabot[bot] <support@github.com>
Signed-off-by: Richard Steinmetz <richard@steinmetz.cloud>
Diffstat (limited to 'src')
-rw-r--r-- | src/tests/unit/util/text.spec.js | 80 | ||||
-rw-r--r-- | src/util/text.js | 81 |
2 files changed, 142 insertions, 19 deletions
diff --git a/src/tests/unit/util/text.spec.js b/src/tests/unit/util/text.spec.js index 368a0d887..3107011bd 100644 --- a/src/tests/unit/util/text.spec.js +++ b/src/tests/unit/util/text.spec.js @@ -33,6 +33,33 @@ describe('text', () => { expect(actual).to.deep.equal(expected) }) + it('removes leading line breaks', () => { + const source = html('<br><br><br>hello world') + const expected = plain('hello world') + + const actual = toPlain(source) + + expect(actual).to.deep.equal(expected) + }) + + it('removes trailing line breaks', () => { + const source = html('hello world<br><br><br>') + const expected = plain('hello world') + + const actual = toPlain(source) + + expect(actual).to.deep.equal(expected) + }) + + it('removes trailing spaces of each line', () => { + const source = html('line1 <br>line2 <br>line3') + const expected = plain('line1\nline2\nline3') + + const actual = toPlain(source) + + expect(actual).to.deep.equal(expected) + }) + it('breaks on divs', () => { const source = html('<div>one</div><div>two</div>') @@ -41,9 +68,18 @@ describe('text', () => { expect(actual).to.deep.equal(plain('one\ntwo')) }) + it('merges spaces at the beginning of a line', () => { + const source = html('<div> <div> line1</div></div>') + const expected = plain(' line1') + + const actual = toPlain(source) + + expect(actual).to.deep.equal(expected) + }) + it('produces a line break for each ending div element', () => { - const source = html('<div>' + ' <div>' + ' line1' + ' </div>' + '</div>' + '<div>line2</div>') - const expected = plain(' line1\n\nline2') + const source = html('<div><div>line1</div></div><div>line3</div>') + const expected = plain('line1\n\nline3') const actual = toPlain(source) @@ -68,7 +104,7 @@ describe('text', () => { expect(actual).to.deep.equal(expected) }) - it('converts paragraphs to text', () => { + it('produces a single line break between paragraphs', () => { const source = html('<p>hello</p><p>world</p>') const expected = plain('hello\nworld') @@ -77,6 
+113,44 @@ describe('text', () => { expect(actual).to.deep.equal(expected) }) + it('produces a single line break between a div and a paragraph', () => { + const source = html('<div>hello</div><p>world</p>') + const expected = plain('hello\nworld') + + const actual = toPlain(source) + + expect(actual).to.deep.equal(expected) + }) + + it('produces a single line break after each block element', () => { + const selectors = ['p', 'div', 'header', 'footer', 'form', 'article', 'aside', 'main', 'nav', 'section'] + const source = html( + selectors + .map(tag => `<${tag}>foobar</${tag}>`) + .join('') + ) + const expected = plain(selectors.map(tag => 'foobar').join('\n')) + + const actual = toPlain(source) + + expect(actual).to.deep.equal(expected) + }) + + it('produces exactly one line break for each closing block element', () => { + const selectors = ['p', 'div', 'header', 'footer', 'form', 'article', 'aside', 'main', 'nav', 'section'] + const source = html( + selectors + .map(tag => `<${tag}><${tag}>foobar</${tag}></${tag}>`) + .join('') + ) + const expected = plain(selectors.map(tag => 'foobar').join('\n\n')) + + const actual = toPlain(source) + + + expect(actual).to.deep.equal(expected) + }) + it('converts lists to text', () => { const source = html('<ul><li>one</li><li>two</li><li>three</li></ul>') const expected = plain(' * one\n * two\n * three') diff --git a/src/util/text.js b/src/util/text.js index ab961813f..f5794c01a 100644 --- a/src/util/text.js +++ b/src/util/text.js @@ -21,7 +21,7 @@ import isString from 'lodash/fp/isString' import { curry } from 'ramda' -import { fromString } from 'html-to-text' +import { convert } from 'html-to-text' /** * @type {Text} @@ -114,30 +114,79 @@ export const toPlain = (text) => { if (text.format === 'plain') { return text } - const withBlockBreaks = text.value.replace(/<\/div>/gi, '</div><br>') - const converted = fromString(withBlockBreaks, { - noLinkBrackets: true, - ignoreHref: true, - ignoreImage: true, + // Build shared 
options for all block tags + const blockTags = ['p', 'div', 'header', 'footer', 'form', 'article', 'aside', 'main', 'nav', 'section'] + const blockSelectors = blockTags.map(tag => ({ + selector: tag, + format: 'customBlock', + options: { + preserveLeadingWhitespace: true, + }, + })) + + const converted = convert(text.value, { wordwrap: false, - format: { - blockquote(element, fn, options) { - return fn(element.children, options) - .replace(/\n\n\n/g, '\n\n') // remove triple line breaks - .replace(/^/gm, '> ') // add > quotation to each line + formatters: { + customBlock(elem, walk, builder, formatOptions) { + builder.openBlock({ + isPre: formatOptions.preserveLeadingWhitespace, + leadingLineBreaks: 0, + }) + walk(elem.children, builder) + builder.closeBlock({ + trailingLineBreaks: 0, + blockTransform: text => text + .replace(/^ {2,}/gm, ' '), // merge leading spaces + }) + // Don't rely on the built-in leading/trailing line break feature. + // Instead, we add a forced line break here because otherwise multiple + // line breaks might be merged. But we want exactly one line break for + // each closing tag. 
+ builder.addLineBreak() }, - paragraph(element, fn, options) { - return fn(element.children, options) + '\n' + customBlockQuote(elem, walk, builder, formatOptions) { + builder.openBlock({ + leadingLineBreaks: formatOptions.leadingLineBreaks, + }) + walk(elem.children, builder) + builder.closeBlock({ + trailingLineBreaks: formatOptions.trailingLineBreaks, + blockTransform: text => text + .replace(/\n{3,}/g, '\n\n') // merge 3 or more line breaks + .replace(/^/gm, '> '), // add quote marker at the start of each line + }) }, }, + selectors: [ + { + selector: 'img', + format: 'skip', + }, + { + selector: 'a', + options: { + linkBrackets: false, + ignoreHref: true, + }, + }, + { + selector: 'blockquote', + format: 'customBlockQuote', + options: { + leadingLineBreaks: 0, + trailingLineBreaks: 1, + }, + }, + ...blockSelectors, + ], }) return plain( converted - .replace(/\n\n\n/g, '\n\n') // remove triple line breaks - .replace(/^[\n\r]+/g, '') // trim line breaks at beginning and end - .replace(/ $/gm, '') // trim white space at end of each line + .replace(/^\n+/, '') // trim leading line breaks + .replace(/\n+$/, '') // trim trailing line breaks + .replace(/ +$/gm, '') // trim trailing spaces of each line .replace(/^--$/gm, '-- ') // hack to create the correct email signature separator ) } |