diff options
Diffstat (limited to 'app/assets/javascripts/content_editor/services/hast_to_prosemirror_converter.js')
-rw-r--r-- | app/assets/javascripts/content_editor/services/hast_to_prosemirror_converter.js | 329 |
1 files changed, 231 insertions, 98 deletions
diff --git a/app/assets/javascripts/content_editor/services/hast_to_prosemirror_converter.js b/app/assets/javascripts/content_editor/services/hast_to_prosemirror_converter.js index b6a3e0bc26a..2c462cdde91 100644 --- a/app/assets/javascripts/content_editor/services/hast_to_prosemirror_converter.js +++ b/app/assets/javascripts/content_editor/services/hast_to_prosemirror_converter.js @@ -20,9 +20,9 @@ */ import { Mark } from 'prosemirror-model'; -import { visitParents } from 'unist-util-visit-parents'; +import { visitParents, SKIP } from 'unist-util-visit-parents'; import { toString } from 'hast-util-to-string'; -import { isFunction } from 'lodash'; +import { isFunction, isString, noop } from 'lodash'; /** * Merges two ProseMirror text nodes if both text nodes @@ -63,10 +63,12 @@ function maybeMerge(a, b) { function createSourceMapAttributes(hastNode, source) { const { position } = hastNode; - return { - sourceMapKey: `${position.start.offset}:${position.end.offset}`, - sourceMarkdown: source.substring(position.start.offset, position.end.offset), - }; + return position && position.end + ? { + sourceMapKey: `${position.start.offset}:${position.end.offset}`, + sourceMarkdown: source.substring(position.start.offset, position.end.offset), + } + : {}; } /** @@ -141,6 +143,20 @@ class HastToProseMirrorConverterState { return this.stack.length === 0; } + findInStack(fn) { + const last = this.stack.length - 1; + + for (let i = last; i >= 0; i -= 1) { + const item = this.stack[i]; + + if (fn(item) === true) { + return item; + } + } + + return null; + } + /** * Creates a text node and adds it to * the top node in the stack. @@ -249,33 +265,38 @@ class HastToProseMirrorConverterState { * @returns An object that contains ProseMirror node factories */ const createProseMirrorNodeFactories = (schema, proseMirrorFactorySpecs, source) => { - const handlers = { - root: (state, hastNode) => state.openNode(schema.topNodeType, hastNode, {}), - text: (state, hastNode) => { - const { factorySpec } = state.top; - - if (/^\s+$/.test(hastNode.value)) { - return; - } + const factories = { + root: { + selector: 'root', + wrapInParagraph: true, + handle: (state, hastNode) => state.openNode(schema.topNodeType, hastNode, {}, {}), + }, + text: { + selector: 'text', + handle: (state, hastNode) => { + const found = state.findInStack((node) => isFunction(node.factorySpec.processText)); + const { value: text } = hastNode; + + if (/^\s+$/.test(text)) { + return; + } - if (factorySpec.wrapTextInParagraph === true) { - state.openNode(schema.nodeType('paragraph')); - state.addText(schema, hastNode.value); - state.closeNode(); - } else { - state.addText(schema, hastNode.value); - } + state.addText(schema, found ? found.factorySpec.processText(text) : text); + }, }, }; - - for (const [hastNodeTagName, factorySpec] of Object.entries(proseMirrorFactorySpecs)) { - if (factorySpec.block) { - handlers[hastNodeTagName] = (state, hastNode, parent, ancestors) => { - const nodeType = schema.nodeType( - isFunction(factorySpec.block) - ? factorySpec.block(hastNode, parent, ancestors) - : factorySpec.block, - ); + for (const [proseMirrorName, factorySpec] of Object.entries(proseMirrorFactorySpecs)) { + const factory = { + selector: factorySpec.selector, + skipChildren: factorySpec.skipChildren, + processText: factorySpec.processText, + parent: factorySpec.parent, + wrapInParagraph: factorySpec.wrapInParagraph, + }; + + if (factorySpec.type === 'block') { + factory.handle = (state, hastNode, parent) => { + const nodeType = schema.nodeType(proseMirrorName); state.closeUntil(parent); state.openNode( @@ -297,9 +318,9 @@ const createProseMirrorNodeFactories = (schema, proseMirrorFactorySpecs, source) state.closeNode(); } }; - } else if (factorySpec.inline) { - const nodeType = schema.nodeType(factorySpec.inline); - handlers[hastNodeTagName] = (state, hastNode, parent) => { + } else if (factorySpec.type === 'inline') { + const nodeType = schema.nodeType(proseMirrorName); + factory.handle = (state, hastNode, parent) => { state.closeUntil(parent); state.openNode( nodeType, @@ -310,23 +331,115 @@ const createProseMirrorNodeFactories = (schema, proseMirrorFactorySpecs, source) // Inline nodes do not have children therefore they are immediately closed state.closeNode(); }; - } else if (factorySpec.mark) { - const markType = schema.marks[factorySpec.mark]; - handlers[hastNodeTagName] = (state, hastNode, parent) => { + } else if (factorySpec.type === 'mark') { + const markType = schema.marks[proseMirrorName]; + factory.handle = (state, hastNode, parent) => { state.openMark(markType, getAttrs(factorySpec, hastNode, parent, source)); if (factorySpec.inlineContent) { state.addText(schema, hastNode.value); } }; + } else if (factorySpec.type === 'ignore') { + factory.handle = noop; } else { - throw new RangeError(`Unrecognized node factory spec ${JSON.stringify(factorySpec)}`); + throw new RangeError( + `Unrecognized ProseMirror object type ${JSON.stringify(factorySpec.type)}`, + ); } + + factories[proseMirrorName] = factory; } - return handlers; + return factories; }; +const findFactory = (hastNode, ancestors, factories) => + Object.entries(factories).find(([, factorySpec]) => { + const { selector } = factorySpec; + + return isFunction(selector) + ? selector(hastNode, ancestors) + : [hastNode.tagName, hastNode.type].includes(selector); + })?.[1]; + +const findParent = (ancestors, parent) => { + if (isString(parent)) { + return ancestors.reverse().find((ancestor) => ancestor.tagName === parent); + } + + return ancestors[ancestors.length - 1]; +}; + +const calcTextNodePosition = (textNode) => { + const { position, value, type } = textNode; + + if (type !== 'text' || (!position.start && !position.end) || (position.start && position.end)) { + return textNode.position; + } + + const span = value.length - 1; + + if (position.start && !position.end) { + const { start } = position; + + return { + start, + end: { + row: start.row, + column: start.column + span, + offset: start.offset + span, + }, + }; + } + + const { end } = position; + + return { + start: { + row: end.row, + column: end.column - span, + offset: end.offset - span, + }, + end, + }; +}; + +const removeEmptyTextNodes = (nodes) => + nodes.filter( + (node) => node.type !== 'text' || (node.type === 'text' && !/^\s+$/.test(node.value)), + ); + +const wrapInlineElements = (nodes, wrappableTags) => + nodes.reduce((children, child) => { + const previous = children[children.length - 1]; + + if (child.type !== 'text' && !wrappableTags.includes(child.tagName)) { + return [...children, child]; + } + + const wrapperExists = previous?.properties.wrapper; + + if (wrapperExists) { + const wrapper = previous; + + wrapper.position.end = child.position.end; + wrapper.children.push(child); + + return children; + } + + const wrapper = { + type: 'element', + tagName: 'p', + position: calcTextNodePosition(child), + children: [child], + properties: { wrapper: true }, + }; + + return [...children, wrapper]; + }, []); + /** * Converts a Hast AST to a ProseMirror document based on a series * of specifications that describe how to map all the nodes of the former @@ -339,8 +452,9 @@ const createProseMirrorNodeFactories = (schema, proseMirrorFactorySpecs, source) * The object should have the following shape: * * { - * [hastNode.tagName]: { - * [block|node|mark]: [ProseMirror.Node.name], + * [ProseMirrorNodeOrMarkName]: { + * type: 'block' | 'inline' | 'mark', + * selector: String | hastNode -> Boolean, * ...configurationOptions * } * } @@ -348,57 +462,21 @@ const createProseMirrorNodeFactories = (schema, proseMirrorFactorySpecs, source) * Where each property in the object represents a HAST node with a given tag name, for example: * * { - * h1: {}, - * h2: {}, - * table: {}, - * strong: {}, - * // etc - * } - * - * You can specify the type of ProseMirror object adding one the following - * properties: - * - * 1. "block": A ProseMirror node that contains one or more children. - * 2. "inline": A ProseMirror node that doesn’t contain any children although - * it can have inline content like a code block or a reference. - * 3. "mark": A ProseMirror mark. - * - * The value of that property should be the name of the ProseMirror node or mark, i.e: - * - * { - * h1: { - * block: 'heading', + * horizontalRule: { + * type: 'block', + * selector: 'hr', * }, - * h2: { - * block: 'heading', + * heading: { + * type: 'block', + * selector: (hastNode) => ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'].includes(hastNode), * }, - * img: { - * node: 'image', + * bold: { + * type: 'mark' + * selector: (hastNode) => ['b', 'strong'].includes(hastNode), * }, - * strong: { - * mark: 'bold', - * } - * } + * // etc + * } * - * You can compute a ProseMirror’s node or mark name based on the HAST node - * by passing a function instead of a String. The converter invokes the function - * and provides a HAST node object: - * - * { - * list: { - * block: (hastNode) => { - * let type = 'bulletList'; - - * if (hastNode.children.some(isTaskItem)) { - * type = 'taskList'; - * } else if (hastNode.ordered) { - * type = 'orderedList'; - * } - - * return type; - * } - * } - * } * * Configuration options * ---------------------- @@ -406,6 +484,28 @@ const createProseMirrorNodeFactories = (schema, proseMirrorFactorySpecs, source) * You can customize the conversion process for every node or mark * setting the following properties in the specification object: * + * **type** + * + * The `type` property should have one of following three values: + * + * 1. "block": A ProseMirror node that contains one or more children. + * 2. "inline": A ProseMirror node that doesn’t contain any children although + * it can have inline content like an image or a mention object. + * 3. "mark": A ProseMirror mark. + * 4. "ignore": A hast node that should be ignored and won’t be mapped to a + * ProseMirror node. + * + * **selector** + * + * The `selector` property matches a HastNode to a ProseMirror node or + * Mark. If you assign a string value to this property, the converter + * will match the first hast node with a `tagName` or `type` property + * that equals the string value. + * + * If you assign a function, the converter will invoke the function with + * the hast node and its ancestors. The function should return `true` + * if the hastNode matches the custom criteria implemented in the function + * * **getAttrs** * * Computes a ProseMirror node or mark attributes. The converter will invoke @@ -415,12 +515,19 @@ const createProseMirrorNodeFactories = (schema, proseMirrorFactorySpecs, source) * 2. hasParents: All the hast node’s ancestors up to the root node * 3. source: Markdown source file’s content * - * **wrapTextInParagraph** + * **wrapInParagraph** * - * This property only applies to block nodes. If a block node contains text, - * it will wrap that text in a paragraph. This is useful for ProseMirror block + * This property only applies to block nodes. If a block node contains inline + * elements like text, images, links, etc, the converter will wrap those inline + * elements in a paragraph. This is useful for ProseMirror block * nodes that don’t allow text directly such as list items and tables. * + * **processText** + * + * This property only applies to block nodes. If a block node contains text, + * it allows applying a processing function to that text. This is useful when + * you can transform the text node, i.e trim(), substring(), etc. + * * **skipChildren** * * Skips a hast node’s children while traversing the tree. @@ -434,6 +541,13 @@ const createProseMirrorNodeFactories = (schema, proseMirrorFactorySpecs, source) * Use this property along skipChildren to provide custom processing of child nodes * for a block node. * + * **parent** + * + * Specifies what is the node’s parent. This is useful when the node’s parent is not + * its direct ancestor in Abstract Syntax Tree. For example, imagine that you want + * to make <tr> elements a direct children of tables and skip `<thead>` and `<tbody>` + * altogether. + * * @param {model.Document_Schema} params.schema A ProseMirror schema that specifies the shape * of the ProseMirror document. * @param {Object} params.factorySpec A factory specification as described above @@ -442,17 +556,20 @@ const createProseMirrorNodeFactories = (schema, proseMirrorFactorySpecs, source) * * @returns A ProseMirror document */ -export const createProseMirrorDocFromMdastTree = ({ schema, factorySpecs, tree, source }) => { +export const createProseMirrorDocFromMdastTree = ({ + schema, + factorySpecs, + wrappableTags, + tree, + source, +}) => { const proseMirrorNodeFactories = createProseMirrorNodeFactories(schema, factorySpecs, source); const state = new HastToProseMirrorConverterState(); visitParents(tree, (hastNode, ancestors) => { - const parent = ancestors[ancestors.length - 1]; - const skipChildren = factorySpecs[hastNode.tagName]?.skipChildren; - - const handler = proseMirrorNodeFactories[hastNode.tagName || hastNode.type]; + const factory = findFactory(hastNode, ancestors, proseMirrorNodeFactories); - if (!handler) { + if (!factory) { throw new Error( `Hast node of type "${ hastNode.tagName || hastNode.type @@ -460,9 +577,25 @@ export const createProseMirrorDocFromMdastTree = ({ schema, factorySpecs, tree, ); } - handler(state, hastNode, parent, ancestors); + const parent = findParent(ancestors, factory.parent); + + if (factory.wrapInParagraph) { + /** + * Modifying parameters is a bad practice. For performance reasons, + * the author of the unist-util-visit-parents function recommends + * modifying nodes in place to avoid traversing the Abstract Syntax + * Tree more than once + */ + // eslint-disable-next-line no-param-reassign + hastNode.children = wrapInlineElements( + removeEmptyTextNodes(hastNode.children), + wrappableTags, + ); + } + + factory.handle(state, hastNode, parent); - return skipChildren === true ? 'skip' : true; + return factory.skipChildren === true ? SKIP : true; }); let doc; |