diff options
Diffstat (limited to 'lib/kramdown/parser/atlassian_document_format.rb')
-rw-r--r-- | lib/kramdown/parser/atlassian_document_format.rb | 381 |
1 files changed, 381 insertions, 0 deletions
# frozen_string_literal: true

module Kramdown
  module Parser
    # Parses an Atlassian Document Format (ADF) JSON document into a
    # Kramdown AST tree, for conversion to another format.
    # The primary goal is to convert to GitLab Markdown.
    #
    # This parser does NOT resolve external resources, such as media/attachments.
    # A special url is generated for media based on the id, for example
    #   ![jira-10050-field-description](adf-media://79411c6b-50e0-477f-b4ed-ac3a5887750c)
    # so that a later filter/process can resolve those.
    #
    # @see https://developer.atlassian.com/cloud/jira/platform/apis/document/structure/ ADF Document Structure
    # @see https://developer.atlassian.com/cloud/jira/platform/apis/document/playground/ ADF Playground
    # @see https://developer.atlassian.com/cloud/jira/platform/apis/document/viewer/ ADF Viewer
    class AtlassianDocumentFormat < Kramdown::Parser::Base
      # Guarded so that re-evaluating this file does not redefine the constants.
      unless defined?(TOP_LEVEL_BLOCK_NODES)
        # Node types accepted directly under the `doc` node and inside a blockquote.
        TOP_LEVEL_BLOCK_NODES = %w[blockquote
                                   bulletList
                                   codeBlock
                                   heading
                                   mediaGroup
                                   mediaSingle
                                   orderedList
                                   panel
                                   paragraph
                                   rule
                                   table].freeze

        # NOTE(review): the table entries are snake_case, whereas the node
        # types dispatched in this class are camelCase (`tableRow`,
        # `tableHeader`, `tableCell`). This constant is not referenced in this
        # file, so the values are left untouched - confirm before relying on it.
        CHILD_BLOCK_NODES = %w[listItem
                               media
                               table_cell
                               table_header
                               table_row].freeze

        # Node types accepted inside paragraphs and headings.
        INLINE_NODES = %w[emoji
                          hardBreak
                          inlineCard
                          mention
                          text].freeze

        # Mark types that `apply_marks` knows how to translate.
        MARKS = %w[code
                   em
                   link
                   strike
                   strong
                   subsup
                   textColor
                   underline].freeze

        # Node types accepted inside a table cell or table header.
        TABLE_CELL_NODES = %w[blockquote
                              bulletList
                              codeBlock
                              heading
                              mediaGroup
                              orderedList
                              panel
                              paragraph
                              rule].freeze

        # Node types accepted inside a list item.
        LIST_ITEM_NODES = %w[bulletList
                             codeBlock
                             mediaSingle
                             orderedList
                             paragraph].freeze

        # Node types accepted inside a panel.
        PANEL_NODES = %w[bulletList
                         heading
                         orderedList
                         paragraph].freeze

        # Emoji shortcode placed at the start of a converted panel, keyed by
        # the ADF `panelType`.
        PANEL_EMOJIS = { info: ':information_source:',
                         note: ':notepad_spiral:',
                         warning: ':warning:',
                         success: ':white_check_mark:',
                         error: ':octagonal_sign:' }.freeze

        # The default language for code blocks is `java`, as indicated in the
        # references below. You can't change the default in Jira. There was a
        # comment that indicated Confluence can set the default language.
        # @see https://jira.atlassian.com/secure/WikiRendererHelpAction.jspa?section=advanced&_ga=2.5135221.773220073.1591894917-438867908.1591894917
        # @see https://jira.atlassian.com/browse/JRASERVER-29184?focusedCommentId=832255&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-832255
        CODE_BLOCK_DEFAULT_LANGUAGE = 'java'
      end

      # Parses the JSON in `@source` and appends the converted top-level
      # block nodes to `@root`.
      #
      # @raise [Kramdown::Error] when the source is not valid JSON or its
      #   root is not an ADF `doc` node
      def parse
        ast = Gitlab::Json.parse(@source)

        validate_document(ast)

        process_content(@root, ast, TOP_LEVEL_BLOCK_NODES)
      rescue ::JSON::ParserError => e
        msg = 'Invalid Atlassian Document Format JSON'
        Gitlab::AppLogger.error msg
        Gitlab::AppLogger.error e

        raise ::Kramdown::Error, msg
      end

      # Dispatches each child of `ast_node` to its `process_<type>` method.
      # Children whose type is not in `allowed_types` are skipped.
      #
      # @param element [Kramdown::Element] element that receives the converted children
      # @param ast_node [Hash] ADF node whose `content` is processed
      # @param allowed_types [Array<String>] ADF node types allowed at this position
      def process_content(element, ast_node, allowed_types)
        # ADF nodes may omit `content` entirely (e.g. an empty paragraph),
        # so treat a missing value as "no children" instead of crashing.
        Array(ast_node['content']).each do |node|
          next unless allowed_types.include?(node['type'])

          public_send("process_#{node['type'].underscore}", element, node) # rubocop:disable GitlabSecurity/PublicSend
        end
      end

      # Converts a `blockquote` node into a `:blockquote` element.
      def process_blockquote(element, ast_node)
        new_element = Element.new(:blockquote)
        element.children << new_element

        process_content(new_element, ast_node, TOP_LEVEL_BLOCK_NODES)
      end

      # Converts a `bulletList` node into a `:ul` element.
      def process_bullet_list(element, ast_node)
        new_element = Element.new(:ul)
        element.children << new_element

        process_content(new_element, ast_node, %w[listItem])
      end

      # Converts a `codeBlock` node into a `:codeblock` element, falling back
      # to CODE_BLOCK_DEFAULT_LANGUAGE when no language is given.
      def process_code_block(element, ast_node)
        code_text = gather_text(ast_node)
        lang = ast_node.dig('attrs', 'language') || CODE_BLOCK_DEFAULT_LANGUAGE

        element.children << Element.new(:codeblock, code_text, {}, { lang: lang })
      end

      # Emits an `emoji` node as plain text, preferring `attrs.text` over
      # `attrs.shortName`. Nothing is emitted when both are missing.
      def process_emoji(element, ast_node)
        emoji = ast_node.dig('attrs', 'text') || ast_node.dig('attrs', 'shortName')
        return unless emoji

        # `add_text` is not defined in this file (presumably inherited from
        # Kramdown::Parser::Base).
        add_text(emoji, element, :text)
      end

      # Converts a `hardBreak` node into a `:br` element.
      def process_hard_break(element, _ast_node)
        element.children << Element.new(:br)
      end

      # Converts a `heading` node into a `:header` element. The level is
      # clamped to 1..6 and the concatenated text of the heading is stored in
      # the element's `:raw_text` option.
      def process_heading(element, ast_node)
        level = ast_node.dig('attrs', 'level').to_i.clamp(1, 6)
        options = { level: level }
        new_element = Element.new(:header, nil, nil, options)
        element.children << new_element

        process_content(new_element, ast_node, INLINE_NODES)
        extract_element_text(new_element, new_element.options[:raw_text] = +'')
      end

      # Converts an `inlineCard` node into either a plain link (when
      # `attrs.url` is present) or a codespan holding `attrs.data`.
      def process_inline_card(element, ast_node)
        url = ast_node.dig('attrs', 'url')
        data = ast_node.dig('attrs', 'data')

        if url
          # we don't pull a description from the link and create a panel,
          # just convert to a normal link
          new_element = Element.new(:text, url)
          element.children << wrap_element(new_element, :a, nil, { 'href' => url })
        elsif data
          # data is JSONLD (https://json-ld.org/), so for now output
          # as a codespan of text, with `adf-inlineCard: ` at the start
          text = "adf-inlineCard: #{data}"
          element.children << Element.new(:codespan, text, nil, { lang: 'adf-inlinecard' })
        end
      end

      # Converts a `listItem` node into a `:li` element.
      def process_list_item(element, ast_node)
        new_element = Element.new(:li)
        element.children << new_element

        process_content(new_element, ast_node, LIST_ITEM_NODES)
      end

      # Converts a `media` node into an image (`attrs.type == 'file'`) or a
      # link (`attrs.type == 'link'`) pointing at an `adf-media://<id>` url,
      # wrapped in a `:p` element. Any other media type is skipped, so that
      # no nil child ends up in the tree.
      def process_media(element, ast_node)
        media_url = "adf-media://#{ast_node.dig('attrs', 'id')}"

        media_element =
          case ast_node.dig('attrs', 'type')
          when 'file'
            attrs = { 'src' => media_url, 'alt' => ast_node.dig('attrs', 'collection') }
            Element.new(:img, nil, attrs)
          when 'link'
            attrs = { 'href' => media_url }
            wrap_element(Element.new(:text, media_url), :a, nil, attrs)
          end
        return unless media_element

        element.children << wrap_element(media_element, :p)
      end

      # wraps a single media element.
      # Currently ignore attrs.layout and attrs.width
      def process_media_single(element, ast_node)
        new_element = Element.new(:p)
        element.children << new_element

        process_content(new_element, ast_node, %w[media])
      end

      # wraps a group media element as a `:ul`, one `:li` per media child.
      # Currently ignore attrs.layout and attrs.width
      def process_media_group(element, ast_node)
        ul_element = Element.new(:ul)
        element.children << ul_element

        Array(ast_node['content']).each do |node|
          next unless node['type'] == 'media'

          li_element = Element.new(:li)
          ul_element.children << li_element

          process_media(li_element, node)
        end
      end

      # Emits a `mention` node as `@adf-mention:<name>` text.
      def process_mention(element, ast_node)
        # Make it `@adf-mention:` since there is no guarantee that it is
        # a valid username in our system. This gives us an
        # opportunity to replace it later. Mention name can have
        # spaces, so double quote it
        mention_text = ast_node.dig('attrs', 'text')&.delete('@')
        # `attrs.text` may be absent; guard so a missing name yields a bare
        # `@adf-mention:` instead of raising NoMethodError on nil.
        mention_text = %("#{mention_text}") if mention_text&.include?(' ')
        mention_text = %(@adf-mention:#{mention_text})

        add_text(mention_text, element, :text)
      end

      # Converts an `orderedList` node into an `:ol` element.
      def process_ordered_list(element, ast_node)
        # `attrs.order` is not supported in the Kramdown AST
        new_element = Element.new(:ol)
        element.children << new_element

        process_content(new_element, ast_node, %w[listItem])
      end

      # since we don't have something similar, render the panel as a
      # blockquote that starts with an emoji matching the panel type.
      # Panels with an unknown or missing `panelType` are dropped.
      def process_panel(element, ast_node)
        panel_type = ast_node.dig('attrs', 'panelType')
        # Whitelist check happens before `to_sym`, so only known types are symbolized.
        return unless %w[info note warning success error].include?(panel_type)

        panel_header_text = "#{PANEL_EMOJIS[panel_type.to_sym]} "
        panel_header_element = Element.new(:text, panel_header_text)

        new_element = Element.new(:blockquote)
        new_element.children << panel_header_element
        element.children << new_element

        process_content(new_element, ast_node, PANEL_NODES)
      end

      # Converts a `paragraph` node into a `:p` element.
      def process_paragraph(element, ast_node)
        new_element = Element.new(:p)
        element.children << new_element

        process_content(new_element, ast_node, INLINE_NODES)
      end

      # Converts a `rule` node into an `:hr` element.
      def process_rule(element, _ast_node)
        element.children << Element.new(:hr)
      end

      # Converts a `table` node into a `:table` element with a single
      # `:tbody` that receives the rows.
      def process_table(element, ast_node)
        table = Element.new(:table, nil, nil, { alignment: [:default, :default] })
        element.children << table

        tbody = Element.new(:tbody)
        table.children << tbody

        process_content(tbody, ast_node, %w[tableRow])
      end

      # we ignore the attributes, attrs.background, attrs.colspan,
      # attrs.colwidth, and attrs.rowspan
      def process_table_cell(element, ast_node)
        new_element = Element.new(:td)
        element.children << new_element

        process_content(new_element, ast_node, TABLE_CELL_NODES)
      end

      # we ignore the attributes, attrs.background, attrs.colspan,
      # attrs.colwidth, and attrs.rowspan
      def process_table_header(element, ast_node)
        new_element = Element.new(:th)
        element.children << new_element

        process_content(new_element, ast_node, TABLE_CELL_NODES)
      end

      # Converts a `tableRow` node into a `:tr` element.
      def process_table_row(element, ast_node)
        new_element = Element.new(:tr)
        element.children << new_element

        process_content(new_element, ast_node, %w[tableHeader tableCell])
      end

      # Converts a `text` node into a `:text` element, wrapped according to
      # the node's marks.
      def process_text(element, ast_node)
        new_element = Element.new(:text, ast_node['text'])
        new_element = apply_marks(new_element, ast_node, MARKS)
        element.children << new_element
      end

      private

      # Ensures the parsed JSON is an ADF document, i.e. a Hash whose `type`
      # is `doc`.
      #
      # @raise [JSON::ParserError] otherwise; `parse` converts it to Kramdown::Error
      def validate_document(ast)
        # Valid JSON can also be an array, scalar or null; checking for a
        # Hash first keeps those on the ParserError path handled by `parse`.
        return if ast.is_a?(Hash) && ast['type'] == 'doc'

        raise ::JSON::ParserError, 'missing doc node'
      end

      # ADF marks are an attribute on the node. For kramdown,
      # we have to wrap the node with an element for the mark.
      #
      # @param element [Kramdown::Element] the element to decorate
      # @param ast_node [Hash] ADF text node that may carry `marks`
      # @param allowed_types [Array<String>] mark types to honour; others are skipped
      # @return [Kramdown::Element] the outermost element after all marks are applied
      def apply_marks(element, ast_node, allowed_types)
        return element unless ast_node['marks']

        new_element = element

        ast_node['marks'].each do |mark|
          next unless allowed_types.include?(mark['type'])

          case mark['type']
          when 'code'
            # NOTE(review): this replaces rather than wraps, so any wrapper
            # created by an earlier mark in the list is discarded.
            new_element = Element.new(:codespan, ast_node['text'])
          when 'em'
            new_element = wrap_element(new_element, :em)
          when 'link'
            attrs = { 'href' => mark.dig('attrs', 'href') }
            attrs['title'] = mark.dig('attrs', 'title')
            new_element = wrap_element(new_element, :a, nil, attrs)
          when 'strike'
            new_element = wrap_element(new_element, :html_element, 'del', {}, category: :span)
          when 'strong'
            new_element = wrap_element(new_element, :strong)
          when 'subsup'
            type = mark.dig('attrs', 'type')

            case type
            when 'sub'
              new_element = wrap_element(new_element, :html_element, 'sub', {}, category: :span)
            when 'sup'
              new_element = wrap_element(new_element, :html_element, 'sup', {}, category: :span)
            else
              next
            end
          when 'textColor'
            color = mark.dig('attrs', 'color')
            new_element = wrap_element(new_element, :html_element, 'span', { color: color }, category: :span)
          when 'underline'
            new_element = wrap_element(new_element, :html_element, 'u', {}, category: :span)
          else
            next
          end
        end

        new_element
      end

      # Creates a new element of `type` (remaining args are passed to
      # Element.new) with `element` as its only child, and returns it.
      def wrap_element(element, type, *args)
        wrapper = Element.new(type, *args)
        wrapper.children << element
        wrapper
      end

      # Appends the value of every `:text` element in the subtree rooted at
      # `element` to `raw` (depth-first, in document order).
      def extract_element_text(element, raw)
        raw << element.value.to_s if element.type == :text
        element.children.each { |c| extract_element_text(c, raw) }
      end

      # Concatenates the `text` of all direct `text` children of `ast_node`.
      # A missing `content` or `text` contributes an empty string.
      #
      # @return [String]
      def gather_text(ast_node)
        Array(ast_node['content'])
          .select { |node| node['type'] == 'text' }
          .map { |node| node['text'].to_s }
          .join
      end

      # Any call to an undefined method (for example a `process_<type>`
      # handler that has not been written) raises NotImplementedError.
      def method_missing(method, *_args)
        raise NotImplementedError, "method `#{method}` not implemented yet"
      end
    end
  end
end