diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2023-07-19 17:16:28 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2023-07-19 17:16:28 +0300 |
commit | e4384360a16dd9a19d4d2d25d0ef1f2b862ed2a6 (patch) | |
tree | 2fcdfa7dcdb9db8f5208b2562f4b4e803d671243 /gems/ipynbdiff/lib | |
parent | ffda4e7bcac36987f936b4ba515995a6698698f0 (diff) |
Add latest changes from gitlab-org/gitlab@16-2-stable-ee (tag: v16.2.0-rc42)
Diffstat (limited to 'gems/ipynbdiff/lib')
-rw-r--r-- | gems/ipynbdiff/lib/ipynb_diff.rb | 24 | ||||
-rw-r--r-- | gems/ipynbdiff/lib/ipynb_diff/diff.rb | 20 | ||||
-rw-r--r-- | gems/ipynbdiff/lib/ipynb_diff/output_transformer.rb | 73 | ||||
-rw-r--r-- | gems/ipynbdiff/lib/ipynb_diff/symbol_map.rb | 109 | ||||
-rw-r--r-- | gems/ipynbdiff/lib/ipynb_diff/symbolized_markdown_helper.rb | 25 | ||||
-rw-r--r-- | gems/ipynbdiff/lib/ipynb_diff/transformed_notebook.rb | 20 | ||||
-rw-r--r-- | gems/ipynbdiff/lib/ipynb_diff/transformer.rb | 111 | ||||
-rw-r--r-- | gems/ipynbdiff/lib/ipynb_diff/version.rb | 7 |
8 files changed, 389 insertions, 0 deletions
# NOTE(review): this listing was recovered from a whitespace-collapsed diff. Runs of spaces inside
# string literals (e.g. the leading indent of HIDDEN_IMAGE_OUTPUT, the image/text prefixes and the
# ANSI-escape replacement in OutputTransformer) may originally have been longer -- confirm against
# the repository before relying on exact output.

# ==== gems/ipynbdiff/lib/ipynb_diff.rb ====
# frozen_string_literal: true

require 'ipynb_diff/transformer'
require 'ipynb_diff/diff'
require 'ipynb_diff/symbol_map'

# Human Readable Jupyter Diffs
module IpynbDiff
  # Diffs the markdown renderings of two notebooks.
  #
  # @param from [String, nil] JSON text of the old notebook (nil is rendered as an empty notebook)
  # @param to [String, nil] JSON text of the new notebook (nil is rendered as an empty notebook)
  # @param raise_if_invalid_nb [Boolean] re-raise InvalidNotebookError instead of returning nil
  # @param include_frontmatter [Boolean] prepend the YAML metadata block to each rendering
  # @param hide_images [Boolean] replace image outputs with a placeholder line
  # @param diffy_opts [Hash] options forwarded to Diffy::Diff
  # @return [IpynbDiff::Diff, nil] nil when a notebook is invalid and raise_if_invalid_nb is false
  def self.diff(from, to, raise_if_invalid_nb: false, include_frontmatter: false, hide_images: false, diffy_opts: {})
    transformer = Transformer.new(include_frontmatter: include_frontmatter, hide_images: hide_images)

    Diff.new(transformer.transform(from), transformer.transform(to), diffy_opts)
  rescue InvalidNotebookError
    raise if raise_if_invalid_nb
  end

  # Renders a single notebook as markdown text.
  #
  # Note that include_frontmatter defaults to true here but to false in .diff.
  #
  # @param notebook [String, nil] JSON text of the notebook
  # @param raise_errors [Boolean] re-raise InvalidNotebookError instead of returning nil
  # @return [String, nil] nil when notebook is nil, or invalid with raise_errors false
  def self.transform(notebook, raise_errors: false, include_frontmatter: true, hide_images: false)
    return unless notebook

    Transformer.new(include_frontmatter: include_frontmatter, hide_images: hide_images).transform(notebook).as_text
  rescue InvalidNotebookError
    raise if raise_errors
  end
end

# ==== gems/ipynbdiff/lib/ipynb_diff/diff.rb ====
# frozen_string_literal: true

require 'delegate'
require 'diffy'

# Custom differ for Jupyter Notebooks
module IpynbDiff
  # The result of a diff object. Delegates to a Diffy::Diff built from the text of both
  # transformed notebooks, and keeps the transformed notebooks themselves available.
  class Diff < SimpleDelegator
    attr_reader :from, :to

    # @param from [#as_text] transformed old notebook
    # @param to [#as_text] transformed new notebook
    # @param diffy_opts [Hash] keyword options splatted into Diffy::Diff.new
    def initialize(from, to, diffy_opts)
      super(Diffy::Diff.new(from.as_text, to.as_text, **diffy_opts))

      @from = from
      @to = to
    end
  end
end

# ==== gems/ipynbdiff/lib/ipynb_diff/output_transformer.rb ====
# frozen_string_literal: true

require 'ipynb_diff/symbolized_markdown_helper'

module IpynbDiff
  # Transforms Jupyter output data into markdown
  class OutputTransformer
    include SymbolizedMarkdownHelper

    HIDDEN_IMAGE_OUTPUT = ' [Hidden Image Output]'

    # Mime types rendered for each output type, in rendering order.
    ORDERED_KEYS = {
      'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain],
      'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex],
      'stream' => %w[text]
    }.freeze

    # @param hide_images [Boolean] when true, image outputs become HIDDEN_IMAGE_OUTPUT
    def initialize(hide_images = false)
      @hide_images = hide_images
    end

    # Renders one entry of a cell's `outputs` array.
    #
    # @param output [Hash] the output object
    # @param symbol [JsonSymbol] JSON path of this output
    # @return [Array, Hash, nil] (nested) line hashes; nil for an unrecognised output_type
    def transform(output, symbol)
      case (output_type = output['output_type'])
      when 'error'
        transform_error(output['traceback'], symbol / 'traceback')
      when 'execute_result', 'display_data'
        transform_non_error(ORDERED_KEYS[output_type], output['data'], symbol / 'data')
      when 'stream'
        transform_element('text', output['text'], symbol)
      end
    end

    # Renders a traceback: each entry is split into lines and cleaned of ANSI colour sequences.
    def transform_error(traceback, symbol)
      traceback.map.with_index do |entry, idx|
        entry.split("\n").map do |line|
          # Drop the "[..m" part of each colour code, turn the first remaining ESC character
          # into the replacement string and delete any further ESC characters.
          cleaned = line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').delete("\u001B").rstrip

          ___(symbol / idx, cleaned)
        end
      end
    end

    # Renders every accepted mime type present in `elements`, in `accepted_keys` order.
    def transform_non_error(accepted_keys, elements, symbol)
      accepted_keys.filter { |key| elements.key?(key) }.map do |key|
        transform_element(key, elements[key], symbol)
      end
    end

    # Dispatches on the mime type; returns nil for types not listed below.
    def transform_element(output_type, output_element, symbol_prefix)
      new_symbol = symbol_prefix / output_type

      case output_type
      when 'image/png', 'image/jpeg'
        transform_image("#{output_type};base64", output_element, new_symbol)
      when 'image/svg+xml'
        transform_image("#{output_type};utf8", output_element, new_symbol)
      when 'text/markdown', 'text/latex', 'text/plain', 'text'
        transform_text(output_element, new_symbol)
      end
    end

    # Renders an image as a single-line markdown data URI, or as the placeholder when hidden.
    def transform_image(image_type, image_content, symbol)
      return ___(nil, HIDDEN_IMAGE_OUTPUT) if @hide_images

      lines = image_content.is_a?(Array) ? image_content : [image_content]

      # Join the (possibly multi-line) payload and squeeze any inner whitespace runs.
      single_line = lines.map(&:strip).join.gsub(/\s+/, ' ')

      ___(symbol, " ![](data:#{image_type},#{single_line})")
    end

    # Renders text output, right-stripping and prefixing each line.
    def transform_text(text_content, symbol)
      symbolize_array(symbol, text_content) { |l| " #{l.rstrip}" }
    end
  end
end

# ==== gems/ipynbdiff/lib/ipynb_diff/symbol_map.rb ====
# frozen_string_literal: true

require 'oj'

module IpynbDiff
  # Creates a map from a symbol to the line number it appears in a Json file
  #
  # Example:
  #
  # Input:
  #
  #  1. {
  #  2.   'obj1': [
  #  3.     {
  #  4.       'obj2': 5
  #  5.     },
  #  6.     3,
  #  7.     {
  #  8.       'obj3': {
  #  9.         'obj4': 'b'
  # 10.       }
  # 11.     }
  # 12.   ]
  # 13. }
  #
  # Output:
  #
  # Symbol              Line Number
  # .obj1               -> 2
  # .obj1.0             -> 3
  # .obj1.0.obj2        -> 4
  # .obj1.1             -> 6
  # .obj1.2             -> 7
  # .obj1.2.obj3        -> 8
  # .obj1.2.obj3.obj4   -> 9
  #
  class SymbolMap
    # rubocop:disable Lint/UnusedMethodArgument
    class << self
      # NOTE(review): handler and parser are memoised at class level and carry mutable parse
      # state, so concurrent calls to .parse would presumably interfere -- confirm callers.
      def handler
        @handler ||= SymbolMap.new
      end

      def parser
        @parser ||= Oj::Parser.new(:saj).tap { |p| p.handler = handler }
      end

      # @param notebook [String] JSON text
      # @return [Hash{String => Integer}] symbol path => line number
      def parse(notebook, *args)
        handler.reset
        parser.parse(notebook)
        handler.symbols
      end
    end

    attr_accessor :symbols

    # --- SAJ callbacks, installed as the Oj parser handler in .parser ---

    def hash_start(key, line, column)
      add_symbol(key_or_index(key), line)
    end

    def hash_end(key, line, column)
      @current_path.pop
    end

    def array_start(key, line, column)
      # Resolve the symbol BEFORE opening the new index counter: for an array nested directly
      # inside another array the key is nil and the symbol is the parent's running index.
      symbol = key_or_index(key)
      @current_array_index << 0

      add_symbol(symbol, line)
    end

    def array_end(key, line, column)
      @current_path.pop
      @current_array_index.pop
    end

    def add_value(value, key, line, column)
      add_symbol(key_or_index(key), line)

      @current_path.pop
    end

    # Pushes `symbol` on the current path and records the joined path's line. No-op for nil.
    def add_symbol(symbol, line)
      @symbols[@current_path.append(symbol).join('.')] = line if symbol
    end

    # Returns the path segment for an element: its key, or its array index when it has no key.
    # For the root element (no key, empty path) it seeds the path with '' -- so joined paths
    # start with a dot -- and returns nil.
    def key_or_index(key)
      return key unless key.nil?

      if @current_path.empty?
        @current_path = ['']
        return
      end

      symbol = @current_array_index.last
      @current_array_index[-1] += 1
      symbol
    end

    # Clears all parse state; called before every parse.
    def reset
      @current_path = []
      @symbols = {}
      @current_array_index = []
    end
    # rubocop:enable Lint/UnusedMethodArgument
  end
end

# ==== gems/ipynbdiff/lib/ipynb_diff/symbolized_markdown_helper.rb ====
# frozen_string_literal: true

module IpynbDiff
  # Helper functions
  module SymbolizedMarkdownHelper
    # Builds one output line: its text plus the JSON symbol it originates from (if any).
    def ___(symbol = nil, content = '')
      { symbol: symbol, content: content }
    end

    # Turns cell content into line hashes.
    #
    # Array content: one line per element, symbol suffixed with the element index, text passed
    # through the block. String content: split on newlines, all lines share `symbol`.
    # nil content (field missing from the notebook) yields no lines.
    #
    # NOTE(review): the String branch does not apply the block, unlike the Array branch --
    # kept as-is; confirm whether that is intended.
    def symbolize_array(symbol, content)
      return [] if content.nil?

      if content.is_a?(Array)
        content.map.with_index { |l, idx| ___(symbol / idx, yield(l)) }
      else
        content.split("\n").map { |c| ___(symbol, c) }
      end
    end
  end

  # Simple wrapper for a string
  class JsonSymbol < String
    # Appends one segment (or each segment of an Array) to the path, dot-separated.
    def /(other)
      JsonSymbol.new((other.is_a?(Array) ? [self, *other] : [self, other]).join('.'))
    end
  end
end

# ==== gems/ipynbdiff/lib/ipynb_diff/transformed_notebook.rb ====
# frozen_string_literal: true

module IpynbDiff
  # Notebook that was transformed into md, including location of source cells
  class TransformedNotebook
    attr_reader :blocks

    # @param lines [Array<Hash>] line hashes with :symbol and :content
    # @param symbol_map [Hash] symbol path => line number in the notebook JSON
    def initialize(lines = [], symbol_map = {})
      @blocks = lines.map do |line|
        symbol = line[:symbol]

        { content: line[:content], source_symbol: symbol, source_line: symbol && symbol_map[symbol] }
      end
    end

    # One text line per block; newlines inside a block are escaped so the line count is kept.
    def as_text
      @blocks.map { |b| b[:content].gsub(/\n/, '\\n') }.join("\n")
    end
  end
end

# ==== gems/ipynbdiff/lib/ipynb_diff/transformer.rb ====
# frozen_string_literal: true

require 'json'
require 'yaml'
require 'ipynb_diff/output_transformer'
require 'ipynb_diff/symbolized_markdown_helper'
require 'ipynb_diff/symbol_map'
require 'ipynb_diff/transformed_notebook'
require 'oj'

module IpynbDiff
  InvalidNotebookError = Class.new(StandardError)

  # Returns a markdown version of the Jupyter Notebook
  class Transformer
    include SymbolizedMarkdownHelper

    def initialize(include_frontmatter: true, hide_images: false)
      @include_frontmatter = include_frontmatter
      @hide_images = hide_images
      @out_transformer = OutputTransformer.new(hide_images)
    end

    # Parses the notebook and checks it is a JSON object with a 'cells' key.
    #
    # @param notebook [String] JSON text
    # @return [Hash] the parsed notebook
    # @raise [InvalidNotebookError] on parse/encoding errors, or when the document is not an
    #   object containing 'cells' (including valid JSON such as `[]` or `5`)
    def validate_notebook(notebook)
      notebook_json = Oj::Parser.usual.parse(notebook)

      return notebook_json if notebook_json.is_a?(Hash) && notebook_json.key?('cells')

      raise InvalidNotebookError
    rescue EncodingError, Oj::ParseError, JSON::ParserError
      raise InvalidNotebookError
    end

    # @param notebook [String, nil] JSON text; nil yields an empty TransformedNotebook
    # @return [TransformedNotebook]
    # @raise [InvalidNotebookError] see #validate_notebook
    def transform(notebook)
      return TransformedNotebook.new unless notebook

      notebook_json = validate_notebook(notebook)
      transformed = transform_document(notebook_json)
      symbol_map = SymbolMap.parse(notebook)

      TransformedNotebook.new(transformed, symbol_map)
    end

    # Renders every cell (plus the frontmatter when enabled) into a flat list of line hashes.
    def transform_document(notebook)
      symbol = JsonSymbol.new('.cells')

      transformed_blocks = notebook['cells'].map.with_index do |cell, idx|
        decorate_cell(transform_cell(cell, notebook, symbol / idx), cell, symbol / idx)
      end

      transformed_blocks.prepend(transform_metadata(notebook)) if @include_frontmatter
      transformed_blocks.flatten
    end

    # Wraps a cell's rows with its "%% Cell" header line and surrounding blank lines.
    def decorate_cell(rows, cell, symbol)
      tags = cell['metadata']&.fetch('tags', [])
      type = cell['cell_type'] || 'raw'

      [
        ___(symbol, %(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')})),
        ___,
        rows,
        ___
      ]
    end

    def transform_cell(cell, notebook, symbol)
      cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook, symbol) : transform_text_cell(cell, symbol)
    end

    # Renders a code cell as a fenced block (tagged with the kernel language) plus its outputs.
    def transform_code_cell(cell, notebook, symbol)
      [
        ___(symbol / 'source', %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''})),
        symbolize_array(symbol / 'source', cell['source'], &:rstrip),
        ___(nil, '```'),
        transform_outputs(cell['outputs'], symbol)
      ]
    end

    # Renders a cell's outputs, preceded by a "%% Output" header when anything was rendered.
    # A missing `outputs` field (nil) is treated as no outputs.
    def transform_outputs(outputs, symbol)
      transformed = (outputs || [])
        .map.with_index { |output, i| @out_transformer.transform(output, symbol / ['outputs', i]) }
        .compact
        .map { |el| [___, el] }

      [
        transformed.empty? ? [] : [___, ___(symbol / 'outputs', '%% Output')],
        transformed
      ]
    end

    def transform_text_cell(cell, symbol)
      symbolize_array(symbol / 'source', cell['source'], &:rstrip)
    end

    # Renders notebook-level metadata as YAML lines followed by a '---' line and a blank line.
    # Uses dig so a notebook without 'metadata' yields nil entries instead of raising.
    def transform_metadata(notebook_json)
      as_yaml = {
        'jupyter' => {
          'kernelspec' => notebook_json.dig('metadata', 'kernelspec'),
          'language_info' => notebook_json.dig('metadata', 'language_info'),
          'nbformat' => notebook_json['nbformat'],
          'nbformat_minor' => notebook_json['nbformat_minor']
        }
      }.to_yaml

      as_yaml.split("\n").map { |l| ___(nil, l) }.append(___(nil, '---'), ___)
    end
  end
end

# ==== gems/ipynbdiff/lib/ipynb_diff/version.rb ====
# frozen_string_literal: true

module IpynbDiff
  module Version
    VERSION = '0.4.7'
  end
end