Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: GitLab Bot <gitlab-bot@gitlab.com> 2023-07-19 17:16:28 +0300
committer: GitLab Bot <gitlab-bot@gitlab.com> 2023-07-19 17:16:28 +0300
commit: e4384360a16dd9a19d4d2d25d0ef1f2b862ed2a6 (patch)
tree: 2fcdfa7dcdb9db8f5208b2562f4b4e803d671243 /gems/ipynbdiff/lib
parent: ffda4e7bcac36987f936b4ba515995a6698698f0 (diff)
Add latest changes from gitlab-org/gitlab@16-2-stable-ee v16.2.0-rc42
Diffstat (limited to 'gems/ipynbdiff/lib')
-rw-r--r-- gems/ipynbdiff/lib/ipynb_diff.rb | 24
-rw-r--r-- gems/ipynbdiff/lib/ipynb_diff/diff.rb | 20
-rw-r--r-- gems/ipynbdiff/lib/ipynb_diff/output_transformer.rb | 73
-rw-r--r-- gems/ipynbdiff/lib/ipynb_diff/symbol_map.rb | 109
-rw-r--r-- gems/ipynbdiff/lib/ipynb_diff/symbolized_markdown_helper.rb | 25
-rw-r--r-- gems/ipynbdiff/lib/ipynb_diff/transformed_notebook.rb | 20
-rw-r--r-- gems/ipynbdiff/lib/ipynb_diff/transformer.rb | 111
-rw-r--r-- gems/ipynbdiff/lib/ipynb_diff/version.rb | 7
8 files changed, 389 insertions, 0 deletions
diff --git a/gems/ipynbdiff/lib/ipynb_diff.rb b/gems/ipynbdiff/lib/ipynb_diff.rb
new file mode 100644
index 00000000000..605ff6e4a75
--- /dev/null
+++ b/gems/ipynbdiff/lib/ipynb_diff.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+require 'ipynb_diff/transformer'
+require 'ipynb_diff/diff'
+require 'ipynb_diff/symbol_map'
+
+# Human Readable Jupyter Diffs
+module IpynbDiff
+ def self.diff(from, to, raise_if_invalid_nb: false, include_frontmatter: false, hide_images: false, diffy_opts: {})
+ transformer = Transformer.new(include_frontmatter: include_frontmatter, hide_images: hide_images)
+
+ Diff.new(transformer.transform(from), transformer.transform(to), diffy_opts)
+ rescue InvalidNotebookError
+ raise if raise_if_invalid_nb
+ end
+
+ def self.transform(notebook, raise_errors: false, include_frontmatter: true, hide_images: false)
+ return unless notebook
+
+ Transformer.new(include_frontmatter: include_frontmatter, hide_images: hide_images).transform(notebook).as_text
+ rescue InvalidNotebookError
+ raise if raise_errors
+ end
+end
diff --git a/gems/ipynbdiff/lib/ipynb_diff/diff.rb b/gems/ipynbdiff/lib/ipynb_diff/diff.rb
new file mode 100644
index 00000000000..3554ac55d99
--- /dev/null
+++ b/gems/ipynbdiff/lib/ipynb_diff/diff.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+# Custom differ for Jupyter Notebooks
+module IpynbDiff
+ require 'delegate'
+
+ # The result of a diff object
+ class Diff < SimpleDelegator
+ require 'diffy'
+
+ attr_reader :from, :to
+
+ def initialize(from, to, diffy_opts)
+ super(Diffy::Diff.new(from.as_text, to.as_text, **diffy_opts))
+
+ @from = from
+ @to = to
+ end
+ end
+end
diff --git a/gems/ipynbdiff/lib/ipynb_diff/output_transformer.rb b/gems/ipynbdiff/lib/ipynb_diff/output_transformer.rb
new file mode 100644
index 00000000000..95dbcecf95c
--- /dev/null
+++ b/gems/ipynbdiff/lib/ipynb_diff/output_transformer.rb
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+require 'ipynb_diff/symbolized_markdown_helper'
+
+module IpynbDiff
+  # Turns the outputs of a Jupyter cell into symbolized markdown lines
+  class OutputTransformer
+    include SymbolizedMarkdownHelper
+
+    HIDDEN_IMAGE_OUTPUT = ' [Hidden Image Output]'
+
+    # Keys looked up in an output's data, in the order they are emitted
+    ORDERED_KEYS = {
+      'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain],
+      'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex],
+      'stream' => %w[text]
+    }.freeze
+
+    def initialize(hide_images = false)
+      @hide_images = hide_images
+    end
+
+    # Dispatches on the output's 'output_type'; returns nil for any type not listed here.
+    def transform(output, symbol)
+      kind = output['output_type']
+
+      case kind
+      when 'error'
+        transform_error(output['traceback'], symbol / 'traceback')
+      when 'execute_result', 'display_data'
+        transform_non_error(ORDERED_KEYS[kind], output['data'], symbol / 'data')
+      when 'stream'
+        transform_element('text', output['text'], symbol)
+      end
+    end
+
+    # Splits every traceback entry into lines and strips the terminal color codes from each one.
+    # Returns one array of lines per traceback entry.
+    def transform_error(traceback, symbol)
+      traceback.each_with_index.map do |entry, position|
+        entry.split("\n").map do |line|
+          without_colors = line.gsub(/\[[0-9][0-9;]*m/, '')
+          cleaned = without_colors.sub("\u001B", ' ').delete("\u001B").rstrip
+
+          ___(symbol / position, cleaned)
+        end
+      end
+    end
+
+    # Transforms the accepted keys that are present in +elements+, keeping the order of +accepted_keys+.
+    def transform_non_error(accepted_keys, elements, symbol)
+      present_keys = accepted_keys.select { |key| elements.key?(key) }
+
+      present_keys.map { |key| transform_element(key, elements[key], symbol) }
+    end
+
+    # Renders a single element as an image or as text; returns nil for any other type.
+    def transform_element(output_type, output_element, symbol_prefix)
+      element_symbol = symbol_prefix / output_type
+
+      if %w[image/png image/jpeg].include?(output_type)
+        transform_image("#{output_type};base64", output_element, element_symbol)
+      elsif output_type == 'image/svg+xml'
+        transform_image("#{output_type};utf8", output_element, element_symbol)
+      elsif %w[text/markdown text/latex text/plain text].include?(output_type)
+        transform_text(output_element, element_symbol)
+      end
+    end
+
+    # Emits the image as a single-line markdown data URI, or a placeholder when images are hidden.
+    def transform_image(image_type, image_content, symbol)
+      if @hide_images
+        ___(nil, HIDDEN_IMAGE_OUTPUT)
+      else
+        chunks = image_content.is_a?(Array) ? image_content : [image_content]
+        payload = chunks.map(&:strip).join.gsub(/\s+/, ' ')
+
+        ___(symbol, " ![](data:#{image_type},#{payload})")
+      end
+    end
+
+    # Symbolizes text content, passing a block that rstrips each line and prefixes it with a space.
+    def transform_text(text_content, symbol)
+      symbolize_array(symbol, text_content) do |line|
+        " #{line.rstrip}"
+      end
+    end
+  end
+end
diff --git a/gems/ipynbdiff/lib/ipynb_diff/symbol_map.rb b/gems/ipynbdiff/lib/ipynb_diff/symbol_map.rb
new file mode 100644
index 00000000000..383f1de5c18
--- /dev/null
+++ b/gems/ipynbdiff/lib/ipynb_diff/symbol_map.rb
@@ -0,0 +1,109 @@
+# frozen_string_literal: true
+
+module IpynbDiff
+ require 'oj'
+
+ # Creates a map from a symbol to the line number it appears in a Json file
+ #
+ # Example:
+ #
+ # Input:
+ #
+ # 1. {
+ # 2. 'obj1': [
+ # 3. {
+ # 4. 'obj2': 5
+ # 5. },
+ # 6. 3,
+ # 7. {
+ # 8. 'obj3': {
+ # 9. 'obj4': 'b'
+ # 10. }
+ # 11. }
+ # 12. ]
+ # 13.}
+ #
+ # Output:
+ #
+ # Symbol Line Number
+ # .obj1 -> 2
+ # .obj1.0 -> 3
+ # .obj1.0 -> 3
+ # .obj1.0.obj2 -> 4
+ # .obj1.1 -> 6
+ # .obj1.2 -> 7
+ # .obj1.2.obj3 -> 8
+ # .obj1.2.obj3.obj4 -> 9
+ #
+ class SymbolMap
+ # rubocop:disable Lint/UnusedMethodArgument
+ class << self
+ def handler
+ @handler ||= SymbolMap.new
+ end
+
+ def parser
+ @parser ||= Oj::Parser.new(:saj).tap { |p| p.handler = handler }
+ end
+
+ def parse(notebook, *args)
+ handler.reset
+ parser.parse(notebook)
+ handler.symbols
+ end
+ end
+
+ attr_accessor :symbols
+
+ def hash_start(key, line, column)
+ add_symbol(key_or_index(key), line)
+ end
+
+ def hash_end(key, line, column)
+ @current_path.pop
+ end
+
+ def array_start(key, line, column)
+ @current_array_index << 0
+
+ add_symbol(key, line)
+ end
+
+ def array_end(key, line, column)
+ @current_path.pop
+ @current_array_index.pop
+ end
+
+ def add_value(value, key, line, column)
+ add_symbol(key_or_index(key), line)
+
+ @current_path.pop
+ end
+
+ def add_symbol(symbol, line)
+ @symbols[@current_path.append(symbol).join('.')] = line if symbol
+ end
+
+ def key_or_index(key)
+ if key.nil? # value in an array
+ if @current_path.empty?
+ @current_path = ['']
+ return
+ end
+
+ symbol = @current_array_index.last
+ @current_array_index[-1] += 1
+ symbol
+ else
+ key
+ end
+ end
+
+ def reset
+ @current_path = []
+ @symbols = {}
+ @current_array_index = []
+ end
+ # rubocop:enable Lint/UnusedMethodArgument
+ end
+end
diff --git a/gems/ipynbdiff/lib/ipynb_diff/symbolized_markdown_helper.rb b/gems/ipynbdiff/lib/ipynb_diff/symbolized_markdown_helper.rb
new file mode 100644
index 00000000000..991c9e493bc
--- /dev/null
+++ b/gems/ipynbdiff/lib/ipynb_diff/symbolized_markdown_helper.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module IpynbDiff
+ # Helper functions
+ module SymbolizedMarkdownHelper
+ def ___(symbol = nil, content = '')
+ { symbol: symbol, content: content }
+ end
+
+ def symbolize_array(symbol, content)
+ if content.is_a?(Array)
+ content.map.with_index { |l, idx| ___(symbol / idx, yield(l)) }
+ else
+ content.split("\n").map { |c| ___(symbol, c) }
+ end
+ end
+ end
+
+ # Simple wrapper for a string
+ class JsonSymbol < String
+ def /(other)
+ JsonSymbol.new((other.is_a?(Array) ? [self, *other] : [self, other]).join('.'))
+ end
+ end
+end
diff --git a/gems/ipynbdiff/lib/ipynb_diff/transformed_notebook.rb b/gems/ipynbdiff/lib/ipynb_diff/transformed_notebook.rb
new file mode 100644
index 00000000000..f98e5f68086
--- /dev/null
+++ b/gems/ipynbdiff/lib/ipynb_diff/transformed_notebook.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+module IpynbDiff
+ # Notebook that was transformed into md, including location of source cells
+ class TransformedNotebook
+ attr_reader :blocks
+
+ def as_text
+ @blocks.map { |b| b[:content].gsub(/\n/, '\\n') }.join("\n")
+ end
+
+ private
+
+ def initialize(lines = [], symbol_map = {})
+ @blocks = lines.map do |line|
+ { content: line[:content], source_symbol: (symbol = line[:symbol]), source_line: symbol && symbol_map[symbol] }
+ end
+ end
+ end
+end
diff --git a/gems/ipynbdiff/lib/ipynb_diff/transformer.rb b/gems/ipynbdiff/lib/ipynb_diff/transformer.rb
new file mode 100644
index 00000000000..2b386168b5d
--- /dev/null
+++ b/gems/ipynbdiff/lib/ipynb_diff/transformer.rb
@@ -0,0 +1,111 @@
+# frozen_string_literal: true
+
+require 'json'
+require 'yaml'
+require 'ipynb_diff/output_transformer'
+require 'ipynb_diff/symbolized_markdown_helper'
+require 'ipynb_diff/symbol_map'
+require 'ipynb_diff/transformed_notebook'
+require 'oj'
+
+module IpynbDiff
+ InvalidNotebookError = Class.new(StandardError)
+
+ # Returns a markdown version of the Jupyter Notebook
+ class Transformer
+ include SymbolizedMarkdownHelper
+
+ @include_frontmatter = true
+
+ def initialize(include_frontmatter: true, hide_images: false)
+ @include_frontmatter = include_frontmatter
+ @hide_images = hide_images
+ @out_transformer = OutputTransformer.new(hide_images)
+ end
+
+ def validate_notebook(notebook)
+ notebook_json = Oj::Parser.usual.parse(notebook)
+
+ return notebook_json if notebook_json&.key?('cells')
+
+ raise InvalidNotebookError
+ rescue EncodingError, Oj::ParseError, JSON::ParserError
+ raise InvalidNotebookError
+ end
+
+ def transform(notebook)
+ return TransformedNotebook.new unless notebook
+
+ notebook_json = validate_notebook(notebook)
+ transformed = transform_document(notebook_json)
+ symbol_map = SymbolMap.parse(notebook)
+
+ TransformedNotebook.new(transformed, symbol_map)
+ end
+
+ def transform_document(notebook)
+ symbol = JsonSymbol.new('.cells')
+
+ transformed_blocks = notebook['cells'].map.with_index do |cell, idx|
+ decorate_cell(transform_cell(cell, notebook, symbol / idx), cell, symbol / idx)
+ end
+
+ transformed_blocks.prepend(transform_metadata(notebook)) if @include_frontmatter
+ transformed_blocks.flatten
+ end
+
+ def decorate_cell(rows, cell, symbol)
+ tags = cell['metadata']&.fetch('tags', [])
+ type = cell['cell_type'] || 'raw'
+
+ [
+ ___(symbol, %(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')})),
+ ___,
+ rows,
+ ___
+ ]
+ end
+
+ def transform_cell(cell, notebook, symbol)
+ cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook, symbol) : transform_text_cell(cell, symbol)
+ end
+
+ def transform_code_cell(cell, notebook, symbol)
+ [
+ ___(symbol / 'source', %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''})),
+ symbolize_array(symbol / 'source', cell['source'], &:rstrip),
+ ___(nil, '```'),
+ transform_outputs(cell['outputs'], symbol)
+ ]
+ end
+
+ def transform_outputs(outputs, symbol)
+ transformed = outputs.map
+ .with_index { |output, i| @out_transformer.transform(output, symbol / ['outputs', i]) }
+ .compact
+ .map { |el| [___, el] }
+
+ [
+ transformed.empty? ? [] : [___, ___(symbol / 'outputs', '%% Output')],
+ transformed
+ ]
+ end
+
+ def transform_text_cell(cell, symbol)
+ symbolize_array(symbol / 'source', cell['source'], &:rstrip)
+ end
+
+ def transform_metadata(notebook_json)
+ as_yaml = {
+ 'jupyter' => {
+ 'kernelspec' => notebook_json['metadata']['kernelspec'],
+ 'language_info' => notebook_json['metadata']['language_info'],
+ 'nbformat' => notebook_json['nbformat'],
+ 'nbformat_minor' => notebook_json['nbformat_minor']
+ }
+ }.to_yaml
+
+ as_yaml.split("\n").map { |l| ___(nil, l) }.append(___(nil, '---'), ___)
+ end
+ end
+end
diff --git a/gems/ipynbdiff/lib/ipynb_diff/version.rb b/gems/ipynbdiff/lib/ipynb_diff/version.rb
new file mode 100644
index 00000000000..1a407f9c0fa
--- /dev/null
+++ b/gems/ipynbdiff/lib/ipynb_diff/version.rb
@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+
+module IpynbDiff
+  # Holds the version string of the ipynbdiff gem
+  module Version
+    VERSION = '0.4.7'
+  end
+end