diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2023-08-18 13:50:51 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2023-08-18 13:50:51 +0300 |
commit | db384e6b19af03b4c3c82a5760d83a3fd79f7982 (patch) | |
tree | 34beaef37df5f47ccbcf5729d7583aae093cffa0 /gems/csv_builder | |
parent | 54fd7b1bad233e3944434da91d257fa7f63c3996 (diff) |
Add latest changes from gitlab-org/gitlab@16-3-stable-ee v16.3.0-rc42
Diffstat (limited to 'gems/csv_builder')
-rw-r--r-- | gems/csv_builder/.gitignore | 11 | ||||
-rw-r--r-- | gems/csv_builder/.gitlab-ci.yml | 4 | ||||
-rw-r--r-- | gems/csv_builder/.rspec | 3 | ||||
-rw-r--r-- | gems/csv_builder/.rubocop.yml | 8 | ||||
-rw-r--r-- | gems/csv_builder/Gemfile | 6 | ||||
-rw-r--r-- | gems/csv_builder/Gemfile.lock | 100 | ||||
-rw-r--r-- | gems/csv_builder/README.md | 48 | ||||
-rw-r--r-- | gems/csv_builder/csv_builder.gemspec | 26 | ||||
-rw-r--r-- | gems/csv_builder/lib/csv_builder.rb | 38 | ||||
-rw-r--r-- | gems/csv_builder/lib/csv_builder/builder.rb | 111 | ||||
-rw-r--r-- | gems/csv_builder/lib/csv_builder/gzip.rb | 23 | ||||
-rw-r--r-- | gems/csv_builder/lib/csv_builder/single_batch.rb | 11 | ||||
-rw-r--r-- | gems/csv_builder/lib/csv_builder/stream.rb | 17 | ||||
-rw-r--r-- | gems/csv_builder/lib/csv_builder/version.rb | 7 | ||||
-rw-r--r-- | gems/csv_builder/spec/csv_builder/gzip_spec.rb | 33 | ||||
-rw-r--r-- | gems/csv_builder/spec/csv_builder/stream_spec.rb | 44 | ||||
-rw-r--r-- | gems/csv_builder/spec/csv_builder_spec.rb | 142 | ||||
-rw-r--r-- | gems/csv_builder/spec/spec_helper.rb | 15 |
18 files changed, 647 insertions, 0 deletions
diff --git a/gems/csv_builder/.gitignore b/gems/csv_builder/.gitignore new file mode 100644 index 00000000000..b04a8c840df --- /dev/null +++ b/gems/csv_builder/.gitignore @@ -0,0 +1,11 @@ +/.bundle/ +/.yardoc +/_yardoc/ +/coverage/ +/doc/ +/pkg/ +/spec/reports/ +/tmp/ + +# rspec failure tracking +.rspec_status diff --git a/gems/csv_builder/.gitlab-ci.yml b/gems/csv_builder/.gitlab-ci.yml new file mode 100644 index 00000000000..e620c7bacd5 --- /dev/null +++ b/gems/csv_builder/.gitlab-ci.yml @@ -0,0 +1,4 @@ +include: + - local: gems/gem.gitlab-ci.yml + inputs: + gem_name: "csv_builder"
\ No newline at end of file diff --git a/gems/csv_builder/.rspec b/gems/csv_builder/.rspec new file mode 100644 index 00000000000..34c5164d9b5 --- /dev/null +++ b/gems/csv_builder/.rspec @@ -0,0 +1,3 @@ +--format documentation +--color +--require spec_helper diff --git a/gems/csv_builder/.rubocop.yml b/gems/csv_builder/.rubocop.yml new file mode 100644 index 00000000000..d004dd48db7 --- /dev/null +++ b/gems/csv_builder/.rubocop.yml @@ -0,0 +1,8 @@ +inherit_from: + - ../config/rubocop.yml + +RSpec/MultipleMemoizedHelpers: + Max: 25 + +RSpec/VerifiedDoubles: + Enabled: false
\ No newline at end of file diff --git a/gems/csv_builder/Gemfile b/gems/csv_builder/Gemfile new file mode 100644 index 00000000000..81bdfcabdae --- /dev/null +++ b/gems/csv_builder/Gemfile @@ -0,0 +1,6 @@ +# frozen_string_literal: true + +source "https://rubygems.org" + +# Specify your gem's dependencies in csv_builder.gemspec +gemspec diff --git a/gems/csv_builder/Gemfile.lock b/gems/csv_builder/Gemfile.lock new file mode 100644 index 00000000000..04992abc4d6 --- /dev/null +++ b/gems/csv_builder/Gemfile.lock @@ -0,0 +1,100 @@ +PATH + remote: . + specs: + csv_builder (0.1.0) + +GEM + remote: https://rubygems.org/ + specs: + activesupport (7.0.6) + concurrent-ruby (~> 1.0, >= 1.0.2) + i18n (>= 1.6, < 2) + minitest (>= 5.1) + tzinfo (~> 2.0) + ast (2.4.2) + coderay (1.1.3) + concurrent-ruby (1.2.2) + diff-lcs (1.5.0) + gitlab-styles (10.1.0) + rubocop (~> 1.50.2) + rubocop-graphql (~> 0.18) + rubocop-performance (~> 1.15) + rubocop-rails (~> 2.17) + rubocop-rspec (~> 2.22) + i18n (1.14.1) + concurrent-ruby (~> 1.0) + json (2.6.3) + method_source (1.0.0) + minitest (5.19.0) + parallel (1.23.0) + parser (3.2.2.3) + ast (~> 2.4.1) + racc + pry (0.14.2) + coderay (~> 1.1) + method_source (~> 1.0) + racc (1.7.1) + rack (3.0.8) + rainbow (3.1.1) + regexp_parser (2.8.1) + rexml (3.2.6) + rspec (3.12.0) + rspec-core (~> 3.12.0) + rspec-expectations (~> 3.12.0) + rspec-mocks (~> 3.12.0) + rspec-core (3.12.2) + rspec-support (~> 3.12.0) + rspec-expectations (3.12.3) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.12.0) + rspec-mocks (3.12.6) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.12.0) + rspec-support (3.12.1) + rubocop (1.50.2) + json (~> 2.3) + parallel (~> 1.10) + parser (>= 3.2.0.0) + rainbow (>= 2.2.2, < 4.0) + regexp_parser (>= 1.8, < 3.0) + rexml (>= 3.2.5, < 4.0) + rubocop-ast (>= 1.28.0, < 2.0) + ruby-progressbar (~> 1.7) + unicode-display_width (>= 2.4.0, < 3.0) + rubocop-ast (1.29.0) + parser (>= 3.2.1.0) + rubocop-capybara (2.18.0) + rubocop (~> 
1.41) + rubocop-factory_bot (2.23.1) + rubocop (~> 1.33) + rubocop-graphql (0.19.0) + rubocop (>= 0.87, < 2) + rubocop-performance (1.18.0) + rubocop (>= 1.7.0, < 2.0) + rubocop-ast (>= 0.4.0) + rubocop-rails (2.20.2) + activesupport (>= 4.2.0) + rack (>= 1.1) + rubocop (>= 1.33.0, < 2.0) + rubocop-rspec (2.22.0) + rubocop (~> 1.33) + rubocop-capybara (~> 2.17) + rubocop-factory_bot (~> 2.22) + ruby-progressbar (1.13.0) + tzinfo (2.0.6) + concurrent-ruby (~> 1.0) + unicode-display_width (2.4.2) + +PLATFORMS + ruby + +DEPENDENCIES + csv_builder! + gitlab-styles (~> 10.1.0) + pry + rspec (~> 3.0) + rubocop (~> 1.50) + rubocop-rspec (~> 2.22) + +BUNDLED WITH + 2.4.17 diff --git a/gems/csv_builder/README.md b/gems/csv_builder/README.md new file mode 100644 index 00000000000..37dde4b334c --- /dev/null +++ b/gems/csv_builder/README.md @@ -0,0 +1,48 @@ +# CsvBuilder + +## Usage + +Generate a CSV given a collection and a mapping. + +```ruby +columns = { + 'Title' => 'title', + 'Comment' => 'comment', + 'Author' => -> (post) { post.author.full_name } + 'Created At (UTC)' => -> (post) { post.created_at&.strftime('%Y-%m-%d %H:%M:%S') } +} + +CsvBuilder.new(@posts, columns).render +``` + +When the value of the mapping is a string, a method is called with the given name +on the record (for example: `post.title`). +When the value of the mapping is a lambda, it is lazily executed. + +It's possible to also pass ActiveRecord associations to preload when batching +through the collection: + +```ruby +CsvBuilder.new(@posts, columns, [:author, :comments]).render +``` + +### SingleBatch builder + +When the collection is an array or enumerable you can use: + +```ruby +CsvBuilder::SingleBatch.new(@posts, columns).render +``` + +### Stream builder + +A stream builder uses a lazy and more efficient iterator and by default returns +up to 100,000 records from the collection. 
+ +```ruby +CsvBuilder::Stream.new(@posts, columns).render(1_000) +``` + +## Development + +Follow the GitLab [gems development guidelines](../../doc/development/gems.md). diff --git a/gems/csv_builder/csv_builder.gemspec b/gems/csv_builder/csv_builder.gemspec new file mode 100644 index 00000000000..956fe2d6108 --- /dev/null +++ b/gems/csv_builder/csv_builder.gemspec @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +require_relative "lib/csv_builder/version" + +Gem::Specification.new do |spec| + spec.name = "csv_builder" + spec.version = CsvBuilder::Version::VERSION + spec.authors = ["group::tenant-scale"] + spec.email = ["engineering@gitlab.com"] + + spec.summary = "Provides enhancements to the CSV standard library" + spec.description = "Provides enhancements to the CSV standard library" + spec.homepage = "https://gitlab.com/gitlab-org/gitlab/-/tree/master/gems/csv_builder" + spec.license = 'MIT' + spec.required_ruby_version = ">= 3.0.0" + spec.metadata["rubygems_mfa_required"] = "true" + + spec.files = Dir['lib/**/*.rb'] + spec.require_paths = ["lib"] + + spec.add_development_dependency "gitlab-styles", "~> 10.1.0" + spec.add_development_dependency "pry" + spec.add_development_dependency "rspec", "~> 3.0" + spec.add_development_dependency "rubocop", "~> 1.50" + spec.add_development_dependency "rubocop-rspec", "~> 2.22" +end diff --git a/gems/csv_builder/lib/csv_builder.rb b/gems/csv_builder/lib/csv_builder.rb new file mode 100644 index 00000000000..86b682939dc --- /dev/null +++ b/gems/csv_builder/lib/csv_builder.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +require 'csv' +require 'tempfile' +require 'zlib' + +require_relative "csv_builder/version" +require_relative "csv_builder/builder" +require_relative "csv_builder/single_batch" +require_relative "csv_builder/stream" +require_relative "csv_builder/gzip" + +# Generates CSV when given a collection and a mapping. 
+# +# Example: +# +# columns = { +# 'Title' => 'title', +# 'Comment' => 'comment', +# 'Author' => -> (post) { post.author.full_name } +# 'Created At (UTC)' => -> (post) { post.created_at&.strftime('%Y-%m-%d %H:%M:%S') } +# } +# +# CsvBuilder.new(@posts, columns).render +# +module CsvBuilder + # + # * +collection+ - The data collection to be used + # * +header_to_value_hash+ - A hash of 'Column Heading' => 'value_method'. + # * +associations_to_preload+ - An array of records to preload with a batch of records. + # + # The value method will be called once for each object in the collection, to + # determine the value for that row. It can either be the name of a method on + # the object, or a lamda to call passing in the object. + def self.new(collection, header_to_value_hash, associations_to_preload = []) + CsvBuilder::Builder.new(collection, header_to_value_hash, associations_to_preload) + end +end diff --git a/gems/csv_builder/lib/csv_builder/builder.rb b/gems/csv_builder/lib/csv_builder/builder.rb new file mode 100644 index 00000000000..99b63153ab2 --- /dev/null +++ b/gems/csv_builder/lib/csv_builder/builder.rb @@ -0,0 +1,111 @@ +# frozen_string_literal: true + +module CsvBuilder + class Builder + UNSAFE_EXCEL_PREFIX = /\A[=\+\-@;]/ # rubocop:disable Style/RedundantRegexpEscape + + attr_reader :rows_written + + def initialize(collection, header_to_value_hash, associations_to_preload = []) + @header_to_value_hash = header_to_value_hash + @collection = collection + @truncated = false + @rows_written = 0 + @associations_to_preload = associations_to_preload + end + + # Renders the csv to a string + def render(truncate_after_bytes = nil) + Tempfile.open(['csv']) do |tempfile| + csv = CSV.new(tempfile) + + write_csv csv, until_condition: -> do + truncate_after_bytes && tempfile.size > truncate_after_bytes + end + + if block_given? + yield tempfile + else + tempfile.rewind + tempfile.read + end + end + end + + def truncated? 
+ @truncated + end + + def rows_expected + if truncated? || rows_written.zero? + @collection.count + else + rows_written + end + end + + def status + { + truncated: truncated?, + rows_written: rows_written, + rows_expected: rows_expected + } + end + + protected + + def each(&block) + if @associations_to_preload&.any? && @collection.respond_to?(:each_batch) + @collection.each_batch(order_hint: :created_at) do |relation| + relation.preload(@associations_to_preload).order(:id).each(&block) + end + elsif @collection.respond_to?(:find_each) + @collection.find_each(&block) + else + @collection.each(&block) + end + end + + private + + def headers + @headers ||= @header_to_value_hash.keys + end + + def attributes + @attributes ||= @header_to_value_hash.values + end + + def row(object) + attributes.map do |attribute| + if attribute.respond_to?(:call) + excel_sanitize(attribute.call(object)) + else + excel_sanitize(object.public_send(attribute)) # rubocop:disable GitlabSecurity/PublicSend + end + end + end + + def write_csv(csv, until_condition:) + csv << headers + + each do |object| + csv << row(object) + + @rows_written += 1 + + if until_condition.call + @truncated = true + break + end + end + end + + def excel_sanitize(line) + return if line.nil? + return line unless line.is_a?(String) && line.match?(UNSAFE_EXCEL_PREFIX) + + ["'", line].join + end + end +end diff --git a/gems/csv_builder/lib/csv_builder/gzip.rb b/gems/csv_builder/lib/csv_builder/gzip.rb new file mode 100644 index 00000000000..60875006a35 --- /dev/null +++ b/gems/csv_builder/lib/csv_builder/gzip.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +module CsvBuilder + class Gzip < CsvBuilder::Builder + # Writes the CSV file compressed and yields the written tempfile. 
+ # + # Example: + # > CsvBuilder::Gzip.new(Issue, { title: -> (row) { row.title.upcase }, id: :id }).render do |tempfile| + # > puts tempfile.path + # > puts `zcat #{tempfile.path}` + # > end + def render + Tempfile.open(['csv_builder_gzip', '.csv.gz']) do |tempfile| + csv = CSV.new(Zlib::GzipWriter.open(tempfile.path)) + + write_csv csv, until_condition: -> {} # truncation must be handled outside of the CsvBuilder + + csv.close + yield tempfile + end + end + end +end diff --git a/gems/csv_builder/lib/csv_builder/single_batch.rb b/gems/csv_builder/lib/csv_builder/single_batch.rb new file mode 100644 index 00000000000..e7731f27fd0 --- /dev/null +++ b/gems/csv_builder/lib/csv_builder/single_batch.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module CsvBuilder + class SingleBatch < CsvBuilder::Builder + protected + + def each(&block) + @collection.each(&block) + end + end +end diff --git a/gems/csv_builder/lib/csv_builder/stream.rb b/gems/csv_builder/lib/csv_builder/stream.rb new file mode 100644 index 00000000000..3e1a6c84ce9 --- /dev/null +++ b/gems/csv_builder/lib/csv_builder/stream.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module CsvBuilder + class Stream < CsvBuilder::Builder + def render(max_rows = 100_000) + max_rows_including_header = max_rows + 1 + + Enumerator.new do |csv| + csv << CSV.generate_line(headers) + + each do |object| + csv << CSV.generate_line(row(object)) + end + end.lazy.take(max_rows_including_header) + end + end +end diff --git a/gems/csv_builder/lib/csv_builder/version.rb b/gems/csv_builder/lib/csv_builder/version.rb new file mode 100644 index 00000000000..b7baf16ad0a --- /dev/null +++ b/gems/csv_builder/lib/csv_builder/version.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +module CsvBuilder + module Version + VERSION = "0.1.0" + end +end diff --git a/gems/csv_builder/spec/csv_builder/gzip_spec.rb b/gems/csv_builder/spec/csv_builder/gzip_spec.rb new file mode 100644 index 00000000000..9d24d351247 --- /dev/null +++ 
b/gems/csv_builder/spec/csv_builder/gzip_spec.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe CsvBuilder::Gzip do + let(:event_1) { double(title: 'Added salt', description: 'A teaspoon') } + let(:event_2) { double(title: 'Added sugar', description: 'Just a pinch') } + let(:items) { [event_1, event_2] } + + subject(:builder) { described_class.new(items, 'Title' => 'title', 'Description' => 'description') } + + describe '#render' do + it 'returns yields a tempfile' do + written_content = nil + + builder.render do |tempfile| + reader = Zlib::GzipReader.new(tempfile) + written_content = reader.read.split("\n") + end + + expect(written_content).to eq( + [ + "Title,Description", + "Added salt,A teaspoon", + "Added sugar,Just a pinch" + ]) + end + + it 'requires a block' do + expect { builder.render }.to raise_error(LocalJumpError) + end + end +end diff --git a/gems/csv_builder/spec/csv_builder/stream_spec.rb b/gems/csv_builder/spec/csv_builder/stream_spec.rb new file mode 100644 index 00000000000..d23e63520af --- /dev/null +++ b/gems/csv_builder/spec/csv_builder/stream_spec.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe CsvBuilder::Stream do + let(:event_1) { double(title: 'Added salt', description: 'A teaspoon') } + let(:event_2) { double(title: 'Added sugar', description: 'Just a pinch') } + let(:fake_relation) { described_class::FakeRelation.new([event_1, event_2]) } + + subject(:builder) { described_class.new(fake_relation, 'Title' => 'title', 'Description' => 'description') } + + describe '#render' do + before do + stub_const("#{described_class}::FakeRelation", Array) + + described_class::FakeRelation.class_eval do + def find_each(&block) + each(&block) + end + end + end + + it 'returns a lazy enumerator' do + expect(builder.render).to be_an(Enumerator::Lazy) + end + + it 'returns all rows up to default max value' do + expect(builder.render.to_a).to eq( + [ + 
"Title,Description\n", + "Added salt,A teaspoon\n", + "Added sugar,Just a pinch\n" + ]) + end + + it 'truncates to max rows' do + expect(builder.render(1).to_a).to eq( + [ + "Title,Description\n", + "Added salt,A teaspoon\n" + ]) + end + end +end diff --git a/gems/csv_builder/spec/csv_builder_spec.rb b/gems/csv_builder/spec/csv_builder_spec.rb new file mode 100644 index 00000000000..9d6283b3985 --- /dev/null +++ b/gems/csv_builder/spec/csv_builder_spec.rb @@ -0,0 +1,142 @@ +# frozen_string_literal: true + +RSpec.describe CsvBuilder do + let(:object) { double(question: :answer) } + let(:csv_data) { subject.render } + + let(:subject) do + described_class.new( + enumerable, 'Q & A' => :question, 'Reversed' => ->(o) { o.question.to_s.reverse }) + end + + shared_examples 'csv builder examples' do + let(:items) { [object] } + + it "has a version number" do + expect(CsvBuilder::Version::VERSION).not_to be nil + end + + it 'generates a csv' do + expect(csv_data.scan(/(,|\n)/).join).to include ",\n," + end + + it 'uses a temporary file to reduce memory allocation' do + expect(CSV).to receive(:new).with(instance_of(Tempfile)).and_call_original + + subject.render + end + + it 'counts the number of rows' do + subject.render + + expect(subject.rows_written).to eq 1 + end + + describe 'rows_expected' do + it 'uses rows_written if CSV rendered successfully' do + subject.render + + expect(enumerable).not_to receive(:count) + expect(subject.rows_expected).to eq 1 + end + + it 'falls back to calling .count before rendering begins' do + expect(subject.rows_expected).to eq 1 + end + end + + describe 'truncation' do + let(:big_object) { double(question: 'Long' * 1024) } + let(:row_size) { big_object.question.length * 2 } + let(:items) { [big_object, big_object, big_object] } + + it 'occurs after given number of bytes' do + expect(subject.render(row_size * 2).length).to be_between(row_size * 2, row_size * 3) + expect(subject).to be_truncated + expect(subject.rows_written).to eq 2 + end 
+ + it 'is ignored by default' do + expect(subject.render.length).to be > row_size * 3 + expect(subject.rows_written).to eq 3 + end + + it 'causes rows_expected to fall back to .count' do + subject.render(0) + + expect(enumerable).to receive(:count).and_call_original + expect(subject.rows_expected).to eq 3 + end + end + + it 'avoids loading all data in a single query' do + expect(enumerable).to receive(:find_each) + + subject.render + end + + it 'uses hash keys as headers' do + expect(csv_data).to start_with 'Q & A' + end + + it 'gets data by calling method provided as hash value' do + expect(csv_data).to include 'answer' + end + + it 'allows lamdas to look up more complicated data' do + expect(csv_data).to include 'rewsna' + end + + describe 'excel sanitization' do + let(:dangerous_title) { double(title: "=cmd|' /C calc'!A0 title", description: "*safe_desc") } + let(:dangerous_desc) { double(title: "*safe_title", description: "=cmd|' /C calc'!A0 desc") } + let(:items) { [dangerous_title, dangerous_desc] } + let(:subject) { described_class.new(enumerable, 'Title' => 'title', 'Description' => 'description') } + let(:csv_data) { subject.render } + + it 'sanitizes dangerous characters at the beginning of a column' do + expect(csv_data).to include "'=cmd|' /C calc'!A0 title" + expect(csv_data).to include "'=cmd|' /C calc'!A0 desc" + end + + it 'does not sanitize safe symbols at the beginning of a column' do + expect(csv_data).not_to include "'*safe_desc" + expect(csv_data).not_to include "'*safe_title" + end + + context 'when dangerous characters are after a line break' do + let(:items) { [double(title: "Safe title", description: "With task list\n-[x] todo 1")] } + + it 'does not append single quote to description' do + builder = described_class.new(enumerable, 'Title' => 'title', 'Description' => 'description') + + csv_data = builder.render + + expect(csv_data).to eq("Title,Description\nSafe title,\"With task list\n-[x] todo 1\"\n") + end + end + end + end + + context 
'when ActiveRecord::Relation like object is given' do + let(:enumerable) { described_class::FakeRelation.new(items) } + + before do + stub_const("#{described_class}::FakeRelation", Array) + + described_class::FakeRelation.class_eval do + def find_each(&block) + each(&block) + end + end + end + + it_behaves_like 'csv builder examples' + end + + context 'when Enumerable like object is given' do + let(:enumerable) { items } + + it_behaves_like 'csv builder examples' + end +end diff --git a/gems/csv_builder/spec/spec_helper.rb b/gems/csv_builder/spec/spec_helper.rb new file mode 100644 index 00000000000..eb21c2271f6 --- /dev/null +++ b/gems/csv_builder/spec/spec_helper.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +require "csv_builder" + +RSpec.configure do |config| + # Enable flags like --only-failures and --next-failure + config.example_status_persistence_file_path = ".rspec_status" + + # Disable RSpec exposing methods globally on `Module` and `main` + config.disable_monkey_patching! + + config.expect_with :rspec do |c| + c.syntax = :expect + end +end |