diff options
Diffstat (limited to 'gems')
27 files changed, 902 insertions, 84 deletions
diff --git a/gems/config/rubocop.yml b/gems/config/rubocop.yml index ca46e30e2cd..7cad83bbe66 100644 --- a/gems/config/rubocop.yml +++ b/gems/config/rubocop.yml @@ -96,10 +96,6 @@ RSpec/ContextWording: RSpec/FeatureCategory: Enabled: false -# Enable once we drop 3.0 support -Style/HashSyntax: - Enabled: false - Style/InlineDisableAnnotation: Enabled: false @@ -114,3 +110,8 @@ Style/RegexpLiteralMixedPreserve: - mixed - mixed_preserve EnforcedStyle: mixed_preserve + +# Short-hand Hash syntax does not work prior 3.1. +# See https://gitlab.com/gitlab-org/gitlab/-/issues/435940#note_1703307479 +Style/HashSyntax: + EnforcedShorthandSyntax: never diff --git a/gems/gem-pg.gitlab-ci.yml b/gems/gem-pg.gitlab-ci.yml index c48e18fa297..2806437e15b 100644 --- a/gems/gem-pg.gitlab-ci.yml +++ b/gems/gem-pg.gitlab-ci.yml @@ -63,6 +63,7 @@ rubocop: rules: - exists: ["$[[inputs.gem_path_prefix]]$[[inputs.gem_name]]/.rubocop.yml"] script: + - $CI_PROJECT_DIR/scripts/validate-monorepo-gem "$[[inputs.gem_name]]" - bundle exec rubocop rspec: diff --git a/gems/gem.gitlab-ci.yml b/gems/gem.gitlab-ci.yml index a379a887bdd..3ced4b5e364 100644 --- a/gems/gem.gitlab-ci.yml +++ b/gems/gem.gitlab-ci.yml @@ -50,6 +50,7 @@ rubocop: rules: - exists: ["$[[inputs.gem_path_prefix]]$[[inputs.gem_name]]/.rubocop.yml"] script: + - $CI_PROJECT_DIR/scripts/validate-monorepo-gem "$[[inputs.gem_name]]" - bundle exec rubocop rspec: diff --git a/gems/gitlab-database-load_balancing/Gemfile.lock b/gems/gitlab-database-load_balancing/Gemfile.lock index b2d66b9a386..9c8653d9389 100644 --- a/gems/gitlab-database-load_balancing/Gemfile.lock +++ b/gems/gitlab-database-load_balancing/Gemfile.lock @@ -16,6 +16,7 @@ PATH remote: ../gitlab-safe_request_store specs: gitlab-safe_request_store (0.1.0) + rack (~> 2.2.8) request_store PATH @@ -25,7 +26,6 @@ PATH actionview (>= 6.1.7.2) activesupport (>= 6.1.7.2) addressable (~> 2.8) - nokogiri (~> 1.15.2) rake (~> 13.0) PATH diff --git a/gems/gitlab-housekeeper/Gemfile.lock b/gems/gitlab-housekeeper/Gemfile.lock index 2b18c02558c..9fbdc246811 100644 --- a/gems/gitlab-housekeeper/Gemfile.lock +++ b/gems/gitlab-housekeeper/Gemfile.lock @@ -10,6 +10,7 @@ PATH remote: . specs: gitlab-housekeeper (0.1.0) + activesupport httparty rubocop @@ -122,7 +123,7 @@ GEM hashdiff (>= 0.4.0, < 2.0.0) PLATFORMS - arm64-darwin-22 + ruby DEPENDENCIES gitlab-housekeeper! diff --git a/gems/gitlab-housekeeper/README.md b/gems/gitlab-housekeeper/README.md index f707b99c6f0..25d68e67bd8 100644 --- a/gems/gitlab-housekeeper/README.md +++ b/gems/gitlab-housekeeper/README.md @@ -1,6 +1,86 @@ # Gitlab::Housekeeper -Housekeeping following https://gitlab.com/gitlab-org/gitlab/-/merge_requests/134487 +This is a gem which can be run locally or in CI to do static and dynamic +analysis of the GitLab codebase and, using a list of predefined "keeps", it will +automatically create merge requests for things that developers would have +otherwise needed to remember to do themselves. + +It is analogous to a mix of `rubocop -a` and GitLab Dependency Bot. + +The word "keep" is used to describe a specific rule to apply to the code to +match a required change and actually edit the code. The word "keep" was chosen +as it sounds like "cop" and is very similar to implementing a rubocop rule as +well as code to autocorrect the rule. + +You can see the existing keeps in +https://gitlab.com/gitlab-org/gitlab/-/tree/master/keeps . + +## How the code is organized + +The code is organized in a very similar way to RuboCop in that we have an +overall gem called `gitlab-housekeeper` that contains the generic logic of +looping over all `keeps` (analogous to Cops) which are rules for how to detect +changes that can be made to the code and then actually how to correct them. + +Then users of this gem are expected to add a `keeps` directory in their project +with all the keeps specific to their project. This gem may at some point +include keeps that are generic enough to be used by other projects. + +## How to implement a keep + +The only thing you need to implement is an `each_change` method. The method +should yield changes in the form of a `::Gitlab::Housekeeper::Change` object, +where each change object represents a merge request that will be created. +The object describes the files that should be commited and other metadata +should be added to the merge request. Before yielding the `Change` the keep +should also edit the files locally. + +Here is an example of a very simple keep that creates 3 new files called +`new_file1.txt`, `new_file2.txt` and `new_file3.txt`: + +```ruby +# keeps/pretty_useless_keep.rb + +module Keeps + class PrettyUselessKeep < ::Gitlab::Housekeeper::Keep + def each_change + (1..3).each do |i| + file_name = "new_file#{i}.txt" + + `touch #{file_name}` + + identifiers = [self.class.name.demodulize, "new_file#{i}"] + + title = "Make new file #{file_name}" + + description = <<~MARKDOWN + ## New files + + This MR makes a new file #{file_name} + MARKDOWN + + labels = %w(type::feature) + + changed_files = [file_name] + + yield(::Gitlab::Housekeeper::Change.new( + identifiers, title, description, changed_files, labels + )) + end + end + end +end +``` + +You can dry-run this locally with the following command: + +``` +bundle exec gitlab-housekeeper -r keeps/pretty_useless_keep.rb -k Keeps::PrettyUselessKeep -d -m 3 +``` + +The `-d` just prints the contents of the merge request. Removing this will +actually create merge requests and requires setting a few environment +variables described below. ## Running @@ -14,7 +94,7 @@ a mistake. The alternative of using your own API token with it's permissions to 1. Create a project access token for that project 1. Set `housekeeper` remote to the fork you created ``` - git remote add housekeeper git@gitlab.com:DylanGriffith/gitlab.git + git remote add housekeeper <FORK_GIT_URL> ``` 1. Open a Postgres.ai tunnel on localhost port 6305 1. Set the Postgres AI env vars matching the tunnel details for your tunnel @@ -24,8 +104,8 @@ a mistake. The alternative of using your own API token with it's permissions to ``` 1. Set the GitLab client details. Will be used to create MR from housekeeper remote: ``` - export HOUSEKEEPER_FORK_PROJECT_ID=52263761 # Same project as housekeeper remote - export HOUSEKEEPER_TARGET_PROJECT_ID=52263761 # Can be 278964 (gitlab-org/gitlab) when ready to create real MRs + export HOUSEKEEPER_FORK_PROJECT_ID=<FORK_PROJECT_ID> # Same project as housekeeper remote + export HOUSEKEEPER_TARGET_PROJECT_ID=<TARGET_PROJECT_ID> # Can be 278964 (gitlab-org/gitlab) when ready to create real MRs export HOUSEKEEPER_GITLAB_API_TOKEN=the-api-token ``` 1. Run it: diff --git a/gems/gitlab-housekeeper/gitlab-housekeeper.gemspec b/gems/gitlab-housekeeper/gitlab-housekeeper.gemspec index 798ca5dcfe6..4083b3c2d11 100644 --- a/gems/gitlab-housekeeper/gitlab-housekeeper.gemspec +++ b/gems/gitlab-housekeeper/gitlab-housekeeper.gemspec @@ -19,11 +19,12 @@ Gem::Specification.new do |spec| spec.require_paths = ["lib"] spec.executables = ['gitlab-housekeeper'] + spec.add_runtime_dependency 'activesupport' spec.add_runtime_dependency 'httparty' spec.add_runtime_dependency 'rubocop' spec.add_development_dependency 'gitlab-styles' spec.add_development_dependency 'rspec-rails' - spec.add_development_dependency "rubocop-rspec" + spec.add_development_dependency 'rubocop-rspec' spec.add_development_dependency 'webmock' end diff --git a/gems/gitlab-housekeeper/lib/gitlab/housekeeper/gitlab_client.rb b/gems/gitlab-housekeeper/lib/gitlab/housekeeper/gitlab_client.rb index b28d44195cb..d2bb5065824 100644 --- a/gems/gitlab-housekeeper/lib/gitlab/housekeeper/gitlab_client.rb +++ b/gems/gitlab-housekeeper/lib/gitlab/housekeeper/gitlab_client.rb @@ -13,13 +13,65 @@ module Gitlab @base_uri = 'https://gitlab.com/api/v4' end + # This looks at the system notes of the merge request to detect if it has been updated by anyone other than the + # current housekeeper user. If it has then it assumes that they did this for a reason and we can skip updating + # this detail of the merge request. Otherwise we assume we should generate it again using the latest output. + def non_housekeeper_changes( + source_project_id:, + source_branch:, + target_branch:, + target_project_id: + ) + + iid = get_existing_merge_request( + source_project_id: source_project_id, + source_branch: source_branch, + target_branch: target_branch, + target_project_id: target_project_id + ) + + return [] if iid.nil? + + merge_request_notes = get_merge_request_notes(target_project_id: target_project_id, iid: iid) + + changes = Set.new + + merge_request_notes.each do |note| + next false unless note["system"] + next false if note["author"]["id"] == current_user_id + + changes << :title if note['body'].start_with?("changed title from") + changes << :description if note['body'] == "changed the description" + changes << :code if note['body'].match?(/added \d+ commit/) + end + + resource_label_events = get_merge_request_resource_label_events(target_project_id: target_project_id, iid: iid) + + resource_label_events.each do |event| + next if event["user"]["id"] == current_user_id + + # Labels are routinely added by both humans and bots, so addition events aren't cause for concern. + # However, if labels have been removed it may mean housekeeper added an incorrect label, and we shouldn't + # re-add them. + # + # TODO: Inspect the actual labels housekeeper wants to add, and add if they haven't previously been removed. + changes << :labels if event["action"] == "remove" + end + + changes.to_a + end + def create_or_update_merge_request( source_project_id:, title:, description:, + labels:, source_branch:, target_branch:, - target_project_id: + target_project_id:, + update_title:, + update_description:, + update_labels: ) existing_iid = get_existing_merge_request( source_project_id: source_project_id, @@ -33,13 +85,18 @@ module Gitlab existing_iid: existing_iid, title: title, description: description, - target_project_id: target_project_id + labels: labels, + target_project_id: target_project_id, + update_title: update_title, + update_description: update_description, + update_labels: update_labels ) else create_merge_request( source_project_id: source_project_id, title: title, description: description, + labels: labels, source_branch: source_branch, target_branch: target_branch, target_project_id: target_project_id @@ -49,6 +106,63 @@ module Gitlab private + def get_merge_request_notes(target_project_id:, iid:) + response = HTTParty.get( + "#{@base_uri}/projects/#{target_project_id}/merge_requests/#{iid}/notes", + query: { + per_page: 100 + }, + headers: { + "Private-Token" => @token + } + ) + + unless (200..299).cover?(response.code) + raise Error, + "Failed to get merge request notes with response code: #{response.code} and body:\n#{response.body}" + end + + JSON.parse(response.body) + end + + def get_merge_request_resource_label_events(target_project_id:, iid:) + response = HTTParty.get( + "#{@base_uri}/projects/#{target_project_id}/merge_requests/#{iid}/resource_label_events", + query: { + per_page: 100 + }, + headers: { + "Private-Token" => @token + } + ) + + unless (200..299).cover?(response.code) + raise Error, + "Failed to get merge request label events with response code: #{response.code} and body:\n#{response.body}" + end + + JSON.parse(response.body) + end + + def current_user_id + @current_user_id = begin + response = HTTParty.get( + "#{@base_uri}/user", + headers: { + "Private-Token" => @token + } + ) + + unless (200..299).cover?(response.code) + raise Error, + "Failed with response code: #{response.code} and body:\n#{response.body}" + end + + data = JSON.parse(response.body) + data['id'] + end + end + def get_existing_merge_request(source_project_id:, source_branch:, target_branch:, target_project_id:) response = HTTParty.get("#{@base_uri}/projects/#{target_project_id}/merge_requests", query: { @@ -78,11 +192,18 @@ module Gitlab end def create_merge_request( - source_project_id:, title:, description:, source_branch:, target_branch:, - target_project_id:) + source_project_id:, + title:, + description:, + labels:, + source_branch:, + target_branch:, + target_project_id: + ) response = HTTParty.post("#{@base_uri}/projects/#{source_project_id}/merge_requests", body: { title: title, description: description, + labels: Array(labels).join(','), source_branch: source_branch, target_branch: target_branch, target_project_id: target_project_id @@ -98,11 +219,26 @@ module Gitlab "Failed with response code: #{response.code} and body:\n#{response.body}" end - def update_existing_merge_request(existing_iid:, title:, description:, target_project_id:) - response = HTTParty.put("#{@base_uri}/projects/#{target_project_id}/merge_requests/#{existing_iid}", body: { - title: title, - description: description - }.to_json, + def update_existing_merge_request( + existing_iid:, + title:, + description:, + labels:, + target_project_id:, + update_title:, + update_description:, + update_labels: + ) + body = {} + + body[:title] = title if update_title + body[:description] = description if update_description + body[:add_labels] = Array(labels).join(',') if update_labels + + return if body.empty? + + response = HTTParty.put("#{@base_uri}/projects/#{target_project_id}/merge_requests/#{existing_iid}", + body: body.to_json, headers: { 'Private-Token' => @token, 'Content-Type' => 'application/json' diff --git a/gems/gitlab-housekeeper/lib/gitlab/housekeeper/runner.rb b/gems/gitlab-housekeeper/lib/gitlab/housekeeper/runner.rb index 76d629e29a3..45d52a0bd8b 100644 --- a/gems/gitlab-housekeeper/lib/gitlab/housekeeper/runner.rb +++ b/gems/gitlab-housekeeper/lib/gitlab/housekeeper/runner.rb @@ -1,13 +1,14 @@ # frozen_string_literal: true +require 'active_support/core_ext/string' require 'gitlab/housekeeper/keep' -require "gitlab/housekeeper/gitlab_client" -require "gitlab/housekeeper/git" +require 'gitlab/housekeeper/gitlab_client' +require 'gitlab/housekeeper/git' require 'digest' module Gitlab module Housekeeper - Change = Struct.new(:identifiers, :title, :description, :changed_files) + Change = Struct.new(:identifiers, :title, :description, :changed_files, :labels) class Runner def initialize(max_mrs: 1, dry_run: false, require: [], keeps: nil) @@ -59,26 +60,52 @@ module Gitlab end def dry_run(change, branch_name) + puts "=> #{change.identifiers.join(': ')}" + + if change.labels.present? + puts '=> Attributes:' + puts "Labels: #{change.labels.join(', ')}" + puts + end + + puts '=> Title:' + puts change.title puts - puts "# #{change.title}" - puts + + puts '=> Description:' puts change.description puts + + puts '=> Diff:' puts Shell.execute('git', '--no-pager', 'diff', 'master', branch_name, '--', *change.changed_files) + puts end def create(change, branch_name) dry_run(change, branch_name) - Shell.execute('git', 'push', '-f', 'housekeeper', "#{branch_name}:#{branch_name}") + non_housekeeper_changes = gitlab_client.non_housekeeper_changes( + source_project_id: housekeeper_fork_project_id, + source_branch: branch_name, + target_branch: 'master', + target_project_id: housekeeper_target_project_id + ) + + unless non_housekeeper_changes.include?(:code) + Shell.execute('git', 'push', '-f', 'housekeeper', "#{branch_name}:#{branch_name}") + end gitlab_client.create_or_update_merge_request( source_project_id: housekeeper_fork_project_id, title: change.title, description: change.description, + labels: change.labels, source_branch: branch_name, target_branch: 'master', - target_project_id: housekeeper_target_project_id + target_project_id: housekeeper_target_project_id, + update_title: !non_housekeeper_changes.include?(:title), + update_description: !non_housekeeper_changes.include?(:description), + update_labels: !non_housekeeper_changes.include?(:labels) ) end diff --git a/gems/gitlab-housekeeper/spec/gitlab/housekeeper/gitlab_client_spec.rb b/gems/gitlab-housekeeper/spec/gitlab/housekeeper/gitlab_client_spec.rb index 36b2afdc306..ffb9d2f8d23 100644 --- a/gems/gitlab-housekeeper/spec/gitlab/housekeeper/gitlab_client_spec.rb +++ b/gems/gitlab-housekeeper/spec/gitlab/housekeeper/gitlab_client_spec.rb @@ -3,26 +3,225 @@ require 'spec_helper' require 'gitlab/housekeeper/gitlab_client' +# rubocop:disable RSpec/MultipleMemoizedHelpers -- there are lots of parameters at play RSpec.describe ::Gitlab::Housekeeper::GitlabClient do let(:client) { described_class.new } + before do + stub_env('HOUSEKEEPER_GITLAB_API_TOKEN', 'the-api-token') + end + + describe '#non_housekeeper_changes' do + let(:housekeeper_user_id) { 666 } + + let(:added_commit_note) do + { + id: 1698248524, + body: "added 1 commit\n\n<ul><li>41b3a17f - Update stuff to test...", + author: { "id" => 1234 }, + system: true + } + end + + let(:irrelevant_note1) do + { + id: 1698248523, + body: "changed this line in ...", + author: { "id" => 1234 }, + system: true + } + end + + let(:not_a_system_note) do + { + id: 1698248524, + body: "added 1 commit\n\n<ul><li>41b3a17f - Update stuff to test...", + author: { "id" => 1234 }, + system: false + } + end + + let(:updated_title_note) do + { + id: 1698248527, + body: "changed title from **Add sharding{- -}key `namespace_id` to achievements**...", + author: { "id" => 1235 }, + system: true + } + end + + let(:updated_description_note) do + { + id: 1698248530, + body: "changed the description", + author: { "id" => 1236 }, + system: true + } + end + + let(:notes) do + [irrelevant_note1, not_a_system_note] + end + + let(:added_label_event) do + { + id: 274504558, + user: { id: 18645100 }, + label: { id: 2492649, name: "good label" }, + action: "add" + } + end + + let(:removed_label_event) do + { + id: 274504558, + user: { id: 18645100 }, + label: { id: 2492649, name: "bad label" }, + action: "remove" + } + end + + let(:resource_label_events) do + [added_label_event] + end + + subject(:non_housekeeper_changes) do + client.non_housekeeper_changes( + source_project_id: 123, + target_project_id: 456, + source_branch: 'the-source-branch', + target_branch: 'the-target-branch' + ) + end + + before do + # Get the current housekeeper user + stub_request(:get, "https://gitlab.com/api/v4/user") + .with( + headers: { + 'Private-Token' => 'the-api-token' + } + ) + .to_return(status: 200, body: { id: housekeeper_user_id }.to_json) + + # Get the id of the current merge request + stub_request(:get, "https://gitlab.com/api/v4/projects/456/merge_requests?state=opened&source_branch=the-source-branch&target_branch=the-target-branch&source_project_id=123") + .with( + headers: { + 'Private-Token' => 'the-api-token' + } + ) + .to_return(status: 200, body: [{ iid: 8765 }].to_json) + + # Get the notes of the current merge request + stub_request(:get, "https://gitlab.com/api/v4/projects/456/merge_requests/8765/notes?per_page=100") + .with( + headers: { + 'Private-Token' => 'the-api-token' + } + ) + .to_return(status: 200, body: notes.to_json) + + # Get the label changes for the merge request + stub_request(:get, "https://gitlab.com/api/v4/projects/456/merge_requests/8765/resource_label_events?per_page=100") + .with( + headers: { + 'Private-Token' => 'the-api-token' + } + ) + .to_return(status: 200, body: resource_label_events.to_json) + end + + it 'does not match irrelevant notes' do + expect(non_housekeeper_changes).to eq([]) + end + + context 'when all important things change' do + let(:notes) do + [not_a_system_note, updated_title_note, updated_description_note, added_commit_note] + end + + let(:resource_label_events) do + [removed_label_event] + end + + it 'returns :title, :description, :code, :labels' do + expect(non_housekeeper_changes).to include(:title) + expect(non_housekeeper_changes).to include(:description) + expect(non_housekeeper_changes).to include(:code) + expect(non_housekeeper_changes).to include(:labels) + end + end + + context 'when title changes' do + let(:notes) do + [not_a_system_note, updated_title_note] + end + + it 'returns :title' do + expect(non_housekeeper_changes).to include(:title) + expect(non_housekeeper_changes).not_to include(:description) + expect(non_housekeeper_changes).not_to include(:code) + expect(non_housekeeper_changes).not_to include(:labels) + end + end + + context 'when description changes' do + let(:notes) do + [not_a_system_note, updated_description_note] + end + + it 'returns :description' do + expect(non_housekeeper_changes).not_to include(:title) + expect(non_housekeeper_changes).to include(:description) + expect(non_housekeeper_changes).not_to include(:code) + expect(non_housekeeper_changes).not_to include(:labels) + end + end + + context 'when labels change' do + let(:notes) do + [not_a_system_note] + end + + let(:resource_label_events) do + [added_label_event, removed_label_event] + end + + it 'returns :labels' do + expect(non_housekeeper_changes).not_to include(:title) + expect(non_housekeeper_changes).not_to include(:description) + expect(non_housekeeper_changes).not_to include(:code) + expect(non_housekeeper_changes).to include(:labels) + end + end + + context 'when the merge request does not exist' do + it 'returns empty array' do + expect(non_housekeeper_changes).to eq([]) + end + end + end + describe '#create_or_update_merge_request' do let(:params) do { source_project_id: 123, title: 'A new merge request!', + labels: %w[label-1 label-2], description: 'This merge request is pretty good.', source_branch: 'the-source-branch', target_branch: 'the-target-branch', - target_project_id: 456 + target_project_id: 456, + update_title: true, + update_description: true, + update_labels: true } end let(:existing_mrs) { [] } before do - stub_env('HOUSEKEEPER_GITLAB_API_TOKEN', 'the-api-token') - # Stub the check to see if the merge request already exists stub_request(:get, "https://gitlab.com/api/v4/projects/456/merge_requests?state=opened&source_branch=the-source-branch&target_branch=the-target-branch&source_project_id=123") .with( @@ -39,6 +238,7 @@ RSpec.describe ::Gitlab::Housekeeper::GitlabClient do body: { title: "A new merge request!", description: "This merge request is pretty good.", + labels: "label-1,label-2", source_branch: "the-source-branch", target_branch: "the-target-branch", target_project_id: 456 @@ -64,7 +264,8 @@ RSpec.describe ::Gitlab::Housekeeper::GitlabClient do .with( body: { title: "A new merge request!", - description: "This merge request is pretty good." + description: "This merge request is pretty good.", + add_labels: "label-1,label-2" }.to_json, headers: { 'Content-Type' => 'application/json', @@ -85,6 +286,70 @@ RSpec.describe ::Gitlab::Housekeeper::GitlabClient do expect { client.create_or_update_merge_request(**params) }.to raise_error(described_class::Error) end end + + context 'when update_title: false' do + it 'does not update the title' do + stub = stub_request(:put, "https://gitlab.com/api/v4/projects/456/merge_requests/1234") + .with( + body: { + description: "This merge request is pretty good.", + add_labels: "label-1,label-2" + }.to_json, + headers: { + 'Content-Type' => 'application/json', + 'Private-Token' => 'the-api-token' + } + ).to_return(status: 200, body: "") + + client.create_or_update_merge_request(**params.merge(update_title: false)) + expect(stub).to have_been_requested + end + end + + context 'when update_description: false' do + it 'does not update the description' do + stub = stub_request(:put, "https://gitlab.com/api/v4/projects/456/merge_requests/1234") + .with( + body: { + title: "A new merge request!", + add_labels: "label-1,label-2" + }.to_json, + headers: { + 'Content-Type' => 'application/json', + 'Private-Token' => 'the-api-token' + } + ).to_return(status: 200, body: "") + + client.create_or_update_merge_request(**params.merge(update_description: false)) + expect(stub).to have_been_requested + end + end + + context 'when update_labels: false' do + it 'does not update the labels' do + stub = stub_request(:put, "https://gitlab.com/api/v4/projects/456/merge_requests/1234") + .with( + body: { + title: "A new merge request!", + description: "This merge request is pretty good." + }.to_json, + headers: { + 'Content-Type' => 'application/json', + 'Private-Token' => 'the-api-token' + } + ).to_return(status: 200, body: "") + + client.create_or_update_merge_request(**params.merge(update_labels: false)) + expect(stub).to have_been_requested + end + end + + context 'when there is nothing to update' do + it 'does not make a request' do + client.create_or_update_merge_request(**params.merge(update_description: false, update_title: false, + update_labels: false)) + end + end end it 'raises an error when unsuccessful response' do @@ -97,3 +362,4 @@ RSpec.describe ::Gitlab::Housekeeper::GitlabClient do end end end +# rubocop:enable RSpec/MultipleMemoizedHelpers diff --git a/gems/gitlab-housekeeper/spec/gitlab/housekeeper/runner_spec.rb b/gems/gitlab-housekeeper/spec/gitlab/housekeeper/runner_spec.rb index 49b4926dbdd..d1fdbcb1004 100644 --- a/gems/gitlab-housekeeper/spec/gitlab/housekeeper/runner_spec.rb +++ b/gems/gitlab-housekeeper/spec/gitlab/housekeeper/runner_spec.rb @@ -3,6 +3,7 @@ require 'spec_helper' require 'gitlab/housekeeper/runner' +# rubocop:disable RSpec/MultipleMemoizedHelpers -- there are lots of parameters at play RSpec.describe ::Gitlab::Housekeeper::Runner do let(:fake_keep) { instance_double(Class) } @@ -11,7 +12,8 @@ RSpec.describe ::Gitlab::Housekeeper::Runner do %w[the identifier for the first change], "The title of MR1", "The description of the MR", - ['change1.txt', 'change2.txt'] + ['change1.txt', 'change2.txt'], + ['example-label'] ) end @@ -20,7 +22,8 @@ RSpec.describe ::Gitlab::Housekeeper::Runner do %w[the identifier for the second change], "The title of MR2", "The description of the MR", - ['change1.txt', 'change2.txt'] + ['change1.txt', 'change2.txt'], + ['example-label'] ) end @@ -29,7 +32,8 @@ RSpec.describe ::Gitlab::Housekeeper::Runner do %w[the identifier for the third change], "The title of MR3", "The description of the MR", - ['change1.txt', 'change2.txt'] + ['change1.txt', 'change2.txt'], + ['example-label'] ) end @@ -44,18 +48,34 @@ RSpec.describe ::Gitlab::Housekeeper::Runner do end describe '#run' do + let(:git) { instance_double(::Gitlab::Housekeeper::Git) } + let(:gitlab_client) { instance_double(::Gitlab::Housekeeper::GitlabClient) } + before do stub_env('HOUSEKEEPER_FORK_PROJECT_ID', '123') stub_env('HOUSEKEEPER_TARGET_PROJECT_ID', '456') + + allow(::Gitlab::Housekeeper::Git).to receive(:new) + .and_return(git) + + allow(git).to receive(:with_branch_from_branch) + .and_yield + allow(git).to receive(:commit_in_branch).with(change1) + .and_return('the-identifier-for-the-first-change') + allow(git).to receive(:commit_in_branch).with(change2) + .and_return('the-identifier-for-the-second-change') + + allow(::Gitlab::Housekeeper::GitlabClient).to receive(:new) + .and_return(gitlab_client) + + allow(gitlab_client).to receive(:non_housekeeper_changes) + .and_return([]) + + allow(::Gitlab::Housekeeper::Shell).to receive(:execute) end it 'loops over the keeps and creates MRs limited by max_mrs' do # Branches get created - git = instance_double(::Gitlab::Housekeeper::Git) - expect(::Gitlab::Housekeeper::Git).to receive(:new) - .and_return(git) - expect(git).to receive(:with_branch_from_branch) - .and_yield expect(git).to receive(:commit_in_branch).with(change1) .and_return('the-identifier-for-the-first-change') expect(git).to receive(:commit_in_branch).with(change2) @@ -76,29 +96,93 @@ RSpec.describe ::Gitlab::Housekeeper::Runner do 'the-identifier-for-the-second-change:the-identifier-for-the-second-change') # Merge requests get created - gitlab_client = instance_double(::Gitlab::Housekeeper::GitlabClient) - expect(::Gitlab::Housekeeper::GitlabClient).to receive(:new) - .and_return(gitlab_client) expect(gitlab_client).to receive(:create_or_update_merge_request) .with( source_project_id: '123', title: 'The title of MR1', description: 'The description of the MR', + labels: ['example-label'], source_branch: 'the-identifier-for-the-first-change', target_branch: 'master', - target_project_id: '456' + target_project_id: '456', + update_title: true, + update_description: true, + update_labels: true ) expect(gitlab_client).to receive(:create_or_update_merge_request) .with( source_project_id: '123', title: 'The title of MR2', description: 'The description of the MR', + labels: ['example-label'], source_branch: 'the-identifier-for-the-second-change', target_branch: 'master', - target_project_id: '456' + target_project_id: '456', + update_title: true, + update_description: true, + update_labels: true ) described_class.new(max_mrs: 2, keeps: [fake_keep]).run end + + context 'when title, description, code has changed already' do + it 'does not update the changed details' do + # First change has updated code and description so should only update title + expect(gitlab_client).to receive(:non_housekeeper_changes) + .with( + source_project_id: '123', + source_branch: 'the-identifier-for-the-first-change', + target_branch: 'master', + target_project_id: '456' + ).and_return([:code, :description]) + + # Second change has updated title and description so it should push the code + expect(gitlab_client).to receive(:non_housekeeper_changes) + .with( + source_project_id: '123', + source_branch: 'the-identifier-for-the-second-change', + target_branch: 'master', + target_project_id: '456' + ).and_return([:title, :description]) + + expect(::Gitlab::Housekeeper::Shell).not_to receive(:execute) + .with('git', 'push', '-f', 'housekeeper', + 'the-identifier-for-the-first-change:the-identifier-for-the-first-change') + expect(::Gitlab::Housekeeper::Shell).to receive(:execute) + .with('git', 'push', '-f', 'housekeeper', + 'the-identifier-for-the-second-change:the-identifier-for-the-second-change') + + expect(gitlab_client).to receive(:create_or_update_merge_request) + .with( + source_project_id: '123', + title: 'The title of MR1', + description: 'The description of the MR', + labels: ['example-label'], + source_branch: 'the-identifier-for-the-first-change', + target_branch: 'master', + target_project_id: '456', + update_title: true, + update_description: false, + update_labels: true + ) + expect(gitlab_client).to receive(:create_or_update_merge_request) + .with( + source_project_id: '123', + title: 'The title of MR2', + description: 'The description of the MR', + labels: ['example-label'], + source_branch: 'the-identifier-for-the-second-change', + target_branch: 'master', + target_project_id: '456', + update_title: false, + update_description: false, + update_labels: true + ) + + described_class.new(max_mrs: 2, keeps: [fake_keep]).run + end + end end end +# rubocop:enable RSpec/MultipleMemoizedHelpers diff --git a/gems/gitlab-http/Gemfile.lock b/gems/gitlab-http/Gemfile.lock index 5fb1963d8f3..149a6805f05 100644 --- a/gems/gitlab-http/Gemfile.lock +++ b/gems/gitlab-http/Gemfile.lock @@ -13,7 +13,6 @@ PATH actionview (>= 6.1.7.2) activesupport (>= 6.1.7.2) addressable (~> 2.8) - nokogiri (~> 1.15.2) rake (~> 13.0) PATH @@ -24,7 +23,6 @@ PATH concurrent-ruby (~> 1.2) httparty (~> 0.21.0) ipaddress (~> 0.8.3) - nokogiri (~> 1.15.4) railties (~> 7) GEM diff --git a/gems/gitlab-http/gitlab-http.gemspec b/gems/gitlab-http/gitlab-http.gemspec index 0033f17447b..27366246bb1 100644 --- a/gems/gitlab-http/gitlab-http.gemspec +++ b/gems/gitlab-http/gitlab-http.gemspec @@ -23,7 +23,6 @@ Gem::Specification.new do |spec| spec.add_runtime_dependency 'concurrent-ruby', '~> 1.2' spec.add_runtime_dependency 'httparty', '~> 0.21.0' spec.add_runtime_dependency 'ipaddress', '~> 0.8.3' - spec.add_runtime_dependency 'nokogiri', '~> 1.15.4' spec.add_runtime_dependency "railties", "~> 7" spec.add_development_dependency 'gitlab-styles', '~> 10.1.0' diff --git a/gems/gitlab-http/lib/gitlab/http_v2/lazy_response.rb b/gems/gitlab-http/lib/gitlab/http_v2/lazy_response.rb index 65d1ab96644..7c7a44f77e2 100644 --- a/gems/gitlab-http/lib/gitlab/http_v2/lazy_response.rb +++ b/gems/gitlab-http/lib/gitlab/http_v2/lazy_response.rb @@ -7,7 +7,7 @@ module Gitlab attr_reader :promise - delegate :state, to: :promise + delegate :state, :complete?, to: :promise def initialize(promise, path, options, log_info) @promise = promise diff --git a/gems/gitlab-rspec_flaky/lib/gitlab/rspec_flaky/listener.rb b/gems/gitlab-rspec_flaky/lib/gitlab/rspec_flaky/listener.rb index 6f4dce9df33..c40d9e90b56 100644 --- a/gems/gitlab-rspec_flaky/lib/gitlab/rspec_flaky/listener.rb +++ b/gems/gitlab-rspec_flaky/lib/gitlab/rspec_flaky/listener.rb @@ -36,6 +36,10 @@ module Gitlab end def dump_summary(_) + rails_logger_warn( + "\n#{flaky_examples.count} known flaky example(s) detected. " \ + "Writing this to #{Config.flaky_examples_report_path}.\n" + ) Report.new(flaky_examples).write(Config.flaky_examples_report_path) return unless new_flaky_examples.any? diff --git a/gems/gitlab-rspec_flaky/spec/gitlab/rspec_flaky/listener_spec.rb b/gems/gitlab-rspec_flaky/spec/gitlab/rspec_flaky/listener_spec.rb index b46044e4521..a830226d88f 100644 --- a/gems/gitlab-rspec_flaky/spec/gitlab/rspec_flaky/listener_spec.rb +++ b/gems/gitlab-rspec_flaky/spec/gitlab/rspec_flaky/listener_spec.rb @@ -197,6 +197,8 @@ RSpec.describe Gitlab::RspecFlaky::Listener, :aggregate_failures do end describe '#dump_summary' do + subject { listener.dump_summary(nil) } + let(:listener) { described_class.new(suite_flaky_example_report.to_json) } let(:new_flaky_rspec_example) { double(new_example_attrs.merge(attempts: 2)) } let(:already_flaky_rspec_example) { double(already_flaky_example_attrs.merge(attempts: 2)) } @@ -207,6 +209,39 @@ RSpec.describe Gitlab::RspecFlaky::Listener, :aggregate_failures do allow(Kernel).to receive(:warn) end + context 'when not flaky tests were found' do + it 'prints a message in the console' do + allow(Kernel).to receive(:warn).and_call_original + + expect { subject }.to output( + %r{0 known flaky example\(s\) detected\. Writing this to rspec/flaky/report\.json} + ).to_stderr + end + end + + context 'when existing flaky tests were found' do + before do + listener.example_passed(notification_already_flaky_rspec_example) + end + + it 'does write them in the correct report' do + report = double + + expect(Gitlab::RspecFlaky::Report).to receive(:new).with(listener.flaky_examples).and_return(report) + expect(report).to receive(:write).with(Gitlab::RspecFlaky::Config.flaky_examples_report_path) + + subject + end + + it 'prints a message in the console' do + allow(Kernel).to receive(:warn).and_call_original + + expect { subject }.to output( + %r{1 known flaky example\(s\) detected\. Writing this to rspec/flaky/report\.json} + ).to_stderr + end + end + context 'when a report file path is set by FLAKY_RSPEC_REPORT_PATH' do it 'delegates the writes to RspecFlaky::Report' do listener.example_passed(notification_new_flaky_rspec_example) @@ -222,7 +257,7 @@ RSpec.describe Gitlab::RspecFlaky::Listener, :aggregate_failures do .to receive(:new).with(listener.__send__(:new_flaky_examples)).and_return(report2) expect(report2).to receive(:write).with(Gitlab::RspecFlaky::Config.new_flaky_examples_report_path) - listener.dump_summary(nil) + subject end end end diff --git a/gems/gitlab-safe_request_store/Gemfile.lock b/gems/gitlab-safe_request_store/Gemfile.lock index 50ff694d7d5..d26bab2f4f0 100644 --- a/gems/gitlab-safe_request_store/Gemfile.lock +++ b/gems/gitlab-safe_request_store/Gemfile.lock @@ -2,6 +2,7 @@ PATH remote: . specs: gitlab-safe_request_store (0.1.0) + rack (~> 2.2.8) request_store GEM @@ -35,7 +36,7 @@ GEM coderay (~> 1.1) method_source (~> 1.0) racc (1.6.2) - rack (3.0.4.1) + rack (2.2.8) rainbow (3.1.1) regexp_parser (2.7.0) request_store (1.5.1) diff --git a/gems/gitlab-safe_request_store/gitlab-safe_request_store.gemspec b/gems/gitlab-safe_request_store/gitlab-safe_request_store.gemspec index a685a1eb447..b3ced3929c8 100644 --- a/gems/gitlab-safe_request_store/gitlab-safe_request_store.gemspec +++ b/gems/gitlab-safe_request_store/gitlab-safe_request_store.gemspec @@ -18,6 +18,7 @@ Gem::Specification.new do |spec| spec.files = Dir['lib/**/*.rb'] spec.require_paths = ["lib"] + spec.add_runtime_dependency "rack", "~> 2.2.8" spec.add_runtime_dependency "request_store" spec.add_development_dependency "gitlab-styles", "~> 10.1.0" diff --git a/gems/gitlab-secret_detection/Gemfile.lock b/gems/gitlab-secret_detection/Gemfile.lock index dd9f621ee4a..029ad5d778e 100644 --- a/gems/gitlab-secret_detection/Gemfile.lock +++ b/gems/gitlab-secret_detection/Gemfile.lock @@ -2,6 +2,7 @@ PATH remote: . specs: gitlab-secret_detection (0.1.0) + parallel (~> 1.22) re2 (~> 2.4) toml-rb (~> 2.2) @@ -47,7 +48,7 @@ GEM mini_portile2 (2.8.5) minitest (5.20.0) mutex_m (0.2.0) - parallel (1.23.0) + parallel (1.24.0) parser (3.2.2.4) ast (~> 2.4.1) racc @@ -136,6 +137,7 @@ PLATFORMS ruby DEPENDENCIES + benchmark-malloc (~> 0.2) gitlab-secret_detection! gitlab-styles (~> 11.0) rspec (~> 3.0) diff --git a/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec b/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec index be9db3aa389..147f370a716 100644 --- a/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec +++ b/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec @@ -24,9 +24,11 @@ Gem::Specification.new do |spec| spec.files = Dir['lib/**/*.rb'] spec.require_paths = ["lib"] + spec.add_runtime_dependency "parallel", "~> 1.22" spec.add_runtime_dependency "re2", "~> 2.4" spec.add_runtime_dependency "toml-rb", "~> 2.2" + spec.add_development_dependency "benchmark-malloc", "~> 0.2" spec.add_development_dependency "gitlab-styles", "~> 11.0" spec.add_development_dependency "rspec", "~> 3.0" spec.add_development_dependency "rspec-benchmark", "~> 0.6.0" diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb index 20d630d5dbb..37103912615 100644 --- a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb +++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb @@ -4,6 +4,7 @@ require 'toml-rb' require 're2' require 'logger' require 'timeout' +require 'parallel' module Gitlab module SecretDetection @@ -23,12 +24,18 @@ module Gitlab DEFAULT_BLOB_TIMEOUT_SECS = 5 # file path where the secrets ruleset file is located RULESET_FILE_PATH = File.expand_path('../../gitleaks.toml', __dir__) - # ignore the scanning of a line which ends with the following keyword - GITLEAKS_KEYWORD_IGNORE = 'gitleaks:allow' + # Max no of child processes to spawn per request + # ref: https://gitlab.com/gitlab-org/gitlab/-/issues/430160 + MAX_PROCS_PER_REQUEST = 5 + # Minimum cumulative size of the blobs required to spawn and + # run the scan within a new subprocess. + MIN_CHUNK_SIZE_PER_PROC_BYTES = 2_097_152 # 2MiB + # Whether to run scan in subprocesses or not. Default is true. + RUN_IN_SUBPROCESS = true # Initializes the instance with logger along with following operations: # 1. Parse ruleset for the given +ruleset_path+(default: +RULESET_FILE_PATH+). Raises +RulesetParseError+ - # incase the operation fails. + # in case the operation fails. # 2. Extract keywords from the parsed ruleset to use it for matching keywords before regex operation. # 3. Build and Compile rule regex patterns obtained from the ruleset. Raises +RulesetCompilationError+ # in case the compilation fails. @@ -46,13 +53,31 @@ module Gitlab # +timeout+:: No of seconds(accepts floating point for smaller time values) to limit the total scan duration # +blob_timeout+:: No of seconds(accepts floating point for smaller time values) to limit # the scan duration on each blob + # +subprocess+:: If passed true, the scan is performed within subprocess instead of main process. + # To avoid over-consuming memory by running scan on multiple large blobs within a single subprocess, + # it instead groups the blobs into smaller array where each array contains blobs with cumulative size of + # +MIN_CHUNK_SIZE_PER_PROC_BYTES+ bytes and each group runs in a separate sub-process. Default value + # is true. + # + # NOTE: + # Running the scan in fork mode primarily focuses on reducing the memory consumption of the scan by + # offloading regex operations on large blobs to sub-processes. However, it does not assure the improvement + # in the overall latency of the scan, specifically in the case of smaller blob sizes, where the overhead of + # forking a new process adds to the overall latency of the scan instead. More reference on Subprocess-based + # execution is found here: https://gitlab.com/gitlab-org/gitlab/-/issues/430160. # # Returns an instance of SecretDetection::Response by following below structure: # { # status: One of the SecretDetection::Status values # results: [SecretDetection::Finding] # } - def secrets_scan(blobs, timeout: DEFAULT_SCAN_TIMEOUT_SECS, blob_timeout: DEFAULT_BLOB_TIMEOUT_SECS) + # + def secrets_scan( + blobs, + timeout: DEFAULT_SCAN_TIMEOUT_SECS, + blob_timeout: DEFAULT_BLOB_TIMEOUT_SECS, + subprocess: RUN_IN_SUBPROCESS + ) return SecretDetection::Response.new(SecretDetection::Status::INPUT_ERROR) unless validate_scan_input(blobs) Timeout.timeout(timeout) do @@ -60,7 +85,11 @@ module Gitlab next SecretDetection::Response.new(SecretDetection::Status::NOT_FOUND) if matched_blobs.empty? - secrets = find_secrets_bulk(matched_blobs, blob_timeout) + secrets = if subprocess + run_scan_within_subprocess(blobs, blob_timeout) + else + run_scan(blobs, blob_timeout) + end scan_status = overall_scan_status(secrets) @@ -114,7 +143,7 @@ module Gitlab secrets_keywords.flatten.compact.to_set end - # returns only those blobs that contain atleast one of the keywords + # returns only those blobs that contain at least one of the keywords # from the keywords list def filter_by_keywords(blobs) matched_blobs = [] @@ -126,22 +155,43 @@ module Gitlab matched_blobs.freeze end - # finds secrets in the given list of blobs - def find_secrets_bulk(blobs, blob_timeout) - found_secrets = [] - - blobs.each do |blob| - found_secrets << Timeout.timeout(blob_timeout) { find_secrets(blob) } + def run_scan(blobs, blob_timeout) + found_secrets = blobs.flat_map do |blob| + Timeout.timeout(blob_timeout) do + find_secrets(blob) + end rescue Timeout::Error => e - logger.error "Secret detection scan timed out on the blob(id:#{blob.id}): #{e}" + logger.error "Secret Detection scan timed out on the blob(id:#{blob.id}): #{e}" + SecretDetection::Finding.new(blob.id, + SecretDetection::Status::BLOB_TIMEOUT) + end + + found_secrets.freeze + end - found_secrets << SecretDetection::Finding.new( - blob.id, - SecretDetection::Status::BLOB_TIMEOUT - ) + def run_scan_within_subprocess(blobs, blob_timeout) + blob_sizes = blobs.map(&:size) + grouped_blob_indicies = group_by_chunk_size(blob_sizes) + + grouped_blobs = grouped_blob_indicies.map { |idx_arr| idx_arr.map { |i| blobs[i] } } + + found_secrets = Parallel.flat_map( + grouped_blobs, + in_processes: MAX_PROCS_PER_REQUEST, + isolation: true # do not reuse sub-processes + ) do |grouped_blob| + grouped_blob.flat_map do |blob| + Timeout.timeout(blob_timeout) do + find_secrets(blob) + end + rescue Timeout::Error => e + logger.error "Secret Detection scan timed out on the blob(id:#{blob.id}): #{e}" + SecretDetection::Finding.new(blob.id, + SecretDetection::Status::BLOB_TIMEOUT) + end end - found_secrets.flatten.freeze + found_secrets.freeze end # finds secrets in the given blob with a timeout circuit breaker @@ -149,10 +199,8 @@ module Gitlab secrets = [] blob.data.each_line.with_index do |line, index| - # ignore the line scan if it is suffixed with '#gitleaks:allow' - next if line.end_with?(GITLEAKS_KEYWORD_IGNORE) + patterns = pattern_matcher.match(line, exception: false) - patterns = pattern_matcher.match(line, :exception => false) next unless patterns.any? line_number = index + 1 @@ -172,7 +220,7 @@ module Gitlab secrets rescue StandardError => e - logger.error "Secret detection scan failed on the blob(id:#{blob.id}): #{e}" + logger.error "Secret Detection scan failed on the blob(id:#{blob.id}): #{e}" SecretDetection::Finding.new(blob.id, SecretDetection::Status::SCAN_ERROR) end @@ -201,6 +249,35 @@ module Gitlab SecretDetection::Status::FOUND_WITH_ERRORS end end + + # This method accepts an array of blob sizes(in bytes) and groups them into an array + # of arrays structure where each element is the group of indicies of the input + # array whose cumulative blob sizes has at least +MIN_CHUNK_SIZE_PER_PROC_BYTES+ + def group_by_chunk_size(blob_size_arr) + cumulative_size = 0 + chunk_indexes = [] + chunk_idx_start = 0 + + blob_size_arr.each_with_index do |size, index| + cumulative_size += size + next unless cumulative_size >= MIN_CHUNK_SIZE_PER_PROC_BYTES + + chunk_indexes << (chunk_idx_start..index).to_a + + chunk_idx_start = index + 1 + cumulative_size = 0 + end + + if cumulative_size.positive? && (chunk_idx_start < blob_size_arr.length) + chunk_indexes << if chunk_idx_start == blob_size_arr.length - 1 + [chunk_idx_start] + else + (chunk_idx_start..blob_size_arr.length - 1).to_a + end + end + + chunk_indexes + end end end end diff --git a/gems/gitlab-secret_detection/spec/lib/gitlab/secret_detection/scan_spec.rb b/gems/gitlab-secret_detection/spec/lib/gitlab/secret_detection/scan_spec.rb index 1e6ccf1e6a0..e69fcceeaab 100644 --- a/gems/gitlab-secret_detection/spec/lib/gitlab/secret_detection/scan_spec.rb +++ b/gems/gitlab-secret_detection/spec/lib/gitlab/secret_detection/scan_spec.rb @@ -16,28 +16,28 @@ RSpec.describe Gitlab::SecretDetection::Scan, feature_category: :secret_detectio { "id" => "gitlab_personal_access_token", "description" => "GitLab Personal Access Token", - "regex" => "glpat-[0-9a-zA-Z_\\-]{20}", + "regex" => "\bglpat-[0-9a-zA-Z_-]{20}\b", "tags" => %w[gitlab revocation_type], "keywords" => ["glpat"] }, { "id" => "gitlab_pipeline_trigger_token", "description" => "GitLab Pipeline Trigger Token", - "regex" => "glptt-[0-9a-zA-Z_\\-]{40}", + "regex" => "\bglptt-[0-9a-zA-Z_-]{40}\b", "tags" => ["gitlab"], "keywords" => ["glptt"] }, { "id" => "gitlab_runner_registration_token", "description" => "GitLab Runner Registration Token", - "regex" => "GR1348941[0-9a-zA-Z_-]{20}", + "regex" => "\bGR1348941[0-9a-zA-Z_-]{20}\b", "tags" => ["gitlab"], "keywords" => ["GR1348941"] }, { "id" => "gitlab_feed_token_v2", "description" => "GitLab Feed Token", - "regex" => "glft-[0-9a-zA-Z_-]{20}", + "regex" => "\bglft-[0-9a-zA-Z_-]{20}\b", "tags" => ["gitlab"], "keywords" => ["glft"] } @@ -98,12 +98,16 @@ RSpec.describe Gitlab::SecretDetection::Scan, feature_category: :secret_detectio new_blob(id: 111, data: "glpat-12312312312312312312"), # gitleaks:allow new_blob(id: 222, data: "\n\nglptt-1231231231231231231212312312312312312312"), # gitleaks:allow new_blob(id: 333, data: "data with no secret"), - new_blob(id: 444, data: "GR134894112312312312312312312\nglft-12312312312312312312") # gitleaks:allow + new_blob(id: 444, + data: "GR134894112312312312312312312\nglft-12312312312312312312"), # gitleaks:allow + new_blob(id: 555, data: "data with no secret"), + new_blob(id: 666, data: "data with no secret"), + new_blob(id: 777, data: "\nglptt-1231231231231231231212312312312312312312") # gitleaks:allow ] end - it "matches different types of rules" do - expected_response = Gitlab::SecretDetection::Response.new( + let(:expected_response) do + Gitlab::SecretDetection::Response.new( Gitlab::SecretDetection::Status::FOUND, [ Gitlab::SecretDetection::Finding.new( @@ -133,11 +137,66 @@ RSpec.describe Gitlab::SecretDetection::Scan, feature_category: :secret_detectio 2, ruleset['rules'][3]['id'], ruleset['rules'][3]['description'] + ), + Gitlab::SecretDetection::Finding.new( + blobs[6].id, + Gitlab::SecretDetection::Status::FOUND, + 2, + ruleset['rules'][1]['id'], + ruleset['rules'][1]['description'] ) ] ) + end - expect(scan.secrets_scan(blobs)).to eq(expected_response) + it "matches multiple rules when running in main process" do + expect(scan.secrets_scan(blobs, subprocess: false)).to eq(expected_response) + end + + context "in subprocess" do + let(:dummy_lines) do + 10_000 + end + + let(:large_blobs) do + dummy_data = "\nrandom data" * dummy_lines + [ + new_blob(id: 111, data: "glpat-12312312312312312312#{dummy_data}"), # gitleaks:allow + new_blob(id: 222, data: "\n\nglptt-1231231231231231231212312312312312312312#{dummy_data}"), # gitleaks:allow + new_blob(id: 333, data: "data with no secret#{dummy_data}"), + new_blob(id: 444, + data: "GR134894112312312312312312312\nglft-12312312312312312312#{dummy_data}"), # gitleaks:allow + new_blob(id: 555, data: "data with no secret#{dummy_data}"), + new_blob(id: 666, data: "data with no secret#{dummy_data}"), + new_blob(id: 777, data: "#{dummy_data}\nglptt-1231231231231231231212312312312312312312") # gitleaks:allow + ] + end + + it "matches multiple rules" do + expect(scan.secrets_scan(blobs, subprocess: true)).to eq(expected_response) + end + + it "takes at least same time to run as running in main process" do + expect { scan.secrets_scan(large_blobs, subprocess: true) }.to perform_faster_than { + scan.secrets_scan(large_blobs, + subprocess: false) + }.once + end + + it "allocates less memory than when running in main process" do + forked_stats = Benchmark::Malloc.new.run { scan.secrets_scan(large_blobs, subprocess: true) } + non_forked_stats = Benchmark::Malloc.new.run { scan.secrets_scan(large_blobs, subprocess: false) } + + max_processes = Gitlab::SecretDetection::Scan::MAX_PROCS_PER_REQUEST + + forked_memory = forked_stats.allocated.total_memory + non_forked_memory = non_forked_stats.allocated.total_memory + forked_obj_allocs = forked_stats.allocated.total_objects + non_forked_obj_allocs = non_forked_stats.allocated.total_objects + + expect(non_forked_memory).to be >= forked_memory * max_processes + expect(non_forked_obj_allocs).to be >= forked_obj_allocs * max_processes + end end end diff --git a/gems/gitlab-secret_detection/spec/spec_helper.rb b/gems/gitlab-secret_detection/spec/spec_helper.rb index b694e52d2b6..04cb58edd67 100644 --- a/gems/gitlab-secret_detection/spec/spec_helper.rb +++ b/gems/gitlab-secret_detection/spec/spec_helper.rb @@ -2,6 +2,8 @@ require 'gitlab/secret_detection' require 'rspec-parameterized' +require 'rspec-benchmark' +require 'benchmark-malloc' RSpec.configure do |config| # Enable flags like --only-failures and --next-failure @@ -10,9 +12,17 @@ RSpec.configure do |config| # Disable RSpec exposing methods globally on `Module` and `main` config.disable_monkey_patching! + config.include RSpec::Benchmark::Matchers + Dir['./spec/support/**/*.rb'].each { |f| require f } config.expect_with :rspec do |c| c.syntax = :expect end + + # configure benchmark factors + RSpec::Benchmark.configure do |cfg| + # to avoid retention of allocated memory by the perf tests in the main process + cfg.run_in_subprocess = true + end end diff --git a/gems/gitlab-utils/Gemfile.lock b/gems/gitlab-utils/Gemfile.lock index ef7c2d57c7a..76c6c1e2110 100644 --- a/gems/gitlab-utils/Gemfile.lock +++ b/gems/gitlab-utils/Gemfile.lock @@ -13,7 +13,6 @@ PATH actionview (>= 6.1.7.2) activesupport (>= 6.1.7.2) addressable (~> 2.8) - nokogiri (~> 1.15.2) rake (~> 13.0) GEM @@ -77,7 +76,7 @@ GEM method_source (1.0.0) mini_portile2 (2.8.2) minitest (5.18.1) - nokogiri (1.15.2) + nokogiri (1.16.0) mini_portile2 (~> 2.8.2) racc (~> 1.4) parallel (1.23.0) diff --git a/gems/gitlab-utils/gitlab-utils.gemspec b/gems/gitlab-utils/gitlab-utils.gemspec index d5f6deb7fe6..b66f467d0b6 100644 --- a/gems/gitlab-utils/gitlab-utils.gemspec +++ b/gems/gitlab-utils/gitlab-utils.gemspec @@ -21,7 +21,6 @@ Gem::Specification.new do |spec| spec.add_runtime_dependency "actionview", ">= 6.1.7.2" spec.add_runtime_dependency "activesupport", ">= 6.1.7.2" spec.add_runtime_dependency "addressable", "~> 2.8" - spec.add_runtime_dependency "nokogiri", "~> 1.15.2" spec.add_runtime_dependency "rake", "~> 13.0" spec.add_development_dependency "factory_bot_rails", "~> 6.2.0" diff --git a/gems/gitlab-utils/lib/gitlab/utils.rb b/gems/gitlab-utils/lib/gitlab/utils.rb index b7cb63210ee..d5a514bfae8 100644 --- a/gems/gitlab-utils/lib/gitlab/utils.rb +++ b/gems/gitlab-utils/lib/gitlab/utils.rb @@ -261,6 +261,8 @@ module Gitlab end end + # Use this method to set the `restrict_within_concurrent_ruby` to `true` for the block. + # `raise_if_concurrent_ruby!` will use this flag to raise an error if it's set to `true`. def restrict_within_concurrent_ruby previous = Thread.current[:restrict_within_concurrent_ruby] Thread.current[:restrict_within_concurrent_ruby] = true @@ -270,6 +272,18 @@ module Gitlab Thread.current[:restrict_within_concurrent_ruby] = previous end + # Use this method to disable the `restrict_within_concurrent_ruby` for the block. + # It is mainly used to prevent infinite loop when `ConcurrentRubyThreadIsUsedError` is rescued and sent to Sentry. + # More info: https://gitlab.com/gitlab-org/gitlab/-/issues/432145#note_1671305713 + def allow_within_concurrent_ruby + previous = Thread.current[:restrict_within_concurrent_ruby] + Thread.current[:restrict_within_concurrent_ruby] = false + + yield + ensure + Thread.current[:restrict_within_concurrent_ruby] = previous + end + # Running external methods can allocate I/O bound resources (like PostgreSQL connection or Gitaly) # This is forbidden when running within a concurrent Ruby thread, for example `async` HTTP requests # provided by the `gitlab-http` gem. diff --git a/gems/gitlab-utils/spec/gitlab/utils_spec.rb b/gems/gitlab-utils/spec/gitlab/utils_spec.rb index 69531225eef..02d288acedf 100644 --- a/gems/gitlab-utils/spec/gitlab/utils_spec.rb +++ b/gems/gitlab-utils/spec/gitlab/utils_spec.rb @@ -489,6 +489,26 @@ RSpec.describe Gitlab::Utils, feature_category: :shared do end end + describe '.allow_within_concurrent_ruby' do + it 'assigns restrict_within_concurrent_ruby false to the current thread and ensures it restores' do + expect(Thread.current[:restrict_within_concurrent_ruby]).to be_nil + + described_class.allow_within_concurrent_ruby do + expect(Thread.current[:restrict_within_concurrent_ruby]).to be(false) + end + + expect(Thread.current[:restrict_within_concurrent_ruby]).to be_nil + end + + it 'overrides the value of restrict_within_concurrent_ruby' do + described_class.restrict_within_concurrent_ruby do + described_class.allow_within_concurrent_ruby do + expect(Thread.current[:restrict_within_concurrent_ruby]).to be(false) + end + end + end + end + describe '.raise_if_concurrent_ruby!' do subject(:raise_if_concurrent_ruby!) { described_class.raise_if_concurrent_ruby!('test') } |