Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/gems
diff options
context:
space:
mode:
Diffstat (limited to 'gems')
-rw-r--r--gems/config/rubocop.yml9
-rw-r--r--gems/gem-pg.gitlab-ci.yml1
-rw-r--r--gems/gem.gitlab-ci.yml1
-rw-r--r--gems/gitlab-database-load_balancing/Gemfile.lock2
-rw-r--r--gems/gitlab-housekeeper/Gemfile.lock3
-rw-r--r--gems/gitlab-housekeeper/README.md88
-rw-r--r--gems/gitlab-housekeeper/gitlab-housekeeper.gemspec3
-rw-r--r--gems/gitlab-housekeeper/lib/gitlab/housekeeper/gitlab_client.rb154
-rw-r--r--gems/gitlab-housekeeper/lib/gitlab/housekeeper/runner.rb41
-rw-r--r--gems/gitlab-housekeeper/spec/gitlab/housekeeper/gitlab_client_spec.rb274
-rw-r--r--gems/gitlab-housekeeper/spec/gitlab/housekeeper/runner_spec.rb110
-rw-r--r--gems/gitlab-http/Gemfile.lock2
-rw-r--r--gems/gitlab-http/gitlab-http.gemspec1
-rw-r--r--gems/gitlab-http/lib/gitlab/http_v2/lazy_response.rb2
-rw-r--r--gems/gitlab-rspec_flaky/lib/gitlab/rspec_flaky/listener.rb4
-rw-r--r--gems/gitlab-rspec_flaky/spec/gitlab/rspec_flaky/listener_spec.rb37
-rw-r--r--gems/gitlab-safe_request_store/Gemfile.lock3
-rw-r--r--gems/gitlab-safe_request_store/gitlab-safe_request_store.gemspec1
-rw-r--r--gems/gitlab-secret_detection/Gemfile.lock4
-rw-r--r--gems/gitlab-secret_detection/gitlab-secret_detection.gemspec2
-rw-r--r--gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb121
-rw-r--r--gems/gitlab-secret_detection/spec/lib/gitlab/secret_detection/scan_spec.rb75
-rw-r--r--gems/gitlab-secret_detection/spec/spec_helper.rb10
-rw-r--r--gems/gitlab-utils/Gemfile.lock3
-rw-r--r--gems/gitlab-utils/gitlab-utils.gemspec1
-rw-r--r--gems/gitlab-utils/lib/gitlab/utils.rb14
-rw-r--r--gems/gitlab-utils/spec/gitlab/utils_spec.rb20
27 files changed, 902 insertions, 84 deletions
diff --git a/gems/config/rubocop.yml b/gems/config/rubocop.yml
index ca46e30e2cd..7cad83bbe66 100644
--- a/gems/config/rubocop.yml
+++ b/gems/config/rubocop.yml
@@ -96,10 +96,6 @@ RSpec/ContextWording:
RSpec/FeatureCategory:
Enabled: false
-# Enable once we drop 3.0 support
-Style/HashSyntax:
- Enabled: false
-
Style/InlineDisableAnnotation:
Enabled: false
@@ -114,3 +110,8 @@ Style/RegexpLiteralMixedPreserve:
- mixed
- mixed_preserve
EnforcedStyle: mixed_preserve
+
+# Short-hand Hash syntax does not work prior 3.1.
+# See https://gitlab.com/gitlab-org/gitlab/-/issues/435940#note_1703307479
+Style/HashSyntax:
+ EnforcedShorthandSyntax: never
diff --git a/gems/gem-pg.gitlab-ci.yml b/gems/gem-pg.gitlab-ci.yml
index c48e18fa297..2806437e15b 100644
--- a/gems/gem-pg.gitlab-ci.yml
+++ b/gems/gem-pg.gitlab-ci.yml
@@ -63,6 +63,7 @@ rubocop:
rules:
- exists: ["$[[inputs.gem_path_prefix]]$[[inputs.gem_name]]/.rubocop.yml"]
script:
+ - $CI_PROJECT_DIR/scripts/validate-monorepo-gem "$[[inputs.gem_name]]"
- bundle exec rubocop
rspec:
diff --git a/gems/gem.gitlab-ci.yml b/gems/gem.gitlab-ci.yml
index a379a887bdd..3ced4b5e364 100644
--- a/gems/gem.gitlab-ci.yml
+++ b/gems/gem.gitlab-ci.yml
@@ -50,6 +50,7 @@ rubocop:
rules:
- exists: ["$[[inputs.gem_path_prefix]]$[[inputs.gem_name]]/.rubocop.yml"]
script:
+ - $CI_PROJECT_DIR/scripts/validate-monorepo-gem "$[[inputs.gem_name]]"
- bundle exec rubocop
rspec:
diff --git a/gems/gitlab-database-load_balancing/Gemfile.lock b/gems/gitlab-database-load_balancing/Gemfile.lock
index b2d66b9a386..9c8653d9389 100644
--- a/gems/gitlab-database-load_balancing/Gemfile.lock
+++ b/gems/gitlab-database-load_balancing/Gemfile.lock
@@ -16,6 +16,7 @@ PATH
remote: ../gitlab-safe_request_store
specs:
gitlab-safe_request_store (0.1.0)
+ rack (~> 2.2.8)
request_store
PATH
@@ -25,7 +26,6 @@ PATH
actionview (>= 6.1.7.2)
activesupport (>= 6.1.7.2)
addressable (~> 2.8)
- nokogiri (~> 1.15.2)
rake (~> 13.0)
PATH
diff --git a/gems/gitlab-housekeeper/Gemfile.lock b/gems/gitlab-housekeeper/Gemfile.lock
index 2b18c02558c..9fbdc246811 100644
--- a/gems/gitlab-housekeeper/Gemfile.lock
+++ b/gems/gitlab-housekeeper/Gemfile.lock
@@ -10,6 +10,7 @@ PATH
remote: .
specs:
gitlab-housekeeper (0.1.0)
+ activesupport
httparty
rubocop
@@ -122,7 +123,7 @@ GEM
hashdiff (>= 0.4.0, < 2.0.0)
PLATFORMS
- arm64-darwin-22
+ ruby
DEPENDENCIES
gitlab-housekeeper!
diff --git a/gems/gitlab-housekeeper/README.md b/gems/gitlab-housekeeper/README.md
index f707b99c6f0..25d68e67bd8 100644
--- a/gems/gitlab-housekeeper/README.md
+++ b/gems/gitlab-housekeeper/README.md
@@ -1,6 +1,86 @@
# Gitlab::Housekeeper
-Housekeeping following https://gitlab.com/gitlab-org/gitlab/-/merge_requests/134487
+This is a gem which can be run locally or in CI to do static and dynamic
+analysis of the GitLab codebase and, using a list of predefined "keeps", it will
+automatically create merge requests for things that developers would have
+otherwise needed to remember to do themselves.
+
+It is analogous to a mix of `rubocop -a` and GitLab Dependency Bot.
+
+The word "keep" is used to describe a specific rule to apply to the code to
+match a required change and actually edit the code. The word "keep" was chosen
+as it sounds like "cop" and is very similar to implementing a rubocop rule as
+well as code to autocorrect the rule.
+
+You can see the existing keeps in
+https://gitlab.com/gitlab-org/gitlab/-/tree/master/keeps .
+
+## How the code is organized
+
+The code is organized in a very similar way to RuboCop in that we have an
+overall gem called `gitlab-housekeeper` that contains the generic logic of
+looping over all `keeps` (analogous to Cops) which are rules for how to detect
+changes that can be made to the code and then actually how to correct them.
+
+Then users of this gem are expected to add a `keeps` directory in their project
+with all the keeps specific to their project. This gem may at some point
+include keeps that are generic enough to be used by other projects.
+
+## How to implement a keep
+
+The only thing you need to implement is an `each_change` method. The method
+should yield changes in the form of a `::Gitlab::Housekeeper::Change` object,
+where each change object represents a merge request that will be created.
+The object describes the files that should be commited and other metadata
+should be added to the merge request. Before yielding the `Change` the keep
+should also edit the files locally.
+
+Here is an example of a very simple keep that creates 3 new files called
+`new_file1.txt`, `new_file2.txt` and `new_file3.txt`:
+
+```ruby
+# keeps/pretty_useless_keep.rb
+
+module Keeps
+ class PrettyUselessKeep < ::Gitlab::Housekeeper::Keep
+ def each_change
+ (1..3).each do |i|
+ file_name = "new_file#{i}.txt"
+
+ `touch #{file_name}`
+
+ identifiers = [self.class.name.demodulize, "new_file#{i}"]
+
+ title = "Make new file #{file_name}"
+
+ description = <<~MARKDOWN
+ ## New files
+
+ This MR makes a new file #{file_name}
+ MARKDOWN
+
+ labels = %w(type::feature)
+
+ changed_files = [file_name]
+
+ yield(::Gitlab::Housekeeper::Change.new(
+ identifiers, title, description, changed_files, labels
+ ))
+ end
+ end
+ end
+end
+```
+
+You can dry-run this locally with the following command:
+
+```
+bundle exec gitlab-housekeeper -r keeps/pretty_useless_keep.rb -k Keeps::PrettyUselessKeep -d -m 3
+```
+
+The `-d` just prints the contents of the merge request. Removing this will
+actually create merge requests and requires setting a few environment
+variables described below.
## Running
@@ -14,7 +94,7 @@ a mistake. The alternative of using your own API token with it's permissions to
1. Create a project access token for that project
1. Set `housekeeper` remote to the fork you created
```
- git remote add housekeeper git@gitlab.com:DylanGriffith/gitlab.git
+ git remote add housekeeper <FORK_GIT_URL>
```
1. Open a Postgres.ai tunnel on localhost port 6305
1. Set the Postgres AI env vars matching the tunnel details for your tunnel
@@ -24,8 +104,8 @@ a mistake. The alternative of using your own API token with it's permissions to
```
1. Set the GitLab client details. Will be used to create MR from housekeeper remote:
```
- export HOUSEKEEPER_FORK_PROJECT_ID=52263761 # Same project as housekeeper remote
- export HOUSEKEEPER_TARGET_PROJECT_ID=52263761 # Can be 278964 (gitlab-org/gitlab) when ready to create real MRs
+ export HOUSEKEEPER_FORK_PROJECT_ID=<FORK_PROJECT_ID> # Same project as housekeeper remote
+ export HOUSEKEEPER_TARGET_PROJECT_ID=<TARGET_PROJECT_ID> # Can be 278964 (gitlab-org/gitlab) when ready to create real MRs
export HOUSEKEEPER_GITLAB_API_TOKEN=the-api-token
```
1. Run it:
diff --git a/gems/gitlab-housekeeper/gitlab-housekeeper.gemspec b/gems/gitlab-housekeeper/gitlab-housekeeper.gemspec
index 798ca5dcfe6..4083b3c2d11 100644
--- a/gems/gitlab-housekeeper/gitlab-housekeeper.gemspec
+++ b/gems/gitlab-housekeeper/gitlab-housekeeper.gemspec
@@ -19,11 +19,12 @@ Gem::Specification.new do |spec|
spec.require_paths = ["lib"]
spec.executables = ['gitlab-housekeeper']
+ spec.add_runtime_dependency 'activesupport'
spec.add_runtime_dependency 'httparty'
spec.add_runtime_dependency 'rubocop'
spec.add_development_dependency 'gitlab-styles'
spec.add_development_dependency 'rspec-rails'
- spec.add_development_dependency "rubocop-rspec"
+ spec.add_development_dependency 'rubocop-rspec'
spec.add_development_dependency 'webmock'
end
diff --git a/gems/gitlab-housekeeper/lib/gitlab/housekeeper/gitlab_client.rb b/gems/gitlab-housekeeper/lib/gitlab/housekeeper/gitlab_client.rb
index b28d44195cb..d2bb5065824 100644
--- a/gems/gitlab-housekeeper/lib/gitlab/housekeeper/gitlab_client.rb
+++ b/gems/gitlab-housekeeper/lib/gitlab/housekeeper/gitlab_client.rb
@@ -13,13 +13,65 @@ module Gitlab
@base_uri = 'https://gitlab.com/api/v4'
end
+ # This looks at the system notes of the merge request to detect if it has been updated by anyone other than the
+ # current housekeeper user. If it has then it assumes that they did this for a reason and we can skip updating
+ # this detail of the merge request. Otherwise we assume we should generate it again using the latest output.
+ def non_housekeeper_changes(
+ source_project_id:,
+ source_branch:,
+ target_branch:,
+ target_project_id:
+ )
+
+ iid = get_existing_merge_request(
+ source_project_id: source_project_id,
+ source_branch: source_branch,
+ target_branch: target_branch,
+ target_project_id: target_project_id
+ )
+
+ return [] if iid.nil?
+
+ merge_request_notes = get_merge_request_notes(target_project_id: target_project_id, iid: iid)
+
+ changes = Set.new
+
+ merge_request_notes.each do |note|
+ next false unless note["system"]
+ next false if note["author"]["id"] == current_user_id
+
+ changes << :title if note['body'].start_with?("changed title from")
+ changes << :description if note['body'] == "changed the description"
+ changes << :code if note['body'].match?(/added \d+ commit/)
+ end
+
+ resource_label_events = get_merge_request_resource_label_events(target_project_id: target_project_id, iid: iid)
+
+ resource_label_events.each do |event|
+ next if event["user"]["id"] == current_user_id
+
+ # Labels are routinely added by both humans and bots, so addition events aren't cause for concern.
+ # However, if labels have been removed it may mean housekeeper added an incorrect label, and we shouldn't
+ # re-add them.
+ #
+ # TODO: Inspect the actual labels housekeeper wants to add, and add if they haven't previously been removed.
+ changes << :labels if event["action"] == "remove"
+ end
+
+ changes.to_a
+ end
+
def create_or_update_merge_request(
source_project_id:,
title:,
description:,
+ labels:,
source_branch:,
target_branch:,
- target_project_id:
+ target_project_id:,
+ update_title:,
+ update_description:,
+ update_labels:
)
existing_iid = get_existing_merge_request(
source_project_id: source_project_id,
@@ -33,13 +85,18 @@ module Gitlab
existing_iid: existing_iid,
title: title,
description: description,
- target_project_id: target_project_id
+ labels: labels,
+ target_project_id: target_project_id,
+ update_title: update_title,
+ update_description: update_description,
+ update_labels: update_labels
)
else
create_merge_request(
source_project_id: source_project_id,
title: title,
description: description,
+ labels: labels,
source_branch: source_branch,
target_branch: target_branch,
target_project_id: target_project_id
@@ -49,6 +106,63 @@ module Gitlab
private
+ def get_merge_request_notes(target_project_id:, iid:)
+ response = HTTParty.get(
+ "#{@base_uri}/projects/#{target_project_id}/merge_requests/#{iid}/notes",
+ query: {
+ per_page: 100
+ },
+ headers: {
+ "Private-Token" => @token
+ }
+ )
+
+ unless (200..299).cover?(response.code)
+ raise Error,
+ "Failed to get merge request notes with response code: #{response.code} and body:\n#{response.body}"
+ end
+
+ JSON.parse(response.body)
+ end
+
+ def get_merge_request_resource_label_events(target_project_id:, iid:)
+ response = HTTParty.get(
+ "#{@base_uri}/projects/#{target_project_id}/merge_requests/#{iid}/resource_label_events",
+ query: {
+ per_page: 100
+ },
+ headers: {
+ "Private-Token" => @token
+ }
+ )
+
+ unless (200..299).cover?(response.code)
+ raise Error,
+ "Failed to get merge request label events with response code: #{response.code} and body:\n#{response.body}"
+ end
+
+ JSON.parse(response.body)
+ end
+
+ def current_user_id
+ @current_user_id = begin
+ response = HTTParty.get(
+ "#{@base_uri}/user",
+ headers: {
+ "Private-Token" => @token
+ }
+ )
+
+ unless (200..299).cover?(response.code)
+ raise Error,
+ "Failed with response code: #{response.code} and body:\n#{response.body}"
+ end
+
+ data = JSON.parse(response.body)
+ data['id']
+ end
+ end
+
def get_existing_merge_request(source_project_id:, source_branch:, target_branch:, target_project_id:)
response = HTTParty.get("#{@base_uri}/projects/#{target_project_id}/merge_requests",
query: {
@@ -78,11 +192,18 @@ module Gitlab
end
def create_merge_request(
- source_project_id:, title:, description:, source_branch:, target_branch:,
- target_project_id:)
+ source_project_id:,
+ title:,
+ description:,
+ labels:,
+ source_branch:,
+ target_branch:,
+ target_project_id:
+ )
response = HTTParty.post("#{@base_uri}/projects/#{source_project_id}/merge_requests", body: {
title: title,
description: description,
+ labels: Array(labels).join(','),
source_branch: source_branch,
target_branch: target_branch,
target_project_id: target_project_id
@@ -98,11 +219,26 @@ module Gitlab
"Failed with response code: #{response.code} and body:\n#{response.body}"
end
- def update_existing_merge_request(existing_iid:, title:, description:, target_project_id:)
- response = HTTParty.put("#{@base_uri}/projects/#{target_project_id}/merge_requests/#{existing_iid}", body: {
- title: title,
- description: description
- }.to_json,
+ def update_existing_merge_request(
+ existing_iid:,
+ title:,
+ description:,
+ labels:,
+ target_project_id:,
+ update_title:,
+ update_description:,
+ update_labels:
+ )
+ body = {}
+
+ body[:title] = title if update_title
+ body[:description] = description if update_description
+ body[:add_labels] = Array(labels).join(',') if update_labels
+
+ return if body.empty?
+
+ response = HTTParty.put("#{@base_uri}/projects/#{target_project_id}/merge_requests/#{existing_iid}",
+ body: body.to_json,
headers: {
'Private-Token' => @token,
'Content-Type' => 'application/json'
diff --git a/gems/gitlab-housekeeper/lib/gitlab/housekeeper/runner.rb b/gems/gitlab-housekeeper/lib/gitlab/housekeeper/runner.rb
index 76d629e29a3..45d52a0bd8b 100644
--- a/gems/gitlab-housekeeper/lib/gitlab/housekeeper/runner.rb
+++ b/gems/gitlab-housekeeper/lib/gitlab/housekeeper/runner.rb
@@ -1,13 +1,14 @@
# frozen_string_literal: true
+require 'active_support/core_ext/string'
require 'gitlab/housekeeper/keep'
-require "gitlab/housekeeper/gitlab_client"
-require "gitlab/housekeeper/git"
+require 'gitlab/housekeeper/gitlab_client'
+require 'gitlab/housekeeper/git'
require 'digest'
module Gitlab
module Housekeeper
- Change = Struct.new(:identifiers, :title, :description, :changed_files)
+ Change = Struct.new(:identifiers, :title, :description, :changed_files, :labels)
class Runner
def initialize(max_mrs: 1, dry_run: false, require: [], keeps: nil)
@@ -59,26 +60,52 @@ module Gitlab
end
def dry_run(change, branch_name)
+ puts "=> #{change.identifiers.join(': ')}"
+
+ if change.labels.present?
+ puts '=> Attributes:'
+ puts "Labels: #{change.labels.join(', ')}"
+ puts
+ end
+
+ puts '=> Title:'
+ puts change.title
puts
- puts "# #{change.title}"
- puts
+
+ puts '=> Description:'
puts change.description
puts
+
+ puts '=> Diff:'
puts Shell.execute('git', '--no-pager', 'diff', 'master', branch_name, '--', *change.changed_files)
+ puts
end
def create(change, branch_name)
dry_run(change, branch_name)
- Shell.execute('git', 'push', '-f', 'housekeeper', "#{branch_name}:#{branch_name}")
+ non_housekeeper_changes = gitlab_client.non_housekeeper_changes(
+ source_project_id: housekeeper_fork_project_id,
+ source_branch: branch_name,
+ target_branch: 'master',
+ target_project_id: housekeeper_target_project_id
+ )
+
+ unless non_housekeeper_changes.include?(:code)
+ Shell.execute('git', 'push', '-f', 'housekeeper', "#{branch_name}:#{branch_name}")
+ end
gitlab_client.create_or_update_merge_request(
source_project_id: housekeeper_fork_project_id,
title: change.title,
description: change.description,
+ labels: change.labels,
source_branch: branch_name,
target_branch: 'master',
- target_project_id: housekeeper_target_project_id
+ target_project_id: housekeeper_target_project_id,
+ update_title: !non_housekeeper_changes.include?(:title),
+ update_description: !non_housekeeper_changes.include?(:description),
+ update_labels: !non_housekeeper_changes.include?(:labels)
)
end
diff --git a/gems/gitlab-housekeeper/spec/gitlab/housekeeper/gitlab_client_spec.rb b/gems/gitlab-housekeeper/spec/gitlab/housekeeper/gitlab_client_spec.rb
index 36b2afdc306..ffb9d2f8d23 100644
--- a/gems/gitlab-housekeeper/spec/gitlab/housekeeper/gitlab_client_spec.rb
+++ b/gems/gitlab-housekeeper/spec/gitlab/housekeeper/gitlab_client_spec.rb
@@ -3,26 +3,225 @@
require 'spec_helper'
require 'gitlab/housekeeper/gitlab_client'
+# rubocop:disable RSpec/MultipleMemoizedHelpers -- there are lots of parameters at play
RSpec.describe ::Gitlab::Housekeeper::GitlabClient do
let(:client) { described_class.new }
+ before do
+ stub_env('HOUSEKEEPER_GITLAB_API_TOKEN', 'the-api-token')
+ end
+
+ describe '#non_housekeeper_changes' do
+ let(:housekeeper_user_id) { 666 }
+
+ let(:added_commit_note) do
+ {
+ id: 1698248524,
+ body: "added 1 commit\n\n<ul><li>41b3a17f - Update stuff to test...",
+ author: { "id" => 1234 },
+ system: true
+ }
+ end
+
+ let(:irrelevant_note1) do
+ {
+ id: 1698248523,
+ body: "changed this line in ...",
+ author: { "id" => 1234 },
+ system: true
+ }
+ end
+
+ let(:not_a_system_note) do
+ {
+ id: 1698248524,
+ body: "added 1 commit\n\n<ul><li>41b3a17f - Update stuff to test...",
+ author: { "id" => 1234 },
+ system: false
+ }
+ end
+
+ let(:updated_title_note) do
+ {
+ id: 1698248527,
+ body: "changed title from **Add sharding{- -}key `namespace_id` to achievements**...",
+ author: { "id" => 1235 },
+ system: true
+ }
+ end
+
+ let(:updated_description_note) do
+ {
+ id: 1698248530,
+ body: "changed the description",
+ author: { "id" => 1236 },
+ system: true
+ }
+ end
+
+ let(:notes) do
+ [irrelevant_note1, not_a_system_note]
+ end
+
+ let(:added_label_event) do
+ {
+ id: 274504558,
+ user: { id: 18645100 },
+ label: { id: 2492649, name: "good label" },
+ action: "add"
+ }
+ end
+
+ let(:removed_label_event) do
+ {
+ id: 274504558,
+ user: { id: 18645100 },
+ label: { id: 2492649, name: "bad label" },
+ action: "remove"
+ }
+ end
+
+ let(:resource_label_events) do
+ [added_label_event]
+ end
+
+ subject(:non_housekeeper_changes) do
+ client.non_housekeeper_changes(
+ source_project_id: 123,
+ target_project_id: 456,
+ source_branch: 'the-source-branch',
+ target_branch: 'the-target-branch'
+ )
+ end
+
+ before do
+ # Get the current housekeeper user
+ stub_request(:get, "https://gitlab.com/api/v4/user")
+ .with(
+ headers: {
+ 'Private-Token' => 'the-api-token'
+ }
+ )
+ .to_return(status: 200, body: { id: housekeeper_user_id }.to_json)
+
+ # Get the id of the current merge request
+ stub_request(:get, "https://gitlab.com/api/v4/projects/456/merge_requests?state=opened&source_branch=the-source-branch&target_branch=the-target-branch&source_project_id=123")
+ .with(
+ headers: {
+ 'Private-Token' => 'the-api-token'
+ }
+ )
+ .to_return(status: 200, body: [{ iid: 8765 }].to_json)
+
+ # Get the notes of the current merge request
+ stub_request(:get, "https://gitlab.com/api/v4/projects/456/merge_requests/8765/notes?per_page=100")
+ .with(
+ headers: {
+ 'Private-Token' => 'the-api-token'
+ }
+ )
+ .to_return(status: 200, body: notes.to_json)
+
+ # Get the label changes for the merge request
+ stub_request(:get, "https://gitlab.com/api/v4/projects/456/merge_requests/8765/resource_label_events?per_page=100")
+ .with(
+ headers: {
+ 'Private-Token' => 'the-api-token'
+ }
+ )
+ .to_return(status: 200, body: resource_label_events.to_json)
+ end
+
+ it 'does not match irrelevant notes' do
+ expect(non_housekeeper_changes).to eq([])
+ end
+
+ context 'when all important things change' do
+ let(:notes) do
+ [not_a_system_note, updated_title_note, updated_description_note, added_commit_note]
+ end
+
+ let(:resource_label_events) do
+ [removed_label_event]
+ end
+
+ it 'returns :title, :description, :code, :labels' do
+ expect(non_housekeeper_changes).to include(:title)
+ expect(non_housekeeper_changes).to include(:description)
+ expect(non_housekeeper_changes).to include(:code)
+ expect(non_housekeeper_changes).to include(:labels)
+ end
+ end
+
+ context 'when title changes' do
+ let(:notes) do
+ [not_a_system_note, updated_title_note]
+ end
+
+ it 'returns :title' do
+ expect(non_housekeeper_changes).to include(:title)
+ expect(non_housekeeper_changes).not_to include(:description)
+ expect(non_housekeeper_changes).not_to include(:code)
+ expect(non_housekeeper_changes).not_to include(:labels)
+ end
+ end
+
+ context 'when description changes' do
+ let(:notes) do
+ [not_a_system_note, updated_description_note]
+ end
+
+ it 'returns :description' do
+ expect(non_housekeeper_changes).not_to include(:title)
+ expect(non_housekeeper_changes).to include(:description)
+ expect(non_housekeeper_changes).not_to include(:code)
+ expect(non_housekeeper_changes).not_to include(:labels)
+ end
+ end
+
+ context 'when labels change' do
+ let(:notes) do
+ [not_a_system_note]
+ end
+
+ let(:resource_label_events) do
+ [added_label_event, removed_label_event]
+ end
+
+ it 'returns :labels' do
+ expect(non_housekeeper_changes).not_to include(:title)
+ expect(non_housekeeper_changes).not_to include(:description)
+ expect(non_housekeeper_changes).not_to include(:code)
+ expect(non_housekeeper_changes).to include(:labels)
+ end
+ end
+
+ context 'when the merge request does not exist' do
+ it 'returns empty array' do
+ expect(non_housekeeper_changes).to eq([])
+ end
+ end
+ end
+
describe '#create_or_update_merge_request' do
let(:params) do
{
source_project_id: 123,
title: 'A new merge request!',
+ labels: %w[label-1 label-2],
description: 'This merge request is pretty good.',
source_branch: 'the-source-branch',
target_branch: 'the-target-branch',
- target_project_id: 456
+ target_project_id: 456,
+ update_title: true,
+ update_description: true,
+ update_labels: true
}
end
let(:existing_mrs) { [] }
before do
- stub_env('HOUSEKEEPER_GITLAB_API_TOKEN', 'the-api-token')
-
# Stub the check to see if the merge request already exists
stub_request(:get, "https://gitlab.com/api/v4/projects/456/merge_requests?state=opened&source_branch=the-source-branch&target_branch=the-target-branch&source_project_id=123")
.with(
@@ -39,6 +238,7 @@ RSpec.describe ::Gitlab::Housekeeper::GitlabClient do
body: {
title: "A new merge request!",
description: "This merge request is pretty good.",
+ labels: "label-1,label-2",
source_branch: "the-source-branch",
target_branch: "the-target-branch",
target_project_id: 456
@@ -64,7 +264,8 @@ RSpec.describe ::Gitlab::Housekeeper::GitlabClient do
.with(
body: {
title: "A new merge request!",
- description: "This merge request is pretty good."
+ description: "This merge request is pretty good.",
+ add_labels: "label-1,label-2"
}.to_json,
headers: {
'Content-Type' => 'application/json',
@@ -85,6 +286,70 @@ RSpec.describe ::Gitlab::Housekeeper::GitlabClient do
expect { client.create_or_update_merge_request(**params) }.to raise_error(described_class::Error)
end
end
+
+ context 'when update_title: false' do
+ it 'does not update the title' do
+ stub = stub_request(:put, "https://gitlab.com/api/v4/projects/456/merge_requests/1234")
+ .with(
+ body: {
+ description: "This merge request is pretty good.",
+ add_labels: "label-1,label-2"
+ }.to_json,
+ headers: {
+ 'Content-Type' => 'application/json',
+ 'Private-Token' => 'the-api-token'
+ }
+ ).to_return(status: 200, body: "")
+
+ client.create_or_update_merge_request(**params.merge(update_title: false))
+ expect(stub).to have_been_requested
+ end
+ end
+
+ context 'when update_description: false' do
+ it 'does not update the description' do
+ stub = stub_request(:put, "https://gitlab.com/api/v4/projects/456/merge_requests/1234")
+ .with(
+ body: {
+ title: "A new merge request!",
+ add_labels: "label-1,label-2"
+ }.to_json,
+ headers: {
+ 'Content-Type' => 'application/json',
+ 'Private-Token' => 'the-api-token'
+ }
+ ).to_return(status: 200, body: "")
+
+ client.create_or_update_merge_request(**params.merge(update_description: false))
+ expect(stub).to have_been_requested
+ end
+ end
+
+ context 'when update_labels: false' do
+ it 'does not update the labels' do
+ stub = stub_request(:put, "https://gitlab.com/api/v4/projects/456/merge_requests/1234")
+ .with(
+ body: {
+ title: "A new merge request!",
+ description: "This merge request is pretty good."
+ }.to_json,
+ headers: {
+ 'Content-Type' => 'application/json',
+ 'Private-Token' => 'the-api-token'
+ }
+ ).to_return(status: 200, body: "")
+
+ client.create_or_update_merge_request(**params.merge(update_labels: false))
+ expect(stub).to have_been_requested
+ end
+ end
+
+ context 'when there is nothing to update' do
+ it 'does not make a request' do
+ client.create_or_update_merge_request(**params.merge(update_description: false, update_title: false,
+ update_labels: false))
+ end
+ end
end
it 'raises an error when unsuccessful response' do
@@ -97,3 +362,4 @@ RSpec.describe ::Gitlab::Housekeeper::GitlabClient do
end
end
end
+# rubocop:enable RSpec/MultipleMemoizedHelpers
diff --git a/gems/gitlab-housekeeper/spec/gitlab/housekeeper/runner_spec.rb b/gems/gitlab-housekeeper/spec/gitlab/housekeeper/runner_spec.rb
index 49b4926dbdd..d1fdbcb1004 100644
--- a/gems/gitlab-housekeeper/spec/gitlab/housekeeper/runner_spec.rb
+++ b/gems/gitlab-housekeeper/spec/gitlab/housekeeper/runner_spec.rb
@@ -3,6 +3,7 @@
require 'spec_helper'
require 'gitlab/housekeeper/runner'
+# rubocop:disable RSpec/MultipleMemoizedHelpers -- there are lots of parameters at play
RSpec.describe ::Gitlab::Housekeeper::Runner do
let(:fake_keep) { instance_double(Class) }
@@ -11,7 +12,8 @@ RSpec.describe ::Gitlab::Housekeeper::Runner do
%w[the identifier for the first change],
"The title of MR1",
"The description of the MR",
- ['change1.txt', 'change2.txt']
+ ['change1.txt', 'change2.txt'],
+ ['example-label']
)
end
@@ -20,7 +22,8 @@ RSpec.describe ::Gitlab::Housekeeper::Runner do
%w[the identifier for the second change],
"The title of MR2",
"The description of the MR",
- ['change1.txt', 'change2.txt']
+ ['change1.txt', 'change2.txt'],
+ ['example-label']
)
end
@@ -29,7 +32,8 @@ RSpec.describe ::Gitlab::Housekeeper::Runner do
%w[the identifier for the third change],
"The title of MR3",
"The description of the MR",
- ['change1.txt', 'change2.txt']
+ ['change1.txt', 'change2.txt'],
+ ['example-label']
)
end
@@ -44,18 +48,34 @@ RSpec.describe ::Gitlab::Housekeeper::Runner do
end
describe '#run' do
+ let(:git) { instance_double(::Gitlab::Housekeeper::Git) }
+ let(:gitlab_client) { instance_double(::Gitlab::Housekeeper::GitlabClient) }
+
before do
stub_env('HOUSEKEEPER_FORK_PROJECT_ID', '123')
stub_env('HOUSEKEEPER_TARGET_PROJECT_ID', '456')
+
+ allow(::Gitlab::Housekeeper::Git).to receive(:new)
+ .and_return(git)
+
+ allow(git).to receive(:with_branch_from_branch)
+ .and_yield
+ allow(git).to receive(:commit_in_branch).with(change1)
+ .and_return('the-identifier-for-the-first-change')
+ allow(git).to receive(:commit_in_branch).with(change2)
+ .and_return('the-identifier-for-the-second-change')
+
+ allow(::Gitlab::Housekeeper::GitlabClient).to receive(:new)
+ .and_return(gitlab_client)
+
+ allow(gitlab_client).to receive(:non_housekeeper_changes)
+ .and_return([])
+
+ allow(::Gitlab::Housekeeper::Shell).to receive(:execute)
end
it 'loops over the keeps and creates MRs limited by max_mrs' do
# Branches get created
- git = instance_double(::Gitlab::Housekeeper::Git)
- expect(::Gitlab::Housekeeper::Git).to receive(:new)
- .and_return(git)
- expect(git).to receive(:with_branch_from_branch)
- .and_yield
expect(git).to receive(:commit_in_branch).with(change1)
.and_return('the-identifier-for-the-first-change')
expect(git).to receive(:commit_in_branch).with(change2)
@@ -76,29 +96,93 @@ RSpec.describe ::Gitlab::Housekeeper::Runner do
'the-identifier-for-the-second-change:the-identifier-for-the-second-change')
# Merge requests get created
- gitlab_client = instance_double(::Gitlab::Housekeeper::GitlabClient)
- expect(::Gitlab::Housekeeper::GitlabClient).to receive(:new)
- .and_return(gitlab_client)
expect(gitlab_client).to receive(:create_or_update_merge_request)
.with(
source_project_id: '123',
title: 'The title of MR1',
description: 'The description of the MR',
+ labels: ['example-label'],
source_branch: 'the-identifier-for-the-first-change',
target_branch: 'master',
- target_project_id: '456'
+ target_project_id: '456',
+ update_title: true,
+ update_description: true,
+ update_labels: true
)
expect(gitlab_client).to receive(:create_or_update_merge_request)
.with(
source_project_id: '123',
title: 'The title of MR2',
description: 'The description of the MR',
+ labels: ['example-label'],
source_branch: 'the-identifier-for-the-second-change',
target_branch: 'master',
- target_project_id: '456'
+ target_project_id: '456',
+ update_title: true,
+ update_description: true,
+ update_labels: true
)
described_class.new(max_mrs: 2, keeps: [fake_keep]).run
end
+
+ context 'when title, description, code has changed already' do
+ it 'does not update the changed details' do
+ # First change has updated code and description so should only update title
+ expect(gitlab_client).to receive(:non_housekeeper_changes)
+ .with(
+ source_project_id: '123',
+ source_branch: 'the-identifier-for-the-first-change',
+ target_branch: 'master',
+ target_project_id: '456'
+ ).and_return([:code, :description])
+
+ # Second change has updated title and description so it should push the code
+ expect(gitlab_client).to receive(:non_housekeeper_changes)
+ .with(
+ source_project_id: '123',
+ source_branch: 'the-identifier-for-the-second-change',
+ target_branch: 'master',
+ target_project_id: '456'
+ ).and_return([:title, :description])
+
+ expect(::Gitlab::Housekeeper::Shell).not_to receive(:execute)
+ .with('git', 'push', '-f', 'housekeeper',
+ 'the-identifier-for-the-first-change:the-identifier-for-the-first-change')
+ expect(::Gitlab::Housekeeper::Shell).to receive(:execute)
+ .with('git', 'push', '-f', 'housekeeper',
+ 'the-identifier-for-the-second-change:the-identifier-for-the-second-change')
+
+ expect(gitlab_client).to receive(:create_or_update_merge_request)
+ .with(
+ source_project_id: '123',
+ title: 'The title of MR1',
+ description: 'The description of the MR',
+ labels: ['example-label'],
+ source_branch: 'the-identifier-for-the-first-change',
+ target_branch: 'master',
+ target_project_id: '456',
+ update_title: true,
+ update_description: false,
+ update_labels: true
+ )
+ expect(gitlab_client).to receive(:create_or_update_merge_request)
+ .with(
+ source_project_id: '123',
+ title: 'The title of MR2',
+ description: 'The description of the MR',
+ labels: ['example-label'],
+ source_branch: 'the-identifier-for-the-second-change',
+ target_branch: 'master',
+ target_project_id: '456',
+ update_title: false,
+ update_description: false,
+ update_labels: true
+ )
+
+ described_class.new(max_mrs: 2, keeps: [fake_keep]).run
+ end
+ end
end
end
+# rubocop:enable RSpec/MultipleMemoizedHelpers
diff --git a/gems/gitlab-http/Gemfile.lock b/gems/gitlab-http/Gemfile.lock
index 5fb1963d8f3..149a6805f05 100644
--- a/gems/gitlab-http/Gemfile.lock
+++ b/gems/gitlab-http/Gemfile.lock
@@ -13,7 +13,6 @@ PATH
actionview (>= 6.1.7.2)
activesupport (>= 6.1.7.2)
addressable (~> 2.8)
- nokogiri (~> 1.15.2)
rake (~> 13.0)
PATH
@@ -24,7 +23,6 @@ PATH
concurrent-ruby (~> 1.2)
httparty (~> 0.21.0)
ipaddress (~> 0.8.3)
- nokogiri (~> 1.15.4)
railties (~> 7)
GEM
diff --git a/gems/gitlab-http/gitlab-http.gemspec b/gems/gitlab-http/gitlab-http.gemspec
index 0033f17447b..27366246bb1 100644
--- a/gems/gitlab-http/gitlab-http.gemspec
+++ b/gems/gitlab-http/gitlab-http.gemspec
@@ -23,7 +23,6 @@ Gem::Specification.new do |spec|
spec.add_runtime_dependency 'concurrent-ruby', '~> 1.2'
spec.add_runtime_dependency 'httparty', '~> 0.21.0'
spec.add_runtime_dependency 'ipaddress', '~> 0.8.3'
- spec.add_runtime_dependency 'nokogiri', '~> 1.15.4'
spec.add_runtime_dependency "railties", "~> 7"
spec.add_development_dependency 'gitlab-styles', '~> 10.1.0'
diff --git a/gems/gitlab-http/lib/gitlab/http_v2/lazy_response.rb b/gems/gitlab-http/lib/gitlab/http_v2/lazy_response.rb
index 65d1ab96644..7c7a44f77e2 100644
--- a/gems/gitlab-http/lib/gitlab/http_v2/lazy_response.rb
+++ b/gems/gitlab-http/lib/gitlab/http_v2/lazy_response.rb
@@ -7,7 +7,7 @@ module Gitlab
attr_reader :promise
- delegate :state, to: :promise
+ delegate :state, :complete?, to: :promise
def initialize(promise, path, options, log_info)
@promise = promise
diff --git a/gems/gitlab-rspec_flaky/lib/gitlab/rspec_flaky/listener.rb b/gems/gitlab-rspec_flaky/lib/gitlab/rspec_flaky/listener.rb
index 6f4dce9df33..c40d9e90b56 100644
--- a/gems/gitlab-rspec_flaky/lib/gitlab/rspec_flaky/listener.rb
+++ b/gems/gitlab-rspec_flaky/lib/gitlab/rspec_flaky/listener.rb
@@ -36,6 +36,10 @@ module Gitlab
end
def dump_summary(_)
+ rails_logger_warn(
+ "\n#{flaky_examples.count} known flaky example(s) detected. " \
+ "Writing this to #{Config.flaky_examples_report_path}.\n"
+ )
Report.new(flaky_examples).write(Config.flaky_examples_report_path)
return unless new_flaky_examples.any?
diff --git a/gems/gitlab-rspec_flaky/spec/gitlab/rspec_flaky/listener_spec.rb b/gems/gitlab-rspec_flaky/spec/gitlab/rspec_flaky/listener_spec.rb
index b46044e4521..a830226d88f 100644
--- a/gems/gitlab-rspec_flaky/spec/gitlab/rspec_flaky/listener_spec.rb
+++ b/gems/gitlab-rspec_flaky/spec/gitlab/rspec_flaky/listener_spec.rb
@@ -197,6 +197,8 @@ RSpec.describe Gitlab::RspecFlaky::Listener, :aggregate_failures do
end
describe '#dump_summary' do
+ subject { listener.dump_summary(nil) }
+
let(:listener) { described_class.new(suite_flaky_example_report.to_json) }
let(:new_flaky_rspec_example) { double(new_example_attrs.merge(attempts: 2)) }
let(:already_flaky_rspec_example) { double(already_flaky_example_attrs.merge(attempts: 2)) }
@@ -207,6 +209,39 @@ RSpec.describe Gitlab::RspecFlaky::Listener, :aggregate_failures do
allow(Kernel).to receive(:warn)
end
+ context 'when not flaky tests were found' do
+ it 'prints a message in the console' do
+ allow(Kernel).to receive(:warn).and_call_original
+
+ expect { subject }.to output(
+ %r{0 known flaky example\(s\) detected\. Writing this to rspec/flaky/report\.json}
+ ).to_stderr
+ end
+ end
+
+ context 'when existing flaky tests were found' do
+ before do
+ listener.example_passed(notification_already_flaky_rspec_example)
+ end
+
+ it 'does write them in the correct report' do
+ report = double
+
+ expect(Gitlab::RspecFlaky::Report).to receive(:new).with(listener.flaky_examples).and_return(report)
+ expect(report).to receive(:write).with(Gitlab::RspecFlaky::Config.flaky_examples_report_path)
+
+ subject
+ end
+
+ it 'prints a message in the console' do
+ allow(Kernel).to receive(:warn).and_call_original
+
+ expect { subject }.to output(
+ %r{1 known flaky example\(s\) detected\. Writing this to rspec/flaky/report\.json}
+ ).to_stderr
+ end
+ end
+
context 'when a report file path is set by FLAKY_RSPEC_REPORT_PATH' do
it 'delegates the writes to RspecFlaky::Report' do
listener.example_passed(notification_new_flaky_rspec_example)
@@ -222,7 +257,7 @@ RSpec.describe Gitlab::RspecFlaky::Listener, :aggregate_failures do
.to receive(:new).with(listener.__send__(:new_flaky_examples)).and_return(report2)
expect(report2).to receive(:write).with(Gitlab::RspecFlaky::Config.new_flaky_examples_report_path)
- listener.dump_summary(nil)
+ subject
end
end
end
diff --git a/gems/gitlab-safe_request_store/Gemfile.lock b/gems/gitlab-safe_request_store/Gemfile.lock
index 50ff694d7d5..d26bab2f4f0 100644
--- a/gems/gitlab-safe_request_store/Gemfile.lock
+++ b/gems/gitlab-safe_request_store/Gemfile.lock
@@ -2,6 +2,7 @@ PATH
remote: .
specs:
gitlab-safe_request_store (0.1.0)
+ rack (~> 2.2.8)
request_store
GEM
@@ -35,7 +36,7 @@ GEM
coderay (~> 1.1)
method_source (~> 1.0)
racc (1.6.2)
- rack (3.0.4.1)
+ rack (2.2.8)
rainbow (3.1.1)
regexp_parser (2.7.0)
request_store (1.5.1)
diff --git a/gems/gitlab-safe_request_store/gitlab-safe_request_store.gemspec b/gems/gitlab-safe_request_store/gitlab-safe_request_store.gemspec
index a685a1eb447..b3ced3929c8 100644
--- a/gems/gitlab-safe_request_store/gitlab-safe_request_store.gemspec
+++ b/gems/gitlab-safe_request_store/gitlab-safe_request_store.gemspec
@@ -18,6 +18,7 @@ Gem::Specification.new do |spec|
spec.files = Dir['lib/**/*.rb']
spec.require_paths = ["lib"]
+ spec.add_runtime_dependency "rack", "~> 2.2.8"
spec.add_runtime_dependency "request_store"
spec.add_development_dependency "gitlab-styles", "~> 10.1.0"
diff --git a/gems/gitlab-secret_detection/Gemfile.lock b/gems/gitlab-secret_detection/Gemfile.lock
index dd9f621ee4a..029ad5d778e 100644
--- a/gems/gitlab-secret_detection/Gemfile.lock
+++ b/gems/gitlab-secret_detection/Gemfile.lock
@@ -2,6 +2,7 @@ PATH
remote: .
specs:
gitlab-secret_detection (0.1.0)
+ parallel (~> 1.22)
re2 (~> 2.4)
toml-rb (~> 2.2)
@@ -47,7 +48,7 @@ GEM
mini_portile2 (2.8.5)
minitest (5.20.0)
mutex_m (0.2.0)
- parallel (1.23.0)
+ parallel (1.24.0)
parser (3.2.2.4)
ast (~> 2.4.1)
racc
@@ -136,6 +137,7 @@ PLATFORMS
ruby
DEPENDENCIES
+ benchmark-malloc (~> 0.2)
gitlab-secret_detection!
gitlab-styles (~> 11.0)
rspec (~> 3.0)
diff --git a/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec b/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec
index be9db3aa389..147f370a716 100644
--- a/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec
+++ b/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec
@@ -24,9 +24,11 @@ Gem::Specification.new do |spec|
spec.files = Dir['lib/**/*.rb']
spec.require_paths = ["lib"]
+ spec.add_runtime_dependency "parallel", "~> 1.22"
spec.add_runtime_dependency "re2", "~> 2.4"
spec.add_runtime_dependency "toml-rb", "~> 2.2"
+ spec.add_development_dependency "benchmark-malloc", "~> 0.2"
spec.add_development_dependency "gitlab-styles", "~> 11.0"
spec.add_development_dependency "rspec", "~> 3.0"
spec.add_development_dependency "rspec-benchmark", "~> 0.6.0"
diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb
index 20d630d5dbb..37103912615 100644
--- a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb
+++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb
@@ -4,6 +4,7 @@ require 'toml-rb'
require 're2'
require 'logger'
require 'timeout'
+require 'parallel'
module Gitlab
module SecretDetection
@@ -23,12 +24,18 @@ module Gitlab
DEFAULT_BLOB_TIMEOUT_SECS = 5
# file path where the secrets ruleset file is located
RULESET_FILE_PATH = File.expand_path('../../gitleaks.toml', __dir__)
- # ignore the scanning of a line which ends with the following keyword
- GITLEAKS_KEYWORD_IGNORE = 'gitleaks:allow'
+ # Max no of child processes to spawn per request
+ # ref: https://gitlab.com/gitlab-org/gitlab/-/issues/430160
+ MAX_PROCS_PER_REQUEST = 5
+ # Minimum cumulative size of the blobs required to spawn and
+ # run the scan within a new subprocess.
+ MIN_CHUNK_SIZE_PER_PROC_BYTES = 2_097_152 # 2MiB
+ # Whether to run scan in subprocesses or not. Default is true.
+ RUN_IN_SUBPROCESS = true
# Initializes the instance with logger along with following operations:
# 1. Parse ruleset for the given +ruleset_path+(default: +RULESET_FILE_PATH+). Raises +RulesetParseError+
- # incase the operation fails.
+ # in case the operation fails.
# 2. Extract keywords from the parsed ruleset to use it for matching keywords before regex operation.
# 3. Build and Compile rule regex patterns obtained from the ruleset. Raises +RulesetCompilationError+
# in case the compilation fails.
@@ -46,13 +53,31 @@ module Gitlab
# +timeout+:: No of seconds(accepts floating point for smaller time values) to limit the total scan duration
# +blob_timeout+:: No of seconds(accepts floating point for smaller time values) to limit
# the scan duration on each blob
+ # +subprocess+:: If passed true, the scan is performed within subprocess instead of main process.
+ # To avoid over-consuming memory by running scan on multiple large blobs within a single subprocess,
+ # it instead groups the blobs into smaller array where each array contains blobs with cumulative size of
+ # +MIN_CHUNK_SIZE_PER_PROC_BYTES+ bytes and each group runs in a separate sub-process. Default value
+ # is true.
+ #
+ # NOTE:
+ # Running the scan in fork mode primarily focuses on reducing the memory consumption of the scan by
+ # offloading regex operations on large blobs to sub-processes. However, it does not assure the improvement
+ # in the overall latency of the scan, specifically in the case of smaller blob sizes, where the overhead of
+ # forking a new process adds to the overall latency of the scan instead. More reference on Subprocess-based
+ # execution is found here: https://gitlab.com/gitlab-org/gitlab/-/issues/430160.
#
# Returns an instance of SecretDetection::Response by following below structure:
# {
# status: One of the SecretDetection::Status values
# results: [SecretDetection::Finding]
# }
- def secrets_scan(blobs, timeout: DEFAULT_SCAN_TIMEOUT_SECS, blob_timeout: DEFAULT_BLOB_TIMEOUT_SECS)
+ #
+ def secrets_scan(
+ blobs,
+ timeout: DEFAULT_SCAN_TIMEOUT_SECS,
+ blob_timeout: DEFAULT_BLOB_TIMEOUT_SECS,
+ subprocess: RUN_IN_SUBPROCESS
+ )
return SecretDetection::Response.new(SecretDetection::Status::INPUT_ERROR) unless validate_scan_input(blobs)
Timeout.timeout(timeout) do
@@ -60,7 +85,11 @@ module Gitlab
next SecretDetection::Response.new(SecretDetection::Status::NOT_FOUND) if matched_blobs.empty?
- secrets = find_secrets_bulk(matched_blobs, blob_timeout)
+ secrets = if subprocess
+ run_scan_within_subprocess(blobs, blob_timeout)
+ else
+ run_scan(blobs, blob_timeout)
+ end
scan_status = overall_scan_status(secrets)
@@ -114,7 +143,7 @@ module Gitlab
secrets_keywords.flatten.compact.to_set
end
- # returns only those blobs that contain atleast one of the keywords
+ # returns only those blobs that contain at least one of the keywords
# from the keywords list
def filter_by_keywords(blobs)
matched_blobs = []
@@ -126,22 +155,43 @@ module Gitlab
matched_blobs.freeze
end
- # finds secrets in the given list of blobs
- def find_secrets_bulk(blobs, blob_timeout)
- found_secrets = []
-
- blobs.each do |blob|
- found_secrets << Timeout.timeout(blob_timeout) { find_secrets(blob) }
+ def run_scan(blobs, blob_timeout)
+ found_secrets = blobs.flat_map do |blob|
+ Timeout.timeout(blob_timeout) do
+ find_secrets(blob)
+ end
rescue Timeout::Error => e
- logger.error "Secret detection scan timed out on the blob(id:#{blob.id}): #{e}"
+ logger.error "Secret Detection scan timed out on the blob(id:#{blob.id}): #{e}"
+ SecretDetection::Finding.new(blob.id,
+ SecretDetection::Status::BLOB_TIMEOUT)
+ end
+
+ found_secrets.freeze
+ end
- found_secrets << SecretDetection::Finding.new(
- blob.id,
- SecretDetection::Status::BLOB_TIMEOUT
- )
+ def run_scan_within_subprocess(blobs, blob_timeout)
+ blob_sizes = blobs.map(&:size)
+ grouped_blob_indicies = group_by_chunk_size(blob_sizes)
+
+ grouped_blobs = grouped_blob_indicies.map { |idx_arr| idx_arr.map { |i| blobs[i] } }
+
+ found_secrets = Parallel.flat_map(
+ grouped_blobs,
+ in_processes: MAX_PROCS_PER_REQUEST,
+ isolation: true # do not reuse sub-processes
+ ) do |grouped_blob|
+ grouped_blob.flat_map do |blob|
+ Timeout.timeout(blob_timeout) do
+ find_secrets(blob)
+ end
+ rescue Timeout::Error => e
+ logger.error "Secret Detection scan timed out on the blob(id:#{blob.id}): #{e}"
+ SecretDetection::Finding.new(blob.id,
+ SecretDetection::Status::BLOB_TIMEOUT)
+ end
end
- found_secrets.flatten.freeze
+ found_secrets.freeze
end
# finds secrets in the given blob with a timeout circuit breaker
@@ -149,10 +199,8 @@ module Gitlab
secrets = []
blob.data.each_line.with_index do |line, index|
- # ignore the line scan if it is suffixed with '#gitleaks:allow'
- next if line.end_with?(GITLEAKS_KEYWORD_IGNORE)
+ patterns = pattern_matcher.match(line, exception: false)
- patterns = pattern_matcher.match(line, :exception => false)
next unless patterns.any?
line_number = index + 1
@@ -172,7 +220,7 @@ module Gitlab
secrets
rescue StandardError => e
- logger.error "Secret detection scan failed on the blob(id:#{blob.id}): #{e}"
+ logger.error "Secret Detection scan failed on the blob(id:#{blob.id}): #{e}"
SecretDetection::Finding.new(blob.id, SecretDetection::Status::SCAN_ERROR)
end
@@ -201,6 +249,35 @@ module Gitlab
SecretDetection::Status::FOUND_WITH_ERRORS
end
end
+
+ # This method accepts an array of blob sizes(in bytes) and groups them into an array
+ # of arrays structure where each element is the group of indicies of the input
+ # array whose cumulative blob sizes has at least +MIN_CHUNK_SIZE_PER_PROC_BYTES+
+ def group_by_chunk_size(blob_size_arr)
+ cumulative_size = 0
+ chunk_indexes = []
+ chunk_idx_start = 0
+
+ blob_size_arr.each_with_index do |size, index|
+ cumulative_size += size
+ next unless cumulative_size >= MIN_CHUNK_SIZE_PER_PROC_BYTES
+
+ chunk_indexes << (chunk_idx_start..index).to_a
+
+ chunk_idx_start = index + 1
+ cumulative_size = 0
+ end
+
+ if cumulative_size.positive? && (chunk_idx_start < blob_size_arr.length)
+ chunk_indexes << if chunk_idx_start == blob_size_arr.length - 1
+ [chunk_idx_start]
+ else
+ (chunk_idx_start..blob_size_arr.length - 1).to_a
+ end
+ end
+
+ chunk_indexes
+ end
end
end
end
diff --git a/gems/gitlab-secret_detection/spec/lib/gitlab/secret_detection/scan_spec.rb b/gems/gitlab-secret_detection/spec/lib/gitlab/secret_detection/scan_spec.rb
index 1e6ccf1e6a0..e69fcceeaab 100644
--- a/gems/gitlab-secret_detection/spec/lib/gitlab/secret_detection/scan_spec.rb
+++ b/gems/gitlab-secret_detection/spec/lib/gitlab/secret_detection/scan_spec.rb
@@ -16,28 +16,28 @@ RSpec.describe Gitlab::SecretDetection::Scan, feature_category: :secret_detectio
{
"id" => "gitlab_personal_access_token",
"description" => "GitLab Personal Access Token",
- "regex" => "glpat-[0-9a-zA-Z_\\-]{20}",
+ "regex" => "\bglpat-[0-9a-zA-Z_-]{20}\b",
"tags" => %w[gitlab revocation_type],
"keywords" => ["glpat"]
},
{
"id" => "gitlab_pipeline_trigger_token",
"description" => "GitLab Pipeline Trigger Token",
- "regex" => "glptt-[0-9a-zA-Z_\\-]{40}",
+ "regex" => "\bglptt-[0-9a-zA-Z_-]{40}\b",
"tags" => ["gitlab"],
"keywords" => ["glptt"]
},
{
"id" => "gitlab_runner_registration_token",
"description" => "GitLab Runner Registration Token",
- "regex" => "GR1348941[0-9a-zA-Z_-]{20}",
+ "regex" => "\bGR1348941[0-9a-zA-Z_-]{20}\b",
"tags" => ["gitlab"],
"keywords" => ["GR1348941"]
},
{
"id" => "gitlab_feed_token_v2",
"description" => "GitLab Feed Token",
- "regex" => "glft-[0-9a-zA-Z_-]{20}",
+ "regex" => "\bglft-[0-9a-zA-Z_-]{20}\b",
"tags" => ["gitlab"],
"keywords" => ["glft"]
}
@@ -98,12 +98,16 @@ RSpec.describe Gitlab::SecretDetection::Scan, feature_category: :secret_detectio
new_blob(id: 111, data: "glpat-12312312312312312312"), # gitleaks:allow
new_blob(id: 222, data: "\n\nglptt-1231231231231231231212312312312312312312"), # gitleaks:allow
new_blob(id: 333, data: "data with no secret"),
- new_blob(id: 444, data: "GR134894112312312312312312312\nglft-12312312312312312312") # gitleaks:allow
+ new_blob(id: 444,
+ data: "GR134894112312312312312312312\nglft-12312312312312312312"), # gitleaks:allow
+ new_blob(id: 555, data: "data with no secret"),
+ new_blob(id: 666, data: "data with no secret"),
+ new_blob(id: 777, data: "\nglptt-1231231231231231231212312312312312312312") # gitleaks:allow
]
end
- it "matches different types of rules" do
- expected_response = Gitlab::SecretDetection::Response.new(
+ let(:expected_response) do
+ Gitlab::SecretDetection::Response.new(
Gitlab::SecretDetection::Status::FOUND,
[
Gitlab::SecretDetection::Finding.new(
@@ -133,11 +137,66 @@ RSpec.describe Gitlab::SecretDetection::Scan, feature_category: :secret_detectio
2,
ruleset['rules'][3]['id'],
ruleset['rules'][3]['description']
+ ),
+ Gitlab::SecretDetection::Finding.new(
+ blobs[6].id,
+ Gitlab::SecretDetection::Status::FOUND,
+ 2,
+ ruleset['rules'][1]['id'],
+ ruleset['rules'][1]['description']
)
]
)
+ end
- expect(scan.secrets_scan(blobs)).to eq(expected_response)
+ it "matches multiple rules when running in main process" do
+ expect(scan.secrets_scan(blobs, subprocess: false)).to eq(expected_response)
+ end
+
+ context "in subprocess" do
+ let(:dummy_lines) do
+ 10_000
+ end
+
+ let(:large_blobs) do
+ dummy_data = "\nrandom data" * dummy_lines
+ [
+ new_blob(id: 111, data: "glpat-12312312312312312312#{dummy_data}"), # gitleaks:allow
+ new_blob(id: 222, data: "\n\nglptt-1231231231231231231212312312312312312312#{dummy_data}"), # gitleaks:allow
+ new_blob(id: 333, data: "data with no secret#{dummy_data}"),
+ new_blob(id: 444,
+ data: "GR134894112312312312312312312\nglft-12312312312312312312#{dummy_data}"), # gitleaks:allow
+ new_blob(id: 555, data: "data with no secret#{dummy_data}"),
+ new_blob(id: 666, data: "data with no secret#{dummy_data}"),
+ new_blob(id: 777, data: "#{dummy_data}\nglptt-1231231231231231231212312312312312312312") # gitleaks:allow
+ ]
+ end
+
+ it "matches multiple rules" do
+ expect(scan.secrets_scan(blobs, subprocess: true)).to eq(expected_response)
+ end
+
+ it "takes at least same time to run as running in main process" do
+ expect { scan.secrets_scan(large_blobs, subprocess: true) }.to perform_faster_than {
+ scan.secrets_scan(large_blobs,
+ subprocess: false)
+ }.once
+ end
+
+ it "allocates less memory than when running in main process" do
+ forked_stats = Benchmark::Malloc.new.run { scan.secrets_scan(large_blobs, subprocess: true) }
+ non_forked_stats = Benchmark::Malloc.new.run { scan.secrets_scan(large_blobs, subprocess: false) }
+
+ max_processes = Gitlab::SecretDetection::Scan::MAX_PROCS_PER_REQUEST
+
+ forked_memory = forked_stats.allocated.total_memory
+ non_forked_memory = non_forked_stats.allocated.total_memory
+ forked_obj_allocs = forked_stats.allocated.total_objects
+ non_forked_obj_allocs = non_forked_stats.allocated.total_objects
+
+ expect(non_forked_memory).to be >= forked_memory * max_processes
+ expect(non_forked_obj_allocs).to be >= forked_obj_allocs * max_processes
+ end
end
end
diff --git a/gems/gitlab-secret_detection/spec/spec_helper.rb b/gems/gitlab-secret_detection/spec/spec_helper.rb
index b694e52d2b6..04cb58edd67 100644
--- a/gems/gitlab-secret_detection/spec/spec_helper.rb
+++ b/gems/gitlab-secret_detection/spec/spec_helper.rb
@@ -2,6 +2,8 @@
require 'gitlab/secret_detection'
require 'rspec-parameterized'
+require 'rspec-benchmark'
+require 'benchmark-malloc'
RSpec.configure do |config|
# Enable flags like --only-failures and --next-failure
@@ -10,9 +12,17 @@ RSpec.configure do |config|
# Disable RSpec exposing methods globally on `Module` and `main`
config.disable_monkey_patching!
+ config.include RSpec::Benchmark::Matchers
+
Dir['./spec/support/**/*.rb'].each { |f| require f }
config.expect_with :rspec do |c|
c.syntax = :expect
end
+
+ # configure benchmark factors
+ RSpec::Benchmark.configure do |cfg|
+ # to avoid retention of allocated memory by the perf tests in the main process
+ cfg.run_in_subprocess = true
+ end
end
diff --git a/gems/gitlab-utils/Gemfile.lock b/gems/gitlab-utils/Gemfile.lock
index ef7c2d57c7a..76c6c1e2110 100644
--- a/gems/gitlab-utils/Gemfile.lock
+++ b/gems/gitlab-utils/Gemfile.lock
@@ -13,7 +13,6 @@ PATH
actionview (>= 6.1.7.2)
activesupport (>= 6.1.7.2)
addressable (~> 2.8)
- nokogiri (~> 1.15.2)
rake (~> 13.0)
GEM
@@ -77,7 +76,7 @@ GEM
method_source (1.0.0)
mini_portile2 (2.8.2)
minitest (5.18.1)
- nokogiri (1.15.2)
+ nokogiri (1.16.0)
mini_portile2 (~> 2.8.2)
racc (~> 1.4)
parallel (1.23.0)
diff --git a/gems/gitlab-utils/gitlab-utils.gemspec b/gems/gitlab-utils/gitlab-utils.gemspec
index d5f6deb7fe6..b66f467d0b6 100644
--- a/gems/gitlab-utils/gitlab-utils.gemspec
+++ b/gems/gitlab-utils/gitlab-utils.gemspec
@@ -21,7 +21,6 @@ Gem::Specification.new do |spec|
spec.add_runtime_dependency "actionview", ">= 6.1.7.2"
spec.add_runtime_dependency "activesupport", ">= 6.1.7.2"
spec.add_runtime_dependency "addressable", "~> 2.8"
- spec.add_runtime_dependency "nokogiri", "~> 1.15.2"
spec.add_runtime_dependency "rake", "~> 13.0"
spec.add_development_dependency "factory_bot_rails", "~> 6.2.0"
diff --git a/gems/gitlab-utils/lib/gitlab/utils.rb b/gems/gitlab-utils/lib/gitlab/utils.rb
index b7cb63210ee..d5a514bfae8 100644
--- a/gems/gitlab-utils/lib/gitlab/utils.rb
+++ b/gems/gitlab-utils/lib/gitlab/utils.rb
@@ -261,6 +261,8 @@ module Gitlab
end
end
+ # Use this method to set the `restrict_within_concurrent_ruby` to `true` for the block.
+ # `raise_if_concurrent_ruby!` will use this flag to raise an error if it's set to `true`.
def restrict_within_concurrent_ruby
previous = Thread.current[:restrict_within_concurrent_ruby]
Thread.current[:restrict_within_concurrent_ruby] = true
@@ -270,6 +272,18 @@ module Gitlab
Thread.current[:restrict_within_concurrent_ruby] = previous
end
+ # Use this method to disable the `restrict_within_concurrent_ruby` for the block.
+ # It is mainly used to prevent infinite loop when `ConcurrentRubyThreadIsUsedError` is rescued and sent to Sentry.
+ # More info: https://gitlab.com/gitlab-org/gitlab/-/issues/432145#note_1671305713
+ def allow_within_concurrent_ruby
+ previous = Thread.current[:restrict_within_concurrent_ruby]
+ Thread.current[:restrict_within_concurrent_ruby] = false
+
+ yield
+ ensure
+ Thread.current[:restrict_within_concurrent_ruby] = previous
+ end
+
# Running external methods can allocate I/O bound resources (like PostgreSQL connection or Gitaly)
# This is forbidden when running within a concurrent Ruby thread, for example `async` HTTP requests
# provided by the `gitlab-http` gem.
diff --git a/gems/gitlab-utils/spec/gitlab/utils_spec.rb b/gems/gitlab-utils/spec/gitlab/utils_spec.rb
index 69531225eef..02d288acedf 100644
--- a/gems/gitlab-utils/spec/gitlab/utils_spec.rb
+++ b/gems/gitlab-utils/spec/gitlab/utils_spec.rb
@@ -489,6 +489,26 @@ RSpec.describe Gitlab::Utils, feature_category: :shared do
end
end
+ describe '.allow_within_concurrent_ruby' do
+ it 'assigns restrict_within_concurrent_ruby false to the current thread and ensures it restores' do
+ expect(Thread.current[:restrict_within_concurrent_ruby]).to be_nil
+
+ described_class.allow_within_concurrent_ruby do
+ expect(Thread.current[:restrict_within_concurrent_ruby]).to be(false)
+ end
+
+ expect(Thread.current[:restrict_within_concurrent_ruby]).to be_nil
+ end
+
+ it 'overrides the value of restrict_within_concurrent_ruby' do
+ described_class.restrict_within_concurrent_ruby do
+ described_class.allow_within_concurrent_ruby do
+ expect(Thread.current[:restrict_within_concurrent_ruby]).to be(false)
+ end
+ end
+ end
+ end
+
describe '.raise_if_concurrent_ruby!' do
subject(:raise_if_concurrent_ruby!) { described_class.raise_if_concurrent_ruby!('test') }