Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
author GitLab Bot <gitlab-bot@gitlab.com> 2024-01-10 06:12:01 +0300
committer GitLab Bot <gitlab-bot@gitlab.com> 2024-01-10 06:12:01 +0300
commit fea86fb8bf2339727de5e91ccf17ab105e993dca (patch)
tree 25ddd67b8131643fa648f052eb29d527d72bdda3 /lib
parent ec4891efa777d951afdbff95557bbcf5fda00188 (diff)
Add latest changes from gitlab-org/gitlab@master
Diffstat (limited to 'lib')
-rw-r--r-- lib/gitlab/cleanup/orphan_job_artifact_final_objects/job_artifact_object.rb 62
-rw-r--r-- lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/aws.rb 27
-rw-r--r-- lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/base_paginator.rb 49
-rw-r--r-- lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/google.rb 32
-rw-r--r-- lib/gitlab/cleanup/orphan_job_artifact_final_objects_cleaner.rb 161
-rw-r--r-- lib/gitlab/security/features.rb 56
-rw-r--r-- lib/tasks/gitlab/cleanup.rake 27
7 files changed, 385 insertions, 29 deletions
diff --git a/lib/gitlab/cleanup/orphan_job_artifact_final_objects/job_artifact_object.rb b/lib/gitlab/cleanup/orphan_job_artifact_final_objects/job_artifact_object.rb
new file mode 100644
index 00000000000..61e7c6c43a6
--- /dev/null
+++ b/lib/gitlab/cleanup/orphan_job_artifact_final_objects/job_artifact_object.rb
@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Cleanup
+ module OrphanJobArtifactFinalObjects
+ class JobArtifactObject
+ include Gitlab::Utils::StrongMemoize
+
+ attr_reader :path, :size
+
+ def initialize(fog_file, bucket_prefix: nil)
+ @fog_file = fog_file
+ @path = fog_file.key
+ @size = fog_file.content_length
+ @bucket_prefix = bucket_prefix
+ end
+
+ def in_final_location?
+ path.include?('/@final/')
+ end
+
+ def orphan?
+ !job_artifact_record_exists? && !pending_direct_upload?
+ end
+
+ def delete
+ fog_file.destroy
+ end
+
+ private
+
+ attr_reader :fog_file, :bucket_prefix
+
+ def job_artifact_record_exists?
+ ::Ci::JobArtifact.exists?(file_final_path: path_without_bucket_prefix) # rubocop:disable CodeReuse/ActiveRecord -- too simple and specific for this usecase to be its own AR method
+ end
+
+ def pending_direct_upload?
+ ::ObjectStorage::PendingDirectUpload.exists?(:artifacts, path_without_bucket_prefix) # rubocop:disable CodeReuse/ActiveRecord -- `exists?` here is not the same as the AR method
+ end
+
+ def path_without_bucket_prefix
+ # `path` contains the fog file's key. It is the object path relative to the artifacts bucket, for example:
+ # aa/bb/abc123/@final/12/34/def12345
+ #
+ # But if the instance is configured to only use a single bucket combined with bucket prefixes,
+ # for example if the `bucket_prefix` is "my/artifacts", the `path` would then look like:
+ # my/artifacts/aa/bb/abc123/@final/12/34/def12345
+ #
+ # For `orphan?` to function properly, we need to strip the bucket_prefix
+ # off of the `path` because we need this to match the correct job artifact record by
+ # its `file_final_path` column, or the pending direct upload redis entry, which both contains
+ # the object's path without `bucket_prefix`.
+ #
+ # If bucket_prefix is not present, this will just return the original path.
+ Pathname.new(path).relative_path_from(bucket_prefix.to_s).to_s
+ end
+ strong_memoize_attr :path_without_bucket_prefix
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/aws.rb b/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/aws.rb
new file mode 100644
index 00000000000..7fedd8f4306
--- /dev/null
+++ b/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/aws.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Cleanup
+ module OrphanJobArtifactFinalObjects
+ module Paginators
+ class Aws < BasePaginator
+ def page_marker_filter_key
+ :marker
+ end
+
+ def max_results_filter_key
+ :max_keys
+ end
+
+ def last_page?(batch)
+ batch.empty?
+ end
+
+ def get_next_marker(batch)
+ batch.last.key
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/base_paginator.rb b/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/base_paginator.rb
new file mode 100644
index 00000000000..7bc7f9c2661
--- /dev/null
+++ b/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/base_paginator.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Cleanup
+ module OrphanJobArtifactFinalObjects
+ module Paginators
+ class BasePaginator
+ BATCH_SIZE = Rails.env.development? ? 5 : 200
+
+ def initialize(bucket_prefix: nil)
+ @bucket_prefix = bucket_prefix
+ end
+
+ def filters(marker)
+ {
+ page_marker_filter_key => marker,
+ max_results_filter_key => BATCH_SIZE,
+ prefix: bucket_prefix
+ }
+ end
+
+ def last_page?(batch)
+ # Fog providers have different indicators of last page, so we want to delegate this
+ # knowledge to the specific provider implementation.
+ raise NotImplementedError, "Subclasses must define `last_page?(batch)` instance method"
+ end
+
+ def get_next_marker(batch)
+ # Fog providers have different ways to get the next marker, so we want to delegate this
+ # knowledge to the specific provider implementation.
+ raise NotImplementedError, "Subclasses must define `get_next_marker(batch)` instance method"
+ end
+
+ private
+
+ attr_reader :bucket_prefix
+
+ def page_marker_filter_key
+ raise NotImplementedError, "Subclasses must define `page_marker_key` instance method"
+ end
+
+ def max_results_filter_key
+ raise NotImplementedError, "Subclasses must define `max_results_filter_key` instance method"
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/google.rb b/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/google.rb
new file mode 100644
index 00000000000..9b0da9910cd
--- /dev/null
+++ b/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/google.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Cleanup
+ module OrphanJobArtifactFinalObjects
+ module Paginators
+ class Google < BasePaginator
+ def filters(marker)
+ pattern = [bucket_prefix, '*/*/*/@final/**'].compact.join('/')
+ super.merge(match_glob: pattern)
+ end
+
+ def page_marker_filter_key
+ :page_token
+ end
+
+ def max_results_filter_key
+ :max_results
+ end
+
+ def last_page?(batch)
+ batch.next_page_token.nil?
+ end
+
+ def get_next_marker(batch)
+ batch.next_page_token
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/cleanup/orphan_job_artifact_final_objects_cleaner.rb b/lib/gitlab/cleanup/orphan_job_artifact_final_objects_cleaner.rb
new file mode 100644
index 00000000000..4726d68e024
--- /dev/null
+++ b/lib/gitlab/cleanup/orphan_job_artifact_final_objects_cleaner.rb
@@ -0,0 +1,161 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Cleanup
+ class OrphanJobArtifactFinalObjectsCleaner
+ include Gitlab::Utils::StrongMemoize
+
+ UnsupportedProviderError = Class.new(StandardError)
+
+ PAGINATORS = {
+ google: Gitlab::Cleanup::OrphanJobArtifactFinalObjects::Paginators::Google,
+ aws: Gitlab::Cleanup::OrphanJobArtifactFinalObjects::Paginators::Aws
+ }.freeze
+
+ LAST_PAGE_MARKER_REDIS_KEY = 'orphan-job-artifact-objects-cleanup-last-page-marker'
+
+ def initialize(provider: nil, dry_run: true, force_restart: false, logger: Gitlab::AppLogger)
+ @paginator = determine_paginator!(provider)
+ @dry_run = dry_run
+ @force_restart = force_restart
+ @logger = logger
+ end
+
+ def run!
+ log_info('Looking for orphan job artifact objects under the `@final` directories')
+
+ each_final_object do |object|
+ next unless object.orphan?
+
+ object.delete unless dry_run
+ log_info("Delete #{object.path} (#{object.size} bytes)")
+ end
+
+ log_info("Done.")
+ end
+
+ private
+
+ attr_reader :paginator, :dry_run, :force_restart, :logger
+
+ def determine_paginator!(provided_provider)
+ # provider can be nil if user didn't specify it when running the clean up task.
+ # In this case, we automatically determine the provider based on the object storage configuration.
+ provider = provided_provider
+ provider ||= configuration.connection.provider
+ klass = PAGINATORS.fetch(provider.downcase.to_sym)
+ klass.new(bucket_prefix: bucket_prefix)
+ rescue KeyError
+ msg = if provided_provider.present?
+ "The provided provider is unsupported. Please select from #{PAGINATORS.keys.join(', ')}."
+ else
+ <<-MSG.strip_heredoc
+ The provider found in the object storage configuration is unsupported.
+ Please re-run the task and specify a provider from #{PAGINATORS.keys.join(', ')},
+ whichever is compatible with your provider's object storage API."
+ MSG
+ end
+
+ raise UnsupportedProviderError, msg
+ end
+
+ def each_final_object
+ each_batch do |files|
+ files.each_file_this_page do |fog_file|
+ object = ::Gitlab::Cleanup::OrphanJobArtifactFinalObjects::JobArtifactObject.new(
+ fog_file,
+ bucket_prefix: bucket_prefix
+ )
+
+ # We still need to check here if the object is in the final location because
+ # if the provider does not support filtering objects by glob pattern, we will
+ # then receive all job artifact objects here, even the ones not in the @final directory.
+ yield object if object.in_final_location?
+ end
+ end
+ end
+
+ def each_batch
+ next_marker = resume_from_last_page_marker
+
+ loop do
+ batch = fetch_batch(next_marker)
+ yield batch
+
+ break if paginator.last_page?(batch)
+
+ next_marker = paginator.get_next_marker(batch)
+ save_last_page_marker(next_marker)
+ end
+
+ clear_last_page_marker
+ end
+
+ def fetch_batch(marker)
+ page_name = marker ? "marker: #{marker}" : "first page"
+ log_info("Loading page (#{page_name})")
+
+ # We are using files.all instead of files.each because we want to track the
+ # current page token so that we can resume from it if ever the task is abruptly interrupted.
+ artifacts_directory.files.all(
+ paginator.filters(marker)
+ )
+ end
+
+ def resume_from_last_page_marker
+ if force_restart
+ log_info("Force restarted. Will not resume from last known page marker.")
+ nil
+ else
+ get_last_page_marker
+ end
+ end
+
+ def get_last_page_marker
+ Gitlab::Redis::SharedState.with do |redis|
+ marker = redis.get(LAST_PAGE_MARKER_REDIS_KEY)
+ log_info("Resuming from last page marker: #{marker}") if marker
+ marker
+ end
+ end
+
+ def save_last_page_marker(marker)
+ Gitlab::Redis::SharedState.with do |redis|
+ # Set TTL to 1 day (86400 seconds)
+ redis.set(LAST_PAGE_MARKER_REDIS_KEY, marker, ex: 86400)
+ end
+ end
+
+ def clear_last_page_marker
+ Gitlab::Redis::SharedState.with do |redis|
+ redis.del(LAST_PAGE_MARKER_REDIS_KEY)
+ end
+ end
+
+ def connection
+ ::Fog::Storage.new(configuration['connection'].symbolize_keys)
+ end
+
+ def configuration
+ Gitlab.config.artifacts.object_store
+ end
+
+ def bucket
+ configuration.remote_directory
+ end
+
+ def bucket_prefix
+ configuration.bucket_prefix
+ end
+
+ def artifacts_directory
+ connection.directories.new(key: bucket)
+ end
+ strong_memoize_attr :artifacts_directory
+
+ def log_info(msg)
+ logger.info("#{'[DRY RUN] ' if dry_run}#{msg}")
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/security/features.rb b/lib/gitlab/security/features.rb
index 5eb54ee567c..2176e588d77 100644
--- a/lib/gitlab/security/features.rb
+++ b/lib/gitlab/security/features.rb
@@ -12,16 +12,16 @@ module Gitlab
short_name: _('SAST'),
description: _('Analyze your source code for known vulnerabilities.'),
help_path: Gitlab::Routing.url_helpers.help_page_path('user/application_security/sast/index'),
- config_help_path: Gitlab::Routing.url_helpers.help_page_path('user/application_security/sast/index',
+ configuration_help_path: Gitlab::Routing.url_helpers.help_page_path('user/application_security/sast/index',
anchor: 'configuration'),
type: 'sast'
},
sast_iac: {
name: _('Infrastructure as Code (IaC) Scanning'),
- short_name: _('ciReport|SAST IaC'),
+ short_name: s_('ciReport|SAST IaC'),
description: _('Analyze your infrastructure as code configuration files for known vulnerabilities.'),
help_path: Gitlab::Routing.url_helpers.help_page_path('user/application_security/iac_scanning/index'),
- config_help_path: Gitlab::Routing.url_helpers.help_page_path(
+ configuration_help_path: Gitlab::Routing.url_helpers.help_page_path(
'user/application_security/iac_scanning/index',
anchor: 'configuration'),
type: 'sast_iac'
@@ -36,16 +36,16 @@ module Gitlab
secondary: {
type: 'dast_profiles',
name: _('DAST profiles'),
- description: _('SecurityConfiguration|Manage profiles for use by DAST scans.'),
- configuration_text: _('SecurityConfiguration|Manage profiles')
+ description: s_('SecurityConfiguration|Manage profiles for use by DAST scans.'),
+ configuration_text: s_('SecurityConfiguration|Manage profiles')
},
name: _('Dynamic Application Security Testing (DAST)'),
- short_name: _('ciReport|DAST'),
- description: _('ciReport|Analyze a deployed version of your web application for known vulnerabilities by ' \
- 'examining it from the outside in. DAST works by simulating external attacks ' \
- 'on your application while it is running.'),
+ short_name: s_('ciReport|DAST'),
+ description: s_('ciReport|Analyze a deployed version of your web application for known ' \
+ 'vulnerabilities by examining it from the outside in. DAST works ' \
+ 'by simulating external attacks on your application while it is running.'),
help_path: Gitlab::Routing.url_helpers.help_page_path('user/application_security/dast/index'),
- config_help_path: Gitlab::Routing.url_helpers.help_page_path('user/application_security/dast/index',
+ configuration_help_path: Gitlab::Routing.url_helpers.help_page_path('user/application_security/dast/index',
anchor: 'enable-automatic-dast-run'),
type: 'dast',
anchor: 'dast'
@@ -55,7 +55,7 @@ module Gitlab
description: _('Analyze your dependencies for known vulnerabilities.'),
help_path: Gitlab::Routing.url_helpers.help_page_path(
'user/application_security/dependency_scanning/index'),
- config_help_path: Gitlab::Routing.url_helpers.help_page_path(
+ configuration_help_path: Gitlab::Routing.url_helpers.help_page_path(
'user/application_security/dependency_scanning/index', anchor: 'configuration'),
type: 'dependency_scanning',
anchor: 'dependency-scanning'
@@ -65,7 +65,7 @@ module Gitlab
description: _('Check your Docker images for known vulnerabilities.'),
help_path: Gitlab::Routing.url_helpers.help_page_path(
'user/application_security/container_scanning/index'),
- config_help_path: Gitlab::Routing.url_helpers.help_page_path(
+ configuration_help_path: Gitlab::Routing.url_helpers.help_page_path(
'user/application_security/container_scanning/index', anchor: 'configuration'),
type: 'container_scanning'
},
@@ -74,7 +74,7 @@ module Gitlab
description: _('Analyze your source code and git history for secrets.'),
help_path: Gitlab::Routing.url_helpers.help_page_path(
'user/application_security/secret_detection/index'),
- config_help_path: Gitlab::Routing.url_helpers.help_page_path(
+ configuration_help_path: Gitlab::Routing.url_helpers.help_page_path(
'user/application_security/secret_detection/index', anchor: 'configuration'),
type: 'secret_detection'
},
@@ -90,40 +90,40 @@ module Gitlab
description: _('Find bugs in your code with coverage-guided fuzzing.'),
help_path: Gitlab::Routing.url_helpers.help_page_path(
'user/application_security/coverage_fuzzing/index'),
- config_help_path: Gitlab::Routing.url_helpers.help_page_path(
+ configuration_help_path: Gitlab::Routing.url_helpers.help_page_path(
'user/application_security/coverage_fuzzing/index', anchor: 'enable-coverage-guided-fuzz-testing'),
type: 'coverage_fuzzing',
secondary: {
type: 'corpus_management',
name: _('Corpus Management'),
- description: _('SecurityConfiguration|Manage corpus files used as seed ' \
- 'inputs with coverage-guided fuzzing.'),
- configuration_text: _('SecurityConfiguration|Manage corpus')
+ description: s_('SecurityConfiguration|Manage corpus files used as seed ' \
+ 'inputs with coverage-guided fuzzing.'),
+ configuration_text: s_('SecurityConfiguration|Manage corpus')
}
},
breach_and_attack_simulation: {
anchor: 'bas',
badge: {
always_display: true,
- text: _('SecurityConfiguration|Incubating feature'),
- tooltip_text: _('SecurityConfiguration|Breach and Attack Simulation is an incubating ' \
- 'feature extending existing security testing by simulating adversary activity.'),
+ text: s_('SecurityConfiguration|Incubating feature'),
+ tooltip_text: s_('SecurityConfiguration|Breach and Attack Simulation is an incubating ' \
+ 'feature extending existing security testing by simulating adversary activity.'),
variant: 'info'
},
- description: _('SecurityConfiguration|Simulate breach and attack scenarios against your ' \
- 'running application by attempting to detect and exploit known vulnerabilities.'),
- name: _('SecurityConfiguration|Breach and Attack Simulation (BAS)'),
+ description: s_('SecurityConfiguration|Simulate breach and attack scenarios against your ' \
+ 'running application by attempting to detect and exploit known vulnerabilities.'),
+ name: s_('SecurityConfiguration|Breach and Attack Simulation (BAS)'),
help_path: Gitlab::Routing.url_helpers.help_page_path(
'user/application_security/breach_and_attack_simulation/index'),
secondary: {
- config_help_path: Gitlab::Routing.url_helpers.help_page_path(
+ configuration_help_path: Gitlab::Routing.url_helpers.help_page_path(
'user/application_security/breach_and_attack_simulation/index',
anchor: 'extend-dynamic-application-security-testing-dast'),
- description: _('SecurityConfiguration|Enable incubating Breach and Attack Simulation focused ' \
- 'features such as callback attacks in your DAST scans.'),
- name: _('SecurityConfiguration|Out-of-Band Application Security Testing (OAST)')
+ description: s_('SecurityConfiguration|Enable incubating Breach and Attack Simulation focused ' \
+ 'features such as callback attacks in your DAST scans.'),
+ name: s_('SecurityConfiguration|Out-of-Band Application Security Testing (OAST)')
},
- short_name: _('SecurityConfiguration|BAS'),
+ short_name: s_('SecurityConfiguration|BAS'),
type: 'breach_and_attack_simulation'
}
}.freeze
diff --git a/lib/tasks/gitlab/cleanup.rake b/lib/tasks/gitlab/cleanup.rake
index ecf5b10690a..c20190a2f64 100644
--- a/lib/tasks/gitlab/cleanup.rake
+++ b/lib/tasks/gitlab/cleanup.rake
@@ -51,7 +51,7 @@ namespace :gitlab do
end
end
- desc 'GitLab | Cleanup | Clean orphan job artifact files'
+ desc 'GitLab | Cleanup | Clean orphan job artifact files in local storage'
task orphan_job_artifact_files: :gitlab_environment do
warn_user_is_not_gitlab
@@ -63,6 +63,31 @@ namespace :gitlab do
end
end
+ desc 'GitLab | Cleanup | Clean orphan job artifact files stored in the @final directory in object storage'
+ task :orphan_job_artifact_final_objects, [:provider] => :gitlab_environment do |_, args|
+ warn_user_is_not_gitlab
+
+ force_restart = ENV['FORCE_RESTART'].present?
+
+ begin
+ cleaner = Gitlab::Cleanup::OrphanJobArtifactFinalObjectsCleaner.new(
+ provider: args.provider,
+ force_restart: force_restart,
+ dry_run: dry_run?,
+ logger: logger
+ )
+
+ cleaner.run!
+
+ if dry_run?
+ logger.info "To clean up all orphan files that were found, run this command with DRY_RUN=false".color(:yellow)
+ end
+ rescue Gitlab::Cleanup::OrphanJobArtifactFinalObjectsCleaner::UnsupportedProviderError => e
+ abort %(#{e.message}
+Usage: rake "gitlab:cleanup:orphan_job_artifact_final_objects[provider]")
+ end
+ end
+
desc 'GitLab | Cleanup | Clean orphan LFS file references'
task orphan_lfs_file_references: :gitlab_environment do
warn_user_is_not_gitlab