diff options
author | GitLab Bot <gitlab-bot@gitlab.com> | 2024-01-10 06:12:01 +0300 |
---|---|---|
committer | GitLab Bot <gitlab-bot@gitlab.com> | 2024-01-10 06:12:01 +0300 |
commit | fea86fb8bf2339727de5e91ccf17ab105e993dca (patch) | |
tree | 25ddd67b8131643fa648f052eb29d527d72bdda3 /lib | |
parent | ec4891efa777d951afdbff95557bbcf5fda00188 (diff) |
Add latest changes from gitlab-org/gitlab@master
Diffstat (limited to 'lib')
7 files changed, 385 insertions, 29 deletions
diff --git a/lib/gitlab/cleanup/orphan_job_artifact_final_objects/job_artifact_object.rb b/lib/gitlab/cleanup/orphan_job_artifact_final_objects/job_artifact_object.rb new file mode 100644 index 00000000000..61e7c6c43a6 --- /dev/null +++ b/lib/gitlab/cleanup/orphan_job_artifact_final_objects/job_artifact_object.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +module Gitlab + module Cleanup + module OrphanJobArtifactFinalObjects + class JobArtifactObject + include Gitlab::Utils::StrongMemoize + + attr_reader :path, :size + + def initialize(fog_file, bucket_prefix: nil) + @fog_file = fog_file + @path = fog_file.key + @size = fog_file.content_length + @bucket_prefix = bucket_prefix + end + + def in_final_location? + path.include?('/@final/') + end + + def orphan? + !job_artifact_record_exists? && !pending_direct_upload? + end + + def delete + fog_file.destroy + end + + private + + attr_reader :fog_file, :bucket_prefix + + def job_artifact_record_exists? + ::Ci::JobArtifact.exists?(file_final_path: path_without_bucket_prefix) # rubocop:disable CodeReuse/ActiveRecord -- too simple and specific for this usecase to be its own AR method + end + + def pending_direct_upload? + ::ObjectStorage::PendingDirectUpload.exists?(:artifacts, path_without_bucket_prefix) # rubocop:disable CodeReuse/ActiveRecord -- `exists?` here is not the same as the AR method + end + + def path_without_bucket_prefix + # `path` contains the fog file's key. It is the object path relative to the artifacts bucket, for example: + # aa/bb/abc123/@final/12/34/def12345 + # + # But if the instance is configured to only use a single bucket combined with bucket prefixes, + # for example if the `bucket_prefix` is "my/artifacts", the `path` would then look like: + # my/artifacts/aa/bb/abc123/@final/12/34/def12345 + # + # For `orphan?` to function properly, we need to strip the bucket_prefix + # off of the `path` because we need this to match the correct job artifact record by + # its `file_final_path` column, or the pending direct upload redis entry, which both contains + # the object's path without `bucket_prefix`. + # + # If bucket_prefix is not present, this will just return the original path. + Pathname.new(path).relative_path_from(bucket_prefix.to_s).to_s + end + strong_memoize_attr :path_without_bucket_prefix + end + end + end +end diff --git a/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/aws.rb b/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/aws.rb new file mode 100644 index 00000000000..7fedd8f4306 --- /dev/null +++ b/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/aws.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +module Gitlab + module Cleanup + module OrphanJobArtifactFinalObjects + module Paginators + class Aws < BasePaginator + def page_marker_filter_key + :marker + end + + def max_results_filter_key + :max_keys + end + + def last_page?(batch) + batch.empty? + end + + def get_next_marker(batch) + batch.last.key + end + end + end + end + end +end diff --git a/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/base_paginator.rb b/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/base_paginator.rb new file mode 100644 index 00000000000..7bc7f9c2661 --- /dev/null +++ b/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/base_paginator.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +module Gitlab + module Cleanup + module OrphanJobArtifactFinalObjects + module Paginators + class BasePaginator + BATCH_SIZE = Rails.env.development? ? 5 : 200 + + def initialize(bucket_prefix: nil) + @bucket_prefix = bucket_prefix + end + + def filters(marker) + { + page_marker_filter_key => marker, + max_results_filter_key => BATCH_SIZE, + prefix: bucket_prefix + } + end + + def last_page?(batch) + # Fog providers have different indicators of last page, so we want to delegate this + # knowledge to the specific provider implementation. + raise NotImplementedError, "Subclasses must define `last_page?(batch)` instance method" + end + + def get_next_marker(batch) + # Fog providers have different ways to get the next marker, so we want to delegate this + # knowledge to the specific provider implementation. + raise NotImplementedError, "Subclasses must define `get_next_marker(batch)` instance method" + end + + private + + attr_reader :bucket_prefix + + def page_marker_filter_key + raise NotImplementedError, "Subclasses must define `page_marker_key` instance method" + end + + def max_results_filter_key + raise NotImplementedError, "Subclasses must define `max_results_filter_key` instance method" + end + end + end + end + end +end diff --git a/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/google.rb b/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/google.rb new file mode 100644 index 00000000000..9b0da9910cd --- /dev/null +++ b/lib/gitlab/cleanup/orphan_job_artifact_final_objects/paginators/google.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +module Gitlab + module Cleanup + module OrphanJobArtifactFinalObjects + module Paginators + class Google < BasePaginator + def filters(marker) + pattern = [bucket_prefix, '*/*/*/@final/**'].compact.join('/') + super.merge(match_glob: pattern) + end + + def page_marker_filter_key + :page_token + end + + def max_results_filter_key + :max_results + end + + def last_page?(batch) + batch.next_page_token.nil? + end + + def get_next_marker(batch) + batch.next_page_token + end + end + end + end + end +end diff --git a/lib/gitlab/cleanup/orphan_job_artifact_final_objects_cleaner.rb b/lib/gitlab/cleanup/orphan_job_artifact_final_objects_cleaner.rb new file mode 100644 index 00000000000..4726d68e024 --- /dev/null +++ b/lib/gitlab/cleanup/orphan_job_artifact_final_objects_cleaner.rb @@ -0,0 +1,161 @@ +# frozen_string_literal: true + +module Gitlab + module Cleanup + class OrphanJobArtifactFinalObjectsCleaner + include Gitlab::Utils::StrongMemoize + + UnsupportedProviderError = Class.new(StandardError) + + PAGINATORS = { + google: Gitlab::Cleanup::OrphanJobArtifactFinalObjects::Paginators::Google, + aws: Gitlab::Cleanup::OrphanJobArtifactFinalObjects::Paginators::Aws + }.freeze + + LAST_PAGE_MARKER_REDIS_KEY = 'orphan-job-artifact-objects-cleanup-last-page-marker' + + def initialize(provider: nil, dry_run: true, force_restart: false, logger: Gitlab::AppLogger) + @paginator = determine_paginator!(provider) + @dry_run = dry_run + @force_restart = force_restart + @logger = logger + end + + def run! + log_info('Looking for orphan job artifact objects under the `@final` directories') + + each_final_object do |object| + next unless object.orphan? + + object.delete unless dry_run + log_info("Delete #{object.path} (#{object.size} bytes)") + end + + log_info("Done.") + end + + private + + attr_reader :paginator, :dry_run, :force_restart, :logger + + def determine_paginator!(provided_provider) + # provider can be nil if user didn't specify it when running the clean up task. + # In this case, we automatically determine the provider based on the object storage configuration. + provider = provided_provider + provider ||= configuration.connection.provider + klass = PAGINATORS.fetch(provider.downcase.to_sym) + klass.new(bucket_prefix: bucket_prefix) + rescue KeyError + msg = if provided_provider.present? + "The provided provider is unsupported. Please select from #{PAGINATORS.keys.join(', ')}." + else + <<-MSG.strip_heredoc + The provider found in the object storage configuration is unsupported. + Please re-run the task and specify a provider from #{PAGINATORS.keys.join(', ')}, + whichever is compatible with your provider's object storage API." + MSG + end + + raise UnsupportedProviderError, msg + end + + def each_final_object + each_batch do |files| + files.each_file_this_page do |fog_file| + object = ::Gitlab::Cleanup::OrphanJobArtifactFinalObjects::JobArtifactObject.new( + fog_file, + bucket_prefix: bucket_prefix + ) + + # We still need to check here if the object is in the final location because + # if the provider does not support filtering objects by glob pattern, we will + # then receive all job artifact objects here, even the ones not in the @final directory. + yield object if object.in_final_location? + end + end + end + + def each_batch + next_marker = resume_from_last_page_marker + + loop do + batch = fetch_batch(next_marker) + yield batch + + break if paginator.last_page?(batch) + + next_marker = paginator.get_next_marker(batch) + save_last_page_marker(next_marker) + end + + clear_last_page_marker + end + + def fetch_batch(marker) + page_name = marker ? "marker: #{marker}" : "first page" + log_info("Loading page (#{page_name})") + + # We are using files.all instead of files.each because we want to track the + # current page token so that we can resume from it if ever the task is abruptly interrupted. + artifacts_directory.files.all( + paginator.filters(marker) + ) + end + + def resume_from_last_page_marker + if force_restart + log_info("Force restarted. Will not resume from last known page marker.") + nil + else + get_last_page_marker + end + end + + def get_last_page_marker + Gitlab::Redis::SharedState.with do |redis| + marker = redis.get(LAST_PAGE_MARKER_REDIS_KEY) + log_info("Resuming from last page marker: #{marker}") if marker + marker + end + end + + def save_last_page_marker(marker) + Gitlab::Redis::SharedState.with do |redis| + # Set TTL to 1 day (86400 seconds) + redis.set(LAST_PAGE_MARKER_REDIS_KEY, marker, ex: 86400) + end + end + + def clear_last_page_marker + Gitlab::Redis::SharedState.with do |redis| + redis.del(LAST_PAGE_MARKER_REDIS_KEY) + end + end + + def connection + ::Fog::Storage.new(configuration['connection'].symbolize_keys) + end + + def configuration + Gitlab.config.artifacts.object_store + end + + def bucket + configuration.remote_directory + end + + def bucket_prefix + configuration.bucket_prefix + end + + def artifacts_directory + connection.directories.new(key: bucket) + end + strong_memoize_attr :artifacts_directory + + def log_info(msg) + logger.info("#{'[DRY RUN] ' if dry_run}#{msg}") + end + end + end +end diff --git a/lib/gitlab/security/features.rb b/lib/gitlab/security/features.rb index 5eb54ee567c..2176e588d77 100644 --- a/lib/gitlab/security/features.rb +++ b/lib/gitlab/security/features.rb @@ -12,16 +12,16 @@ module Gitlab short_name: _('SAST'), description: _('Analyze your source code for known vulnerabilities.'), help_path: Gitlab::Routing.url_helpers.help_page_path('user/application_security/sast/index'), - config_help_path: Gitlab::Routing.url_helpers.help_page_path('user/application_security/sast/index', + configuration_help_path: Gitlab::Routing.url_helpers.help_page_path('user/application_security/sast/index', anchor: 'configuration'), type: 'sast' }, sast_iac: { name: _('Infrastructure as Code (IaC) Scanning'), - short_name: _('ciReport|SAST IaC'), + short_name: s_('ciReport|SAST IaC'), description: _('Analyze your infrastructure as code configuration files for known vulnerabilities.'), help_path: Gitlab::Routing.url_helpers.help_page_path('user/application_security/iac_scanning/index'), - config_help_path: Gitlab::Routing.url_helpers.help_page_path( + configuration_help_path: Gitlab::Routing.url_helpers.help_page_path( 'user/application_security/iac_scanning/index', anchor: 'configuration'), type: 'sast_iac' @@ -36,16 +36,16 @@ module Gitlab secondary: { type: 'dast_profiles', name: _('DAST profiles'), - description: _('SecurityConfiguration|Manage profiles for use by DAST scans.'), - configuration_text: _('SecurityConfiguration|Manage profiles') + description: s_('SecurityConfiguration|Manage profiles for use by DAST scans.'), + configuration_text: s_('SecurityConfiguration|Manage profiles') }, name: _('Dynamic Application Security Testing (DAST)'), - short_name: _('ciReport|DAST'), - description: _('ciReport|Analyze a deployed version of your web application for known vulnerabilities by ' \ - 'examining it from the outside in. DAST works by simulating external attacks ' \ - 'on your application while it is running.'), + short_name: s_('ciReport|DAST'), + description: s_('ciReport|Analyze a deployed version of your web application for known ' \ + 'vulnerabilities by examining it from the outside in. DAST works ' \ + 'by simulating external attacks on your application while it is running.'), help_path: Gitlab::Routing.url_helpers.help_page_path('user/application_security/dast/index'), - config_help_path: Gitlab::Routing.url_helpers.help_page_path('user/application_security/dast/index', + configuration_help_path: Gitlab::Routing.url_helpers.help_page_path('user/application_security/dast/index', anchor: 'enable-automatic-dast-run'), type: 'dast', anchor: 'dast' @@ -55,7 +55,7 @@ module Gitlab description: _('Analyze your dependencies for known vulnerabilities.'), help_path: Gitlab::Routing.url_helpers.help_page_path( 'user/application_security/dependency_scanning/index'), - config_help_path: Gitlab::Routing.url_helpers.help_page_path( + configuration_help_path: Gitlab::Routing.url_helpers.help_page_path( 'user/application_security/dependency_scanning/index', anchor: 'configuration'), type: 'dependency_scanning', anchor: 'dependency-scanning' @@ -65,7 +65,7 @@ module Gitlab description: _('Check your Docker images for known vulnerabilities.'), help_path: Gitlab::Routing.url_helpers.help_page_path( 'user/application_security/container_scanning/index'), - config_help_path: Gitlab::Routing.url_helpers.help_page_path( + configuration_help_path: Gitlab::Routing.url_helpers.help_page_path( 'user/application_security/container_scanning/index', anchor: 'configuration'), type: 'container_scanning' }, @@ -74,7 +74,7 @@ module Gitlab description: _('Analyze your source code and git history for secrets.'), help_path: Gitlab::Routing.url_helpers.help_page_path( 'user/application_security/secret_detection/index'), - config_help_path: Gitlab::Routing.url_helpers.help_page_path( + configuration_help_path: Gitlab::Routing.url_helpers.help_page_path( 'user/application_security/secret_detection/index', anchor: 'configuration'), type: 'secret_detection' }, @@ -90,40 +90,40 @@ module Gitlab description: _('Find bugs in your code with coverage-guided fuzzing.'), help_path: Gitlab::Routing.url_helpers.help_page_path( 'user/application_security/coverage_fuzzing/index'), - config_help_path: Gitlab::Routing.url_helpers.help_page_path( + configuration_help_path: Gitlab::Routing.url_helpers.help_page_path( 'user/application_security/coverage_fuzzing/index', anchor: 'enable-coverage-guided-fuzz-testing'), type: 'coverage_fuzzing', secondary: { type: 'corpus_management', name: _('Corpus Management'), - description: _('SecurityConfiguration|Manage corpus files used as seed ' \ - 'inputs with coverage-guided fuzzing.'), - configuration_text: _('SecurityConfiguration|Manage corpus') + description: s_('SecurityConfiguration|Manage corpus files used as seed ' \ + 'inputs with coverage-guided fuzzing.'), + configuration_text: s_('SecurityConfiguration|Manage corpus') } }, breach_and_attack_simulation: { anchor: 'bas', badge: { always_display: true, - text: _('SecurityConfiguration|Incubating feature'), - tooltip_text: _('SecurityConfiguration|Breach and Attack Simulation is an incubating ' \ - 'feature extending existing security testing by simulating adversary activity.'), + text: s_('SecurityConfiguration|Incubating feature'), + tooltip_text: s_('SecurityConfiguration|Breach and Attack Simulation is an incubating ' \ + 'feature extending existing security testing by simulating adversary activity.'), variant: 'info' }, - description: _('SecurityConfiguration|Simulate breach and attack scenarios against your ' \ - 'running application by attempting to detect and exploit known vulnerabilities.'), - name: _('SecurityConfiguration|Breach and Attack Simulation (BAS)'), + description: s_('SecurityConfiguration|Simulate breach and attack scenarios against your ' \ + 'running application by attempting to detect and exploit known vulnerabilities.'), + name: s_('SecurityConfiguration|Breach and Attack Simulation (BAS)'), help_path: Gitlab::Routing.url_helpers.help_page_path( 'user/application_security/breach_and_attack_simulation/index'), secondary: { - config_help_path: Gitlab::Routing.url_helpers.help_page_path( + configuration_help_path: Gitlab::Routing.url_helpers.help_page_path( 'user/application_security/breach_and_attack_simulation/index', anchor: 'extend-dynamic-application-security-testing-dast'), - description: _('SecurityConfiguration|Enable incubating Breach and Attack Simulation focused ' \ - 'features such as callback attacks in your DAST scans.'), - name: _('SecurityConfiguration|Out-of-Band Application Security Testing (OAST)') + description: s_('SecurityConfiguration|Enable incubating Breach and Attack Simulation focused ' \ + 'features such as callback attacks in your DAST scans.'), + name: s_('SecurityConfiguration|Out-of-Band Application Security Testing (OAST)') }, - short_name: _('SecurityConfiguration|BAS'), + short_name: s_('SecurityConfiguration|BAS'), type: 'breach_and_attack_simulation' } }.freeze diff --git a/lib/tasks/gitlab/cleanup.rake b/lib/tasks/gitlab/cleanup.rake index ecf5b10690a..c20190a2f64 100644 --- a/lib/tasks/gitlab/cleanup.rake +++ b/lib/tasks/gitlab/cleanup.rake @@ -51,7 +51,7 @@ namespace :gitlab do end end - desc 'GitLab | Cleanup | Clean orphan job artifact files' + desc 'GitLab | Cleanup | Clean orphan job artifact files in local storage' task orphan_job_artifact_files: :gitlab_environment do warn_user_is_not_gitlab @@ -63,6 +63,31 @@ namespace :gitlab do end end + desc 'GitLab | Cleanup | Clean orphan job artifact files stored in the @final directory in object storage' + task :orphan_job_artifact_final_objects, [:provider] => :gitlab_environment do |_, args| + warn_user_is_not_gitlab + + force_restart = ENV['FORCE_RESTART'].present? + + begin + cleaner = Gitlab::Cleanup::OrphanJobArtifactFinalObjectsCleaner.new( + provider: args.provider, + force_restart: force_restart, + dry_run: dry_run?, + logger: logger + ) + + cleaner.run! + + if dry_run? + logger.info "To clean up all orphan files that were found, run this command with DRY_RUN=false".color(:yellow) + end + rescue Gitlab::Cleanup::OrphanJobArtifactFinalObjectsCleaner::UnsupportedProviderError => e + abort %(#{e.message} +Usage: rake "gitlab:cleanup:orphan_job_artifact_final_objects[provider]") + end + end + desc 'GitLab | Cleanup | Clean orphan LFS file references' task orphan_lfs_file_references: :gitlab_environment do warn_user_is_not_gitlab |