Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorToon Claes <toon@gitlab.com>2019-06-14 00:07:59 +0300
committerToon Claes <toon@gitlab.com>2019-06-28 11:02:18 +0300
commitdabd91b2c8a42ac0d0c357190002a5a4b96a57a6 (patch)
tree6010ff883161d0df297005e9fc1c66ce845bf482 /lib/gitlab/cleanup
parent8df6508ca35f211b8ded2291785751bdbc4a8c1a (diff)
Add rake task to clean orphan artifact files
This adds the rake task rake gitlab:cleanup:orphan_job_artifact_files. This rake task cleans all orphan job artifact files it can find on disk. It performs a search on the complete folder of all artifacts on disk. Then it filters out all the job artifact ID for which it could not find a record with matching ID in the database. For these, the file is deleted from disk.
Diffstat (limited to 'lib/gitlab/cleanup')
-rw-r--r--lib/gitlab/cleanup/orphan_job_artifact_files.rb132
-rw-r--r--lib/gitlab/cleanup/orphan_job_artifact_files_batch.rb80
2 files changed, 212 insertions, 0 deletions
diff --git a/lib/gitlab/cleanup/orphan_job_artifact_files.rb b/lib/gitlab/cleanup/orphan_job_artifact_files.rb
new file mode 100644
index 00000000000..ee7164b3e55
--- /dev/null
+++ b/lib/gitlab/cleanup/orphan_job_artifact_files.rb
@@ -0,0 +1,132 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Cleanup
+ class OrphanJobArtifactFiles
+ include Gitlab::Utils::StrongMemoize
+
+ ABSOLUTE_ARTIFACT_DIR = ::JobArtifactUploader.root.freeze
+ LOST_AND_FOUND = File.join(ABSOLUTE_ARTIFACT_DIR, '-', 'lost+found').freeze
+ BATCH_SIZE = 500
+ DEFAULT_NICENESS = 'Best-effort'
+
+ attr_accessor :batch, :total_found, :total_cleaned
+ attr_reader :limit, :dry_run, :niceness, :logger
+
+ def initialize(limit: nil, dry_run: true, niceness: nil, logger: nil)
+ @limit = limit
+ @dry_run = dry_run
+ @niceness = niceness || DEFAULT_NICENESS
+ @logger = logger || Rails.logger
+ @total_found = @total_cleaned = 0
+
+ new_batch!
+ end
+
+ def run!
+ log_info('Looking for orphan job artifacts to clean up')
+
+ find_artifacts do |artifact_file|
+ batch << artifact_file
+
+ clean_batch! if batch.full?
+ break if limit_reached?
+ end
+
+ clean_batch!
+
+ log_info("Processed #{total_found} job artifacts to find and clean #{total_cleaned} orphans.")
+ end
+
+ private
+
+ def new_batch!
+ self.batch = ::Gitlab::Cleanup::OrphanJobArtifactFilesBatch
+ .new(batch_size: batch_size, logger: logger, dry_run: dry_run)
+ end
+
+ def clean_batch!
+ batch.clean!
+
+ update_stats!(batch)
+
+ new_batch!
+ end
+
+ def update_stats!(batch)
+ self.total_found += batch.artifact_files.count
+ self.total_cleaned += batch.lost_and_found.count
+ end
+
+ def limit_reached?
+ return false unless limit
+
+ total_cleaned >= limit
+ end
+
+ def batch_size
+ return BATCH_SIZE unless limit
+ return if limit_reached?
+
+ todo = limit - total_cleaned
+ [BATCH_SIZE, todo].min
+ end
+
+ def find_artifacts
+ Open3.popen3(*find_command) do |stdin, stdout, stderr, status_thread|
+ stdout.each_line do |line|
+ yield line
+ end
+
+ log_error(stderr.read.color(:red)) unless status_thread.value.success?
+ end
+ end
+
+ def find_command
+ strong_memoize(:find_command) do
+ cmd = %W[find -L #{absolute_artifact_dir}]
+
+ # Search for Job Artifact IDs, they are found 6 directory
+ # levels deep. For example:
+ # shared/artifacts/2c/62/2c...a3/2019_02_27/836/628/job.log
+ # 1 2 3 4 5 6
+ # | | | ^- date | ^- Job Artifact ID
+ # | | | ^- Job ID
+ # ^--+--+- components of hashed storage project path
+ cmd += %w[-mindepth 6 -maxdepth 6]
+
+ # Artifact directories are named on their ID
+ cmd += %w[-type d]
+
+ if ionice
+ raise ArgumentError, 'Invalid niceness' unless niceness.match?(/^\w[\w\-]*$/)
+
+ cmd.unshift(*%W[#{ionice} --class #{niceness}])
+ end
+
+ log_info("find command: '#{cmd.join(' ')}'")
+
+ cmd
+ end
+ end
+
+ def absolute_artifact_dir
+ File.absolute_path(ABSOLUTE_ARTIFACT_DIR)
+ end
+
+ def ionice
+ strong_memoize(:ionice) do
+ Gitlab::Utils.which('ionice')
+ end
+ end
+
+ def log_info(msg, params = {})
+ logger.info("#{'[DRY RUN]' if dry_run} #{msg}")
+ end
+
+ def log_error(msg, params = {})
+ logger.error(msg)
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/cleanup/orphan_job_artifact_files_batch.rb b/lib/gitlab/cleanup/orphan_job_artifact_files_batch.rb
new file mode 100644
index 00000000000..5c30258c0fc
--- /dev/null
+++ b/lib/gitlab/cleanup/orphan_job_artifact_files_batch.rb
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Cleanup
+ class OrphanJobArtifactFilesBatch
+ BatchFull = Class.new(StandardError)
+
+ class ArtifactFile
+ attr_accessor :path
+
+ def initialize(path)
+ @path = path
+ end
+
+ def artifact_id
+ path.split('/').last.to_i
+ end
+ end
+
+ include Gitlab::Utils::StrongMemoize
+
+ attr_reader :batch_size, :dry_run
+ attr_accessor :artifact_files
+
+ def initialize(batch_size:, dry_run: true, logger: Rails.logger)
+ @batch_size = batch_size
+ @dry_run = dry_run
+ @logger = logger
+ @artifact_files = []
+ end
+
+ def clean!
+ return if artifact_files.empty?
+
+ lost_and_found.each do |artifact|
+ clean_one!(artifact)
+ end
+ end
+
+ def full?
+ artifact_files.count >= batch_size
+ end
+
+ def <<(artifact_path)
+ raise BatchFull, "Batch full! Already contains #{artifact_files.count} artifacts" if full?
+
+ artifact_files << ArtifactFile.new(artifact_path)
+ end
+
+ def lost_and_found
+ strong_memoize(:lost_and_found) do
+ artifact_file_ids = artifact_files.map(&:artifact_id)
+ existing_artifact_ids = ::Ci::JobArtifact.id_in(artifact_file_ids).pluck_primary_key
+
+ artifact_files.reject { |artifact| existing_artifact_ids.include?(artifact.artifact_id) }
+ end
+ end
+
+ private
+
+ def clean_one!(artifact_file)
+ log_debug("Found orphan job artifact file @ #{artifact_file.path}")
+
+ remove_file!(artifact_file) unless dry_run
+ end
+
+ def remove_file!(artifact_file)
+ FileUtils.rm_rf(artifact_file.path)
+ end
+
+ def log_info(msg, params = {})
+ @logger.info("#{'[DRY RUN]' if dry_run} #{msg}")
+ end
+
+ def log_debug(msg, params = {})
+ @logger.debug(msg)
+ end
+ end
+ end
+end