Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Kozono <mkozono@gmail.com>2018-07-27 00:23:33 +0300
committerMichael Kozono <mkozono@gmail.com>2018-07-31 21:09:42 +0300
commit3cbd8b13436be25074db17dfbd555588e917279d (patch)
tree483da69a036ae655db97ac56b056e0b811f25b15 /lib/gitlab/cleanup
parent400925c480d95df735862d7edd58cc36cb8fbf68 (diff)
Add local project uploads cleanup task
Diffstat (limited to 'lib/gitlab/cleanup')
-rw-r--r--lib/gitlab/cleanup/project_upload_file_finder.rb66
-rw-r--r--lib/gitlab/cleanup/project_uploads.rb125
2 files changed, 191 insertions, 0 deletions
diff --git a/lib/gitlab/cleanup/project_upload_file_finder.rb b/lib/gitlab/cleanup/project_upload_file_finder.rb
new file mode 100644
index 00000000000..2ee8b60e76a
--- /dev/null
+++ b/lib/gitlab/cleanup/project_upload_file_finder.rb
@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Cleanup
+ class ProjectUploadFileFinder
+ FIND_BATCH_SIZE = 500
+ ABSOLUTE_UPLOAD_DIR = FileUploader.root.freeze
+ EXCLUDED_SYSTEM_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/-/*".freeze
+ EXCLUDED_HASHED_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/@hashed/*".freeze
+ EXCLUDED_TMP_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/tmp/*".freeze
+
+ # Paths are relative to the upload directory
+ def each_file_batch(batch_size: FIND_BATCH_SIZE, &block)
+ cmd = build_find_command(ABSOLUTE_UPLOAD_DIR)
+
+ Open3.popen2(*cmd) do |stdin, stdout, status_thread|
+ yield_paths_in_batches(stdout, batch_size, &block)
+
+ raise "Find command failed" unless status_thread.value.success?
+ end
+ end
+
+ private
+
+ def yield_paths_in_batches(stdout, batch_size, &block)
+ paths = []
+
+ stdout.each_line("\0") do |line|
+ paths << line.chomp("\0")
+
+ if paths.size >= batch_size
+ yield(paths)
+ paths = []
+ end
+ end
+
+ yield(paths) if paths.any?
+ end
+
+ def build_find_command(search_dir)
+ cmd = %W[find -L #{search_dir}
+ -type f
+ ! ( -path #{EXCLUDED_SYSTEM_UPLOADS_PATH} -prune )
+ ! ( -path #{EXCLUDED_HASHED_UPLOADS_PATH} -prune )
+ ! ( -path #{EXCLUDED_TMP_UPLOADS_PATH} -prune )
+ -print0]
+
+ ionice = which_ionice
+ cmd = %W[#{ionice} -c Idle] + cmd if ionice
+
+ log_msg = "find command: \"#{cmd.join(' ')}\""
+ Rails.logger.info log_msg
+
+ cmd
+ end
+
+ def which_ionice
+ Gitlab::Utils.which('ionice')
+ rescue StandardError
+ # In this case, returning false is relatively safe,
+ # even though it isn't very nice
+ false
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/cleanup/project_uploads.rb b/lib/gitlab/cleanup/project_uploads.rb
new file mode 100644
index 00000000000..b88e00311d5
--- /dev/null
+++ b/lib/gitlab/cleanup/project_uploads.rb
@@ -0,0 +1,125 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Cleanup
+ class ProjectUploads
+ LOST_AND_FOUND = File.join(ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR, '-', 'project-lost-found')
+
+ attr_reader :logger
+
+ def initialize(logger: nil)
+ @logger = logger || Rails.logger
+ end
+
+ def run!(dry_run: true)
+ logger.info "Looking for orphaned project uploads to clean up#{'. Dry run' if dry_run}..."
+
+ each_orphan_file do |path, upload_path|
+ result = cleanup(path, upload_path, dry_run)
+
+ logger.info result
+ end
+ end
+
+ private
+
+ def cleanup(path, upload_path, dry_run)
+ # This happened in staging:
+ # `find` returned a path on which `File.delete` raised `Errno::ENOENT`
+ return "Cannot find file: #{path}" unless File.exist?(path)
+
+ correct_path = upload_path && find_correct_path(upload_path)
+
+ if correct_path
+ move(path, correct_path, 'fix', dry_run)
+ else
+ move_to_lost_and_found(path, dry_run)
+ end
+ end
+
+ # Accepts a path in the form of "#{hex_secret}/#{filename}"
+ def find_correct_path(upload_path)
+ upload = Upload.find_by(uploader: 'FileUploader', path: upload_path)
+ return unless upload && upload.local?
+
+ upload.absolute_path
+ rescue => e
+ logger.error e.message
+
+ # absolute_path depends on a lot of code. If it doesn't work, then it
+ # it doesn't matter if the upload file is in the right place. Treat it
+ # as uncorrectable.
+ # I.e. the project record might be missing, which raises an exception.
+ nil
+ end
+
+ def move_to_lost_and_found(path, dry_run)
+ new_path = path.sub(/\A#{ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR}/, LOST_AND_FOUND)
+
+ move(path, new_path, 'move to lost and found', dry_run)
+ end
+
+ def move(path, new_path, prefix, dry_run)
+ action = "#{prefix} #{path} -> #{new_path}"
+
+ if dry_run
+ "Can #{action}"
+ else
+ begin
+ FileUtils.mkdir_p(File.dirname(new_path))
+ FileUtils.mv(path, new_path)
+
+ "Did #{action}"
+ rescue => e
+ "Error during #{action}: #{e.inspect}"
+ end
+ end
+ end
+
+ # Yields absolute paths of project upload files that are not in the
+ # uploads table
+ def each_orphan_file
+ ProjectUploadFileFinder.new.each_file_batch do |file_paths|
+ logger.debug "Processing batch of #{file_paths.size} project upload file paths, starting with #{file_paths.first}"
+
+ file_paths.each do |path|
+ pup = ProjectUploadPath.from_path(path)
+
+ yield(path, pup.upload_path) if pup.orphan?
+ end
+ end
+ end
+
+ class ProjectUploadPath
+ PROJECT_FULL_PATH_REGEX = %r{\A#{FileUploader.root}/(.+)/(\h+/[^/]+)\z}.freeze
+
+ attr_reader :full_path, :upload_path
+
+ def initialize(full_path, upload_path)
+ @full_path = full_path
+ @upload_path = upload_path
+ end
+
+ def self.from_path(path)
+ path_matched = path.match(PROJECT_FULL_PATH_REGEX)
+ return new(nil, nil) unless path_matched
+
+ new(path_matched[1], path_matched[2])
+ end
+
+ def orphan?
+ return true if full_path.nil? || upload_path.nil?
+
+ # It's possible to reduce to one query, but `where_full_path_in` is complex
+ !Upload.exists?(path: upload_path, model_id: project_id, model_type: 'Project', uploader: 'FileUploader')
+ end
+
+ private
+
+ def project_id
+ @project_id ||= Project.where_full_path_in([full_path]).pluck(:id)
+ end
+ end
+ end
+ end
+end