Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Sean McGivern <sean@mcgivern.me.uk> 2018-03-05 14:16:26 +0300
committer: Sean McGivern <sean@mcgivern.me.uk> 2018-03-05 14:16:26 +0300
commit: b7cacaaf4fedd3d9b3b19ea3f1fe3eb585112b88 (patch)
tree: 4f2e1e6405eb72b46cfbcbb1d6e9d06f57269220 /lib
parent: d50caa64f8701bdd0520eee5f9e8ad6755e2601b (diff)
parent: 6f945f20b4c3683bc862ebc476bad9331d72784e (diff)
Merge branch 'ee-4862-verify-file-checksums' into 'master'
Foreground verification of uploads and LFS objects

See merge request gitlab-org/gitlab-ce!17402
Diffstat (limited to 'lib')
-rw-r--r-- lib/gitlab/verify/batch_verifier.rb 64
-rw-r--r-- lib/gitlab/verify/lfs_objects.rb 27
-rw-r--r-- lib/gitlab/verify/rake_task.rb 53
-rw-r--r-- lib/gitlab/verify/uploads.rb 27
-rw-r--r-- lib/tasks/gitlab/lfs/check.rake 8
-rw-r--r-- lib/tasks/gitlab/uploads.rake 44
-rw-r--r-- lib/tasks/gitlab/uploads/check.rake 8
7 files changed, 187 insertions, 44 deletions
diff --git a/lib/gitlab/verify/batch_verifier.rb b/lib/gitlab/verify/batch_verifier.rb
new file mode 100644
index 00000000000..1ef369a4b67
--- /dev/null
+++ b/lib/gitlab/verify/batch_verifier.rb
@@ -0,0 +1,64 @@
+module Gitlab
+ module Verify
+ class BatchVerifier
+ attr_reader :batch_size, :start, :finish
+
+ def initialize(batch_size:, start: nil, finish: nil)
+ @batch_size = batch_size
+ @start = start
+ @finish = finish
+ end
+
+ # Yields a Range of IDs and a Hash of failed verifications (object => error)
+ def run_batches(&blk)
+ relation.in_batches(of: batch_size, start: start, finish: finish) do |relation| # rubocop: disable Cop/InBatches
+ range = relation.first.id..relation.last.id
+ failures = run_batch(relation)
+
+ yield(range, failures)
+ end
+ end
+
+ def name
+ raise NotImplementedError.new
+ end
+
+ def describe(_object)
+ raise NotImplementedError.new
+ end
+
+ private
+
+ def run_batch(relation)
+ relation.map { |upload| verify(upload) }.compact.to_h
+ end
+
+ def verify(object)
+ expected = expected_checksum(object)
+ actual = actual_checksum(object)
+
+ raise 'Checksum missing' unless expected.present?
+ raise 'Checksum mismatch' unless expected == actual
+
+ nil
+ rescue => err
+ [object, err]
+ end
+
+ # This should return an ActiveRecord::Relation suitable for calling #in_batches on
+ def relation
+ raise NotImplementedError.new
+ end
+
+ # The checksum we expect the object to have
+ def expected_checksum(_object)
+ raise NotImplementedError.new
+ end
+
+ # The freshly-recalculated checksum of the object
+ def actual_checksum(_object)
+ raise NotImplementedError.new
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/verify/lfs_objects.rb b/lib/gitlab/verify/lfs_objects.rb
new file mode 100644
index 00000000000..fe51edbdeeb
--- /dev/null
+++ b/lib/gitlab/verify/lfs_objects.rb
@@ -0,0 +1,27 @@
+module Gitlab
+ module Verify
+ class LfsObjects < BatchVerifier
+ def name
+ 'LFS objects'
+ end
+
+ def describe(object)
+ "LFS object: #{object.oid}"
+ end
+
+ private
+
+ def relation
+ LfsObject.all
+ end
+
+ def expected_checksum(lfs_object)
+ lfs_object.oid
+ end
+
+ def actual_checksum(lfs_object)
+ LfsObject.calculate_oid(lfs_object.file.path)
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/verify/rake_task.rb b/lib/gitlab/verify/rake_task.rb
new file mode 100644
index 00000000000..dd138e6b92b
--- /dev/null
+++ b/lib/gitlab/verify/rake_task.rb
@@ -0,0 +1,53 @@
+module Gitlab
+ module Verify
+ class RakeTask
+ def self.run!(verify_kls)
+ verifier = verify_kls.new(
+ batch_size: ENV.fetch('BATCH', 200).to_i,
+ start: ENV['ID_FROM'],
+ finish: ENV['ID_TO']
+ )
+
+ verbose = Gitlab::Utils.to_boolean(ENV['VERBOSE'])
+
+ new(verifier, verbose).run!
+ end
+
+ attr_reader :verifier, :output
+
+ def initialize(verifier, verbose)
+ @verifier = verifier
+ @verbose = verbose
+ end
+
+ def run!
+ say "Checking integrity of #{verifier.name}"
+
+ verifier.run_batches { |*args| run_batch(*args) }
+
+ say 'Done!'
+ end
+
+ def verbose?
+ !!@verbose
+ end
+
+ private
+
+ def say(text)
+ puts(text) # rubocop:disable Rails/Output
+ end
+
+ def run_batch(range, failures)
+ status_color = failures.empty? ? :green : :red
+ say "- #{range}: Failures: #{failures.count}".color(status_color)
+
+ return unless verbose?
+
+ failures.each do |object, error|
+ say " - #{verifier.describe(object)}: #{error.inspect}".color(:red)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/verify/uploads.rb b/lib/gitlab/verify/uploads.rb
new file mode 100644
index 00000000000..6972e517ea5
--- /dev/null
+++ b/lib/gitlab/verify/uploads.rb
@@ -0,0 +1,27 @@
+module Gitlab
+ module Verify
+ class Uploads < BatchVerifier
+ def name
+ 'Uploads'
+ end
+
+ def describe(object)
+ "Upload: #{object.id}"
+ end
+
+ private
+
+ def relation
+ Upload.all
+ end
+
+ def expected_checksum(upload)
+ upload.checksum
+ end
+
+ def actual_checksum(upload)
+ Upload.hexdigest(upload.absolute_path)
+ end
+ end
+ end
+end
diff --git a/lib/tasks/gitlab/lfs/check.rake b/lib/tasks/gitlab/lfs/check.rake
new file mode 100644
index 00000000000..869463d4e5d
--- /dev/null
+++ b/lib/tasks/gitlab/lfs/check.rake
@@ -0,0 +1,8 @@
+namespace :gitlab do
+ namespace :lfs do
+ desc 'GitLab | LFS | Check integrity of uploaded LFS objects'
+ task check: :environment do
+ Gitlab::Verify::RakeTask.run!(Gitlab::Verify::LfsObjects)
+ end
+ end
+end
diff --git a/lib/tasks/gitlab/uploads.rake b/lib/tasks/gitlab/uploads.rake
deleted file mode 100644
index df31567ce64..00000000000
--- a/lib/tasks/gitlab/uploads.rake
+++ /dev/null
@@ -1,44 +0,0 @@
-namespace :gitlab do
- namespace :uploads do
- desc 'GitLab | Uploads | Check integrity of uploaded files'
- task check: :environment do
- puts 'Checking integrity of uploaded files'
-
- uploads_batches do |batch|
- batch.each do |upload|
- puts "- Checking file (#{upload.id}): #{upload.absolute_path}".color(:green)
-
- if upload.exist?
- check_checksum(upload)
- else
- puts " * File does not exist on the file system".color(:red)
- end
- end
- end
-
- puts 'Done!'
- end
-
- def batch_size
- ENV.fetch('BATCH', 200).to_i
- end
-
- def calculate_checksum(absolute_path)
- Digest::SHA256.file(absolute_path).hexdigest
- end
-
- def check_checksum(upload)
- checksum = calculate_checksum(upload.absolute_path)
-
- if checksum != upload.checksum
- puts " * File checksum (#{checksum}) does not match the one in the database (#{upload.checksum})".color(:red)
- end
- end
-
- def uploads_batches(&block)
- Upload.all.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches
- yield relation
- end
- end
- end
-end
diff --git a/lib/tasks/gitlab/uploads/check.rake b/lib/tasks/gitlab/uploads/check.rake
new file mode 100644
index 00000000000..2be2ec7f9c9
--- /dev/null
+++ b/lib/tasks/gitlab/uploads/check.rake
@@ -0,0 +1,8 @@
+namespace :gitlab do
+ namespace :uploads do
+ desc 'GitLab | Uploads | Check integrity of uploaded files'
+ task check: :environment do
+ Gitlab::Verify::RakeTask.run!(Gitlab::Verify::Uploads)
+ end
+ end
+end