Welcome to mirror list, hosted at ThFree Co, Russian Federation.

lfs_objects_pipeline.rb « pipelines « common « bulk_imports « lib - gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: bd09b6add00701bcc9ed0d64c1dc03a480fdcf8f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# frozen_string_literal: true

module BulkImports
  module Common
    module Pipelines
      class LfsObjectsPipeline
        include Pipeline

        file_extraction_pipeline!

        # Runs the download, decompression and extraction services (in that
        # order) and then collects every top-level entry of the temporary
        # directory.
        #
        # @param _context unused
        # @return [BulkImports::Pipeline::ExtractedData] wraps the globbed paths
        def extract(_context)
          # Each service is built right before it runs, one after another.
          %i[download_service decompression_service extraction_service].each do |service|
            send(service).execute
          end

          pattern = File.join(tmpdir, '*')
          entries = Dir.glob(pattern)

          BulkImports::Pipeline::ExtractedData.new(data: entries)
        end

        # rubocop: disable CodeReuse/ActiveRecord
        # Imports one extracted file as an LFS object and links it to the
        # portable once per repository type listed for its OID.
        #
        # The path first goes through the path-traversal check and the
        # allowed-absolute-path check (restricted to Dir.tmpdir). The tar
        # archive, the LFS JSON file, directories and linked files are skipped.
        #
        # @param _context unused
        # @param file_path [String] path of an entry produced by #extract
        # @return [Array, nil] nil when the entry is skipped, otherwise the
        #   repository types that were iterated
        def load(_context, file_path)
          Gitlab::PathTraversal.check_path_traversal!(file_path)
          Gitlab::PathTraversal.check_allowed_absolute_path!(file_path, [Dir.tmpdir])

          return if tar_filepath?(file_path)
          return if lfs_json_filepath?(file_path)
          return if File.directory?(file_path)
          return if Gitlab::Utils::FileInfo.linked?(file_path)

          size = File.size(file_path)
          oid = LfsObject.calculate_oid(file_path)

          lfs_object = LfsObject.find_or_initialize_by(oid: oid, size: size)

          # Only open the file when the record has no stored file yet.
          upload = File.open(file_path) unless lfs_object.file&.exists?

          begin
            lfs_object.file = upload if upload
            lfs_object.save! if lfs_object.changed?
          ensure
            # The handle stays open through the save and is always released
            # afterwards, so importing many files does not exhaust descriptors.
            upload&.close
          end

          # repository_types always returns an Array, so no nil guard is needed.
          repository_types(oid).each do |type|
            create_lfs_objects_project(lfs_object, type)
          end
        end
        # rubocop: enable CodeReuse/ActiveRecord

        # Removes the pipeline's temporary directory if it exists on disk.
        #
        # @param _ unused
        def after_run(_)
          return unless Dir.exist?(tmpdir)

          FileUtils.remove_entry(tmpdir)
        end

        private

        # Builds the service that downloads the relation archive into the
        # temporary directory under the .tar.gz file name.
        #
        # The download URL comes from the entity, for this relation and the
        # batch number found in the context's extra data.
        def download_service
          configuration = context.configuration
          batch_number = context.extra[:batch_number]
          relative_url = context.entity.relation_download_url_path(relation, batch_number)

          BulkImports::FileDownloadService.new(
            configuration: configuration,
            relative_url: relative_url,
            tmpdir: tmpdir,
            filename: targz_filename
          )
        end

        # Builds the service that decompresses the downloaded .tar.gz file
        # inside the temporary directory.
        def decompression_service
          options = { tmpdir: tmpdir, filename: targz_filename }

          BulkImports::FileDecompressionService.new(**options)
        end

        # Builds the service that extracts the .tar archive inside the
        # temporary directory.
        def extraction_service
          BulkImports::ArchiveExtractionService.new(
            tmpdir: tmpdir,
            filename: tar_filename
          )
        end

        # Parsed content of the LFS JSON file, read once and then memoized.
        #
        # @raise [BulkImports::Error] when reading or parsing raises any
        #   StandardError
        def lfs_json
          return @lfs_json if @lfs_json

          raw = File.read(lfs_json_filepath)
          @lfs_json = Gitlab::Json.parse(raw)
        rescue StandardError
          raise BulkImports::Error, 'LFS Objects JSON read failed'
        end

        # Temporary working directory, created on first use with the
        # 'bulk_imports' prefix and reused afterwards.
        def tmpdir
          return @tmpdir if @tmpdir

          @tmpdir = Dir.mktmpdir('bulk_imports')
        end

        # Relation identifier for LFS objects. Within this class it is used to
        # build the download URL and the archive and JSON file names.
        def relation
          BulkImports::FileTransfer::ProjectConfig::LFS_OBJECTS_RELATION
        end

        # File name of the uncompressed archive: the relation name plus ".tar".
        def tar_filename
          format('%s.tar', relation)
        end

        # File name of the compressed archive: the tar file name plus ".gz".
        def targz_filename
          [tar_filename, 'gz'].join('.')
        end

        # Whether the given path is the LFS JSON file inside the temporary
        # directory.
        def lfs_json_filepath?(file_path)
          json_path = lfs_json_filepath

          json_path == file_path
        end

        # Whether the given path is the tar archive inside the temporary
        # directory.
        def tar_filepath?(file_path)
          archive_path = File.join(tmpdir, tar_filename)

          archive_path == file_path
        end

        # Path of the LFS JSON file: "<relation>.json" inside the temporary
        # directory.
        def lfs_json_filepath
          json_filename = format('%s.json', relation)

          File.join(tmpdir, json_filename)
        end

        # Links the LFS object to the portable for one repository type.
        #
        # Nothing happens for repository types outside the allowed list. When
        # the link record is not persisted, a warning with its validation
        # errors is logged instead of raising.
        #
        # @param lfs_object [LfsObject]
        # @param repository_type one of the allowed repository types (or nil)
        def create_lfs_objects_project(lfs_object, repository_type)
          return unless allowed_repository_types.include?(repository_type)

          link = LfsObjectsProject.create(
            project: portable,
            lfs_object: lfs_object,
            repository_type: repository_type
          )

          return if link.persisted?

          warning = {
            project_id: portable.id,
            message: 'Failed to save lfs objects project',
            errors: link.errors.full_messages.to_sentence,
            **Gitlab::ApplicationContext.current
          }

          logger.warn(**warning)
        end

        # Allowed repository types listed for the given OID in the LFS JSON.
        #
        # @param oid [String]
        # @return [Array] empty when the OID has no entry or its entry is not
        #   an Array
        def repository_types(oid)
          listed = lfs_json[oid]

          # Covers both a missing entry (nil) and an entry of the wrong type.
          return [] unless listed.is_a?(Array)

          # The intersection keeps only allowed types; Array#& also drops
          # duplicates while keeping the order of the left operand.
          listed & allowed_repository_types
        end

        # Repository type values known to LfsObjectsProject, plus nil.
        # Computed once and memoized.
        def allowed_repository_types
          @allowed_repository_types ||= [*LfsObjectsProject.repository_types.values, nil]
        end
      end
    end
  end
end