diff options
author | Francisco Javier López <fjlopez@gitlab.com> | 2018-06-06 19:42:18 +0300 |
---|---|---|
committer | Douwe Maan <douwe@gitlab.com> | 2018-06-06 19:42:18 +0300 |
commit | e8f49b4bee8d803953b852685889a2912609ae84 (patch) | |
tree | fdbef6fc26ebc49be27e1553ea980cb734b8ab7d /app/services | |
parent | 0dd7563b7c91141f545432e9082906ebb196a38d (diff) |
Support LFS objects when creating a project by import
Diffstat (limited to 'app/services')
7 files changed, 320 insertions, 2 deletions
diff --git a/app/services/base_service.rb b/app/services/base_service.rb index 6883ba36c71..3519b7c5e7d 100644 --- a/app/services/base_service.rb +++ b/app/services/base_service.rb @@ -3,7 +3,7 @@ class BaseService attr_accessor :project, :current_user, :params - def initialize(project, user, params = {}) + def initialize(project, user = nil, params = {}) @project, @current_user, @params = project, user, params.dup end diff --git a/app/services/projects/import_service.rb b/app/services/projects/import_service.rb index 00080717600..1781a01cbd4 100644 --- a/app/services/projects/import_service.rb +++ b/app/services/projects/import_service.rb @@ -17,6 +17,8 @@ module Projects def execute add_repository_to_project + download_lfs_objects + import_data success @@ -37,7 +39,7 @@ module Projects # We should skip the repository for a GitHub import or GitLab project import, # because these importers fetch the project repositories for us. - return if has_importer? && importer_class.try(:imports_repository?) + return if importer_imports_repository? if unknown_url? # In this case, we only want to import issues, not a repository. @@ -73,6 +75,27 @@ module Projects end end + def download_lfs_objects + # In this case, we only want to import issues + return if unknown_url? + + # If it has its own repository importer, it has to implements its own lfs import download + return if importer_imports_repository? + + return unless project.lfs_enabled? + + oids_to_download = Projects::LfsPointers::LfsImportService.new(project).execute + download_service = Projects::LfsPointers::LfsDownloadService.new(project) + + oids_to_download.each do |oid, link| + download_service.execute(oid, link) + end + rescue => e + # Right now, to avoid aborting the importing process, we silently fail + # if any exception raises. + Rails.logger.error("The Lfs import process failed. #{e.message}") + end + def import_data return unless has_importer? @@ -98,5 +121,9 @@ module Projects def unknown_url? project.import_url == Project::UNKNOWN_IMPORT_URL end + + def importer_imports_repository? + has_importer? && importer_class.try(:imports_repository?) + end end end diff --git a/app/services/projects/lfs_pointers/lfs_download_link_list_service.rb b/app/services/projects/lfs_pointers/lfs_download_link_list_service.rb new file mode 100644 index 00000000000..d9fb74b090e --- /dev/null +++ b/app/services/projects/lfs_pointers/lfs_download_link_list_service.rb @@ -0,0 +1,93 @@ +# This service lists the download link from a remote source based on the +# oids provided +module Projects + module LfsPointers + class LfsDownloadLinkListService < BaseService + DOWNLOAD_ACTION = 'download'.freeze + + DownloadLinksError = Class.new(StandardError) + DownloadLinkNotFound = Class.new(StandardError) + + attr_reader :remote_uri + + def initialize(project, remote_uri: nil) + super(project) + + @remote_uri = remote_uri + end + + # This method accepts two parameters: + # - oids: hash of oids to query. The structure is { lfs_file_oid => lfs_file_size } + # + # Returns a hash with the structure { lfs_file_oids => download_link } + def execute(oids) + return {} unless project&.lfs_enabled? && remote_uri && oids.present? + + get_download_links(oids) + end + + private + + def get_download_links(oids) + response = Gitlab::HTTP.post(remote_uri, + body: request_body(oids), + headers: headers) + + raise DownloadLinksError, response.message unless response.success? + + parse_response_links(response['objects']) + end + + def parse_response_links(objects_response) + objects_response.each_with_object({}) do |entry, link_list| + begin + oid = entry['oid'] + link = entry.dig('actions', DOWNLOAD_ACTION, 'href') + + raise DownloadLinkNotFound unless link + + link_list[oid] = add_credentials(link) + rescue DownloadLinkNotFound, URI::InvalidURIError + Rails.logger.error("Link for Lfs Object with oid #{oid} not found or invalid.") + end + end + end + + def request_body(oids) + { + operation: DOWNLOAD_ACTION, + objects: oids.map { |oid, size| { oid: oid, size: size } } + }.to_json + end + + def headers + { + 'Accept' => LfsRequest::CONTENT_TYPE, + 'Content-Type' => LfsRequest::CONTENT_TYPE + }.freeze + end + + def add_credentials(link) + uri = URI.parse(link) + + if should_add_credentials?(uri) + uri.user = remote_uri.user + uri.password = remote_uri.password + end + + uri.to_s + end + + # The download link can be a local url or an object storage url + # If the download link has the some host as the import url then + # we add the same credentials because we may need them + def should_add_credentials?(link_uri) + url_credentials? && link_uri.host == remote_uri.host + end + + def url_credentials? + remote_uri.user.present? || remote_uri.password.present? + end + end + end +end diff --git a/app/services/projects/lfs_pointers/lfs_download_service.rb b/app/services/projects/lfs_pointers/lfs_download_service.rb new file mode 100644 index 00000000000..6ea43561d61 --- /dev/null +++ b/app/services/projects/lfs_pointers/lfs_download_service.rb @@ -0,0 +1,58 @@ +# This service downloads and links lfs objects from a remote URL +module Projects + module LfsPointers + class LfsDownloadService < BaseService + def execute(oid, url) + return unless project&.lfs_enabled? && oid.present? && url.present? + + return if LfsObject.exists?(oid: oid) + + sanitized_uri = Gitlab::UrlSanitizer.new(url) + + with_tmp_file(oid) do |file| + size = download_and_save_file(file, sanitized_uri) + lfs_object = LfsObject.new(oid: oid, size: size, file: file) + + project.all_lfs_objects << lfs_object + end + rescue StandardError => e + Rails.logger.error("LFS file with oid #{oid} could't be downloaded from #{sanitized_uri.sanitized_url}: #{e.message}") + end + + private + + def download_and_save_file(file, sanitized_uri) + IO.copy_stream(open(sanitized_uri.sanitized_url, headers(sanitized_uri)), file) + end + + def headers(sanitized_uri) + {}.tap do |headers| + credentials = sanitized_uri.credentials + + if credentials[:user].present? || credentials[:password].present? + # Using authentication headers in the request + headers[:http_basic_authentication] = [credentials[:user], credentials[:password]] + end + end + end + + def with_tmp_file(oid) + create_tmp_storage_dir + + File.open(File.join(tmp_storage_dir, oid), 'w') { |file| yield file } + end + + def create_tmp_storage_dir + FileUtils.makedirs(tmp_storage_dir) unless Dir.exist?(tmp_storage_dir) + end + + def tmp_storage_dir + @tmp_storage_dir ||= File.join(storage_dir, 'tmp', 'download') + end + + def storage_dir + @storage_dir ||= Gitlab.config.lfs.storage_path + end + end + end +end diff --git a/app/services/projects/lfs_pointers/lfs_import_service.rb b/app/services/projects/lfs_pointers/lfs_import_service.rb new file mode 100644 index 00000000000..b6b0dec142f --- /dev/null +++ b/app/services/projects/lfs_pointers/lfs_import_service.rb @@ -0,0 +1,92 @@ +# This service manages the whole worflow of discovering the Lfs files in a +# repository, linking them to the project and downloading (and linking) the non +# existent ones. +module Projects + module LfsPointers + class LfsImportService < BaseService + include Gitlab::Utils::StrongMemoize + + HEAD_REV = 'HEAD'.freeze + LFS_ENDPOINT_PATTERN = /^\t?url\s*=\s*(.+)$/.freeze + LFS_BATCH_API_ENDPOINT = '/info/lfs/objects/batch'.freeze + + LfsImportError = Class.new(StandardError) + + def execute + return {} unless project&.lfs_enabled? + + if external_lfs_endpoint? + # If the endpoint host is different from the import_url it means + # that the repo is using a third party service for storing the LFS files. + # In this case, we have to disable lfs in the project + disable_lfs! + + return {} + end + + get_download_links + rescue LfsDownloadLinkListService::DownloadLinksError => e + raise LfsImportError, "The LFS objects download list couldn't be imported. Error: #{e.message}" + end + + private + + def external_lfs_endpoint? + lfsconfig_endpoint_uri && lfsconfig_endpoint_uri.host != import_uri.host + end + + def disable_lfs! + project.update(lfs_enabled: false) + end + + def get_download_links + existent_lfs = LfsListService.new(project).execute + linked_oids = LfsLinkService.new(project).execute(existent_lfs.keys) + + # Retrieving those oids not linked and which we need to download + not_linked_lfs = existent_lfs.except(*linked_oids) + + LfsDownloadLinkListService.new(project, remote_uri: current_endpoint_uri).execute(not_linked_lfs) + end + + def lfsconfig_endpoint_uri + strong_memoize(:lfsconfig_endpoint_uri) do + # Retrieveing the blob data from the .lfsconfig file + data = project.repository.lfsconfig_for(HEAD_REV) + # Parsing the data to retrieve the url + parsed_data = data&.match(LFS_ENDPOINT_PATTERN) + + if parsed_data + URI.parse(parsed_data[1]).tap do |endpoint| + endpoint.user ||= import_uri.user + endpoint.password ||= import_uri.password + end + end + end + rescue URI::InvalidURIError + raise LfsImportError, 'Invalid URL in .lfsconfig file' + end + + def import_uri + @import_uri ||= URI.parse(project.import_url) + rescue URI::InvalidURIError + raise LfsImportError, 'Invalid project import URL' + end + + def current_endpoint_uri + (lfsconfig_endpoint_uri || default_endpoint_uri) + end + + # The import url must end with '.git' here we ensure it is + def default_endpoint_uri + @default_endpoint_uri ||= begin + import_uri.dup.tap do |uri| + path = uri.path.gsub(%r(/$), '') + path += '.git' unless path.ends_with?('.git') + uri.path = path + LFS_BATCH_API_ENDPOINT + end + end + end + end + end +end diff --git a/app/services/projects/lfs_pointers/lfs_link_service.rb b/app/services/projects/lfs_pointers/lfs_link_service.rb new file mode 100644 index 00000000000..d20bdf86c58 --- /dev/null +++ b/app/services/projects/lfs_pointers/lfs_link_service.rb @@ -0,0 +1,29 @@ +# Given a list of oids, this services links the existent Lfs Objects to the project +module Projects + module LfsPointers + class LfsLinkService < BaseService + # Accept an array of oids to link + # + # Returns a hash with the same structure with oids linked + def execute(oids) + return {} unless project&.lfs_enabled? + + # Search and link existing LFS Object + link_existing_lfs_objects(oids) + end + + private + + def link_existing_lfs_objects(oids) + existent_lfs_objects = LfsObject.where(oid: oids) + + return [] unless existent_lfs_objects.any? + + not_linked_lfs_objects = existent_lfs_objects.where.not(id: project.all_lfs_objects) + project.all_lfs_objects << not_linked_lfs_objects + + existent_lfs_objects.pluck(:oid) + end + end + end +end diff --git a/app/services/projects/lfs_pointers/lfs_list_service.rb b/app/services/projects/lfs_pointers/lfs_list_service.rb new file mode 100644 index 00000000000..b770982cbc0 --- /dev/null +++ b/app/services/projects/lfs_pointers/lfs_list_service.rb @@ -0,0 +1,19 @@ +# This service list all existent Lfs objects in a repository +module Projects + module LfsPointers + class LfsListService < BaseService + REV = 'HEAD'.freeze + + # Retrieve all lfs blob pointers and returns a hash + # with the structure { lfs_file_oid => lfs_file_size } + def execute + return {} unless project&.lfs_enabled? + + Gitlab::Git::LfsChanges.new(project.repository, REV) + .all_pointers + .map! { |blob| [blob.lfs_oid, blob.lfs_size] } + .to_h + end + end + end +end |