gitlab.com/gitlab-org/gitlab-foss.git

author     Bob Van Landuyt <bob@vanlanduyt.co>    2019-04-23 17:27:01 +0300
committer  Bob Van Landuyt <bob@vanlanduyt.co>    2019-05-31 10:40:54 +0300
commit     589b2db06ca2ca2bc3e5d9e56968e3609f9e4626 (patch)
tree       1652ec910b83d71666fabcf1957d46886a400d40 /lib/gitlab/phabricator_import
parent     6189c869b87aa469f5efb058834cd65afd8fe563 (diff)
Setup Phabricator import
This sets up all the basics for importing Phabricator tasks into GitLab issues. To import all tasks from a Phabricator instance into GitLab, we'll import all of them into a new project that will have its repository disabled.

The import is hooked into a regular ProjectImport setup but, similar to the GitHub parallel importer, it takes care of all the imports itself.

In this iteration, we're importing each page of tasks in a separate sidekiq job. The first thing we do when requesting a new page of tasks is schedule the next page to be imported. But to avoid deadlocks, we only allow a single job per worker type to run at the same time.

For now we're only importing basic Issue information; this should be extended to richer information.
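A minimal sketch of how the pieces in this diff chain together, assuming a project that was already created with Phabricator import data (the standalone invocation is illustrative; class and method names are taken from the files below):

    # Illustrative only: kicking off a Phabricator import for a project.
    importer = Gitlab::PhabricatorImport::Importer.new(project)
    importer.execute
    # => sets the async import jid, then ImportTasksWorker.schedule(project.id)

    # Each ImportTasksWorker run (Issues::Importer) then:
    #   1. schedules the next page first, when the Conduit cursor has one:
    #      ImportTasksWorker.schedule(project.id, pagination.next_page)
    #   2. imports every task on the current page via Issues::TaskImporter
    # BaseWorker's exclusive lease ensures only one such job runs per project
    # at a time, which is how the deadlocks mentioned above are avoided.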
Diffstat (limited to 'lib/gitlab/phabricator_import')
-rw-r--r--  lib/gitlab/phabricator_import/base_worker.rb            |  80
-rw-r--r--  lib/gitlab/phabricator_import/cache/map.rb              |  65
-rw-r--r--  lib/gitlab/phabricator_import/conduit.rb                |   9
-rw-r--r--  lib/gitlab/phabricator_import/conduit/client.rb         |  41
-rw-r--r--  lib/gitlab/phabricator_import/conduit/maniphest.rb      |  28
-rw-r--r--  lib/gitlab/phabricator_import/conduit/pagination.rb     |  24
-rw-r--r--  lib/gitlab/phabricator_import/conduit/response.rb       |  60
-rw-r--r--  lib/gitlab/phabricator_import/conduit/tasks_response.rb |  24
-rw-r--r--  lib/gitlab/phabricator_import/import_tasks_worker.rb    |  10
-rw-r--r--  lib/gitlab/phabricator_import/importer.rb               |  44
-rw-r--r--  lib/gitlab/phabricator_import/issues/importer.rb        |  42
-rw-r--r--  lib/gitlab/phabricator_import/issues/task_importer.rb   |  54
-rw-r--r--  lib/gitlab/phabricator_import/project_creator.rb        |  78
-rw-r--r--  lib/gitlab/phabricator_import/representation/task.rb    |  60
-rw-r--r--  lib/gitlab/phabricator_import/worker_state.rb           |  47
15 files changed, 666 insertions(+), 0 deletions(-)
diff --git a/lib/gitlab/phabricator_import/base_worker.rb b/lib/gitlab/phabricator_import/base_worker.rb
new file mode 100644
index 00000000000..b69c65e78f8
--- /dev/null
+++ b/lib/gitlab/phabricator_import/base_worker.rb
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+# All workers within a Phabricator import should inherit from this worker and
+# implement the `#import` method. The jobs should then be scheduled using the
+# `.schedule` class method instead of `.perform_async`.
+#
+# Doing this makes sure that only one job of that type is running at the same
+# time for a certain project, which avoids deadlocks. When a job is already
+# running, we'll wait for it to finish for up to 10 times 5 seconds. If the
+# running job hasn't finished by then, we'll reschedule the new job in 30 seconds.
+#
+# It also makes sure that we keep the import state of the project up to date:
+# - It keeps track of the jobs so we know how many jobs are running for the
+#   project
+# - It refreshes the import jid, so it doesn't get cleaned up by the
+#   `StuckImportJobsWorker`
+# - It marks the import as failed if a job failed too many times
+# - It marks the import as finished when all remaining jobs are done
+module Gitlab
+  module PhabricatorImport
+    class BaseWorker
+      include ApplicationWorker
+      include ProjectImportOptions # This marks the project as failed after too many tries
+      include Gitlab::ExclusiveLeaseHelpers
+
+      class << self
+        def schedule(project_id, *args)
+          perform_async(project_id, *args)
+          add_job(project_id)
+        end
+
+        def add_job(project_id)
+          worker_state(project_id).add_job
+        end
+
+        def remove_job(project_id)
+          worker_state(project_id).remove_job
+        end
+
+        def worker_state(project_id)
+          Gitlab::PhabricatorImport::WorkerState.new(project_id)
+        end
+      end
+
+      def perform(project_id, *args)
+        in_lock("#{self.class.name.underscore}/#{project_id}/#{args}", ttl: 2.hours, sleep_sec: 5.seconds) do
+          project = Project.find_by_id(project_id)
+          next unless project
+
+          # Bail if the import is no longer in progress (for example, it already failed)
+          next unless project.import_state&.in_progress?
+
+          project.import_state.refresh_jid_expiration
+
+          import(project, *args)
+
+          # If this is the last running job, finish the import
+          project.after_import if self.class.worker_state(project_id).running_count < 2
+
+          self.class.remove_job(project_id)
+        end
+      rescue Gitlab::ExclusiveLeaseHelpers::FailedToObtainLockError
+        # Reschedule the job if there was already one running.
+        # Running them at the same time could cause a deadlock updating the
+        # same resource.
+        self.class.perform_in(30.seconds, project_id, *args)
+      end
+
+      private
+
+      def import(project, *args)
+        importer_class.new(project, *args).execute
+      end
+
+      def importer_class
+        raise NotImplementedError, "Implement `#{__method__}` on #{self.class}"
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/phabricator_import/cache/map.rb b/lib/gitlab/phabricator_import/cache/map.rb
new file mode 100644
index 00000000000..fa8b37b20ca
--- /dev/null
+++ b/lib/gitlab/phabricator_import/cache/map.rb
@@ -0,0 +1,65 @@
+# frozen_string_literal: true
+module Gitlab
+  module PhabricatorImport
+    module Cache
+      class Map
+        def initialize(project)
+          @project = project
+        end
+
+        def get_gitlab_model(phabricator_id)
+          cached_info = get(phabricator_id)
+          return unless cached_info[:classname] && cached_info[:database_id]
+
+          cached_info[:classname].constantize.find_by_id(cached_info[:database_id])
+        end
+
+        def set_gitlab_model(object, phabricator_id)
+          set(object.class, object.id, phabricator_id)
+        end
+
+        private
+
+        attr_reader :project
+
+        def set(klass_name, object_id, phabricator_id)
+          key = cache_key_for_phabricator_id(phabricator_id)
+
+          redis.with do |r|
+            r.multi do |multi|
+              multi.mapped_hmset(key,
+                                 { classname: klass_name, database_id: object_id })
+              multi.expire(key, timeout)
+            end
+          end
+        end
+
+        def get(phabricator_id)
+          key = cache_key_for_phabricator_id(phabricator_id)
+
+          redis.with do |r|
+            r.pipelined do |pipe|
+              # Extend the TTL when a key is read
+              pipe.expire(key, timeout)
+              pipe.mapped_hmget(key, :classname, :database_id)
+            end.last
+          end
+        end
+
+        def cache_key_for_phabricator_id(phabricator_id)
+          "#{Redis::Cache::CACHE_NAMESPACE}/phabricator-import/#{project.id}/#{phabricator_id}"
+        end
+
+        def redis
+          Gitlab::Redis::Cache
+        end
+
+        def timeout
+          # Setting the timeout to the same one we use for clearing stuck jobs
+          # makes sure the whole cache is available while the import is running.
+          StuckImportJobsWorker::IMPORT_JOBS_EXPIRATION
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/phabricator_import/conduit.rb b/lib/gitlab/phabricator_import/conduit.rb
new file mode 100644
index 00000000000..4c64d737389
--- /dev/null
+++ b/lib/gitlab/phabricator_import/conduit.rb
@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+module Gitlab
+  module PhabricatorImport
+    module Conduit
+      ApiError = Class.new(Gitlab::PhabricatorImport::BaseError)
+      ResponseError = Class.new(ApiError)
+    end
+  end
+end
diff --git a/lib/gitlab/phabricator_import/conduit/client.rb b/lib/gitlab/phabricator_import/conduit/client.rb
new file mode 100644
index 00000000000..4469a3f5849
--- /dev/null
+++ b/lib/gitlab/phabricator_import/conduit/client.rb
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+module Gitlab
+  module PhabricatorImport
+    module Conduit
+      class Client
+        def initialize(phabricator_url, api_token)
+          @phabricator_url = phabricator_url
+          @api_token = api_token
+        end
+
+        def get(path, params: {})
+          response = Gitlab::HTTP.get(build_url(path), body: build_params(params), headers: headers)
+          Response.parse!(response)
+        rescue *Gitlab::HTTP::HTTP_ERRORS => e
+          # Wrap all errors from the API into an API-error.
+          raise ApiError.new(e)
+        end
+
+        private
+
+        attr_reader :phabricator_url, :api_token
+
+        def headers
+          { "Accept" => 'application/json' }
+        end
+
+        def build_url(path)
+          URI.join(phabricator_url, '/api/', path).to_s
+        end
+
+        def build_params(params)
+          params = params.dup
+          params.compact!
+          params.reverse_merge!("api.token" => api_token)
+
+          CGI.unescape(params.to_query)
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/phabricator_import/conduit/maniphest.rb b/lib/gitlab/phabricator_import/conduit/maniphest.rb
new file mode 100644
index 00000000000..848b71e49e7
--- /dev/null
+++ b/lib/gitlab/phabricator_import/conduit/maniphest.rb
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+module Gitlab
+  module PhabricatorImport
+    module Conduit
+      class Maniphest
+        def initialize(phabricator_url:, api_token:)
+          @client = Client.new(phabricator_url, api_token)
+        end
+
+        def tasks(after: nil)
+          TasksResponse.new(get_tasks(after))
+        end
+
+        private
+
+        def get_tasks(after)
+          client.get('maniphest.search',
+                     params: {
+                       after: after,
+                       attachments: { projects: 1, subscribers: 1, columns: 1 }
+                     })
+        end
+
+        attr_reader :client
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/phabricator_import/conduit/pagination.rb b/lib/gitlab/phabricator_import/conduit/pagination.rb
new file mode 100644
index 00000000000..5f54cccdbc8
--- /dev/null
+++ b/lib/gitlab/phabricator_import/conduit/pagination.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+module Gitlab
+  module PhabricatorImport
+    module Conduit
+      class Pagination
+        def initialize(cursor_json)
+          @cursor_json = cursor_json
+        end
+
+        def has_next_page?
+          next_page.present?
+        end
+
+        def next_page
+          cursor_json["after"]
+        end
+
+        private
+
+        attr_reader :cursor_json
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/phabricator_import/conduit/response.rb b/lib/gitlab/phabricator_import/conduit/response.rb
new file mode 100644
index 00000000000..6053ecfbd5e
--- /dev/null
+++ b/lib/gitlab/phabricator_import/conduit/response.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+module Gitlab
+  module PhabricatorImport
+    module Conduit
+      class Response
+        def self.parse!(http_response)
+          unless http_response.success?
+            raise Gitlab::PhabricatorImport::Conduit::ResponseError,
+                  "Phabricator responded with #{http_response.status}"
+          end
+
+          response = new(JSON.parse(http_response.body))
+
+          unless response.success?
+            raise ResponseError,
+                  "Phabricator Error: #{response.error_code}: #{response.error_info}"
+          end
+
+          response
+        rescue JSON::JSONError => e
+          raise ResponseError.new(e)
+        end
+
+        def initialize(json)
+          @json = json
+        end
+
+        def success?
+          error_code.nil?
+        end
+
+        def error_code
+          json['error_code']
+        end
+
+        def error_info
+          json['error_info']
+        end
+
+        def data
+          json_result&.fetch('data')
+        end
+
+        def pagination
+          return unless cursor_info = json_result&.fetch('cursor')
+
+          @pagination ||= Pagination.new(cursor_info)
+        end
+
+        private
+
+        attr_reader :json
+
+        def json_result
+          json['result']
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/phabricator_import/conduit/tasks_response.rb b/lib/gitlab/phabricator_import/conduit/tasks_response.rb
new file mode 100644
index 00000000000..cbcf7259fb2
--- /dev/null
+++ b/lib/gitlab/phabricator_import/conduit/tasks_response.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+module Gitlab
+  module PhabricatorImport
+    module Conduit
+      class TasksResponse
+        def initialize(conduit_response)
+          @conduit_response = conduit_response
+        end
+
+        delegate :pagination, to: :conduit_response
+
+        def tasks
+          @tasks ||= conduit_response.data.map do |task_json|
+            Gitlab::PhabricatorImport::Representation::Task.new(task_json)
+          end
+        end
+
+        private
+
+        attr_reader :conduit_response
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/phabricator_import/import_tasks_worker.rb b/lib/gitlab/phabricator_import/import_tasks_worker.rb
new file mode 100644
index 00000000000..c36954a8d41
--- /dev/null
+++ b/lib/gitlab/phabricator_import/import_tasks_worker.rb
@@ -0,0 +1,10 @@
+# frozen_string_literal: true
+module Gitlab
+  module PhabricatorImport
+    class ImportTasksWorker < BaseWorker
+      def importer_class
+        Gitlab::PhabricatorImport::Issues::Importer
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/phabricator_import/importer.rb b/lib/gitlab/phabricator_import/importer.rb
new file mode 100644
index 00000000000..c1797f4027e
--- /dev/null
+++ b/lib/gitlab/phabricator_import/importer.rb
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module PhabricatorImport
+    class Importer
+      def self.async?
+        true
+      end
+
+      def self.imports_repository?
+        # This does not really import a repository, but we want to skip all
+        # repository-related tasks in the `Projects::ImportService`
+        true
+      end
+
+      def initialize(project)
+        @project = project
+      end
+
+      def execute
+        Gitlab::Import::SetAsyncJid.set_jid(project)
+        schedule_first_tasks_page
+
+        true
+      rescue => e
+        fail_import(e.message)
+
+        false
+      end
+
+      private
+
+      attr_reader :project
+
+      def schedule_first_tasks_page
+        ImportTasksWorker.schedule(project.id)
+      end
+
+      def fail_import(message)
+        project.import_state.mark_as_failed(message)
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/phabricator_import/issues/importer.rb b/lib/gitlab/phabricator_import/issues/importer.rb
new file mode 100644
index 00000000000..a58438452ff
--- /dev/null
+++ b/lib/gitlab/phabricator_import/issues/importer.rb
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+module Gitlab
+  module PhabricatorImport
+    module Issues
+      class Importer
+        def initialize(project, after = nil)
+          @project, @after = project, after
+        end
+
+        def execute
+          schedule_next_batch
+
+          tasks_response.tasks.each do |task|
+            TaskImporter.new(project, task).execute
+          end
+        end
+
+        private
+
+        attr_reader :project, :after
+
+        def schedule_next_batch
+          return unless tasks_response.pagination.has_next_page?
+
+          Gitlab::PhabricatorImport::ImportTasksWorker
+            .schedule(project.id, tasks_response.pagination.next_page)
+        end
+
+        def tasks_response
+          @tasks_response ||= client.tasks(after: after)
+        end
+
+        def client
+          @client ||=
+            Gitlab::PhabricatorImport::Conduit::Maniphest
+              .new(phabricator_url: project.import_data.data['phabricator_url'],
+                   api_token: project.import_data.credentials[:api_token])
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/phabricator_import/issues/task_importer.rb b/lib/gitlab/phabricator_import/issues/task_importer.rb
new file mode 100644
index 00000000000..40d4392cbc1
--- /dev/null
+++ b/lib/gitlab/phabricator_import/issues/task_importer.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+module Gitlab
+  module PhabricatorImport
+    module Issues
+      class TaskImporter
+        def initialize(project, task)
+          @project, @task = project, task
+        end
+
+        def execute
+          # TODO: get the user from the project namespace from the username loaded by Phab-id
+          # https://gitlab.com/gitlab-org/gitlab-ce/issues/60565
+          issue.author = User.ghost
+
+          # TODO: Reformat the description with attachments, escaping accidental
+          # links and add attachments
+          # https://gitlab.com/gitlab-org/gitlab-ce/issues/60603
+          issue.assign_attributes(task.issue_attributes)
+
+          save!
+
+          issue
+        end
+
+        private
+
+        attr_reader :project, :task
+
+        def save!
+          # Just avoiding an extra redis call here: we've already updated the
+          # expiry when reading the id from the map
+          was_persisted = issue.persisted?
+
+          issue.save! if issue.changed?
+
+          object_map.set_gitlab_model(issue, task.phabricator_id) unless was_persisted
+        end
+
+        def issue
+          @issue ||= find_issue_by_phabricator_id(task.phabricator_id) ||
+            project.issues.new
+        end
+
+        def find_issue_by_phabricator_id(phabricator_id)
+          object_map.get_gitlab_model(phabricator_id)
+        end
+
+        def object_map
+          Gitlab::PhabricatorImport::Cache::Map.new(project)
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/phabricator_import/project_creator.rb b/lib/gitlab/phabricator_import/project_creator.rb
new file mode 100644
index 00000000000..b37a5b44980
--- /dev/null
+++ b/lib/gitlab/phabricator_import/project_creator.rb
@@ -0,0 +1,78 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module PhabricatorImport
+    class ProjectCreator
+      def initialize(current_user, params)
+        @current_user = current_user
+        @params = params.dup
+      end
+
+      def execute
+        return unless import_url.present? && api_token.present?
+
+        project = Projects::CreateService.new(current_user, create_params).execute
+        return project unless project.persisted?
+
+        project.project_feature.update!(project_feature_attributes)
+
+        project
+      end
+
+      private
+
+      attr_reader :current_user, :params
+
+      def create_params
+        {
+          name: project_name,
+          path: project_path,
+          namespace_id: namespace_id,
+          import_type: 'phabricator',
+          import_url: Project::UNKNOWN_IMPORT_URL,
+          import_data: import_data
+        }
+      end
+
+      def project_name
+        params[:name]
+      end
+
+      def project_path
+        params[:path]
+      end
+
+      def namespace_id
+        params[:namespace_id] || current_user.namespace_id
+      end
+
+      def import_url
+        params[:phabricator_server_url]
+      end
+
+      def api_token
+        params[:api_token]
+      end
+
+      def project_feature_attributes
+        @project_features_attributes ||= begin
+          # everything disabled except for issues
+          ProjectFeature::FEATURES.map do |feature|
+            [ProjectFeature.access_level_attribute(feature), ProjectFeature::DISABLED]
+          end.to_h.merge(ProjectFeature.access_level_attribute(:issues) => ProjectFeature::ENABLED)
+        end
+      end
+
+      def import_data
+        {
+          data: {
+            phabricator_url: import_url
+          },
+          credentials: {
+            api_token: params.fetch(:api_token)
+          }
+        }
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/phabricator_import/representation/task.rb b/lib/gitlab/phabricator_import/representation/task.rb
new file mode 100644
index 00000000000..6aedc71b626
--- /dev/null
+++ b/lib/gitlab/phabricator_import/representation/task.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+module Gitlab
+  module PhabricatorImport
+    module Representation
+      class Task
+        def initialize(json)
+          @json = json
+        end
+
+        def phabricator_id
+          json['phid']
+        end
+
+        def issue_attributes
+          @issue_attributes ||= {
+            title: issue_title,
+            description: issue_description,
+            state: issue_state,
+            created_at: issue_created_at,
+            closed_at: issue_closed_at
+          }
+        end
+
+        private
+
+        attr_reader :json
+
+        def issue_title
+          # The 255 limit is the validation we impose on the Issue title in
+          # Issuable
+          @issue_title ||= json['fields']['name'].truncate(255)
+        end
+
+        def issue_description
+          json['fields']['description']['raw']
+        end
+
+        def issue_state
+          issue_closed_at.present? ? :closed : :opened
+        end
+
+        def issue_created_at
+          return unless json['fields']['dateCreated']
+
+          @issue_created_at ||= cast_datetime(json['fields']['dateCreated'])
+        end
+
+        def issue_closed_at
+          return unless json['fields']['dateClosed']
+
+          @issue_closed_at ||= cast_datetime(json['fields']['dateClosed'])
+        end
+
+        def cast_datetime(value)
+          Time.at(value.to_i)
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/phabricator_import/worker_state.rb b/lib/gitlab/phabricator_import/worker_state.rb
new file mode 100644
index 00000000000..38829e34509
--- /dev/null
+++ b/lib/gitlab/phabricator_import/worker_state.rb
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+module Gitlab
+  module PhabricatorImport
+    class WorkerState
+      def initialize(project_id)
+        @project_id = project_id
+      end
+
+      def add_job
+        redis.with do |r|
+          r.pipelined do |pipe|
+            pipe.incr(all_jobs_key)
+            pipe.expire(all_jobs_key, timeout)
+          end
+        end
+      end
+
+      def remove_job
+        redis.with do |r|
+          r.decr(all_jobs_key)
+        end
+      end
+
+      def running_count
+        redis.with { |r| r.get(all_jobs_key) }.to_i
+      end
+
+      private
+
+      attr_reader :project_id
+
+      def redis
+        Gitlab::Redis::SharedState
+      end
+
+      def all_jobs_key
+        @all_jobs_key ||= "phabricator-import/jobs/project-#{project_id}/job-count"
+      end
+
+      def timeout
+        # Make sure we get rid of all the information after a job is marked
+        # as failed/succeeded
+        StuckImportJobsWorker::IMPORT_JOBS_EXPIRATION
+      end
+    end
+  end
+end