Welcome to mirror list, hosted at ThFree Co, Russian Federation.

single_endpoint_notes_importing.rb « github_import « gitlab « lib - gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: aea4059dfbc4923e30884a695fac8480f4e65b4d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# frozen_string_literal: true

# This module is used in:
#  - SingleEndpointDiffNotesImporter
#  - SingleEndpointIssueNotesImporter
#  - SingleEndpointMergeRequestNotesImporter
# if `github_importer_single_endpoint_notes_import` feature flag is on.
#
# - SingleEndpointIssueEventsImporter
# if `github_importer_issue_events_import` feature flag is on.
#
# Fetches the associated objects, page by page, for each item of the parent
# collection.
# Currently an `associated` object is a note or an event.
# Currently a `parent` is a MergeRequest or an Issue record.
#
# It fetches 1 PR's associated objects at a time using `issue_comments` or
# `pull_request_comments` endpoint, which is slower than `NotesImporter`
# but it makes sure all notes are imported, as it can sometimes not be
# the case for `NotesImporter`, because `issues_comments` endpoint
# it uses can be limited by GitHub API to not return all available pages.
module Gitlab
  module GithubImport
    # Mixin that iterates over parent records and, for each of them, pages
    # through the associated GitHub objects.
    #
    # Includers override `parent_collection`, `parent_imported_cache_key` and
    # `page_counter_id`. The methods below also call `project`, `client`,
    # `object_type`, `collection_method`, `collection_options`,
    # `already_imported?` and `mark_as_imported`, which are not defined in
    # this module and have to come from the including class.
    module SingleEndpointNotesImporting
      # Passed as `of:` to `parent_collection.each_batch`.
      BATCH_SIZE = 100

      # Walks every page of associated objects and hands each object that is
      # not reported by `already_imported?` to the given block.
      def each_object_to_import(&block)
        each_associated_page do |parent_record, page|
          page.objects.each { |associated| each_associated(parent_record, associated, &block) }
        end
      end

      # Returns `associated.id`. Judging by the method name, this is the
      # value under which the object is tracked in the already-imported cache.
      def id_for_already_imported_cache(associated)
        associated.id
      end

      # Abstract hook: the parent records to iterate. The returned object must
      # respond to `each_batch(of:, column:)` (see `each_associated_page`).
      def parent_collection
        raise NotImplementedError
      end

      # Abstract hook: key of the cached set that `mark_parent_imported`
      # writes to and `already_imported_parents` reads from.
      def parent_imported_cache_key
        raise NotImplementedError
      end

      # Abstract hook: identifier given to `PageCounter.new` for `parent`.
      def page_counter_id(parent)
        raise NotImplementedError
      end

      private

      # Counts, yields and then marks a single associated object, unless it
      # was imported before.
      #
      # The parent record is not used by this default implementation. It is
      # part of the signature so that an includer can override the method and
      # enrich the associated record with parent data that the GitHub API
      # response lacks, e.g.
      # lib/gitlab/github_import/importer/single_endpoint_issue_events_importer.rb:26
      def each_associated(_parent_record, associated)
        return if already_imported?(associated)

        ObjectCounter.increment(project, object_type, :fetched)
        yield(associated)
        mark_as_imported(associated)
      end

      # Iterates the parent collection in batches of BATCH_SIZE (by `iid`) and
      # forwards the block to `process_batch`.
      def each_associated_page(&block)
        parent_collection.each_batch(of: BATCH_SIZE, column: :iid) { |batch| process_batch(batch, &block) }
      end

      # For every parent in `batch`, requests its pages of associated objects
      # and yields `(parent, page)` for each page the page counter accepts.
      # The parent is recorded as imported once its pages have been walked.
      def process_batch(batch)
        batch.each do |parent|
          # One counter per parent: a shared counter would make us skip pages
          # of a parent just because another parent already got that far.
          counter = PageCounter.new(project, page_counter_id(parent))
          source_repo = project.import_source
          request_options = collection_options.merge(page: counter.current)

          client.each_page(collection_method, source_repo, parent.iid, request_options) do |page|
            # `set` returning a falsy value means this page must be skipped.
            yield parent, page if counter.set(page.number)
          end

          mark_parent_imported(parent)
        end
      end

      # Adds the parent's iid to the set stored under
      # `parent_imported_cache_key`.
      def mark_parent_imported(parent)
        Gitlab::Cache::Import::Caching.set_add(parent_imported_cache_key, parent.iid)
      end

      # Returns the values of the set stored under
      # `parent_imported_cache_key`.
      def already_imported_parents
        Gitlab::Cache::Import::Caching.values_from_set(parent_imported_cache_key)
      end
    end
  end
end