1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
# frozen_string_literal: true
module Gitlab
module GithubImport
module Importer
class PullRequestsReviewsImporter
include ParallelScheduling
def initialize(...)
super
@merge_requests_already_imported_cache_key =
"github-importer/merge_request/already-imported/#{project.id}"
end
def importer_class
PullRequestReviewImporter
end
def representation_class
Gitlab::GithubImport::Representation::PullRequestReview
end
def sidekiq_worker_class
ImportPullRequestReviewWorker
end
def collection_method
:pull_request_reviews
end
def id_for_already_imported_cache(review)
review.id
end
def each_object_to_import(&block)
if use_github_review_importer_query_only_unimported_merge_requests?
each_merge_request_to_import(&block)
else
each_merge_request_skipping_imported(&block)
end
end
private
attr_reader :merge_requests_already_imported_cache_key
# https://gitlab.com/gitlab-org/gitlab/-/merge_requests/62036#note_587181108
def use_github_review_importer_query_only_unimported_merge_requests?
Feature.enabled?(
:github_review_importer_query_only_unimported_merge_requests,
default_enabled: :yaml
)
end
def each_merge_request_skipping_imported
project.merge_requests.find_each do |merge_request|
next if already_imported?(merge_request)
client
.pull_request_reviews(project.import_source, merge_request.iid)
.each do |review|
review.merge_request_id = merge_request.id
yield(review)
end
mark_as_imported(merge_request)
end
end
# The worker can be interrupted, by rate limit for instance,
# in different situations. To avoid requesting already imported data,
# if the worker is interrupted:
# - before importing all reviews of a merge request
# The reviews page is cached with the `PageCounter`, by merge request.
# - before importing all merge requests reviews
# Merge requests that had all the reviews imported are cached with
# `mark_merge_request_reviews_imported`
def each_merge_request_to_import
each_review_page do |page, merge_request|
page.objects.each do |review|
next if already_imported?(review)
review.merge_request_id = merge_request.id
yield(review)
mark_as_imported(review)
end
end
end
def each_review_page
merge_requests_to_import.find_each do |merge_request|
# The page counter needs to be scoped by merge request to avoid skipping
# pages of reviews from already imported merge requests.
page_counter = PageCounter.new(project, page_counter_id(merge_request))
repo = project.import_source
options = collection_options.merge(page: page_counter.current)
client.each_page(collection_method, repo, merge_request.iid, options) do |page|
next unless page_counter.set(page.number)
yield(page, merge_request)
end
# Avoid unnecessary Redis cache keys after the work is done.
page_counter.expire!
mark_merge_request_reviews_imported(merge_request)
end
end
# Returns only the merge requests that still have reviews to be imported.
def merge_requests_to_import
project.merge_requests.where.not(id: already_imported_merge_requests) # rubocop: disable CodeReuse/ActiveRecord
end
def already_imported_merge_requests
Gitlab::Cache::Import::Caching.values_from_set(merge_requests_already_imported_cache_key)
end
def page_counter_id(merge_request)
"merge_request/#{merge_request.id}/#{collection_method}"
end
def mark_merge_request_reviews_imported(merge_request)
Gitlab::Cache::Import::Caching.set_add(
merge_requests_already_imported_cache_key,
merge_request.id
)
end
end
end
end
end
|