1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
|
# frozen_string_literal: true
module Gitlab
module GithubImport
# Class that can be used for finding a GitLab user ID based on a GitHub user
# ID or username.
#
# Any found user IDs are cached in Redis to reduce the number of SQL queries
# executed over time. Valid keys are refreshed upon access so frequently
# used keys stick around.
#
# Lookups are cached even if no ID was found to remove the need for querying
# the database when most queries are not going to return results anyway.
class UserFinder
attr_reader :project, :client
# The base cache key to use for caching user IDs for a given GitHub user
# ID.
ID_CACHE_KEY = 'github-import/user-finder/user-id/%s'
# The base cache key to use for caching user IDs for a given GitHub email
# address.
ID_FOR_EMAIL_CACHE_KEY =
'github-import/user-finder/id-for-email/%s'
# The base cache key to use for caching the Email addresses of GitHub
# usernames.
EMAIL_FOR_USERNAME_CACHE_KEY =
'github-import/user-finder/email-for-username/%s'
# The base cache key to use for caching the user ETAG response headers
USERNAME_ETAG_CACHE_KEY = 'github-import/user-finder/user-etag/%s'
# The base cache key to store whether an email has been fetched for a project
EMAIL_FETCHED_FOR_PROJECT_CACHE_KEY = 'github-import/user-finder/%{project}/email-fetched/%{username}'
EMAIL_API_CALL_LOGGING_MESSAGE = {
true => 'Fetching email from GitHub with ETAG header',
false => 'Fetching email from GitHub'
}.freeze
# project - An instance of `Project`
# client - An instance of `Gitlab::GithubImport::Client`
def initialize(project, client)
@project = project
@client = client
end
# Returns the GitLab user ID of an object's author.
#
# If the object has no author ID we'll use the ID of the GitLab ghost
# user.
# object - An instance of `Hash` or a `Github::Representer`
def author_id_for(object, author_key: :author)
user_info = case author_key
when :actor
object[:actor]
when :assignee
object[:assignee]
when :requested_reviewer
object[:requested_reviewer]
when :review_requester
object[:review_requester]
else
object ? object[:author] : nil
end
id = user_info ? user_id_for(user_info) : GithubImport.ghost_user_id
if id
[id, true]
else
[project.creator_id, false]
end
end
# Returns the GitLab user ID of an issuable's assignee.
def assignee_id_for(issuable)
user_id_for(issuable[:assignee]) if issuable[:assignee]
end
# Returns the GitLab user ID for a GitHub user.
#
# user - An instance of `Gitlab::GithubImport::Representation::User` or `Hash`.
def user_id_for(user)
find(user[:id], user[:login]) if user.present?
end
# Returns the GitLab ID for the given GitHub ID or username.
#
# id - The ID of the GitHub user.
# username - The username of the GitHub user.
def find(id, username)
email = email_for_github_username(username)
cached, found_id = find_from_cache(id, email)
return found_id if found_id
# We only want to query the database if necessary. If previous lookups
# didn't yield a user ID we won't query the database again until the
# keys expire.
find_id_from_database(id, email) unless cached
end
# Finds a user ID from the cache for a given GitHub ID or Email.
def find_from_cache(id, email = nil)
id_exists, id_for_github_id = cached_id_for_github_id(id)
return [id_exists, id_for_github_id] if id_for_github_id
# Just in case no Email address could be retrieved (for whatever reason)
return [false] unless email
cached_id_for_github_email(email)
end
# Finds a GitLab user ID from the database for a given GitHub user ID or
# Email.
def find_id_from_database(id, email)
id_for_github_id(id) || id_for_github_email(email)
end
# Find the public email of a given username in GitHub.
# The email is cached to avoid multiple calls to GitHub. The cache is shared among all projects.
# If the email was not found, a blank email is cached.
# If the email is blank, we attempt to fetch it from GitHub using an ETAG request once for every project.
# @param username [String] The username of the GitHub user.
#
# @return [String] If public email is found
# @return [Nil] If public email or username does not exist
def email_for_github_username(username)
email = read_email_from_cache(username)
if email.blank? && !email_fetched_for_project?(username)
# If an ETAG is available, make an API call with the ETAG.
# Only make a rate-limited API call if the ETAG is not available and the email is nil.
etag = read_etag_from_cache(username)
email = fetch_email_from_github(username, etag: etag) || email
cache_email!(username, email)
cache_etag!(username) if email.blank? && etag.nil?
# If a non-blank email is cached, we don't need the ETAG or project check caches.
# Otherwise, indicate that the project has been checked.
if email.present?
clear_caches!(username)
else
set_project_as_checked!(username)
end
end
email.presence
rescue ::Octokit::NotFound
cache_email!(username, '')
nil
end
def cached_id_for_github_id(id)
read_id_from_cache(ID_CACHE_KEY % id)
end
def cached_id_for_github_email(email)
read_id_from_cache(ID_FOR_EMAIL_CACHE_KEY % email)
end
# If importing from github.com, queries and caches the GitLab user ID for
# a GitHub user ID, if one was found.
#
# When importing from Github Enterprise, do not query user by Github ID
# since we only have users' Github ID from github.com.
def id_for_github_id(id)
gitlab_id =
if project.github_enterprise_import?
nil
else
query_id_for_github_id(id)
end
Gitlab::Cache::Import::Caching.write(ID_CACHE_KEY % id, gitlab_id)
end
# Queries and caches the GitLab user ID for a GitHub email, if one was
# found.
def id_for_github_email(email)
gitlab_id = query_id_for_github_email(email) || nil
Gitlab::Cache::Import::Caching.write(ID_FOR_EMAIL_CACHE_KEY % email, gitlab_id)
end
# rubocop: disable CodeReuse/ActiveRecord
def query_id_for_github_id(id)
User.by_provider_and_extern_uid(:github, id).select(:id).first&.id
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def query_id_for_github_email(email)
User.by_any_email(email).pick(:id)
end
# rubocop: enable CodeReuse/ActiveRecord
# Reads an ID from the cache.
#
# The return value is an Array with two values:
#
# 1. A boolean indicating if the key was present or not.
# 2. The ID as an Integer, or nil in case no ID could be found.
def read_id_from_cache(key)
value = Gitlab::Cache::Import::Caching.read(key)
exists = !value.nil?
number = value.to_i
# The cache key may be empty to indicate a previously looked up user for
# which we couldn't find an ID.
[exists, number > 0 ? number : nil]
end
private
def read_email_from_cache(username)
Gitlab::Cache::Import::Caching.read(email_cache_key(username))
end
def read_etag_from_cache(username)
Gitlab::Cache::Import::Caching.read(etag_cache_key(username))
end
def email_fetched_for_project?(username)
email_fetched_for_project_cache_key = email_fetched_for_project_cache_key(username)
Gitlab::Cache::Import::Caching.read(email_fetched_for_project_cache_key)
end
def fetch_email_from_github(username, etag: nil)
log(EMAIL_API_CALL_LOGGING_MESSAGE[etag.present?], username: username)
user = client.user(username, { headers: { 'If-None-Match' => etag }.compact })
user[:email] || '' if user
end
def cache_email!(username, email)
return unless email
Gitlab::Cache::Import::Caching.write(email_cache_key(username), email)
end
def cache_etag!(username)
etag = client.octokit.last_response.headers[:etag]
Gitlab::Cache::Import::Caching.write(etag_cache_key(username), etag)
end
def set_project_as_checked!(username)
Gitlab::Cache::Import::Caching.write(email_fetched_for_project_cache_key(username), 1)
end
def clear_caches!(username)
Gitlab::Cache::Import::Caching.expire(etag_cache_key(username), 0)
Gitlab::Cache::Import::Caching.expire(email_fetched_for_project_cache_key(username), 0)
end
def email_cache_key(username)
EMAIL_FOR_USERNAME_CACHE_KEY % username
end
def etag_cache_key(username)
USERNAME_ETAG_CACHE_KEY % username
end
def email_fetched_for_project_cache_key(username)
format(EMAIL_FETCHED_FOR_PROJECT_CACHE_KEY, project: project.id, username: username)
end
def log(message, username: nil)
Logger.info(
project_id: project.id,
class: self.class.name,
username: username,
message: message
)
end
end
end
end
|