blob: 79e124a58f55c6a840e19bda6064dcd5cc5b205b (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
# frozen_string_literal: true
module Gitlab
class UrlSanitizer
include Gitlab::Utils::StrongMemoize
# Schemes accepted by .valid? when the caller does not pass its own list;
# also the schemes URI_REGEXP recognises for URLs that carry a scheme.
ALLOWED_SCHEMES = %w[http https ssh git].freeze
# Schemes accepted by .valid_web?.
ALLOWED_WEB_SCHEMES = %w[http https].freeze
# Internal placeholder scheme: #schemify prepends it to URLs that have no
# scheme and #reverse_schemify strips it again before a URL is returned.
SCHEMIFIED_SCHEME = 'glschemelessuri'
SCHEMIFY_PLACEHOLDER = "#{SCHEMIFIED_SCHEME}://".freeze
# Pattern used by .sanitize to find URLs inside free-form text.
#
# URI::DEFAULT_PARSER.make_regexp will only match URLs with schemes or
# relative URLs. The second alternative below matches schemeless URIs with
# userinfo (e.g. user:pass@gitlab.com) but does not match scp-style URIs
# (e.g. user@server:path/to/file).
#
# The userinfo part is very loose compared to URI's implementation, so we
# also match non-escaped userinfo, e.g. foo:b?r@gitlab.com, which should be
# encoded as foo:b%3Fr@gitlab.com.
URI_REGEXP = %r{
(?:
#{URI::DEFAULT_PARSER.make_regexp(ALLOWED_SCHEMES)}
|
(?:(?:(?!@)[%#{URI::REGEXP::PATTERN::UNRESERVED}#{URI::REGEXP::PATTERN::RESERVED}])+(?:@))
(?# negative lookahead ensures this isn't an SCP-style URL: [host]:[rel_path|abs_path] server:path/to/file)
(?!#{URI::REGEXP::PATTERN::HOST}:(?:#{URI::REGEXP::PATTERN::REL_PATH}|#{URI::REGEXP::PATTERN::ABS_PATH}))
#{URI::REGEXP::PATTERN::HOSTPORT}
)
}x
# Masks the credentials of every URL found in +content+.
#
# Every substring matched by URI_REGEXP is replaced with the #masked_url of
# a sanitizer built from that substring. A match for which
# Addressable::URI::InvalidURIError is raised is replaced with an empty
# string instead.
#
# content - the text to scan; it must respond to #gsub.
#
# Returns the result of the gsub call.
def self.sanitize(content)
  content.gsub(URI_REGEXP) do |candidate|
    new(candidate).masked_url
  rescue Addressable::URI::InvalidURIError
    # Unparseable matches are removed from the output.
    ''
  end
end
# Tells whether +url+ is a non-blank String whose scheme is allowed.
#
# url             - the value to check; anything that is not a String, or
#                   is blank, is rejected without being parsed.
# allowed_schemes - collection of acceptable scheme names (defaults to
#                   ALLOWED_SCHEMES); only needs to respond to #include?.
#
# Returns true or false. A URL that Addressable refuses to parse
# (Addressable::URI::InvalidURIError) counts as invalid.
def self.valid?(url, allowed_schemes: ALLOWED_SCHEMES)
  return false unless url.is_a?(String) && url.present?

  # Surrounding whitespace is ignored when extracting the scheme.
  scheme = Addressable::URI.parse(url.strip).scheme
  allowed_schemes.include?(scheme)
rescue Addressable::URI::InvalidURIError
  false
end
# Same check as .valid?, restricted to the schemes in ALLOWED_WEB_SCHEMES.
#
# url - the value to check.
#
# Returns whatever .valid? returns for that scheme list.
def self.valid_web?(url)
valid?(url, allowed_schemes: ALLOWED_WEB_SCHEMES)
end
# Builds a sanitizer for +url+.
#
# url         - the URL to wrap; #parse_url converts it with #to_s and
#               strips it before parsing.
# credentials - optional Hash that may carry :user and :password. Blank
#               values under those keys are replaced with nil in the hash
#               that was passed in, and that same hash object is kept as
#               @credentials.
def initialize(url, credentials: nil)
  unless credentials.nil?
    %i[user password].each do |key|
      next unless credentials.key?(key)

      credentials[key] = credentials[key].presence
    end
  end

  # @credentials must be assigned first: #parse_url reads and fills it.
  @credentials = credentials
  @url = parse_url(url)
end
# Returns the credentials Hash for this URL.
#
# When @credentials is already set it is returned as is. Otherwise a Hash
# with :user and :password is built from the parsed URL (blank parts become
# nil), stored in @credentials and returned.
def credentials
  return @credentials if @credentials

  @credentials = {
    user: @url.user.presence,
    password: @url.password.presence
  }
end
# Returns the value stored under :user in #credentials.
def user
credentials[:user]
end
# Returns the URL as a String with user and password removed.
#
# Works on a copy of @url, so the parsed URL itself keeps its userinfo. The
# placeholder scheme added for schemeless URLs is stripped by
# #reverse_schemify. The method is declared with strong_memoize_attr right
# after its definition (presumably caching the result -- confirm against
# Gitlab::Utils::StrongMemoize).
def sanitized_url
  stripped = @url.dup.tap do |copy|
    copy.password = nil
    copy.user = nil
  end

  reverse_schemify(stripped.to_s)
end
strong_memoize_attr :sanitized_url
# Returns the URL as a String with each present userinfo part replaced by
# "*****".
#
# A blank user or password is left as it is. Works on a copy of @url, and
# the placeholder scheme added for schemeless URLs is stripped by
# #reverse_schemify. The method is declared with strong_memoize_attr right
# after its definition (presumably caching the result -- confirm against
# Gitlab::Utils::StrongMemoize).
def masked_url
  masked = @url.dup.tap do |copy|
    copy.password = "*****" if copy.password.present?
    copy.user = "*****" if copy.user.present?
  end

  reverse_schemify(masked.to_s)
end
strong_memoize_attr :masked_url
# Returns the URL as a String with the known credentials embedded.
#
# When #valid_credentials? is false the parsed URL is returned unchanged.
# Otherwise a copy of @url receives the percent-encoded :password and :user
# from #credentials (each only when present). In both cases the placeholder
# scheme added for schemeless URLs is stripped by #reverse_schemify. The
# method is declared with strong_memoize_attr right after its definition
# (presumably caching the result -- confirm against
# Gitlab::Utils::StrongMemoize).
def full_url
  target = @url

  if valid_credentials?
    target = @url.dup
    password, user = credentials.values_at(:password, :user)
    target.password = encode_percent(password) if password.present?
    target.user = encode_percent(user) if user.present?
  end

  reverse_schemify(target.to_s)
end
strong_memoize_attr :full_url
private
# Parses +url+ into an Addressable::URI, extracting userinfo by hand first.
#
# Steps:
# 1. The input is converted with #to_s, stripped, and given the placeholder
#    scheme by #schemify when it has none.
# 2. Everything between the leading "//" and the last "@" of the string is
#    taken as raw credentials (the capture is greedy) and removed from the
#    string, then split into user and password at the first ":".
# 3. Those values fill @credentials, but only for keys that are still
#    blank there, so values already present in @credentials are kept.
# 4. The remaining string is parsed, and the user/password found in the URL
#    are set on the resulting URI object.
#
# Returns the Addressable::URI. Errors raised by Addressable are not
# rescued here.
def parse_url(url)
  normalized = schemify(url.to_s.strip)
  userinfo = normalized[%r{\A(?:(?:#{SCHEMIFIED_SCHEME}|git|ssh|http(?:s?)):)?//(?:(.+)(?:@))?(.+)}o, 1]
  user = password = nil

  if userinfo.present?
    normalized.sub!("#{userinfo}@", '')
    user, _, password = userinfo.partition(':')
    @credentials ||= {}
    @credentials[:user] = user.presence if @credentials[:user].blank?
    @credentials[:password] = password.presence if @credentials[:password].blank?
  end

  parsed = Addressable::URI.parse(normalized)
  # Password is assigned before user, in the same order as before.
  parsed.password = password if password.present?
  parsed.user = user if user.present?
  parsed
end
# Makes sure +url+ starts with a scheme so it can be parsed uniformly.
#
# A URL that already begins with "<scheme>://" or with "//" is returned
# untouched. Any other URL gets SCHEMIFY_PLACEHOLDER prepended in place.
#
# url - a mutable String.
#
# Returns the same String object that was passed in.
def schemify(url)
  has_scheme_or_is_relative = url.starts_with?(%r{(?:#{URI::REGEXP::PATTERN::SCHEME}:)?//}o)
  return url if has_scheme_or_is_relative

  url.prepend(SCHEMIFY_PLACEHOLDER)
end
# Undoes #schemify: removes a leading SCHEMIFY_PLACEHOLDER from +url+.
#
# The argument is never modified. Callers in this class pass the result of
# a URI object's #to_s directly, so editing that string in place could
# alter a string the URI object still holds; a new String is returned
# instead.
#
# url - the String to clean up; frozen strings are accepted.
#
# Returns a new String without the placeholder prefix (a copy of +url+ when
# the prefix is absent).
def reverse_schemify(url)
  url.delete_prefix(SCHEMIFY_PLACEHOLDER)
end
# Tells whether there is at least one usable credential.
#
# Returns true when #credentials is a Hash holding at least one truthy
# value, false otherwise.
def valid_credentials?
  return false unless credentials.is_a?(Hash)

  credentials.values.any?
end
# Percent-encodes +string+ for use in the userinfo part of a URL.
#
# string - the String to encode.
#
# Returns the encoded String, with spaces written as "%20".
def encode_percent(string)
  # CGI.escape converts spaces to +, but this doesn't work for git clone
  form_encoded = CGI.escape(string)
  form_encoded.gsub('+', '%20')
end
end
end
|