From 38dadcee569adfbbb1c9dc99634bba4e9a9128bc Mon Sep 17 00:00:00 2001
From: GitLab Bot
Date: Wed, 29 Mar 2023 23:49:08 +0000
Subject: Add latest changes from gitlab-org/security/gitlab@15-9-stable-ee

---
 .../nullify_last_error_from_project_mirror_data.rb | 17 ++++
 lib/gitlab/url_sanitizer.rb                        | 90 ++++++++++++++--------
 2 files changed, 76 insertions(+), 31 deletions(-)
 create mode 100644 lib/gitlab/background_migration/nullify_last_error_from_project_mirror_data.rb

(limited to 'lib')

diff --git a/lib/gitlab/background_migration/nullify_last_error_from_project_mirror_data.rb b/lib/gitlab/background_migration/nullify_last_error_from_project_mirror_data.rb
new file mode 100644
index 00000000000..6ea5c17353b
--- /dev/null
+++ b/lib/gitlab/background_migration/nullify_last_error_from_project_mirror_data.rb
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    # Nullifies last_error value from project_mirror_data table as they
+    # potentially included sensitive data.
+    # https://gitlab.com/gitlab-org/security/gitlab/-/merge_requests/3041
+    class NullifyLastErrorFromProjectMirrorData < BatchedMigrationJob
+      feature_category :source_code_management
+      operation_name :update_all
+
+      def perform
+        each_sub_batch { |rel| rel.update_all(last_error: nil) }
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/url_sanitizer.rb b/lib/gitlab/url_sanitizer.rb
index e3bf11b00b4..79e124a58f5 100644
--- a/lib/gitlab/url_sanitizer.rb
+++ b/lib/gitlab/url_sanitizer.rb
@@ -2,15 +2,37 @@
 
 module Gitlab
   class UrlSanitizer
+    include Gitlab::Utils::StrongMemoize
+
     ALLOWED_SCHEMES = %w[http https ssh git].freeze
     ALLOWED_WEB_SCHEMES = %w[http https].freeze
+    SCHEMIFIED_SCHEME = 'glschemelessuri'
+    SCHEMIFY_PLACEHOLDER = "#{SCHEMIFIED_SCHEME}://".freeze
+    # URI::DEFAULT_PARSER.make_regexp will only match URLs with schemes or
+    # relative URLs. This section will match schemeless URIs with userinfo
+    # e.g. user:pass@gitlab.com but will not match scp-style URIs e.g.
+    # user@server:path/to/file)
+    #
+    # The userinfo part is very loose compared to URI's implementation so we
+    # also match non-escaped userinfo e.g foo:b?r@gitlab.com which should be
+    # encoded as foo:b%3Fr@gitlab.com
+    URI_REGEXP = %r{
+      (?:
+        #{URI::DEFAULT_PARSER.make_regexp(ALLOWED_SCHEMES)}
+      |
+        (?:(?:(?!@)[%#{URI::REGEXP::PATTERN::UNRESERVED}#{URI::REGEXP::PATTERN::RESERVED}])+(?:@))
+        (?# negative lookahead ensures this isn't an SCP-style URL: [host]:[rel_path|abs_path] server:path/to/file)
+        (?!#{URI::REGEXP::PATTERN::HOST}:(?:#{URI::REGEXP::PATTERN::REL_PATH}|#{URI::REGEXP::PATTERN::ABS_PATH}))
+        #{URI::REGEXP::PATTERN::HOSTPORT}
+      )
+    }x
 
     def self.sanitize(content)
-      regexp = URI::DEFAULT_PARSER.make_regexp(ALLOWED_SCHEMES)
-
-      content.gsub(regexp) { |url| new(url).masked_url }
-    rescue Addressable::URI::InvalidURIError
-      content.gsub(regexp, '')
+      content.gsub(URI_REGEXP) do |url|
+        new(url).masked_url
+      rescue Addressable::URI::InvalidURIError
+        ''
+      end
     end
 
     def self.valid?(url, allowed_schemes: ALLOWED_SCHEMES)
@@ -37,34 +59,45 @@ module Gitlab
       @url = parse_url(url)
     end
 
+    def credentials
+      @credentials ||= { user: @url.user.presence, password: @url.password.presence }
+    end
+
+    def user
+      credentials[:user]
+    end
+
     def sanitized_url
-      @sanitized_url ||= safe_url.to_s
+      safe_url = @url.dup
+      safe_url.password = nil
+      safe_url.user = nil
+      reverse_schemify(safe_url.to_s)
     end
+    strong_memoize_attr :sanitized_url
 
     def masked_url
       url = @url.dup
       url.password = "*****" if url.password.present?
       url.user = "*****" if url.user.present?
-      url.to_s
-    end
-
-    def credentials
-      @credentials ||= { user: @url.user.presence, password: @url.password.presence }
-    end
-
-    def user
-      credentials[:user]
+      reverse_schemify(url.to_s)
     end
+    strong_memoize_attr :masked_url
 
     def full_url
-      @full_url ||= generate_full_url.to_s
+      return reverse_schemify(@url.to_s) unless valid_credentials?
+
+      url = @url.dup
+      url.password = encode_percent(credentials[:password]) if credentials[:password].present?
+      url.user = encode_percent(credentials[:user]) if credentials[:user].present?
+      reverse_schemify(url.to_s)
     end
+    strong_memoize_attr :full_url
 
     private
 
     def parse_url(url)
-      url = url.to_s.strip
-      match = url.match(%r{\A(?:git|ssh|http(?:s?))\://(?:(.+)(?:@))?(.+)})
+      url = schemify(url.to_s.strip)
+      match = url.match(%r{\A(?:(?:#{SCHEMIFIED_SCHEME}|git|ssh|http(?:s?)):)?//(?:(.+)(?:@))?(.+)}o)
       raw_credentials = match[1] if match
 
       if raw_credentials.present?
@@ -83,24 +116,19 @@ module Gitlab
       url
     end
 
-    def generate_full_url
-      return @url unless valid_credentials?
-
-      @url.dup.tap do |generated|
-        generated.password = encode_percent(credentials[:password]) if credentials[:password].present?
-        generated.user = encode_percent(credentials[:user]) if credentials[:user].present?
-      end
+    def schemify(url)
+      # Prepend the placeholder scheme unless the URL has a scheme or is relative
+      url.prepend(SCHEMIFY_PLACEHOLDER) unless url.starts_with?(%r{(?:#{URI::REGEXP::PATTERN::SCHEME}:)?//}o)
+      url
     end
 
-    def safe_url
-      safe_url = @url.dup
-      safe_url.password = nil
-      safe_url.user = nil
-      safe_url
+    def reverse_schemify(url)
+      url.slice!(SCHEMIFY_PLACEHOLDER) if url.starts_with?(SCHEMIFY_PLACEHOLDER)
+      url
     end
 
     def valid_credentials?
-      credentials && credentials.is_a?(Hash) && credentials.any?
+      credentials.is_a?(Hash) && credentials.values.any?
     end
 
     def encode_percent(string)
-- 
cgit v1.2.3