diff options
Diffstat (limited to 'lib/gitlab/encoding_helper.rb')
-rw-r--r-- | lib/gitlab/encoding_helper.rb | 24 |
1 files changed, 24 insertions, 0 deletions
# diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb
# index f26ab6e3ed1..34c674c3003 100644
# --- a/lib/gitlab/encoding_helper.rb
# +++ b/lib/gitlab/encoding_helper.rb
#
# @@ -71,6 +71,21 @@ module Gitlab
# Unchanged context (tail of the preceding method, truncated by the hunk):
#       encode_utf8(data, replace: UNICODE_REPLACEMENT_CHARACTER)
#     end

# This method escapes unsupported UTF-8 characters instead of deleting them.
#
# When the :escape_gitaly_refs feature flag is disabled, the call is delegated
# to encode! unchanged. Otherwise the message is passed through
# force_encode_utf8; if the result is valid it is returned as is. If not, every
# character that is not valid in the string's encoding is replaced by the
# output of escape_chars, and the result is finally run through clean.
#
# NOTE(review): the map/join step builds a new String, so the final `replace`
# is applied to that new object rather than to the object passed in; callers
# should rely on the return value.
#
# @param message [String] the text to convert
# @return [String] the converted message
def encode_utf8_with_escaping!(message)
  return encode!(message) if Feature.disabled?(:escape_gitaly_refs)

  message = force_encode_utf8(message)
  return message if message.valid_encoding?

  # The guard above has already returned for valid strings, so from here on
  # the message is known to contain at least one invalid character.
  message = message.chars.map { |char| char.valid_encoding? ? char : escape_chars(char) }.join

  # encode and clean the bad chars
  message.replace clean(message)
end

# Unchanged context (head of encode_utf8, truncated by the hunk):
#     def encode_utf8(message, replace: "")
#       message = force_encode_utf8(message)
#       return message if message.valid_encoding?
#
# @@ -145,6 +160,15 @@ module Gitlab
# Unchanged context (tail of the preceding method, truncated by the hunk):
#       message.force_encoding("UTF-8")
#     end

# Escapes \x80 - \xFF characters not supported by UTF-8.
#
# A character made of exactly one byte is rendered as "%XX", where XX is the
# byte value as two upper-case hexadecimal digits. Anything else (an empty
# string or a multi-byte character) is returned untouched.
#
# @param char [String] a single character, as yielded by String#chars
# @return [String] the percent-escaped byte, or +char+ itself
def escape_chars(char)
  bytes = char.bytes

  return char unless bytes.one?

  # Always emit two hex digits so the escape is well-formed for any byte value.
  format('%%%02X', bytes.first)
end

# Unchanged context (head of clean, truncated by the hunk):
#     def clean(message, replace: "")
#       message.encode(
#         "UTF-16BE",