Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorSean McGivern <sean@mcgivern.me.uk>2017-09-06 18:55:35 +0300
committerSean McGivern <sean@mcgivern.me.uk>2017-09-06 18:55:35 +0300
commitba39b26cc2f1ace540177c3b73d64d40b06bc902 (patch)
tree74691ce0b45a6b2d35efc58cdbfa460dc0ddef22 /lib
parent93e1d4dd285c657a3abb09dff7f86e552b0097f2 (diff)
parent46f6092a6d4dd39bfa193d0a6ccbd5688df7eebe (diff)
Merge branch '35942_api_binary_encoding' into 'master'
API fix for non UTF-8 data Closes #35942 See merge request !14038
Diffstat (limited to 'lib')
-rw-r--r--lib/api/commits.rb2
-rw-r--r--lib/api/entities.rb3
-rw-r--r--lib/gitlab/encoding_helper.rb17
-rw-r--r--lib/gitlab/git/blob.rb12
-rw-r--r--lib/gitlab/git/diff.rb16
5 files changed, 38 insertions, 12 deletions
diff --git a/lib/api/commits.rb b/lib/api/commits.rb
index ea78737288a..4b8d248f5f7 100644
--- a/lib/api/commits.rb
+++ b/lib/api/commits.rb
@@ -104,7 +104,7 @@ module API
not_found! 'Commit' unless commit
- commit.raw_diffs.to_a
+ present commit.raw_diffs.to_a, with: Entities::RepoDiff
end
desc "Get a commit's comments" do
diff --git a/lib/api/entities.rb b/lib/api/entities.rb
index 9114b69606b..1d224d7bc21 100644
--- a/lib/api/entities.rb
+++ b/lib/api/entities.rb
@@ -291,10 +291,11 @@ module API
end
class RepoDiff < Grape::Entity
- expose :old_path, :new_path, :a_mode, :b_mode, :diff
+ expose :old_path, :new_path, :a_mode, :b_mode
expose :new_file?, as: :new_file
expose :renamed_file?, as: :renamed_file
expose :deleted_file?, as: :deleted_file
+ expose :json_safe_diff, as: :diff
end
class ProtectedRefAccess < Grape::Entity
diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb
index 8ddc91e341d..7b3483a7f96 100644
--- a/lib/gitlab/encoding_helper.rb
+++ b/lib/gitlab/encoding_helper.rb
@@ -22,10 +22,10 @@ module Gitlab
# return message if message type is binary
detect = CharlockHolmes::EncodingDetector.detect(message)
- return message.force_encoding("BINARY") if detect && detect[:type] == :binary
+ return message.force_encoding("BINARY") if detect_binary?(message, detect)
- # force detected encoding if we have sufficient confidence.
if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD
+ # force detected encoding if we have sufficient confidence.
message.force_encoding(detect[:encoding])
end
@@ -36,6 +36,19 @@ module Gitlab
"--broken encoding: #{encoding}"
end
+ def detect_binary?(data, detect = nil)
+ detect ||= CharlockHolmes::EncodingDetector.detect(data)
+ detect && detect[:type] == :binary && detect[:confidence] == 100
+ end
+
+ def detect_libgit2_binary?(data)
+ # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
+ # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
+ # which is what we use below to keep a consistent behavior.
+ detect = CharlockHolmes::EncodingDetector.new(8000).detect(data)
+ detect && detect[:type] == :binary
+ end
+
def encode_utf8(message)
detect = CharlockHolmes::EncodingDetector.detect(message)
if detect && detect[:encoding]
diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb
index 7780f4e4d4f..8d96826f6ee 100644
--- a/lib/gitlab/git/blob.rb
+++ b/lib/gitlab/git/blob.rb
@@ -42,14 +42,6 @@ module Gitlab
end
end
- def binary?(data)
- # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
- # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
- # which is what we use below to keep a consistent behavior.
- detect = CharlockHolmes::EncodingDetector.new(8000).detect(data)
- detect && detect[:type] == :binary
- end
-
# Returns an array of Blob instances, specified in blob_references as
# [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0 then the
# full blob contents are returned. If blob_size_limit >= 0 then each blob will
@@ -65,6 +57,10 @@ module Gitlab
end
end
+ def binary?(data)
+ EncodingHelper.detect_libgit2_binary?(data)
+ end
+
private
# Recursive search of blob id by path
diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb
index ce3d65062e8..a23c8cf0dd1 100644
--- a/lib/gitlab/git/diff.rb
+++ b/lib/gitlab/git/diff.rb
@@ -116,6 +116,15 @@ module Gitlab
filtered_opts
end
+
+ # Return a binary diff message like:
+ #
+ # "Binary files a/file/path and b/file/path differ\n"
+ # This is used when we detect that a diff is binary
+ # using CharlockHolmes when Rugged treats it as text.
+ def binary_message(old_path, new_path)
+ "Binary files #{old_path} and #{new_path} differ\n"
+ end
end
def initialize(raw_diff, expanded: true)
@@ -190,6 +199,13 @@ module Gitlab
@collapsed = true
end
+ def json_safe_diff
+ return @diff unless detect_binary?(@diff)
+
+ # the diff is binary, let's make a message for it
+ Diff.binary_message(@old_path, @new_path)
+ end
+
private
def init_from_rugged(rugged)