Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-foss.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Lin Jen-Shin <godfat@godfat.org> 2017-08-08 14:05:40 +0300
committer Lin Jen-Shin <godfat@godfat.org> 2017-08-08 14:10:46 +0300
commit d831e8e1d06c11e9dc9c0c36767b005a3b86a308 (patch)
tree 006f43816dd690c6d2aa4e7d9f0ee24c9bf3d9bf /spec/lib/gitlab/encoding_helper_spec.rb
parent b8ba0efed017b79647421d5ac38c539096b319cc (diff)
Add a test to show that threshold 40 would corrupt
If we set `ENCODING_CONFIDENCE_THRESHOLD` to 40, this test case does not pass; if we raise it to 50, it passes. Note that if rugged stops returning corrupt data in the future, this test will be less relevant. Still, ICU recommends a threshold of 50, so we should just stick with 50.
Diffstat (limited to 'spec/lib/gitlab/encoding_helper_spec.rb')
-rw-r--r-- spec/lib/gitlab/encoding_helper_spec.rb 44
1 files changed, 44 insertions, 0 deletions
diff --git a/spec/lib/gitlab/encoding_helper_spec.rb b/spec/lib/gitlab/encoding_helper_spec.rb
index 1482ef7132d..26138598651 100644
--- a/spec/lib/gitlab/encoding_helper_spec.rb
+++ b/spec/lib/gitlab/encoding_helper_spec.rb
@@ -30,6 +30,50 @@ describe Gitlab::EncodingHelper do
it 'leaves binary string as is' do
expect(ext_class.encode!(binary_string)).to eq(binary_string)
end
+
+ context 'with corrupted diff' do
+ let(:corrupted_diff) do
+ with_empty_bare_repository do |repo|
+ content = File.read(Rails.root.join(
+ 'spec/fixtures/encoding/Japanese.md').to_s)
+ commit_a = commit(repo, 'Japanese.md', content)
+ commit_b = commit(repo, 'Japanese.md',
+ content.sub('[TODO: Link]', '[現在作業中です: Link]'))
+
+ repo.diff(commit_a, commit_b).each_line.map(&:content).join
+ end
+ end
+
+ let(:cleaned_diff) do
+ corrupted_diff.dup.force_encoding('UTF-8')
+ .encode!('UTF-8', invalid: :replace, replace: '')
+ end
+
+ let(:encoded_diff) do
+ described_class.encode!(corrupted_diff.dup)
+ end
+
+ it 'does not corrupt data but remove invalid characters' do
+ expect(encoded_diff).to eq(cleaned_diff)
+ end
+
+ def commit(repo, path, content)
+ oid = repo.write(content, :blob)
+ index = repo.index
+
+ index.read_tree(repo.head.target.tree) unless repo.empty?
+
+ index.add(path: path, oid: oid, mode: 0100644)
+
+ Rugged::Commit.create(
+ repo,
+ tree: index.write_tree(repo),
+ message: "Update #{path}",
+ parents: repo.empty? ? [] : [repo.head.target].compact,
+ update_ref: 'HEAD'
+ )
+ end
+ end
end
describe '#encode_utf8' do