Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/nextcloud/text.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRaul <raul@nextcloud.com>2022-05-16 19:56:25 +0300
committerJonas (Rebase PR Action) <jonas@freesources.org>2022-06-09 11:26:30 +0300
commit9f1b1be2ecb9a80ead6efda20263699254ae684a (patch)
tree6ad723ff0dabf99d69c91d7be1f65c282a27c355
parentf3df3f187313e85319054ed163cadd4389487d79 (diff)
Add EncodingService test cases
Signed-off-by: Raul <raul@nextcloud.com>
-rw-r--r--lib/Service/EncodingService.php52
-rw-r--r--tests/data/big5.txt40
-rw-r--r--tests/data/cp936.txt1
-rw-r--r--tests/data/gbk.txt22
-rw-r--r--tests/data/iso-8859-15.txt (renamed from tests/data/iso-8859.txt)0
-rw-r--r--tests/data/iso-8859-5.txt12
-rw-r--r--tests/data/utf-16.txtbin0 -> 1430 bytes
-rw-r--r--tests/unit/Service/EncodingServiceTest.php52
8 files changed, 91 insertions, 88 deletions
diff --git a/lib/Service/EncodingService.php b/lib/Service/EncodingService.php
index 0ccc8a00c..3f5bada06 100644
--- a/lib/Service/EncodingService.php
+++ b/lib/Service/EncodingService.php
@@ -26,16 +26,15 @@ declare(strict_types=1);
namespace OCA\Text\Service;
class EncodingService {
- public const COMMON_ENCODINGS = ['UTF-8', 'GB2312', 'GBK', 'BIG-5', 'SJIS-win', 'EUC-JP', 'Windows-1252', 'ISO-8859-15', 'ISO-8859-1', 'ASCII'];
-
- public const UTF_BOMs = [
- 'UTF-32BE' => "\x00\x00\xfe\xff",
- 'UTF-32LE' => "\xff\xfe\x00\x00",
- 'UTF-16BE' => "\xfe\xff",
- 'UTF-16LE' => "\xff\xfe",
- 'UTF-8' => "\xef\xbb\xbf"
- ];
+ public const COMMON_ENCODINGS = [ 'UTF-8', 'GB2312', 'GBK', 'BIG-5', 'SJIS-win', 'EUC-JP', 'Windows-1252', 'ISO-8859-15', 'ISO-8859-1', 'ASCII'];
+ public const UTF_BOMs = [
+ 'UTF-32BE' => "\x00\x00\xfe\xff",
+ 'UTF-32LE' => "\xff\xfe\x00\x00",
+ 'UTF-16BE' => "\xfe\xff",
+ 'UTF-16LE' => "\xff\xfe",
+ 'UTF-8' => "\xef\xbb\xbf"
+ ];
public function encodeToUtf8(string $string): ?string {
$encoding = $this->detectEncoding($string);
@@ -47,37 +46,36 @@ class EncodingService {
}
public function detectEncoding(string $string): ?string {
- $bom_detect = $this->detectUtfBom($string);
- if ($bom_detect) {
- return $bom_detect;
- }
+ $bomDetect = $this->detectUtfBom($string);
+ if ($bomDetect) {
+ return $bomDetect;
+ }
- $encodings = $this->getEncodings();
- foreach ($encodings as $encoding) {
+ foreach ($this->getEncodings() as $encoding) {
if (mb_check_encoding($string, $encoding)) {
return $encoding;
}
}
- return null;
+ return mb_detect_encoding($string, $this->getEncodings(), true) ?: null;
}
- public function detectUtfBom(string $string): ?string {
- foreach (self::UTF_BOMs as $encoding => $utf_bom) {
- $bom = substr($string, 0, strlen($utf_bom));
- if ($bom === $utf_bom) {
- return $encoding;
- }
- }
+ private function detectUtfBom(string $string): ?string {
+ foreach (self::UTF_BOMs as $encoding => $utfBom) {
+ $bom = substr($string, 0, strlen($utfBom));
+ if ($bom === $utfBom) {
+ return $encoding;
+ }
+ }
- return null;
- }
+ return null;
+ }
/**
* @return string[]
*/
private function getEncodings(): array {
- $mb_order = mb_detect_order() ?: [];
- return array_merge($mb_order, self::COMMON_ENCODINGS);
+ $mbOrder = mb_detect_order() ?: [];
+ return array_merge($mbOrder, self::COMMON_ENCODINGS);
}
}
diff --git a/tests/data/big5.txt b/tests/data/big5.txt
index f507065cd..0a67d2cb8 100644
--- a/tests/data/big5.txt
+++ b/tests/data/big5.txt
@@ -1,30 +1,24 @@
-Ĥ@
+B C E V T W U F O M B A @ C X P c ˱ Y D d @ D [ G \ E G F A A E ^ ] K ܱ ܲ F G H _ Q E ` F G ܳ H R T S J K L M I P j f i Q a h N O H e g ɰ ɺ ɮ ɲ ɱ ɵ ɹ ɶ ɳ ɫ ɻ ɸ ɯ ɬ ɴ ɷ ɭ f B g D [ ` h d G ] ˽ C b ^ Z e E H @ F c A i \ _ a ˿ ˹ ˸ ˼ ˳ ˵ ˶ ˻ ˴ ˷ ˺ ˾ ˲ V J I Q ] K Y P X T [ N W M U R Z O \ S L H [ c W \ b U Y R V g Q f X S O ] P N Z ` a [ ^ M _ d L T e ^ W _ U X Y ] S R Q \ Z T V ܷ ܽ ܺ ܿ ܴ ܵ ܾ ܼ ܸ ܶ ܻ ܹ v w u x _ l k I a S R J I K ^ Y Z \ [ ` ] ^ J K @ L N T U A j ` _ U a V B ɼ M N k h a @ L Z B Y W X M N b ɾ Z k F ɿ D E ɽ G C l m n P O S Q R c h d g f e b i k j l A B Z @ A B D C O \ [ H U V T W o p l k i j n m C D E y U c V ] ^ I q m c b P _ C l ` K J X o E Q d c b m n R e d r W g f L M s Y Z o D t u p p d e F F X h
-HHͦӦۥѡAbLYMvQW@ߥCL̽ᦳzʩM}ߡAHSY믫۹ݡC
-ĤG
+S e q O f j y t o n u s l z m i x w v k r q { p S Y U [ X N Q T P W Z O R V \ ~ { g | [ ] u p ʥ } _ a ʤ h x t v \ m v s d n o w l j k q ʡ ^ r ʣ f c z b ʦ e i ` ʢ y O P I K M L E A D I R C F H Q M S J G B @ N H K x t } r C O y E B w J | L s ~ { @ F G z v u D N D @ B C A C F B @ G ԡ E D A w ԧ | Ԣ v { z ԥ Ԩ s ԩ ~ Ԧ Ԫ t Ԥ u x } y ԣ q r j u x q n v { o f s m y z l r t h w g i p _ U Q K T N P S I L J O H M R G M G D G S T J B L R F I H H C E K A k O N P @ E B A C D @ Q F { } ~ z | P ` V Q A Y @ X W Z m o n I } | _ ^ ] y ʩ ʨ w z ʧ x T H I E F Ԭ ԫ | I H G k g Y n o b f d c e a ` } ʪ ʫ ʭ { ʮ ʬ ~ | ʯ Y [ Z @ X W V \ A U T Z X ^ U Y [ ] W V Q R S \ P S R W N Q P T X G J O U I J Ժ V M H L Ա ԯ Խ Կ Դ Լ Ծ Թ Բ ئ ԰ Է ԭ Ե Գ Ի Զ K Ը Ԯ ء ت ة آ ؤ ب أ إ } ز ر خ K ث ح ~ ذ د س ج ا ] c _ f W \ U [ d Z ` e V ^ b h X a g Y Y K W V M R N Q \ [ J P Z O L X M O J L N P I K S R a b c B [ h _ i S Z ʰ B ` Y L i ~ p g h ] j [ q i j ʱ a C _ ` ^ Z a b M N O ش j ] k l o n q p m k ʶ ʹ ʸ
-HHɨŨҸ@vQMۥѡAرڡBBʧOByBvСBFvΨLѡByΪ|XB]BXͩΨLϧOC
-ĤT
+ ʵ ʳ ʴ ʻ ʷ ʲ ʺ g o O H p S D K f E d L P c Q J M r i T R n l I k G F j h q m e N o g c s b l p V v d f m q u r k n h j i t e \ b [ ` P U _ \ a Q [ T R c S W X Z Y ] ^ d @ C D B A @ ػ ظ ؽ B G C ض A D غ ط ع ؾ ؼ E ؿ ص B @ C D @ F A A B E n z p v k y x | u t m { o r w q l s U a X W Z \ _ V T ] [ Y _ ^ c ^ ` b ` W V U X Q R Z S \ [ T Y T \ ] C l m n r s s U u t V ] e E G F } ^ p r s v t u w ʼ ʽ W X v x z w { y f w h g c _ ` b d a f e J I H G K F I K H J H I J ~ f g e d ] d _ o x i g N M L L M p \ t y | K q ʾ ʿ N L r u Y ~ Z } x j h k i j ^ P Q O h i a K s w v z h b ] e c \ i l g ` f a d [ _ k ^ j Ϋ Τ Ϊ Σ Υ } { ά Ω y Χ Ψ Φ | z ΢ ~ Ρ έ o n l k n p o s q p ή r m l m q r S R T X A Z V ^ [ U C W B \ ] Y D @ Q R O O P S V N P U T C R D M Q e g k h c b l j j m d i k f a f ` e ^ h d i c _ g j b r D E ` r q t { _ u t W v w x ί S y ] x | n m o γ ΰ α β δ t s v u b F a c ` U E V W T Z \ E [ Y X q o m p n l m k l n n z ~ } F X y z θ η ι ζ κ y u w w x x v ε G J K H g f d e I h Z [ \ ] _ a H G Y ` ^ p s r t q t u o s s q p r o f F G U { λ | I } q r s p μ z z y i L j M ] b
+
+ u v t s t ν k J U { m C n l ^ v L w v u ξ } | { O o r p N u q P t s a _ ` K d L c w x w u @ H I { x y ̯ ̢ ~ ̮ ̩ ̪ ̭ ̬ ̣ | ̥ ̰ ̦ ̨ { ̧ z ̫ } ̤ ̡ ο ѡ ~ } | ~ v Ѥ Ѧ Ѩ S Ѭ ѣ x Q Ѯ R ѥ ѩ ѫ Ѫ ѭ ѧ y Ѣ w z U ^ d | e ` V ~ T b e I c ] a { d Y b W X [ _ \ f c Z } k o @ Q m D q e F S i l G H N s T J O C ^ U r A P ] p N M t E j B K M R g L P h \ p h l n k [ j _ Z @ q X i m O f g A W Y V o } G F | E C D z n { H y B z ~ y ~ x x v } w | { z ~ | y { } V g j i h a J b A t | ̱ ѯ u r ` a t v u I W ̲ Ѱ v Q ~ } ̾ ̷ ̳ ̺ ̼ ̿ ̻ ̴ ̸ ̽ ̶ ̹ ̵ A @ ѱ C B E Ѳ D Ѿ Ѵ f ѷ Ѻ } ѽ ѿ Ѹ ѵ Ѷ Ѽ ѻ ѳ g y u r զ w ա { j գ i բ s h x ե q t դ R o x n l ~ w | p m z v T S k d z j Y g w } k n | \ m l ~ U y i _ p h q f e c ] ѹ V ݷ W { y X o x ` [ a ^ p | ݱ ݶ ݪ l ݻ i z { b k ݤ n o ݥ ݲ ݸ j d ݣ } ݺ ݨ ݩ ~ ݴ ݫ ݵ ݭ e h f ݹ ݰ ݬ ݡ S ݯ m ݧ ݦ g c ݳ ݮ ݢ Q L K O b R T N P U J Z M
+
+ Y X ^ \ ] Z k [ B E F D G l C N d M L K c e u r F G է v u x s w t q ݼ V H y ݽ z { r W H | s _ I ը } ~ ݾ Y X J I O ^ J @ P M S K N Q L O R լ ի խ ժ ծ թ ٧ ٢ ٥ ٨ ٦ ٣ ١ ٤ y ݿ v w u { x t z \ Z [ ` n K m Q R f P T ٩ | B A կ z a g C G B E @ A A @ F D W C M N F X H S I V Q O J P D R U E L T G K [ \ i V L b J [ E e R A D Q a ` F X _ ` c Z K S f Y a m V X C j c ] @ l g I k P H d \ T ^ b G Z Y O _ U W h ] N M B ^ W U մ յ չ վ ս հ ձ ղ ճ պ ռ շ ջ ն տ ٽ ٫ ٳ ٭ ٻ ٶ ٰ ٵ ٯ ٱ ٺ ٷ ٴ ٬ ټ پ ٪ ٲ ٹ ٸ ٮ ո ~ ٿ J H ^ F X } _ B ] G U d ] [ @ Z o Q a m I ^ K Y g D k a M C W h ` e S f E P L N ` _ n O b T c l j A V i b R \ E A H I
+
+ D J @ G C F B c h i b f e g d Q N W V T O r P q S p X R M o L V U U h Y Z T X S W v V Y d p c e q d K j Y w B Z [ n k \ e o f p f g h L l Z _ q g i j ` C H r h s i j B A C @ @ A A B @ k M [ ] a ~ I J ^ t k l D B r ɡ ɤ ɣ D ɢ @ E [ Y L Q S L M U R O Q V Z X Z K M \ T W E G ^ U N J Y V H I C O P [ ] P N S \ W R ] F T K X D j z q K b e B m o v h f g u G p n s J u y c I M O @ l k } r u x | A F ~ w i _ d ` N { t a L | ϡ Ϥ w ϧ Ϫ Ϭ t v { I ϥ ϭ { s d ~ Ϣ x z } } p Ϩ ϫ z m x ϩ o ^ H | w v n ϣ y q r Ϧ y ~ L C U [ W J M F G J V _ E @ N B O Y D h H H E f Z g a S b \ e c I T A G ` F Q C i P K K X ] e R P G [ U G D g d X c N O I E @ Q Y B D ^ F \ S H F J h b _ ] f a R ` A E W V T L K C M A Z I M D J C U V H D B S K Q W A G E B C O L T @ F G F E P N R @ a ` F _ I J h ^ C R H K c j b W i U L Y e T M P Z d G Q [ N E S g V l X f O D ] \ } n w m q s u S } o u ~ | | v t z w x z ~ p y x { t s r { y v D N M Y K O F R T C ^ W [ ` U I L H _ a V \ J E A Z B @ X Q P ] G I @ A H C O B D F E D J G F E B @ A N C Q S Y W Z R V U [ T X P q o p m n s r x _ e y \ v s g w t ^ a b c f ] u d h ` ] j ` k h _ \ ^ b e d g [ i c f i a x y
+
+ W e l A ^ _ b _ ` a X Z U R T V S P W Q Y ϯ ϳ ϶ ϲ ϱ ϴ ϵ Ϯ ϰ w x y P L n v { Q l r k u q M O z j m s t | p N m N P L X J W i H [ R l S V Z O T j k Y M I [ Q U K H I e O Y b X L ` ^ _ J c \ Z K ] a M d p w y ޡ k z ޢ } m ~ l x ޣ q | o v r n u N { s t g d p j l f n m k q h o c e b r i J Q U S K I L M H U V G V Q O L P N R R M N O P K T S W X T \ b ` ^ a ] _ w t u v l m z k j i { l j k y R n b Ϸ } ϸ Ϲ f P ޤ | g B e d c ` [ Ϻ Ͻ ϻ ϼ Ң ҡ ~ S ] ^ o \ _ R p Q k j h i l ަ ޥ ީ ި ާ s t Y Z r } q p n o l ɥ ɦ C D f b a e g c f g d _ Ͼ ] d e a b \ ^ c ` Ͽ Ҩ ҥ ҧ X W U Ҥ ҩ T V Ҧ g ң Ҫ b f e n y h c m t s a d u r q ` i p w T v s V u o q t r U x S ޭ ެ ު ޮ ޫ ް ޯ v u ~ } { z w x y | _ \ ] W [ a ` ^ d e c y x ~ á m n m z Y v j ɧ E l j k h h i m k g j f i l h ү Ҵ ҫ Ҷ Ү ҹ Һ Ҭ Ҹ ҵ ҳ ҷ _ ] ұ ҭ Ұ һ Ҳ ^ Z \ x m k l s t p { u r o y n w z q y [ x w v | ~ ڡ ` ڧ ک ڢ Z ڦ ڥ [ a b ڨ X } { ڣ z _ | ڤ ڪ Y ^ \ ] W ޷ ޻ ޱ ޼ ޲ ޳ ޽ ޺ ޸ ޹ ޵ ޴ ޾ ޶ @ b A i f e g f Z c X \ [ d h Y m z j h k n l g B E u @ o F â D { A C G v t ã s n n Ҽ ҽ } ޿ ] ä { o ` Ҿ ҿ ~ ګ i ^ _ r o p q I H | w c d F j i ڬ k l n m r p q ` d c b a { z f e ڮ ڭ B j s æ å | s o p t
-HHvɦͩRBۥѩMHwC
-ĥ|
+ h i n l k j e m f g ֡ ֢ | ~ ֤ ֣ } ڶ k j ڰ h ڳ l ڴ m ڱ g i ڵ ڲ گ E C H I F G D l k s m r o ` q a b p n t w u v M ¡ N } O ~ L P J ç x è o K p ~ } n o G q m ֦ o ֥ ڸ q ڷ p J x Q q p u p r K t R r q r s ֧ ڹ s L d u c y S s n x w v y u v ְ w t ֪ ֩ ֫ ֬ ֮ ֭ ֲ ֨ ֱ ֯ ڼ ھ ں ڻ ڿ ڽ t C F D E A B @ N Q O M P } ~ v z y w f g e x { | h @ { A | z ~ } U ¤ ¥ ¢ £ T { é y z t w u v f o x u G B | x ͡ z | ~ } { | ֹ z y } ~ { ֺ ֳ ֵ ַ ָ ֶ ֻ ִ v w x N Q M L H O P J K I T X V S U R Y W j l i k F E C D V G Z W ¦ [ ] \ X Y ê ~ } z } y q { | ~ r t s r ͣ ͢ ּ ֽ ־ ֿ R S T A C B @ m H I _ ^ § ë š ɨ V r q p ͤ } | z { y A Z X @ W \ [ Y I H D G F ^ _ [ ] Z \ p E r q n o J ` ª ¨ © í ì H s ͥ ͦ @ ~ D ] ^ C B J K L a ` s K « ¬ u A E M v ͧ B C @ B D G E F C H A a P S G L F c J H b O N K M I R _ Q ] X N P U T W R Q S Y [ V O i \ b c e ` h d f g v j t x Q y w { z O N L P M u c a g ° e d ² j ± k h ® i b ¯ ­ f l ò ð ñ î ï ó x w y ţ
-HoϬΥСF@ΦשMRAHTC
+Ţ X Y m ~ ͨ E F D G H I I O M K L N J V d T e U f a ^ ` _ k a | } W S X T V R U ³ z { A @ t ͩ L J K Z S Y R X V U T Q P W l h ] _ a e [ Y j ` d \ X W b Z ^ k i f g c r j x t x e u b w f v p c q s h g d l i m y n o k p y u r v l t s w q n z r m { o ~ _ Y i a ] d g \ e ` Z h c ^ b [ f n t ¹ w ´ µ o v q º · m ¶ s u ¸ r p ÷ ø ô õ ú ö ~ } ġ B @ B A l o j » Ģ C I u M ` [ _ ] ^ \ @ i j n o h k g m @ p z | } ù D O N b a B C A s m l n r q ~ y x û ý ü P e d c D o p ~ C A B { | } k z { ¼ ½ l ģ Q A R S @ B T f A @ q s ͪ C U h g C B D F G E t u E D p o m n q | ¿ ¾ ť Ť w v F ɩ ͫ v w w t v y u { z x x ͭ Ͱ ͬ | ͯ ͮ [ G H ] W Z c a I g L d \ Y I b D e V _ F K ` O M X J ^ N E f j l k i n H o m N E G H P L J M Q F O K I Q O J ߡ N K P M G L w u { s ߢ x r { } v ~ | ~ y x y } | t z L H M J K I A D C @ B t x z w v u s r y } ~ þ Ĥ E Ŧ C D R N { S P T U V O h p W P | R Q E ߣ R ͱ i Q r q W T V S U X Y Z ߦ ߧ ߥ ߨ ߤ S J F I K H G ĥ F } } X [ A J K M N L ˢ ˣ { ˡ | z y } ~ ~ j Ͷ ͵ ͷ ͼ Ͳ ͹ Ϳ ʹ ͺ ; ͸ ͽ ͻ ͳ b \ d a q t ] k V ` c e Т w U С Y W R o ~ s v Х f } ^ x Ф u y | m У { l p _ Z S X T g n ӥ [ z A Ө v ӣ } Ӳ Ӫ ~ ө x | ӵ ӭ Ӥ ӳ t Ӭ s ӫ r \ Ӧ z { ӡ u ӯ Ӯ Ӷ Ӵ Ӱ ӧ Ӣ w ӱ y ^ ` e y ] h o u b i @ w r n j \ a Y f c s d z l k Z _ p v A [ g m x q t l ` } ۧ ۪ h ۣ i w s t ] ۤ ۡ u ۬ p ۯ n z r ۭ k d o c a ۥ j ۨ ۩ ~ v f ^ ۢ ۫ e ۰ q m | x y g { b ۦ ۮ _ U ߵ ߿ ߪ ߲ ߶ ߱ ߫ ߹ ߸ ߼ ߾ ߰ ߴ ߻ ߺ ߬ ߭ ߷ ߳ ߯ ߮
-Ho[HŦDAάIHݧԪBHDΫVdʪݹJΦD@C
+` X [ Y Z ] a U ^ W V T c \ b _ s t g f b v u ߩ _ c ] p a w Z X d n i O m ߽ [ R U { \ S Q N e ` h x | W k o T y L r V j P ^ Y l } z q M I @ C E A G L F U O F J T Q D H B V S P W M K N S @ E R D A M O Q I P B R J G U H T K L V C N ~ ÿ ħ ĩ Ħ Ī Ĭ ĭ ī Ĩ J K I G H L E F G O h Ӹ ӷ @ B | { ~ X Z Y W Щ Ч Ц i k j Ш ӿ A F Ӽ ӽ C ӻ H Ӿ ӹ G D Ӻ E B L ץ K ר ׫ H F ~ ש ק פ ׬ ׭ ׯ װ } E ע ס ׮ G ף I D צ M J ת ۿ ۴ ۽ ۱ ۶ ۺ ۸ ۲ ۵ ۳ ۾ ۼ ۷ ۹ ۻ j e g h m i l f d k d a k g e ` o \ h i _ ^ l b ] c n [ m j f Y ] Z a g \ p j _ k f m ^ ` n X l d c h [ b i e o Į į B E A C D Q O N @ P F M Z n Ъ G l Ы ױ N Ь Ю Э m I J N M K L P ײ U T ׸ R ׳ S ׿ ׻ ׽ ׷ ׾ O ׺ ׹ ׵ ׼ ״ ׶ Q t B A v @ n p r q s o u p | w y q u x { s t z r v r q w s t u x v İ
-HHba観vQӻ{bk߫eHC
-ĤC
+ I K H J Ũ R ŧ H I K J P n W V C F E D y { z ı T S [ Я o C A @ B D I G H } | } IJ L U ũ L q r а б p T R Q X P Y V S W U O _ Y ^ ` Z [ X ] \ D F E I C B @ G A H P M E J Q G O K N L F L x { N M } O K y | z ~ w J ~ ~ A O @ B C ķ ĵ ĸ Ĵ Ķ ij @ N M P Q A V [ Ū X W Z Y C B @ A @ M N g m R P D D Z a T S v b H V U W Q R F E ˤ \ [ I Y Z X G s t ] ^ h f c g e d J L Q S R U O K M T P N [ T S U H I J ĺ Ĺ R B S \ ū Ŭ E B j i \ ] в v u _ X W V _ b ` a e ^ f c d V L N K M T o w l k ` [ ^ Y l ] \ _ Z h o n p m r i k g j q s [ a Y b X ] c ` _ ^ W \ Z A C @ E B F D E A @ C B D R O S Q P T ļ ľ Ľ Ļ C E V D U a ŭ ` Ů ^ ] b c F _ \ Q P O p n ` U y x c a b m n C A E F L H J B I K D G b @ a c u w v { x t y z | g f d e M N I J K L H @ G F G H I X Y W V Z X Y W F d ů e H G d @ | G d A [ ˦ ˥ ж д | г ~ { } е z j g n i l h e k m f p z v ~ w | r o q } u x t y { s M e O g i N f j h G O ~ P E J C B M L K I N } D F H R C A S D B Q P O E U o R S Q T ˪ ˧ ˬ ˨ ˩ ˫ и м й л н п о к q p r з l V W T n S Y X k \ R [ P Z U m Q R n q i m l j p k h o Y H J V W U Q G Z T F I X K L M ] \ [ \ Z f Ű u t @ A s _ a ] ` o ^ p s U T S s u t r a ^ _ M ` [ \ J K ] L O P N R _ Q ^ Ŀ ] H I C ] q o V q B x v z D y w C B @ C A m l j b q e o v n y u c i w h x z k r s w u t f r v t s d g p a W Y e Z \ f [ d b ^ c ` X g ] _ x z ~ | y { w v }
-kߤeHHAævɨkߪO@A[CHHvɨO@AHKHϥŨ[欰Hκʳoت[欰`C
-ĤK
-
-H˪kΪkߩҽᤩLvQDI`ɡAvѦX檺akxoثI`欰@ĪɱϡC
-ĤE
-
-Ho[HNeBTΩvC
-ĤQ
-
-HHavѤ@ӿWߦӵLʪkxi椽M}fTAHTwLvQMqȨçPwLXDƫC
+ R Z U g P O V e T q c d N X t y s o w u h b } W ~ x m k f n { j z S v | r l Q p Y i D A C B @ @ C E E B A D ` Y T c [ e U _ a W X ] b j g k ^ Z h j \ d f i S V s c q a l h r b e t m p i d ` o k u g n B f @ D A C a f O h I d j N J K ` g M e L _ c b ^ i ű m p l n o i j g k h Ų ų K M L N J D S R T _ U ^ V r u t h s r p q w D x v { E F } z y | { ~ { h [ \ M K I J F F N H L G n l m w x E G F P m l k ɪ X V Y W ˮ ˰ ˯ ˭ @ E F ~ | } I H K J G } ܣ ܢ | ~ ܡ j k i G H O I o A G L ܤ ܦ ܥ n o m l P J p t q u s y B ܨ ܧ s p r q ] ^ _ ` Q N K P S L R O M w v x ~ } z { | H I S n Q R o Ŵ ŵ q E G F W C t Q R D a S M ܩ ܫ ܪ u v c b d V U T T A @ } { ~ | y @ z J K p ŷ Ŷ O P H i C B q r W D X A L M T Q N ܬ z | w x { y g e [ f Y Z U [ Y X V Z W E J F I H G D B E C F @ A Q N O P r V U t s Ÿ I ` X ܭ h G B u R S t s u H O ܮ ~ } i \ k j l a _ ^ ] ` \ K ^ ] _ N L M R K Q T S P O J H I C D X W U T Y v Ź w W v V w a Y P U ` W V L n q s r t p m o c f d c i h g b b a e d Z ^ [ ] \ Y _ b ` a @ X c M E F ` ^ ] c a \ Z [ _ b x ~ y [ Z } | Y { X z } ~ { Ż x | y z ź R S J v j k z l u e j m f d k N f d e \ ż b I h v w n q p o g h f e g O P G g i h Ž T U V K c x i J { r z y i S R Q ^ ] L | { } x v w s y t r u | j { z ~ j m l t o s q p n k C B D A u X W U T J K I H l o V m s q k v j r n u t ` _ ſ p ž Z \ _ [ ` Y W ] X ^ M Z \ [ y x w z s t ܯ } ~ o k p l m n z { ~
+
diff --git a/tests/data/cp936.txt b/tests/data/cp936.txt
new file mode 100644
index 000000000..3fea6a77e
--- /dev/null
+++ b/tests/data/cp936.txt
@@ -0,0 +1 @@
+ L M N ʥ O P Q R S T U V W X Y Z [ \ ] ַ ^ _ ` a b c d e ̮ f g h i j k l ̳ ׹ m n o ̹ p ƺ q r s t u v w x y z { | } ~ ¢ ԫ ܤ ܣ ܥ ܡ ܢ ǵ ܧ ܦ ܩ @ A B C D E ܨ F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ܪ ` a b c d ܫ e f g h i j k l m n o p q r s t u v ܯ ܬ w x y z { ܭ | } ~ Ĺ ǽ ܮ ī ʿ ׳ Ҽ Ϧ ҹ ̫ ز ʧ ͷ @ A B C ۼ D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b Ů ū c d e f g h i j k ױ l m n o p q r s t u v w x y z { | } ~ ķ ʼ ί Ҧ ¦ Ȣ @ A B C D E F G H I J K L M N O Ӥ P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g ý h i j k l m n o p q r s t u v w x y z { | } ~ ϱ ɩ @ A B C D E F G H I Т J K L ѧ M N O ث P Q R S T U V W X Y Z [ \ ] լ ^ _ ` a b c d e f ʵ g h i j k l m n o p q r s t u v w x y z { | } ~ Ԣ į կ Ѱ ξ С Ң ʬ β ƨ ʺ м չ ɽ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g Ͽ h i j k l m n o p q r s t u v w x y z { | } ~ ո Ƕ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i Ρ j k l m n o p q r s t u v w x y Ѳ z { | } ~ ˧ ʦ ϣ ֡ ϯ ñ Ļ ƽ ׯ ® Ӧ ͥ ӹ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e ͢ f g h i j إ Ū k l m n ߮ o p q ʽ r ߱ s t u v w x y z { | } ~ ǿ ¼ ͮ Ӱ ͽ ѭ ΢ ߯ ־ æ ̬ @ A B C D E ŭ F G H I ˼ J K L M N Թ O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k ˡ l m n o p q r s t u v w Ϣ ǡ x y z { | } ~ Ϥ ϧ Ω Ը @ A B C D E F G H Ľ I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ ο _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ и ų ܲ Ϸ ս ¾ @ A B C D E F G H I J K L M N O Ǥ ִ P ɨ Ť Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f ץ g Ͷ h i j k l m n o p ̧ q r s t u v w x y z Ĩ { Ѻ | } ~ Ĵ ׾ £ ӵ š ȭ ˩ ƴ ק ʰ ָ ֿ ̢ Ю Ų ͦ ͱ ׽ Ҵ ̽ ° @ A B C D E F G H I J K L Ҿ M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a Ԯ b c d e f g h § i j k l m n o p q r s ɦ t u v w x y z { | } ~ Я ҡ ̯ ˤ ժ Ħ ġ ߡ Ʋ ˺ ߤ ײ ׫ ߢ ߣ ߥ ߦ ߧ ߨ @ A B C D E ߪ F ߩ G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h ߫ i j k l m n o p q r s t u v w x y z { | } ~ ߬ ߭ ֧ Ч ɢ ի б ն ˹ ʩ ּ Ѯ @ A B C ʱ D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ӳ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ɹ ~ Ͼ ů ĺ Ի ҷ @ A B C D E F G H I ˷ J K L M N O P Q R S T U V ľ W δ ĩ X Y Z [ \ ] ^ _ ` a b c ɱ d Ȩ e f g ɼ h i j k l m n o p q r s t u v w x y z { | } ~ ö ֦ ǹ ij Ⱦ դ ջ ˨ У Φ ͩ ɣ @ A B C ׮ D E F G H I J K L M Ͱ N O P Q R S T U V W X ÷ Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x е y z { | } ~ ɭ ֲ ׵ Ҭ Ш @ A B C D E F G H I J K L M N O P Q R ¥ S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o ե p q r s t u v w x y ȶ z { | } ~ ģ ӣ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ ̴ _ ` a ϭ b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ƿ ŷ @ A B C D E F G H I J K L M N O P Ъ Q Ǹ R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g ֹ h i j k l m n o p q r s t u v w x y z { | } ~ ѳ ֳ Ź ĸ ÿ ع ë ձ ̺ ص å ˮ ͡ ֭ ϫ @ A Ѵ B C D E F G H I J K ̭ L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a ɳ b c d e f û g Ž h ĭ i j k l m n o p մ q r s t й u v w Ȫ x y z { | } ~ Ţ ע ̩ Ӿ к ϴ Ǣ dz Ũ ԡ Ϳ ӿ ɬ Һ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h Ԩ i j k l m n o p q r s t u v w x y μ z { | } ~ տ ʪ Դ Ϫ ̲ @ A B C D E F G H I J K L M N O Ư P Q R S T U V W X Y © Z [ \ ] ^ _ ` a b c d e f Į g h i j k l m n o p q r s t u v w x y z { | } ~ Ϋ DZ º ̶ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ¯ Ȳ ̿ ը ˸ @ A B C D E F G H I ϩ J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ȼ ɷ ú ɿ Ϩ Ѭ ȼ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ צ ү س ˬ Ƭ ţ IJ ĵ ǣ Ϭ Ȯ ״ @ A B C D E F G H I J ʨ K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j è k l m n o p q r s t u Գ v w x y z { | } ~ ̡ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l õ m n o p q r s t u v w x y z { | } ~ ɺ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ɪ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ư ȿ @ A ƿ B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ ߰ \ ] ^ _ ` a b c d e ˦ f g h i j k l m n o p q r s t u v η w x y z { | } ~ @ A B C D E F G H I J K L ű M N O P Q R S ƣ T U V W X Y Z [ ֢ Ӹ Ȭ \ ] ^ _ ` a b c d e ʹ f g Ʀ h i j k l m n o p ̵ q r s t u v w x y z { | } ~ ̱ ȳ @ A B C D E F G H I J K L M N O P Q R S T U V W Ѣ X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ƥ @ A ӯ B C D յ E F G H ʢ I J K L M N O P Q R S T U V W X Ŀ Y ä Z ֱ [ \ ] ^ _ ` a b ʡ c d e f ü g h i j k l m n o p q r s t u v w x y z { | գ ѣ } ~ @ A B C ˯ D E F G H I J K L M N O P Q R S T U V غ W X Y Z [ \ ] ^ _ Ϲ ` a b c d e f g h i j k l m n ޫ o p Ƴ q r s ˲ t u v w x ͫ y z { | } ~ հ ì ʸ ֪ @ A B C D E F ʯ G H I J K L M ɰ N O P Q R S T U V W X Y Z ש [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { ˶ | } ~ Ӳ ȷ @ A B C D E µ F G H I J K L M N O P Q R S T U V W X Y Z ̼ [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ĥ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h ʾ i j k l m n o p q r s t u v w x y z { | ף } ~ Ʊ » @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b خ c d ˽ e ͺ f g h i j k l m n o p q r s t u v w x y z { | } ~ ϡ ˰ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ѩ ͻ խ Ҥ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l վ m n o p q r s ͯ t u v w x y z { | } ~ Ц @ A B C D E F G H I J K L M N O P Q R S T Ͳ U V W X ɸ Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p ǩ q r s t u v w x y z { | } ~ @ ׭ ƪ A B C D E F G H I ¨ J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ճ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` Ŵ a b c d e f g h i j ϵ k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Լ γ ɴ ֽ Ŧ ϸ ֯ Ѥ ͳ ά ׺ Ե ӧ ȱ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d Ǽ e f g h i j k l m n o p q r s t Ⱥ u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R ҫ S T U V W ˣ X Y Z [ \ ] ^ _ ` a b c d e f g h i j k Ү l m n o p q r s t u v ְ w x y z { | } ~ Ƹ €  ‚ ƒ „ … † ‡ ˆ ‰ Š ‹ Œ  Ž   ‘ ’ “ ” • – — ˜ ™ š › œ  ž Ÿ   @ A B C D E F G H I J K L M Ф N O P ֫ Q R S T U V W X Y Z [ \ в θ ] ^ _ ` a b ̥ c d e f g h i j ʤ k l m n ط o p q r s t u v w x ֬ y z { | } ~ À ŧ Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß Ƣ à @ A B C D E F G Ҹ H I J K ǻ L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w Ĥ ϥ x y z { | } ~ Ā ā Ă ă Ą ą Ć ć Ĉ ĉ Ċ ċ Č č Ď ď Đ đ Ē ē ӷ Ĕ ĕ Ė ė Ę ę Ě ě Ĝ ĝ Ğ ğ Ġ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a Ҩ b c d e f g h i j k l m n o p q ˴ r s t u v w x y z { | } ~ ŀ Ł ł Ń ń Ņ ͧ ņ Ň ň ʼn Ŋ ŋ Ō ō Ŏ ŏ Ő ő Œ œ Ŕ ŕ Ŗ ŗ Ř ř Ś ś Ŝ ŝ Ş ş Š @ A B C D ɫ E F G H ܳ I J ܴ ܵ K L M ܹ N O P Q ܷ R ܺ ܶ S ܻ â T U V W ܼ ܽ X Y ֥ Z [ \ ] « ^ ܸ ܾ о _ ` a ܿ b c d ѿ e f g h i j έ k l Է ̦ m n o p q r s t u ɻ v w x y Ӣ z { | } ~ ƻ ƀ Ɓ Ƃ ƃ Ƅ ƅ ï é Ɔ Ƈ ƈ Ɖ Ɗ Ƌ ƌ ƍ Ǝ ݢ Ə Ɛ Ƒ ƒ Ɠ Ɣ ƕ Ɩ Ɨ Ƙ ƙ ƚ ã ƛ Ɯ Ɲ ƞ Ɵ Ơ @ A B C D E F G H I J K L M N O P Q R S ӫ ݡ ݣ ݥ ݤ ݦ ݧ ҩ T U V W X Y Z ݩ [ \ ݶ ݱ ݴ ] ^ _ ` a b c ݰ d e f g h i ɯ j k l ݮ m n o p ݷ q r ݯ s ݸ t ݬ u v w x y z { ݹ ݳ ݭ Ī | } ~ ǀ ݨ ݪ ݫ ǁ ݲ ݵ Ө ݺ ǂ ݻ ç ǃ DŽ ݼ Dž dž LJ Lj lj NJ Nj nj Ǎ ǎ Ǐ ǐ Ǒ ǒ Ǔ ݿ ǔ Ǖ ǖ Ǘ ǘ Ǚ ǚ ݾ Ǜ ǜ ǝ Ǟ ǟ Ǡ @ A B C D E F G H I J K L ݽ M N O P Q R Ƽ ή S T U V W X Y Z [ \ ] ^ _ ` a b ө Ӫ c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ȁ ȁ Ȃ ȃ Ȅ ȅ Ȇ ȇ Ȉ ȉ Ȋ ȋ Ȍ ȍ Ȏ ȏ Ȑ ȑ Ȓ ȓ Ȕ ȕ Ȗ ȗ Ș ș Ț ț Ȝ ȝ Ȟ ȟ Ƞ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ɀ Ɂ ɂ Ƀ Ʉ Ʌ Ɇ ɇ Ɉ ɉ Ɋ ɋ Ɍ ɍ Ɏ ɏ ɐ ɑ ɒ ɓ ɔ ɕ ɖ ɗ ɘ ə ɚ ɛ ޤ ɜ ɝ ޣ ɞ ɟ ɠ @ A B C D E F G H I J K L M N O P Q R ε S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j Ǿ ޢ k l m n o ެ p q r s ަ t u v w x y z { | } ~ ޡ ʀ ʁ ޥ ʂ ʃ ʄ ʅ ީ ʆ ʇ ʈ ʉ ʊ ި ʋ ʌ ʍ ާ ʎ ʏ ʐ ʑ ʒ ʓ ʔ ʕ ʖ ޭ ʗ ʘ ʙ ʚ ʛ ޳ ު ޮ ʜ ʝ ʞ ʟ ʠ @ A ޶ B ޱ C D E F G H I ޲ J K L M N O P Q R S T Ѧ ޵ U V W X Y Z [ ޯ \ ] ^ ް _ н ` a b ޴ ޹ c d e f g h ޸ i ޷ j k l m n o p ޻ q r s t u v w x y z { | } ~ ޺ ˀ ź ˁ ˂ ˃ ˄ ˅ ˆ ޼ ˇ ˈ ˉ ˊ ˋ ˌ ˍ ˎ ˏ ː ˑ ˒ ˓ ˔ ˕ ˖ ˗ ˘ ˙ ˚ ˛ ˜ ˝ ˞ ˟ ˠ @ A B C D ޽ E F G H I ޿ J K L M N O P Q R S T Ģ U V W X Y Z [ \ ] ^ _ ` a b c d e f g h ޾ i j k l m n o p q r s t u v w պ x y z { | } ~ ̀ ́ ̂ ̃ ̄ ̅ ̆ ̇ ̈ ̉ ̊ ̋ ² Ű ̌ ̍ ̎ ̏ ̐ ̑ ̒ ̓ ̔ ̕ ̖ ̗ ̘ ̙ ̚ ̛ ̜ ̝ ̞ ̟ ̠ @ A B ʭ C D E F G H I Ϻ ʴ J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ̀ ́ ͂ ̓ ̈́ ͅ ͆ ͇ ͈ ͉ ͊ ͋ ͌ ͍ ͎ ͏ Ӽ ͐ ͑ ͒ ͓ ͔ ͕ ͖ ͗ ͘ ͙ ͚ ͛ ͜ ͝ ͞ ͟ ͠ ֩ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ Ӭ _ ` a Ы b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ΀ ΁ ΂ ΃ ΄ ΅ Ά · Έ Ή Ί ΋ Ό ΍ Ύ Ώ ΐ Α Β Γ Δ Ε Ζ Η Θ Ι Κ Λ Μ Ν Ξ Ο Π @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ з π ρ ς σ τ υ φ χ ψ ω ϊ ϋ ό ύ ώ Ϗ ϐ ϑ ϒ ϓ ϔ ϕ ϖ ϗ Ϙ ϙ Ϛ ϛ Ϝ ϝ Ϟ ϟ Ϡ @ A B C D E F G H I J K L M N O P Q R S T U V W X Ѫ Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v ˥ w x y z { | } ~ Ѐ Ё Ђ Ԭ Ѓ Є Ѕ І Ї Ј Љ Њ Ћ Ќ ̻ Ѝ Ў Џ А Б В Г Д Е Ж З И Й К Л М Н О П Р @ Ϯ A B C D E F G H I J K L M N O P Q R װ S T U V W X Y Z [ \ ԣ ] ^ ȹ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ р с т у ф х ц ч ш щ ъ ы ь э ю я ѐ ё ђ ѓ є ѕ і ї ј љ њ ћ ќ ѝ ў џ Ѡ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ҁ ҁ ҂ Ҫ ҃ ҄ ҅ ҆ ҇ ҈ ҉ Ҋ ҋ Ҍ ҍ Ҏ ҏ Ґ ґ Ғ ғ Ҕ ҕ Җ җ Ҙ ҙ Қ қ Ҝ ҝ Ҟ ҟ Ҡ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ӏ Ӂ ӂ Ӄ ӄ Ӆ ӆ Ӈ ӈ Ӊ ӊ Ӌ ӌ Ӎ ӎ ӏ Ӑ ӑ Ӓ ӓ Ӕ ӕ Ӗ ӗ Ә ә Ӛ ӛ Ӝ ӝ Ӟ ӟ Ӡ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ԁ ԁ Ԃ ԃ Ԅ ԅ Ԇ ԇ Ԉ ԉ Ԋ ԋ Ԍ ԍ Ԏ ԏ Ԑ ԑ Ԓ ԓ Ԕ ԕ Ԗ ԗ Ԙ ԙ ղ Ԛ ԛ Ԝ ԝ Ԟ ԟ Ԡ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Հ Ձ Ղ Ճ Մ Յ Ն Շ Ո Չ Պ Ջ Ռ Ս Վ Տ Ր Ց Ւ Փ Ք Օ Ֆ ՗ ՘ ՙ ՚ ՛ ՜ ՝ ՞ ՟ ՠ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ր ց ւ փ ք օ ֆ և ֈ ։ ֊ ֋ ֌ ֍ ֎ ֏ ֐ ֑ ֒ ֓ ֔ ֕ ֖ ֗ ֘ ֙ ֚ ֛ ֜ ֝ ֞ ֟ ֠ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d Ʃ e f g h i j k l m n o p q r s t u v w x y z { | } ~ ׀ ׁ ׂ ׃ ׄ ׅ ׆ ׇ ׈ ׉ ׊ ׋ ׌ ׍ ׎ ׏ א ב ג ד ה ו ז ח ט ڥ ڦ ڧ ڨ י ѵ Ѷ ך ک ڪ ګ כ ֤ ڬ ڭ ʶ ל թ ڮ ڰ گ ם ڱ ڲ ڳ ڴ ʫ ڵ ڶ ڷ ڸ ڹ ѯ ں ڻ ڼ מ ڽ ھ ڿ ˵ ŵ ˭ ׻ ̸ ן ı г ν נ л ҥ ǫ á ̷ Ǵ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h ԥ i j k l m n o p q r s t u v w x y z { | } ~ ؀ ؁ ò ؂ ؃ ؄ ؅ ؆ ؇ ؈ ؉ ؊ ؋ ، ؍ ؎ ؏ ؐ ؑ ؒ ؓ ؔ ؕ ؖ ؗ ؘ ؙ ؚ ؛ ؜ ؝ ؞ ؟ ؠ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ـ ف ق ك ل م ن ه و ى ي ً ٌ ٍ َ ُ ِ ّ ْ ٓ ٔ ٕ ٖ ٗ ٘ ٙ ٚ ٛ ٜ ٝ ٞ ٟ ٠ @ A B C D E F G H I J K L M N O ̰ ƶ ó ¸ P Q R ׸ ׬ S Ӯ T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k Խ l m n o p q r s t u v w x y z { | } ~ ڀ Ȥ ځ ڂ ڃ ڄ څ چ ڇ ڈ ډ ڊ ڋ ڌ ڍ ڎ ſ ڏ ڐ ڑ ڒ ړ ֺ ڔ ڕ ږ Ծ ڗ ژ ڙ ښ ڛ ڜ ڝ ڞ ڟ ڠ @ A B C D E F G H I J K L · M N O P Q R S T U V W X Y Z [ \ ӻ ] ^ _ ̤ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ۀ ہ ۂ ۃ ̣ ۄ ۅ ۆ ۇ ۈ ۉ ۊ ۋ ی ۍ ێ ۏ ې ۑ ے ۓ ۔ ە ۖ ۗ ۘ ۙ ۚ ۛ ۜ ۝ ۞ ۟ ۠ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ܀ ܁ ܂ ܃ ܄ ܅ ܆ ܇ ܈ ܉ ܊ ܋ ܌ ܍ ܎ ܏ ܐ ܑ ܒ ܓ ܔ ܕ ܖ ܗ ܘ ܙ ܚ ܛ ܜ ܝ ܞ ܟ ܠ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ݀ ݁ ݂ ݃ ݄ ݅ ݆ ݇ ݈ ݉ ݊ ݋ ݌ ݍ ݎ ݏ ݐ ݑ ݒ ݓ ݔ ݕ ݖ ݗ ݘ ݙ ݚ ݛ ݜ ݝ ݞ ݟ ݠ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a ת b c d ԯ Ͻ շ e f g h i j k l m n o p q r s t u v w x y z { | Ǩ } Ѹ ~ ހ ށ ނ ރ ބ ӭ ޅ ކ އ ވ މ Զ Υ ފ ދ ތ ލ ގ ޏ ސ ޑ ޒ ޓ ޔ ޕ ޖ ޗ ׷ ޘ ޙ ޚ ޛ ѡ ѷ ޜ ޝ ͸ ޞ ; ޟ ޠ @ ͨ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i Dz j ң k l m n o p q r s t u v w x y z { | } ~ ߀ ߁ ߂ ߃ ߄ ߅ ߆ ߇ ߈ ߉ ߊ ߋ ߌ ߍ ߎ ߏ ߐ ߑ ߒ ߓ ߔ ߕ ߖ ߗ ߘ а ߙ ߚ ۢ ߛ ߜ ۡ ߝ ߞ ۥ ߟ ߠ @ A ۧ ۤ B ۨ C D E F G ۣ ۦ ֣ H ۩ I J K ۭ L M N ۮ ۬ O P Q ۫ R S T ۪ U V ۯ W X ۰ Y Z [ \ ] ^ _ ` a b c ۱ d e f g ۲ h i j k l m n o p q r s t u v w x y z { | } ~ ۴ ۳ ۵ ۷ ۶ ۸ ۹ @ ۺ A B C D E F G H I J K ̪ L M N O P Q R ͪ S ø T U V W X Y Z [ \ ] ^ _ ` a b c d e f g ȩ h i j k l m n o p q r s t u v w x y z { | } ~ Ұ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ǥ Կ ť Ǯ ǯ Ǧ í ͭ ա ϳ ҿ п ê ׶ @ A B þ C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ բ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v ½ ¤ w ª İ x y z { | } ~ Ժ ¡ @ A B C D E F ϶ G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c ȸ d e f g h Ӻ i j k l m n o p q r s t u v w x y z { | } ѩ ~ ù ˪ ϼ @ A B C D E F G H I J K L M ¶ N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ѥ Ь @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Τ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E ҳ ˳ Ԥ ­ F G Ƶ H I ӱ J K ȧ L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v Ʈ w x y ʳ z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A ظ B C D E F G H I J K L M N ܰ O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q Ԧ ѱ R ¿ ʻ פ S T U V W X ƭ Y ɧ Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d ت e f g h i j k l m n o p q r s t u κ v w x ħ y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ³ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ Ÿ ѻ ] Ѽ ^ _ ԧ ` a b c ȵ d e f g h i j k l m n o p q r ӥ s t u v w x y z { | } ¹ ~ ´ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ǭ ^ _ ` Ĭ a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ؾ ػ ܱ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ȣ @ A B C D E F G H I J K L M N O
diff --git a/tests/data/gbk.txt b/tests/data/gbk.txt
deleted file mode 100644
index f2846a88d..000000000
--- a/tests/data/gbk.txt
+++ /dev/null
@@ -1,22 +0,0 @@
- , Ȩ һ ƽ ȡ , Ӧ ϵ
-
-
- ˵ ǣ
- ʱ ģ Ӧ ͬ ģ Ӧ
-
-
-
- һ Ȩ , 塢 ɫ ԡ ̡ ⡢
- һ ġ λ ֮ ͬ , Ȩ ֮ ¡
-
- ˵ ǣ
- ͬ Ƥ ɫ ͬ ͬ ԣ ͬ 飬 ٣ Ⱥ ͬ ң ÿ Ȩ Щ Ȩ Ӧ Ӱ 졣
-
-
-
- Ȩ ȫ
-
-
- ˵ ǣ
- Ȩ ȫ Ȩ
-
diff --git a/tests/data/iso-8859.txt b/tests/data/iso-8859-15.txt
index 7f1b95575..7f1b95575 100644
--- a/tests/data/iso-8859.txt
+++ b/tests/data/iso-8859-15.txt
diff --git a/tests/data/iso-8859-5.txt b/tests/data/iso-8859-5.txt
new file mode 100644
index 000000000..6cc845176
--- /dev/null
+++ b/tests/data/iso-8859-5.txt
@@ -0,0 +1,12 @@
+ 1
+ . .
+
+ 2
+ , , , - , , , , , , , , .
+
+ , , , , , , , - .
+
+ 3
+ , .
+
+
diff --git a/tests/data/utf-16.txt b/tests/data/utf-16.txt
new file mode 100644
index 000000000..d7be9787a
--- /dev/null
+++ b/tests/data/utf-16.txt
Binary files differ
diff --git a/tests/unit/Service/EncodingServiceTest.php b/tests/unit/Service/EncodingServiceTest.php
index 8458aaae9..d72da7669 100644
--- a/tests/unit/Service/EncodingServiceTest.php
+++ b/tests/unit/Service/EncodingServiceTest.php
@@ -15,42 +15,62 @@ class LabelServiceTest extends TestCase {
/**
* Attempt to decode the file using the default decoding order.
* For files with encodings not included in the COMMON_ENCODINGS array encoding to UTF-8 will fail.
+ * We then do the conversion again after setting the mb_detect_order value: all conversions should succeed.
* @dataProvider dataFileEncodings
*/
- public function testDefault(string $file, string $encoding) {
- $utf8_string = $this->encodingService->encodeToUtf8(file_get_contents($file));
+ public function testEncoding(string $file, string $encoding) {
+ $utf8String = $this->encodingService->encodeToUtf8(file_get_contents($file));
// If encoding is not part of the default encodings we can expect it to fail
// It might still succeed because encoding detection is not precise.
- if (!$utf8_string && !in_array($encoding, EncodingService::COMMON_ENCODINGS, true)) {
- return;
+ if ($utf8String || $this->isSupportedEncoding($encoding)) {
+ $this->assertNotNull($utf8String);
+ $this->assertNotFalse(mb_detect_encoding($utf8String, 'UTF-8', true));
}
- $this->assertNotNull($utf8_string);
- $this->assertNotFalse(mb_detect_encoding($utf8_string, 'UTF-8', true));
+ $originalOrder = mb_detect_order();
+ $this->assertNotFalse(mb_detect_order($encoding));
+
+ $utf8String = $this->encodingService->encodeToUtf8(file_get_contents($file));
+ $this->assertNotNull($utf8String);
+ $this->assertNotFalse(mb_detect_encoding($utf8String, 'UTF-8', true));
+
+ mb_detect_order($originalOrder);
}
/**
- * Includes the encoding of the file in the detection order config value.
- * This means that all files should be successfully encoded to UTF-8.
+ * If the encoding is in the list of common encodings we should be able to detect an encoding (it might not be the
+ * correct encoding due to detection inaccuracies). If not, add the encoding to mb_detect_order.
* @dataProvider dataFileEncodings
*/
- public function testCustomOrder(string $file, string $encoding) {
- $original_order = mb_detect_order();
+ public function testDetection(string $file, string $encoding) {
+ $detectedEncoding = $this->encodingService->detectEncoding(file_get_contents($file));
+ if ($this->isSupportedEncoding($encoding)) {
+ $this->assertNotNull($detectedEncoding);
+ }
+
+ $originalOrder = mb_detect_order();
$this->assertNotFalse(mb_detect_order($encoding));
- $utf8_string = $this->encodingService->encodeToUtf8(file_get_contents($file));
- $this->assertNotNull($utf8_string);
- $this->assertNotFalse(mb_detect_encoding($utf8_string, 'UTF-8', true));
+ $detectedEncoding = $this->encodingService->detectEncoding(file_get_contents($file));
+ $this->assertEquals($encoding, $detectedEncoding);
- mb_detect_order($original_order);
+ mb_detect_order($originalOrder);
}
+
public function dataFileEncodings(): array {
return [
- ['./tests/data/iso-8859.txt', 'ISO-8859-1'],
+ ['./tests/data/iso-8859-15.txt', 'ISO-8859-15'],
['./tests/data/big5.txt', 'BIG-5'],
- ['./tests/data/gbk.txt', 'GBK']
+ ['./tests/data/cp936.txt', 'CP936'],
+ ['./tests/data/utf-16.txt', 'UTF-16LE'],
+ ['./tests/data/iso-8859-5.txt', 'ISO-8859-5'],
];
}
+
+ private function isSupportedEncoding(string $encoding): bool {
+ return in_array($encoding, EncodingService::COMMON_ENCODINGS, true)
+ || isset(EncodingService::UTF_BOMs[$encoding]);
+ }
}