diff options
author | Raul <raul@nextcloud.com> | 2022-05-16 19:56:25 +0300 |
---|---|---|
committer | Raul <raul@nextcloud.com> | 2022-05-17 10:12:06 +0300 |
commit | 42a448c995c988a0acd58afe6ee189298eefb7ac (patch) | |
tree | 6d6fac18f94f16a5fa253e7db6bc11d7cb36b6a9 /lib | |
parent | 4bbb8cc608f72a2ea0b3d2483c86dee3f61b3bd1 (diff) |
Add EncodingService test cases
Signed-off-by: Raul <raul@nextcloud.com>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Service/EncodingService.php | 52 |
1 files changed, 25 insertions, 27 deletions
diff --git a/lib/Service/EncodingService.php b/lib/Service/EncodingService.php index 0ccc8a00c..3f5bada06 100644 --- a/lib/Service/EncodingService.php +++ b/lib/Service/EncodingService.php @@ -26,16 +26,15 @@ declare(strict_types=1); namespace OCA\Text\Service; class EncodingService { - public const COMMON_ENCODINGS = ['UTF-8', 'GB2312', 'GBK', 'BIG-5', 'SJIS-win', 'EUC-JP', 'Windows-1252', 'ISO-8859-15', 'ISO-8859-1', 'ASCII']; - - public const UTF_BOMs = [ - 'UTF-32BE' => "\x00\x00\xfe\xff", - 'UTF-32LE' => "\xff\xfe\x00\x00", - 'UTF-16BE' => "\xfe\xff", - 'UTF-16LE' => "\xff\xfe", - 'UTF-8' => "\xef\xbb\xbf" - ]; + public const COMMON_ENCODINGS = [ 'UTF-8', 'GB2312', 'GBK', 'BIG-5', 'SJIS-win', 'EUC-JP', 'Windows-1252', 'ISO-8859-15', 'ISO-8859-1', 'ASCII']; + public const UTF_BOMs = [ + 'UTF-32BE' => "\x00\x00\xfe\xff", + 'UTF-32LE' => "\xff\xfe\x00\x00", + 'UTF-16BE' => "\xfe\xff", + 'UTF-16LE' => "\xff\xfe", + 'UTF-8' => "\xef\xbb\xbf" + ]; public function encodeToUtf8(string $string): ?string { $encoding = $this->detectEncoding($string); @@ -47,37 +46,36 @@ class EncodingService { } public function detectEncoding(string $string): ?string { - $bom_detect = $this->detectUtfBom($string); - if ($bom_detect) { - return $bom_detect; - } + $bomDetect = $this->detectUtfBom($string); + if ($bomDetect) { + return $bomDetect; + } - $encodings = $this->getEncodings(); - foreach ($encodings as $encoding) { + foreach ($this->getEncodings() as $encoding) { if (mb_check_encoding($string, $encoding)) { return $encoding; } } - return null; + return mb_detect_encoding($string, $this->getEncodings(), true) ?: null; } - public function detectUtfBom(string $string): ?string { - foreach (self::UTF_BOMs as $encoding => $utf_bom) { - $bom = substr($string, 0, strlen($utf_bom)); - if ($bom === $utf_bom) { - return $encoding; - } - } + private function detectUtfBom(string $string): ?string { + foreach (self::UTF_BOMs as $encoding => $utfBom) { + $bom = substr($string, 0, strlen($utfBom)); + if ($bom === $utfBom) { + return $encoding; + } + } - return null; - } + return null; + } /** * @return string[] */ private function getEncodings(): array { - $mb_order = mb_detect_order() ?: []; - return array_merge($mb_order, self::COMMON_ENCODINGS); + $mbOrder = mb_detect_order() ?: []; + return array_merge($mbOrder, self::COMMON_ENCODINGS); } } |