From 0f1fb3c9f81e3d0844613f145b995f9497f9437d Mon Sep 17 00:00:00 2001 From: Daria Volvenkova Date: Wed, 29 Mar 2017 17:08:01 +0300 Subject: Review fixes. --- coding/CMakeLists.txt | 4 +- coding/multilang_utf8_string.cpp | 96 +++++++++++++++++++++++++++------------- coding/transliteration.cpp | 17 ++++--- coding/transliteration.hpp | 4 +- 4 files changed, 80 insertions(+), 41 deletions(-) (limited to 'coding') diff --git a/coding/CMakeLists.txt b/coding/CMakeLists.txt index 52ae94df7b..908c2d0fb6 100644 --- a/coding/CMakeLists.txt +++ b/coding/CMakeLists.txt @@ -71,8 +71,8 @@ set( succinct_mapper.hpp traffic.cpp traffic.hpp - transliterator.cpp - transliterator.hpp + transliteration.cpp + transliteration.hpp uri.cpp uri.hpp url_encode.hpp diff --git a/coding/multilang_utf8_string.cpp b/coding/multilang_utf8_string.cpp index 369ffdb9a0..9e3211501a 100644 --- a/coding/multilang_utf8_string.cpp +++ b/coding/multilang_utf8_string.cpp @@ -10,37 +10,71 @@ namespace // Note, that it's not feasible to increase languages number here due to // our current encoding (6 bit to store language code). StringUtf8Multilang::Languages const g_languages = {{ - {"default", "Native for each country", "Any-Latin"}, - {"en", "English", ""}, {"ja", "日本語", ""}, {"fr", "Français", ""}, - {"ko_rm", "Korean (Romanized)", "Korean-Latin/BGN"}, {"ar", "العربية", "Any-Latin"}, - {"de", "Deutsch", ""}, {"int_name", "International (Latin)", "Any-Latin"}, - {"ru", "Русский", "Russian-Latin/BGN"}, {"sv", "Svenska", "Any-Latin"}, - {"zh", "中文", "Any-Latin"}, {"fi", "Suomi", "Any-Latin"}, - {"be", "Беларуская", "Belarusian-Latin/BGN"}, {"ka", "ქართული", "Georgian-Latin"}, - {"ko", "한국어", "Hangul-Latin/BGN"}, {"he", "עברית", "Hebrew-Latin/BGN"}, {"nl", "Nederlands", ""}, - {"ga", "Gaeilge", "Any-Latin"}, {"ja_rm", "Japanese (Romanized)", "Any-Latin"}, - {"el", "Ελληνικά", "Greek-Latin"}, {"it", "Italiano", ""}, {"es", "Español", ""}, - {"zh_pinyin", "Chinese (Pinyin)", "Any-Latin"}, {"th", "ไทย", "Thai-Latin"}, - {"cy", "Cymraeg", "Any-Latin"}, {"sr", "Српски", "Serbian-Latin/BGN"}, - {"uk", "Українська", "Ukrainian-Latin/BGN"}, {"ca", "Català", "Any-Latin"}, - {"hu", "Magyar", "Any-Latin"}, {"hsb", "Hornjoserbšćina", "Any-Latin"}, - {"eu", "Euskara", "Any-Latin"}, {"fa", "فارسی", "Any-Latin"}, {"br", "Breton", "Any-Latin"}, - {"pl", "Polski", "Any-Latin"}, {"hy", "Հայերէն", "Armenian-Latin"}, - {"kn", "ಕನ್ನಡ", "Kannada-Latin"}, {"sl", "Slovenščina", "Any-Latin"}, - {"ro", "Română", "Any-Latin"}, {"sq", "Shqipe", "Any-Latin"}, - {"am", "አማርኛ", "Amharic-Latin/BGN"}, {"fy", "Frysk", "Any-Latin"}, - {"cs", "Čeština", "Any-Latin"}, {"gd", "Gàidhlig", "Any-Latin"}, - {"sk", "Slovenčina", "Any-Latin"}, {"af", "Afrikaans", "Any-Latin"}, - {"ja_kana", "日本語(カタカナ)", "Katakana-Latin"}, {"lb", "Luxembourgish", "Any-Latin"}, - {"pt", "Português", "Any-Latin"}, {"hr", "Hrvatski", "Any-Latin"}, - {"fur", "Friulian", "Any-Latin"}, {"vi", "Tiếng Việt", "Any-Latin"}, - {"tr", "Türkçe", "Any-Latin"}, {"bg", "Български", "Bulgarian-Latin/BGN"}, - {"eo", "Esperanto", "Any-Latin"}, {"lt", "Lietuvių", "Any-Latin"}, {"la", "Latin", ""}, - {"kk", "Қазақ", "Kazakh-Latin/BGN"}, {"gsw", "Schwiizertüütsch", "Any-Latin"}, - {"et", "Eesti", "Any-Latin"}, {"ku", "Kurdish", "Any-Latin"}, - {"mn", "Mongolian", "Mongolian-Latin/BGN"}, {"mk", "Македонски", "Macedonian-Latin/BGN"}, - {"lv", "Latviešu", "Any-Latin"}, {"hi", "हिन्दी", "Any-Latin"} - }}; + {"default", "Native for each country", "Any-Latin"}, + {"en", "English", ""}, + {"ja", "日本語", ""}, + {"fr", "Français", ""}, + {"ko_rm", "Korean (Romanized)", "Korean-Latin/BGN"}, + {"ar", "العربية", "Any-Latin"}, + {"de", "Deutsch", ""}, + {"int_name", "International (Latin)", "Any-Latin"}, + {"ru", "Русский", "Russian-Latin/BGN"}, + {"sv", "Svenska", "Any-Latin"}, + {"zh", "中文", "Any-Latin"}, + {"fi", "Suomi", "Any-Latin"}, + {"be", "Беларуская", "Belarusian-Latin/BGN"}, + {"ka", "ქართული", "Georgian-Latin"}, + {"ko", "한국어", "Hangul-Latin/BGN"}, + {"he", "עברית", "Hebrew-Latin/BGN"}, + {"nl", "Nederlands", ""}, + {"ga", "Gaeilge", "Any-Latin"}, + {"ja_rm", "Japanese (Romanized)", "Any-Latin"}, + {"el", "Ελληνικά", "Greek-Latin"}, + {"it", "Italiano", ""}, + {"es", "Español", ""}, + {"zh_pinyin", "Chinese (Pinyin)", "Any-Latin"}, + {"th", "ไทย", "Thai-Latin"}, + {"cy", "Cymraeg", "Any-Latin"}, + {"sr", "Српски", "Serbian-Latin/BGN"}, + {"uk", "Українська", "Ukrainian-Latin/BGN"}, + {"ca", "Català", "Any-Latin"}, + {"hu", "Magyar", "Any-Latin"}, + {"hsb", "Hornjoserbšćina", "Any-Latin"}, + {"eu", "Euskara", "Any-Latin"}, + {"fa", "فارسی", "Any-Latin"}, + {"br", "Breton", "Any-Latin"}, + {"pl", "Polski", "Any-Latin"}, + {"hy", "Հայերէն", "Armenian-Latin"}, + {"kn", "ಕನ್ನಡ", "Kannada-Latin"}, + {"sl", "Slovenščina", "Any-Latin"}, + {"ro", "Română", "Any-Latin"}, + {"sq", "Shqipe", "Any-Latin"}, + {"am", "አማርኛ", "Amharic-Latin/BGN"}, + {"fy", "Frysk", "Any-Latin"}, + {"cs", "Čeština", "Any-Latin"}, + {"gd", "Gàidhlig", "Any-Latin"}, + {"sk", "Slovenčina", "Any-Latin"}, + {"af", "Afrikaans", "Any-Latin"}, + {"ja_kana", "日本語(カタカナ)", "Katakana-Latin"}, + {"lb", "Luxembourgish", "Any-Latin"}, + {"pt", "Português", "Any-Latin"}, + {"hr", "Hrvatski", "Any-Latin"}, + {"fur", "Friulian", "Any-Latin"}, + {"vi", "Tiếng Việt", "Any-Latin"}, + {"tr", "Türkçe", "Any-Latin"}, + {"bg", "Български", "Bulgarian-Latin/BGN"}, + {"eo", "Esperanto", "Any-Latin"}, + {"lt", "Lietuvių", "Any-Latin"}, + {"la", "Latin", ""}, + {"kk", "Қазақ", "Kazakh-Latin/BGN"}, + {"gsw", "Schwiizertüütsch", "Any-Latin"}, + {"et", "Eesti", "Any-Latin"}, + {"ku", "Kurdish", "Any-Latin"}, + {"mn", "Mongolian", "Mongolian-Latin/BGN"}, + {"mk", "Македонски", "Macedonian-Latin/BGN"}, + {"lv", "Latviešu", "Any-Latin"}, + {"hi", "हिन्दी", "Any-Latin"} +}}; static_assert(g_languages.size() == StringUtf8Multilang::kMaxSupportedLanguages, "With current encoding we are limited to 64 languages max."); diff --git a/coding/transliteration.cpp b/coding/transliteration.cpp index c1a9ab1138..e30a03820a 100644 --- a/coding/transliteration.cpp +++ b/coding/transliteration.cpp @@ -15,11 +15,12 @@ Transliteration::~Transliteration() // but it should be called only once for performance reasons. // The primary benefit is to eliminate reports of memory or resource leaks originating // in ICU code from the results generated by heap analysis tools. + // http://www.icu-project.org/apiref/icu4c/uclean_8h.html#a93f27d0ddc7c196a1da864763f2d8920 m_transliterators.clear(); u_cleanup(); } -Transliteration & Transliteration::GetInstance() +Transliteration & Transliteration::Instance() { static Transliteration instance; return instance; @@ -45,21 +46,25 @@ void Transliteration::Init(std::string const & icuDataDir) } } -std::string Transliteration::Transliterate(std::string const & str, int8_t langCode) const +bool Transliteration::Transliterate(std::string const & str, int8_t langCode, std::string & out) const { + if (str.empty()) + return false; + auto const transliteratorId = StringUtf8Multilang::GetTransliteratorIdByCode(langCode); auto const & it = m_transliterators.find(transliteratorId); if (it == m_transliterators.end()) { LOG(LWARNING, ("Transliteration failed, unknown transliterator \"", transliteratorId, "\"")); - return ""; + return false; } UnicodeString ustr(str.c_str()); it->second->transliterate(ustr); - std::string resultStr; - ustr.toUTF8String(resultStr); + if (ustr.isEmpty()) + return false; - return resultStr; + ustr.toUTF8String(out); + return true; } diff --git a/coding/transliteration.hpp b/coding/transliteration.hpp index 04dd75b6c3..cc3f97eb4d 100644 --- a/coding/transliteration.hpp +++ b/coding/transliteration.hpp @@ -14,11 +14,11 @@ class Transliteration public: ~Transliteration(); - static Transliteration & GetInstance(); + static Transliteration & Instance(); void Init(std::string const & icuDataDir); - std::string Transliterate(std::string const & str, int8_t langCode) const; + bool Transliterate(std::string const & str, int8_t langCode, std::string & out) const; private: Transliteration() = default; -- cgit v1.2.3