From 63ef86c21f091321ecac9bef4d9de8134f028279 Mon Sep 17 00:00:00 2001 From: Daria Volvenkova Date: Thu, 20 Apr 2017 18:32:02 +0300 Subject: Initialization of transliterators optimized. --- coding/transliteration.cpp | 56 +++++++++++++++++++++++++++++++++++----------- coding/transliteration.hpp | 8 ++----- 2 files changed, 45 insertions(+), 19 deletions(-) (limited to 'coding') diff --git a/coding/transliteration.cpp b/coding/transliteration.cpp index ca2f84e643..618c602471 100644 --- a/coding/transliteration.cpp +++ b/coding/transliteration.cpp @@ -3,21 +3,35 @@ #include "base/logging.hpp" -#include - #include "3party/icu/common/unicode/uclean.h" #include "3party/icu/common/unicode/unistr.h" #include "3party/icu/common/unicode/utypes.h" #include "3party/icu/i18n/unicode/translit.h" #include "3party/icu/i18n/unicode/utrans.h" +#include "std/unique_ptr.hpp" + +#include +#include +#include + +struct Transliteration::TransliteratorInfo +{ + TransliteratorInfo() + : m_initialized(false) + {} + + std::atomic<bool> m_initialized; + std::mutex m_mutex; + std::unique_ptr<Transliterator> m_transliterator; +}; + Transliteration::~Transliteration() { // The use of u_cleanup() just before an application terminates is optional, // but it should be called only once for performance reasons. // The primary benefit is to eliminate reports of memory or resource leaks originating // in ICU code from the results generated by heap analysis tools. - // http://www.icu-project.org/apiref/icu4c/uclean_8h.html#a93f27d0ddc7c196a1da864763f2d8920 m_transliterators.clear(); u_cleanup(); } @@ -30,6 +44,9 @@ Transliteration & Transliteration::Instance() void Transliteration::Init(std::string const & icuDataDir) { + // This function should be called at most once in a process, + // before the first ICU operation that will require the loading of an ICU data file. + // This function is not thread-safe. Use it before calling ICU APIs from multiple threads. 
u_setDataDirectory(icuDataDir.c_str()); for (auto const & lang : StringUtf8Multilang::GetSupportedLanguages()) @@ -37,14 +54,7 @@ void Transliteration::Init(std::string const & icuDataDir) if (strlen(lang.m_transliteratorId) == 0 || m_transliterators.count(lang.m_transliteratorId) != 0) continue; - UErrorCode status = U_ZERO_ERROR; - std::unique_ptr<Transliterator> transliterator( - Transliterator::createInstance(lang.m_transliteratorId, UTRANS_FORWARD, status)); - - if (transliterator != nullptr) - m_transliterators.emplace(lang.m_transliteratorId, std::move(transliterator)); - else - LOG(LWARNING, ("Cannot create transliterator \"", lang.m_transliteratorId, "\", icu error =", status)); + m_transliterators.emplace(lang.m_transliteratorId, make_unique<TransliteratorInfo>()); } } @@ -58,15 +68,35 @@ bool Transliteration::Transliterate(std::string const & str, int8_t langCode, st if (transliteratorId.empty()) return false; - auto const & it = m_transliterators.find(transliteratorId); + auto it = m_transliterators.find(transliteratorId); if (it == m_transliterators.end()) { LOG(LWARNING, ("Transliteration failed, unknown transliterator \"", transliteratorId, "\"")); return false; } + if (!it->second->m_initialized) + { + std::lock_guard<std::mutex> lock(it->second->m_mutex); + if (!it->second->m_initialized) + { + UErrorCode status = U_ZERO_ERROR; + UnicodeString translitId(it->first.c_str()); + + it->second->m_transliterator.reset(Transliterator::createInstance(translitId, UTRANS_FORWARD, status)); + + if (it->second->m_transliterator == nullptr) + LOG(LWARNING, ("Cannot create transliterator \"", it->first, "\", icu error =", status)); + + it->second->m_initialized = true; + } + } + + if (it->second->m_transliterator == nullptr) + return false; + UnicodeString ustr(str.c_str()); - it->second->transliterate(ustr); + it->second->m_transliterator->transliterate(ustr); if (ustr.isEmpty()) return false; diff --git a/coding/transliteration.hpp b/coding/transliteration.hpp index cc3f97eb4d..088f870b56 100644 --- 
a/coding/transliteration.hpp +++ b/coding/transliteration.hpp @@ -4,11 +4,6 @@ #include #include -namespace icu -{ -class Transliterator; -} - class Transliteration { public: @@ -23,5 +18,6 @@ public: private: Transliteration() = default; - std::map<std::string, std::unique_ptr<icu::Transliterator>> m_transliterators; + struct TransliteratorInfo; + std::map<std::string, std::unique_ptr<TransliteratorInfo>> m_transliterators; }; -- cgit v1.2.3