diff options
author | Arsentiy Milchakov <milcars@mapswithme.com> | 2016-09-28 20:49:42 +0300 |
---|---|---|
committer | Arsentiy Milchakov <milcars@mapswithme.com> | 2016-09-28 20:49:42 +0300 |
commit | fcf41c868d50b0e17c4e9a8f8505f0d9567ff235 (patch) | |
tree | e7fbf9a6a6462048065fe22f700b1217beae73d2 /indexer | |
parent | 6f12239a778ed7c19326229b5aa2e9966f206ddd (diff) |
added mark for readable names into categories.txt
Diffstat (limited to 'indexer')
-rw-r--r-- | indexer/categories_holder.cpp | 224 | ||||
-rw-r--r-- | indexer/categories_holder.hpp | 3 | ||||
-rw-r--r-- | indexer/indexer_tests/categories_test.cpp | 108 |
3 files changed, 240 insertions, 95 deletions
diff --git a/indexer/categories_holder.cpp b/indexer/categories_holder.cpp index f677a57d92..123b1957a6 100644 --- a/indexer/categories_holder.cpp +++ b/indexer/categories_holder.cpp @@ -17,7 +17,128 @@ enum State EParseLanguages }; -} // unnamed namespace +void ProcessSynonym(CategoriesHolder::Category::Name const & name, + deque<CategoriesHolder::Category::Name> & synonyms) +{ + if (name.m_name[0] != '^') + { + synonyms.push_back(name); + return; + } + + // Name which starts with '^' is readable name for UI and it should be in the beginning. + synonyms.push_front(name); + synonyms.front().m_name = name.m_name.substr(1); +} + +void GroupTranslationsToSynonyms(vector<string> const & groups, + CategoriesHolder::GroupTranslations const & translations, + deque<CategoriesHolder::Category::Name> & synonyms) +{ + for (string const & group : groups) + { + auto it = translations.find(group); + if (it == translations.end()) + continue; + for (auto & synonym : it->second) + ProcessSynonym(synonym, synonyms); + } +} + +void TrimGroupTranslations(CategoriesHolder::GroupTranslations & translations) +{ + for(auto & translation : translations) + { + for (auto & synonym : translation.second) + { + if (synonym.m_name[0] == '^') + synonym.m_name = synonym.m_name.substr(1); + } + } +} + +bool ParseEmoji(CategoriesHolder::Category::Name & name) +{ + using namespace strings; + + auto const code = name.m_name; + int c; + if (!to_int(name.m_name.c_str() + 2, c, 16)) + { + LOG(LWARNING, ("Bad emoji code:", code)); + return false; + } + + name.m_name = ToUtf8(UniString(1, static_cast<UniChar>(c))); + + if (IsASCIIString(ToUtf8(search::NormalizeAndSimplifyString(name.m_name)))) + { + LOG(LWARNING, ("Bad emoji code:", code)); + return false; + } + + return true; +} + +void FillPrefixLengthToSuggest(CategoriesHolder::Category::Name & name) +{ + if (isdigit(name.m_name.front()) && name.m_name.front() != '0') + { + name.m_prefixLengthToSuggest = name.m_name[0] - '0'; + name.m_name = name.m_name.substr(1); + } + else + { + name.m_prefixLengthToSuggest = CategoriesHolder::Category::kEmptyPrefixLength; + } +} + +void ProcessName(CategoriesHolder::Category::Name name, vector<string> const & groups, + vector<uint32_t> const & types, CategoriesHolder::GroupTranslations & translations, + deque<CategoriesHolder::Category::Name> & synonyms) +{ + if (name.m_name.empty()) + { + LOG(LWARNING, ("Incorrect name for category:", groups)); + return; + } + + FillPrefixLengthToSuggest(name); + + if (strings::StartsWith(name.m_name, "U+") && !ParseEmoji(name)) + return; + + if (groups.size() == 1 && types.empty()) + translations[groups[0]].push_back(name); // not a translation, but a category group definition + else + ProcessSynonym(name, synonyms); +} + +void ProcessCategory(string const & line, vector<string> & groups, vector<uint32_t> & types) +{ + // Check if category is a group reference. + if (line[0] == '@') + { + CHECK((groups.empty() || !types.empty()), ("Two groups in a group definition, line:", line)); + groups.push_back(line); + return; + } + + // Split category to subcategories for classificator. + vector<string> v; + strings::Tokenize(line, "-", MakeBackInsertFunctor(v)); + + // Get classificator type. + uint32_t const type = classif().GetTypeByPathSafe(v); + if (type == 0) + { + LOG(LWARNING, ("Invalid type:", v, "; during parcing the line:", line)); + return; + } + + types.push_back(type); +} +} // namespace // static int8_t const CategoriesHolder::kEnglishCode = 1; @@ -119,13 +240,10 @@ void CategoriesHolder::LoadFromStream(istream & s) State state = EParseTypes; string line; - Category cat; vector<uint32_t> types; vector<string> currentGroups; - Classificator const & c = classif(); - int lineNumber = 0; while (s.good()) { @@ -138,64 +256,29 @@ void CategoriesHolder::LoadFromStream(istream & s) strings::SimpleTokenizer iter(line, state == EParseTypes ? "|" : ":|"); - switch (state) - { - case EParseTypes: + if (state == EParseTypes) { AddCategory(cat, types); currentGroups.clear(); while (iter) { - // Check if category is a group reference. - if ((*iter)[0] == '@') - { - CHECK((currentGroups.empty() || !types.empty()), - ("Two groups in a group definition at line", lineNumber)); - currentGroups.push_back(*iter); - } - else - { - // Split category to subcategories for classificator. - vector<string> v; - strings::Tokenize(*iter, "-", MakeBackInsertFunctor(v)); - - // Get classificator type. - uint32_t const type = c.GetTypeByPathSafe(v); - if (type != 0) - types.push_back(type); - else - LOG(LWARNING, ("Invalid type:", v, "at line:", lineNumber)); - } - + ProcessCategory(*iter, currentGroups, types); ++iter; } if (!types.empty() || currentGroups.size() == 1) + { + // Add translations into synonyms first, it will allow to override + // translations for UI by concrete category translation. + GroupTranslationsToSynonyms(currentGroups, m_groupTranslations, cat.m_synonyms); state = EParseLanguages; + } } - break; - - case EParseLanguages: + else if (state == EParseLanguages) { if (!iter) { - // If the category groups are specified, add translations from them. - - ///@todo According to the current logic, categories.txt should have - /// the blank string at the end of file. - if (!types.empty()) - { - for (string const & group : currentGroups) - { - auto it = m_groupTranslations.find(group); - if (it == m_groupTranslations.end()) - continue; - for (auto const & synonym : it->second) - cat.m_synonyms.push_back(synonym); - } - } - state = EParseTypes; continue; } @@ -210,56 +293,13 @@ void CategoriesHolder::LoadFromStream(istream & s) name.m_locale = langCode; name.m_name = *iter; - if (name.m_name.empty()) - { - LOG(LWARNING, ("Empty category name at line:", lineNumber)); - continue; - } - - if (name.m_name[0] >= '0' && name.m_name[0] <= '9') - { - name.m_prefixLengthToSuggest = name.m_name[0] - '0'; - name.m_name = name.m_name.substr(1); - } - else - name.m_prefixLengthToSuggest = Category::kEmptyPrefixLength; - - // Process emoji symbols. - using namespace strings; - if (StartsWith(name.m_name, "U+")) - { - auto const code = name.m_name; - int c; - if (!to_int(name.m_name.c_str() + 2, c, 16)) - { - LOG(LWARNING, ("Bad emoji code:", code)); - continue; - } - - name.m_name = ToUtf8(UniString(1, static_cast<UniChar>(c))); - - if (IsASCIIString(ToUtf8(search::NormalizeAndSimplifyString(name.m_name)))) - { - LOG(LWARNING, ("Bad emoji code:", code)); - continue; - } - } - - if (currentGroups.size() == 1 && types.empty()) - { - // Not a translation, but a category group definition - m_groupTranslations[currentGroups[0]].push_back(name); - } - else - cat.m_synonyms.push_back(name); + ProcessName(name, currentGroups, types, m_groupTranslations, cat.m_synonyms); } } - break; - } } - // add last category AddCategory(cat, types); + TrimGroupTranslations(m_groupTranslations); } bool CategoriesHolder::GetNameByType(uint32_t type, int8_t locale, string & name) const diff --git a/indexer/categories_holder.hpp b/indexer/categories_holder.hpp index 948f58894c..c331708359 100644 --- a/indexer/categories_holder.hpp +++ b/indexer/categories_holder.hpp @@ -1,6 +1,7 @@ #pragma once #include "base/string_utils.hpp" +#include "std/deque.hpp" #include "std/iostream.hpp" #include "std/map.hpp" #include "std/shared_ptr.hpp" @@ -28,7 +29,7 @@ public: uint8_t m_prefixLengthToSuggest; }; - vector<Name> m_synonyms; + deque<Name> m_synonyms; inline void Swap(Category & r) { diff --git a/indexer/indexer_tests/categories_test.cpp b/indexer/indexer_tests/categories_test.cpp index 8152110ca4..222ad69760 100644 --- a/indexer/indexer_tests/categories_test.cpp +++ b/indexer/indexer_tests/categories_test.cpp @@ -25,7 +25,7 @@ using namespace indexer; char const g_testCategoriesTxt[] = "amenity-bench\n" "en:1bench|sit down|to sit\n" - "de:0bank|auf die strafbank schicken\n" + "de:2bank|auf die strafbank schicken\n" "zh-Hans:长凳\n" "zh-Hant:長板凳\n" "da:bænk\n" @@ -55,7 +55,7 @@ struct Checker TEST_EQUAL(cat.m_synonyms[2].m_name, "to sit", ()); TEST_EQUAL(cat.m_synonyms[3].m_locale, CategoriesHolder::MapLocaleToInteger("de"), ()); TEST_EQUAL(cat.m_synonyms[3].m_name, "bank", ()); - TEST_EQUAL(cat.m_synonyms[3].m_prefixLengthToSuggest, 0, ()); + TEST_EQUAL(cat.m_synonyms[3].m_prefixLengthToSuggest, 2, ()); TEST_EQUAL(cat.m_synonyms[4].m_locale, CategoriesHolder::MapLocaleToInteger("de"), ()); TEST_EQUAL(cat.m_synonyms[4].m_name, "auf die strafbank schicken", ()); TEST_EQUAL(cat.m_synonyms[5].m_locale, CategoriesHolder::MapLocaleToInteger("zh_CN"), ()); @@ -119,6 +119,110 @@ UNIT_TEST(CategoriesHolder_Smoke) } } +UNIT_TEST(CategoriesHolder_ReadableNameSmoke) +{ + classificator::Load(); + + auto const & categoriesHolder = GetDefaultCategories(); + auto const & groupTranslations = categoriesHolder.GetGroupTranslations(); + + categoriesHolder.ForEachCategory([](CategoriesHolder::Category const & cat) + { + for (auto const & synonym : cat.m_synonyms) + { + TEST_NOT_EQUAL(synonym.m_name[0], '^', ("symbol ^ is used incorrectly in categories.txt " + "and loaded to synonyms.")); + } + }); + + for (auto const & group : groupTranslations) + { + for (auto const & translation : group.second) + { + TEST_NOT_EQUAL(translation.m_name[0], '^', ("symbol ^ is used incorrectly in categories.txt " + "and loaded to group translations")); + } + } +} + +UNIT_TEST(CategoriesHolder_ReadableName) +{ + char const kCategories[] = + "@shop\n" + "en:^Shop\n" + "ru:^Mагазин\n" + "\n" + "@meat\n" + "en:Beef|^Meat\n" + "ru:мясо\n" + "de:Schlachter\n" + "\n" + "@butcher\n" + "de:^Metzgerei\n" + "\n" + "shop|@shop\n" + "en:market\n" + "\n" + "shop-alcohol|@shop\n" + "en:Liquor Store|2^Alcostore\n" + "\n" + "shop-bakery|@shop\n" + "en:^buns\n" + "\n" + "shop-butcher|@meat|@butcher\n" + "en:2butcher\n" + "ru:3^Мясная лавка\n" + "de:Geschäft|2Laden\n" + ""; + + classificator::Load(); + CategoriesHolder holder(make_unique<MemReader>(kCategories, sizeof(kCategories) - 1)); + + size_t count = 0; + holder.ForEachCategory([&count](CategoriesHolder::Category const & cat) + { + if (count == 0) + { + TEST_EQUAL(cat.m_synonyms.size(), 3, ()); + TEST_EQUAL(cat.m_synonyms[0].m_name, "Mагазин", ()); + TEST_EQUAL(cat.m_synonyms[1].m_name, "Shop", ()); + TEST_EQUAL(cat.m_synonyms[2].m_name, "market", ()); + } + + if (count == 1) + { + TEST_EQUAL(cat.m_synonyms.size(), 4, ()); + TEST_EQUAL(cat.m_synonyms[0].m_name, "Alcostore", ()); + TEST_EQUAL(cat.m_synonyms[1].m_name, "Mагазин", ()); + TEST_EQUAL(cat.m_synonyms[2].m_name, "Shop", ()); + TEST_EQUAL(cat.m_synonyms[3].m_name, "Liquor Store", ()); + } + + if (count == 2) + { + TEST_EQUAL(cat.m_synonyms.size(), 3, ()); + TEST_EQUAL(cat.m_synonyms[0].m_name, "buns", ()); + TEST_EQUAL(cat.m_synonyms[1].m_name, "Mагазин", ()); + TEST_EQUAL(cat.m_synonyms[2].m_name, "Shop", ()); + } + + if (count == 3) + { + TEST_EQUAL(cat.m_synonyms.size(), 9, ()); + TEST_EQUAL(cat.m_synonyms[0].m_name, "Мясная лавка", ()); + TEST_EQUAL(cat.m_synonyms[1].m_name, "Metzgerei", ()); + TEST_EQUAL(cat.m_synonyms[2].m_name, "Meat", ()); + TEST_EQUAL(cat.m_synonyms[3].m_name, "Beef", ()); + TEST_EQUAL(cat.m_synonyms[4].m_name, "мясо", ()); + TEST_EQUAL(cat.m_synonyms[5].m_name, "Schlachter", ()); + TEST_EQUAL(cat.m_synonyms[6].m_name, "butcher", ()); + TEST_EQUAL(cat.m_synonyms[7].m_name, "Geschäft", ()); + TEST_EQUAL(cat.m_synonyms[8].m_name, "Laden", ()); + } + ++count; + }); +} + UNIT_TEST(CategoriesIndex_Smoke) { classificator::Load(); |