Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArsentiy Milchakov <milcars@mapswithme.com>2016-09-28 20:49:42 +0300
committerArsentiy Milchakov <milcars@mapswithme.com>2016-09-28 20:49:42 +0300
commitfcf41c868d50b0e17c4e9a8f8505f0d9567ff235 (patch)
treee7fbf9a6a6462048065fe22f700b1217beae73d2 /indexer
parent6f12239a778ed7c19326229b5aa2e9966f206ddd (diff)
added mark for readable names into categories.txt
Diffstat (limited to 'indexer')
-rw-r--r--indexer/categories_holder.cpp224
-rw-r--r--indexer/categories_holder.hpp3
-rw-r--r--indexer/indexer_tests/categories_test.cpp108
3 files changed, 240 insertions, 95 deletions
diff --git a/indexer/categories_holder.cpp b/indexer/categories_holder.cpp
index f677a57d92..123b1957a6 100644
--- a/indexer/categories_holder.cpp
+++ b/indexer/categories_holder.cpp
@@ -17,7 +17,128 @@ enum State
EParseLanguages
};
-} // unnamed namespace
+// Adds |name| to |synonyms|.
+// A name whose first character is '^' is the readable name for UI: the marker is stripped and
+// the name is inserted at the front of the deque. Any other name is appended to the back.
+void ProcessSynonym(CategoriesHolder::Category::Name const & name,
+                    deque<CategoriesHolder::Category::Name> & synonyms)
+{
+  bool const isReadableName = name.m_name[0] == '^';
+  if (isReadableName)
+  {
+    // Copy the whole entry first so that every other field is preserved, then drop the marker.
+    CategoriesHolder::Category::Name readable = name;
+    readable.m_name = name.m_name.substr(1);
+    synonyms.push_front(readable);
+    return;
+  }
+
+  synonyms.push_back(name);
+}
+
+// For each group name in |groups| that has an entry in |translations|, feeds every name of that
+// group to ProcessSynonym() so that it is added to |synonyms|. Groups without an entry are skipped.
+void GroupTranslationsToSynonyms(vector<string> const & groups,
+                                 CategoriesHolder::GroupTranslations const & translations,
+                                 deque<CategoriesHolder::Category::Name> & synonyms)
+{
+  for (auto const & groupName : groups)
+  {
+    auto const found = translations.find(groupName);
+    if (found != translations.end())
+    {
+      for (auto const & groupSynonym : found->second)
+        ProcessSynonym(groupSynonym, synonyms);
+    }
+  }
+}
+
+// Removes the leading '^' marker from every name stored in |translations|.
+// Names that do not start with '^' are left untouched.
+void TrimGroupTranslations(CategoriesHolder::GroupTranslations & translations)
+{
+  for (auto & groupEntry : translations)
+  {
+    for (auto & groupName : groupEntry.second)
+    {
+      if (groupName.m_name[0] != '^')
+        continue;
+
+      // Drop exactly one character: the marker itself.
+      groupName.m_name.erase(0, 1);
+    }
+  }
+}
+
+// Replaces an emoji code stored in |name.m_name| with the UTF-8 string of that code point.
+// The first two characters are skipped (ProcessName() calls this only for names starting with
+// "U+") and the rest is parsed as a hexadecimal number.
+// Returns false, after logging a warning, when the number cannot be parsed or when the
+// normalized result is an ASCII string. In the second case |name.m_name| has already been
+// overwritten with the converted value.
+bool ParseEmoji(CategoriesHolder::Category::Name & name)
+{
+  using namespace strings;
+
+  // Keep the original text: |name.m_name| is overwritten below but the warnings report the code.
+  auto const originalCode = name.m_name;
+
+  int codePoint = 0;
+  bool const parsed = to_int(name.m_name.c_str() + 2, codePoint, 16);
+  if (!parsed)
+  {
+    LOG(LWARNING, ("Bad emoji code:", originalCode));
+    return false;
+  }
+
+  name.m_name = ToUtf8(UniString(1, static_cast<UniChar>(codePoint)));
+
+  auto const normalized = ToUtf8(search::NormalizeAndSimplifyString(name.m_name));
+  if (IsASCIIString(normalized))
+  {
+    LOG(LWARNING, ("Bad emoji code:", originalCode));
+    return false;
+  }
+
+  return true;
+}
+
+// Extracts the optional one-digit suggestion prefix length from the front of |name.m_name|.
+// If the name starts with a digit from '1' to '9', that digit is stored in
+// |name.m_prefixLengthToSuggest| and removed from the name. Otherwise (including a leading '0',
+// which stays part of the name) the length is set to Category::kEmptyPrefixLength.
+void FillPrefixLengthToSuggest(CategoriesHolder::Category::Name & name)
+{
+  // A plain range comparison is used instead of isdigit(): names may contain non-ASCII bytes, so
+  // the first char can be negative, and passing a negative value to isdigit() is undefined
+  // behaviour. The emptiness check keeps the function safe when it is called on an empty name.
+  bool const hasPrefixLength =
+      !name.m_name.empty() && name.m_name[0] >= '1' && name.m_name[0] <= '9';
+  if (!hasPrefixLength)
+  {
+    name.m_prefixLengthToSuggest = CategoriesHolder::Category::kEmptyPrefixLength;
+    return;
+  }
+
+  name.m_prefixLengthToSuggest = name.m_name[0] - '0';
+  name.m_name = name.m_name.substr(1);
+}
+
+// Handles a single name read for the current category or group definition.
+// An empty name is reported and ignored. Otherwise the prefix length is extracted, a name that
+// starts with "U+" is converted by ParseEmoji() (and dropped when the conversion fails), and the
+// result is stored either in |translations| (single group with no types, i.e. a group
+// definition) or in |synonyms| through ProcessSynonym().
+void ProcessName(CategoriesHolder::Category::Name name, vector<string> const & groups,
+                 vector<uint32_t> const & types, CategoriesHolder::GroupTranslations & translations,
+                 deque<CategoriesHolder::Category::Name> & synonyms)
+{
+  if (name.m_name.empty())
+  {
+    LOG(LWARNING, ("Incorrect name for category:", groups));
+    return;
+  }
+
+  FillPrefixLengthToSuggest(name);
+
+  bool const isEmojiCode = strings::StartsWith(name.m_name, "U+");
+  if (isEmojiCode && !ParseEmoji(name))
+    return;
+
+  bool const isGroupDefinition = types.empty() && groups.size() == 1;
+  if (!isGroupDefinition)
+  {
+    ProcessSynonym(name, synonyms);
+    return;
+  }
+
+  // Not a translation, but a category group definition.
+  translations[groups[0]].push_back(name);
+}
+
+// Handles one token of a category line.
+// A token starting with '@' is a group reference and is appended to |groups|. Any other token is
+// split on '-' into a classificator path; the resolved type is appended to |types|, or a warning
+// is logged when the classificator returns 0 for the path.
+void ProcessCategory(string const & line, vector<string> & groups, vector<uint32_t> & types)
+{
+  bool const isGroupReference = line[0] == '@';
+  if (isGroupReference)
+  {
+    CHECK((groups.empty() || !types.empty()), ("Two groups in a group definition, line:", line));
+    groups.push_back(line);
+    return;
+  }
+
+  // Path of subcategories for the classificator.
+  vector<string> path;
+  strings::Tokenize(line, "-", MakeBackInsertFunctor(path));
+
+  uint32_t const type = classif().GetTypeByPathSafe(path);
+  if (type != 0)
+    types.push_back(type);
+  else
+    LOG(LWARNING, ("Invalid type:", path, "; during parcing the line:", line));
+}
+} // namespace
// static
int8_t const CategoriesHolder::kEnglishCode = 1;
@@ -119,13 +240,10 @@ void CategoriesHolder::LoadFromStream(istream & s)
State state = EParseTypes;
string line;
-
Category cat;
vector<uint32_t> types;
vector<string> currentGroups;
- Classificator const & c = classif();
-
int lineNumber = 0;
while (s.good())
{
@@ -138,64 +256,29 @@ void CategoriesHolder::LoadFromStream(istream & s)
strings::SimpleTokenizer iter(line, state == EParseTypes ? "|" : ":|");
- switch (state)
- {
- case EParseTypes:
+ if (state == EParseTypes)
{
AddCategory(cat, types);
currentGroups.clear();
while (iter)
{
- // Check if category is a group reference.
- if ((*iter)[0] == '@')
- {
- CHECK((currentGroups.empty() || !types.empty()),
- ("Two groups in a group definition at line", lineNumber));
- currentGroups.push_back(*iter);
- }
- else
- {
- // Split category to subcategories for classificator.
- vector<string> v;
- strings::Tokenize(*iter, "-", MakeBackInsertFunctor(v));
-
- // Get classificator type.
- uint32_t const type = c.GetTypeByPathSafe(v);
- if (type != 0)
- types.push_back(type);
- else
- LOG(LWARNING, ("Invalid type:", v, "at line:", lineNumber));
- }
-
+ ProcessCategory(*iter, currentGroups, types);
++iter;
}
if (!types.empty() || currentGroups.size() == 1)
+ {
+ // Add translations into synonyms first, it will allow to override
+ // translations for UI by concrete category translation.
+ GroupTranslationsToSynonyms(currentGroups, m_groupTranslations, cat.m_synonyms);
state = EParseLanguages;
+ }
}
- break;
-
- case EParseLanguages:
+ else if (state == EParseLanguages)
{
if (!iter)
{
- // If the category groups are specified, add translations from them.
-
- ///@todo According to the current logic, categories.txt should have
- /// the blank string at the end of file.
- if (!types.empty())
- {
- for (string const & group : currentGroups)
- {
- auto it = m_groupTranslations.find(group);
- if (it == m_groupTranslations.end())
- continue;
- for (auto const & synonym : it->second)
- cat.m_synonyms.push_back(synonym);
- }
- }
-
state = EParseTypes;
continue;
}
@@ -210,56 +293,13 @@ void CategoriesHolder::LoadFromStream(istream & s)
name.m_locale = langCode;
name.m_name = *iter;
- if (name.m_name.empty())
- {
- LOG(LWARNING, ("Empty category name at line:", lineNumber));
- continue;
- }
-
- if (name.m_name[0] >= '0' && name.m_name[0] <= '9')
- {
- name.m_prefixLengthToSuggest = name.m_name[0] - '0';
- name.m_name = name.m_name.substr(1);
- }
- else
- name.m_prefixLengthToSuggest = Category::kEmptyPrefixLength;
-
- // Process emoji symbols.
- using namespace strings;
- if (StartsWith(name.m_name, "U+"))
- {
- auto const code = name.m_name;
- int c;
- if (!to_int(name.m_name.c_str() + 2, c, 16))
- {
- LOG(LWARNING, ("Bad emoji code:", code));
- continue;
- }
-
- name.m_name = ToUtf8(UniString(1, static_cast<UniChar>(c)));
-
- if (IsASCIIString(ToUtf8(search::NormalizeAndSimplifyString(name.m_name))))
- {
- LOG(LWARNING, ("Bad emoji code:", code));
- continue;
- }
- }
-
- if (currentGroups.size() == 1 && types.empty())
- {
- // Not a translation, but a category group definition
- m_groupTranslations[currentGroups[0]].push_back(name);
- }
- else
- cat.m_synonyms.push_back(name);
+ ProcessName(name, currentGroups, types, m_groupTranslations, cat.m_synonyms);
}
}
- break;
- }
}
-
// add last category
AddCategory(cat, types);
+ TrimGroupTranslations(m_groupTranslations);
}
bool CategoriesHolder::GetNameByType(uint32_t type, int8_t locale, string & name) const
diff --git a/indexer/categories_holder.hpp b/indexer/categories_holder.hpp
index 948f58894c..c331708359 100644
--- a/indexer/categories_holder.hpp
+++ b/indexer/categories_holder.hpp
@@ -1,6 +1,7 @@
#pragma once
#include "base/string_utils.hpp"
+#include "std/deque.hpp"
#include "std/iostream.hpp"
#include "std/map.hpp"
#include "std/shared_ptr.hpp"
@@ -28,7 +29,7 @@ public:
uint8_t m_prefixLengthToSuggest;
};
- vector<Name> m_synonyms;
+ deque<Name> m_synonyms;
inline void Swap(Category & r)
{
diff --git a/indexer/indexer_tests/categories_test.cpp b/indexer/indexer_tests/categories_test.cpp
index 8152110ca4..222ad69760 100644
--- a/indexer/indexer_tests/categories_test.cpp
+++ b/indexer/indexer_tests/categories_test.cpp
@@ -25,7 +25,7 @@ using namespace indexer;
char const g_testCategoriesTxt[] =
"amenity-bench\n"
"en:1bench|sit down|to sit\n"
- "de:0bank|auf die strafbank schicken\n"
+ "de:2bank|auf die strafbank schicken\n"
"zh-Hans:长凳\n"
"zh-Hant:長板凳\n"
"da:bænk\n"
@@ -55,7 +55,7 @@ struct Checker
TEST_EQUAL(cat.m_synonyms[2].m_name, "to sit", ());
TEST_EQUAL(cat.m_synonyms[3].m_locale, CategoriesHolder::MapLocaleToInteger("de"), ());
TEST_EQUAL(cat.m_synonyms[3].m_name, "bank", ());
- TEST_EQUAL(cat.m_synonyms[3].m_prefixLengthToSuggest, 0, ());
+ TEST_EQUAL(cat.m_synonyms[3].m_prefixLengthToSuggest, 2, ());
TEST_EQUAL(cat.m_synonyms[4].m_locale, CategoriesHolder::MapLocaleToInteger("de"), ());
TEST_EQUAL(cat.m_synonyms[4].m_name, "auf die strafbank schicken", ());
TEST_EQUAL(cat.m_synonyms[5].m_locale, CategoriesHolder::MapLocaleToInteger("zh_CN"), ());
@@ -119,6 +119,110 @@ UNIT_TEST(CategoriesHolder_Smoke)
}
}
+// Checks that, in the default categories, neither the category synonyms nor the group
+// translations contain a name that still starts with the '^' marker.
+UNIT_TEST(CategoriesHolder_ReadableNameSmoke)
+{
+  classificator::Load();
+
+  auto const & holder = GetDefaultCategories();
+  auto const & translationsByGroup = holder.GetGroupTranslations();
+
+  holder.ForEachCategory([](CategoriesHolder::Category const & category)
+  {
+    for (auto const & name : category.m_synonyms)
+    {
+      TEST_NOT_EQUAL(name.m_name[0], '^', ("symbol ^ is used incorrectly in categories.txt "
+                                           "and loaded to synonyms."));
+    }
+  });
+
+  for (auto const & groupEntry : translationsByGroup)
+  {
+    for (auto const & name : groupEntry.second)
+    {
+      TEST_NOT_EQUAL(name.m_name[0], '^', ("symbol ^ is used incorrectly in categories.txt "
+                                           "and loaded to group translations"));
+    }
+  }
+}
+
+// Loads a small in-memory categories file and checks, for the first four categories visited by
+// ForEachCategory(), the number of synonyms and the name stored at each position.
+UNIT_TEST(CategoriesHolder_ReadableName)
+{
+  char const kCategories[] =
+      "@shop\n"
+      "en:^Shop\n"
+      "ru:^Mагазин\n"
+      "\n"
+      "@meat\n"
+      "en:Beef|^Meat\n"
+      "ru:мясо\n"
+      "de:Schlachter\n"
+      "\n"
+      "@butcher\n"
+      "de:^Metzgerei\n"
+      "\n"
+      "shop|@shop\n"
+      "en:market\n"
+      "\n"
+      "shop-alcohol|@shop\n"
+      "en:Liquor Store|2^Alcostore\n"
+      "\n"
+      "shop-bakery|@shop\n"
+      "en:^buns\n"
+      "\n"
+      "shop-butcher|@meat|@butcher\n"
+      "en:2butcher\n"
+      "ru:3^Мясная лавка\n"
+      "de:Geschäft|2Laden\n"
+      "";
+
+  classificator::Load();
+  // The trailing '\0' of the array is not part of the data, hence the "- 1".
+  CategoriesHolder holder(make_unique<MemReader>(kCategories, sizeof(kCategories) - 1));
+
+  // Index of the category currently being visited.
+  size_t visited = 0;
+  holder.ForEachCategory([&visited](CategoriesHolder::Category const & cat)
+  {
+    switch (visited)
+    {
+    case 0:
+      TEST_EQUAL(cat.m_synonyms.size(), 3, ());
+      TEST_EQUAL(cat.m_synonyms[0].m_name, "Mагазин", ());
+      TEST_EQUAL(cat.m_synonyms[1].m_name, "Shop", ());
+      TEST_EQUAL(cat.m_synonyms[2].m_name, "market", ());
+      break;
+
+    case 1:
+      TEST_EQUAL(cat.m_synonyms.size(), 4, ());
+      TEST_EQUAL(cat.m_synonyms[0].m_name, "Alcostore", ());
+      TEST_EQUAL(cat.m_synonyms[1].m_name, "Mагазин", ());
+      TEST_EQUAL(cat.m_synonyms[2].m_name, "Shop", ());
+      TEST_EQUAL(cat.m_synonyms[3].m_name, "Liquor Store", ());
+      break;
+
+    case 2:
+      TEST_EQUAL(cat.m_synonyms.size(), 3, ());
+      TEST_EQUAL(cat.m_synonyms[0].m_name, "buns", ());
+      TEST_EQUAL(cat.m_synonyms[1].m_name, "Mагазин", ());
+      TEST_EQUAL(cat.m_synonyms[2].m_name, "Shop", ());
+      break;
+
+    case 3:
+      TEST_EQUAL(cat.m_synonyms.size(), 9, ());
+      TEST_EQUAL(cat.m_synonyms[0].m_name, "Мясная лавка", ());
+      TEST_EQUAL(cat.m_synonyms[1].m_name, "Metzgerei", ());
+      TEST_EQUAL(cat.m_synonyms[2].m_name, "Meat", ());
+      TEST_EQUAL(cat.m_synonyms[3].m_name, "Beef", ());
+      TEST_EQUAL(cat.m_synonyms[4].m_name, "мясо", ());
+      TEST_EQUAL(cat.m_synonyms[5].m_name, "Schlachter", ());
+      TEST_EQUAL(cat.m_synonyms[6].m_name, "butcher", ());
+      TEST_EQUAL(cat.m_synonyms[7].m_name, "Geschäft", ());
+      TEST_EQUAL(cat.m_synonyms[8].m_name, "Laden", ());
+      break;
+
+    default:
+      // Categories past the fourth are not inspected.
+      break;
+    }
+    ++visited;
+  });
+}
+
UNIT_TEST(CategoriesIndex_Smoke)
{
classificator::Load();