diff options
author | Maxim Pimenov <m@maps.me> | 2016-03-25 12:16:48 +0300 |
---|---|---|
committer | Maxim Pimenov <m@maps.me> | 2016-03-25 13:43:53 +0300 |
commit | e4035c7d85f41698fd8f732bdce4f16d33477bb3 (patch) | |
tree | eaddec6d592c644021adf005b54059c6a9c89e23 /indexer/categories_holder.cpp | |
parent | cd2d0868b2becb1a9ae0416ed33915f5d454610f (diff) |
[search] Fixed emoji.
The "information box" emoji U+2139 was reduced to the plain letter "i" by
search normalization and simplification. As a result, every token that
started with this letter got the tourism-information category as a synonym.
This was the only case where a normalized and simplified emoji
resulted in a pure ASCII string.
Diffstat (limited to 'indexer/categories_holder.cpp')
-rw-r--r-- | indexer/categories_holder.cpp | 9 |
1 file changed, 8 insertions, 1 deletion
diff --git a/indexer/categories_holder.cpp b/indexer/categories_holder.cpp index e6678c55ca..bbef05a57b 100644 --- a/indexer/categories_holder.cpp +++ b/indexer/categories_holder.cpp @@ -154,14 +154,21 @@ void CategoriesHolder::LoadFromStream(istream & s) using namespace strings; if (StartsWith(name.m_name, "U+")) { + auto const code = name.m_name; int c; if (!to_int(name.m_name.c_str() + 2, c, 16)) { - LOG(LWARNING, ("Bad emoji code:", name.m_name)); + LOG(LWARNING, ("Bad emoji code:", code)); continue; } name.m_name = ToUtf8(UniString(1, static_cast<UniChar>(c))); + + if (IsASCIIString(ToUtf8(search::NormalizeAndSimplifyString(name.m_name)))) + { + LOG(LWARNING, ("Bad emoji code:", code)); + continue; + } } cat.m_synonyms.push_back(name); |