Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Maxim Pimenov <m@maps.me>, 2016-03-25 12:16:48 +0300
committer: Maxim Pimenov <m@maps.me>, 2016-03-25 13:43:53 +0300
commit: e4035c7d85f41698fd8f732bdce4f16d33477bb3 (patch)
tree: eaddec6d592c644021adf005b54059c6a9c89e23 /indexer/categories_holder.cpp
parent: cd2d0868b2becb1a9ae0416ed33915f5d454610f (diff)
[search] Fixed emoji.
The "information box" emoji U+2139 was converted to the letter "i" after all simplifications. As a result, every token that started with this letter had the tourism-information category as its synonym. This was the only case where a normalized and simplified emoji resulted in a pure ASCII string.
Diffstat (limited to 'indexer/categories_holder.cpp')
-rw-r--r-- indexer/categories_holder.cpp 9
1 file changed, 8 insertions, 1 deletion
diff --git a/indexer/categories_holder.cpp b/indexer/categories_holder.cpp
index e6678c55ca..bbef05a57b 100644
--- a/indexer/categories_holder.cpp
+++ b/indexer/categories_holder.cpp
@@ -154,14 +154,21 @@ void CategoriesHolder::LoadFromStream(istream & s)
using namespace strings;
if (StartsWith(name.m_name, "U+"))
{
+ auto const code = name.m_name;
int c;
if (!to_int(name.m_name.c_str() + 2, c, 16))
{
- LOG(LWARNING, ("Bad emoji code:", name.m_name));
+ LOG(LWARNING, ("Bad emoji code:", code));
continue;
}
name.m_name = ToUtf8(UniString(1, static_cast<UniChar>(c)));
+
+ if (IsASCIIString(ToUtf8(search::NormalizeAndSimplifyString(name.m_name))))
+ {
+ LOG(LWARNING, ("Bad emoji code:", code));
+ continue;
+ }
}
cat.m_synonyms.push_back(name);