Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Yury Melnichek <melnichek@gmail.com> 2011-09-01 23:24:12 +0400
committer: Alex Zolotarev <alex@maps.me> 2015-09-23 01:22:56 +0300
commit: 0c79ed900011be611c952221bc18d1aae5d77007 (patch)
tree: 4340617d7f2fb3c71d7e29ec2423c05b35d6b6c7 /generator/dumper.cpp
parent: dc2033017d2104ddb7d1e76c5babe516a0a30763 (diff)
[generator] Normalize names in --dump_prefixes the same way as we do in search.
Diffstat (limited to 'generator/dumper.cpp')
-rw-r--r-- generator/dumper.cpp 33
1 file changed, 13 insertions, 20 deletions
diff --git a/generator/dumper.cpp b/generator/dumper.cpp
index ab16f28e8f..980f6332e7 100644
--- a/generator/dumper.cpp
+++ b/generator/dumper.cpp
@@ -1,5 +1,8 @@
#include "dumper.hpp"
+#include "../indexer/search_delimiters.hpp"
+#include "../indexer/search_string_utils.hpp"
+
#include "../coding/multilang_utf8_string.hpp"
#include "../indexer/classificator.hpp"
@@ -74,7 +77,7 @@ namespace feature
///////////////////////////////////////////////////////////////////
- typedef map<int8_t, map<string, pair<unsigned int, string> > > TokensContainerT;
+ typedef map<int8_t, map<strings::UniString, pair<unsigned int, string> > > TokensContainerT;
class PrefixesCollector
{
public:
@@ -84,31 +87,20 @@ namespace feature
{
CHECK(!name.empty(), ("Feature name is empty"));
- vector<string> tokens;
- strings::SimpleTokenizer tok(name, " ");
- while (tok)
- {
- tokens.push_back(*tok);
- ++tok;
- }
+ vector<strings::UniString> tokens;
+ search::SplitUniString(search::NormalizeAndSimplifyString(name),
+ MakeBackInsertFunctor(tokens), search::Delimiters());
if (tokens.empty())
return true;
- /*
- // ignore token if it's first letter is an uppercase letter
- strings::UniString const s1 = strings::MakeUniString(tokens[0]);
- strings::UniString const s2 = strings::MakeLowerCase(s1);
- if (s1[0] != s2[0])
- return true;
- */
for (size_t i = 1; i < tokens.size(); ++i)
{
- string s;
+ strings::UniString s;
for (size_t numTokens = 0; numTokens < i; ++numTokens)
{
- s += tokens[numTokens];
- s += " ";
+ s.append(tokens[numTokens].begin(), tokens[numTokens].end());
+ s.push_back(' ');
}
pair<TokensContainerT::mapped_type::iterator, bool> found =
m_stats[langCode].insert(make_pair(s, make_pair(1U, name)));
@@ -128,7 +120,7 @@ namespace feature
void Print(int8_t langCode, TokensContainerT::mapped_type const & container)
{
- typedef pair<string, pair<unsigned int, string> > NameElemT;
+ typedef pair<strings::UniString, pair<unsigned int, string> > NameElemT;
typedef vector<NameElemT> VecToSortT;
VecToSortT v(container.begin(), container.end());
sort(v.begin(), v.end(), &SortFunc<NameElemT>);
@@ -142,7 +134,8 @@ namespace feature
{
if (it->second.first <= MIN_OCCURRENCE)
break;
- cout << it->second.first << " " << it->first << " \"" << it->second.second << "\"" << endl;
+ wcout << it->second.first << " " << std::wstring(it->first.begin(), it->first.end());
+ cout << " \"" << it->second.second << "\"" << endl;
}
}
}