Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Yury Melnichek <melnichek@gmail.com>, 2011-05-29 19:02:34 +0400
committer: Alex Zolotarev <alex@maps.me>, 2015-09-23 01:18:19 +0300
commit: 2380dd73bea3e20199cf3534a7c21ef1bd1cbab3 (patch)
tree: 8f60d41187a653602d2dab3419b581eb38756498 /base/string_utils.hpp
parent: 2929b89ced4dbc23279a6fc568b7a9ec5d1c0d7f (diff)
[search] Use normalization and case folding. Refactor TokenizeIterator.
Diffstat (limited to 'base/string_utils.hpp')
-rw-r--r-- base/string_utils.hpp 18
1 file changed, 12 insertions, 6 deletions
diff --git a/base/string_utils.hpp b/base/string_utils.hpp
index e13eb2b7de..4926611351 100644
--- a/base/string_utils.hpp
+++ b/base/string_utils.hpp
@@ -31,11 +31,10 @@ inline UniString MakeUniString(string const & s)
return result;
}
-template <typename DelimFuncT>
+template <typename DelimFuncT, typename UniCharIterT = UniString::const_iterator>
class TokenizeIterator
{
- typedef utf8::unchecked::iterator<string::const_iterator> Utf8IterT;
- Utf8IterT m_beg, m_end, m_finish;
+ UniCharIterT m_beg, m_end, m_finish;
DelimFuncT m_delimFunc;
/// Explicitly disabled, because we're storing iterators for string
@@ -68,6 +67,12 @@ public:
move();
}
+ TokenizeIterator(UniString const & s, DelimFuncT delimFunc)
+ : m_beg(s.begin()), m_end(s.begin()), m_finish(s.end()), m_delimFunc(delimFunc)
+ {
+ move();
+ }
+
string operator*() const
{
ASSERT( m_beg != m_finish, ("dereferencing of empty iterator") );
@@ -86,7 +91,7 @@ public:
{
if (!*this)
return false;
- TokenizeIterator<DelimFuncT> copy(*this);
+ TokenizeIterator<DelimFuncT, UniCharIterT> copy(*this);
++copy;
return !copy;
}
@@ -94,7 +99,7 @@ public:
UniString GetUniString() const
{
UniString result;
- Utf8IterT iter(m_beg);
+ UniCharIterT iter(m_beg);
while (iter != m_end)
{
result.push_back(*iter);
@@ -113,7 +118,8 @@ public:
bool operator()(UniChar c) const;
};
-typedef TokenizeIterator<SimpleDelimiter> SimpleTokenizer;
+typedef TokenizeIterator<SimpleDelimiter,
+ ::utf8::unchecked::iterator<string::const_iterator> > SimpleTokenizer;
template <typename FunctorT>
void Tokenize(string const & str, char const * delims, FunctorT f)