diff options
author | Yury Melnichek <melnichek@gmail.com> | 2011-05-29 19:02:34 +0400 |
---|---|---|
committer | Alex Zolotarev <alex@maps.me> | 2015-09-23 01:18:19 +0300 |
commit | 2380dd73bea3e20199cf3534a7c21ef1bd1cbab3 (patch) | |
tree | 8f60d41187a653602d2dab3419b581eb38756498 /base/string_utils.hpp | |
parent | 2929b89ced4dbc23279a6fc568b7a9ec5d1c0d7f (diff) |
[search] Use normalization and case folding. Refactor TokenizeIterator.
Diffstat (limited to 'base/string_utils.hpp')
-rw-r--r-- | base/string_utils.hpp | 18 |
1 files changed, 12 insertions, 6 deletions
diff --git a/base/string_utils.hpp b/base/string_utils.hpp
index e13eb2b7de..4926611351 100644
--- a/base/string_utils.hpp
+++ b/base/string_utils.hpp
@@ -31,11 +31,10 @@ inline UniString MakeUniString(string const & s)
   return result;
 }
 
-template <typename DelimFuncT>
+template <typename DelimFuncT, typename UniCharIterT = UniString::const_iterator>
 class TokenizeIterator
 {
-  typedef utf8::unchecked::iterator<string::const_iterator> Utf8IterT;
-  Utf8IterT m_beg, m_end, m_finish;
+  UniCharIterT m_beg, m_end, m_finish;
   DelimFuncT m_delimFunc;
 
   /// Explicitly disabled, because we're storing iterators for string
@@ -68,6 +67,12 @@ public:
     move();
   }
 
+  TokenizeIterator(UniString const & s, DelimFuncT delimFunc)
+  : m_beg(s.begin()), m_end(s.begin()), m_finish(s.end()), m_delimFunc(delimFunc)
+  {
+    move();
+  }
+
   string operator*() const
   {
     ASSERT( m_beg != m_finish, ("dereferencing of empty iterator") );
@@ -86,7 +91,7 @@ public:
   {
     if (!*this)
       return false;
-    TokenizeIterator<DelimFuncT> copy(*this);
+    TokenizeIterator<DelimFuncT, UniCharIterT> copy(*this);
     ++copy;
     return !copy;
   }
@@ -94,7 +99,7 @@ public:
   UniString GetUniString() const
   {
     UniString result;
-    Utf8IterT iter(m_beg);
+    UniCharIterT iter(m_beg);
     while (iter != m_end)
     {
       result.push_back(*iter);
@@ -113,7 +118,8 @@ public:
   bool operator()(UniChar c) const;
 };
 
-typedef TokenizeIterator<SimpleDelimiter> SimpleTokenizer;
+typedef TokenizeIterator<SimpleDelimiter,
+                         ::utf8::unchecked::iterator<string::const_iterator> > SimpleTokenizer;
 
 template <typename FunctorT>
 void Tokenize(string const & str, char const * delims, FunctorT f)