Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author vng <viktor.govako@gmail.com> 2015-12-08 13:56:57 +0300
committer Sergey Yershov <yershov@corp.mail.ru> 2016-03-23 16:03:01 +0300
commit 834f8be3f8fafc6f05d7bfe5661a49572291b417 (patch)
tree e39f7a65d92f3f6fa215b90bd959e8e8e6433fd0 /indexer/search_string_utils.hpp
parent 1f6bb2ee6740c14d85e40ec6dcb95ba0b4eb824c (diff)
[search] Moved search components to the search library. Moved search index builder to the generator.
Diffstat (limited to 'indexer/search_string_utils.hpp')
-rw-r--r-- indexer/search_string_utils.hpp 117
1 files changed, 0 insertions, 117 deletions
diff --git a/indexer/search_string_utils.hpp b/indexer/search_string_utils.hpp
deleted file mode 100644
index f131bb73a4..0000000000
--- a/indexer/search_string_utils.hpp
+++ /dev/null
@@ -1,117 +0,0 @@
-#pragma once
-#include "base/string_utils.hpp"
-
-#include "std/algorithm.hpp"
-
-namespace search
-{
-
-// Single entry point for search string normalization: every search string should pass through this function.
-// It folds several special letters to ASCII, lowercases, normalizes, and strips combining grave/acute accents.
-inline strings::UniString NormalizeAndSimplifyString(string const & s)
-{
- using namespace strings;
- UniString uniString = MakeUniString(s);
- for (size_t i = 0; i < uniString.size(); ++i) // size() is re-evaluated each pass: the ligature cases below grow the string.
- {
- UniChar & c = uniString[i]; // Only written before any insert() below, never touched after the string may have changed.
- switch (c)
- {
- // Replace "d with stroke" (Đ/đ) with a plain 'd'. Used in Vietnamese.
- // (unicode-compliant implementation leaves it unchanged)
- case 0x0110:
- case 0x0111: c = 'd'; break;
- // Replace small Turkish dotless 'ı' with dotted 'i'.
- // Our own invented hack to avoid the well-known Turkish I-letter bug.
- case 0x0131: c = 'i'; break;
- // Replace capital Turkish dotted 'İ' with lowercase dotted 'i'.
- // Here we need to handle this case manually too, because default unicode-compliant implementation
- // of MakeLowerCase converts 'İ' to 'i' + 0x0307.
- case 0x0130: c = 'i'; break;
- // Some Danish-specific hacks (Ø/ø, Æ/æ), plus expansion of the Œ/œ ligature.
- case 0x00d8: // Ø
- case 0x00f8: c = 'o'; break; // ø
- case 0x0152: // Œ
- case 0x0153: // œ
- c = 'o';
- uniString.insert(uniString.begin() + (i++) + 1, 'e'); // Insert 'e' right after position i, then advance i past it.
- break;
- case 0x00c6: // Æ
- case 0x00e6: // æ
- c = 'a';
- uniString.insert(uniString.begin() + (i++) + 1, 'e'); // Insert 'e' right after position i, then advance i past it.
- break;
- }
- }
-
- MakeLowerCaseInplace(uniString);
- NormalizeInplace(uniString);
-
- // Remove combining accents that can appear after NFKD normalization (only grave and acute are stripped).
- uniString.erase_if([](UniChar const & c)
- {
- // ̀ COMBINING GRAVE ACCENT
- // ́ COMBINING ACUTE ACCENT
- return (c == 0x0300 || c == 0x0301);
- });
-
- return uniString;
-
- /// @todo Restore this logic to distinguish и-й in future.
- /*
- // Just after lower casing is a correct place to avoid normalization for specific chars.
- static auto const isSpecificChar = [](UniChar c) -> bool
- {
- return c == 0x0439; // й
- };
- UniString result;
- result.reserve(uniString.size());
- for (auto i = uniString.begin(), end = uniString.end(); i != end;)
- {
- auto j = find_if(i, end, isSpecificChar);
- // We don't check if (j != i) because UniString and Normalize handle it correctly.
- UniString normString(i, j);
- NormalizeInplace(normString);
- result.insert(result.end(), normString.begin(), normString.end());
- if (j == end)
- break;
- result.push_back(*j);
- i = j + 1;
- }
- return result;
- */
-}
-
-template <class DelimsT, typename F> // Feeds each token of uniS (as split by delims) to f, in iteration order.
-void SplitUniString(strings::UniString const & uniS, F f, DelimsT const & delims) // f is taken by value: the caller's functor object is copied.
-{
- for (strings::TokenizeIterator<DelimsT> iter(uniS, delims); iter; ++iter)
- f(iter.GetUniString());
-}
-
-strings::UniString FeatureTypeToString(uint32_t type);
-
-template <class ContainerT, class DelimsT> // Splits s by delimiter into tokens (via MakeBackInsertFunctor) and reports whether s ends mid-token.
-bool TokenizeStringAndCheckIfLastTokenIsPrefix(strings::UniString const & s,
- ContainerT & tokens,
- DelimsT const & delimiter)
-{
- SplitUniString(s, MakeBackInsertFunctor(tokens), delimiter);
- return !s.empty() && !delimiter(s.back()); // True only if s is non-empty and its last character is not a delimiter.
-}
-
-
-template <class ContainerT, class DelimsT> // Overload for raw strings: normalizes s first, then delegates to the UniString overload.
-bool TokenizeStringAndCheckIfLastTokenIsPrefix(string const & s,
- ContainerT & tokens,
- DelimsT const & delimiter)
-{
- return TokenizeStringAndCheckIfLastTokenIsPrefix(NormalizeAndSimplifyString(s),
- tokens,
- delimiter);
-}
-
-void GetStreetName(strings::SimpleTokenizer iter, string & streetName);
-void GetStreetNameAsKey(string const & name, string & res);
-
-} // namespace search