Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alex Zolotarev <deathbaba@gmail.com>, 2011-05-23 20:33:32 +0400
committer: Alex Zolotarev <alex@maps.me>, 2015-09-23 01:17:54 +0300
commit: dd4c397bdc4f7fe6671ceeb08fa1d419c85cb77f (patch)
tree: 52e7352c5b1154cada8ff535ca0ae3bf948a9265
parent: cc317c77fd3e5fe641bd1ec1de4504ffe6747075 (diff)
Removed unnecessary base/utf8_string as it's implemented in base/string_utils
-rw-r--r--  base/base.pro  2
-rw-r--r--  base/base_tests/base_tests.pro  1
-rw-r--r--  base/base_tests/string_utils_test.cpp  8
-rw-r--r--  base/base_tests/utf8_string_test.cpp  61
-rw-r--r--  base/string_utils.cpp  9
-rw-r--r--  base/string_utils.hpp  3
-rw-r--r--  base/utf8_string.cpp  66
-rw-r--r--  base/utf8_string.hpp  13
-rw-r--r--  search/delimiters.cpp  35
-rw-r--r--  search/delimiters.hpp  14
-rw-r--r--  search/query.cpp  17
-rw-r--r--  search/query.hpp  4
-rw-r--r--  search/search.pro  14
-rw-r--r--  search/search_processor.cpp  5
14 files changed, 91 insertions, 161 deletions
diff --git a/base/base.pro b/base/base.pro
index 8a3b772c15..db8b80e8c2 100644
--- a/base/base.pro
+++ b/base/base.pro
@@ -20,7 +20,6 @@ SOURCES += \
memory_mapped_file.cpp \
path_utils.cpp \
condition.cpp \
- utf8_string.cpp \
HEADERS += \
SRC_FIRST.hpp \
@@ -63,4 +62,3 @@ HEADERS += \
buffer_vector.hpp \
path_utils.hpp \
array_adapters.hpp \
- utf8_string.hpp \
diff --git a/base/base_tests/base_tests.pro b/base/base_tests/base_tests.pro
index 6d98e5aee5..8c66fff5b7 100644
--- a/base/base_tests/base_tests.pro
+++ b/base/base_tests/base_tests.pro
@@ -29,6 +29,5 @@ SOURCES += \
matrix_test.cpp \
commands_queue_test.cpp \
buffer_vector_test.cpp \
- utf8_string_test.cpp \
HEADERS +=
diff --git a/base/base_tests/string_utils_test.cpp b/base/base_tests/string_utils_test.cpp
index 9e582ada71..4e402db222 100644
--- a/base/base_tests/string_utils_test.cpp
+++ b/base/base_tests/string_utils_test.cpp
@@ -113,3 +113,11 @@ UNIT_TEST(SimpleTokenizer)
}
}
+
+UNIT_TEST(LastUniChar)
+{
+ TEST_EQUAL(strings::LastUniChar(""), 0, ());
+ TEST_EQUAL(strings::LastUniChar("Hello"), 0x6f, ());
+ TEST_EQUAL(strings::LastUniChar(" \xD0\x90"), 0x0410, ());
+
+}
diff --git a/base/base_tests/utf8_string_test.cpp b/base/base_tests/utf8_string_test.cpp
deleted file mode 100644
index 0a55cacdc2..0000000000
--- a/base/base_tests/utf8_string_test.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-#include "../../testing/testing.hpp"
-
-#include "../utf8_string.hpp"
-
-using namespace utf8_string;
-
-bool IsDelimeter(uint32_t symbol)
-{
- switch (symbol)
- {
- case ' ':
- case '-':
- case '/':
- case ',':
- case '.':
- case 0x0336:
- return true;
- }
- return false;
-}
-
-UNIT_TEST(Utf8_Split)
-{
- vector<string> result;
- TEST(!Split("", result, &IsDelimeter), ());
- TEST_EQUAL(result.size(), 0, ());
-
- TEST(!Split(" - ,. ", result, &IsDelimeter), ());
- TEST_EQUAL(result.size(), 0, ());
-
- TEST(Split("London - is the capital of babai-city.", result, &IsDelimeter), ());
- TEST_EQUAL(result.size(), 7, ());
- TEST_EQUAL(result[0], "London", ());
- TEST_EQUAL(result[6], "city", ());
-
- // Доллар подорожал на 500 рублей ̶копеек
- char const * s =
- "- \xD0\x94\xD0\xBE\xD0\xBB\xD0\xBB\xD0\xB0\xD1\x80\x20\xD0\xBF\xD0\xBE\xD0\xB4\xD0"
- "\xBE\xD1\x80\xD0\xBE\xD0\xB6\xD0\xB0\xD0\xBB\x20\xD0\xBD\xD0\xB0\x20\x35\x30\x30"
- "\x20\xD1\x80\xD1\x83\xD0\xB1\xD0\xBB\xD0\xB5\xD0\xB9\x20\xCC\xB6\xD0\xBA\xD0\xBE"
- "\xD0\xBF\xD0\xB5\xD0\xB5\xD0\xBA -";
- TEST(Split(s, result, &IsDelimeter), ());
- TEST_EQUAL(result.size(), 6, ());
- TEST_EQUAL(result[3], "500", ());
- TEST_EQUAL(result[4], "\xD1\x80\xD1\x83\xD0\xB1\xD0\xBB\xD0\xB5\xD0\xB9", ());
- TEST_EQUAL(result[5], "\xD0\xBA\xD0\xBE\xD0\xBF\xD0\xB5\xD0\xB5\xD0\xBA", ());
-}
-
-UNIT_TEST(Utf8_Split_MultipleDelimeters)
-{
- vector<string> result;
- TEST(Split("A B C .,D", result, &IsDelimeter), ());
- char const * expected [] = {"A", "B", "C", "D"};
- TEST_EQUAL(result, vector<string>(&expected[0], &expected[0] + ARRAY_SIZE(expected)), ());
-}
-
-UNIT_TEST(Utf8_IsSearchDelimiter)
-{
- TEST(utf8_string::IsSearchDelimiter(static_cast<uint8_t>('~')), ());
- TEST(utf8_string::IsSearchDelimiter(static_cast<uint8_t>('`')), ());
-}
diff --git a/base/string_utils.cpp b/base/string_utils.cpp
index 6b676cc395..46e0d17d23 100644
--- a/base/string_utils.cpp
+++ b/base/string_utils.cpp
@@ -25,6 +25,15 @@ bool SimpleDelimiter::operator()(UniChar c) const
return false;
}
+UniChar LastUniChar(string const & s)
+{
+ if (s.empty())
+ return 0;
+ utf8::unchecked::iterator<string::const_iterator> iter(s.end());
+ --iter;
+ return *iter;
+}
+
bool to_int(char const * s, int & i)
{
char * stop;
diff --git a/base/string_utils.hpp b/base/string_utils.hpp
index 9ce3c7ff25..badc6f1468 100644
--- a/base/string_utils.hpp
+++ b/base/string_utils.hpp
@@ -93,6 +93,9 @@ void Tokenize(string const & str, char const * delims, FunctorT f)
}
}
+/// @return code of last symbol in string or 0 if s is empty
+UniChar LastUniChar(string const & s);
+
template <class T, size_t N, class TT> bool IsInArray(T (&arr) [N], TT const & t)
{
for (size_t i = 0; i < N; ++i)
diff --git a/base/utf8_string.cpp b/base/utf8_string.cpp
deleted file mode 100644
index b4afc1f424..0000000000
--- a/base/utf8_string.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-#include "utf8_string.hpp"
-
-#include "../std/iterator.hpp"
-
-#include "../3party/utfcpp/source/utf8/unchecked.h"
-
-namespace utf8_string
-{
- bool Split(string const & str, vector<string> & out, IsDelimiterFuncT f)
- {
- out.clear();
- string::const_iterator curr = str.begin();
- string::const_iterator end = str.end();
- string word;
- back_insert_iterator<string> inserter = back_inserter(word);
- while (curr != end)
- {
- uint32_t symbol = ::utf8::unchecked::next(curr);
- if (f(symbol))
- {
- if (!word.empty())
- {
- out.push_back(word);
- word.clear();
- inserter = back_inserter(word);
- }
- }
- else
- {
- inserter = utf8::unchecked::append(symbol, inserter);
- }
- }
- if (!word.empty())
- out.push_back(word);
- return !out.empty();
- }
-
- bool IsSearchDelimiter(uint32_t symbol)
- {
- // latin table optimization
- if (symbol >= ' ' && symbol < '0')
- return true;
-
- switch (symbol)
- {
- case ':':
- case ';':
- case '<':
- case '=':
- case '>':
- case '[':
- case ']':
- case '\\':
- case '^':
- case '_':
- case '`':
- case '{':
- case '}':
- case '|':
- case '~':
- case 0x0336:
- return true;
- }
- return false;
- }
-}
diff --git a/base/utf8_string.hpp b/base/utf8_string.hpp
deleted file mode 100644
index d3ca3e6caf..0000000000
--- a/base/utf8_string.hpp
+++ /dev/null
@@ -1,13 +0,0 @@
-#pragma once
-
-#include "../std/string.hpp"
-#include "../std/vector.hpp"
-#include "../std/stdint.hpp"
-
-namespace utf8_string
-{
- typedef bool (*IsDelimiterFuncT)(uint32_t);
- /// delimeters optimal for search
- bool IsSearchDelimiter(uint32_t symbol);
- bool Split(string const & str, vector<string> & out, IsDelimiterFuncT f = &IsSearchDelimiter);
-}
diff --git a/search/delimiters.cpp b/search/delimiters.cpp
new file mode 100644
index 0000000000..2be9f0f397
--- /dev/null
+++ b/search/delimiters.cpp
@@ -0,0 +1,35 @@
+#include "delimiters.hpp"
+
+namespace search
+{
+
+bool Delimiters::operator()(strings::UniChar c) const
+{
+ // @TODO impement full unicode range delimiters table
+ // latin table optimization
+ if (c >= ' ' && c < '0')
+ return true;
+ switch (c)
+ {
+ case ':':
+ case ';':
+ case '<':
+ case '=':
+ case '>':
+ case '[':
+ case ']':
+ case '\\':
+ case '^':
+ case '_':
+ case '`':
+ case '{':
+ case '}':
+ case '|':
+ case '~':
+ case 0x0336:
+ return true;
+ }
+ return false;
+}
+
+}
diff --git a/search/delimiters.hpp b/search/delimiters.hpp
new file mode 100644
index 0000000000..848c670aff
--- /dev/null
+++ b/search/delimiters.hpp
@@ -0,0 +1,14 @@
+#pragma once
+
+#include "../base/string_utils.hpp"
+
+namespace search
+{
+
+class Delimiters
+{
+public:
+ bool operator()(strings::UniChar c) const;
+};
+
+}
diff --git a/search/query.cpp b/search/query.cpp
index d2668c133c..abdb558d1c 100644
--- a/search/query.cpp
+++ b/search/query.cpp
@@ -1,19 +1,22 @@
#include "query.hpp"
-#include "../base/utf8_string.hpp"
+#include "delimiters.hpp"
+
+#include "../base/string_utils.hpp"
namespace search1
{
Query::Query(string const & query)
{
- utf8_string::Split(query, m_Keywords, &utf8_string::IsSearchDelimiter);
- if (!query.empty() && !utf8_string::IsSearchDelimiter(query[query.size() - 1]))
+ search::Delimiters delims;
+ strings::TokenizeIterator<search::Delimiters> iter(query, delims);
+ while (iter)
{
- m_Prefix.swap(m_Keywords.back());
- m_Keywords.pop_back();
+ if (iter.IsLast() && !delims(strings::LastUniChar(query)))
+ m_prefix = *iter;
+ else
+ m_keywords.push_back(*iter);
}
}
-
-
}
diff --git a/search/query.hpp b/search/query.hpp
index cdbf85bc94..d950b87e69 100644
--- a/search/query.hpp
+++ b/search/query.hpp
@@ -12,8 +12,8 @@ class Query
public:
explicit Query(string const & query);
private:
- vector<string> m_Keywords;
- string m_Prefix;
+ vector<string> m_keywords;
+ string m_prefix;
};
} // namespace search1
diff --git a/search/search.pro b/search/search.pro
index 857e529f4a..086453d34f 100644
--- a/search/search.pro
+++ b/search/search.pro
@@ -10,11 +10,13 @@ DEPENDENCIES = indexer geometry coding base
include($$ROOT_DIR/common.pri)
HEADERS += \
- query.hpp \
- search_processor.hpp \
- string_match.hpp \
+ query.hpp \
+ search_processor.hpp \
+ string_match.hpp \
+ delimiters.hpp \
SOURCES += \
- query.cpp \
- search_processor.cpp \
- string_match.cpp \
+ query.cpp \
+ search_processor.cpp \
+ string_match.cpp \
+ delimiters.cpp \
diff --git a/search/search_processor.cpp b/search/search_processor.cpp
index 088ce4c360..a0900a2f8e 100644
--- a/search/search_processor.cpp
+++ b/search/search_processor.cpp
@@ -3,7 +3,6 @@
#include "../indexer/feature.hpp"
#include "../indexer/classificator.hpp"
-#include "../base/utf8_string.hpp"
#include "../base/logging.hpp"
#include "../std/bind.hpp"
@@ -31,13 +30,13 @@ namespace search
Query::Query(string const & line)
{
- utf8_string::Split(line, m_tokens);
+ //utf8_string::Split(line, m_tokens);
}
bool Query::operator()(char lang, string const & utf8s)
{
vector<string> words;
- utf8_string::Split(utf8s, words);
+ //utf8_string::Split(utf8s, words);
int score = -1;
for (size_t i = 0; i < m_tokens.size(); ++i)
{