Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/base
diff options
context:
space:
mode:
authorYuri Gorshenin <y@maps.me>2016-11-15 17:35:41 +0300
committerYuri Gorshenin <y@maps.me>2016-11-16 13:36:10 +0300
commite336eb4b6e571df71cc68166c7a09ad98f0609c3 (patch)
treedc1cc716f5db5037cea7e2729a55e3322cbdf7c7 /base
parentcf1f3d1fccbb11c6a1bf14450f43ffd3e4d1852f (diff)
[search] Added fuzzy search (up to 2 errors).
Diffstat (limited to 'base')
-rw-r--r--base/base_tests/levenshtein_dfa_test.cpp17
-rw-r--r--base/levenshtein_dfa.cpp30
-rw-r--r--base/levenshtein_dfa.hpp8
3 files changed, 45 insertions, 10 deletions
diff --git a/base/base_tests/levenshtein_dfa_test.cpp b/base/base_tests/levenshtein_dfa_test.cpp
index f3cd896fa2..cf8e776be2 100644
--- a/base/base_tests/levenshtein_dfa_test.cpp
+++ b/base/base_tests/levenshtein_dfa_test.cpp
@@ -93,4 +93,21 @@ UNIT_TEST(LevenshteinDFA_Smoke)
TEST(Rejects(dfa, "san"), ());
}
}
+
+UNIT_TEST(LevenshteinDFA_Prefix)
+{
+ {
+ LevenshteinDFA dfa("москва", 1 /* prefixCharsToKeep */, 1 /* maxErrors */);
+ TEST(Accepts(dfa, "москва"), ());
+ TEST(Accepts(dfa, "масква"), ());
+ TEST(Accepts(dfa, "моска"), ());
+ TEST(Rejects(dfa, "иосква"), ());
+ }
+ {
+ LevenshteinDFA dfa("москва", 0 /* prefixCharsToKeep */, 1 /* maxErrors */);
+ TEST(Accepts(dfa, "москва"), ());
+ TEST(Accepts(dfa, "иосква"), ());
+ TEST(Rejects(dfa, "моксва"), ());
+ }
+}
} // namespace
diff --git a/base/levenshtein_dfa.cpp b/base/levenshtein_dfa.cpp
index 6c78c4f2a4..54c4bb07e0 100644
--- a/base/levenshtein_dfa.cpp
+++ b/base/levenshtein_dfa.cpp
@@ -21,16 +21,18 @@ public:
{
}
- void Move(LevenshteinDFA::State const & s, UniChar c, LevenshteinDFA::State & t)
+ void Move(LevenshteinDFA::State const & s, size_t prefixCharsToKeep, UniChar c,
+ LevenshteinDFA::State & t)
{
t.Clear();
for (auto const & p : s.m_positions)
- GetMoves(p, c, t);
+ GetMoves(p, prefixCharsToKeep, c, t);
t.Normalize();
}
private:
- void GetMoves(LevenshteinDFA::Position const & p, UniChar c, LevenshteinDFA::State & t)
+ void GetMoves(LevenshteinDFA::Position const & p, size_t prefixCharsToKeep, UniChar c,
+ LevenshteinDFA::State & t)
{
auto & ps = t.m_positions;
@@ -43,6 +45,9 @@ private:
if (p.m_numErrors == m_maxErrors)
return;
+ if (p.m_offset < prefixCharsToKeep)
+ return;
+
ps.emplace_back(p.m_offset, p.m_numErrors + 1);
if (p.m_offset == m_size)
@@ -150,7 +155,7 @@ void LevenshteinDFA::State::Normalize()
// LevenshteinDFA ----------------------------------------------------------------------------------
// static
-LevenshteinDFA::LevenshteinDFA(UniString const & s, uint8_t maxErrors)
+LevenshteinDFA::LevenshteinDFA(UniString const & s, size_t prefixCharsToKeep, uint8_t maxErrors)
: m_size(s.size()), m_maxErrors(maxErrors)
{
m_alphabet.assign(s.begin(), s.end());
@@ -175,6 +180,7 @@ LevenshteinDFA::LevenshteinDFA(UniString const & s, uint8_t maxErrors)
states.emplace(state);
visited[state] = id;
m_transitions.emplace_back(m_alphabet.size());
+ m_accepting.push_back(false);
};
pushState(State::MakeStart(), kStartingState);
@@ -193,12 +199,12 @@ LevenshteinDFA::LevenshteinDFA(UniString const & s, uint8_t maxErrors)
ASSERT_LESS(id, m_transitions.size(), ());
if (IsAccepting(curr))
- m_accepting.insert(id);
+ m_accepting[id] = true;
for (size_t i = 0; i < m_alphabet.size(); ++i)
{
State next;
- table.Move(curr, m_alphabet[i], next);
+ table.Move(curr, prefixCharsToKeep, m_alphabet[i], next);
size_t nid;
@@ -218,8 +224,18 @@ LevenshteinDFA::LevenshteinDFA(UniString const & s, uint8_t maxErrors)
}
}
+LevenshteinDFA::LevenshteinDFA(string const & s, size_t prefixCharsToKeep, uint8_t maxErrors)
+ : LevenshteinDFA(MakeUniString(s), prefixCharsToKeep, maxErrors)
+{
+}
+
+LevenshteinDFA::LevenshteinDFA(UniString const & s, uint8_t maxErrors)
+ : LevenshteinDFA(s, 0 /* prefixCharsToKeep */, maxErrors)
+{
+}
+
LevenshteinDFA::LevenshteinDFA(string const & s, uint8_t maxErrors)
- : LevenshteinDFA(MakeUniString(s), maxErrors)
+ : LevenshteinDFA(s, 0 /* prefixCharsToKeep */, maxErrors)
{
}
diff --git a/base/levenshtein_dfa.hpp b/base/levenshtein_dfa.hpp
index bdf5ca5b30..0e3236784b 100644
--- a/base/levenshtein_dfa.hpp
+++ b/base/levenshtein_dfa.hpp
@@ -3,7 +3,7 @@
#include "base/string_utils.hpp"
#include "std/cstdint.hpp"
-#include "std/unordered_set.hpp"
+#include "std/vector.hpp"
namespace strings
{
@@ -83,6 +83,8 @@ public:
LevenshteinDFA const & m_dfa;
};
+ LevenshteinDFA(UniString const & s, size_t prefixCharsToKeep, uint8_t maxErrors);
+ LevenshteinDFA(string const & s, size_t prefixCharsToKeep, uint8_t maxErrors);
LevenshteinDFA(UniString const & s, uint8_t maxErrors);
LevenshteinDFA(string const & s, uint8_t maxErrors);
@@ -101,7 +103,7 @@ private:
bool IsAccepting(Position const & p) const;
bool IsAccepting(State const & s) const;
- inline bool IsAccepting(size_t s) const { return m_accepting.count(s) != 0; }
+ inline bool IsAccepting(size_t s) const { return m_accepting[s]; }
inline bool IsRejecting(State const & s) const { return s.m_positions.empty(); }
inline bool IsRejecting(size_t s) const { return s == kRejectingState; }
@@ -114,7 +116,7 @@ private:
vector<UniChar> m_alphabet;
vector<vector<size_t>> m_transitions;
- unordered_set<size_t> m_accepting;
+ vector<bool> m_accepting;
};
string DebugPrint(LevenshteinDFA::Position const & p);