Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/search
diff options
context:
space:
mode:
authorYuri Gorshenin <y@maps.me>2016-06-23 11:59:28 +0300
committerYuri Gorshenin <y@maps.me>2016-07-01 17:52:01 +0300
commit36fe4a08508b790a16677465e2e0ed5db70f0414 (patch)
tree68309693a2d80f27923645f8cefe76b99a3ea82d /search
parentcd6395349a25803dfbee006001eea5939ec15ec9 (diff)
[search] Implemented Oracle.
Diffstat (limited to 'search')
-rw-r--r--search/cbv_ptr.cpp45
-rw-r--r--search/cbv_ptr.hpp24
-rw-r--r--search/geocoder.cpp318
-rw-r--r--search/geocoder.hpp72
-rw-r--r--search/geocoder_context.cpp36
-rw-r--r--search/geocoder_context.hpp38
-rw-r--r--search/mwm_context.hpp1
-rw-r--r--search/query_params.hpp4
-rw-r--r--search/search.pro4
-rw-r--r--search/streets_matcher.cpp173
-rw-r--r--search/streets_matcher.hpp37
11 files changed, 465 insertions, 287 deletions
diff --git a/search/cbv_ptr.cpp b/search/cbv_ptr.cpp
index 7bdf63e64a..65eef82fd7 100644
--- a/search/cbv_ptr.cpp
+++ b/search/cbv_ptr.cpp
@@ -4,6 +4,10 @@ namespace search
{
CBVPtr::CBVPtr(coding::CompressedBitVector const * p, bool isOwner) { Set(p, isOwner); }
+CBVPtr::CBVPtr(CBVPtr && rhs) { *this = move(rhs); }
+
+CBVPtr::~CBVPtr() { Release(); }
+
void CBVPtr::Release()
{
if (m_isOwner)
@@ -14,6 +18,12 @@ void CBVPtr::Release()
m_isFull = false;
}
+void CBVPtr::SetFull()
+{
+ Release();
+ m_isFull = true;
+}
+
void CBVPtr::Set(coding::CompressedBitVector const * p, bool isOwner /* = false*/)
{
Release();
@@ -27,6 +37,22 @@ void CBVPtr::Set(unique_ptr<coding::CompressedBitVector> p)
Set(p.release(), true /* isOwner */);
}
+CBVPtr & CBVPtr::operator=(CBVPtr && rhs) noexcept
+{
+ if (this == &rhs)
+ return *this;
+
+ m_ptr = rhs.m_ptr;
+ m_isOwner = rhs.m_isOwner;
+ m_isFull = rhs.m_isFull;
+
+ rhs.m_ptr = nullptr;
+ rhs.m_isOwner = false;
+ rhs.m_isFull = false;
+
+ return *this;
+}
+
void CBVPtr::Union(coding::CompressedBitVector const * p)
{
if (!p || m_isFull)
@@ -36,10 +62,11 @@ void CBVPtr::Union(coding::CompressedBitVector const * p)
{
m_ptr = p;
m_isFull = false;
+ m_isOwner = false;
}
else
{
- Set(coding::CompressedBitVector::Union(*m_ptr, *p).release(), true);
+ Set(coding::CompressedBitVector::Union(*m_ptr, *p).release(), true /* isOwner */);
}
}
@@ -53,15 +80,27 @@ void CBVPtr::Intersect(coding::CompressedBitVector const * p)
if (m_ptr)
{
- Set(coding::CompressedBitVector::Intersect(*m_ptr, *p).release(), true);
+ Set(coding::CompressedBitVector::Intersect(*m_ptr, *p).release(), true /* isOwner */);
}
else if (m_isFull)
{
m_ptr = p;
m_isFull = false;
+ m_isOwner = false;
}
}
-bool CBVPtr::IsEmpty() const { return !m_isFull && coding::CompressedBitVector::IsEmpty(m_ptr); }
+void CBVPtr::CopyTo(CBVPtr & rhs)
+{
+ rhs.Release();
+ if (m_isFull)
+ {
+ rhs.SetFull();
+ return;
+ }
+
+ if (m_ptr)
+ rhs.Set(m_ptr->Clone());
+}
} // namespace search
diff --git a/search/cbv_ptr.hpp b/search/cbv_ptr.hpp
index 3c9cd68fd8..a769ac6886 100644
--- a/search/cbv_ptr.hpp
+++ b/search/cbv_ptr.hpp
@@ -14,7 +14,7 @@ namespace search
/// binary operators logic and takes ownership if needed.
class CBVPtr
{
- DISALLOW_COPY_AND_MOVE(CBVPtr);
+ DISALLOW_COPY(CBVPtr);
coding::CompressedBitVector const * m_ptr = nullptr;
bool m_isOwner = false;
@@ -25,27 +25,28 @@ class CBVPtr
public:
CBVPtr() = default;
CBVPtr(coding::CompressedBitVector const * p, bool isOwner);
- ~CBVPtr() { Release(); }
-
- inline void SetFull()
- {
- Release();
- m_isFull = true;
- }
+ CBVPtr(CBVPtr && rhs);
+ ~CBVPtr();
+ void SetFull();
void Set(coding::CompressedBitVector const * p, bool isOwner = false);
void Set(unique_ptr<coding::CompressedBitVector> p);
inline coding::CompressedBitVector const * Get() const { return m_ptr; }
- coding::CompressedBitVector const & operator*() const { return *m_ptr; }
- coding::CompressedBitVector const * operator->() const { return m_ptr; }
+ inline coding::CompressedBitVector const & operator*() const { return *m_ptr; }
+ inline coding::CompressedBitVector const * operator->() const { return m_ptr; }
+ CBVPtr & operator=(CBVPtr && rhs) noexcept;
- bool IsEmpty() const;
+ inline bool IsEmpty() const { return !m_isFull && coding::CompressedBitVector::IsEmpty(m_ptr); }
+ inline bool IsFull() const { return m_isFull; }
+ inline bool IsOwner() const noexcept { return m_isOwner; }
void Union(coding::CompressedBitVector const * p);
void Intersect(coding::CompressedBitVector const * p);
+ void CopyTo(CBVPtr & rhs);
+
template <class TFn>
void ForEach(TFn && fn) const
{
@@ -54,5 +55,4 @@ public:
coding::CompressedBitVectorEnumerator::ForEach(*m_ptr, forward<TFn>(fn));
}
};
-
} // namespace search
diff --git a/search/geocoder.cpp b/search/geocoder.cpp
index 29a9af2409..0c5079f6fb 100644
--- a/search/geocoder.cpp
+++ b/search/geocoder.cpp
@@ -411,12 +411,10 @@ Geocoder::Geocoder(Index & index, storage::CountryInfoGetter const & infoGetter,
: m_index(index)
, m_infoGetter(infoGetter)
, m_cancellable(cancellable)
- , m_numTokens(0)
, m_model(SearchModel::Instance())
, m_pivotRectsCache(kPivotRectsCacheSize, m_cancellable, Processor::kMaxViewportRadiusM)
, m_localityRectsCache(kLocalityRectsCacheSize, m_cancellable)
, m_pivotFeatures(index)
- , m_villages(nullptr)
, m_filter(nullptr)
, m_matcher(nullptr)
, m_finder(m_cancellable)
@@ -446,13 +444,10 @@ void Geocoder::SetParams(Params const & params)
}
m_retrievalParams = m_params;
- m_numTokens = m_params.m_tokens.size();
- if (!m_params.m_prefixTokens.empty())
- ++m_numTokens;
// Remove all category synonyms for streets, as they're extracted
// individually via LoadStreets.
- for (size_t i = 0; i < m_numTokens; ++i)
+ for (size_t i = 0; i < m_params.GetNumTokens(); ++i)
{
auto & synonyms = m_params.GetTokens(i);
ASSERT(!synonyms.empty(), ());
@@ -488,7 +483,7 @@ void Geocoder::GoEverywhere(PreRanker & preRanker)
MY_SCOPE_GUARD(stopProfiler, &ProfilerStop);
#endif
- if (m_numTokens == 0)
+ if (m_params.GetNumTokens() == 0)
return;
vector<shared_ptr<MwmInfo>> infos;
@@ -499,7 +494,7 @@ void Geocoder::GoEverywhere(PreRanker & preRanker)
void Geocoder::GoInViewport(PreRanker & preRanker)
{
- if (m_numTokens == 0)
+ if (m_params.GetNumTokens() == 0)
return;
vector<shared_ptr<MwmInfo>> infos;
@@ -533,10 +528,12 @@ void Geocoder::GoImpl(PreRanker & preRanker, vector<shared_ptr<MwmInfo>> & infos
// it's ok to save MwmId.
m_worldId = handle.GetId();
m_context = make_unique<MwmContext>(move(handle));
+
if (HasSearchIndex(value))
{
- PrepareAddressFeatures();
- FillLocalitiesTable();
+ BaseContext ctx;
+ InitBaseContext(ctx);
+ FillLocalitiesTable(ctx);
}
m_context.reset();
}
@@ -561,15 +558,13 @@ void Geocoder::GoImpl(PreRanker & preRanker, vector<shared_ptr<MwmInfo>> & infos
{
ASSERT(context, ());
m_context = move(context);
+
MY_SCOPE_GUARD(cleanup, [&]()
{
LOG(LDEBUG, (m_context->GetName(), "geocoding complete."));
m_matcher->OnQueryFinished();
m_matcher = nullptr;
m_context.reset();
- m_addressFeatures.clear();
- m_streets = nullptr;
- m_villages = nullptr;
});
auto it = m_matchersCache.find(m_context->GetId());
@@ -582,7 +577,8 @@ void Geocoder::GoImpl(PreRanker & preRanker, vector<shared_ptr<MwmInfo>> & infos
m_matcher = it->second.get();
m_matcher->SetContext(m_context.get());
- PrepareAddressFeatures();
+ BaseContext ctx;
+ InitBaseContext(ctx);
coding::CompressedBitVector const * viewportCBV = nullptr;
if (inViewport)
@@ -590,33 +586,25 @@ void Geocoder::GoImpl(PreRanker & preRanker, vector<shared_ptr<MwmInfo>> & infos
if (viewportCBV)
{
- for (size_t i = 0; i < m_numTokens; ++i)
- {
- m_addressFeatures[i] =
- coding::CompressedBitVector::Intersect(*m_addressFeatures[i], *viewportCBV);
- }
+ for (auto & features : ctx.m_features)
+ features = coding::CompressedBitVector::Intersect(*features, *viewportCBV);
}
- // |m_streets| will be initialized in LimitedSearch() and its
- // callees, if needed.
- m_streets = nullptr;
-
- m_villages = LoadVillages(*m_context);
+ ctx.m_villages = LoadVillages(*m_context);
auto citiesFromWorld = m_cities;
- FillVillageLocalities();
+ FillVillageLocalities(ctx);
MY_SCOPE_GUARD(remove_villages, [&]()
{
m_cities = citiesFromWorld;
});
- m_usedTokens.assign(m_numTokens, false);
m_lastMatchedRegion = nullptr;
- MatchRegions(REGION_TYPE_COUNTRY);
+ MatchRegions(ctx, REGION_TYPE_COUNTRY);
if (index < numIntersectingMaps || m_preRanker->IsEmpty())
- MatchAroundPivot();
+ MatchAroundPivot(ctx);
};
// Iterates through all alive mwms and performs geocoding.
@@ -636,17 +624,15 @@ void Geocoder::ClearCaches()
m_localityRectsCache.Clear();
m_pivotFeatures.Clear();
- m_addressFeatures.clear();
m_matchersCache.clear();
m_streetsCache.clear();
- m_villages.reset();
m_postcodes.Clear();
}
void Geocoder::PrepareRetrievalParams(size_t curToken, size_t endToken)
{
ASSERT_LESS(curToken, endToken, ());
- ASSERT_LESS_OR_EQUAL(endToken, m_numTokens, ());
+ ASSERT_LESS_OR_EQUAL(endToken, m_params.GetNumTokens(), ());
m_retrievalParams.m_tokens.clear();
m_retrievalParams.m_prefixTokens.clear();
@@ -663,15 +649,17 @@ void Geocoder::PrepareRetrievalParams(size_t curToken, size_t endToken)
}
}
-void Geocoder::PrepareAddressFeatures()
+void Geocoder::InitBaseContext(BaseContext & ctx)
{
- m_addressFeatures.resize(m_numTokens);
- for (size_t i = 0; i < m_numTokens; ++i)
+ ctx.m_usedTokens.assign(m_params.GetNumTokens(), false);
+ ctx.m_numTokens = m_params.GetNumTokens();
+ ctx.m_features.resize(ctx.m_numTokens);
+ for (size_t i = 0; i < ctx.m_features.size(); ++i)
{
PrepareRetrievalParams(i, i + 1);
- m_addressFeatures[i] = RetrieveAddressFeatures(m_context->GetId(), m_context->m_value,
- m_cancellable, m_retrievalParams);
- ASSERT(m_addressFeatures[i], ());
+ ctx.m_features[i] = RetrieveAddressFeatures(m_context->GetId(), m_context->m_value,
+ m_cancellable, m_retrievalParams);
+ ASSERT(ctx.m_features[i], ());
}
}
@@ -688,13 +676,14 @@ void Geocoder::InitLayer(SearchModel::SearchType type, size_t startToken, size_t
layer.m_lastTokenIsPrefix = (layer.m_endToken > m_params.m_tokens.size());
}
-void Geocoder::FillLocalityCandidates(coding::CompressedBitVector const * filter,
+void Geocoder::FillLocalityCandidates(BaseContext const & ctx,
+ coding::CompressedBitVector const * filter,
size_t const maxNumLocalities,
vector<Locality> & preLocalities)
{
preLocalities.clear();
- for (size_t startToken = 0; startToken < m_numTokens; ++startToken)
+ for (size_t startToken = 0; startToken < ctx.m_numTokens; ++startToken)
{
CBVPtr intersection;
CBVPtr unfilteredIntersection;
@@ -703,13 +692,13 @@ void Geocoder::FillLocalityCandidates(coding::CompressedBitVector const * filter
if (filter)
{
intersection.Intersect(filter);
- unfilteredIntersection.Intersect(m_addressFeatures[startToken].get());
+ unfilteredIntersection.Intersect(ctx.m_features[startToken].get());
}
- intersection.Intersect(m_addressFeatures[startToken].get());
+ intersection.Intersect(ctx.m_features[startToken].get());
if (intersection.IsEmpty())
continue;
- for (size_t endToken = startToken + 1; endToken <= m_numTokens; ++endToken)
+ for (size_t endToken = startToken + 1; endToken <= ctx.m_numTokens; ++endToken)
{
// Skip locality candidates that match only numbers.
if (!m_params.IsNumberTokens(startToken, endToken))
@@ -730,11 +719,11 @@ void Geocoder::FillLocalityCandidates(coding::CompressedBitVector const * filter
});
}
- if (endToken < m_numTokens)
+ if (endToken < ctx.m_numTokens)
{
- intersection.Intersect(m_addressFeatures[endToken].get());
+ intersection.Intersect(ctx.m_features[endToken].get());
if (filter)
- unfilteredIntersection.Intersect(m_addressFeatures[endToken].get());
+ unfilteredIntersection.Intersect(ctx.m_features[endToken].get());
if (intersection.IsEmpty())
break;
}
@@ -746,10 +735,10 @@ void Geocoder::FillLocalityCandidates(coding::CompressedBitVector const * filter
scorer.GetTopLocalities(maxNumLocalities, preLocalities);
}
-void Geocoder::FillLocalitiesTable()
+void Geocoder::FillLocalitiesTable(BaseContext const & ctx)
{
vector<Locality> preLocalities;
- FillLocalityCandidates(nullptr, kMaxNumLocalities, preLocalities);
+ FillLocalityCandidates(ctx, nullptr /* filter */, kMaxNumLocalities, preLocalities);
size_t numCities = 0;
size_t numStates = 0;
@@ -818,10 +807,10 @@ void Geocoder::FillLocalitiesTable()
}
}
-void Geocoder::FillVillageLocalities()
+void Geocoder::FillVillageLocalities(BaseContext const & ctx)
{
vector<Locality> preLocalities;
- FillLocalityCandidates(m_villages.get(), kMaxNumVillages, preLocalities);
+ FillLocalityCandidates(ctx, ctx.m_villages.get() /* filter */, kMaxNumVillages, preLocalities);
size_t numVillages = 0;
@@ -874,19 +863,19 @@ void Geocoder::ForEachCountry(vector<shared_ptr<MwmInfo>> const & infos, TFn &&
}
}
-void Geocoder::MatchRegions(RegionType type)
+void Geocoder::MatchRegions(BaseContext & ctx, RegionType type)
{
switch (type)
{
case REGION_TYPE_STATE:
// Tries to skip state matching and go to cities matching.
// Then, performs states matching.
- MatchCities();
+ MatchCities(ctx);
break;
case REGION_TYPE_COUNTRY:
// Tries to skip country matching and go to states matching.
// Then, performs countries matching.
- MatchRegions(REGION_TYPE_STATE);
+ MatchRegions(ctx, REGION_TYPE_STATE);
break;
case REGION_TYPE_COUNT: ASSERT(false, ("Invalid region type.")); return;
}
@@ -903,7 +892,7 @@ void Geocoder::MatchRegions(RegionType type)
size_t const startToken = p.first.first;
size_t const endToken = p.first.second;
- if (HasUsedTokensInRange(startToken, endToken))
+ if (ctx.HasUsedTokensInRange(startToken, endToken))
continue;
for (auto const & region : p.second)
@@ -927,8 +916,8 @@ void Geocoder::MatchRegions(RegionType type)
if (!matches)
continue;
- ScopedMarkTokens mark(m_usedTokens, startToken, endToken);
- if (AllTokensUsed())
+ ScopedMarkTokens mark(ctx.m_usedTokens, startToken, endToken);
+ if (ctx.AllTokensUsed())
{
// Region matches to search query, we need to emit it as is.
EmitResult(region, startToken, endToken);
@@ -942,22 +931,22 @@ void Geocoder::MatchRegions(RegionType type)
});
switch (type)
{
- case REGION_TYPE_STATE: MatchCities(); break;
- case REGION_TYPE_COUNTRY: MatchRegions(REGION_TYPE_STATE); break;
+ case REGION_TYPE_STATE: MatchCities(ctx); break;
+ case REGION_TYPE_COUNTRY: MatchRegions(ctx, REGION_TYPE_STATE); break;
case REGION_TYPE_COUNT: ASSERT(false, ("Invalid region type.")); break;
}
}
}
}
-void Geocoder::MatchCities()
+void Geocoder::MatchCities(BaseContext & ctx)
{
// Localities are ordered my (m_startToken, m_endToken) pairs.
for (auto const & p : m_cities)
{
size_t const startToken = p.first.first;
size_t const endToken = p.first.second;
- if (HasUsedTokensInRange(startToken, endToken))
+ if (ctx.HasUsedTokensInRange(startToken, endToken))
continue;
for (auto const & city : p.second)
@@ -970,8 +959,8 @@ void Geocoder::MatchCities()
continue;
}
- ScopedMarkTokens mark(m_usedTokens, startToken, endToken);
- if (AllTokensUsed())
+ ScopedMarkTokens mark(ctx.m_usedTokens, startToken, endToken);
+ if (ctx.AllTokensUsed())
{
// City matches to search query, we need to emit it as is.
EmitResult(city, startToken, endToken);
@@ -989,12 +978,12 @@ void Geocoder::MatchCities()
continue;
LocalityFilter filter(*cityFeatures);
- LimitedSearch(filter);
+ LimitedSearch(ctx, filter);
}
}
}
-void Geocoder::MatchAroundPivot()
+void Geocoder::MatchAroundPivot(BaseContext & ctx)
{
auto const * features = RetrieveGeometryFeatures(*m_context, m_params.m_pivot, RECT_ID_PIVOT);
@@ -1002,10 +991,10 @@ void Geocoder::MatchAroundPivot()
return;
ViewportFilter filter(*features, m_preRanker->Limit() /* threshold */);
- LimitedSearch(filter);
+ LimitedSearch(ctx, filter);
}
-void Geocoder::LimitedSearch(FeaturesFilter const & filter)
+void Geocoder::LimitedSearch(BaseContext & ctx, FeaturesFilter const & filter)
{
m_filter = &filter;
MY_SCOPE_GUARD(resetFilter, [&]()
@@ -1013,36 +1002,36 @@ void Geocoder::LimitedSearch(FeaturesFilter const & filter)
m_filter = nullptr;
});
- if (!m_streets)
- m_streets = LoadStreets(*m_context);
+ if (!ctx.m_streets)
+ ctx.m_streets = LoadStreets(*m_context);
- MatchUnclassified(0 /* curToken */);
+ MatchUnclassified(ctx, 0 /* curToken */);
- auto search = [this]()
+ auto search = [this, &ctx]()
{
- GreedilyMatchStreets();
- MatchPOIsAndBuildings(0 /* curToken */);
+ GreedilyMatchStreets(ctx);
+ MatchPOIsAndBuildings(ctx, 0 /* curToken */);
};
- WithPostcodes(search);
+ WithPostcodes(ctx, search);
search();
}
template <typename TFn>
-void Geocoder::WithPostcodes(TFn && fn)
+void Geocoder::WithPostcodes(BaseContext & ctx, TFn && fn)
{
size_t const maxPostcodeTokens = GetMaxNumTokensInPostcode();
- for (size_t startToken = 0; startToken != m_numTokens; ++startToken)
+ for (size_t startToken = 0; startToken != ctx.m_numTokens; ++startToken)
{
size_t endToken = startToken;
- for (size_t n = 1; startToken + n <= m_numTokens && n <= maxPostcodeTokens; ++n)
+ for (size_t n = 1; startToken + n <= ctx.m_numTokens && n <= maxPostcodeTokens; ++n)
{
- if (m_usedTokens[startToken + n - 1])
+ if (ctx.m_usedTokens[startToken + n - 1])
break;
TokenSlice slice(m_params, startToken, startToken + n);
- auto const isPrefix = startToken + n == m_numTokens;
+ auto const isPrefix = startToken + n == ctx.m_numTokens;
if (LooksLikePostcode(QuerySlice(slice), isPrefix))
endToken = startToken + n;
}
@@ -1058,7 +1047,7 @@ void Geocoder::WithPostcodes(TFn && fn)
if (!coding::CompressedBitVector::IsEmpty(postcodes))
{
- ScopedMarkTokens mark(m_usedTokens, startToken, endToken);
+ ScopedMarkTokens mark(ctx.m_usedTokens, startToken, endToken);
m_postcodes.Clear();
m_postcodes.m_startToken = startToken;
@@ -1070,122 +1059,41 @@ void Geocoder::WithPostcodes(TFn && fn)
}
}
-void Geocoder::GreedilyMatchStreets()
+void Geocoder::GreedilyMatchStreets(BaseContext & ctx)
{
- for (size_t startToken = 0; startToken < m_numTokens; ++startToken)
- {
- if (m_usedTokens[startToken])
- continue;
-
- // Here we try to match as many tokens as possible while
- // intersection is a non-empty bit vector of streets. Single
- // tokens that are synonyms to streets are ignored. Moreover,
- // each time a token that looks like a beginning of a house number
- // is met, we try to use current intersection of tokens as a
- // street layer and try to match BUILDINGs or POIs.
- CBVPtr allFeatures(m_streets, false /* isOwner */);
-
- size_t curToken = startToken;
-
- // This variable is used for prevention of duplicate calls to
- // CreateStreetsLayerAndMatchLowerLayers() with the same
- // arguments.
- size_t lastToken = startToken;
-
- // When true, no bit vectors were intersected with allFeatures at
- // all.
- bool emptyIntersection = true;
-
- // When true, allFeatures is in the incomplete state and can't be
- // used for creation of street layers.
- bool incomplete = false;
-
- auto createStreetsLayerAndMatchLowerLayers = [&]()
- {
- if (!allFeatures.IsEmpty() && !emptyIntersection && !incomplete && lastToken != curToken)
- {
- CreateStreetsLayerAndMatchLowerLayers(startToken, curToken, *allFeatures);
- lastToken = curToken;
- }
- };
-
- StreetTokensFilter filter([&](strings::UniString const & /* token */, size_t tag)
- {
- auto buffer = coding::CompressedBitVector::Intersect(
- *allFeatures, *m_addressFeatures[tag]);
- if (tag < curToken)
- {
- // This is the case for delayed
- // street synonym. Therefore,
- // allFeatures is temporarily in the
- // incomplete state.
- allFeatures.Set(move(buffer));
- emptyIntersection = false;
-
- incomplete = true;
- return;
- }
- ASSERT_EQUAL(tag, curToken, ());
-
- // |allFeatures| will become empty
- // after the intersection. Therefore
- // we need to create streets layer
- // right now.
- if (coding::CompressedBitVector::IsEmpty(buffer))
- createStreetsLayerAndMatchLowerLayers();
-
- allFeatures.Set(move(buffer));
- emptyIntersection = false;
- incomplete = false;
- });
-
- for (; curToken < m_numTokens && !m_usedTokens[curToken] && !allFeatures.IsEmpty(); ++curToken)
- {
- auto const & token = m_params.GetTokens(curToken).front();
- bool const isPrefix = curToken >= m_params.m_tokens.size();
-
- if (house_numbers::LooksLikeHouseNumber(token, isPrefix))
- createStreetsLayerAndMatchLowerLayers();
+ vector<StreetsMatcher::Prediction> predictions;
+ StreetsMatcher::Go(ctx, *m_filter, m_params, predictions);
- filter.Put(token, isPrefix, curToken);
- }
- createStreetsLayerAndMatchLowerLayers();
- }
+ for (auto const & prediction : predictions)
+ CreateStreetsLayerAndMatchLowerLayers(ctx, prediction);
}
-void Geocoder::CreateStreetsLayerAndMatchLowerLayers(
- size_t startToken, size_t endToken, coding::CompressedBitVector const & features)
+void Geocoder::CreateStreetsLayerAndMatchLowerLayers(BaseContext & ctx,
+ StreetsMatcher::Prediction const & prediction)
{
ASSERT(m_layers.empty(), ());
- if (coding::CompressedBitVector::IsEmpty(&features))
- return;
-
- CBVPtr filtered(&features, false /* isOwner */);
- if (m_filter->NeedToFilter(features))
- filtered.Set(m_filter->Filter(features).release(), true /* isOwner */);
-
m_layers.emplace_back();
MY_SCOPE_GUARD(cleanupGuard, bind(&vector<FeaturesLayer>::pop_back, &m_layers));
auto & layer = m_layers.back();
- InitLayer(SearchModel::SEARCH_TYPE_STREET, startToken, endToken, layer);
+ InitLayer(SearchModel::SEARCH_TYPE_STREET, prediction.m_startToken, prediction.m_endToken, layer);
vector<uint32_t> sortedFeatures;
- sortedFeatures.reserve(features.PopCount());
- filtered.ForEach(MakeBackInsertFunctor(sortedFeatures));
+ sortedFeatures.reserve(prediction.m_features->PopCount());
+ prediction.m_features.ForEach(MakeBackInsertFunctor(sortedFeatures));
layer.m_sortedFeatures = &sortedFeatures;
- ScopedMarkTokens mark(m_usedTokens, startToken, endToken);
- MatchPOIsAndBuildings(0 /* curToken */);
+ ScopedMarkTokens mark(ctx.m_usedTokens, prediction.m_startToken, prediction.m_endToken);
+ MatchPOIsAndBuildings(ctx, 0 /* curToken */);
}
-void Geocoder::MatchPOIsAndBuildings(size_t curToken)
+void Geocoder::MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken)
{
BailIfCancelled();
- curToken = SkipUsedTokens(curToken);
- if (curToken == m_numTokens)
+ curToken = ctx.SkipUsedTokens(curToken);
+ if (curToken == ctx.m_numTokens)
{
// All tokens were consumed, find paths through layers, emit
// features.
@@ -1203,7 +1111,7 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken)
filtered.Set(m_postcodes.m_features.get(), false /* isOwner */);
filtered.ForEach([&](uint32_t id)
{
- EmitResult(m_context->GetId(), id, GetSearchTypeInGeocoding(id),
+ EmitResult(m_context->GetId(), id, GetSearchTypeInGeocoding(ctx, id),
m_postcodes.m_startToken, m_postcodes.m_endToken);
});
return;
@@ -1257,7 +1165,7 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken)
// any.
auto clusterize = [&](uint32_t featureId)
{
- auto const searchType = GetSearchTypeInGeocoding(featureId);
+ auto const searchType = GetSearchTypeInGeocoding(ctx, featureId);
// All SEARCH_TYPE_CITY features were filtered in
// MatchCities(). All SEARCH_TYPE_STREET features were
@@ -1273,7 +1181,7 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken)
features.SetFull();
// Try to consume [curToken, m_numTokens) tokens range.
- for (size_t n = 1; curToken + n <= m_numTokens && !m_usedTokens[curToken + n - 1]; ++n)
+ for (size_t n = 1; curToken + n <= ctx.m_numTokens && !ctx.m_usedTokens[curToken + n - 1]; ++n)
{
// At this point |features| is the intersection of
// m_addressFeatures[curToken], m_addressFeatures[curToken + 1],
@@ -1286,7 +1194,7 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken)
InitLayer(layer.m_type, curToken, curToken + n, layer);
}
- features.Intersect(m_addressFeatures[curToken + n - 1].get());
+ features.Intersect(ctx.m_features[curToken + n - 1].get());
ASSERT(features.Get(), ());
CBVPtr filtered;
@@ -1357,7 +1265,7 @@ void Geocoder::MatchPOIsAndBuildings(size_t curToken)
layer.m_type = static_cast<SearchModel::SearchType>(i);
if (IsLayerSequenceSane())
- MatchPOIsAndBuildings(curToken + n);
+ MatchPOIsAndBuildings(ctx, curToken + n);
}
}
}
@@ -1505,7 +1413,7 @@ void Geocoder::FillMissingFieldsInResults()
}
}
-void Geocoder::MatchUnclassified(size_t curToken)
+void Geocoder::MatchUnclassified(BaseContext & ctx, size_t curToken)
{
ASSERT(m_layers.empty(), ());
@@ -1516,28 +1424,28 @@ void Geocoder::MatchUnclassified(size_t curToken)
// adjacent tokens will be matched to "Hyde Park", whereas it's not
// ok to match something to "Park London Hyde", because tokens
// "Park" and "Hyde" are not adjacent.
- if (NumUnusedTokensGroups() != 1)
+ if (ctx.NumUnusedTokenGroups() != 1)
return;
CBVPtr allFeatures;
allFeatures.SetFull();
auto startToken = curToken;
- for (curToken = SkipUsedTokens(curToken); curToken < m_numTokens && !m_usedTokens[curToken];
- ++curToken)
+ for (curToken = ctx.SkipUsedTokens(curToken);
+ curToken < ctx.m_numTokens && !ctx.m_usedTokens[curToken]; ++curToken)
{
- allFeatures.Intersect(m_addressFeatures[curToken].get());
+ allFeatures.Intersect(ctx.m_features[curToken].get());
}
if (m_filter->NeedToFilter(*allFeatures))
- allFeatures.Set(m_filter->Filter(*allFeatures).release(), true /* isOwner */);
+ allFeatures.Set(m_filter->Filter(*allFeatures));
if (allFeatures.IsEmpty())
return;
auto emitUnclassified = [&](uint32_t featureId)
{
- auto type = GetSearchTypeInGeocoding(featureId);
+ auto type = GetSearchTypeInGeocoding(ctx, featureId);
if (type == SearchModel::SEARCH_TYPE_UNCLASSIFIED)
EmitResult(m_context->GetId(), featureId, type, startToken, curToken);
};
@@ -1614,11 +1522,15 @@ coding::CompressedBitVector const * Geocoder::RetrieveGeometryFeatures(MwmContex
}
}
-SearchModel::SearchType Geocoder::GetSearchTypeInGeocoding(uint32_t featureId)
+SearchModel::SearchType Geocoder::GetSearchTypeInGeocoding(BaseContext const & ctx,
+ uint32_t featureId)
{
- if (m_streets->GetBit(featureId))
+ ASSERT(ctx.m_streets, ());
+ ASSERT(ctx.m_villages, ());
+
+ if (ctx.m_streets->GetBit(featureId))
return SearchModel::SEARCH_TYPE_STREET;
- if (m_villages->GetBit(featureId))
+ if (ctx.m_villages->GetBit(featureId))
return SearchModel::SEARCH_TYPE_VILLAGE;
FeatureType feature;
@@ -1626,34 +1538,6 @@ SearchModel::SearchType Geocoder::GetSearchTypeInGeocoding(uint32_t featureId)
return m_model.GetSearchType(feature);
}
-bool Geocoder::AllTokensUsed() const
-{
- return all_of(m_usedTokens.begin(), m_usedTokens.end(), IdFunctor());
-}
-
-bool Geocoder::HasUsedTokensInRange(size_t from, size_t to) const
-{
- return any_of(m_usedTokens.begin() + from, m_usedTokens.begin() + to, IdFunctor());
-}
-
-size_t Geocoder::NumUnusedTokensGroups() const
-{
- size_t numGroups = 0;
- for (size_t i = 0; i < m_usedTokens.size(); ++i)
- {
- if (!m_usedTokens[i] && (i == 0 || m_usedTokens[i - 1]))
- ++numGroups;
- }
- return numGroups;
-}
-
-size_t Geocoder::SkipUsedTokens(size_t curToken) const
-{
- while (curToken != m_usedTokens.size() && m_usedTokens[curToken])
- ++curToken;
- return curToken;
-}
-
string DebugPrint(Geocoder::Locality const & locality)
{
ostringstream os;
diff --git a/search/geocoder.hpp b/search/geocoder.hpp
index 79e549bba6..876f5b762b 100644
--- a/search/geocoder.hpp
+++ b/search/geocoder.hpp
@@ -3,6 +3,7 @@
#include "search/cancel_exception.hpp"
#include "search/features_layer.hpp"
#include "search/features_layer_path_finder.hpp"
+#include "search/geocoder_context.hpp"
#include "search/geometry_cache.hpp"
#include "search/mode.hpp"
#include "search/model.hpp"
@@ -11,6 +12,7 @@
#include "search/pre_ranking_info.hpp"
#include "search/query_params.hpp"
#include "search/ranking_utils.hpp"
+#include "search/streets_matcher.hpp"
#include "indexer/index.hpp"
#include "indexer/mwm_set.hpp"
@@ -195,60 +197,57 @@ private:
// Creates a cache of posting lists corresponding to features in m_context
// for each token and saves it to m_addressFeatures.
- void PrepareAddressFeatures();
+ void InitBaseContext(BaseContext & ctx);
void InitLayer(SearchModel::SearchType type, size_t startToken, size_t endToken,
FeaturesLayer & layer);
- void FillLocalityCandidates(coding::CompressedBitVector const * filter,
+ void FillLocalityCandidates(BaseContext const & ctx, coding::CompressedBitVector const * filter,
size_t const maxNumLocalities, vector<Locality> & preLocalities);
- void FillLocalitiesTable();
+ void FillLocalitiesTable(BaseContext const & ctx);
- void FillVillageLocalities();
+ void FillVillageLocalities(BaseContext const & ctx);
template <typename TFn>
void ForEachCountry(vector<shared_ptr<MwmInfo>> const & infos, TFn && fn);
// Throws CancelException if cancelled.
- inline void BailIfCancelled()
- {
- ::search::BailIfCancelled(m_cancellable);
- }
+ inline void BailIfCancelled() { ::search::BailIfCancelled(m_cancellable); }
// Tries to find all countries and states in a search query and then
// performs matching of cities in found maps.
- void MatchRegions(RegionType type);
+ void MatchRegions(BaseContext & ctx, RegionType type);
// Tries to find all cities in a search query and then performs
// matching of streets in found cities.
- void MatchCities();
+ void MatchCities(BaseContext & ctx);
// Tries to do geocoding without localities, ie. find POIs,
// BUILDINGs and STREETs without knowledge about country, state,
// city or village. If during the geocoding too many features are
// retrieved, viewport is used to throw away excess features.
- void MatchAroundPivot();
+ void MatchAroundPivot(BaseContext & ctx);
// Tries to do geocoding in a limited scope, assuming that knowledge
// about high-level features, like cities or countries, is
// incorporated into |filter|.
- void LimitedSearch(FeaturesFilter const & filter);
+ void LimitedSearch(BaseContext & ctx, FeaturesFilter const & filter);
template <typename TFn>
- void WithPostcodes(TFn && fn);
+ void WithPostcodes(BaseContext & ctx, TFn && fn);
// Tries to match some adjacent tokens in the query as streets and
// then performs geocoding in street vicinities.
- void GreedilyMatchStreets();
+ void GreedilyMatchStreets(BaseContext & ctx);
- void CreateStreetsLayerAndMatchLowerLayers(size_t startToken, size_t endToken,
- coding::CompressedBitVector const & features);
+ void CreateStreetsLayerAndMatchLowerLayers(BaseContext & ctx,
+ StreetsMatcher::Prediction const & prediction);
// Tries to find all paths in a search tree, where each edge is
// marked with some substring of the query tokens. These paths are
// called "layer sequence" and current path is stored in |m_layers|.
- void MatchPOIsAndBuildings(size_t curToken);
+ void MatchPOIsAndBuildings(BaseContext & ctx, size_t curToken);
// Returns true if current path in the search tree (see comment for
// MatchPOIsAndBuildings()) looks sane. This method is used as a fast
@@ -271,7 +270,7 @@ private:
// Tries to match unclassified objects from lower layers, like
// parks, forests, lakes, rivers, etc. This method finds all
// UNCLASSIFIED objects that match to all currently unused tokens.
- void MatchUnclassified(size_t curToken);
+ void MatchUnclassified(BaseContext & ctx, size_t curToken);
unique_ptr<coding::CompressedBitVector> LoadCategories(
MwmContext & context, vector<strings::UniString> const & categories);
@@ -290,21 +289,7 @@ private:
// This is a faster wrapper around SearchModel::GetSearchType(), as
// it uses pre-loaded lists of streets and villages.
- SearchModel::SearchType GetSearchTypeInGeocoding(uint32_t featureId);
-
- // Returns true iff all tokens are used.
- bool AllTokensUsed() const;
-
- // Returns true if there exists at least one used token in [from,
- // to).
- bool HasUsedTokensInRange(size_t from, size_t to) const;
-
- // Counts number of groups of consecutive unused tokens.
- size_t NumUnusedTokensGroups() const;
-
- // Advances |curToken| to the nearest unused token, or to the end of
- // |m_usedTokens| if there are no unused tokens.
- size_t SkipUsedTokens(size_t curToken) const;
+ SearchModel::SearchType GetSearchTypeInGeocoding(BaseContext const & ctx, uint32_t featureId);
Index & m_index;
@@ -315,9 +300,6 @@ private:
// Geocoder params.
Params m_params;
- // Total number of search query tokens.
- size_t m_numTokens;
-
// This field is used to map features to a limited number of search
// classes.
SearchModel const & m_model;
@@ -344,30 +326,12 @@ private:
// Cache of nested rects used to estimate distance from a feature to the pivot.
NestedRectsCache m_pivotFeatures;
- // Cache of posting lists for each token in the query. TODO (@y,
- // @m, @vng): consider to update this cache lazily, as user inputs
- // tokens one-by-one.
- vector<unique_ptr<coding::CompressedBitVector>> m_addressFeatures;
-
// Cache of street ids in mwms.
map<MwmSet::MwmId, unique_ptr<coding::CompressedBitVector>> m_streetsCache;
- // Street features in the mwm that is currently being processed.
- // The initialization of m_streets is postponed in order to gain
- // some speed. Therefore m_streets may be used only in
- // LimitedSearch() and in all its callees.
- coding::CompressedBitVector const * m_streets;
-
- // Village features in the mwm that is currently being processed.
- unique_ptr<coding::CompressedBitVector> m_villages;
-
// Postcodes features in the mwm that is currently being processed.
Postcodes m_postcodes;
- // This vector is used to indicate what tokens were matched by
- // locality and can't be re-used during the geocoding process.
- vector<bool> m_usedTokens;
-
// This filter is used to throw away excess features.
FeaturesFilter const * m_filter;
diff --git a/search/geocoder_context.cpp b/search/geocoder_context.cpp
new file mode 100644
index 0000000000..9f03510916
--- /dev/null
+++ b/search/geocoder_context.cpp
@@ -0,0 +1,36 @@
+#include "search/geocoder_context.hpp"
+
+#include "base/stl_add.hpp"
+
+#include "std/algorithm.hpp"
+
+namespace search
+{
+size_t BaseContext::SkipUsedTokens(size_t curToken) const
+{
+ while (curToken != m_usedTokens.size() && m_usedTokens[curToken])
+ ++curToken;
+ return curToken;
+}
+
+bool BaseContext::AllTokensUsed() const
+{
+ return all_of(m_usedTokens.begin(), m_usedTokens.end(), IdFunctor());
+}
+
+bool BaseContext::HasUsedTokensInRange(size_t from, size_t to) const
+{
+ return any_of(m_usedTokens.begin() + from, m_usedTokens.begin() + to, IdFunctor());
+}
+
+size_t BaseContext::NumUnusedTokenGroups() const
+{
+ size_t numGroups = 0;
+ for (size_t i = 0; i < m_usedTokens.size(); ++i)
+ {
+ if (!m_usedTokens[i] && (i == 0 || m_usedTokens[i - 1]))
+ ++numGroups;
+ }
+ return numGroups;
+}
+} // namespace search
diff --git a/search/geocoder_context.hpp b/search/geocoder_context.hpp
new file mode 100644
index 0000000000..ab43007047
--- /dev/null
+++ b/search/geocoder_context.hpp
@@ -0,0 +1,38 @@
+#pragma once
+
+#include "coding/compressed_bit_vector.hpp"
+
+#include "std/unique_ptr.hpp"
+#include "std/vector.hpp"
+
+namespace search
+{
+class FeaturesFilter;
+
+struct BaseContext
+{
+ // Advances |curToken| to the nearest unused token, or to the end of
+ // |m_usedTokens| if there are no unused tokens.
+ size_t SkipUsedTokens(size_t curToken) const;
+
+ // Returns true iff all tokens are used.
+ bool AllTokensUsed() const;
+
+ // Returns true if there exists at least one used token in [from,
+ // to).
+ bool HasUsedTokensInRange(size_t from, size_t to) const;
+
+ // Counts number of groups of consecutive unused tokens.
+ size_t NumUnusedTokenGroups() const;
+
+ vector<unique_ptr<coding::CompressedBitVector>> m_features;
+ unique_ptr<coding::CompressedBitVector> m_villages;
+ coding::CompressedBitVector const * m_streets = nullptr;
+
+ // This vector is used to indicate what tokens were matched by
+ // locality and can't be re-used during the geocoding process.
+ vector<bool> m_usedTokens;
+
+ size_t m_numTokens = 0;
+};
+} // namespace search
diff --git a/search/mwm_context.hpp b/search/mwm_context.hpp
index 200f5cd053..ca9b50b524 100644
--- a/search/mwm_context.hpp
+++ b/search/mwm_context.hpp
@@ -90,5 +90,4 @@ private:
DISALLOW_COPY_AND_MOVE(MwmContext);
};
-
} // namespace search
diff --git a/search/query_params.hpp b/search/query_params.hpp
index f3bc6f90eb..2f00bc6c34 100644
--- a/search/query_params.hpp
+++ b/search/query_params.hpp
@@ -36,6 +36,10 @@ struct QueryParams
TSynonymsVector const & GetTokens(size_t i) const;
TSynonymsVector & GetTokens(size_t i);
+ inline size_t GetNumTokens() const
+ {
+ return m_prefixTokens.empty() ? m_tokens.size() : m_tokens.size() + 1;
+ }
/// @return true if all tokens in [start, end) range has number synonym.
bool IsNumberTokens(size_t start, size_t end) const;
diff --git a/search/search.pro b/search/search.pro
index 6be1bd7a85..5ddaf35a28 100644
--- a/search/search.pro
+++ b/search/search.pro
@@ -22,6 +22,7 @@ HEADERS += \
features_layer_matcher.hpp \
features_layer_path_finder.hpp \
geocoder.hpp \
+ geocoder_context.hpp \
geometry_cache.hpp \
geometry_utils.hpp \
house_detector.hpp \
@@ -60,6 +61,7 @@ HEADERS += \
search_trie.hpp \
stats_cache.hpp \
street_vicinity_loader.hpp \
+ streets_matcher.hpp \
string_intersection.hpp \
suggest.hpp \
token_slice.hpp \
@@ -75,6 +77,7 @@ SOURCES += \
features_layer_matcher.cpp \
features_layer_path_finder.cpp \
geocoder.cpp \
+ geocoder_context.cpp \
geometry_cache.cpp \
geometry_utils.cpp \
house_detector.cpp \
@@ -108,5 +111,6 @@ SOURCES += \
retrieval.cpp \
reverse_geocoder.cpp \
street_vicinity_loader.cpp \
+ streets_matcher.cpp \
token_slice.cpp \
types_skipper.cpp \
diff --git a/search/streets_matcher.cpp b/search/streets_matcher.cpp
new file mode 100644
index 0000000000..7d8125437c
--- /dev/null
+++ b/search/streets_matcher.cpp
@@ -0,0 +1,173 @@
+#include "search/streets_matcher.hpp"
+#include "search/features_filter.hpp"
+#include "search/house_numbers_matcher.hpp"
+#include "search/query_params.hpp"
+
+#include "indexer/search_string_utils.hpp"
+
+#include "base/logging.hpp"
+#include "base/stl_helpers.hpp"
+
+namespace search
+{
+namespace
+{
+bool LessByHash(StreetsMatcher::Prediction const & lhs, StreetsMatcher::Prediction const & rhs)
+{
+ if (lhs.m_hash != rhs.m_hash)
+ return lhs.m_hash < rhs.m_hash;
+
+ if (lhs.m_prob != rhs.m_prob)
+ return lhs.m_prob > rhs.m_prob;
+
+ if (lhs.GetNumTokens() != rhs.GetNumTokens())
+ return lhs.GetNumTokens() > rhs.GetNumTokens();
+
+ return lhs.m_startToken < rhs.m_startToken;
+}
+} // namespace
+
+// static
+void StreetsMatcher::Go(BaseContext const & ctx, FeaturesFilter const & filter,
+ QueryParams const & params, vector<Prediction> & predictions)
+{
+ size_t const kMaxNumPredictions = 3;
+ double const kTailProbability = 0.05;
+
+ predictions.clear();
+ FindStreets(ctx, filter, params, predictions);
+
+ if (predictions.empty())
+ return;
+
+ sort(predictions.begin(), predictions.end(), &LessByHash);
+ predictions.erase(
+ unique(predictions.begin(), predictions.end(), my::EqualsBy(&Prediction::m_hash)),
+ predictions.end());
+
+ sort(predictions.rbegin(), predictions.rend(), my::LessBy(&Prediction::m_prob));
+ while (predictions.size() > kMaxNumPredictions && predictions.back().m_prob < kTailProbability)
+ predictions.pop_back();
+}
+
+// static
+void StreetsMatcher::FindStreets(BaseContext const & ctx, FeaturesFilter const & filter,
+ QueryParams const & params, vector<Prediction> & predictions)
+{
+ for (size_t startToken = 0; startToken < ctx.m_numTokens; ++startToken)
+ {
+ if (ctx.m_usedTokens[startToken])
+ continue;
+
+ // Here we try to match as many tokens as possible while
+ // intersection is a non-empty bit vector of streets. Single
+ // tokens that are synonyms to streets are ignored. Moreover,
+ // each time a token that looks like a beginning of a house number
+ // is met, we try to use current intersection of tokens as a
+ // street layer and try to match BUILDINGs or POIs.
+ CBVPtr streets(ctx.m_streets, false /* isOwner */);
+
+ CBVPtr all;
+ all.SetFull();
+
+ size_t curToken = startToken;
+
+ // This variable is used for prevention of duplicate calls to
+ // CreateStreetsLayerAndMatchLowerLayers() with the same
+ // arguments.
+ size_t lastToken = startToken;
+
+ // When true, no bit vectors were intersected with |streets| at all.
+ bool emptyIntersection = true;
+
+ // When true, |streets| is in the incomplete state and can't be
+ // used for creation of street layers.
+ bool incomplete = false;
+
+ auto emit = [&]()
+ {
+ if (!streets.IsEmpty() && !emptyIntersection && !incomplete && lastToken != curToken)
+ {
+ CBVPtr fs(streets.Get(), false /* isOwner */);
+ CBVPtr fa(all.Get(), false /* isOwner */);
+
+ ASSERT(!fs.IsFull(), ());
+ ASSERT(!fa.IsFull(), ());
+
+ if (filter.NeedToFilter(*fs))
+ fs.Set(filter.Filter(*fs));
+
+ if (fs.IsEmpty())
+ return;
+
+ if (filter.NeedToFilter(*fa))
+ {
+ fa.Set(filter.Filter(*fa));
+ fa.Union(fs.Get());
+ }
+
+ predictions.emplace_back();
+ auto & prediction = predictions.back();
+
+ prediction.m_startToken = startToken;
+ prediction.m_endToken = curToken;
+
+ ASSERT_LESS_OR_EQUAL(fs->PopCount(), fa->PopCount(), ());
+ prediction.m_prob =
+ static_cast<double>(fs->PopCount()) / static_cast<double>(fa->PopCount());
+
+ if (fs.IsOwner())
+ prediction.m_features = move(fs);
+ else
+ fs.CopyTo(prediction.m_features);
+
+ prediction.m_hash = coding::CompressedBitVectorHasher::Hash(*prediction.m_features);
+ }
+ };
+
+ StreetTokensFilter filter([&](strings::UniString const & /* token */, size_t tag)
+ {
+ auto buffer = coding::CompressedBitVector::Intersect(
+ *streets, *ctx.m_features[tag]);
+ if (tag < curToken)
+ {
+ // This is the case for delayed
+ // street synonym. Therefore,
+ // |streets| is temporarily in the
+ // incomplete state.
+ streets.Set(move(buffer));
+ all.Intersect(ctx.m_features[tag].get());
+ emptyIntersection = false;
+
+ incomplete = true;
+ return;
+ }
+ ASSERT_EQUAL(tag, curToken, ());
+
+ // |streets| will become empty after
+ // the intersection. Therefore we need
+ // to create streets layer right now.
+ if (coding::CompressedBitVector::IsEmpty(buffer))
+ emit();
+
+ streets.Set(move(buffer));
+ all.Intersect(ctx.m_features[tag].get());
+ emptyIntersection = false;
+ incomplete = false;
+ });
+
+ for (; curToken < ctx.m_numTokens && !ctx.m_usedTokens[curToken] && !streets.IsEmpty();
+ ++curToken)
+ {
+ auto const & token = params.GetTokens(curToken).front();
+ bool const isPrefix = curToken >= params.m_tokens.size();
+
+ if (house_numbers::LooksLikeHouseNumber(token, isPrefix))
+ emit();
+
+ filter.Put(token, isPrefix, curToken);
+ }
+ emit();
+ }
+}
+} // namespace search
diff --git a/search/streets_matcher.hpp b/search/streets_matcher.hpp
new file mode 100644
index 0000000000..43062db676
--- /dev/null
+++ b/search/streets_matcher.hpp
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "search/cbv_ptr.hpp"
+#include "search/geocoder_context.hpp"
+
+#include "std/vector.hpp"
+
+namespace search
+{
+class FeaturesFilter;
+struct QueryParams;
+
+class StreetsMatcher
+{
+public:
+ struct Prediction
+ {
+ inline size_t GetNumTokens() const { return m_endToken - m_startToken; }
+
+ CBVPtr m_features;
+
+ size_t m_startToken = 0;
+ size_t m_endToken = 0;
+
+ double m_prob = 0.0;
+
+ uint64_t m_hash = 0;
+ };
+
+ static void Go(BaseContext const & ctx, FeaturesFilter const & filter, QueryParams const & params,
+ vector<Prediction> & predictions);
+
+private:
+ static void FindStreets(BaseContext const & ctx, FeaturesFilter const & filter,
+ QueryParams const & params, vector<Prediction> & prediction);
+};
+} // namespace search