diff options
author | mpimenov <mpimenov@users.noreply.github.com> | 2017-03-17 17:53:17 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-03-17 17:53:17 +0300 |
commit | 84914a4d88fa7ff4991a8ee318647e3755046cc1 (patch) | |
tree | 99b689694cf7f1a179c870f2d04e2d4ad8b513b1 | |
parent | f3a7aff724a66d9ee9e2530854ff76abdb542848 (diff) | |
parent | 37b38c92171ffd0a33110d1c016085d9ff542dea (diff) |
[search] Matching is extracted to search_quality lib.
-rw-r--r-- | search/search_quality/CMakeLists.txt | 6 | ||||
-rw-r--r-- | search/search_quality/features_collector_tool/features_collector_tool.cpp | 95 | ||||
-rw-r--r-- | search/search_quality/matcher.cpp | 94 | ||||
-rw-r--r-- | search/search_quality/matcher.hpp | 38 | ||||
-rw-r--r-- | search/search_quality/search_quality.pro | 2 |
5 files changed, 143 insertions, 92 deletions
diff --git a/search/search_quality/CMakeLists.txt b/search/search_quality/CMakeLists.txt index e01dc02edf..4dae74599f 100644 --- a/search/search_quality/CMakeLists.txt +++ b/search/search_quality/CMakeLists.txt @@ -3,10 +3,12 @@ project(search_quality) include_directories(${OMIM_ROOT}/3party/jansson/src) set(SRC - helpers.hpp helpers.cpp - sample.hpp + helpers.hpp + matcher.cpp + matcher.hpp sample.cpp + sample.hpp ) add_library(${PROJECT_NAME} ${SRC}) diff --git a/search/search_quality/features_collector_tool/features_collector_tool.cpp b/search/search_quality/features_collector_tool/features_collector_tool.cpp index 270b26e860..f3fc477ce6 100644 --- a/search/search_quality/features_collector_tool/features_collector_tool.cpp +++ b/search/search_quality/features_collector_tool/features_collector_tool.cpp @@ -1,12 +1,12 @@ #include "search/ranking_info.hpp" #include "search/result.hpp" #include "search/search_quality/helpers.hpp" +#include "search/search_quality/matcher.hpp" #include "search/search_quality/sample.hpp" #include "search/search_tests_support/test_search_engine.hpp" #include "search/search_tests_support/test_search_request.hpp" #include "indexer/classificator_loader.hpp" -#include "indexer/feature_algo.hpp" #include "storage/country_info_getter.hpp" #include "storage/index.hpp" @@ -43,30 +43,12 @@ DEFINE_string(mwm_path, "", "Path to mwm files (writable dir)"); DEFINE_string(stats_path, "", "Path to store stats about queries results (default: stderr)"); DEFINE_string(json_in, "", "Path to the json file with samples (default: stdin)"); -size_t constexpr kInvalidId = numeric_limits<size_t>::max(); - struct Stats { // Indexes of not-found VITAL or RELEVANT results. vector<size_t> m_notFound; }; -struct Context -{ - Context(Index & index) : m_index(index) {} - - WARN_UNUSED_RESULT bool GetFeature(FeatureID const & id, FeatureType & ft) - { - auto const & mwmId = id.m_mwmId; - if (!m_guard || m_guard->GetId() != mwmId) - m_guard = make_unique<Index::FeaturesLoaderGuard>(m_index, mwmId); - return m_guard->GetFeatureByIndex(id.m_index, ft); - } - - Index & m_index; - unique_ptr<Index::FeaturesLoaderGuard> m_guard; -}; - void GetContents(istream & is, string & contents) { string line; @@ -77,73 +59,6 @@ void GetContents(istream & is, string & contents) } } -bool Matches(Context & context, Sample::Result const & golden, search::Result const & actual) -{ - static double constexpr kToleranceMeters = 50; - if (actual.GetResultType() != Result::RESULT_FEATURE) - return false; - - FeatureType ft; - if (!context.GetFeature(actual.GetFeatureID(), ft)) - return false; - - auto const houseNumber = ft.GetHouseNumber(); - auto const center = feature::GetCenter(ft); - - bool nameMatches = false; - if (golden.m_name.empty()) - { - nameMatches = true; - } - else - { - ft.ForEachName([&golden, &nameMatches](int8_t /* lang */, string const & name) { - if (golden.m_name == strings::MakeUniString(name)) - { - nameMatches = true; - return false; // breaks the loop - } - return true; // continues the loop - }); - } - - return nameMatches && golden.m_houseNumber == houseNumber && - MercatorBounds::DistanceOnEarth(golden.m_pos, center) < kToleranceMeters; -} - -void MatchResults(Context & context, vector<Sample::Result> const & golden, - vector<search::Result> const & actual, vector<size_t> & goldenMatching, - vector<size_t> & actualMatching) -{ - auto const n = golden.size(); - auto const m = actual.size(); - - goldenMatching.assign(n, kInvalidId); - actualMatching.assign(m, kInvalidId); - - // TODO (@y, @m): use Kuhn algorithm here for maximum matching. - for (size_t i = 0; i < n; ++i) - { - if (goldenMatching[i] != kInvalidId) - continue; - auto const & g = golden[i]; - - for (size_t j = 0; j < m; ++j) - { - if (actualMatching[j] != kInvalidId) - continue; - - auto const & a = actual[j]; - if (Matches(context, g, a)) - { - goldenMatching[i] = j; - actualMatching[j] = i; - break; - } - } - } -} - void DidDownload(TCountryId const & /* countryId */, shared_ptr<platform::LocalCountryFile> const & /* localFile */) { @@ -247,7 +162,7 @@ int main(int argc, char * argv[]) } vector<Stats> stats(samples.size()); - Context context(engine); + Matcher matcher(engine); cout << "SampleId,"; RankingInfo::PrintCSVHeader(cout); @@ -274,7 +189,7 @@ int main(int argc, char * argv[]) vector<size_t> goldenMatching; vector<size_t> actualMatching; - MatchResults(context, sample.m_results, results, goldenMatching, actualMatching); + matcher.Match(sample.m_results, results, goldenMatching, actualMatching); for (size_t j = 0; j < results.size(); ++j) { @@ -285,7 +200,7 @@ int main(int argc, char * argv[]) info.ToCSV(cout); auto relevance = Sample::Result::RELEVANCE_IRRELEVANT; - if (actualMatching[j] != kInvalidId) + if (actualMatching[j] != Matcher::kInvalidId) relevance = sample.m_results[actualMatching[j]].m_relevance; cout << "," << DebugPrint(relevance) << endl; } @@ -293,7 +208,7 @@ int main(int argc, char * argv[]) auto & s = stats[i]; for (size_t j = 0; j < goldenMatching.size(); ++j) { - if (goldenMatching[j] == kInvalidId && + if (goldenMatching[j] == Matcher::kInvalidId && sample.m_results[j].m_relevance != Sample::Result::RELEVANCE_IRRELEVANT) { s.m_notFound.push_back(j); diff --git a/search/search_quality/matcher.cpp b/search/search_quality/matcher.cpp new file mode 100644 index 0000000000..5b73fbab14 --- /dev/null +++ b/search/search_quality/matcher.cpp @@ -0,0 +1,94 @@ +#include "search/search_quality/matcher.hpp" + +#include "indexer/feature.hpp" +#include "indexer/feature_algo.hpp" +#include "indexer/feature_decl.hpp" + +#include "base/string_utils.hpp" + +#include "geometry/mercator.hpp" + +#include "base/stl_add.hpp" + +namespace search +{ +// static +size_t constexpr Matcher::kInvalidId; + +Matcher::Matcher(Index & index) : m_index(index) {} + +void Matcher::Match(std::vector<Sample::Result> const & golden, std::vector<Result> const & actual, + std::vector<size_t> & goldenMatching, std::vector<size_t> & actualMatching) +{ + auto const n = golden.size(); + auto const m = actual.size(); + + goldenMatching.assign(n, kInvalidId); + actualMatching.assign(m, kInvalidId); + + // TODO (@y, @m): use Kuhn algorithm here for maximum matching. + for (size_t i = 0; i < n; ++i) + { + if (goldenMatching[i] != kInvalidId) + continue; + auto const & g = golden[i]; + + for (size_t j = 0; j < m; ++j) + { + if (actualMatching[j] != kInvalidId) + continue; + + auto const & a = actual[j]; + if (Matches(g, a)) + { + goldenMatching[i] = j; + actualMatching[j] = i; + break; + } + } + } +} + +bool Matcher::GetFeature(FeatureID const & id, FeatureType & ft) +{ + auto const & mwmId = id.m_mwmId; + if (!m_guard || m_guard->GetId() != mwmId) + m_guard = my::make_unique<Index::FeaturesLoaderGuard>(m_index, mwmId); + return m_guard->GetFeatureByIndex(id.m_index, ft); +} + +bool Matcher::Matches(Sample::Result const & golden, search::Result const & actual) +{ + static double constexpr kToleranceMeters = 50; + + if (actual.GetResultType() != Result::RESULT_FEATURE) + return false; + + FeatureType ft; + if (!GetFeature(actual.GetFeatureID(), ft)) + return false; + + auto const houseNumber = ft.GetHouseNumber(); + auto const center = feature::GetCenter(ft); + + bool nameMatches = false; + if (golden.m_name.empty()) + { + nameMatches = true; + } + else + { + ft.ForEachName([&golden, &nameMatches](int8_t /* lang */, string const & name) { + if (golden.m_name == strings::MakeUniString(name)) + { + nameMatches = true; + return false; // breaks the loop + } + return true; // continues the loop + }); + } + + return nameMatches && golden.m_houseNumber == houseNumber && + MercatorBounds::DistanceOnEarth(golden.m_pos, center) < kToleranceMeters; +} +} // namespace search diff --git a/search/search_quality/matcher.hpp b/search/search_quality/matcher.hpp new file mode 100644 index 0000000000..c99c3c079f --- /dev/null +++ b/search/search_quality/matcher.hpp @@ -0,0 +1,38 @@ +#pragma once + +#include "search/result.hpp" +#include "search/search_quality/sample.hpp" + +#include "indexer/index.hpp" + +#include "base/macros.hpp" + +#include <cstdint> +#include <limits> +#include <memory> +#include <vector> + +class FeatureType; +struct FeatureID; + +namespace search +{ +class Matcher +{ +public: + static size_t constexpr kInvalidId = std::numeric_limits<size_t>::max(); + + Matcher(Index & index); + + void Match(std::vector<Sample::Result> const & golden, std::vector<Result> const & actual, + std::vector<size_t> & goldenMatching, std::vector<size_t> & actualMatching); + +private: + WARN_UNUSED_RESULT bool GetFeature(FeatureID const & id, FeatureType & ft); + + bool Matches(Sample::Result const & golden, Result const & actual); + + Index & m_index; + std::unique_ptr<Index::FeaturesLoaderGuard> m_guard; +}; +} // namespace search diff --git a/search/search_quality/search_quality.pro b/search/search_quality/search_quality.pro index 4cd8fe3de6..49a3fd922a 100644 --- a/search/search_quality/search_quality.pro +++ b/search/search_quality/search_quality.pro @@ -12,8 +12,10 @@ INCLUDEPATH += $$ROOT_DIR/3party/jansson/src HEADERS += \ helpers.hpp \ + matcher.hpp \ sample.hpp \ SOURCES += \ helpers.cpp \ + matcher.cpp \ sample.cpp \ |