Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/search
diff options
context:
space:
mode:
authorMaxim Pimenov <m@maps.me>2018-07-09 18:35:33 +0300
committerTatiana Yan <tatiana.kondakova@gmail.com>2018-07-09 19:37:52 +0300
commit29ded65c09460fe9a91994f1e4f05d63c15ef822 (patch)
tree9e483e514da5e611b6419e94870f08a67f1124e3 /search
parent82bfd522607572499b11d411da6a3d1decc5d7f5 (diff)
Review fixes.
Diffstat (limited to 'search')
-rw-r--r--search/base/text_index/mem.hpp34
-rw-r--r--search/base/text_index/merger.cpp54
-rw-r--r--search/base/text_index/postings.hpp42
-rw-r--r--search/base/text_index/reader.hpp1
-rw-r--r--search/base/text_index/utils.hpp2
5 files changed, 86 insertions, 47 deletions
diff --git a/search/base/text_index/mem.hpp b/search/base/text_index/mem.hpp
index 367becbe14..c3887e215e 100644
--- a/search/base/text_index/mem.hpp
+++ b/search/base/text_index/mem.hpp
@@ -8,7 +8,6 @@
#include "coding/reader.hpp"
#include "coding/varint.hpp"
-#include "coding/write_to_sink.hpp"
#include "base/assert.hpp"
#include "base/string_utils.hpp"
@@ -88,30 +87,31 @@ private:
class MemPostingsFetcher : public PostingsFetcher
{
public:
- MemPostingsFetcher(std::map<Token, std::vector<Posting>> const & postingsByToken)
+ explicit MemPostingsFetcher(std::map<Token, std::vector<Posting>> const & postingsByToken)
+ : m_postingsByToken(postingsByToken), m_it(m_postingsByToken.begin())
{
- // todo(@m) An unnecessary copy?
- m_postings.reserve(postingsByToken.size());
- for (auto const & entry : postingsByToken)
- m_postings.emplace_back(entry.second);
}
// PostingsFetcher overrides:
- bool GetPostingsForNextToken(std::vector<uint32_t> & postings)
+ bool IsValid() const override { return m_it != m_postingsByToken.end(); }
+
+ void Advance() override
+ {
+ if (m_it != m_postingsByToken.end())
+ ++m_it;
+ }
+
+ void ForEachPosting(Fn const & fn) const override
{
- CHECK_LESS_OR_EQUAL(m_tokenId, m_postings.size(), ());
- if (m_tokenId == m_postings.size())
- return false;
- postings.swap(m_postings[m_tokenId++]);
- return true;
+ CHECK(IsValid(), ());
+ for (uint32_t p : m_it->second)
+ fn(p);
}
private:
- std::vector<std::vector<uint32_t>> m_postings;
- // Index of the next token to be processed. The
- // copy of the postings list in |m_postings| is not guaranteed
- // to be valid after it's been processed.
- size_t m_tokenId = 0;
+ std::map<Token, std::vector<Posting>> const & m_postingsByToken;
+ // Iterator to the current token that will be processed when ForEachPosting is called.
+ std::map<Token, std::vector<Posting>>::const_iterator m_it;
};
void SortPostings();
diff --git a/search/base/text_index/merger.cpp b/search/base/text_index/merger.cpp
index 7ea8425475..50f8af1d9a 100644
--- a/search/base/text_index/merger.cpp
+++ b/search/base/text_index/merger.cpp
@@ -14,6 +14,8 @@
#include "base/stl_helpers.hpp"
#include <algorithm>
+#include <cstdint>
+#include <iterator>
#include <utility>
#include <vector>
@@ -30,45 +32,67 @@ public:
TextIndexReader const & index2)
: m_dict(dict), m_index1(index1), m_index2(index2)
{
+ ReadPostings();
}
// PostingsFetcher overrides:
- bool GetPostingsForNextToken(std::vector<uint32_t> & postings)
+ bool IsValid() const override
{
- postings.clear();
+ auto const & tokens = m_dict.GetTokens();
+ CHECK_LESS_OR_EQUAL(m_tokenId, tokens.size(), ());
+ return m_tokenId < tokens.size();
+ }
+ void Advance() override
+ {
auto const & tokens = m_dict.GetTokens();
CHECK_LESS_OR_EQUAL(m_tokenId, tokens.size(), ());
if (m_tokenId == tokens.size())
- return false;
+ return;
- m_index1.ForEachPosting(tokens[m_tokenId], MakeBackInsertFunctor(postings));
- m_index2.ForEachPosting(tokens[m_tokenId], MakeBackInsertFunctor(postings));
- my::SortUnique(postings);
++m_tokenId;
- return true;
+ ReadPostings();
+ }
+
+ void ForEachPosting(Fn const & fn) const override
+ {
+ CHECK(IsValid(), ());
+ for (uint32_t p : m_postings)
+ fn(p);
}
private:
+ // Reads postings for the current token.
+ void ReadPostings()
+ {
+ m_postings.clear();
+ if (!IsValid())
+ return;
+
+ auto const & tokens = m_dict.GetTokens();
+ m_index1.ForEachPosting(tokens[m_tokenId], MakeBackInsertFunctor(m_postings));
+ m_index2.ForEachPosting(tokens[m_tokenId], MakeBackInsertFunctor(m_postings));
+ my::SortUnique(m_postings);
+ }
+
TextIndexDictionary const & m_dict;
TextIndexReader const & m_index1;
TextIndexReader const & m_index2;
// Index of the next token from |m_dict| to be processed.
size_t m_tokenId = 0;
+ vector<uint32_t> m_postings;
};
TextIndexDictionary MergeDictionaries(TextIndexDictionary const & dict1,
TextIndexDictionary const & dict2)
{
- vector<Token> commonTokens = dict1.GetTokens();
- for (auto const & token : dict2.GetTokens())
- {
- size_t dummy;
- if (!dict1.GetTokenId(token, dummy))
- commonTokens.emplace_back(token);
- }
+ vector<Token> commonTokens;
+ auto const & ts1 = dict1.GetTokens();
+ auto const & ts2 = dict2.GetTokens();
+ merge(ts1.begin(), ts1.end(), ts2.begin(), ts2.end(), back_inserter(commonTokens));
+ ASSERT(is_sorted(commonTokens.begin(), commonTokens.end()), ());
+ commonTokens.erase(unique(commonTokens.begin(), commonTokens.end()), commonTokens.end());
- sort(commonTokens.begin(), commonTokens.end());
TextIndexDictionary dict;
dict.SetTokens(move(commonTokens));
return dict;
diff --git a/search/base/text_index/postings.hpp b/search/base/text_index/postings.hpp
index ccfc32ab25..81a790beda 100644
--- a/search/base/text_index/postings.hpp
+++ b/search/base/text_index/postings.hpp
@@ -5,7 +5,10 @@
#include "search/base/text_index/utils.hpp"
#include "coding/varint.hpp"
+#include "coding/write_to_sink.hpp"
+#include <cstdint>
+#include <functional>
#include <vector>
namespace search
@@ -20,11 +23,21 @@ struct TextIndexHeader;
class PostingsFetcher
{
public:
- // Returns true and fills |postings| with the postings list of the next token
- // when there is one.
- // Returns false if the underlying source is exhausted, i.e. there are
- // no more tokens left.
- virtual bool GetPostingsForNextToken(std::vector<uint32_t> & postings) = 0;
+ using Fn = std::function<void(uint32_t)>;
+
+ virtual ~PostingsFetcher() = default;
+
+ // Returns true when there are tokens left in the fetcher and false otherwise.
+ virtual bool IsValid() const = 0;
+
+ // Advances fetcher to the next token.
+ virtual void Advance() = 0;
+
+ // Calls |fn| for every posting for the current token. Initially,
+ // current token is the first token and then calls to Advance
+ // may be used to process the next token until the underlying
+ // source of the tokens is exhausted and the fetcher is no longer valid.
+ virtual void ForEachPosting(Fn const & fn) const = 0;
};
// Fetches the postings list one by one from |fetcher| and writes them
@@ -44,20 +57,21 @@ void WritePostings(Sink & sink, uint64_t startPos, TextIndexHeader & header,
std::vector<uint32_t> postingsStarts;
postingsStarts.reserve(header.m_numTokens);
-
- // todo(@m) s/uint32_t/Posting/ ?
- std::vector<uint32_t> postings;
- while (fetcher.GetPostingsForNextToken(postings))
{
- postingsStarts.emplace_back(RelativePos(sink, startPos));
-
- uint32_t last = 0;
- for (auto const p : postings)
- {
+ uint32_t last;
+ // todo(@m) s/uint32_t/Posting/ ?
+ auto writePostings = [&](uint32_t p) {
CHECK(last == 0 || last < p, (last, p));
uint32_t const delta = p - last;
WriteVarUint(sink, delta);
last = p;
+ };
+ while (fetcher.IsValid())
+ {
+ postingsStarts.emplace_back(RelativePos(sink, startPos));
+ last = 0;
+ fetcher.ForEachPosting(writePostings);
+ fetcher.Advance();
}
}
// One more for convenience.
diff --git a/search/base/text_index/reader.hpp b/search/base/text_index/reader.hpp
index 772aacd77e..5d9be4aaf7 100644
--- a/search/base/text_index/reader.hpp
+++ b/search/base/text_index/reader.hpp
@@ -1,5 +1,6 @@
#pragma once
+#include "search/base/text_index/dictionary.hpp"
#include "search/base/text_index/text_index.hpp"
#include "coding/file_reader.hpp"
diff --git a/search/base/text_index/utils.hpp b/search/base/text_index/utils.hpp
index fe896c6c37..0f6067fba5 100644
--- a/search/base/text_index/utils.hpp
+++ b/search/base/text_index/utils.hpp
@@ -9,7 +9,7 @@ namespace search
namespace base
{
template <typename Sink>
-static uint32_t RelativePos(Sink & sink, uint64_t startPos)
+uint32_t RelativePos(Sink & sink, uint64_t startPos)
{
return ::base::checked_cast<uint32_t>(sink.Pos() - startPos);
}