Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaxim Pimenov <m@maps.me>2015-09-11 16:38:18 +0300
committerSergey Yershov <yershov@corp.mail.ru>2016-03-23 16:02:16 +0300
commitd6800004d763a905c056bb3ca86e639f476377bc (patch)
tree593c88959f02ecdbaeda78a7add3eec77ea0704a /coding/compressed_bit_vector.hpp
parente8acd6f4596ebf011822e0c86fbca556faf3b724 (diff)
Review fixes.
Diffstat (limited to 'coding/compressed_bit_vector.hpp')
-rw-r--r--coding/compressed_bit_vector.hpp138
1 files changed, 77 insertions, 61 deletions
diff --git a/coding/compressed_bit_vector.hpp b/coding/compressed_bit_vector.hpp
index b856f9fc72..26a2182f77 100644
--- a/coding/compressed_bit_vector.hpp
+++ b/coding/compressed_bit_vector.hpp
@@ -1,7 +1,6 @@
#include "std/vector.hpp"
#include "base/assert.hpp"
-#include "base/bits.hpp"
#include "coding/reader.hpp"
#include "coding/writer.hpp"
@@ -24,14 +23,20 @@ public:
virtual ~CompressedBitVector() = default;
- // Executes f for each bit that is set to one using
- // the bit's 0-based position as argument.
- template <typename F>
- void ForEach(F && f) const;
-
// Intersects two bit vectors.
- static unique_ptr<CompressedBitVector> Intersect(CompressedBitVector const &,
- CompressedBitVector const &);
+ // todo(@pimenov) We expect the common use case to be as follows.
+ // A CBV is created in memory and several CBVs are read and intersected
+ // with it one by one. The in-memory CBV may initially contain a bit
+ // for every feature in an mwm and the intersected CBVs are read from
+ // the leaves of a search trie.
+ // Therefore an optimization of Intersect comes to mind: make a wrapper
+ // around TReader that will read a representation of a CBV from disk
+ // and intersect it bit by bit with the global in-memory CBV bypassing such
+ // routines as allocating memory and choosing strategy. They all can be called only
+ // once, namely in the end, when it is needed to pack the in-memory CBV into
+ // a suitable representation and pass it to the caller.
+ static unique_ptr<CompressedBitVector> Intersect(CompressedBitVector const & lhs,
+ CompressedBitVector const & rhs);
// Returns the number of set bits (population count).
virtual uint32_t PopCount() const = 0;
@@ -60,80 +65,63 @@ string DebugPrint(CompressedBitVector::StorageStrategy strat);
class DenseCBV : public CompressedBitVector
{
public:
+ DenseCBV() = default;
+
// Builds a dense CBV from a list of positions of set bits.
DenseCBV(vector<uint64_t> const & setBits);
- // Builds a dense CBV from a packed bitmap of set bits.
- // todo(@pimenov) This behaviour of & and && constructors is extremely error-prone.
- DenseCBV(vector<uint64_t> && bitMasks) : m_bits(move(bitMasks))
- {
- m_popCount = 0;
- for (size_t i = 0; i < m_bits.size(); ++i)
- m_popCount += bits::PopCount(m_bits[i]);
- }
+ // Not to be confused with the constructor: the semantics
+ // of the array of integers is completely different.
+ static unique_ptr<DenseCBV> BuildFromBitGroups(vector<uint64_t> && bitGroups);
- ~DenseCBV() = default;
+ size_t NumBitGroups() const { return m_bitGroups.size(); }
- size_t NumBitGroups() const { return m_bits.size(); }
+ static uint32_t const kBlockSize = 64;
template <typename F>
- void ForEach(F && f) const;
-
- uint64_t GetBitGroup(size_t i) const
+ void ForEach(F && f) const
{
- if (i < m_bits.size())
- return m_bits[i];
- return 0;
+ for (size_t i = 0; i < m_bitGroups.size(); ++i)
+ for (size_t j = 0; j < kBlockSize; ++j)
+ if (((m_bitGroups[i] >> j) & 1) > 0)
+ f(kBlockSize * i + j);
}
- // CompressedBitVector overrides:
+ // Returns 0 if the group number is too large to be contained in m_bits.
+ uint64_t GetBitGroup(size_t i) const;
+ // CompressedBitVector overrides:
uint32_t PopCount() const override;
-
bool GetBit(uint32_t pos) const override;
-
StorageStrategy GetStorageStrategy() const override;
-
void Serialize(Writer & writer) const override;
private:
- vector<uint64_t> m_bits;
- uint32_t m_popCount;
+ vector<uint64_t> m_bitGroups;
+ uint32_t m_popCount = 0;
};
class SparseCBV : public CompressedBitVector
{
public:
- SparseCBV(vector<uint64_t> const & setBits) : m_positions(setBits)
- {
- ASSERT(is_sorted(m_positions.begin(), m_positions.end()), ());
- }
-
- SparseCBV(vector<uint64_t> && setBits) : m_positions(move(setBits))
- {
- ASSERT(is_sorted(m_positions.begin(), m_positions.end()), ());
- }
+ SparseCBV(vector<uint64_t> const & setBits);
- ~SparseCBV() = default;
+ SparseCBV(vector<uint64_t> && setBits);
// Returns the position of the i'th set bit.
- uint64_t Select(size_t i) const
- {
- ASSERT_LESS(i, m_positions.size(), ());
- return m_positions[i];
- }
+ uint64_t Select(size_t i) const;
template <typename F>
- void ForEach(F && f) const;
+ void ForEach(F && f) const
+ {
+ for (auto const & position : m_positions)
+ f(position);
+ }
// CompressedBitVector overrides:
-
uint32_t PopCount() const override;
-
bool GetBit(uint32_t pos) const override;
-
StorageStrategy GetStorageStrategy() const override;
-
void Serialize(Writer & writer) const override;
private:
@@ -146,7 +134,11 @@ class CompressedBitVectorBuilder
public:
// Chooses a strategy to store the bit vector with bits from setBits set to one
// and returns a pointer to a class that fits best.
- static unique_ptr<CompressedBitVector> Build(vector<uint64_t> const & setBits);
+ static unique_ptr<CompressedBitVector> FromBitPositions(vector<uint64_t> const & setBits);
+
+ // Chooses a strategy to store the bit vector with bits from a bitmap obtained
+ // by concatenating the elements of bitGroups.
+ static unique_ptr<CompressedBitVector> FromBitGroups(vector<uint64_t> && bitGroups);
// Reads a bit vector from reader which must contain a valid
// bit vector representation (see CompressedBitVector::Serialize for the format).
@@ -154,29 +146,53 @@ public:
static unique_ptr<CompressedBitVector> Deserialize(TReader & reader)
{
ReaderSource<TReader> src(reader);
- uint8_t header = ReadPrimitiveFromSource<uint8_t>(reader);
+ uint8_t header = ReadPrimitiveFromSource<uint8_t>(src);
CompressedBitVector::StorageStrategy strat =
static_cast<CompressedBitVector::StorageStrategy>(header);
switch (strat)
{
case CompressedBitVector::StorageStrategy::Dense:
{
- uint32_t numBitGroups = ReadPrimitiveFromSource<uint32_t>(reader);
- vector<uint64_t> bitGroups(numBitGroups);
- for (size_t i = 0; i < numBitGroups; ++i)
- bitGroups[i] = ReadPrimitiveFromSource<uint64_t>(reader);
- return make_unique<DenseCBV>(move(bitGroups));
+ vector<uint64_t> bitGroups;
+ ReadPrimitiveVectorFromSource(src, bitGroups);
+ return DenseCBV::BuildFromBitGroups(move(bitGroups));
}
case CompressedBitVector::StorageStrategy::Sparse:
{
- uint32_t numBits = ReadPrimitiveFromSource<uint32_t>(reader);
- vector<uint64_t> setBits(numBits);
- for (size_t i = 0; i < numBits; ++i)
- setBits[i] = ReadPrimitiveFromSource<uint64_t>(reader);
+ vector<uint64_t> setBits;
+ ReadPrimitiveVectorFromSource(src, setBits);
return make_unique<SparseCBV>(setBits);
}
}
return nullptr;
}
};
+
+// ForEach is generic and therefore cannot be virtual: a helper class is needed.
+class CompressedBitVectorEnumerator
+{
+public:
+ // Executes f for each bit that is set to one using
+ // the bit's 0-based position as argument.
+ template <typename F>
+ static void ForEach(CompressedBitVector const & cbv, F && f)
+ {
+ CompressedBitVector::StorageStrategy strat = cbv.GetStorageStrategy();
+ switch (strat)
+ {
+ case CompressedBitVector::StorageStrategy::Dense:
+ {
+ DenseCBV const & denseCBV = static_cast<DenseCBV const &>(cbv);
+ denseCBV.ForEach(f);
+ return;
+ }
+ case CompressedBitVector::StorageStrategy::Sparse:
+ {
+ SparseCBV const & sparseCBV = static_cast<SparseCBV const &>(cbv);
+ sparseCBV.ForEach(f);
+ return;
+ }
+ }
+ }
+};
} // namespace coding