Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaxim Pimenov <m@maps.me>2015-09-11 16:38:18 +0300
committerSergey Yershov <yershov@corp.mail.ru>2016-03-23 16:02:16 +0300
commitd6800004d763a905c056bb3ca86e639f476377bc (patch)
tree593c88959f02ecdbaeda78a7add3eec77ea0704a /coding/compressed_bit_vector.cpp
parente8acd6f4596ebf011822e0c86fbca556faf3b724 (diff)
Review fixes.
Diffstat (limited to 'coding/compressed_bit_vector.cpp')
-rw-r--r--coding/compressed_bit_vector.cpp134
1 files changed, 93 insertions, 41 deletions
diff --git a/coding/compressed_bit_vector.cpp b/coding/compressed_bit_vector.cpp
index 5f4336b324..0b444b42ac 100644
--- a/coding/compressed_bit_vector.cpp
+++ b/coding/compressed_bit_vector.cpp
@@ -2,24 +2,23 @@
#include "coding/writer.hpp"
#include "coding/write_to_sink.hpp"
+#include "base/bits.hpp"
+
#include "std/algorithm.hpp"
namespace
{
+uint64_t const kBlockSize = coding::DenseCBV::kBlockSize;
+
unique_ptr<coding::CompressedBitVector> IntersectImpl(coding::DenseCBV const & a,
coding::DenseCBV const & b)
{
size_t sizeA = a.NumBitGroups();
size_t sizeB = b.NumBitGroups();
- vector<uint64_t> resBits;
- for (size_t i = 0; i < min(sizeA, sizeB); ++i)
- {
- uint64_t bitGroup = a.GetBitGroup(i) & b.GetBitGroup(i);
- for (size_t j = 0; j < 64; j++)
- if (((bitGroup >> j) & 1) > 0)
- resBits.push_back(64 * i + j);
- }
- return coding::CompressedBitVectorBuilder::Build(resBits);
+ vector<uint64_t> resGroups(min(sizeA, sizeB));
+ for (size_t i = 0; i < resGroups.size(); ++i)
+ resGroups[i] = a.GetBitGroup(i) & b.GetBitGroup(i);
+ return coding::CompressedBitVectorBuilder::FromBitGroups(move(resGroups));
}
// The intersection of dense and sparse is always sparse.
@@ -71,6 +70,17 @@ unique_ptr<coding::CompressedBitVector> IntersectImpl(coding::SparseCBV const &
}
return make_unique<coding::SparseCBV>(move(resPos));
}
+
+// Returns true if a bit vector with popCount bits set out of totalBits
+// is fit to be represented as a DenseCBV. Note that we do not
+// account for possible irregularities in the distribution of bits.
+// In particular, we do not break the bit vector into blocks that are
+// stored separately although this might turn out to be a good idea.
+bool DenseEnough(uint64_t popCount, uint64_t totalBits)
+{
+ // Settle at 30% for now.
+ return popCount * 10 >= totalBits * 3;
+}
} // namespace
namespace coding
@@ -79,18 +89,39 @@ DenseCBV::DenseCBV(vector<uint64_t> const & setBits)
{
if (setBits.empty())
{
- m_bits.resize(0);
+ m_bitGroups.resize(0);
m_popCount = 0;
return;
}
uint64_t maxBit = setBits[0];
for (size_t i = 1; i < setBits.size(); ++i)
maxBit = max(maxBit, setBits[i]);
- size_t sz = (maxBit + 64 - 1) / 64;
- m_bits.resize(sz);
+ size_t sz = (maxBit + kBlockSize - 1) / kBlockSize;
+ m_bitGroups.resize(sz);
m_popCount = static_cast<uint32_t>(setBits.size());
for (uint64_t pos : setBits)
- m_bits[pos / 64] |= static_cast<uint64_t>(1) << (pos % 64);
+ m_bitGroups[pos / kBlockSize] |= static_cast<uint64_t>(1) << (pos % kBlockSize);
+}
+
+// static
+unique_ptr<DenseCBV> DenseCBV::BuildFromBitGroups(vector<uint64_t> && bitGroups)
+{
+ unique_ptr<DenseCBV> cbv(new DenseCBV());
+ cbv->m_popCount = 0;
+ for (size_t i = 0; i < bitGroups.size(); ++i)
+ cbv->m_popCount += bits::PopCount(bitGroups[i]);
+ cbv->m_bitGroups = move(bitGroups);
+ return cbv;
+}
+
+SparseCBV::SparseCBV(vector<uint64_t> const & setBits) : m_positions(setBits)
+{
+ ASSERT(is_sorted(m_positions.begin(), m_positions.end()), ());
+}
+
+SparseCBV::SparseCBV(vector<uint64_t> && setBits) : m_positions(move(setBits))
+{
+ ASSERT(is_sorted(m_positions.begin(), m_positions.end()), ());
}
uint32_t DenseCBV::PopCount() const { return m_popCount; }
@@ -99,40 +130,35 @@ uint32_t SparseCBV::PopCount() const { return m_positions.size(); }
bool DenseCBV::GetBit(uint32_t pos) const
{
- uint64_t bitGroup = GetBitGroup(pos / 64);
- return ((bitGroup >> (pos % 64)) & 1) > 0;
+ uint64_t bitGroup = GetBitGroup(pos / kBlockSize);
+ return ((bitGroup >> (pos % kBlockSize)) & 1) > 0;
}
bool SparseCBV::GetBit(uint32_t pos) const
{
- auto it = lower_bound(m_positions.begin(), m_positions.end(), pos);
+ auto const it = lower_bound(m_positions.begin(), m_positions.end(), pos);
return it != m_positions.end() && *it == pos;
}
-CompressedBitVector::StorageStrategy DenseCBV::GetStorageStrategy() const
+uint64_t DenseCBV::GetBitGroup(size_t i) const
{
- return CompressedBitVector::StorageStrategy::Dense;
+ return i < m_bitGroups.size() ? m_bitGroups[i] : 0;
}
-CompressedBitVector::StorageStrategy SparseCBV::GetStorageStrategy() const
+uint64_t SparseCBV::Select(size_t i) const
{
- return CompressedBitVector::StorageStrategy::Sparse;
+ ASSERT_LESS(i, m_positions.size(), ());
+ return m_positions[i];
}
-template <typename F>
-void DenseCBV::ForEach(F && f) const
+CompressedBitVector::StorageStrategy DenseCBV::GetStorageStrategy() const
{
- for (size_t i = 0; i < m_bits.size(); ++i)
- for (size_t j = 0; j < 64; ++j)
- if (((m_bits[i] >> j) & 1) > 0)
- f(64 * i + j);
+ return CompressedBitVector::StorageStrategy::Dense;
}
-template <typename F>
-void SparseCBV::ForEach(F && f) const
+CompressedBitVector::StorageStrategy SparseCBV::GetStorageStrategy() const
{
- for (size_t i = 0; i < m_positions.size(); ++i)
- f(m_positions[i]);
+ return CompressedBitVector::StorageStrategy::Sparse;
}
string DebugPrint(CompressedBitVector::StorageStrategy strat)
@@ -167,16 +193,43 @@ void SparseCBV::Serialize(Writer & writer) const
}
// static
-unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::Build(vector<uint64_t> const & setBits)
+unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitPositions(
+ vector<uint64_t> const & setBits)
{
if (setBits.empty())
return make_unique<SparseCBV>(setBits);
uint64_t maxBit = setBits[0];
for (size_t i = 1; i < setBits.size(); ++i)
maxBit = max(maxBit, setBits[i]);
- // 30% occupied is dense enough
- if (10 * setBits.size() >= 3 * maxBit)
+
+ if (DenseEnough(setBits.size(), maxBit))
return make_unique<DenseCBV>(setBits);
+
+ return make_unique<SparseCBV>(setBits);
+}
+
+// static
+unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitGroups(
+ vector<uint64_t> && bitGroups)
+{
+ while (!bitGroups.empty() && bitGroups.back() == 0)
+ bitGroups.pop_back();
+ if (bitGroups.empty())
+ return make_unique<SparseCBV>(bitGroups);
+
+ uint64_t maxBit = kBlockSize * bitGroups.size() - 1;
+ uint64_t popCount = 0;
+ for (size_t i = 0; i < bitGroups.size(); ++i)
+ popCount += bits::PopCount(bitGroups[i]);
+
+ if (DenseEnough(popCount, maxBit))
+ return DenseCBV::BuildFromBitGroups(move(bitGroups));
+
+ vector<uint64_t> setBits;
+ for (size_t i = 0; i < bitGroups.size(); ++i)
+ for (size_t j = 0; j < kBlockSize; ++j)
+ if (((bitGroups[i] >> j) & 1) > 0)
+ setBits.push_back(kBlockSize * i + j);
return make_unique<SparseCBV>(setBits);
}
@@ -184,29 +237,28 @@ unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::Build(vector<uint64_
unique_ptr<CompressedBitVector> CompressedBitVector::Intersect(CompressedBitVector const & lhs,
CompressedBitVector const & rhs)
{
- auto stratA = lhs.GetStorageStrategy();
- auto stratB = rhs.GetStorageStrategy();
- auto stratDense = CompressedBitVector::StorageStrategy::Dense;
- auto stratSparse = CompressedBitVector::StorageStrategy::Sparse;
- if (stratA == stratDense && stratB == stratDense)
+ using strat = CompressedBitVector::StorageStrategy;
+ auto const stratA = lhs.GetStorageStrategy();
+ auto const stratB = rhs.GetStorageStrategy();
+ if (stratA == strat::Dense && stratB == strat::Dense)
{
DenseCBV const & a = static_cast<DenseCBV const &>(lhs);
DenseCBV const & b = static_cast<DenseCBV const &>(rhs);
return IntersectImpl(a, b);
}
- if (stratA == stratDense && stratB == stratSparse)
+ if (stratA == strat::Dense && stratB == strat::Sparse)
{
DenseCBV const & a = static_cast<DenseCBV const &>(lhs);
SparseCBV const & b = static_cast<SparseCBV const &>(rhs);
return IntersectImpl(a, b);
}
- if (stratA == stratSparse && stratB == stratDense)
+ if (stratA == strat::Sparse && stratB == strat::Dense)
{
SparseCBV const & a = static_cast<SparseCBV const &>(lhs);
DenseCBV const & b = static_cast<DenseCBV const &>(rhs);
return IntersectImpl(a, b);
}
- if (stratA == stratSparse && stratB == stratSparse)
+ if (stratA == strat::Sparse && stratB == strat::Sparse)
{
SparseCBV const & a = static_cast<SparseCBV const &>(lhs);
SparseCBV const & b = static_cast<SparseCBV const &>(rhs);