Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYuri Gorshenin <y@maps.me>2015-10-21 12:07:53 +0300
committerSergey Yershov <yershov@corp.mail.ru>2016-03-23 16:02:26 +0300
commit46435d0987a839b31442f76cd6bf0275b2beb8e3 (patch)
tree221b382d507ad982b5dbe15940e3913e3c3c7299 /coding/compressed_bit_vector.cpp
parent60cf672e88bbcaf0830ed69eea178b360cd45d0b (diff)
[coding, search] Added Subtract() method to BitVectors.
Diffstat (limited to 'coding/compressed_bit_vector.cpp')
-rw-r--r--coding/compressed_bit_vector.cpp285
1 files changed, 174 insertions, 111 deletions
diff --git a/coding/compressed_bit_vector.cpp b/coding/compressed_bit_vector.cpp
index 876b0135f4..a5fc3ebee7 100644
--- a/coding/compressed_bit_vector.cpp
+++ b/coding/compressed_bit_vector.cpp
@@ -1,80 +1,156 @@
#include "coding/compressed_bit_vector.hpp"
-#include "coding/writer.hpp"
+
#include "coding/write_to_sink.hpp"
+#include "base/assert.hpp"
#include "base/bits.hpp"
#include "std/algorithm.hpp"
namespace coding
{
-// static
-uint32_t const DenseCBV::kBlockSize;
-} // namespace coding
-
namespace
{
-uint64_t const kBlockSize = coding::DenseCBV::kBlockSize;
-
-unique_ptr<coding::CompressedBitVector> IntersectImpl(coding::DenseCBV const & a,
- coding::DenseCBV const & b)
+struct IntersectOp
{
- size_t sizeA = a.NumBitGroups();
- size_t sizeB = b.NumBitGroups();
- vector<uint64_t> resGroups(min(sizeA, sizeB));
- for (size_t i = 0; i < resGroups.size(); ++i)
- resGroups[i] = a.GetBitGroup(i) & b.GetBitGroup(i);
- return coding::CompressedBitVectorBuilder::FromBitGroups(move(resGroups));
-}
+ IntersectOp() {}
-// The intersection of dense and sparse is always sparse.
-unique_ptr<coding::CompressedBitVector> IntersectImpl(coding::DenseCBV const & a,
- coding::SparseCBV const & b)
-{
- vector<uint64_t> resPos;
- for (size_t i = 0; i < b.PopCount(); ++i)
+ unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a,
+ coding::DenseCBV const & b) const
{
- auto pos = b.Select(i);
- if (a.GetBit(pos))
- resPos.push_back(pos);
+ size_t sizeA = a.NumBitGroups();
+ size_t sizeB = b.NumBitGroups();
+ vector<uint64_t> resGroups(min(sizeA, sizeB));
+ for (size_t i = 0; i < resGroups.size(); ++i)
+ resGroups[i] = a.GetBitGroup(i) & b.GetBitGroup(i);
+ return coding::CompressedBitVectorBuilder::FromBitGroups(move(resGroups));
}
- return make_unique<coding::SparseCBV>(move(resPos));
-}
-unique_ptr<coding::CompressedBitVector> IntersectImpl(coding::SparseCBV const & a,
- coding::DenseCBV const & b)
-{
- return IntersectImpl(b, a);
-}
-
-unique_ptr<coding::CompressedBitVector> IntersectImpl(coding::SparseCBV const & a,
- coding::SparseCBV const & b)
-{
- size_t sizeA = a.PopCount();
- size_t sizeB = b.PopCount();
- vector<uint64_t> resPos;
- size_t i = 0;
- size_t j = 0;
- while (i < sizeA && j < sizeB)
+ // The intersection of dense and sparse is always sparse.
+ unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a,
+ coding::SparseCBV const & b) const
{
- auto posA = a.Select(i);
- auto posB = b.Select(j);
- if (posA == posB)
+ vector<uint64_t> resPos;
+ for (size_t i = 0; i < b.PopCount(); ++i)
{
- resPos.push_back(posA);
- ++i;
- ++j;
+ auto pos = b.Select(i);
+ if (a.GetBit(pos))
+ resPos.push_back(pos);
}
- else if (posA < posB)
- {
- ++i;
- }
- else
+ return make_unique<coding::SparseCBV>(move(resPos));
+ }
+
+ unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a,
+ coding::DenseCBV const & b) const
+ {
+ return operator()(b, a);
+ }
+
+ unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a,
+ coding::SparseCBV const & b) const
+ {
+ vector<uint64_t> resPos;
+ set_intersection(a.Begin(), a.End(), b.Begin(), b.End(), back_inserter(resPos));
+ return make_unique<coding::SparseCBV>(move(resPos));
+ }
+};
+
+struct SubtractOp
+{
+ SubtractOp() {}
+
+ unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a,
+ coding::DenseCBV const & b) const
+ {
+ size_t sizeA = a.NumBitGroups();
+ size_t sizeB = b.NumBitGroups();
+ vector<uint64_t> resGroups(min(sizeA, sizeB));
+ for (size_t i = 0; i < resGroups.size(); ++i)
+ resGroups[i] = a.GetBitGroup(i) & ~b.GetBitGroup(i);
+ return CompressedBitVectorBuilder::FromBitGroups(move(resGroups));
+ }
+
+ unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a,
+ coding::SparseCBV const & b) const
+ {
+ vector<uint64_t> resGroups(a.NumBitGroups());
+
+ size_t i = 0;
+ auto j = b.Begin();
+ for (; i < resGroups.size() && j < b.End(); ++i)
{
- ++j;
+ uint64_t const kBitsBegin = i * DenseCBV::kBlockSize;
+ uint64_t const kBitsEnd = (i + 1) * DenseCBV::kBlockSize;
+
+ uint64_t mask = 0;
+ for (; j < b.End() && *j < kBitsEnd; ++j)
+ {
+ ASSERT_GREATER_OR_EQUAL(*j, kBitsBegin, ());
+ mask |= static_cast<uint64_t>(1) << (*j - kBitsBegin);
+ }
+
+ resGroups[i] = a.GetBitGroup(i) & ~mask;
}
+
+ for (; i < resGroups.size(); ++i)
+ resGroups[i] = a.GetBitGroup(i);
+
+ return CompressedBitVectorBuilder::FromBitGroups(move(resGroups));
+ }
+
+ unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a,
+ coding::DenseCBV const & b) const
+ {
+ vector<uint64_t> resPos;
+ copy_if(a.Begin(), a.End(), back_inserter(resPos), [&](uint64_t bit)
+ {
+ return !b.GetBit(bit);
+ });
+ return CompressedBitVectorBuilder::FromBitPositions(move(resPos));
+ }
+
+ unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a,
+ coding::SparseCBV const & b) const
+ {
+ vector<uint64_t> resPos;
+ set_difference(a.Begin(), a.End(), b.Begin(), b.End(), back_inserter(resPos));
+ return CompressedBitVectorBuilder::FromBitPositions(move(resPos));
+ }
+};
+
+template <typename TBinaryOp>
+unique_ptr<coding::CompressedBitVector> Apply(TBinaryOp const & op, CompressedBitVector const & lhs,
+ CompressedBitVector const & rhs)
+{
+ using strat = CompressedBitVector::StorageStrategy;
+ auto const stratA = lhs.GetStorageStrategy();
+ auto const stratB = rhs.GetStorageStrategy();
+ if (stratA == strat::Dense && stratB == strat::Dense)
+ {
+ DenseCBV const & a = static_cast<DenseCBV const &>(lhs);
+ DenseCBV const & b = static_cast<DenseCBV const &>(rhs);
+ return op(a, b);
+ }
+ if (stratA == strat::Dense && stratB == strat::Sparse)
+ {
+ DenseCBV const & a = static_cast<DenseCBV const &>(lhs);
+ SparseCBV const & b = static_cast<SparseCBV const &>(rhs);
+ return op(a, b);
}
- return make_unique<coding::SparseCBV>(move(resPos));
+ if (stratA == strat::Sparse && stratB == strat::Dense)
+ {
+ SparseCBV const & a = static_cast<SparseCBV const &>(lhs);
+ DenseCBV const & b = static_cast<DenseCBV const &>(rhs);
+ return op(a, b);
+ }
+ if (stratA == strat::Sparse && stratB == strat::Sparse)
+ {
+ SparseCBV const & a = static_cast<SparseCBV const &>(lhs);
+ SparseCBV const & b = static_cast<SparseCBV const &>(rhs);
+ return op(a, b);
+ }
+
+ return nullptr;
}
// Returns true if a bit vector with popCount bits set out of totalBits
@@ -87,22 +163,34 @@ bool DenseEnough(uint64_t popCount, uint64_t totalBits)
// Settle at 30% for now.
return popCount * 10 >= totalBits * 3;
}
-} // namespace
-namespace coding
+template <typename TBitPositions>
+unique_ptr<CompressedBitVector> BuildFromBitPositions(TBitPositions && setBits)
{
+ if (setBits.empty())
+ return make_unique<SparseCBV>(forward<TBitPositions>(setBits));
+ uint64_t const maxBit = *max_element(setBits.begin(), setBits.end());
+
+ if (DenseEnough(setBits.size(), maxBit))
+ return make_unique<DenseCBV>(forward<TBitPositions>(setBits));
+
+ return make_unique<SparseCBV>(forward<TBitPositions>(setBits));
+}
+} // namespace
+
+// static
+uint64_t const DenseCBV::kBlockSize;
+
DenseCBV::DenseCBV(vector<uint64_t> const & setBits)
{
if (setBits.empty())
{
return;
}
- uint64_t maxBit = setBits[0];
- for (size_t i = 1; i < setBits.size(); ++i)
- maxBit = max(maxBit, setBits[i]);
+ uint64_t const maxBit = *max_element(setBits.begin(), setBits.end());
size_t const sz = 1 + maxBit / kBlockSize;
m_bitGroups.resize(sz);
- m_popCount = static_cast<uint32_t>(setBits.size());
+ m_popCount = static_cast<uint64_t>(setBits.size());
for (uint64_t pos : setBits)
m_bitGroups[pos / kBlockSize] |= static_cast<uint64_t>(1) << (pos % kBlockSize);
}
@@ -123,9 +211,9 @@ uint64_t DenseCBV::GetBitGroup(size_t i) const
return i < m_bitGroups.size() ? m_bitGroups[i] : 0;
}
-uint32_t DenseCBV::PopCount() const { return m_popCount; }
+uint64_t DenseCBV::PopCount() const { return m_popCount; }
-bool DenseCBV::GetBit(uint32_t pos) const
+bool DenseCBV::GetBit(uint64_t pos) const
{
uint64_t bitGroup = GetBitGroup(pos / kBlockSize);
return ((bitGroup >> (pos % kBlockSize)) & 1) > 0;
@@ -140,9 +228,7 @@ void DenseCBV::Serialize(Writer & writer) const
{
uint8_t header = static_cast<uint8_t>(GetStorageStrategy());
WriteToSink(writer, header);
- WriteToSink(writer, static_cast<uint32_t>(NumBitGroups()));
- for (size_t i = 0; i < NumBitGroups(); ++i)
- WriteToSink(writer, GetBitGroup(i));
+ rw::WriteVectorOfPOD(writer, m_bitGroups);
}
SparseCBV::SparseCBV(vector<uint64_t> const & setBits) : m_positions(setBits)
@@ -161,9 +247,9 @@ uint64_t SparseCBV::Select(size_t i) const
return m_positions[i];
}
-uint32_t SparseCBV::PopCount() const { return m_positions.size(); }
+uint64_t SparseCBV::PopCount() const { return m_positions.size(); }
-bool SparseCBV::GetBit(uint32_t pos) const
+bool SparseCBV::GetBit(uint64_t pos) const
{
auto const it = lower_bound(m_positions.begin(), m_positions.end(), pos);
return it != m_positions.end() && *it == pos;
@@ -178,39 +264,35 @@ void SparseCBV::Serialize(Writer & writer) const
{
uint8_t header = static_cast<uint8_t>(GetStorageStrategy());
WriteToSink(writer, header);
- WriteToSink(writer, PopCount());
- ForEach([&](uint64_t bitPos)
- {
- WriteToSink(writer, bitPos);
- });
+ rw::WriteVectorOfPOD(writer, m_positions);
}
// static
unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitPositions(
vector<uint64_t> const & setBits)
{
- if (setBits.empty())
- return make_unique<SparseCBV>(setBits);
- uint64_t maxBit = setBits[0];
- for (size_t i = 1; i < setBits.size(); ++i)
- maxBit = max(maxBit, setBits[i]);
-
- if (DenseEnough(setBits.size(), maxBit))
- return make_unique<DenseCBV>(setBits);
+ return BuildFromBitPositions(setBits);
+}
- return make_unique<SparseCBV>(setBits);
+// static
+unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitPositions(
+ vector<uint64_t> && setBits)
+{
+ return BuildFromBitPositions(move(setBits));
}
// static
unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitGroups(
vector<uint64_t> && bitGroups)
{
+ static uint64_t const kBlockSize = DenseCBV::kBlockSize;
+
while (!bitGroups.empty() && bitGroups.back() == 0)
bitGroups.pop_back();
if (bitGroups.empty())
return make_unique<SparseCBV>(bitGroups);
- uint64_t const maxBit = kBlockSize * bitGroups.size() - 1;
+ uint64_t const maxBit = kBlockSize * (bitGroups.size() - 1) + bits::CeilLog(bitGroups.back());
uint64_t popCount = 0;
for (size_t i = 0; i < bitGroups.size(); ++i)
popCount += bits::PopCount(bitGroups[i]);
@@ -245,34 +327,15 @@ string DebugPrint(CompressedBitVector::StorageStrategy strat)
unique_ptr<CompressedBitVector> CompressedBitVector::Intersect(CompressedBitVector const & lhs,
CompressedBitVector const & rhs)
{
- using strat = CompressedBitVector::StorageStrategy;
- auto const stratA = lhs.GetStorageStrategy();
- auto const stratB = rhs.GetStorageStrategy();
- if (stratA == strat::Dense && stratB == strat::Dense)
- {
- DenseCBV const & a = static_cast<DenseCBV const &>(lhs);
- DenseCBV const & b = static_cast<DenseCBV const &>(rhs);
- return IntersectImpl(a, b);
- }
- if (stratA == strat::Dense && stratB == strat::Sparse)
- {
- DenseCBV const & a = static_cast<DenseCBV const &>(lhs);
- SparseCBV const & b = static_cast<SparseCBV const &>(rhs);
- return IntersectImpl(a, b);
- }
- if (stratA == strat::Sparse && stratB == strat::Dense)
- {
- SparseCBV const & a = static_cast<SparseCBV const &>(lhs);
- DenseCBV const & b = static_cast<DenseCBV const &>(rhs);
- return IntersectImpl(a, b);
- }
- if (stratA == strat::Sparse && stratB == strat::Sparse)
- {
- SparseCBV const & a = static_cast<SparseCBV const &>(lhs);
- SparseCBV const & b = static_cast<SparseCBV const &>(rhs);
- return IntersectImpl(a, b);
- }
+ static IntersectOp const intersectOp;
+ return Apply(intersectOp, lhs, rhs);
+}
- return nullptr;
+// static
+unique_ptr<CompressedBitVector> CompressedBitVector::Subtract(CompressedBitVector const & lhs,
+ CompressedBitVector const & rhs)
+{
+ static SubtractOp const subtractOp;
+ return Apply(subtractOp, lhs, rhs);
}
} // namespace coding