Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/coding
diff options
context:
space:
mode:
authorYuri Gorshenin <y@maps.me>2015-12-23 17:30:47 +0300
committerSergey Yershov <yershov@corp.mail.ru>2016-03-23 16:03:15 +0300
commitdf0bcdcf07d60c48cc6e84d57e7b42c7821e286f (patch)
treefb97e5f3d7ca8a7236df0ac9b5cf158abc95364f /coding
parent79278e235eeaa7698c05ba38aeddde0639c02e7b (diff)
[search] Implemented search in locality.
Diffstat (limited to 'coding')
-rw-r--r--coding/coding_tests/compressed_bit_vector_test.cpp119
-rw-r--r--coding/compressed_bit_vector.cpp118
-rw-r--r--coding/compressed_bit_vector.hpp8
3 files changed, 234 insertions, 11 deletions
diff --git a/coding/coding_tests/compressed_bit_vector_test.cpp b/coding/coding_tests/compressed_bit_vector_test.cpp
index 42515d5720..b0b800ea5e 100644
--- a/coding/coding_tests/compressed_bit_vector_test.cpp
+++ b/coding/coding_tests/compressed_bit_vector_test.cpp
@@ -25,6 +25,14 @@ void Subtract(vector<uint64_t> & setBits1, vector<uint64_t> & setBits2, vector<u
back_inserter(result));
}
+void Union(vector<uint64_t> & setBits1, vector<uint64_t> & setBits2, vector<uint64_t> & result)
+{
+ sort(setBits1.begin(), setBits1.end());
+ sort(setBits2.begin(), setBits2.end());
+ set_union(setBits1.begin(), setBits1.end(), setBits2.begin(), setBits2.end(),
+ back_inserter(result));
+}
+
template <typename TBinaryOp>
void CheckBinaryOp(TBinaryOp op, vector<uint64_t> & setBits1, vector<uint64_t> & setBits2,
coding::CompressedBitVector const & cbv)
@@ -33,14 +41,8 @@ void CheckBinaryOp(TBinaryOp op, vector<uint64_t> & setBits1, vector<uint64_t> &
op(setBits1, setBits2, expected);
TEST_EQUAL(expected.size(), cbv.PopCount(), ());
- vector<bool> expectedBitmap;
- if (!expected.empty())
- expectedBitmap.resize(expected.back() + 1);
-
for (size_t i = 0; i < expected.size(); ++i)
- expectedBitmap[expected[i]] = true;
- for (size_t i = 0; i < expectedBitmap.size(); ++i)
- TEST_EQUAL(cbv.GetBit(i), expectedBitmap[i], ());
+ TEST(cbv.GetBit(expected[i]), ());
}
void CheckIntersection(vector<uint64_t> & setBits1, vector<uint64_t> & setBits2,
@@ -54,6 +56,12 @@ void CheckSubtraction(vector<uint64_t> & setBits1, vector<uint64_t> & setBits2,
{
CheckBinaryOp(&Subtract, setBits1, setBits2, cbv);
}
+
+void CheckUnion(vector<uint64_t> & setBits1, vector<uint64_t> & setBits2,
+ coding::CompressedBitVector const & cbv)
+{
+ CheckBinaryOp(&Union, setBits1, setBits2, cbv);
+}
} // namespace
UNIT_TEST(CompressedBitVector_Intersect1)
@@ -214,6 +222,103 @@ UNIT_TEST(CompressedBitVector_Subtract4)
CheckSubtraction(setBits1, setBits2, *cbv3);
}
+UNIT_TEST(CompressedBitVector_Union_Smoke)
+{
+ vector<uint64_t> setBits1 = {};
+ vector<uint64_t> setBits2 = {};
+
+ auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
+ auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
+ TEST(cbv1.get(), ());
+ TEST(cbv2.get(), ());
+ TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv1->GetStorageStrategy(), ());
+ TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv2->GetStorageStrategy(), ());
+
+ auto cbv3 = coding::CompressedBitVector::Union(*cbv1, *cbv2);
+ TEST(cbv3.get(), ());
+ TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv3->GetStorageStrategy(), ());
+ CheckUnion(setBits1, setBits2, *cbv3);
+}
+
+UNIT_TEST(CompressedBitVector_Union1)
+{
+ vector<uint64_t> setBits1 = {};
+ vector<uint64_t> setBits2 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+ auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
+ auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
+ TEST(cbv1.get(), ());
+ TEST(cbv2.get(), ());
+ TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv1->GetStorageStrategy(), ());
+ TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, cbv2->GetStorageStrategy(), ());
+
+ auto cbv3 = coding::CompressedBitVector::Union(*cbv1, *cbv2);
+ TEST(cbv3.get(), ());
+ TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, cbv3->GetStorageStrategy(), ());
+ CheckUnion(setBits1, setBits2, *cbv3);
+}
+
+UNIT_TEST(CompressedBitVector_Union2)
+{
+ vector<uint64_t> setBits1 = {256, 1024};
+ vector<uint64_t> setBits2 = {0, 32, 64};
+
+ auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
+ auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
+ TEST(cbv1.get(), ());
+ TEST(cbv2.get(), ());
+ TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv1->GetStorageStrategy(), ());
+ TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv2->GetStorageStrategy(), ());
+
+ auto cbv3 = coding::CompressedBitVector::Union(*cbv1, *cbv2);
+ TEST(cbv3.get(), ());
+ TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv3->GetStorageStrategy(), ());
+ CheckUnion(setBits1, setBits2, *cbv3);
+}
+
+UNIT_TEST(CompressedBitVector_Union3)
+{
+ vector<uint64_t> setBits1 = {0, 1, 2, 3, 4, 5, 6};
+
+ vector<uint64_t> setBits2;
+ for (int i = 0; i < 256; ++i)
+ setBits2.push_back(i);
+
+ auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
+ auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
+ TEST(cbv1.get(), ());
+ TEST(cbv2.get(), ());
+ TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, cbv1->GetStorageStrategy(), ());
+ TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, cbv2->GetStorageStrategy(), ());
+
+ auto cbv3 = coding::CompressedBitVector::Union(*cbv1, *cbv2);
+ TEST(cbv3.get(), ());
+ TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, cbv3->GetStorageStrategy(), ());
+ CheckUnion(setBits1, setBits2, *cbv3);
+}
+
+UNIT_TEST(CompressedBitVector_Union4)
+{
+ vector<uint64_t> setBits1;
+ for (int i = 0; i < coding::DenseCBV::kBlockSize; ++i)
+ setBits1.push_back(i);
+
+ vector<uint64_t> setBits2 = {1000000000};
+
+ auto cbv1 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits1);
+ auto cbv2 = coding::CompressedBitVectorBuilder::FromBitPositions(setBits2);
+ TEST(cbv1.get(), ());
+ TEST(cbv2.get(), ());
+ TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Dense, cbv1->GetStorageStrategy(), ());
+ TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv2->GetStorageStrategy(), ());
+
+ auto cbv3 = coding::CompressedBitVector::Union(*cbv1, *cbv2);
+ TEST(cbv3.get(), ());
+ TEST_EQUAL(coding::CompressedBitVector::StorageStrategy::Sparse, cbv3->GetStorageStrategy(), ());
+
+ CheckUnion(setBits1, setBits2, *cbv3);
+}
+
UNIT_TEST(CompressedBitVector_SerializationDense)
{
int const kNumBits = 100;
diff --git a/coding/compressed_bit_vector.cpp b/coding/compressed_bit_vector.cpp
index 9d9513f11f..78a8c7cfde 100644
--- a/coding/compressed_bit_vector.cpp
+++ b/coding/compressed_bit_vector.cpp
@@ -118,6 +118,96 @@ struct SubtractOp
}
};
+struct UnionOp
+{
+ UnionOp() {}
+
+ unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a,
+ coding::DenseCBV const & b) const
+ {
+ size_t sizeA = a.NumBitGroups();
+ size_t sizeB = b.NumBitGroups();
+
+ size_t commonSize = min(sizeA, sizeB);
+ size_t resultSize = max(sizeA, sizeB);
+ vector<uint64_t> resGroups(resultSize);
+ for (size_t i = 0; i < commonSize; ++i)
+ resGroups[i] = a.GetBitGroup(i) | b.GetBitGroup(i);
+ if (a.NumBitGroups() == resultSize)
+ {
+ for (size_t i = commonSize; i < resultSize; ++i)
+ resGroups[i] = a.GetBitGroup(i);
+ }
+ else
+ {
+ for (size_t i = commonSize; i < resultSize; ++i)
+ resGroups[i] = b.GetBitGroup(i);
+ }
+ return CompressedBitVectorBuilder::FromBitGroups(move(resGroups));
+ }
+
+ unique_ptr<coding::CompressedBitVector> operator()(coding::DenseCBV const & a,
+ coding::SparseCBV const & b) const
+ {
+ size_t sizeA = a.NumBitGroups();
+ size_t sizeB = b.PopCount() == 0 ? 0 : (b.Select(b.PopCount() - 1) + DenseCBV::kBlockSize - 1) /
+ DenseCBV::kBlockSize;
+ if (sizeB > sizeA)
+ {
+ vector<uint64_t> resPos;
+ auto j = b.Begin();
+ auto merge = [&](uint64_t va)
+ {
+ while (j < b.End() && *j < va)
+ {
+ resPos.push_back(*j);
+ ++j;
+ }
+ resPos.push_back(va);
+ };
+ a.ForEach(merge);
+ for (; j < b.End(); ++j)
+ resPos.push_back(*j);
+ return CompressedBitVectorBuilder::FromBitPositions(move(resPos));
+ }
+
+ vector<uint64_t> resGroups(sizeA);
+
+ size_t i = 0;
+ auto j = b.Begin();
+ for (; i < sizeA || j < b.End(); ++i)
+ {
+ uint64_t const kBitsBegin = i * DenseCBV::kBlockSize;
+ uint64_t const kBitsEnd = (i + 1) * DenseCBV::kBlockSize;
+
+ uint64_t mask = i < sizeA ? a.GetBitGroup(i) : 0;
+ for (; j < b.End() && *j < kBitsEnd; ++j)
+ {
+ ASSERT_GREATER_OR_EQUAL(*j, kBitsBegin, ());
+ mask |= static_cast<uint64_t>(1) << (*j - kBitsBegin);
+ }
+
+ resGroups[i] = mask;
+ }
+
+ return CompressedBitVectorBuilder::FromBitGroups(move(resGroups));
+ }
+
+ unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a,
+ coding::DenseCBV const & b) const
+ {
+ return operator()(b, a);
+ }
+
+ unique_ptr<coding::CompressedBitVector> operator()(coding::SparseCBV const & a,
+ coding::SparseCBV const & b) const
+ {
+ vector<uint64_t> resPos;
+ set_union(a.Begin(), a.End(), b.Begin(), b.End(), back_inserter(resPos));
+ return CompressedBitVectorBuilder::FromBitPositions(move(resPos));
+ }
+};
+
template <typename TBinaryOp>
unique_ptr<coding::CompressedBitVector> Apply(TBinaryOp const & op, CompressedBitVector const & lhs,
CompressedBitVector const & rhs)
@@ -341,15 +431,35 @@ string DebugPrint(CompressedBitVector::StorageStrategy strat)
unique_ptr<CompressedBitVector> CompressedBitVector::Intersect(CompressedBitVector const & lhs,
CompressedBitVector const & rhs)
{
- static IntersectOp const intersectOp;
- return Apply(intersectOp, lhs, rhs);
+ static IntersectOp const op;
+ return Apply(op, lhs, rhs);
}
// static
unique_ptr<CompressedBitVector> CompressedBitVector::Subtract(CompressedBitVector const & lhs,
CompressedBitVector const & rhs)
{
- static SubtractOp const subtractOp;
- return Apply(subtractOp, lhs, rhs);
+ static SubtractOp const op;
+ return Apply(op, lhs, rhs);
+}
+
+// static
+unique_ptr<CompressedBitVector> CompressedBitVector::Union(CompressedBitVector const & lhs,
+ CompressedBitVector const & rhs)
+{
+ static UnionOp const op;
+ return Apply(op, lhs, rhs);
+}
+
+// static
+bool CompressedBitVector::IsEmpty(unique_ptr<CompressedBitVector> const & cbv)
+{
+ return !cbv || cbv->PopCount() == 0;
+}
+
+// static
+bool CompressedBitVector::IsEmpty(CompressedBitVector const * cbv)
+{
+ return !cbv || cbv->PopCount() == 0;
}
} // namespace coding
diff --git a/coding/compressed_bit_vector.hpp b/coding/compressed_bit_vector.hpp
index 837e35015f..23d7b1c04c 100644
--- a/coding/compressed_bit_vector.hpp
+++ b/coding/compressed_bit_vector.hpp
@@ -43,6 +43,14 @@ public:
static unique_ptr<CompressedBitVector> Subtract(CompressedBitVector const & lhs,
CompressedBitVector const & rhs);
+ // Unites two bit vectors.
+ static unique_ptr<CompressedBitVector> Union(CompressedBitVector const & lhs,
+ CompressedBitVector const & rhs);
+
+ static bool IsEmpty(unique_ptr<CompressedBitVector> const & cbv);
+
+ static bool IsEmpty(CompressedBitVector const * cbv);
+
// Returns the number of set bits (population count).
virtual uint64_t PopCount() const = 0;