diff options
author | Maxim Pimenov <m@maps.me> | 2015-10-21 14:46:54 +0300 |
---|---|---|
committer | Sergey Yershov <yershov@corp.mail.ru> | 2016-03-23 16:02:27 +0300 |
commit | 2ef3f59f92b5fbace5c600cd968b8d0e795529e0 (patch) | |
tree | f5ba71a73de89bb0ad60d5ce25129443d95afd84 /coding | |
parent | 7e2abfba0bffbc3e96314f6c1dabe5e27f9782e8 (diff) |
Switched the search index to use compressed bit vectors.
Diffstat (limited to 'coding')
-rw-r--r-- | coding/coding_tests/trie_test.cpp | 2 | ||||
-rw-r--r-- | coding/compressed_bit_vector.cpp | 21 | ||||
-rw-r--r-- | coding/compressed_bit_vector.hpp | 6 | ||||
-rw-r--r-- | coding/trie.hpp | 21 | ||||
-rw-r--r-- | coding/trie_builder.hpp | 4 | ||||
-rw-r--r-- | coding/trie_reader.hpp | 2 |
6 files changed, 50 insertions, 6 deletions
diff --git a/coding/coding_tests/trie_test.cpp b/coding/coding_tests/trie_test.cpp index 8935d42563..9460f33d30 100644 --- a/coding/coding_tests/trie_test.cpp +++ b/coding/coding_tests/trie_test.cpp @@ -277,7 +277,7 @@ UNIT_TEST(TrieBuilder_Build) trie::ReadTrie<MemReader, ValueList<uint32_t>>(memReader, serial::CodingParams()); vector<KeyValuePair> res; KeyValuePairBackInserter f; - trie::ForEachRef(*root, f, vector<trie::TrieChar>()); + trie::ForEachRefWithValues(*root, f, vector<trie::TrieChar>()); sort(f.m_v.begin(), f.m_v.end()); TEST_EQUAL(v, f.m_v, ()); } diff --git a/coding/compressed_bit_vector.cpp b/coding/compressed_bit_vector.cpp index a5fc3ebee7..d6ef51dd03 100644 --- a/coding/compressed_bit_vector.cpp +++ b/coding/compressed_bit_vector.cpp @@ -312,6 +312,27 @@ unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromBitGroups( return make_unique<SparseCBV>(setBits); } +// static +unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromCBV(CompressedBitVector const & cbv) +{ + auto strat = cbv.GetStorageStrategy(); + switch (strat) + { + case CompressedBitVector::StorageStrategy::Dense: + { + DenseCBV const & dense = static_cast<DenseCBV const &>(cbv); + auto bitGroups = dense.m_bitGroups; + return CompressedBitVectorBuilder::FromBitGroups(move(bitGroups)); + } + case CompressedBitVector::StorageStrategy::Sparse: + { + SparseCBV const & sparse = static_cast<SparseCBV const &>(cbv); + return CompressedBitVectorBuilder::FromBitPositions(sparse.m_positions); + } + } + return unique_ptr<CompressedBitVector>(); +} + string DebugPrint(CompressedBitVector::StorageStrategy strat) { switch (strat) diff --git a/coding/compressed_bit_vector.hpp b/coding/compressed_bit_vector.hpp index 95d2d86ac4..82f67d1ad5 100644 --- a/coding/compressed_bit_vector.hpp +++ b/coding/compressed_bit_vector.hpp @@ -2,8 +2,11 @@ #include "coding/reader.hpp" #include "coding/writer.hpp" +#include "base/assert.hpp" + #include "std/algorithm.hpp" #include "std/unique_ptr.hpp" +#include "std/utility.hpp" #include "std/vector.hpp" namespace coding @@ -150,6 +153,9 @@ public: // by concatenating the elements of bitGroups. static unique_ptr<CompressedBitVector> FromBitGroups(vector<uint64_t> && bitGroups); + // Copies a CBV. + static unique_ptr<CompressedBitVector> FromCBV(CompressedBitVector const & cbv); + // Reads a bit vector from reader which must contain a valid // bit vector representation (see CompressedBitVector::Serialize for the format). template <typename TReader> diff --git a/coding/trie.hpp b/coding/trie.hpp index ef80a14e11..d9433e7f3d 100644 --- a/coding/trie.hpp +++ b/coding/trie.hpp @@ -24,6 +24,8 @@ class Iterator //dbg::ObjectTracker m_tracker; public: + using TValue = typename TValueList::TValue; + struct Edge { typedef buffer_vector<TrieChar, 8> EdgeStrT; @@ -70,9 +72,9 @@ struct FixedSizeValueReader template <typename TValueList, typename TF, typename TString> void ForEachRef(Iterator<TValueList> const & iter, TF && f, TString const & s) { - iter.m_valueList.ForEach([&f, &s](typename TValueList::TValue value) + iter.m_valueList.ForEach([&f, &s](typename TValueList::TValue const & /* value */) { - f(s, value); + f(s); }); for (size_t i = 0; i < iter.m_edge.size(); ++i) { @@ -83,4 +85,19 @@ void ForEachRef(Iterator<TValueList> const & iter, TF && f, TString const & s) } } +template <typename TValueList, typename TF, typename TString> +void ForEachRefWithValues(Iterator<TValueList> const & iter, TF && f, TString const & s) +{ + iter.m_valueList.ForEach([&f, &s](typename TValueList::TValue const & value) + { + f(s, value); + }); + for (size_t i = 0; i < iter.m_edge.size(); ++i) + { + TString s1(s); + s1.insert(s1.end(), iter.m_edge[i].m_str.begin(), iter.m_edge[i].m_str.end()); + auto it = iter.GoToEdge(i); + ForEachRefWithValues(*it, f, s1); + } +} } // namespace trie diff --git a/coding/trie_builder.hpp b/coding/trie_builder.hpp index d42ebaf119..b9f22d3a14 100644 --- a/coding/trie_builder.hpp +++ b/coding/trie_builder.hpp @@ -17,13 +17,13 @@ // pre-order alphabetically reversed (parent, last child, first child). // Leaf node format: -// [value] ... [value] +// [valueList] // Internal node format: // [1: header]: [2: min(valueCount, 3)] [6: min(childCount, 63)] // [vu valueCount]: if valueCount in header == 3 // [vu childCount]: if childCount in header == 63 -// [value] ... [value] +// [valueList] // [childInfo] ... [childInfo] // Child info format: diff --git a/coding/trie_reader.hpp b/coding/trie_reader.hpp index f3978f460a..6d20ffd265 100644 --- a/coding/trie_reader.hpp +++ b/coding/trie_reader.hpp @@ -94,7 +94,7 @@ private: if (childCount == 63) childCount = ReadVarUint<uint32_t>(src); - // [value] ... [value] + // [valueList] m_valueList.Deserialize(src, valueCount); // [childInfo] ... [childInfo] |