diff options
author | Maxim Pimenov <m@maps.me> | 2015-10-21 14:48:15 +0300 |
---|---|---|
committer | Sergey Yershov <yershov@corp.mail.ru> | 2016-03-23 16:02:37 +0300 |
commit | cea61ad8077c64501eec492e864f2b2ee681681f (patch) | |
tree | 9900d18ae5c6f32048f30a441357f13f59a30300 /coding | |
parent | 4e33a1f23cbfa9b5822feb7aded52d73fa808dcc (diff) |
Refactored the serialization code.
Diffstat (limited to 'coding')
-rw-r--r-- | coding/coding_tests/coding_tests.pro | 2 |
-rw-r--r-- | coding/coding_tests/trie_test.cpp | 64 |
-rw-r--r-- | coding/compressed_bit_vector.cpp | 22 |
-rw-r--r-- | coding/compressed_bit_vector.hpp | 2 |
-rw-r--r-- | coding/trie_builder.hpp | 38 |
-rw-r--r-- | coding/trie_reader.hpp | 47 |
6 files changed, 94 insertions, 81 deletions
diff --git a/coding/coding_tests/coding_tests.pro b/coding/coding_tests/coding_tests.pro index 3241f9eea5..f782173851 100644 --- a/coding/coding_tests/coding_tests.pro +++ b/coding/coding_tests/coding_tests.pro @@ -6,7 +6,7 @@ TEMPLATE = app ROOT_DIR = ../.. -DEPENDENCIES = coding base indexer minizip tomcrypt succinct +DEPENDENCIES = coding base minizip tomcrypt succinct include($$ROOT_DIR/common.pri) diff --git a/coding/coding_tests/trie_test.cpp b/coding/coding_tests/trie_test.cpp index ec86f3c637..83ccbd3528 100644 --- a/coding/coding_tests/trie_test.cpp +++ b/coding/coding_tests/trie_test.cpp @@ -6,9 +6,6 @@ #include "coding/trie_reader.hpp" #include "coding/write_to_sink.hpp" -#include "indexer/coding_params.hpp" -#include "indexer/string_file_values.hpp" - #include "base/logging.hpp" #include "std/algorithm.hpp" @@ -109,21 +106,44 @@ struct MaxValueCalc } }; -class CharValueList +// The ValueList and SingleValueSerializer classes are similar to +// those in indexer/string_file_values.hpp but that file +// is not included to avoid coding_tests's dependency from indexer. 
+class SingleValueSerializerChar +{ +public: + template <typename TWriter> + void Serialize(TWriter & writer, char & v) const + { + WriteToSink(writer, v); + } +}; + +class SingleValueSerializerUint32 +{ +public: + template <typename TWriter> + void Serialize(TWriter & writer, uint32_t & v) const + { + WriteToSink(writer, v); + } +}; + +class ValueListChar { public: using TValue = char; void Init(vector<TValue> const &) {} - CharValueList(const string & s) : m_string(s) {} + ValueListChar(const string & s) : m_string(s) {} size_t Size() const { return m_string.size(); } bool IsEmpty() const { return m_string.empty(); } - template <typename TSink> - void Serialize(TSink & sink) const + template <typename TSink, typename TSerializer> + void Serialize(TSink & sink, TSerializer const & /* serializer */) const { sink.Write(m_string.data(), m_string.size()); } @@ -132,16 +152,13 @@ private: string m_string; }; -} // namespace - -template <> -class ValueList<uint32_t> +class ValueListUint32 { public: using TValue = uint32_t; + using TSerializer = SingleValueSerializerUint32; - ValueList() = default; - ValueList(serial::CodingParams const & codingParams) : m_codingParams(codingParams) {} + ValueListUint32() = default; void Init(vector<TValue> const & values) { m_values = values; } @@ -150,14 +167,14 @@ public: bool IsEmpty() const { return m_values.empty(); } template <typename TSink> - void Serialize(TSink & sink) const + void Serialize(TSink & sink, TSerializer const & /* serializer */) const { for (auto const & value : m_values) WriteToSink(sink, value); } template <typename TSource> - void Deserialize(TSource & src, uint32_t valueCount) + void Deserialize(TSource & src, uint32_t valueCount, TSerializer const & /* serializer */) { m_values.resize(valueCount); for (size_t i = 0; i < valueCount; ++i) @@ -165,7 +182,7 @@ public: } template <typename TSource> - void Deserialize(TSource & src) + void Deserialize(TSource & src, TSerializer const & /* serializer */) { 
m_values.clear(); while (src.Size() > 0) @@ -179,12 +196,10 @@ public: f(value); } - void SetCodingParams(serial::CodingParams const & codingParams) { m_codingParams = codingParams; } - private: vector<TValue> m_values; - serial::CodingParams m_codingParams; }; +} // namespace #define ZENC bits::ZigZagEncode #define MKSC(x) static_cast<signed char>(x) @@ -200,8 +215,9 @@ UNIT_TEST(TrieBuilder_WriteNode_Smoke) "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"), ChildNodeInfo(true, 5, "a")}; - CharValueList valueList("123"); - trie::WriteNode(sink, 0, valueList, &children[0], &children[0] + ARRAY_SIZE(children)); + ValueListChar valueList("123"); + trie::WriteNode(sink, SingleValueSerializerChar(), 0, valueList, &children[0], + &children[0] + ARRAY_SIZE(children)); uint8_t const expected [] = { BOOST_BINARY(11000101), // Header: [0b11] [0b000101] @@ -266,13 +282,13 @@ UNIT_TEST(TrieBuilder_Build) vector<uint8_t> buf; PushBackByteSink<vector<uint8_t>> sink(buf); + SingleValueSerializerUint32 serializer; trie::Build<PushBackByteSink<vector<uint8_t>>, typename vector<KeyValuePair>::iterator, - ValueList<uint32_t>>(sink, v.begin(), v.end()); + ValueListUint32>(sink, serializer, v.begin(), v.end()); reverse(buf.begin(), buf.end()); MemReader memReader = MemReader(&buf[0], buf.size()); - auto const root = - trie::ReadTrie<MemReader, ValueList<uint32_t>>(memReader, serial::CodingParams()); + auto const root = trie::ReadTrie<MemReader, ValueListUint32>(memReader, serializer); vector<KeyValuePair> res; KeyValuePairBackInserter f; trie::ForEachRefWithValues(*root, f, vector<trie::TrieChar>()); diff --git a/coding/compressed_bit_vector.cpp b/coding/compressed_bit_vector.cpp index da80b04d42..9f81e7a3a6 100644 --- a/coding/compressed_bit_vector.cpp +++ b/coding/compressed_bit_vector.cpp @@ -319,17 +319,17 @@ unique_ptr<CompressedBitVector> CompressedBitVectorBuilder::FromCBV(CompressedBi auto strat = cbv.GetStorageStrategy(); switch (strat) { - case 
CompressedBitVector::StorageStrategy::Dense: - { - DenseCBV const & dense = static_cast<DenseCBV const &>(cbv); - auto bitGroups = dense.m_bitGroups; - return CompressedBitVectorBuilder::FromBitGroups(move(bitGroups)); - } - case CompressedBitVector::StorageStrategy::Sparse: - { - SparseCBV const & sparse = static_cast<SparseCBV const &>(cbv); - return CompressedBitVectorBuilder::FromBitPositions(sparse.m_positions); - } + case CompressedBitVector::StorageStrategy::Dense: + { + DenseCBV const & dense = static_cast<DenseCBV const &>(cbv); + auto bitGroups = dense.m_bitGroups; + return CompressedBitVectorBuilder::FromBitGroups(move(bitGroups)); + } + case CompressedBitVector::StorageStrategy::Sparse: + { + SparseCBV const & sparse = static_cast<SparseCBV const &>(cbv); + return CompressedBitVectorBuilder::FromBitPositions(sparse.m_positions); + } } return unique_ptr<CompressedBitVector>(); } diff --git a/coding/compressed_bit_vector.hpp b/coding/compressed_bit_vector.hpp index 82f67d1ad5..93329a1423 100644 --- a/coding/compressed_bit_vector.hpp +++ b/coding/compressed_bit_vector.hpp @@ -68,6 +68,7 @@ string DebugPrint(CompressedBitVector::StorageStrategy strat); class DenseCBV : public CompressedBitVector { public: + friend class CompressedBitVectorBuilder; static uint64_t const kBlockSize = 64; DenseCBV() = default; @@ -111,6 +112,7 @@ private: class SparseCBV : public CompressedBitVector { public: + friend class CompressedBitVectorBuilder; using TIterator = vector<uint64_t>::const_iterator; SparseCBV(vector<uint64_t> const & setBits); diff --git a/coding/trie_builder.hpp b/coding/trie_builder.hpp index 7231548bbe..f640b44ccc 100644 --- a/coding/trie_builder.hpp +++ b/coding/trie_builder.hpp @@ -40,16 +40,16 @@ namespace trie { -template <typename TSink, typename TChildIter, typename TValueList> -void WriteNode(TSink & sink, TrieChar baseChar, TValueList const & valueList, - TChildIter const begChild, TChildIter const endChild, bool isRoot = false) +template 
<typename TSink, typename TChildIter, typename TValueList, typename TSerializer> +void WriteNode(TSink & sink, TSerializer const & serializer, TrieChar baseChar, + TValueList const & valueList, TChildIter const begChild, TChildIter const endChild, + bool isRoot = false) { uint32_t const valueCount = valueList.Size(); if (begChild == endChild && !isRoot) { // Leaf node. - WriteVarUint(sink, valueCount); - valueList.Serialize(sink); + valueList.Serialize(sink, serializer); return; } uint32_t const childCount = endChild - begChild; @@ -59,7 +59,7 @@ void WriteNode(TSink & sink, TrieChar baseChar, TValueList const & valueList, WriteVarUint(sink, valueCount); if (childCount >= 63) WriteVarUint(sink, childCount); - valueList.Serialize(sink); + valueList.Serialize(sink, serializer); for (TChildIter it = begChild; it != endChild; /*++it*/) { uint8_t header = (it->IsLeaf() ? 128 : 0); @@ -156,22 +156,22 @@ struct NodeInfo } }; -template <typename TSink, typename TValueList> -void WriteNodeReverse(TSink & sink, TrieChar baseChar, NodeInfo<TValueList> & node, - bool isRoot = false) +template <typename TSink, typename TValueList, typename TSerializer> +void WriteNodeReverse(TSink & sink, TSerializer const & serializer, TrieChar baseChar, + NodeInfo<TValueList> & node, bool isRoot = false) { using TOutStorage = buffer_vector<uint8_t, 64>; TOutStorage out; PushBackByteSink<TOutStorage> outSink(out); node.FinalizeValueList(); - WriteNode(outSink, baseChar, node.m_valueList, node.m_children.rbegin(), node.m_children.rend(), - isRoot); + WriteNode(outSink, serializer, baseChar, node.m_valueList, node.m_children.rbegin(), + node.m_children.rend(), isRoot); reverse(out.begin(), out.end()); sink.Write(out.data(), out.size()); } -template <typename TSink, class TNodes> -void PopNodes(TSink & sink, TNodes & nodes, int nodesToPop) +template <typename TSink, typename TNodes, typename TSerializer> +void PopNodes(TSink & sink, TSerializer const & serializer, TNodes & nodes, int nodesToPop) 
{ using TNodeInfo = typename TNodes::value_type; ASSERT_GREATER(nodes.size(), nodesToPop, ()); @@ -190,7 +190,7 @@ void PopNodes(TSink & sink, TNodes & nodes, int nodesToPop) } else { - WriteNodeReverse(sink, node.m_char, node); + WriteNodeReverse(sink, serializer, node.m_char, node); prevNode.m_children.emplace_back( node.m_children.empty(), static_cast<uint32_t>(sink.Pos() - node.m_begPos), node.m_char); } @@ -214,8 +214,8 @@ void AppendValue(TNodeInfo & node, TValue const & value) node.m_temporaryValueList.push_back(value); } -template <typename TSink, typename TIter, typename TValueList> -void Build(TSink & sink, TIter const beg, TIter const end) +template <typename TSink, typename TIter, typename TValueList, typename TSerializer> +void Build(TSink & sink, TSerializer const & serializer, TIter const beg, TIter const end) { using TTrieString = buffer_vector<TrieChar, 32>; using TNodeInfo = NodeInfo<TValueList>; @@ -241,7 +241,7 @@ void Build(TSink & sink, TIter const beg, TIter const end) while (nCommon < min(key.size(), prevKey.size()) && prevKey[nCommon] == key[nCommon]) ++nCommon; - PopNodes(sink, nodes, nodes.size() - nCommon - 1); // Root is also a common node. + PopNodes(sink, serializer, nodes, nodes.size() - nCommon - 1); // Root is also a common node. uint64_t const pos = sink.Pos(); for (size_t i = nCommon; i < key.size(); ++i) @@ -253,10 +253,10 @@ void Build(TSink & sink, TIter const beg, TIter const end) } // Pop all the nodes from the stack. - PopNodes(sink, nodes, nodes.size() - 1); + PopNodes(sink, serializer, nodes, nodes.size() - 1); // Write the root. 
- WriteNodeReverse(sink, DEFAULT_CHAR /* baseChar */, nodes.back(), true /* isRoot */); + WriteNodeReverse(sink, serializer, DEFAULT_CHAR /* baseChar */, nodes.back(), true /* isRoot */); } } // namespace trie diff --git a/coding/trie_reader.hpp b/coding/trie_reader.hpp index 878323f841..b84aeaf7cf 100644 --- a/coding/trie_reader.hpp +++ b/coding/trie_reader.hpp @@ -3,36 +3,32 @@ #include "coding/reader.hpp" #include "coding/varint.hpp" -#include "indexer/coding_params.hpp" -#include "indexer/string_file_values.hpp" - #include "base/assert.hpp" #include "base/bits.hpp" #include "base/macros.hpp" namespace trie { -template <class TValueList> +template <class TValueList, typename TSerializer> class LeafIterator0 : public Iterator<TValueList> { public: + using TValue = typename TValueList::TValue; using Iterator<TValueList>::m_valueList; template <class TReader> - LeafIterator0(TReader const & reader, serial::CodingParams const & codingParams) + LeafIterator0(TReader const & reader, TSerializer const & serializer) { ReaderSource<TReader> src(reader); - uint32_t valueCount = ReadVarUint<uint32_t>(src); - m_valueList.SetCodingParams(codingParams); - m_valueList.Deserialize(src, valueCount); - // todo(@mpimenov) There used to be an assert here - // that src is completely exhausted by this time. 
+ if (src.Size() > 0) + m_valueList.Deserialize(src, 1 /* valueCount */, serializer); + ASSERT_EQUAL(src.Size(), 0, ()); } // trie::Iterator overrides: unique_ptr<Iterator<TValueList>> Clone() const override { - return make_unique<LeafIterator0<TValueList>>(*this); + return make_unique<LeafIterator0<TValueList, TSerializer>>(*this); } unique_ptr<Iterator<TValueList>> GoToEdge(size_t i) const override @@ -43,24 +39,24 @@ public: } }; -template <class TReader, class TValueList> +template <typename TReader, typename TValueList, typename TSerializer> class Iterator0 : public Iterator<TValueList> { public: + using TValue = typename TValueList::TValue; using Iterator<TValueList>::m_valueList; using Iterator<TValueList>::m_edge; - Iterator0(TReader const & reader, TrieChar baseChar, serial::CodingParams const & codingParams) - : m_reader(reader), m_codingParams(codingParams) + Iterator0(TReader const & reader, TrieChar baseChar, TSerializer const & serializer) + : m_reader(reader), m_serializer(serializer) { - m_valueList.SetCodingParams(m_codingParams); ParseNode(baseChar); } // trie::Iterator overrides: unique_ptr<Iterator<TValueList>> Clone() const override { - return make_unique<Iterator0<TReader, TValueList>>(*this); + return make_unique<Iterator0<TReader, TValueList, TSerializer>>(*this); } unique_ptr<Iterator<TValueList>> GoToEdge(size_t i) const override @@ -71,12 +67,12 @@ public: if (m_edgeInfo[i].m_isLeaf) { - return make_unique<LeafIterator0<TValueList>>(m_reader.SubReader(offset, size), - m_codingParams); + return make_unique<LeafIterator0<TValueList, TSerializer>>(m_reader.SubReader(offset, size), + m_serializer); } - return make_unique<Iterator0<TReader, TValueList>>( - m_reader.SubReader(offset, size), this->m_edge[i].m_str.back(), m_codingParams); + return make_unique<Iterator0<TReader, TValueList, TSerializer>>( + m_reader.SubReader(offset, size), this->m_edge[i].m_str.back(), m_serializer); } private: @@ -98,7 +94,7 @@ private: childCount = 
ReadVarUint<uint32_t>(src); // [valueList] - m_valueList.Deserialize(src, valueCount); + m_valueList.Deserialize(src, valueCount, m_serializer); // [childInfo] ... [childInfo] this->m_edge.resize(childCount); @@ -150,15 +146,14 @@ private: buffer_vector<EdgeInfo, 9> m_edgeInfo; TReader m_reader; - serial::CodingParams m_codingParams; + TSerializer m_serializer; }; // Returns iterator to the root of the trie. -template <class TReader, class TValueList> -unique_ptr<Iterator<TValueList>> ReadTrie(TReader const & reader, - serial::CodingParams const & codingParams) +template <class TReader, class TValueList, class TSerializer> +unique_ptr<Iterator<TValueList>> ReadTrie(TReader const & reader, TSerializer const & serializer) { - return make_unique<Iterator0<TReader, TValueList>>(reader, DEFAULT_CHAR, codingParams); + return make_unique<Iterator0<TReader, TValueList, TSerializer>>(reader, DEFAULT_CHAR, serializer); } } // namespace trie |