diff options
author | Yuri Gorshenin <y@maps.me> | 2017-06-30 13:00:28 +0300 |
---|---|---|
committer | Arsentiy Milchakov <milcars@mapswithme.com> | 2017-07-04 14:09:45 +0300 |
commit | f8aeb3711100c875e718b952d8a42442030d02af (patch) | |
tree | 7b36bea7a742022b18ef43c7ee4ac3de5f0ab9a0 /coding/huffman.cpp | |
parent | 043cc3623ae4a22557b7c9738d3c7e6d5cabbb0a (diff) |
[base][coding] Burrows-Wheeler coder.
Diffstat (limited to 'coding/huffman.cpp')
-rw-r--r-- | coding/huffman.cpp | 56 |
1 files changed, 49 insertions, 7 deletions
diff --git a/coding/huffman.cpp b/coding/huffman.cpp index a8d8a1275a..83d60039a3 100644 --- a/coding/huffman.cpp +++ b/coding/huffman.cpp @@ -9,13 +9,6 @@ HuffmanCoder::~HuffmanCoder() DeleteHuffmanTree(m_root); } -void HuffmanCoder::Init(vector<strings::UniString> const & data) -{ - DeleteHuffmanTree(m_root); - BuildHuffmanTree(data.begin(), data.end()); - BuildTables(m_root, 0); -} - bool HuffmanCoder::Encode(uint32_t symbol, Code & code) const { auto it = m_encoderTable.find(symbol); @@ -49,6 +42,14 @@ void HuffmanCoder::BuildTables(Node * root, uint32_t path) BuildTables(root->r, path + (static_cast<uint32_t>(1) << root->depth)); } +void HuffmanCoder::Clear() +{ + DeleteHuffmanTree(m_root); + m_root = nullptr; + m_encoderTable.clear(); + m_decoderTable.clear(); +} + void HuffmanCoder::DeleteHuffmanTree(Node * root) { if (!root) @@ -58,6 +59,47 @@ void HuffmanCoder::DeleteHuffmanTree(Node * root) delete root; } +void HuffmanCoder::BuildHuffmanTree(Freqs const & freqs) +{ + // One would need more than 2^32 symbols to build a code that long. + // On the other hand, 32 is short enough for our purposes, so do not + // try to shrink the trees beyond this threshold. + + uint32_t const kMaxDepth = 32; + + priority_queue<Node *, vector<Node *>, NodeComparator> pq; + for (auto const & e : freqs.GetTable()) + pq.push(new Node(e.first, e.second, true /* isLeaf */)); + + if (pq.empty()) + { + m_root = nullptr; + return; + } + + while (pq.size() > 1) + { + auto a = pq.top(); + pq.pop(); + auto b = pq.top(); + pq.pop(); + if (a->symbol > b->symbol) + swap(a, b); + // Give it the smaller symbol to make the resulting encoding more predictable. + auto ab = new Node(a->symbol, a->freq + b->freq, false /* isLeaf */); + ab->l = a; + ab->r = b; + CHECK_LESS_OR_EQUAL(a->depth, kMaxDepth, ()); + CHECK_LESS_OR_EQUAL(b->depth, kMaxDepth, ()); + pq.push(ab); + } + + m_root = pq.top(); + pq.pop(); + + SetDepths(m_root, 0); +} + void HuffmanCoder::SetDepths(Node * root, uint32_t depth) { if (!root) |