Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'coding/huffman.hpp')
-rw-r--r--coding/huffman.hpp151
1 files changed, 79 insertions, 72 deletions
diff --git a/coding/huffman.hpp b/coding/huffman.hpp
index a59b24a195..5beaef4033 100644
--- a/coding/huffman.hpp
+++ b/coding/huffman.hpp
@@ -7,6 +7,7 @@
#include "base/string_utils.hpp"
#include "std/algorithm.hpp"
+#include "std/iterator.hpp"
#include "std/map.hpp"
#include "std/queue.hpp"
#include "std/vector.hpp"
@@ -16,6 +17,43 @@ namespace coding
class HuffmanCoder
{
public:
+ class Freqs
+ {
+ public:
+ using Table = map<uint32_t, uint32_t>;
+
+ Freqs() = default;
+
+ template <typename... Args>
+ Freqs(Args const &... args)
+ {
+ Add(args...);
+ }
+
+ void Add(strings::UniString const & s) { Add(s.begin(), s.end()); }
+
+ void Add(string const & s) { Add(s.begin(), s.end()); }
+
+ template <typename It>
+ void Add(It begin, It end)
+ {
+ for (; begin != end; ++begin)
+ ++m_table[static_cast<uint32_t>(*begin)];
+ }
+
+ template <typename T>
+ void Add(vector<T> const & v)
+ {
+ for (auto const & e : v)
+ Add(e);
+ }
+
+ Table const & GetTable() const { return m_table; }
+
+ private:
+ Table m_table;
+ };
+
// A Code encodes a path to a leaf. It is read starting from
// the least significant bit.
struct Code
@@ -39,7 +77,15 @@ public:
// Internally builds a Huffman tree and makes
// the EncodeAndWrite and ReadAndDecode methods available.
- void Init(vector<strings::UniString> const & data);
+ template <typename... Args>
+ void Init(Args const &... args)
+ {
+ Clear();
+ BuildHuffmanTree(Freqs(args...));
+ BuildTables(m_root, 0);
+ }
+
+ void Clear();
// One way to store the encoding would be
// -- the succinct representation of the topology of Huffman tree;
@@ -103,32 +149,51 @@ public:
bool Encode(uint32_t symbol, Code & code) const;
bool Decode(Code const & code, uint32_t & symbol) const;
+ template <typename TWriter, typename It>
+ uint32_t EncodeAndWrite(TWriter & writer, It begin, It end) const
+ {
+ size_t const d = distance(begin, end);
+ BitWriter<TWriter> bitWriter(writer);
+ WriteVarUint(writer, d);
+ uint32_t sz = 0;
+ for (; begin != end; ++begin)
+ sz += EncodeAndWrite(bitWriter, static_cast<uint32_t>(*begin));
+ return sz;
+ }
+
+ template <typename TWriter>
+ uint32_t EncodeAndWrite(TWriter & writer, string const & s) const
+ {
+ return EncodeAndWrite(writer, s.begin(), s.end());
+ }
+
// Returns the number of bits written AFTER the size, i.e. the number
// of bits that the encoded string consists of.
template <typename TWriter>
uint32_t EncodeAndWrite(TWriter & writer, strings::UniString const & s) const
{
- BitWriter<TWriter> bitWriter(writer);
- WriteVarUint(writer, s.size());
- uint32_t sz = 0;
- for (size_t i = 0; i < s.size(); ++i)
- sz += EncodeAndWrite(bitWriter, static_cast<uint32_t>(s[i]));
- return sz;
+ return EncodeAndWrite(writer, s.begin(), s.end());
}
- template <typename TSource>
- strings::UniString ReadAndDecode(TSource & src) const
+ template <typename TSource, typename OutIt>
+ void ReadAndDecode(TSource & src, OutIt out) const
{
BitReader<TSource> bitReader(src);
size_t sz = static_cast<size_t>(ReadVarUint<uint32_t, TSource>(src));
vector<strings::UniChar> v(sz);
for (size_t i = 0; i < sz; ++i)
- v[i] = static_cast<strings::UniChar>(ReadAndDecode(bitReader));
- return strings::UniString(v.begin(), v.end());
+ *out++ = ReadAndDecode(bitReader);
}
-private:
+ template <typename TSource>
+ strings::UniString ReadAndDecode(TSource & src) const
+ {
+ strings::UniString result;
+ ReadAndDecode(src, std::back_inserter(result));
+ return result;
+ }
+private:
struct Node
{
Node *l, *r;
@@ -195,72 +260,14 @@ private:
void DeleteHuffmanTree(Node * root);
- // Builds a fixed Huffman tree for a collection of strings::UniStrings.
- // UniString is always UTF-32. Every code point is treated as a symbol for the encoder.
- template <typename TIter>
- void BuildHuffmanTree(TIter const & beg, TIter const & end)
- {
- if (beg == end)
- {
- m_root = nullptr;
- return;
- }
-
- // One would need more than 2^32 symbols to build a code that long.
- // On the other hand, 32 is short enough for our purposes, so do not
- // try to shrink the trees beyond this threshold.
- uint32_t const kMaxDepth = 32;
-
- map<uint32_t, uint32_t> freqs;
- for (auto it = beg; it != end; ++it)
- {
- auto const & e = *it;
- for (size_t i = 0; i < e.size(); ++i)
- {
- ++freqs[static_cast<uint32_t>(e[i])];
- }
- }
-
- priority_queue<Node *, vector<Node *>, NodeComparator> pq;
- for (auto const & e : freqs)
- pq.push(new Node(e.first, e.second, true /* isLeaf */));
-
- if (pq.empty())
- {
- m_root = nullptr;
- return;
- }
-
- while (pq.size() > 1)
- {
- auto a = pq.top();
- pq.pop();
- auto b = pq.top();
- pq.pop();
- if (a->symbol > b->symbol)
- swap(a, b);
- // Give it the smaller symbol to make the resulting encoding more predictable.
- auto ab = new Node(a->symbol, a->freq + b->freq, false /* isLeaf */);
- ab->l = a;
- ab->r = b;
- CHECK_LESS_OR_EQUAL(a->depth, kMaxDepth, ());
- CHECK_LESS_OR_EQUAL(b->depth, kMaxDepth, ());
- pq.push(ab);
- }
-
- m_root = pq.top();
- pq.pop();
-
- SetDepths(m_root, 0);
- }
+ void BuildHuffmanTree(Freqs const & freqs);
// Properly sets the depth field in the subtree rooted at root.
// It is easier to do it after the tree is built.
void SetDepths(Node * root, uint32_t depth);
- Node * m_root; // m_pRoot?
+ Node * m_root;
map<Code, uint32_t> m_decoderTable;
map<uint32_t, Code> m_encoderTable;
};
-
} // namespace coding