Welcome to mirror list, hosted at ThFree Co, Russian Federation.

simple_dense_coding.hpp « coding - github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: dee95e6804d5fb0deeb16296f80630ffe4db347d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#pragma once

#include "std/vector.hpp"

#include "3party/succinct/elias_fano_compressed_list.hpp"

namespace coding
{
// This class represents a variant of a so-called simple dense coding
// scheme for byte strings.  It can be used when it's necessary to
// compress strings with skewed entropy and nevertheless efficient
// access to the string's elements is needed.
//
// The main idea is to assign codewords from the set { 0, 1, 00, 01,
// 10, 11, 000, ... } to string's symbols in accordance with their
// frequencies and to create a helper bit-vector for starting
// positions of the codewords in compressed string.
//
// Memory complexity: 2 * n * (H_0(T) + 1) bits for a text T, but note
// that this is an upper bound and too pessimistic.

// Time complexity: O(log(n * H_0(T))) to access i-th element of the
// string, because of logarithmic complexity of
// rs_bit_vector::select. This will be fixed when RRR will be
// implemented.
//
// For details, see Kimmo Fredriksson, Fedor Nikitin, "Simple Random
// Access Compression", Fundamenta Informaticae 2009,
// http://www.cs.uku.fi/~fredriks/pub/papers/fi09.pdf.
class SimpleDenseCoding
{
public:
  SimpleDenseCoding() = default;

  SimpleDenseCoding(vector<uint8_t> const & data);

  SimpleDenseCoding(SimpleDenseCoding && rhs);

  uint8_t Get(uint64_t i) const;

  inline uint64_t Size() const { return m_ranks.size(); }

  // map is used here (instead of Map) for compatibility with succinct
  // structures.
  template <typename TVisitor>
  void map(TVisitor & visitor)
  {
    visitor(m_ranks, "m_ranks");
    visitor(m_symbols, "m_symbols");
  }

private:
  succinct::elias_fano_compressed_list m_ranks;
  succinct::mapper::mappable_vector<uint8_t> m_symbols;
};
}  // namespace coding