Welcome to mirror list, hosted at ThFree Co, Russian Federation.

bwt_coder.hpp « coding - github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: ca51e526b9c667987cdf01dc9cdd60c19e081653 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#pragma once

#include "coding/huffman.hpp"
#include "coding/varint.hpp"

#include "base/bwt.hpp"
#include "base/move_to_front.hpp"

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

namespace coding
{
class BWTCoder
{
public:
  struct Params
  {
    size_t m_blockSize = 32000;
  };

  template <typename Sink>
  static void EncodeAndWriteBlock(Sink & sink, size_t n, uint8_t const * s,
                                  std::vector<uint8_t> & bwtBuffer)
  {
    bwtBuffer.resize(n);
    auto const start = base::BWT(n, s, bwtBuffer.data());

    base::MoveToFront mtf;
    for (auto & b : bwtBuffer)
      b = mtf.Transform(b);

    WriteVarUint(sink, start);

    HuffmanCoder huffman;
    huffman.Init(bwtBuffer.begin(), bwtBuffer.end());
    huffman.WriteEncoding(sink);
    huffman.EncodeAndWrite(sink, bwtBuffer.begin(), bwtBuffer.end());
  }

  template <typename Sink>
  static void EncodeAndWriteBlock(Sink & sink, size_t n, uint8_t const * s)
  {
    std::vector<uint8_t> bwtBuffer;
    EncodeAndWriteBlock(sink, n, s, bwtBuffer);
  }

  template <typename Sink>
  static void EncodeAndWriteBlock(Sink & sink, string const & s)
  {
    EncodeAndWriteBlock(sink, s.size(), reinterpret_cast<uint8_t const *>(s.data()));
  }

  template <typename Sink>
  static void EncodeAndWrite(Params const & params, Sink & sink, size_t n, uint8_t const * s)
  {
    CHECK(params.m_blockSize != 0, ());
    CHECK_GREATER(n + params.m_blockSize, n, ());

    std::vector<uint8_t> bwtBuffer;

    size_t const numBlocks = (n + params.m_blockSize - 1) / params.m_blockSize;
    WriteVarUint(sink, numBlocks);
    for (size_t i = 0; i < n; i += params.m_blockSize)
    {
      auto const m = std::min(n - i, params.m_blockSize);
      EncodeAndWriteBlock(sink, m, s + i, bwtBuffer);
    }
  }

  template <typename Source, typename OutIt>
  static OutIt ReadAndDecodeBlock(Source & source, std::vector<uint8_t> & bwtBuffer,
                                  std::vector<uint8_t> & revBuffer, OutIt it)
  {
    auto const start = ReadVarUint<uint64_t, Source>(source);

    HuffmanCoder huffman;
    huffman.ReadEncoding(source);

    bwtBuffer.clear();
    huffman.ReadAndDecode(source, std::back_inserter(bwtBuffer));

    size_t const n = bwtBuffer.size();
    base::MoveToFront mtf;
    for (size_t i = 0; i < n; ++i)
    {
      auto const b = mtf[bwtBuffer[i]];
      bwtBuffer[i] = b;
      mtf.Transform(b);
    }

    if (n != 0)
      CHECK_LESS(start, n, ());

    revBuffer.resize(n);
    base::RevBWT(n, static_cast<size_t>(start), bwtBuffer.data(), revBuffer.data());
    return std::copy(revBuffer.begin(), revBuffer.end(), it);
  }

  template <typename Source, typename OutIt>
  static OutIt ReadAndDecodeBlock(Source & source, OutIt it)
  {
    std::vector<uint8_t> bwtBuffer;
    std::vector<uint8_t> revBuffer;
    return ReadAndDecodeBlock(source, bwtBuffer, revBuffer, it);
  }

  template <typename Source, typename OutIt>
  static OutIt ReadAndDecode(Source & source, OutIt it)
  {
    auto const numBlocks = ReadVarUint<uint64_t, Source>(source);
    CHECK_LESS(numBlocks, std::numeric_limits<size_t>::max(), ());

    std::vector<uint8_t> bwtBuffer;
    std::vector<uint8_t> revBuffer;

    for (size_t i = 0; i < static_cast<size_t>(numBlocks); ++i)
      it = ReadAndDecodeBlock(source, bwtBuffer, revBuffer, it);
    return it;
  }
};
}  // namespace coding