#pragma once #include "coding/huffman.hpp" #include "coding/varint.hpp" #include "base/bwt.hpp" #include "base/move_to_front.hpp" #include #include #include #include namespace coding { class BWTCoder { public: struct Params { size_t m_blockSize = 32000; }; template static void EncodeAndWriteBlock(Sink & sink, size_t n, uint8_t const * s, std::vector & bwtBuffer) { bwtBuffer.resize(n); auto const start = base::BWT(n, s, bwtBuffer.data()); base::MoveToFront mtf; for (auto & b : bwtBuffer) b = mtf.Transform(b); WriteVarUint(sink, start); HuffmanCoder huffman; huffman.Init(bwtBuffer.begin(), bwtBuffer.end()); huffman.WriteEncoding(sink); huffman.EncodeAndWrite(sink, bwtBuffer.begin(), bwtBuffer.end()); } template static void EncodeAndWriteBlock(Sink & sink, size_t n, uint8_t const * s) { std::vector bwtBuffer; EncodeAndWriteBlock(sink, n, s, bwtBuffer); } template static void EncodeAndWriteBlock(Sink & sink, string const & s) { EncodeAndWriteBlock(sink, s.size(), reinterpret_cast(s.data())); } template static void EncodeAndWrite(Params const & params, Sink & sink, size_t n, uint8_t const * s) { CHECK(params.m_blockSize != 0, ()); CHECK_GREATER(n + params.m_blockSize, n, ()); std::vector bwtBuffer; size_t const numBlocks = (n + params.m_blockSize - 1) / params.m_blockSize; WriteVarUint(sink, numBlocks); for (size_t i = 0; i < n; i += params.m_blockSize) { auto const m = std::min(n - i, params.m_blockSize); EncodeAndWriteBlock(sink, m, s + i, bwtBuffer); } } template static OutIt ReadAndDecodeBlock(Source & source, std::vector & bwtBuffer, std::vector & revBuffer, OutIt it) { auto const start = ReadVarUint(source); HuffmanCoder huffman; huffman.ReadEncoding(source); bwtBuffer.clear(); huffman.ReadAndDecode(source, std::back_inserter(bwtBuffer)); size_t const n = bwtBuffer.size(); base::MoveToFront mtf; for (size_t i = 0; i < n; ++i) { auto const b = mtf[bwtBuffer[i]]; bwtBuffer[i] = b; mtf.Transform(b); } if (n != 0) CHECK_LESS(start, n, ()); revBuffer.resize(n); base::RevBWT(n, static_cast(start), bwtBuffer.data(), revBuffer.data()); return std::copy(revBuffer.begin(), revBuffer.end(), it); } template static OutIt ReadAndDecodeBlock(Source & source, OutIt it) { std::vector bwtBuffer; std::vector revBuffer; return ReadAndDecodeBlock(source, bwtBuffer, revBuffer, it); } template static OutIt ReadAndDecode(Source & source, OutIt it) { auto const numBlocks = ReadVarUint(source); CHECK_LESS(numBlocks, std::numeric_limits::max(), ()); std::vector bwtBuffer; std::vector revBuffer; for (size_t i = 0; i < static_cast(numBlocks); ++i) it = ReadAndDecodeBlock(source, bwtBuffer, revBuffer, it); return it; } }; } // namespace coding