Welcome to mirror list, hosted at ThFree Co, Russian Federation.

blob_indexer.cpp « coding - github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: a0b20a9cad2dece18b7bb9f134db1787d8f96c17 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#include "blob_indexer.hpp"
#include "../coding/byte_stream.hpp"
#include "../coding/endianness.hpp"
#include "../coding/varint.hpp"
#include "../coding/writer.hpp"
#include "../coding/write_to_sink.hpp"
#include "../base/assert.hpp"
#include "../base/base.hpp"
#include "../base/logging.hpp"
#include "../std/algorithm.hpp"
#include "../std/set.hpp"
#include "../std/string.hpp"

// Binds the indexer to |writer| and writes the file header.
//
// writer                   - sink for all output; must be at position 0.
// maxUncompressedChunkSize - requested upper bound for the uncompressed size
//                            of a chunk; clamped to
//                            (1 << BITS_IN_CHUNK_SIZE) - 1.
// compressor               - callable invoked by FlushChunk() to compress a chunk.
//
// Header layout: the 3 bytes "Blb" (no terminating zero) followed by
// BITS_IN_CHUNK_SIZE passed to WriteToSink as a uint8_t.
BlobIndexer::BlobIndexer(Writer & writer,
                         size_t maxUncompressedChunkSize,
                         CompressorType const & compressor) :
  m_writer(writer),
  // The clamp is done in size_t. Narrowing the argument to int first would
  // truncate (or make negative) any value above INT_MAX, so the clamp could
  // pick a bogus limit instead of the maximum representable chunk size.
  m_maxUncompressedChunkSize(min(maxUncompressedChunkSize,
                                 (static_cast<size_t>(1) << BITS_IN_CHUNK_SIZE) - 1)),
  m_compressor(compressor),
  m_totalBlobSizeUncompressed(0),
  m_maxBlobSize(0),
  m_largeBlobCount(0)
{
  // The caller is expected to pass a size that already fits; the clamp above
  // covers the case where this assertion does not fire.
  ASSERT_LESS(maxUncompressedChunkSize, (static_cast<size_t>(1) << BITS_IN_CHUNK_SIZE), ());
  CHECK_EQUAL(m_writer.Pos(), 0, ("Writer should not have something written already"));

  // Write header.
  char const header[] = "Blb";
  m_writer.Write(header, 3);
  WriteToSink(m_writer, static_cast<uint8_t>(BITS_IN_CHUNK_SIZE));
}

// Appends |blob| to the chunk being accumulated and returns the blob's id,
// which is its zero-based position in m_blobChunkAndOffset.
//
// If the blob does not fit into the current chunk, the current chunk is
// flushed first. A blob larger than the chunk limit is still stored; it is
// only logged and counted in m_largeBlobCount.
uint64_t BlobIndexer::AddBlob(string const & blob)
{
  bool const isOversized = blob.size() > m_maxUncompressedChunkSize;
  if (isOversized)
  {
    LOG(LINFO, ("Blob bigger than chunk:", m_blobChunkAndOffset.size(), blob.size(),
                blob.substr(0, 64)));
    ++m_largeBlobCount;
  }

  // Start a new chunk when appending would exceed the limit.
  bool const fitsInCurrentChunk =
      m_currentChunk.size() + blob.size() <= m_maxUncompressedChunkSize;
  if (!fitsInCurrentChunk)
  {
    FlushChunk();
  }

  // The id of the new blob equals the number of blobs recorded so far.
  uint64_t const blobId = m_blobChunkAndOffset.size();

  // Packed location: the number of chunks flushed so far in the high bits,
  // the blob's offset inside the current uncompressed chunk in the low
  // BITS_IN_CHUNK_SIZE bits.
  m_blobChunkAndOffset.push_back(
        (m_chunkOffset.size() << BITS_IN_CHUNK_SIZE) + m_currentChunk.size());

  m_currentChunk.insert(m_currentChunk.end(), blob.begin(), blob.end());

  return blobId;
}

void BlobIndexer::FlushChunk()
{
  if (!m_currentChunk.empty())
  {
    string compressedChunk;
    m_compressor(&m_currentChunk[0], m_currentChunk.size(), compressedChunk);
    m_writer.Write(compressedChunk.data(), compressedChunk.size());
    WriteToSink(m_writer, static_cast<uint32_t>(m_currentChunk.size()));
    uint32_t const chunkPrevOffset = (m_chunkOffset.empty() ? 0 : m_chunkOffset.back());
    m_chunkOffset.push_back(compressedChunk.size() + 4 + chunkPrevOffset);
    m_currentChunk.clear();
  }
}

// Finishes the file: flushes the pending chunk, then writes the index.
//
// Index layout, in write order:
//   1. every entry of m_chunkOffset;
//   2. every entry of m_blobChunkAndOffset;
//   3. the number of blobs, passed to WriteToSink as a uint32_t.
BlobIndexer::~BlobIndexer()
{
  // Make sure the last, possibly partial, chunk reaches the writer.
  FlushChunk();

  size_t const chunkCount = m_chunkOffset.size();
  for (size_t chunk = 0; chunk < chunkCount; ++chunk)
  {
    WriteToSink(m_writer, m_chunkOffset[chunk]);
  }

  size_t const blobCount = m_blobChunkAndOffset.size();
  for (size_t blob = 0; blob < blobCount; ++blob)
  {
    WriteToSink(m_writer, m_blobChunkAndOffset[blob]);
  }

  WriteToSink(m_writer, static_cast<uint32_t>(blobCount));
}