Welcome to mirror list, hosted at ThFree Co, Russian Federation.

slof_dictionary.cpp « words - github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 16798ae5c2514853260d86900e5a151f9439c4fd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#include "slof_dictionary.hpp"
#include "../coding/bzip2_compressor.hpp"
#include "../coding/byte_stream.hpp"
#include "../coding/endianness.hpp"
#include "../coding/reader.hpp"
#include "../coding/varint.hpp"
#include "../base/logging.hpp"
#include "../base/base.hpp"
#include "../std/utility.hpp"

namespace
{
  // Returns a pointer to the byte following the variable-length unsigned integer
  // that starts at p. Every byte with the high bit (0x80) set is a continuation byte;
  // the first byte without it terminates the value.
  // The caller must guarantee that a terminating byte exists in the buffer.
  char const * SkipVarUint(char const * p)
  {
    for (; ((*p) & 0x80) != 0; ++p)
    {
      // Skipping continuation bytes.
    }
    return p + 1;
  }
}

// Opens a dictionary on top of pReader, using DecompressBZip2IntoFixedSize
// to decompress article chunks. The header is read and validated by Init().
sl::SlofDictionary::SlofDictionary(Reader const * pReader)
  : m_pReader(pReader)
  , m_Decompressor(&DecompressBZip2IntoFixedSize)
{
  Init();
}

// Opens a dictionary on top of pReader with a caller-supplied decompressor.
// The decompressor is invoked as (compressed data, compressed size,
// output buffer, output buffer size). The header is read and validated by Init().
sl::SlofDictionary::SlofDictionary(
    Reader const * pReader, function<void (const char *, size_t, char *, size_t)> decompressor)
  : m_pReader(pReader)
  , m_Decompressor(decompressor)
{
  Init();
}

// Loads the file header from the very beginning of the reader and rejects
// any dictionary whose major version is not 1.
void sl::SlofDictionary::Init()
{
  m_pReader->Read(0, &m_Header, sizeof(m_Header));

  // Only major version 1 is understood by this implementation.
  bool const isSupportedVersion = (m_Header.m_MajorVersion == 1);
  if (!isSupportedVersion)
    MYTHROW(OpenDictionaryNewerVersionException, (m_Header.m_MajorVersion));
}

// Nothing to release explicitly here; members clean up after themselves.
sl::SlofDictionary::~SlofDictionary()
{
  // Intentionally empty.
}

// Returns the number of keys in the dictionary, as stored in the file header.
sl::Dictionary::Id sl::SlofDictionary::KeyCount() const
{
  sl::Dictionary::Id const keyCount = m_Header.m_KeyCount;
  return keyCount;
}

// Reads the raw key record number id into data.
//
// The key index holds one 32-bit offset per key; entries id and id + 1 are the
// begin and end offsets of the record, relative to m_KeyDataOffset.
//
// On return data contains the record bytes followed by exactly 2 zero bytes.
// The zeroes guarantee that skipping or reading up to two varints from the record
// terminates inside the buffer even if the file is corrupted.
//
// NOTE(review): the offsets are not validated. If the end offset is smaller than
// the begin offset, the size wraps around and a huge allocation/read is attempted.
void sl::SlofDictionary::ReadKeyData(sl::Dictionary::Id id, string & data) const
{
  // Read both offsets into a plain array, so that we don't depend on the memory layout of pair.
  uint32_t offsets[2];
  m_pReader->Read(m_Header.m_KeyIndexOffset + id * 4, offsets, sizeof(offsets));
  uint32_t const begOffset = SwapIfBigEndian(offsets[0]);
  uint32_t const endOffset = SwapIfBigEndian(offsets[1]);
  uint32_t const recordSize = endOffset - begOffset;

  // assign() (unlike resize()) overwrites the whole string, so the 2 trailing bytes are
  // zero even if the caller passed a non-empty string. The size is computed in size_t
  // so that adding 2 can't wrap around in 32 bits.
  data.assign(static_cast<size_t>(recordSize) + 2, '\0');
  m_pReader->Read(m_Header.m_KeyDataOffset + begOffset, &data[0], recordSize);
}

// Extracts the key text of record number id into key.
// A key record starts with two varints (consumed by ArticleById), followed by the key bytes.
void sl::SlofDictionary::KeyById(sl::Dictionary::Id id, string & key) const
{
  string record;
  ReadKeyData(id, record);

  // The key text begins right after the two leading varints.
  char const * const keyBegin = SkipVarUint(SkipVarUint(&record[0]));

  // ReadKeyData pads the record with zero bytes to keep varint parsing bounded
  // on corrupted data. Move the end pointer back over all trailing zeroes.
  char const * keyEnd = &record[record.size() - 1] + 1;
  while (keyEnd > keyBegin && *(keyEnd - 1) == 0)
    --keyEnd;

  key.assign(keyBegin, keyEnd);
}

// Loads the article that belongs to key number id into article.
//
// The key record supplies two varints: the position of the article chunk
// (relative to m_ArticleOffset) and the index of the article inside that chunk.
// A chunk, as parsed below, is laid out as:
//   uint32  size of the rest of the chunk in bytes
//   varuint header size, counted from the byte following this varuint
//   varuint size of the decompressed payload
//   varuint size of each article, in order
//   ...     compressed payload, starting at the end of the header
//
// NOTE(review): none of the sizes and offsets read from the file are validated
// (chunk header size vs chunk size, article range vs decompressed size), and
// &decompressedChunk[0] is taken even when the decompressed size is 0.
// Corrupted input can therefore cause out-of-bounds access.
void sl::SlofDictionary::ArticleById(sl::Dictionary::Id id, string & article) const
{
  string keyData;
  ReadKeyData(id, keyData);
  // The two leading varints of the key record locate the article.
  ArrayByteSource keyDataSource(&keyData[0]);
  uint64_t const articleChunkPos = ReadVarUint<uint64_t>(keyDataSource);
  uint32_t const articleNumInChunk = ReadVarUint<uint32_t>(keyDataSource);

  // The chunk is prefixed with its 4-byte size; the chunk body follows immediately.
  uint32_t const chunkSize =
      ReadPrimitiveFromPos<uint32_t>(*m_pReader, m_Header.m_ArticleOffset + articleChunkPos);
  string chunk(chunkSize, 0);
  m_pReader->Read(m_Header.m_ArticleOffset + articleChunkPos + 4, &chunk[0], chunkSize);
  ArrayByteSource chunkSource(&chunk[0]);
  uint32_t chunkHeaderSize = ReadVarUint<uint32_t>(chunkSource);
  // Account for the bytes of the header-size varint itself, so that chunkHeaderSize
  // becomes the offset of the compressed payload from the beginning of the chunk.
  chunkHeaderSize += (chunkSource.PtrC() - &chunk[0]);
  uint32_t const decompressedChunkSize = ReadVarUint<uint32_t>(chunkSource);
  // Sum the sizes of all preceding articles to get the offset of the requested one
  // inside the decompressed payload.
  uint32_t articleBegInChunk = 0;
  for (uint32_t i = 0; i < articleNumInChunk; ++i)
    articleBegInChunk += ReadVarUint<uint32_t>(chunkSource);
  // The next varint is the size of the requested article itself.
  uint32_t const articleSizeInChunk = ReadVarUint<uint32_t>(chunkSource);
  // Decompress the whole payload into a buffer of the declared size, then cut the article out.
  vector<char> decompressedChunk(decompressedChunkSize);
  m_Decompressor(&chunk[chunkHeaderSize], chunkSize - chunkHeaderSize,
                 &decompressedChunk[0], decompressedChunkSize);
  article.assign(&decompressedChunk[articleBegInChunk], articleSizeInChunk);
}