Welcome to mirror list, hosted at ThFree Co, Russian Federation.

slof_indexer.hpp « publisher - github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 156761b71121ae6e28d3faa844f894b48869591f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#pragma once
#include "../base/base.hpp"
#include "../std/function.hpp"
#include "../std/set.hpp"
#include "../std/string.hpp"
#include "../std/utility.hpp"
#include "../std/vector.hpp"

class Writer;

namespace sl
{

// Accumulates articles and (key, article id) pairs and emits them through a
// Writer, using a caller-supplied compressor callback.
//
// Only the declaration is visible here; the notes marked NOTE(review) describe
// behavior that is suggested by names and must be confirmed against the
// implementation file.
//
// NOTE(review): the class stores a reference to the Writer and declares a
// destructor, but copying is not disabled, so copy construction is implicitly
// available. Confirm that instances are never copied.
class SlofIndexer
{
public:
  // writer: output sink; held by reference, so it must outlive this object.
  // maxUncompressedArticleChunkSize: size limit stored in
  //   m_MaxUncompressedArticleChunkSize. NOTE(review): presumably the number of
  //   uncompressed bytes after which the current article chunk is flushed -
  //   confirm in the implementation.
  // compressor: callback taking (char const *, size_t, string &); a copy of it
  //   is stored in m_Compressor. NOTE(review): presumably (input data, input
  //   size, output buffer) - confirm in the implementation.
  SlofIndexer(Writer & writer,
            size_t maxUncompressedArticleChunkSize,
            function<void (char const *, size_t, string &)> const & compressor);
  // NOTE(review): presumably flushes pending data and finalizes the output -
  // confirm in the implementation.
  ~SlofIndexer();

  // Add article and return its id.
  // NOTE(review): forceChunkFlush presumably forces the current article chunk
  // to be flushed regardless of its size - confirm in the implementation.
  uint64_t AddArticle(string const & article, bool forceChunkFlush = false);

  // Add key with given article id. Keys may be passed in arbitrary order.
  void AddKey(string const & word, uint64_t articleId);

  // Logs statistics; const, so it does not modify the indexer state.
  void LogStats() const;

private:
  // NOTE(review): presumably compresses m_CurrentArticleChunk with
  // m_Compressor and writes it to m_Writer - confirm in the implementation.
  void FlushArticleChunk();

  // Output sink; not owned.
  Writer & m_Writer;
  // Chunk size limit passed to the constructor; never changes afterwards.
  size_t const m_MaxUncompressedArticleChunkSize;
  // Copy of the compressor callback passed to the constructor.
  function<void (char const *, size_t, string &)> m_Compressor;
  // (key, article id) pairs. Being a set, it keeps the pairs ordered and
  // drops exact duplicates.
  typedef set<pair<string, uint64_t> > WordsContainerType;
  WordsContainerType m_Words;
  // NOTE(review): presumably the writer position at which article data
  // starts - confirm in the implementation.
  uint64_t const m_ArticleOffset;
  // NOTE(review): presumably the uncompressed contents of the chunk currently
  // being accumulated - confirm in the implementation.
  string m_CurrentArticleChunk;
  // NOTE(review): presumably the sizes of the articles in the current chunk -
  // confirm in the implementation.
  vector<uint32_t> m_ArticleSizesInChunk;
  // NOTE(review): presumably the number of articles added so far - confirm in
  // the implementation.
  uint32_t m_ArticleCount;

  // Just for stats.
  uint32_t m_ArticleChunkCount;
  uint64_t m_TotalArticleSizeUncompressed;
  uint32_t m_MaxArticleSize;
};

}