Welcome to mirror list, hosted at ThFree Co, Russian Federation.

categories_holder.cpp « search - github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 5a1044a2b3b3c4d6fb613feaa8ec30ccb627a29d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#include "categories_holder.hpp"

#include "../indexer/classificator.hpp"

#include "../coding/multilang_utf8_string.hpp"

#include "../base/string_utils.hpp"
#include "../base/logging.hpp"


namespace search
{

/// Functor that appends every string it is called with to a caller-supplied
/// vector. Passed to strings::Tokenize to collect the produced tokens.
struct Splitter
{
  /// Destination for collected strings. Held by reference, so the vector
  /// must outlive this functor.
  vector<string> & m_v;

  /// explicit: a vector must never silently convert into a Splitter.
  explicit Splitter(vector<string> & v) : m_v(v) {}

  /// Appends @p s to the destination vector.
  void operator()(string const & s)
  {
    m_v.push_back(s);
  }
};

// Parser states used by CategoriesHolder::LoadFromStream to decide how the
// current input line is interpreted.
enum State {
  // The line is read as a list of classificator type paths.
  EParseTypes,
  // The line is read as a language code followed by category names.
  EParseLanguages
};

/// Parses category definitions from @p stream, replacing the current contents
/// of m_categories.
///
/// Every line is split into tokens on ':' and '|'. The parser alternates
/// between two states:
///  - EParseTypes: each token is a '-' separated path that is resolved with
///    classif().GetTypeByPath(); a line yielding at least one type switches
///    the parser to EParseLanguages.
///  - EParseLanguages: the first token is a language code, the remaining
///    tokens are names for that language. A name may start with one decimal
///    digit, which is stored in m_prefixLengthToSuggest and removed from the
///    name. A line without tokens switches back to EParseTypes.
///
/// A category is stored only if it has at least one type and one synonym.
///
/// @param stream Input to read lines from.
/// @return Number of categories stored in m_categories.
size_t CategoriesHolder::LoadFromStream(istream & stream)
{
  m_categories.clear();

  State state = EParseTypes;

  string line;
  Category cat;
  // Loop on the result of getline itself: this stops as soon as a read fails
  // instead of processing one extra, empty line after the stream runs dry.
  while (getline(stream, line))
  {
    strings::SimpleTokenizer iter(line, ":|");
    switch (state)
    {
    case EParseTypes:
      {
        // A new types line starts a new category: flush the previous one
        // if it is complete.
        if (!cat.m_synonyms.empty() && !cat.m_types.empty())
          m_categories.push_back(cat);
        cat.m_synonyms.clear();
        cat.m_types.clear();
        while (iter)
        {
          // split category to sub categories for classificator
          vector<string> v;
          strings::Tokenize(*iter, "-", Splitter(v));
          // get classificator type
          cat.m_types.push_back(classif().GetTypeByPath(v));
          ++iter;
        }
        // Lines without any type (e.g. blank lines) keep the parser waiting
        // for a types line.
        if (!cat.m_types.empty())
          state = EParseLanguages;
      }
      break;

    case EParseLanguages:
      {
        // A line without tokens terminates the list of languages.
        if (!iter)
        {
          state = EParseTypes;
          continue;
        }
        int8_t langCode = StringUtf8Multilang::GetLangIndex(*iter);
        if (langCode == -1)
        {
          // Unknown language: skip the whole line, stay in this state.
          LOG(LWARNING, ("Invalid language code:", *iter));
          continue;
        }
        // Remaining tokens on the line are names for langCode.
        while (++iter)
        {
          Category::Name name;
          name.m_lang = langCode;
          name.m_name = *iter;

          if (name.m_name.empty())
            continue;

          if (name.m_name[0] >= '0' && name.m_name[0] <= '9')
          {
            // Leading digit is the prefix length to suggest, not part of
            // the name itself.
            name.m_prefixLengthToSuggest = name.m_name[0] - '0';
            name.m_name = name.m_name.substr(1);

            // A token made of the digit alone leaves nothing to store;
            // skip it just like any other empty name.
            if (name.m_name.empty())
              continue;
          }
          else
          {
            // No explicit digit given.
            // NOTE(review): 10 is presumably a default lying just above the
            // single-digit range - confirm against users of this field.
            name.m_prefixLengthToSuggest = 10;
          }

          cat.m_synonyms.push_back(name);
        }
      }
      break;
    }
  }
  // add last category
  if (!cat.m_synonyms.empty() && !cat.m_types.empty())
    m_categories.push_back(cat);

  return m_categories.size();
}

// Exchanges the categories stored in this holder with those stored in o.
void CategoriesHolder::swap(CategoriesHolder & o)
{
  o.m_categories.swap(m_categories);
}

}