diff options
author | Maxim Pimenov <m@maps.me> | 2016-04-24 20:05:45 +0300 |
---|---|---|
committer | Maxim Pimenov <m@maps.me> | 2016-04-26 16:00:55 +0300 |
commit | 360bafbc2907788a3893792fb6dd366d3eb48e44 (patch) | |
tree | 7dd76e67a6241d73d87cfa29b84adb515f60427a /indexer/categories_index.hpp | |
parent | b9cc722dd7c4a6d04f6cb18bd9855e22baee9ee2 (diff) |
[indexer] Added a component that maps (sub)strings to categories.
Diffstat (limited to 'indexer/categories_index.hpp')
-rw-r--r-- | indexer/categories_index.hpp | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/indexer/categories_index.hpp b/indexer/categories_index.hpp new file mode 100644 index 0000000000..0429976096 --- /dev/null +++ b/indexer/categories_index.hpp @@ -0,0 +1,60 @@ +#include "categories_holder.hpp" + +#include "base/mem_trie.hpp" + +#include "std/string.hpp" +#include "std/vector.hpp" + +namespace indexer +{ +// This class is used to simplify searches of categories by +// synonyms to their names (in various languages). +// An example usage is helping a user who is trying to add +// a new feature with our editor. +// All category data is taken from data/categories.txt. +// All types returned are those from classificator. +class CategoriesIndex +{ +public: + using Category = CategoriesHolder::Category; + + CategoriesIndex() : m_catHolder(GetDefaultCategories()) {} + + CategoriesIndex(CategoriesHolder const & catHolder) : m_catHolder(catHolder) {} + + // Adds all categories that match |type|. Only synonyms + // in language |lang| are added. See indexer/categories_holder.cpp + // for language enumeration. + void AddCategoryByTypeAndLang(uint32_t type, int8_t lang); + + // Adds all categories that match |type|. All known synonyms + // are added. + void AddCategoryByTypeAllLangs(uint32_t type); + + // Adds all categories from data/classificator.txt. Only + // names in language |lang| are added. + void AddAllCategoriesInLang(int8_t lang); + + // Adds all categories from data/classificator.txt. + void AddAllCategoriesAllLangs(); + + // Returns all categories that have |query| as a substring. Note + // that all synonyms for a category are contained in a returned + // value even if only one language was used when adding this + // category's name to index. + // Beware weird results when query is a malformed UTF-8 string. + void GetCategories(string const & query, vector<Category> & result); + + // Returns all types that match to categories that have |query| as substring. + // Beware weird results when query is a malformed UTF-8 string. + void GetAssociatedTypes(string const & query, vector<uint32_t> & result); + +#ifdef DEBUG + int GetNumTrieNodes() { return m_trie.GetNumNodes(); } +#endif + +private: + CategoriesHolder const & m_catHolder; + my::MemTrie<string, uint32_t> m_trie; +}; +} // namespace indexer |