diff options
author | vng <viktor.govako@gmail.com> | 2014-04-23 23:34:41 +0400 |
---|---|---|
committer | Alex Zolotarev <alex@maps.me> | 2015-09-23 02:15:11 +0300 |
commit | 98d058924d40889f72ed6ce13c1d8cc0ceb87668 (patch) | |
tree | 0e599909ed36e48bb6aa30256ce126d59264a59d /indexer/categories_holder.cpp | |
parent | 9632652c23690f0c2bfe9b06f9c694fb64b1de46 (diff) |
[search] Fixed stop words processing for category synonyms.
Diffstat (limited to 'indexer/categories_holder.cpp')
-rw-r--r-- | indexer/categories_holder.cpp | 17 |
1 files changed, 16 insertions, 1 deletions
```diff
diff --git a/indexer/categories_holder.cpp b/indexer/categories_holder.cpp
index 50dc4c58d9..97a1ba241e 100644
--- a/indexer/categories_holder.cpp
+++ b/indexer/categories_holder.cpp
@@ -49,7 +49,8 @@ void CategoriesHolder::AddCategory(Category & cat, vector<uint32_t> & types)
 
       for (size_t j = 0; j < tokens.size(); ++j)
         for (size_t k = 0; k < types.size(); ++k)
-          m_name2type.insert(make_pair(tokens[j], types[k]));
+          if (ValidKeyToken(tokens[j]))
+            m_name2type.insert(make_pair(tokens[j], types[k]));
     }
   }
 
@@ -57,6 +58,20 @@ void CategoriesHolder::AddCategory(Category & cat, vector<uint32_t> & types)
   types.clear();
 }
 
+bool CategoriesHolder::ValidKeyToken(StringT const & s)
+{
+  if (s.size() > 2)
+    return true;
+
+  /// @todo We need to have global stop words array for the most used languages.
+  char const * arr[] = { "a", "z", "s", "d", "di", "de", "le" };
+  for (size_t i = 0; i < ARRAY_SIZE(arr); ++i)
+    if (s.IsEqualAscii(arr[i]))
+      return false;
+
+  return true;
+}
+
 void CategoriesHolder::LoadFromStream(istream & s)
 {
   m_type2cat.clear();
```