diff options
author | Yury Melnichek <melnichek@gmail.com> | 2011-09-01 22:53:34 +0400 |
---|---|---|
committer | Alex Zolotarev <alex@maps.me> | 2015-09-23 01:22:54 +0300 |
commit | 15f39716e8435cfbe09f77baedf2e57a2c8633e5 (patch) | |
tree | 028b3eca305606b414fe3b59b84fc02202263b37 /indexer/search_delimiters.cpp | |
parent | 716ca5ca5ed4e7e825a554523821f4df2cdb8a13 (diff) |
[search] Move search/delimeters.?pp -> indexer/search_delimiters.?pp
Diffstat (limited to 'indexer/search_delimiters.cpp')
-rw-r--r-- | indexer/search_delimiters.cpp | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/indexer/search_delimiters.cpp b/indexer/search_delimiters.cpp new file mode 100644 index 0000000000..6c70a79bd9 --- /dev/null +++ b/indexer/search_delimiters.cpp @@ -0,0 +1,63 @@ +#include "delimiters.hpp" + +namespace search +{ + +bool Delimiters::operator()(strings::UniChar c) const +{ + // ascii ranges first + if (c < '0') + return true; + if (c > '9' && c < 'A') + return true; + if (c > 'Z' && c < 'a') + return true; + if (c > 'z' && c < 0xC0) + return true; + + // values are calculated by osm statistics on 26/05/11 + switch (c) + { + case 0x2116: // NUMERO SIGN +// case 0x00B0: // DEGREE SIGN + case 0x2013: // EN DASH + case 0x2019: // RIGHT SINGLE QUOTATION MARK + case 0x00AB: // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + case 0x00BB: // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + case 0x3000: // IDEOGRAPHIC SPACE + case 0x30FB: // KATAKANA MIDDLE DOT +// case 0x00B4: // ACUTE ACCENT + case 0x200E: // LEFT-TO-RIGHT MARK + case 0xFF08: // FULLWIDTH LEFT PARENTHESIS +// case 0x00A0: // NO-BREAK SPACE + case 0xFF09: // FULLWIDTH RIGHT PARENTHESIS + case 0x2018: // LEFT SINGLE QUOTATION MARK +// case 0x007E: // TILDE + case 0x2014: // EM DASH +// case 0x007C: // VERTICAL LINE + case 0x0F0B: // TIBETAN MARK INTERSYLLABIC TSHEG + case 0x201C: // LEFT DOUBLE QUOTATION MARK + case 0x201E: // DOUBLE LOW-9 QUOTATION MARK +// case 0x00AE: // REGISTERED SIGN + case 0xFFFD: // REPLACEMENT CHARACTER + case 0x200C: // ZERO WIDTH NON-JOINER + case 0x201D: // RIGHT DOUBLE QUOTATION MARK + case 0x3001: // IDEOGRAPHIC COMMA + case 0x300C: // LEFT CORNER BRACKET + case 0x300D: // RIGHT CORNER BRACKET +// case 0x00B7: // MIDDLE DOT + case 0x061F: // ARABIC QUESTION MARK + case 0x2192: // RIGHTWARDS ARROW + case 0x2212: // MINUS SIGN +// case 0x0091: // <control> +// case 0x007D: // RIGHT CURLY BRACKET +// case 0x007B: // LEFT CURLY BRACKET +// case 0x00A9: // COPYRIGHT SIGN + case 0x200D: // ZERO WIDTH JOINER + case 0x200B: // ZERO WIDTH SPACE + return true; + } + return false; +} + +} |