Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: vng <viktor.govako@gmail.com>, 2015-07-01 16:50:49 +0300
committer: Alex Zolotarev <alex@maps.me>, 2015-09-23 02:54:47 +0300
commit: 671dcce02a7a73079c66023b6713f5660aa5379f (patch)
tree: cf9b93386240352bf6c14b217b08272df8700bcc /indexer
parent: 90457ec1e4e5d8307df931ba75a3a84b1baa544c (diff)
[search] Treat “Đ,đ” as “d” letter in normalization.
Diffstat (limited to 'indexer')
-rw-r--r-- indexer/indexer_tests/search_string_utils_test.cpp | 12
-rw-r--r-- indexer/search_string_utils.hpp | 4
2 files changed, 13 insertions, 3 deletions
diff --git a/indexer/indexer_tests/search_string_utils_test.cpp b/indexer/indexer_tests/search_string_utils_test.cpp
index 8278d87ab8..26ed3b52e7 100644
--- a/indexer/indexer_tests/search_string_utils_test.cpp
+++ b/indexer/indexer_tests/search_string_utils_test.cpp
@@ -18,14 +18,20 @@ UNIT_TEST(NormalizeAndSimplifyStringWithOurTambourines)
"Iiİı", "iiii", // Famous turkish "I" letter bug.
"ЙЁйёШКИЙй", "йейешкийй", // Better handling of Russian й letter.
"ØøÆæŒœ", "ooaeaeoeoe",
- "バス", "ハス"
+ "バス", "ハス",
+ "âàáạăốợồôểềệếỉđưựứửýĂÂĐÊÔƠƯ",
+ "aaaaaooooeeeeiduuuuyaadeoou", // Vietnamese
+ "ăâț", "aat" // Romanian
};
*/
string const arr[] = {"ÜbërÅłłęšß", "uberallesss", // Basic test case.
"Iiİı", "iiii", // Famous turkish "I" letter bug.
"ЙЁйёШКИЙй", "иеиешкиии", // Better handling of Russian й letter.
- "ØøÆæŒœ", "ooaeaeoeoe",
- "バス", "ハス"
+ "ØøÆæŒœ", "ooaeaeoeoe", // Dansk
+ "バス", "ハス",
+ "âàáạăốợồôểềệếỉđưựứửýĂÂĐÊÔƠƯ",
+ "aaaaaooooeeeeiduuuuyaadeoou", // Vietnamese
+ "ăâț", "aat" // Romanian
};
for (size_t i = 0; i < ARRAY_SIZE(arr); i += 2)
diff --git a/indexer/search_string_utils.hpp b/indexer/search_string_utils.hpp
index d27e43991a..86c9b10092 100644
--- a/indexer/search_string_utils.hpp
+++ b/indexer/search_string_utils.hpp
@@ -17,6 +17,10 @@ inline strings::UniString NormalizeAndSimplifyString(string const & s)
UniChar & c = uniString[i];
switch (c)
{
+ // Replace "d with stroke" to simple d letter. Used in Vietnamese.
+ // (unicode-compliant implementation leaves it unchanged)
+ case 0x0110:
+ case 0x0111: c = 'd'; break;
// Replace small turkish dotless 'ı' with dotted 'i'.
// Our own invented hack to avoid well-known Turkish I-letter bug.
case 0x0131: c = 'i'; break;