Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/base
diff options
context:
space:
mode:
authorAlex Zolotarev <alex@maps.me>2016-03-31 09:51:32 +0300
committerAlex Zolotarev <alex@maps.me>2016-04-05 13:54:23 +0300
commitf4c112b59fcee233ba33b0308c2db9d6e49f3a9f (patch)
tree0b864c51e4756b0aa6b711ffd31ca396754d6b88 /base
parent86d4446e86076dc61d6de78935ea9dbfc2eb51bc (diff)
strings::NormalizeDigits for full-width unicode numbers.
Diffstat (limited to 'base')
-rw-r--r--base/base_tests/string_utils_test.cpp13
-rw-r--r--base/string_utils.cpp15
-rw-r--r--base/string_utils.hpp3
3 files changed, 31 insertions, 0 deletions
diff --git a/base/base_tests/string_utils_test.cpp b/base/base_tests/string_utils_test.cpp
index 55b256bacf..05c269d29c 100644
--- a/base/base_tests/string_utils_test.cpp
+++ b/base/base_tests/string_utils_test.cpp
@@ -612,3 +612,16 @@ UNIT_TEST(EditDistance)
testUniStringEditDistance("ll", "l1", 1);
testUniStringEditDistance("\u0132ij", "\u0133IJ", 3);
}
+
+UNIT_TEST(NormalizeDigits)
+{
+ auto const nd = [](string str) -> string
+ {
+ strings::NormalizeDigits(str);
+ return str;
+ };
+ TEST_EQUAL(nd(""), "", ());
+ TEST_EQUAL(nd("z12345//"), "z12345//", ());
+ TEST_EQUAL(nd("a0192 "), "a0192 ", ());
+ TEST_EQUAL(nd("3456789"), "3456789", ());
+}
diff --git a/base/string_utils.cpp b/base/string_utils.cpp
index e9f2aa1d39..6e643d18f2 100644
--- a/base/string_utils.cpp
+++ b/base/string_utils.cpp
@@ -113,6 +113,21 @@ UniString Normalize(UniString const & s)
return result;
}
/// Replaces "full width" unicode digits (U+FF10..U+FF19) with their ASCII counterparts, in place.
///
/// In UTF-8 these digits are the three-byte sequences EF BC 90 .. EF BC 99. Every such sequence
/// is collapsed into the single byte '0'..'9'; all other bytes, including incomplete sequences at
/// the end of the string, are kept unchanged. The input is not validated as UTF-8.
///
/// @param utf8 String to normalize; it may become shorter but never longer.
void NormalizeDigits(string & utf8)
{
  size_t const size = utf8.size();
  size_t dst = 0;  // Next position to write; never runs ahead of src.
  size_t src = 0;  // Next position to read.
  while (src < size)
  {
    if (src + 2 < size && utf8[src] == '\xEF' && utf8[src + 1] == '\xBC')
    {
      // Compare as an unsigned byte so the range check does not depend on the signedness of char.
      unsigned char const last = static_cast<unsigned char>(utf8[src + 2]);
      if (last >= 0x90 && last <= 0x99)
      {
        utf8[dst++] = static_cast<char>('0' + (last - 0x90));
        src += 3;
        continue;
      }
    }
    utf8[dst++] = utf8[src++];
  }
  // Drop the tail left over after compaction: one resize instead of an erase per digit.
  utf8.resize(dst);
}
+
namespace
{
char ascii_to_lower(char in)
diff --git a/base/string_utils.hpp b/base/string_utils.hpp
index 4d006a0d08..a7a6a4290d 100644
--- a/base/string_utils.hpp
+++ b/base/string_utils.hpp
@@ -43,6 +43,9 @@ UniString MakeLowerCase(UniString const & s);
void NormalizeInplace(UniString & s);
UniString Normalize(UniString const & s);
+/// Replaces "full width" unicode digits with ascii ones.
+void NormalizeDigits(string & utf8);
+
/// Counts the number of start symbols in string s (which is not lowercased and not normalized)
/// that match the lowercased and normalized string low_s. If s doesn't start with low_s, returns
/// 0; otherwise returns the number of start symbols in s that are equivalent to lowStr.