Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/base
diff options
context:
space:
mode:
author: Alex Zolotarev <alex@maps.me> 2016-04-04 17:15:45 +0300
committer: Alex Zolotarev <alex@maps.me> 2016-04-05 13:55:12 +0300
commit: baf4a0c0cd07bea2472e9934970a0f16778898cd (patch)
tree: 53d772228bb20bf9f20110d2381679c2cd5da85c /base
parent: 32a50c6a59b012024cc59fb71a7bcad5fe360f43 (diff)
Speed improvement from Yury Gorshenin.
Diffstat (limited to 'base')
-rw-r--r-- base/string_utils.cpp 39
1 files changed, 31 insertions, 8 deletions
diff --git a/base/string_utils.cpp b/base/string_utils.cpp
index 6e643d18f2..857c190c9b 100644
--- a/base/string_utils.cpp
+++ b/base/string_utils.cpp
@@ -113,19 +113,42 @@ UniString Normalize(UniString const & s)
return result;
}
-void NormalizeDigits(string & utf8)
-{
- for (size_t i = 0; i + 2 < utf8.size(); ++i)
+void NormalizeDigits(string &utf8) {
+ size_t const n = utf8.size();
+ size_t const m = n >= 2 ? n - 2 : 0;
+
+ size_t i = 0;
+ while (i < n && utf8[i] != '\xEF')
+ ++i;
+ size_t j = i;
+
+ // Following invariant holds before/between/after loop iterations below:
+ // * utf8[0, i) represents a checked part of the input string.
+ // * utf8[0, j) represents a normalized version of the utf8[0, i).
+ while (i < m)
{
if (utf8[i] == '\xEF' && utf8[i + 1] == '\xBC')
{
- char const n = utf8[i + 2];
- if (n < '\x90' || n > '\x99')
- continue;
- utf8[i] = n - 0x90 + '0';
- utf8.erase(i + 1, 2);
+ auto const n = utf8[i + 2];
+ if (n >= '\x90' && n <= '\x99')
+ {
+ utf8[j++] = n - 0x90 + '0';
+ i += 3;
+ }
+ else
+ {
+ utf8[j++] = utf8[i++];
+ utf8[j++] = utf8[i++];
+ }
+ }
+ else
+ {
+ utf8[j++] = utf8[i++];
}
}
+ while (i < n)
+ utf8[j++] = utf8[i++];
+ utf8.resize(j);
}
namespace