Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/coding
diff options
context:
space:
mode:
author: Maxim Pimenov <m@maps.me> 2018-12-12 20:17:50 +0300
committer: Tatiana Yan <tatiana.kondakova@gmail.com> 2018-12-13 13:19:18 +0300
commit: 5bd46a0144339fb35e9c1cba55050dcdadc7a67f (patch)
tree: d4673f294b7a47a1fed0c56a3d17c6193ed11b3a /coding
parent: f07de9288b85b7d7952801383b7fa3c20756de5b (diff)
[coding] Renamed the StringUtf8Multilang files.
Also added some documentation on how StringUtf8Multilang is stored.
Diffstat (limited to 'coding')
-rw-r--r-- coding/CMakeLists.txt 8
-rw-r--r-- coding/coding_tests/CMakeLists.txt 2
-rw-r--r-- coding/coding_tests/string_utf8_multilang_tests.cpp (renamed from coding/coding_tests/multilang_utf8_string_test.cpp) 13
-rw-r--r-- coding/string_utf8_multilang.cpp (renamed from coding/multilang_utf8_string.cpp) 6
-rw-r--r-- coding/string_utf8_multilang.hpp (renamed from coding/multilang_utf8_string.hpp) 18
-rw-r--r-- coding/transliteration.cpp 7
-rw-r--r-- coding/transliteration.hpp 1
7 files changed, 37 insertions, 18 deletions
diff --git a/coding/CMakeLists.txt b/coding/CMakeLists.txt
index d07ad421f2..c7fbe99724 100644
--- a/coding/CMakeLists.txt
+++ b/coding/CMakeLists.txt
@@ -54,8 +54,6 @@ set(
memory_region.hpp
mmap_reader.cpp
mmap_reader.hpp
- multilang_utf8_string.cpp
- multilang_utf8_string.hpp
parse_xml.hpp
point_coding.cpp
point_coding.hpp
@@ -71,13 +69,15 @@ set(
reader_writer_ops.hpp
serdes_binary_header.hpp
serdes_json.hpp
- simple_dense_coding.cpp
- simple_dense_coding.hpp
sha1.cpp
sha1.hpp
+ simple_dense_coding.cpp
+ simple_dense_coding.hpp
streams.hpp
streams_common.hpp
streams_sink.hpp
+ string_utf8_multilang.cpp
+ string_utf8_multilang.hpp
succinct_mapper.hpp
tesselator_decl.hpp
text_storage.hpp
diff --git a/coding/coding_tests/CMakeLists.txt b/coding/coding_tests/CMakeLists.txt
index f8ee6ec16c..86d6ab7cbb 100644
--- a/coding/coding_tests/CMakeLists.txt
+++ b/coding/coding_tests/CMakeLists.txt
@@ -23,7 +23,6 @@ set(
huffman_test.cpp
mem_file_reader_test.cpp
mem_file_writer_test.cpp
- multilang_utf8_string_test.cpp
png_decoder_test.cpp
point_coding_tests.cpp
reader_cache_test.cpp
@@ -31,6 +30,7 @@ set(
reader_test.hpp
reader_writer_ops_test.cpp
simple_dense_coding_test.cpp
+ string_utf8_multilang_tests.cpp
succinct_mapper_test.cpp
test_polylines.cpp
test_polylines.hpp
diff --git a/coding/coding_tests/multilang_utf8_string_test.cpp b/coding/coding_tests/string_utf8_multilang_tests.cpp
index d9c89ee1db..b79a512a1b 100644
--- a/coding/coding_tests/multilang_utf8_string_test.cpp
+++ b/coding/coding_tests/string_utf8_multilang_tests.cpp
@@ -1,6 +1,6 @@
#include "testing/testing.hpp"
-#include "coding/multilang_utf8_string.hpp"
+#include "coding/string_utf8_multilang.hpp"
#include "base/control_flow.hpp"
@@ -121,11 +121,12 @@ UNIT_TEST(MultilangString_Unique)
UNIT_TEST(MultilangString_LangNames)
{
// It is important to compare the contents of the strings, and not just pointers
- TEST_EQUAL(string("Беларуская"), StringUtf8Multilang::GetLangNameByCode(StringUtf8Multilang::GetLangIndex("be")), ());
+ TEST_EQUAL(string("Беларуская"),
+ StringUtf8Multilang::GetLangNameByCode(StringUtf8Multilang::GetLangIndex("be")), ());
auto const & langs = StringUtf8Multilang::GetSupportedLanguages();
- // Using size_t workaround, because our logging/testing macroses do not support passing POD types by value,
- // only by reference. And our constant is a constexpr.
+ // Using size_t workaround, because our logging/testing macroses do not support passing POD types
+ // by value, only by reference. And our constant is a constexpr.
TEST_EQUAL(langs.size(), size_t(StringUtf8Multilang::kMaxSupportedLanguages), ());
auto const international = StringUtf8Multilang::GetLangIndex("int_name");
TEST_EQUAL(langs[international].m_code, string("int_name"), ());
@@ -137,11 +138,11 @@ UNIT_TEST(MultilangString_HasString)
s.AddString(0, "xxx");
s.AddString(18, "yyy");
s.AddString(63, "zzz");
-
+
TEST(s.HasString(0), ());
TEST(s.HasString(18), ());
TEST(s.HasString(63), ());
-
+
TEST(!s.HasString(1), ());
TEST(!s.HasString(32), ());
}
diff --git a/coding/multilang_utf8_string.cpp b/coding/string_utf8_multilang.cpp
index 51c5b58357..fb84f41c61 100644
--- a/coding/multilang_utf8_string.cpp
+++ b/coding/string_utf8_multilang.cpp
@@ -1,4 +1,4 @@
-#include "coding/multilang_utf8_string.hpp"
+#include "coding/string_utf8_multilang.hpp"
#include "defines.hpp"
@@ -202,12 +202,12 @@ bool StringUtf8Multilang::GetString(int8_t lang, string & utf8s) const
bool StringUtf8Multilang::HasString(int8_t lang) const
{
- for(size_t i = 0; i < m_s.size(); i = GetNextIndex(i))
+ for (size_t i = 0; i < m_s.size(); i = GetNextIndex(i))
{
if ((m_s[i] & 0x3F) == lang)
return true;
}
-
+
return false;
}
diff --git a/coding/multilang_utf8_string.hpp b/coding/string_utf8_multilang.hpp
index f336dd0f84..ca54fbac82 100644
--- a/coding/multilang_utf8_string.hpp
+++ b/coding/string_utf8_multilang.hpp
@@ -42,6 +42,24 @@ void ReadString(TSource & src, std::string & s)
}
} // namespace utils
+// A class to store strings in multiple languages.
+// May be used e.g. to store several translations of a feature's name.
+//
+// The coding scheme is as follows:
+// * Pairs of the form (|lang|, |s|) are stored. |s| is a string in the UTF-8
+// encoding and |lang| is one of the 64 supported languages (see the list in the cpp file).
+//
+// * Each pair is represented by a byte encoding the lang followed by the
+// UTF-8 bytes of the string. Then, all such representations are concatenated
+// into a single std::string.
+// The language code is encoded with 6 bits that are prepended with "10", i.e.
+// 10xx xxxx. In the UTF-8 encoding that would be a continuation byte, so
+// if you start reading the string and such a byte appears out of nowhere in
+// a place where a continuation byte is not expected you may be sure
+// that the string for the current language has ended and you've reached the
+// string for the next language. Note that this breaks the self-synchronization property.
+//
+// * The order of the stored strings is not specified. Any language may come first.
class StringUtf8Multilang
{
public:
diff --git a/coding/transliteration.cpp b/coding/transliteration.cpp
index e8e73aa8f3..22f21d2bcf 100644
--- a/coding/transliteration.cpp
+++ b/coding/transliteration.cpp
@@ -1,5 +1,6 @@
#include "coding/transliteration.hpp"
-#include "coding/multilang_utf8_string.hpp"
+
+#include "coding/string_utf8_multilang.hpp"
#include "base/logging.hpp"
#include "base/string_utils.hpp"
@@ -10,8 +11,6 @@
#include "3party/icu/i18n/unicode/translit.h"
#include "3party/icu/i18n/unicode/utrans.h"
-#include "std/unique_ptr.hpp"
-
#include <cstring>
#include <mutex>
@@ -58,7 +57,7 @@ void Transliteration::Init(std::string const & icuDataDir)
if (strlen(lang.m_transliteratorId) == 0 || m_transliterators.count(lang.m_transliteratorId) != 0)
continue;
- m_transliterators.emplace(lang.m_transliteratorId, make_unique<TransliteratorInfo>());
+ m_transliterators.emplace(lang.m_transliteratorId, std::make_unique<TransliteratorInfo>());
}
}
diff --git a/coding/transliteration.hpp b/coding/transliteration.hpp
index 00fbaa0497..cefe519d82 100644
--- a/coding/transliteration.hpp
+++ b/coding/transliteration.hpp
@@ -1,6 +1,7 @@
#pragma once
#include <atomic>
+#include <cstdint>
#include <map>
#include <memory>
#include <string>