Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Daria Volvenkova <d.volvenkova@corp.mail.ru> 2018-11-02 04:36:07 +0300
committer: mpimenov <mpimenov@users.noreply.github.com> 2018-11-23 13:49:43 +0300
commit: 7cd27d26171073cc20482ae0f5fbf695b985fdb4 (patch)
tree: 29dd01309ff151325b40da0bd69f91432572517c /descriptions
parent: e4d44c3bfa8258dc9b25de46eeeb5928b83246a0 (diff)
[descriptions] Description section serializer and deserializer added.
Diffstat (limited to 'descriptions')
-rw-r--r-- descriptions/CMakeLists.txt 13
-rw-r--r-- descriptions/descriptions_tests/CMakeLists.txt 22
-rw-r--r-- descriptions/descriptions_tests/descriptions_tests.cpp 61
-rw-r--r-- descriptions/header.hpp 43
-rw-r--r-- descriptions/loader.cpp 44
-rw-r--r-- descriptions/loader.hpp 41
-rw-r--r-- descriptions/serdes.cpp 45
-rw-r--r-- descriptions/serdes.hpp 291
8 files changed, 560 insertions, 0 deletions
diff --git a/descriptions/CMakeLists.txt b/descriptions/CMakeLists.txt
new file mode 100644
index 0000000000..d8952eee7a
--- /dev/null
+++ b/descriptions/CMakeLists.txt
@@ -0,0 +1,13 @@
+project(descriptions)
+# Headers and sources that make up the descriptions library.
+set(
+ SRC
+ header.hpp
+ loader.cpp
+ loader.hpp
+ serdes.cpp
+ serdes.hpp
+)
+# omim_add_library and omim_add_test_subdirectory are not CMake built-ins; presumably project macros defined elsewhere.
+omim_add_library(${PROJECT_NAME} ${SRC})
+omim_add_test_subdirectory(descriptions_tests)
diff --git a/descriptions/descriptions_tests/CMakeLists.txt b/descriptions/descriptions_tests/CMakeLists.txt
new file mode 100644
index 0000000000..52cfc02c4b
--- /dev/null
+++ b/descriptions/descriptions_tests/CMakeLists.txt
@@ -0,0 +1,22 @@
+project(descriptions_tests)
+# The single test source of this project.
+set(
+ SRC
+ descriptions_tests.cpp
+)
+# omim_add_test is presumably a project macro that declares the test target; it is defined outside this patch.
+omim_add_test(${PROJECT_NAME} ${SRC})
+# Libraries linked into the test target.
+omim_link_libraries(
+ ${PROJECT_NAME}
+ descriptions
+ indexer
+ platform
+ coding
+ base
+ jansson
+ stats_client
+ ${LIBZ}
+)
+
+link_qt5_core(${PROJECT_NAME})
diff --git a/descriptions/descriptions_tests/descriptions_tests.cpp b/descriptions/descriptions_tests/descriptions_tests.cpp
new file mode 100644
index 0000000000..e37592b6cc
--- /dev/null
+++ b/descriptions/descriptions_tests/descriptions_tests.cpp
@@ -0,0 +1,61 @@
+#include "testing/testing.hpp"
+
+#include "descriptions/serdes.hpp"
+
+#include "coding/reader.hpp"
+#include "coding/writer.hpp"
+
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace descriptions;
+
+UNIT_TEST(Descriptions_SerDes) // Round trip: serialize three features into memory, then query them back.
+{
+ std::map<FeatureIndex, std::map<LangCode, std::string>> data =
+ { {100, {{10, "Description of feature 100, language 10."},
+ {11, "Описание фичи 100, язык 11."}}},
+ {101, {{11, "Описание фичи 101, язык 11."}}},
+ {102, {{11, "Описание фичи 102, язык 11."},
+ {10, "Description of feature 102, language 10."}}}
+ };
+ // Convert the fixture into the collection type consumed by Serializer.
+ DescriptionsCollection descriptionsCollection;
+ for (auto const & featureDesc : data)
+ {
+ StringUtf8Multilang str;
+ for (auto const & translation : featureDesc.second)
+ str.AddString(translation.first, translation.second);
+ descriptionsCollection.emplace_back(featureDesc.first, std::move(str));
+ }
+ // Serialize the collection into an in-memory buffer.
+ std::vector<uint8_t> buffer;
+ {
+ Serializer ser(std::move(descriptionsCollection));
+ MemWriter<decltype(buffer)> writer(buffer);
+ ser.Serialize(writer);
+ }
+ // Query the buffer several times through one Deserializer instance.
+ std::string description1;
+ std::string description2;
+ std::string description3;
+ std::string description4;
+ std::string description5;
+ {
+ Deserializer des;
+ MemReader reader(buffer.data(), buffer.size());
+ des.Deserialize(reader, 102, {11, 10}, description1); // Both languages exist: the first in the priority list wins.
+ des.Deserialize(reader, 100, {12, 10}, description2); // Language 12 is absent: falls back to language 10.
+ des.Deserialize(reader, 101, {12}, description3); // No requested language exists for the feature.
+ des.Deserialize(reader, 0, {10, 11}, description4); // Feature 0 was never serialized.
+ des.Deserialize(reader, 102, {10}, description5);
+ }
+ // A missing feature or language leaves the output string untouched, i.e. empty here.
+ TEST_EQUAL(description1, "Описание фичи 102, язык 11.", ());
+ TEST_EQUAL(description2, "Description of feature 100, language 10.", ());
+ TEST_EQUAL(description3, "", ());
+ TEST_EQUAL(description4, "", ());
+ TEST_EQUAL(description5, "Description of feature 102, language 10.", ());
+}
diff --git a/descriptions/header.hpp b/descriptions/header.hpp
new file mode 100644
index 0000000000..647b70559d
--- /dev/null
+++ b/descriptions/header.hpp
@@ -0,0 +1,43 @@
+#pragma once
+
+#include "coding/reader.hpp"
+#include "coding/serdes_binary_header.hpp"
+#include "coding/write_to_sink.hpp"
+
+#include <cstdint>
+
+namespace descriptions
+{
+struct HeaderV0 // Header of the version 0 descriptions section: five 64-bit table offsets.
+{
+ template <typename Visitor>
+ void Visit(Visitor & visitor) // Passes every offset together with its name to the visitor, in declaration order.
+ {
+ visitor(m_featuresOffset, "featuresOffset");
+ visitor(m_langMetaOffset, "langMetaOffset");
+ visitor(m_indexOffset, "indexOffset");
+ visitor(m_stringsOffset, "stringsOffset");
+ visitor(m_eosOffset, "eosOffset");
+ }
+ // Serializes the header into sink by running coding::binary::HeaderSerVisitor over it.
+ template <typename Sink>
+ void Serialize(Sink & sink)
+ {
+ coding::binary::HeaderSerVisitor<Sink> visitor(sink);
+ visitor(*this);
+ }
+ // Fills the header from source by running coding::binary::HeaderDesVisitor over it.
+ template <typename Source>
+ void Deserialize(Source & source)
+ {
+ coding::binary::HeaderDesVisitor<Source> visitor(source);
+ visitor(*this);
+ }
+ // Serializer::Serialize counts these offsets from the position where the header itself starts.
+ uint64_t m_featuresOffset = 0; // Sorted feature ids.
+ uint64_t m_langMetaOffset = 0; // Per-feature (language, string index) records.
+ uint64_t m_indexOffset = 0; // Offsets of the records above.
+ uint64_t m_stringsOffset = 0; // Strings storage.
+ uint64_t m_eosOffset = 0; // End of section.
+};
+} // namespace descriptions
diff --git a/descriptions/loader.cpp b/descriptions/loader.cpp
new file mode 100644
index 0000000000..d55ae79adf
--- /dev/null
+++ b/descriptions/loader.cpp
@@ -0,0 +1,44 @@
+#include "descriptions/loader.hpp"
+
+#include "indexer/data_source.hpp"
+
+#include "base/assert.hpp"
+
+#include "defines.hpp"
+
+namespace descriptions
+{
+bool Loader::GetDescription(FeatureID const & featureId, std::vector<int8_t> const & langPriority,
+ std::string & description)
+{ // Returns false when the mwm is not alive, has no descriptions section, or Deserialize() finds nothing.
+ auto const handle = m_dataSource.GetMwmHandleById(featureId.m_mwmId);
+
+ if (!handle.IsAlive())
+ return false;
+
+ auto const & value = *handle.GetValue<MwmValue>();
+
+ if (!value.m_cont.IsExist(DESCRIPTIONS_FILE_TAG))
+ return false;
+ // Find or create the per-mwm entry; m_mutex guards only the map, not the deserialization itself.
+ EntryPtr entry;
+ {
+ std::lock_guard<std::mutex> lock(m_mutex);
+ auto it = m_deserializers.find(featureId.m_mwmId);
+
+ if (it == m_deserializers.end())
+ {
+ auto const result = m_deserializers.emplace(featureId.m_mwmId, std::make_shared<Entry>());
+ it = result.first;
+ }
+ entry = it->second;
+ }
+
+ ASSERT(entry, ());
+
+ auto readerPtr = value.m_cont.GetReader(DESCRIPTIONS_FILE_TAG);
+ // The per-entry mutex serializes access to this mwm's Deserializer.
+ std::lock_guard<std::mutex> lock(entry->m_mutex);
+ return entry->m_deserializer.Deserialize(*readerPtr.GetPtr(), featureId.m_index, langPriority, description);
+}
+} // namespace descriptions
diff --git a/descriptions/loader.hpp b/descriptions/loader.hpp
new file mode 100644
index 0000000000..6e225ca068
--- /dev/null
+++ b/descriptions/loader.hpp
@@ -0,0 +1,41 @@
+#pragma once
+
+#include "descriptions/serdes.hpp"
+
+#include "indexer/feature_decl.hpp"
+#include "indexer/mwm_set.hpp"
+
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <vector>
+
+class DataSource;
+
+namespace descriptions
+{
+// Reads feature descriptions from the descriptions sections of mwms. *NOTE* This class IS thread-safe.
+class Loader
+{
+public:
+ explicit Loader(DataSource const & dataSource) : m_dataSource(dataSource) {} // Keeps a reference: dataSource must outlive the Loader.
+ // Writes the description of |featureId| in the first available language of |langPriority|; returns false if there is none.
+ bool GetDescription(FeatureID const & featureId, std::vector<int8_t> const & langPriority,
+ std::string & description);
+
+private:
+ struct Entry // A deserializer together with the mutex that guards it.
+ {
+ std::mutex m_mutex;
+ Deserializer m_deserializer;
+ };
+
+ using EntryPtr = std::shared_ptr<Entry>;
+
+ DataSource const & m_dataSource;
+ std::map<MwmSet::MwmId, EntryPtr> m_deserializers; // One entry per mwm, created on first request.
+ std::mutex m_mutex; // Guards m_deserializers.
+};
+} // namespace descriptions
diff --git a/descriptions/serdes.cpp b/descriptions/serdes.cpp
new file mode 100644
index 0000000000..75da40dc8b
--- /dev/null
+++ b/descriptions/serdes.cpp
@@ -0,0 +1,45 @@
+#include "descriptions/serdes.hpp"
+
+#include <utility>
+
+namespace descriptions
+{
+// Sorts the descriptions by feature index and assigns every translation its index in the strings storage,
+// where strings are grouped by language (in LangCode order) and keep the feature order inside a group.
+Serializer::Serializer(DescriptionsCollection && descriptions)
+ : m_descriptions(std::move(descriptions))
+{
+ std::sort(m_descriptions.begin(), m_descriptions.end(), base::LessBy(&FeatureDescription::m_featureIndex));
+
+ m_langMetaCollection.reserve(m_descriptions.size());
+
+ for (size_t i = 0; i < m_descriptions.size(); ++i)
+ {
+ auto & index = m_descriptions[i];
+
+ LangMeta langMeta;
+ index.m_description.ForEach([this, &langMeta, i](LangCode lang, std::string const &)
+ {
+ auto & group = m_groupedByLang[lang];
+ // For now the index is local to the language group; the offset of the group is added below.
+ langMeta.insert(std::make_pair(lang, static_cast<StringIndex>(group.size())));
+ group.push_back(i);
+ });
+ m_langMetaCollection.push_back(langMeta);
+ }
+
+ std::map<LangCode, uint32_t> indicesOffsets;
+ uint32_t currentOffset = 0;
+ for (auto & langIndices : m_groupedByLang)
+ {
+ indicesOffsets.insert(std::make_pair(langIndices.first, currentOffset));
+ currentOffset += static_cast<uint32_t>(langIndices.second.size());
+ }
+
+ for (auto & langMeta : m_langMetaCollection)
+ {
+ for (auto & translation : langMeta)
+ translation.second += indicesOffsets[translation.first];
+ }
+}
+} // namespace descriptions
diff --git a/descriptions/serdes.hpp b/descriptions/serdes.hpp
new file mode 100644
index 0000000000..8c072a09e4
--- /dev/null
+++ b/descriptions/serdes.hpp
@@ -0,0 +1,291 @@
+#pragma once
+
+#include "descriptions/header.hpp"
+
+#include "indexer/feature_decl.hpp"
+
+#include "coding/dd_vector.hpp"
+#include "coding/multilang_utf8_string.hpp"
+#include "coding/text_storage.hpp"
+
+#include "base/assert.hpp"
+#include "base/stl_helpers.hpp"
+
+#include <algorithm>
+#include <cstdint>
+#include <iterator>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <utility>
+
+namespace descriptions
+{
+using FeatureIndex = uint32_t; // Feature id as stored in the sorted ids table.
+using StringIndex = uint32_t; // Index of a string in the strings storage.
+using LangCode = int8_t;
+using LangMeta = std::unordered_map<LangCode, StringIndex>; // Translations of one feature: language -> string index.
+using LangMetaOffset = uint32_t; // Byte offset of a feature's LangMeta record inside the lang meta table.
+
+enum class Version : uint8_t // Format version, stored as the first byte of the section.
+{
+ V0 = 0,
+ Latest = V0 // The version written by Serializer::Serialize.
+};
+
+struct FeatureDescription // Description of a single feature in all its languages.
+{
+ FeatureDescription() = default;
+ FeatureDescription(FeatureIndex index, StringUtf8Multilang && description) // |description| is moved into the object.
+ : m_featureIndex(index)
+ , m_description(std::move(description))
+ {}
+
+ FeatureIndex m_featureIndex = 0;
+ StringUtf8Multilang m_description; // One string per language code.
+};
+using DescriptionsCollection = std::vector<FeatureDescription>; // Input of Serializer; may be unsorted.
+
+/// \brief
+/// Section name: "descriptions".
+/// Description: keeps text descriptions of features in different languages.
+/// Section tables, in the order they are written:
+/// * one-byte format version followed by the header (HeaderV0)
+/// * sorted feature ids vector
+/// * vector of unordered maps with language codes and string indices of corresponding translations of a description
+/// * vector of offsets of these maps, one per feature id (and one additional offset in the end marking the end of the last map)
+/// * BWT-compressed strings grouped by language.
+class Serializer
+{
+public:
+ /// \param descriptions unsorted collection of feature descriptions; it is moved in and sorted by feature index.
+ explicit Serializer(DescriptionsCollection && descriptions);
+ // Writes the whole section to |sink|. The sink must support Pos() and Seek(): the header is written twice.
+ template <typename Sink>
+ void Serialize(Sink & sink)
+ {
+ WriteToSink(sink, static_cast<uint8_t>(Version::Latest));
+ // All header offsets are relative to startPos, i.e. to the position right after the version byte.
+ auto const startPos = sink.Pos();
+ // Reserve room for the header with zero offsets; it is rewritten below once the offsets are known.
+ HeaderV0 header;
+ header.Serialize(sink);
+
+ header.m_featuresOffset = sink.Pos() - startPos;
+ SerializeFeaturesIndices(sink);
+
+ std::vector<LangMetaOffset> offsets;
+ header.m_langMetaOffset = sink.Pos() - startPos;
+ SerializeLangMetaCollection(sink, offsets);
+
+ header.m_indexOffset = sink.Pos() - startPos;
+ SerializeLangMetaIndex(sink, offsets);
+
+ header.m_stringsOffset = sink.Pos() - startPos;
+ SerializeStrings(sink);
+ // Rewrite the header with the real offsets, then move the sink back to the end of the section.
+ header.m_eosOffset = sink.Pos() - startPos;
+ sink.Seek(startPos);
+ header.Serialize(sink);
+ sink.Seek(startPos + header.m_eosOffset);
+ }
+
+ // Serializes a vector of 32-bit sorted feature ids.
+ template <typename Sink>
+ void SerializeFeaturesIndices(Sink & sink)
+ {
+ CHECK(std::is_sorted(m_descriptions.begin(), m_descriptions.end(),
+ base::LessBy(&FeatureDescription::m_featureIndex)), ());
+
+ for (auto const & index : m_descriptions)
+ WriteToSink(sink, index.m_featureIndex);
+ }
+ // Writes (LangCode, varuint string index) pairs per feature; |offsets| gets the start of every record plus the final end.
+ template <typename Sink>
+ void SerializeLangMetaCollection(Sink & sink, std::vector<LangMetaOffset> & offsets)
+ {
+ auto const startPos = sink.Pos();
+ for (auto const & meta : m_langMetaCollection)
+ {
+ offsets.push_back(static_cast<LangMetaOffset>(sink.Pos() - startPos));
+ for (auto const & pair : meta)
+ {
+ WriteToSink(sink, pair.first);
+ WriteVarUint(sink, pair.second);
+ }
+ }
+ offsets.push_back(static_cast<LangMetaOffset>(sink.Pos() - startPos));
+ }
+ // Writes the offsets collected by SerializeLangMetaCollection, one LangMetaOffset value each.
+ template <typename Sink>
+ void SerializeLangMetaIndex(Sink & sink, std::vector<LangMetaOffset> const & offsets)
+ {
+ for (auto const & offset : offsets)
+ WriteToSink(sink, offset);
+ }
+
+ // Serializes strings in a compressed storage with block access, language by language in m_groupedByLang order.
+ template <typename Sink>
+ void SerializeStrings(Sink & sink)
+ {
+ coding::BlockedTextStorageWriter<Sink> writer(sink, 200000 /* blockSize */);
+ std::string str;
+ for (auto const & langIndices : m_groupedByLang)
+ {
+ for (auto const & descIndex : langIndices.second)
+ {
+ auto const found = m_descriptions[descIndex].m_description.GetString(langIndices.first, str);
+ CHECK(found, ());
+ writer.Append(str);
+ }
+ }
+ }
+
+private:
+ DescriptionsCollection m_descriptions; // Sorted by feature index in the constructor.
+ std::vector<LangMeta> m_langMetaCollection; // Per feature, in m_descriptions order: language -> string index.
+ std::map<LangCode, std::vector<size_t>> m_groupedByLang; // Per language: indices into m_descriptions.
+};
+
+class Deserializer // Reads single descriptions from a section written by Serializer; the header is cached after the first call.
+{
+public:
+ template <typename Reader>
+ bool Deserialize(Reader & reader, FeatureIndex featureIndex, std::vector<LangCode> const & langPriority,
+ std::string & description)
+ { // Returns true and fills |description| on success; returns false and leaves it untouched otherwise.
+ NonOwningReaderSource source(reader);
+ auto const version = static_cast<Version>(ReadPrimitiveFromSource<uint8_t>(source));
+ // Everything after the version byte is the versioned payload.
+ auto subReader = reader.CreateSubReader(source.Pos(), source.Size());
+ CHECK(subReader, ());
+
+ switch (version)
+ {
+ case Version::V0: return DeserializeV0(*subReader, featureIndex, langPriority, description);
+ }
+ CHECK_SWITCH();
+
+ return false;
+ }
+ // V0 payload: looks |featureIndex| up in the sorted ids, reads its LangMeta record, extracts the first language found.
+ template <typename Reader>
+ bool DeserializeV0(Reader & reader, FeatureIndex featureIndex, std::vector<LangCode> const & langPriority,
+ std::string & description)
+ {
+ InitializeIfNeeded(reader);
+
+ LangMetaOffset startOffset = 0;
+ LangMetaOffset endOffset = 0;
+ {
+ ReaderPtr<Reader> idsSubReader(CreateFeatureIndicesSubReader(reader));
+ DDVector<FeatureIndex, ReaderPtr<Reader>> ids(idsSubReader);
+ auto const it = std::lower_bound(ids.begin(), ids.end(), featureIndex);
+ if (it == ids.end() || *it != featureIndex)
+ return false;
+
+ auto const d = static_cast<uint32_t>(std::distance(ids.begin(), it));
+ // The offsets table has one more element than the ids table, so ofs[d + 1] is the end of record d.
+ ReaderPtr<Reader> ofsSubReader(CreateLangMetaOffsetsSubReader(reader));
+ DDVector<LangMetaOffset, ReaderPtr<Reader>> ofs(ofsSubReader);
+ CHECK_LESS(d, ofs.size(), ());
+ CHECK_LESS(d + 1, ofs.size(), ());
+
+ startOffset = ofs[d];
+ endOffset = ofs[d + 1];
+ }
+
+ LangMeta langMeta;
+ {
+ auto langMetaSubReader = CreateLangMetaSubReader(reader, startOffset, endOffset);
+ NonOwningReaderSource source(*langMetaSubReader);
+ // The record is a sequence of (LangCode, varuint string index) pairs up to its end.
+ while (source.Size() > 0)
+ {
+ auto const lang = ReadPrimitiveFromSource<LangCode>(source);
+ auto const stringIndex = ReadVarUint<StringIndex>(source);
+ langMeta.insert(std::make_pair(lang, stringIndex));
+ }
+ }
+
+ auto stringsSubReader = CreateStringsSubReader(reader);
+ for (auto const lang : langPriority)
+ {
+ auto const it = langMeta.find(lang);
+ if (it != langMeta.end())
+ {
+ description = m_stringsReader.ExtractString(*stringsSubReader, it->second);
+ return true;
+ }
+ }
+
+ return false;
+ }
+ // Sub-reader over the sorted feature ids table: [featuresOffset, langMetaOffset).
+ template <typename Reader>
+ std::unique_ptr<Reader> CreateFeatureIndicesSubReader(Reader & reader)
+ {
+ CHECK(m_initialized, ());
+
+ auto const pos = m_header.m_featuresOffset;
+ CHECK_GREATER_OR_EQUAL(m_header.m_langMetaOffset, pos, ());
+ auto const size = m_header.m_langMetaOffset - pos;
+ return reader.CreateSubReader(pos, size);
+ }
+ // Sub-reader over the table of LangMeta record offsets: [indexOffset, stringsOffset).
+ template <typename Reader>
+ std::unique_ptr<Reader> CreateLangMetaOffsetsSubReader(Reader & reader)
+ {
+ CHECK(m_initialized, ());
+
+ auto const pos = m_header.m_indexOffset;
+ CHECK_GREATER_OR_EQUAL(m_header.m_stringsOffset, pos, ());
+ auto const size = m_header.m_stringsOffset - pos;
+ return reader.CreateSubReader(pos, size);
+ }
+ // Sub-reader over one LangMeta record; the offsets are relative to langMetaOffset and must not run past indexOffset.
+ template <typename Reader>
+ std::unique_ptr<Reader> CreateLangMetaSubReader(Reader & reader, LangMetaOffset startOffset, LangMetaOffset endOffset)
+ {
+ CHECK(m_initialized, ());
+ CHECK_GREATER_OR_EQUAL(endOffset, startOffset, ()); // Both are unsigned: the subtraction below would wrap around.
+ auto const pos = m_header.m_langMetaOffset + startOffset;
+ CHECK_GREATER_OR_EQUAL(m_header.m_indexOffset, pos, ());
+ auto const size = endOffset - startOffset;
+ CHECK_GREATER_OR_EQUAL(m_header.m_indexOffset, pos + size, ());
+ return reader.CreateSubReader(pos, size);
+ }
+ // Sub-reader over the strings storage: [stringsOffset, eosOffset).
+ template <typename Reader>
+ std::unique_ptr<Reader> CreateStringsSubReader(Reader & reader)
+ {
+ CHECK(m_initialized, ());
+
+ auto const pos = m_header.m_stringsOffset;
+ CHECK_GREATER_OR_EQUAL(m_header.m_eosOffset, pos, ());
+ auto const size = m_header.m_eosOffset - pos;
+ return reader.CreateSubReader(pos, size);
+ }
+
+private:
+ template <typename Reader>
+ void InitializeIfNeeded(Reader & reader)
+ { // Reads the header only once: later calls reuse it, so an instance must always be given the same section.
+ if (m_initialized)
+ return;
+
+ {
+ NonOwningReaderSource source(reader);
+ m_header.Deserialize(source);
+ }
+
+ m_initialized = true;
+ }
+
+ bool m_initialized = false; // True once m_header has been read.
+ HeaderV0 m_header;
+ coding::BlockedTextStorageReader m_stringsReader;
+};
+} // namespace descriptions