Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVladimir Byko-Ianko <bykoianko@gmail.com>2016-07-22 15:40:37 +0300
committerGitHub <noreply@github.com>2016-07-22 15:40:37 +0300
commite69836616a71a12164b92fbaaccfb4c220a4bc5f (patch)
treec55fc118e0725035c3037374a3bbcda4c9c9a78a /indexer
parentb4d8c1c997989267773f4a17bc3a869620cf29da (diff)
parent120cddc6295b3a79455cc30c7aea6ff7f4fb99d9 (diff)
Merge pull request #3822 from ygorshenin/implement-centers-storage
[indexer] Implemented CentersTable.
Diffstat (limited to 'indexer')
-rw-r--r--indexer/centers_table.cpp339
-rw-r--r--indexer/centers_table.hpp80
-rw-r--r--indexer/indexer.pro2
-rw-r--r--indexer/indexer_tests/centers_table_test.cpp113
-rw-r--r--indexer/indexer_tests/indexer_tests.pro1
-rw-r--r--indexer/rank_table.cpp42
6 files changed, 536 insertions, 41 deletions
diff --git a/indexer/centers_table.cpp b/indexer/centers_table.cpp
new file mode 100644
index 0000000000..c3f42e8b94
--- /dev/null
+++ b/indexer/centers_table.cpp
@@ -0,0 +1,339 @@
+#include "indexer/centers_table.hpp"
+
+#include "indexer/coding_params.hpp"
+#include "indexer/geometry_coding.hpp"
+#include "indexer/point_to_int64.hpp"
+
+#include "coding/endianness.hpp"
+#include "coding/memory_region.hpp"
+#include "coding/reader.hpp"
+#include "coding/succinct_mapper.hpp"
+#include "coding/varint.hpp"
+#include "coding/write_to_sink.hpp"
+#include "coding/writer.hpp"
+
+#include "base/assert.hpp"
+#include "base/logging.hpp"
+
+#include "std/unordered_map.hpp"
+
+#include "3party/succinct/elias_fano.hpp"
+#include "3party/succinct/rs_bit_vector.hpp"
+
+namespace search
+{
+namespace
+{
+template <typename TCont>
+void EndiannessAwareMap(bool endiannesMismatch, CopiedMemoryRegion & region, TCont & cont)
+{
+ TCont c;
+ if (endiannesMismatch)
+ {
+ coding::ReverseMapVisitor visitor(region.MutableData());
+ c.map(visitor);
+ }
+ else
+ {
+ coding::MapVisitor visitor(region.ImmutableData());
+ c.map(visitor);
+ }
+
+ c.swap(cont);
+}
+
+// V0 of CentersTable. Has the following format:
+//
+// File offset (bytes) Field name Field size (bytes)
+// 0 common header 4
+// 4 positions offset 4
+// 8 deltas offset 4
+// 12 end of section 4
+// 16 identifiers table positions offset - 16
+// positions offset positions table deltas offset - positions offset
+// deltas offset deltas blocks end of section - deltas offset
+//
+// All offsets are in little-endian format.
+//
+// Identifiers table is a bit-vector with rank-select table, where set
+// bits denote that centers for the corresponding features are in
+// table. Identifiers table is stored in the native endianness.
+//
+// Positions table is a Elias-Fano table, where each entry corresponds
+// to the start position of the centers block. Positions table is
+// stored in the native endianness.
+//
+// Deltas is a sequence of blocks, where each block (with the
+// exception of the last one) is a sequence of kBlockSize varuints,
+// where each varuint represents an encoded delta of the center from
+// some prediction. For the first center in the block map base point
+// is used as a prediction, for all other centers in the block
+// previous center is used as a prediction. This allows to decode
+// block of kBlockSize consecutive centers at one syscall, and cache
+// them in RAM.
+class CentersTableV0 : public CentersTable
+{
+public:
+ static const uint32_t kBlockSize = 64;
+
+ struct Header
+ {
+ void Read(Reader & reader)
+ {
+ m_base.Read(reader);
+
+ NonOwningReaderSource source(reader);
+ source.Skip(sizeof(m_base));
+ m_positionsOffset = ReadPrimitiveFromSource<uint32_t>(source);
+ m_deltasOffset = ReadPrimitiveFromSource<uint32_t>(source);
+ m_endOffset = ReadPrimitiveFromSource<uint32_t>(source);
+ }
+
+ void Write(Writer & writer)
+ {
+ m_base.Write(writer);
+
+ WriteToSink(writer, m_positionsOffset);
+ WriteToSink(writer, m_deltasOffset);
+ WriteToSink(writer, m_endOffset);
+ }
+
+ bool IsValid() const
+ {
+ if (!m_base.IsValid())
+ {
+ LOG(LERROR, ("Base header is not valid!"));
+ return false;
+ }
+ if (m_positionsOffset < sizeof(m_header))
+ {
+ LOG(LERROR, ("Positions before header:", m_positionsOffset, sizeof(m_header)));
+ return false;
+ }
+ if (m_deltasOffset < m_positionsOffset)
+ {
+ LOG(LERROR, ("Deltas before positions:", m_deltasOffset, m_positionsOffset));
+ return false;
+ }
+ if (m_endOffset < m_deltasOffset)
+ {
+ LOG(LERROR, ("End of section before deltas:", m_endOffset, m_deltasOffset));
+ return false;
+ }
+ return true;
+ }
+
+ CentersTable::Header m_base;
+ uint32_t m_positionsOffset = 0;
+ uint32_t m_deltasOffset = 0;
+ uint32_t m_endOffset = 0;
+ };
+
+ static_assert(sizeof(Header) == 16, "Wrong header size.");
+
+ CentersTableV0(Reader & reader, serial::CodingParams const & codingParams)
+ : m_reader(reader), m_codingParams(codingParams)
+ {
+ }
+
+ // CentersTable overrides:
+ bool Get(uint32_t id, m2::PointD & center) override
+ {
+ if (id >= m_ids.size() || !m_ids[id])
+ return false;
+ uint32_t const rank = static_cast<uint32_t>(m_ids.rank(id));
+ uint32_t const base = rank / kBlockSize;
+ uint32_t const offset = rank % kBlockSize;
+
+ auto & entry = m_cache[base];
+ if (entry.empty())
+ {
+ entry.resize(kBlockSize);
+
+ uint32_t const start = m_offsets.select(base);
+ uint32_t const end = base + 1 < m_offsets.num_ones()
+ ? m_offsets.select(base + 1)
+ : m_header.m_endOffset - m_header.m_deltasOffset;
+
+ vector<uint8_t> data(end - start);
+
+ m_reader.Read(m_header.m_deltasOffset + start, data.data(), data.size());
+
+ MemReader mreader(data.data(), data.size());
+ NonOwningReaderSource msource(mreader);
+
+ uint64_t delta = ReadVarUint<uint64_t>(msource);
+ entry[0] = DecodeDelta(delta, m_codingParams.GetBasePoint());
+
+ for (size_t i = 1; i < kBlockSize && msource.Size() > 0; ++i)
+ {
+ delta = ReadVarUint<uint64_t>(msource);
+ entry[i] = DecodeDelta(delta, entry[i - 1]);
+ }
+ }
+
+ center = PointU2PointD(entry[offset], m_codingParams.GetCoordBits());
+ return true;
+ }
+
+private:
+ // CentersTable overrides:
+ bool Init() override
+ {
+ m_header.Read(m_reader);
+
+ if (!m_header.IsValid())
+ return false;
+
+ bool const isHostBigEndian = IsBigEndian();
+ bool const isDataBigEndian = m_header.m_base.m_endianness == 1;
+ bool const endiannesMismatch = isHostBigEndian != isDataBigEndian;
+
+ {
+ uint32_t const idsSize = m_header.m_positionsOffset - sizeof(m_header);
+ vector<uint8_t> data(idsSize);
+ m_reader.Read(sizeof(m_header), data.data(), data.size());
+ m_idsRegion = make_unique<CopiedMemoryRegion>(move(data));
+ EndiannessAwareMap(endiannesMismatch, *m_idsRegion, m_ids);
+ }
+
+ {
+ uint32_t const offsetsSize = m_header.m_deltasOffset - m_header.m_positionsOffset;
+ vector<uint8_t> data(offsetsSize);
+ m_reader.Read(m_header.m_positionsOffset, data.data(), data.size());
+ m_offsetsRegion = make_unique<CopiedMemoryRegion>(move(data));
+ EndiannessAwareMap(endiannesMismatch, *m_offsetsRegion, m_offsets);
+ }
+
+ return true;
+ }
+
+private:
+ Header m_header;
+ Reader & m_reader;
+ serial::CodingParams const m_codingParams;
+
+ unique_ptr<CopiedMemoryRegion> m_idsRegion;
+ unique_ptr<CopiedMemoryRegion> m_offsetsRegion;
+
+ succinct::rs_bit_vector m_ids;
+ succinct::elias_fano m_offsets;
+
+ unordered_map<uint32_t, vector<m2::PointU>> m_cache;
+};
+} // namespace
+
+// CentersTable::Header ----------------------------------------------------------------------------
+void CentersTable::Header::Read(Reader & reader)
+{
+ NonOwningReaderSource source(reader);
+ m_version = ReadPrimitiveFromSource<uint16_t>(source);
+ m_endianness = ReadPrimitiveFromSource<uint16_t>(source);
+}
+
+void CentersTable::Header::Write(Writer & writer)
+{
+ WriteToSink(writer, m_version);
+ WriteToSink(writer, m_endianness);
+}
+
+bool CentersTable::Header::IsValid() const
+{
+ if (m_endianness > 1)
+ return false;
+ return true;
+}
+
+// CentersTable ------------------------------------------------------------------------------------
+unique_ptr<CentersTable> CentersTable::Load(Reader & reader,
+ serial::CodingParams const & codingParams)
+{
+ uint16_t const version = ReadPrimitiveFromPos<uint16_t>(reader, 0 /* pos */);
+ if (version != 0)
+ return unique_ptr<CentersTable>();
+
+ // Only single version of centers table is supported now. If you
+ // need to implement new versions of CentersTable, implement
+ // dispatching based on first-four-bytes version.
+ unique_ptr<CentersTable> table = make_unique<CentersTableV0>(reader, codingParams);
+ if (!table->Init())
+ return unique_ptr<CentersTable>();
+ return table;
+}
+
+// CentersTableBuilder -----------------------------------------------------------------------------
+CentersTableBuilder::CentersTableBuilder(Writer & writer, serial::CodingParams const & codingParams)
+ : m_writer(writer), m_codingParams(codingParams)
+{
+}
+
+CentersTableBuilder::~CentersTableBuilder()
+{
+ CentersTableV0::Header header;
+
+ int64_t const startOffset = m_writer.Pos();
+ header.Write(m_writer);
+
+ {
+ uint64_t const numBits = m_ids.empty() ? 0 : m_ids.back() + 1;
+
+ succinct::bit_vector_builder builder(numBits);
+ for (auto const & id : m_ids)
+ builder.set(id, true);
+
+ coding::FreezeVisitor<Writer> visitor(m_writer);
+ succinct::rs_bit_vector(&builder).map(visitor);
+ }
+
+ vector<uint32_t> offsets;
+ vector<uint8_t> deltas;
+
+ {
+ MemWriter<vector<uint8_t>> writer(deltas);
+ for (size_t i = 0; i < m_centers.size(); i += CentersTableV0::kBlockSize)
+ {
+ offsets.push_back(deltas.size());
+
+ uint64_t delta = EncodeDelta(m_centers[i], m_codingParams.GetBasePoint());
+ WriteVarUint(writer, delta);
+ for (size_t j = i + 1; j < i + CentersTableV0::kBlockSize && j < m_centers.size(); ++j)
+ {
+ delta = EncodeDelta(m_centers[j], m_centers[j - 1]);
+ WriteVarUint(writer, delta);
+ }
+ }
+ }
+
+ {
+ succinct::elias_fano::elias_fano_builder builder(offsets.empty() ? 0 : offsets.back() + 1,
+ offsets.size());
+ for (auto const & offset : offsets)
+ builder.push_back(offset);
+
+ header.m_positionsOffset = m_writer.Pos();
+ coding::FreezeVisitor<Writer> visitor(m_writer);
+ succinct::elias_fano(&builder).map(visitor);
+ }
+
+ {
+ header.m_deltasOffset = m_writer.Pos();
+ m_writer.Write(deltas.data(), deltas.size());
+ header.m_endOffset = m_writer.Pos();
+ }
+
+ int64_t const endOffset = m_writer.Pos();
+
+ m_writer.Seek(startOffset);
+ header.Write(m_writer);
+ m_writer.Seek(endOffset);
+}
+
+void CentersTableBuilder::Put(uint32_t featureId, m2::PointD const & center)
+{
+ if (!m_ids.empty())
+ CHECK_LESS(m_ids.back(), featureId, ());
+
+ m_centers.push_back(PointD2PointU(center, m_codingParams.GetCoordBits()));
+ m_ids.push_back(featureId);
+}
+} // namespace search
diff --git a/indexer/centers_table.hpp b/indexer/centers_table.hpp
new file mode 100644
index 0000000000..5d4be325ef
--- /dev/null
+++ b/indexer/centers_table.hpp
@@ -0,0 +1,80 @@
+#pragma once
+
+#include "geometry/point2d.hpp"
+
+#include "std/cstdint.hpp"
+#include "std/unique_ptr.hpp"
+#include "std/vector.hpp"
+
+class Reader;
+class Writer;
+
+namespace serial
+{
+class CodingParams;
+}
+
+namespace search
+{
+// A wrapper class around serialized centers-table.
+//
+// *NOTE* This wrapper is abstract enough so feel free to change it,
+// but note that there should always be backward-compatibility. Thus,
+// when adding new versions, never change data format of old versions.
+//
+// All centers tables are serialized in the following format:
+//
+// File offset (bytes) Field name Field size (bytes)
+// 0 version 2
+// 2 endianness 2
+//
+// Version and endianness is always in stored little-endian format. 0
+// value of endianness means little endian, whereas 1 means big
+// endian.
+class CentersTable
+{
+public:
+ struct Header
+ {
+ void Read(Reader & reader);
+ void Write(Writer & writer);
+ bool IsValid() const;
+
+ uint16_t m_version = 0;
+ uint16_t m_endianness = 0;
+ };
+
+ static_assert(sizeof(Header) == 4, "Wrong header size");
+
+ virtual ~CentersTable() = default;
+
+ // Tries to get |center| of the feature identified by |id|. Returns
+ // false if table does not have entry for the feature.
+ virtual bool Get(uint32_t id, m2::PointD & center) = 0;
+
+ // Loads CentersTable instance. Note that |reader| must be alive
+ // until the destruction of loaded table. Returns nullptr if
+ // CentersTable can't be loaded.
+ static unique_ptr<CentersTable> Load(Reader & reader, serial::CodingParams const & codingParams);
+
+private:
+ virtual bool Init() = 0;
+};
+
+class CentersTableBuilder
+{
+public:
+ CentersTableBuilder(Writer & writer, serial::CodingParams const & codingParams);
+
+ ~CentersTableBuilder();
+
+ void Put(uint32_t featureId, m2::PointD const & center);
+
+private:
+ Writer & m_writer;
+ serial::CodingParams const & m_codingParams;
+
+ vector<m2::PointU> m_centers;
+ vector<uint32_t> m_ids;
+};
+} // namespace search
diff --git a/indexer/indexer.pro b/indexer/indexer.pro
index ee3e3624ef..26a2499e36 100644
--- a/indexer/indexer.pro
+++ b/indexer/indexer.pro
@@ -13,6 +13,7 @@ SOURCES += \
categories_holder.cpp \
categories_holder_loader.cpp \
categories_index.cpp \
+ centers_table.cpp \
classificator.cpp \
classificator_loader.cpp \
coding_params.cpp \
@@ -64,6 +65,7 @@ HEADERS += \
categories_index.hpp \
cell_coverer.hpp \
cell_id.hpp \
+ centers_table.hpp \
classificator.hpp \
classificator_loader.hpp \
coding_params.hpp \
diff --git a/indexer/indexer_tests/centers_table_test.cpp b/indexer/indexer_tests/centers_table_test.cpp
new file mode 100644
index 0000000000..107b3d1e56
--- /dev/null
+++ b/indexer/indexer_tests/centers_table_test.cpp
@@ -0,0 +1,113 @@
+#include "testing/testing.hpp"
+
+#include "indexer/centers_table.hpp"
+#include "indexer/classificator_loader.hpp"
+#include "indexer/data_header.hpp"
+#include "indexer/feature_algo.hpp"
+#include "indexer/features_vector.hpp"
+
+#include "platform/platform.hpp"
+
+#include "coding/file_name_utils.hpp"
+#include "coding/reader.hpp"
+#include "coding/writer.hpp"
+
+#include "geometry/mercator.hpp"
+#include "geometry/point2d.hpp"
+
+#include "std/cstdint.hpp"
+#include "std/string.hpp"
+#include "std/utility.hpp"
+#include "std/vector.hpp"
+
+using namespace search;
+
+namespace
+{
+using TBuffer = vector<uint8_t>;
+
+struct CentersTableTest
+{
+ CentersTableTest() { classificator::Load(); }
+};
+
+UNIT_CLASS_TEST(CentersTableTest, Smoke)
+{
+ string const kMap = my::JoinFoldersToPath(GetPlatform().WritableDir(), "minsk-pass.mwm");
+
+ feature::DataHeader header(kMap);
+ auto const codingParams = header.GetDefCodingParams();
+
+ FeaturesVectorTest fv(kMap);
+
+ TBuffer buffer;
+
+ {
+ MemWriter<TBuffer> writer(buffer);
+ CentersTableBuilder builder(writer, codingParams);
+
+ fv.GetVector().ForEach(
+ [&](FeatureType & ft, uint32_t id) { builder.Put(id, feature::GetCenter(ft)); });
+ }
+
+ {
+ MemReader reader(buffer.data(), buffer.size());
+ auto table = CentersTable::Load(reader, codingParams);
+ TEST(table.get(), ());
+
+ fv.GetVector().ForEach([&](FeatureType & ft, uint32_t id) {
+ m2::PointD actual;
+ TEST(table->Get(id, actual), ());
+
+ m2::PointD expected = feature::GetCenter(ft);
+
+ TEST_LESS_OR_EQUAL(MercatorBounds::DistanceOnEarth(actual, expected), 1, (id));
+ });
+ }
+}
+
+UNIT_CLASS_TEST(CentersTableTest, Subset)
+{
+ vector<pair<uint32_t, m2::PointD>> const features = {
+ {1, m2::PointD(0, 0)}, {5, m2::PointD(1, 1)}, {10, m2::PointD(2, 2)}};
+
+ serial::CodingParams codingParams;
+
+ TBuffer buffer;
+ {
+ MemWriter<TBuffer> writer(buffer);
+ CentersTableBuilder builder(writer, codingParams);
+ for (auto const & feature : features)
+ builder.Put(feature.first, feature.second);
+ }
+
+ {
+ MemReader reader(buffer.data(), buffer.size());
+ auto table = CentersTable::Load(reader, codingParams);
+ TEST(table.get(), ());
+
+ size_t i = 0;
+ size_t j = 0;
+
+ while (i < 100)
+ {
+ ASSERT(j == features.size() || features[j].first >= i, ("Invariant violation"));
+
+ m2::PointD actual;
+ if (j != features.size() && i == features[j].first)
+ {
+ TEST(table->Get(i, actual), ());
+ TEST_LESS_OR_EQUAL(MercatorBounds::DistanceOnEarth(actual, features[j].second), 1, ());
+ }
+ else
+ {
+ TEST(!table->Get(i, actual), ());
+ }
+
+ ++i;
+ while (j != features.size() && features[j].first < i)
+ ++j;
+ }
+ }
+}
+} // namespace
diff --git a/indexer/indexer_tests/indexer_tests.pro b/indexer/indexer_tests/indexer_tests.pro
index 0f1090a2f0..4eaa571965 100644
--- a/indexer/indexer_tests/indexer_tests.pro
+++ b/indexer/indexer_tests/indexer_tests.pro
@@ -20,6 +20,7 @@ SOURCES += \
categories_test.cpp \
cell_coverer_test.cpp \
cell_id_test.cpp \
+ centers_table_test.cpp \
checker_test.cpp \
drules_selector_parser_test.cpp \
editable_map_object_test.cpp \
diff --git a/indexer/rank_table.cpp b/indexer/rank_table.cpp
index 3d9d6300fd..80c3e687cc 100644
--- a/indexer/rank_table.cpp
+++ b/indexer/rank_table.cpp
@@ -13,6 +13,7 @@
#include "coding/endianness.hpp"
#include "coding/file_container.hpp"
#include "coding/file_writer.hpp"
+#include "coding/memory_region.hpp"
#include "coding/reader.hpp"
#include "coding/simple_dense_coding.hpp"
#include "coding/succinct_mapper.hpp"
@@ -55,47 +56,6 @@ CheckResult CheckEndianness(TReader && reader)
return CheckResult::EndiannessMatch;
}
-class MemoryRegion
-{
-public:
- virtual ~MemoryRegion() = default;
-
- virtual uint64_t Size() const = 0;
- virtual uint8_t const * ImmutableData() const = 0;
-};
-
-class MappedMemoryRegion : public MemoryRegion
-{
-public:
- MappedMemoryRegion(FilesMappingContainer::Handle && handle) : m_handle(move(handle)) {}
-
- // MemoryRegion overrides:
- uint64_t Size() const override { return m_handle.GetSize(); }
- uint8_t const * ImmutableData() const override { return m_handle.GetData<uint8_t>(); }
-
-private:
- FilesMappingContainer::Handle m_handle;
-
- DISALLOW_COPY(MappedMemoryRegion);
-};
-
-class CopiedMemoryRegion : public MemoryRegion
-{
-public:
- CopiedMemoryRegion(vector<uint8_t> && buffer) : m_buffer(move(buffer)) {}
-
- // MemoryRegion overrides:
- uint64_t Size() const override { return m_buffer.size(); }
- uint8_t const * ImmutableData() const override { return m_buffer.data(); }
-
- inline uint8_t * MutableData() { return m_buffer.data(); }
-
-private:
- vector<uint8_t> m_buffer;
-
- DISALLOW_COPY(CopiedMemoryRegion);
-};
-
unique_ptr<CopiedMemoryRegion> GetMemoryRegionForTag(FilesContainerR const & rcont,
FilesContainerBase::Tag const & tag)
{