Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey Yershov <yershov@corp.mail.ru> 2015-08-24 13:46:05 +0300
committer Alex Zolotarev <alex@maps.me> 2015-09-23 03:03:14 +0300
commit 7fcd6ec8c4259a010433d72fce5bb299d24b8381 (patch)
tree c4f90dbe82c5cc8b439de1c2427c0fefccfb0ab4 /generator
parent 8a05999c2ed155f6deb5bb1570e2c455145b7788 (diff)
Added the possibility to load all intermediate data into memory
Diffstat (limited to 'generator')
-rw-r--r-- generator/data_cache_file.hpp 106
-rw-r--r-- generator/generator_tests/intermediate_data_test.cpp 135
-rw-r--r-- generator/osm_decl.hpp 126
-rw-r--r-- generator/osm_source.cpp 4
4 files changed, 332 insertions, 39 deletions
diff --git a/generator/data_cache_file.hpp b/generator/data_cache_file.hpp
index de57bc698a..3e2be4b731 100644
--- a/generator/data_cache_file.hpp
+++ b/generator/data_cache_file.hpp
@@ -9,6 +9,7 @@
#include "base/logging.hpp"
#include "std/algorithm.hpp"
+#include "std/deque.hpp"
#include "std/exception.hpp"
#include "std/limits.hpp"
#include "std/utility.hpp"
@@ -35,6 +36,7 @@ class IndexFile
TContainer m_elements;
TFile m_file;
+ uint64_t m_fileSize = 0;
static size_t constexpr kFlushCount = 1024;
@@ -71,23 +73,23 @@ public:
void ReadAll()
{
m_elements.clear();
- uint64_t const fileSize = m_file.Size();
- if (fileSize == 0)
+ m_fileSize = m_file.Size();
+ if (m_fileSize == 0)
return;
LOG_SHORT(LINFO, ("Offsets reading is started for file ", GetFileName()));
- CHECK_EQUAL(0, fileSize % sizeof(TElement), ("Damaged file."));
+ CHECK_EQUAL(0, m_fileSize % sizeof(TElement), ("Damaged file."));
try
{
- m_elements.resize(CheckedCast(fileSize / sizeof(TElement)));
+ m_elements.resize(CheckedCast(m_fileSize / sizeof(TElement)));
}
catch (exception const &) // bad_alloc
{
LOG(LCRITICAL, ("Insufficient memory for required offset map"));
}
- m_file.Read(0, &m_elements[0], CheckedCast(fileSize));
+ m_file.Read(0, &m_elements[0], CheckedCast(m_fileSize));
sort(m_elements.begin(), m_elements.end(), ElementComparator());
@@ -102,12 +104,12 @@ public:
m_elements.push_back(make_pair(k, v));
}
- bool GetValueByKey(TKey k, TValue & v) const
+ bool GetValueByKey(TKey key, TValue & value) const
{
- auto it = lower_bound(m_elements.begin(), m_elements.end(), k, ElementComparator());
- if ((it != m_elements.end()) && ((*it).first == k))
+ auto it = lower_bound(m_elements.begin(), m_elements.end(), key, ElementComparator());
+ if ((it != m_elements.end()) && ((*it).first == key))
{
- v = (*it).second;
+ value = (*it).second;
return true;
}
return false;
@@ -132,41 +134,95 @@ class OSMElementCache
{
public:
using TKey = uint64_t;
- using TStream = typename conditional<TMode == EMode::Write, FileWriterStream, FileReaderStream>::type;
+ using TStorage = typename conditional<TMode == EMode::Write, FileWriter, FileReader>::type;
using TOffsetFile = typename conditional<TMode == EMode::Write, FileWriter, FileReader>::type;
protected:
- TStream m_stream;
+ using TBuffer = vector<uint8_t>;
+ TStorage m_storage;
detail::IndexFile<TOffsetFile, uint64_t> m_offsets;
string m_name;
+ TBuffer m_data;
+ bool m_preload = false;
public:
- OSMElementCache(string const & name) : m_stream(name), m_offsets(name + OFFSET_EXT), m_name(name) {}
+ OSMElementCache(string const & name, bool preload = false)
+ : m_storage(name)
+ , m_offsets(name + OFFSET_EXT)
+ , m_name(name)
+ , m_preload(preload)
+ {
+ InitStorage<TMode>();
+ }
- template <class TValue>
- void Write(TKey id, TValue const & value)
+ template <EMode T>
+ typename enable_if<T == EMode::Write, void>::type InitStorage() {}
+
+ template <EMode T>
+ typename enable_if<T == EMode::Read, void>::type InitStorage()
{
- m_offsets.Add(id, m_stream.Pos());
- value.Write(m_stream);
- std::ofstream ff((m_name+".wlog").c_str(), std::ios::binary | std::ios::app);
- ff << id << " " << value.ToString() << std::endl;
+ if (m_preload)
+ {
+ size_t sz = m_storage.Size();
+ m_data.resize(sz);
+ m_storage.Read(0, m_data.data(), sz);
+ }
}
- template <class TValue>
- bool Read(TKey id, TValue & value)
+ template <class TValue, EMode T = TMode>
+ typename enable_if<T == EMode::Write, void>::type Write(TKey id, TValue const & value)
+ {
+ m_offsets.Add(id, m_storage.Pos());
+ m_data.clear();
+ MemWriter<TBuffer> w(m_data);
+
+ value.Write(w);
+
+ // write buffer
+ ASSERT_LESS(m_data.size(), numeric_limits<uint32_t>::max(), ());
+ uint32_t sz = static_cast<uint32_t>(m_data.size());
+ m_storage.Write(&sz, sizeof(sz));
+ m_storage.Write(m_data.data(), sz * sizeof(TBuffer::value_type));
+
+// std::ofstream ff((m_name+".wlog").c_str(), std::ios::binary | std::ios::app);
+// ff << id << " " << value.ToString() << std::endl;
+// if (id == 1942060)
+// ff << id << " " << value.Dump() << std::endl;
+ }
+
+ template <class TValue, EMode T = TMode>
+ typename enable_if<T == EMode::Read, bool>::type Read(TKey id, TValue & value)
{
uint64_t pos;
if (m_offsets.GetValueByKey(id, pos))
{
- m_stream.Seek(pos);
- value.Read(m_stream);
- std::ofstream ff((m_name+".rlog").c_str(), std::ios::binary | std::ios::app);
- ff << id << " " << value.ToString() << std::endl;
+ uint32_t valueSize = m_preload ? *((uint32_t *)(m_data.data() + pos)) : 0;
+ size_t offset = pos + sizeof(uint32_t);
+
+ if (!m_preload)
+ {
+ // When the data was not preloaded into memory, read this value's bytes from storage into m_data.
+ m_storage.Read(pos, &valueSize, sizeof(valueSize));
+ m_data.resize(valueSize);
+ m_storage.Read(pos + sizeof(valueSize), m_data.data(), valueSize);
+ offset = 0;
+ }
+
+ // prepare correct reader
+ MemReader reader(m_data.data() + offset, valueSize);
+
+ value.Read(reader);
+
+
+// std::ofstream ff((m_name+".rlog").c_str(), std::ios::binary | std::ios::app);
+// ff << id << " " << value.ToString() << std::endl;
+// if (id == 1942060)
+// ff << id << " " << value.Dump() << std::endl;
return true;
}
else
{
- LOG_SHORT(LWARNING, ("Can't find offset in file ", m_offsets.GetFileName(), " by id ", id));
+ LOG_SHORT(LWARNING, ("Can't find offset in file", m_offsets.GetFileName(), "by id", id));
return false;
}
}
diff --git a/generator/generator_tests/intermediate_data_test.cpp b/generator/generator_tests/intermediate_data_test.cpp
new file mode 100644
index 0000000000..41a597c270
--- /dev/null
+++ b/generator/generator_tests/intermediate_data_test.cpp
@@ -0,0 +1,135 @@
+//
+// intermediate_data_test.cpp
+// generator_tool
+//
+// Created by Sergey Yershov on 20.08.15.
+// Copyright (c) 2015 maps.me. All rights reserved.
+//
+
+#include "testing/testing.hpp"
+
+#include "generator/osm_decl.hpp"
+
+// Round-trip check for a WayElement without nodes: Write() it to a memory buffer, then Read() it back.
+UNIT_TEST(Intermediate_Data_empty_way_element_save_load_test)
+{
+ WayElement e1(1 /* fake osm id */);
+ // Serialize the node-less way into an in-memory byte buffer.
+ using TBuffer = vector<uint8_t>;
+ TBuffer buffer;
+ MemWriter<TBuffer> w(buffer);
+
+ e1.Write(w);
+ // Deserialize from exactly the bytes that were just written.
+ MemReader r(buffer.data(), buffer.size());
+
+ WayElement e2(1 /* fake osm id */);
+
+ e2.Read(r);
+ // No nodes were added to e1, so e2 is expected to come back with none.
+ TEST_EQUAL(e2.nodes.size(), 0, ());
+}
+
+// Round-trip check for a WayElement with nodes: node ids must survive Write()/Read() in the same order.
+UNIT_TEST(Intermediate_Data_way_element_save_load_test)
+{
+ WayElement e1(1 /* fake osm id */);
+ // Node ids cover small values plus the largest 32-bit and 64-bit unsigned values.
+ e1.nodes.push_back(0);
+ e1.nodes.push_back(1);
+ e1.nodes.push_back(2);
+ e1.nodes.push_back(3);
+ e1.nodes.push_back(0xFFFFFFFF);
+ e1.nodes.push_back(0xFFFFFFFFFFFFFFFF);
+ // Serialize the way into an in-memory byte buffer.
+ using TBuffer = vector<uint8_t>;
+ TBuffer buffer;
+ MemWriter<TBuffer> w(buffer);
+
+ e1.Write(w);
+ // Deserialize from exactly the bytes that were just written.
+ MemReader r(buffer.data(), buffer.size());
+
+ WayElement e2(1 /* fake osm id */);
+
+ e2.Read(r);
+ // Both the node count and every id, position by position, must match what was written.
+ TEST_EQUAL(e2.nodes.size(), 6, ());
+ TEST_EQUAL(e2.nodes[0], 0, ());
+ TEST_EQUAL(e2.nodes[1], 1, ());
+ TEST_EQUAL(e2.nodes[2], 2, ());
+ TEST_EQUAL(e2.nodes[3], 3, ());
+ TEST_EQUAL(e2.nodes[4], 0xFFFFFFFF, ());
+ TEST_EQUAL(e2.nodes[5], 0xFFFFFFFFFFFFFFFF, ());
+}
+// Round-trip check for a RelationElement: node members, way members and tags must survive Write()/Read().
+UNIT_TEST(Intermediate_Data_relation_element_save_load_test)
+{
+ RelationElement e1;
+ // Four node members and four way members, each an (id, role) pair; one role contains a space.
+ e1.nodes.emplace_back(1, "inner");
+ e1.nodes.emplace_back(2, "outer");
+ e1.nodes.emplace_back(3, "unknown");
+ e1.nodes.emplace_back(4, "inner role");
+
+ e1.ways.emplace_back(1, "inner");
+ e1.ways.emplace_back(2, "outer");
+ e1.ways.emplace_back(3, "unknown");
+ e1.ways.emplace_back(4, "inner role");
+
+ e1.tags.emplace("key1","value1");
+ e1.tags.emplace("key2","value2");
+ e1.tags.emplace("key3","value3");
+ e1.tags.emplace("key4","value4");
+ // Serialize the relation into an in-memory byte buffer.
+ using TBuffer = vector<uint8_t>;
+ TBuffer buffer;
+ MemWriter<TBuffer> w(buffer);
+
+ e1.Write(w);
+
+ MemReader r(buffer.data(), buffer.size());
+
+ RelationElement e2;
+ // Pre-fill the target with unrelated members and tags so the test can tell whether Read() replaces old content.
+ e2.nodes.emplace_back(30, "000unknown");
+ e2.nodes.emplace_back(40, "000inner role");
+ e2.ways.emplace_back(10, "000inner");
+ e2.ways.emplace_back(20, "000outer");
+ e2.tags.emplace("key1old","value1old");
+ e2.tags.emplace("key2old","value2old");
+
+ e2.Read(r);
+ // Expected sizes are those of e1 (4 each), not of the two pre-filled entries per container.
+ TEST_EQUAL(e2.nodes.size(), 4, ());
+ TEST_EQUAL(e2.ways.size(), 4, ());
+ TEST_EQUAL(e2.tags.size(), 4, ());
+
+ TEST_EQUAL(e2.nodes[0].first, 1, ());
+ TEST_EQUAL(e2.nodes[1].first, 2, ());
+ TEST_EQUAL(e2.nodes[2].first, 3, ());
+ TEST_EQUAL(e2.nodes[3].first, 4, ());
+
+ TEST_EQUAL(e2.nodes[0].second, "inner", ());
+ TEST_EQUAL(e2.nodes[1].second, "outer", ());
+ TEST_EQUAL(e2.nodes[2].second, "unknown", ());
+ TEST_EQUAL(e2.nodes[3].second, "inner role", ());
+
+ TEST_EQUAL(e2.ways[0].first, 1, ());
+ TEST_EQUAL(e2.ways[1].first, 2, ());
+ TEST_EQUAL(e2.ways[2].first, 3, ());
+ TEST_EQUAL(e2.ways[3].first, 4, ());
+
+ TEST_EQUAL(e2.ways[0].second, "inner", ());
+ TEST_EQUAL(e2.ways[1].second, "outer", ());
+ TEST_EQUAL(e2.ways[2].second, "unknown", ());
+ TEST_EQUAL(e2.ways[3].second, "inner role", ());
+
+ TEST_EQUAL(e2.tags["key1"], "value1", ());
+ TEST_EQUAL(e2.tags["key2"], "value2", ());
+ TEST_EQUAL(e2.tags["key3"], "value3", ());
+ TEST_EQUAL(e2.tags["key4"], "value4", ());
+ // The pre-filled tags must not keep their old values after Read().
+ TEST_NOT_EQUAL(e2.tags["key1old"], "value1old", ());
+ TEST_NOT_EQUAL(e2.tags["key2old"], "value2old", ());
+}
diff --git a/generator/osm_decl.hpp b/generator/osm_decl.hpp
index 561c35b5e9..f0ebf78c0a 100644
--- a/generator/osm_decl.hpp
+++ b/generator/osm_decl.hpp
@@ -3,8 +3,13 @@
#include "base/assert.hpp"
#include "base/std_serialization.hpp"
+#include "coding/reader.hpp"
+#include "coding/varint.hpp"
+#include "coding/writer.hpp"
+
#include "std/algorithm.hpp"
#include "std/bind.hpp"
+#include "std/limits.hpp"
#include "std/string.hpp"
#include "std/utility.hpp"
#include "std/vector.hpp"
@@ -49,16 +54,23 @@ struct WayElement
for_each(nodes.rbegin(), nodes.rend(), ref(toDo));
}
- template <class TArchive>
- void Write(TArchive & ar) const
+ template <class TWriter>
+ void Write(TWriter & writer) const
{
- ar << nodes;
+ uint64_t count = nodes.size();
+ WriteVarUint(writer, count);
+ for (uint64_t e : nodes)
+ WriteVarUint(writer, e);
}
- template <class TArchive>
- void Read(TArchive & ar)
+ template <class TReader>
+ void Read(TReader & reader)
{
- ar >> nodes;
+ ReaderSource<MemReader> r(reader);
+ uint64_t count = ReadVarUint<uint64_t>(r);
+ nodes.resize(count);
+ for (uint64_t & e : nodes)
+ e = ReadVarUint<uint64_t>(r);
}
string ToString() const
@@ -67,6 +79,14 @@ struct WayElement
ss << nodes.size() << " " << m_wayOsmId;
return ss.str();
}
+
+ string Dump() const
+ {
+ stringstream ss;
+ for (auto const & e : nodes)
+ ss << e << ";";
+ return ss.str();
+ }
};
class RelationElement
@@ -104,16 +124,86 @@ public:
tags.swap(rhs.tags);
}
- template <class TArchive>
- void Write(TArchive & ar) const
+ template <class TWriter>
+ void Write(TWriter & writer) const
{
- ar << nodes << ways << tags;
+ auto StringWriter = [&writer, this](string const & str)
+ {
+ CHECK_LESS(str.size(), numeric_limits<uint16_t>::max(),
+ ("Can't store string greater then 65535 bytes", Dump()));
+ uint16_t sz = static_cast<uint16_t>(str.size());
+ writer.Write(&sz, sizeof(sz));
+ writer.Write(str.data(), sz);
+ };
+
+ auto MembersWriter = [&writer, &StringWriter](TMembers const & members)
+ {
+ uint64_t count = members.size();
+ WriteVarUint(writer, count);
+ for (auto const & e : members)
+ {
+ // write id
+ WriteVarUint(writer, e.first);
+ // write role
+ StringWriter(e.second);
+ }
+ };
+
+ MembersWriter(nodes);
+ MembersWriter(ways);
+
+ uint64_t count = tags.size();
+ WriteVarUint(writer, count);
+ for (auto const & e : tags)
+ {
+ // write key
+ StringWriter(e.first);
+ // write value
+ StringWriter(e.second);
+ }
}
- template <class TArchive>
- void Read(TArchive & ar)
+ template <class TReader>
+ void Read(TReader & reader)
{
- ar >> nodes >> ways >> tags;
+ ReaderSource<TReader> r(reader);
+
+ auto StringReader = [&r](string & str)
+ {
+ uint16_t sz = 0;
+ r.Read(&sz, sizeof(sz));
+ str.resize(sz);
+ r.Read(&str[0], sz);
+ };
+
+ auto MembersReader = [&r, &StringReader](TMembers & members)
+ {
+ uint64_t count = ReadVarUint<uint64_t>(r);
+ members.resize(count);
+ for (auto & e : members)
+ {
+ // decode id
+ e.first = ReadVarUint<uint64_t>(r);
+ // decode role
+ StringReader(e.second);
+ }
+ };
+
+ MembersReader(nodes);
+ MembersReader(ways);
+
+ // decode tags
+ tags.clear();
+ uint64_t count = ReadVarUint<uint64_t>(r);
+ for (uint64_t i = 0; i < count; ++i)
+ {
+ pair<string, string> kv;
+ // decode key
+ StringReader(kv.first);
+ // decode value
+ StringReader(kv.second);
+ tags.emplace(kv);
+ }
}
string ToString() const
@@ -123,6 +213,18 @@ public:
return ss.str();
}
+ string Dump() const
+ {
+ stringstream ss;
+ for (auto const & e : nodes)
+ ss << "n{" << e.first << "," << e.second << "};";
+ for (auto const & e : ways)
+ ss << "w{" << e.first << "," << e.second << "};";
+ for (auto const & e : tags)
+ ss << "t{" << e.first << "," << e.second << "};";
+ return ss.str();
+ }
+
protected:
bool FindRoleImpl(TMembers const & container, uint64_t id, string & role) const
{
diff --git a/generator/osm_source.cpp b/generator/osm_source.cpp
index 207ef7387e..d57abebac6 100644
--- a/generator/osm_source.cpp
+++ b/generator/osm_source.cpp
@@ -111,8 +111,8 @@ class IntermediateData
public:
IntermediateData(TNodesHolder & nodes, string const & dir)
: m_nodes(nodes)
- , m_ways(my::JoinFoldersToPath(dir, WAYS_FILE))
- , m_relations(my::JoinFoldersToPath(dir, RELATIONS_FILE))
+ , m_ways(my::JoinFoldersToPath(dir, WAYS_FILE), true)
+ , m_relations(my::JoinFoldersToPath(dir, RELATIONS_FILE), true)
, m_nodeToRelations(my::JoinFoldersToPath(dir, string(NODES_FILE) + ID2REL_EXT))
, m_wayToRelations(my::JoinFoldersToPath(dir, string(WAYS_FILE) + ID2REL_EXT))
{