Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnatoly Serdtcev <serdtcev@maps.me>2019-01-31 19:32:25 +0300
committerAnatoly Serdtcev <serdtcev@maps.me>2019-01-31 19:32:25 +0300
commit0e8477ad8bd0f5d6f0ce63a0e766f32ccc2885c4 (patch)
tree40c92cdaa9eef443def5b42e906e23f9df26b7b9 /geocoder
parent07ef24680d16929bbd900d65f8658b74382997c0 (diff)
[geocoder] Fix for review
Diffstat (limited to 'geocoder')
-rw-r--r--geocoder/geocoder.cpp4
-rw-r--r--geocoder/geocoder_tests/geocoder_tests.cpp2
-rw-r--r--geocoder/hierarchy.hpp16
-rw-r--r--geocoder/hierarchy_reader.cpp23
-rw-r--r--geocoder/hierarchy_reader.hpp5
-rw-r--r--geocoder/index.cpp10
-rw-r--r--geocoder/index.hpp4
7 files changed, 37 insertions, 27 deletions
diff --git a/geocoder/geocoder.cpp b/geocoder/geocoder.cpp
index 7d5784e17c..266d68c923 100644
--- a/geocoder/geocoder.cpp
+++ b/geocoder/geocoder.cpp
@@ -2,10 +2,10 @@
#include "geocoder/hierarchy_reader.hpp"
-#include "indexer/search_string_utils.hpp"
-
#include "search/house_numbers_matcher.hpp"
+#include "indexer/search_string_utils.hpp"
+
#include "base/assert.hpp"
#include "base/logging.hpp"
#include "base/scope_guard.hpp"
diff --git a/geocoder/geocoder_tests/geocoder_tests.cpp b/geocoder/geocoder_tests/geocoder_tests.cpp
index 27a9d8a5ba..1f0329e4d5 100644
--- a/geocoder/geocoder_tests/geocoder_tests.cpp
+++ b/geocoder/geocoder_tests/geocoder_tests.cpp
@@ -164,7 +164,7 @@ UNIT_TEST(Geocoder_EmptyFileConcurrentRead)
UNIT_TEST(Geocoder_BigFileConcurrentRead)
{
- int const kEntryCount = 1000000;
+ int const kEntryCount = 100000;
stringstream s;
for (int i = 0; i < kEntryCount; ++i)
diff --git a/geocoder/hierarchy.hpp b/geocoder/hierarchy.hpp
index 89198bd7af..8579db75fb 100644
--- a/geocoder/hierarchy.hpp
+++ b/geocoder/hierarchy.hpp
@@ -20,31 +20,31 @@ public:
struct ParsingStats
{
// Number of entries that the hierarchy was constructed from.
- uint64_t m_numLoaded{0};
+ uint64_t m_numLoaded = 0;
// Number of corrupted json lines.
- uint64_t m_badJsons{0};
+ uint64_t m_badJsons = 0;
// Number of entries with unreadable base::GeoObjectIds.
- uint64_t m_badOsmIds{0};
+ uint64_t m_badOsmIds = 0;
// Number of base::GeoObjectsIds that occur as keys in at least two entries.
- uint64_t m_duplicateOsmIds{0};
+ uint64_t m_duplicateOsmIds = 0;
// Number of entries with duplicate subfields in the address field.
- uint64_t m_duplicateAddresses{0};
+ uint64_t m_duplicateAddresses = 0;
// Number of entries whose address field either does
// not exist or consists of empty lines.
- uint64_t m_emptyAddresses{0};
+ uint64_t m_emptyAddresses = 0;
// Number of entries without the name field or with an empty one.
- uint64_t m_emptyNames{0};
+ uint64_t m_emptyNames = 0;
// Number of entries whose names do not match the most
// specific parts of their addresses.
// This is expected from POIs but not from regions or streets.
- uint64_t m_mismatchedNames{0};
+ uint64_t m_mismatchedNames = 0;
};
// A single entry in the hierarchy directed acyclic graph.
diff --git a/geocoder/hierarchy_reader.cpp b/geocoder/hierarchy_reader.cpp
index f917d4bb6c..c86e60d490 100644
--- a/geocoder/hierarchy_reader.cpp
+++ b/geocoder/hierarchy_reader.cpp
@@ -14,7 +14,7 @@ namespace
// Information will be logged for every |kLogBatch| entries.
size_t const kLogBatch = 100000;
-void operator += (Hierarchy::ParsingStats & accumulator, Hierarchy::ParsingStats & stats)
+void operator+=(Hierarchy::ParsingStats & accumulator, Hierarchy::ParsingStats & stats)
{
struct ValidationStats
{
@@ -32,7 +32,6 @@ void operator += (Hierarchy::ParsingStats & accumulator, Hierarchy::ParsingStats
accumulator.m_emptyNames += stats.m_emptyNames;
accumulator.m_mismatchedNames += stats.m_mismatchedNames;
}
-
} // namespace
HierarchyReader::HierarchyReader(string const & pathToJsonHierarchy)
@@ -47,11 +46,13 @@ HierarchyReader::HierarchyReader(std::istream & in)
{
}
-Hierarchy HierarchyReader::Read(size_t readersCount)
+Hierarchy HierarchyReader::Read(unsigned int readersCount)
{
LOG(LINFO, ("Reading entries..."));
- readersCount = min(readersCount, size_t{thread::hardware_concurrency()});
+ if (auto hardwareConcurrency = thread::hardware_concurrency())
+ readersCount = min(hardwareConcurrency, readersCount);
+ readersCount = max(1U, readersCount);
vector<multimap<base::GeoObjectId, Entry>> taskEntries(readersCount);
vector<ParsingStats> tasksStats(readersCount);
@@ -109,7 +110,7 @@ vector<Hierarchy::Entry> HierarchyReader::MergeEntries(vector<multimap<base::Geo
auto partsQueue = priority_queue<PartReference, std::vector<PartReference>, ReferenceGreater>
(entryParts.begin(), entryParts.end());
- while (partsQueue.size())
+ while (!partsQueue.empty())
{
auto & minPart = partsQueue.top().get();
partsQueue.pop();
@@ -120,7 +121,7 @@ vector<Hierarchy::Entry> HierarchyReader::MergeEntries(vector<multimap<base::Geo
minPart.erase(minPart.begin());
}
- if (minPart.size())
+ if (!minPart.empty())
partsQueue.push(ref(minPart));
}
@@ -153,9 +154,9 @@ void HierarchyReader::ReadEntryMap(multimap<base::GeoObjectId, Entry> & entries,
// Temporary local object for efficient concurent processing (individual cache line for container).
auto localEntries = multimap<base::GeoObjectId, Entry>{};
- int const kLineBufferCapacity = 10000;
+ size_t const kLineBufferCapacity = 10000;
vector<string> linesBuffer(kLineBufferCapacity);
- int bufferSize = 0;
+ size_t bufferSize = 0;
while (true)
{
@@ -180,10 +181,10 @@ void HierarchyReader::ReadEntryMap(multimap<base::GeoObjectId, Entry> & entries,
entries = move(localEntries);
}
-void HierarchyReader::DeserializeEntryMap(vector<string> const & linesBuffer, int const bufferSize,
+void HierarchyReader::DeserializeEntryMap(vector<string> const & linesBuffer, size_t const bufferSize,
multimap<base::GeoObjectId, Entry> & entries, ParsingStats & stats)
{
- for (int i = 0; i < bufferSize; ++i)
+ for (size_t i = 0; i < bufferSize; ++i)
{
auto & line = linesBuffer[i];
@@ -220,4 +221,4 @@ void HierarchyReader::DeserializeEntryMap(vector<string> const & linesBuffer, in
entries.emplace(osmId, move(entry));
}
}
-} // namespace geocoder
+} // namespace geocoder
diff --git a/geocoder/hierarchy_reader.hpp b/geocoder/hierarchy_reader.hpp
index 48ae156764..54b38073a5 100644
--- a/geocoder/hierarchy_reader.hpp
+++ b/geocoder/hierarchy_reader.hpp
@@ -25,12 +25,13 @@ public:
explicit HierarchyReader(std::string const & pathToJsonHierarchy);
explicit HierarchyReader(std::istream & in);
- Hierarchy Read(size_t readersCount = 4);
+ // Read hierarchy file/stream concurrency in |readersCount| threads.
+ Hierarchy Read(unsigned int readersCount = 4);
private:
void ReadEntryMap(std::multimap<base::GeoObjectId, Entry> & entries, ParsingStats & stats);
- void DeserializeEntryMap(std::vector<std::string> const & linesBuffer, int const bufferSize,
+ void DeserializeEntryMap(std::vector<std::string> const & linesBuffer, std::size_t const bufferSize,
std::multimap<base::GeoObjectId, Entry> & entries, ParsingStats & stats);
std::vector<Entry> MergeEntries(std::vector<std::multimap<base::GeoObjectId, Entry>> & entryParts);
diff --git a/geocoder/index.cpp b/geocoder/index.cpp
index f11e991c0a..7117805040 100644
--- a/geocoder/index.cpp
+++ b/geocoder/index.cpp
@@ -23,8 +23,13 @@ size_t const kLogBatch = 100000;
namespace geocoder
{
-Index::Index(Hierarchy const & hierarchy) : m_docs(hierarchy.GetEntries())
+Index::Index(Hierarchy const & hierarchy, unsigned int processingThreadsCount)
+ : m_docs(hierarchy.GetEntries()), m_processingThreadsCount{processingThreadsCount}
{
+ if (auto hardwareConcurrency = thread::hardware_concurrency())
+ m_processingThreadsCount = min(hardwareConcurrency, m_processingThreadsCount);
+ m_processingThreadsCount = max(1U, m_processingThreadsCount);
+
LOG(LINFO, ("Indexing hierarchy entries..."));
AddEntries();
LOG(LINFO, ("Indexing houses..."));
@@ -94,7 +99,8 @@ void Index::AddHouses()
atomic<size_t> numIndexed{0};
std::mutex mutex;
- vector<thread> threads(thread::hardware_concurrency());
+ vector<thread> threads(m_processingThreadsCount);
+ CHECK_GREATER(threads.size(), 0, ());
for (size_t t = 0; t < threads.size(); ++t)
{
diff --git a/geocoder/index.hpp b/geocoder/index.hpp
index 8a56fb4271..a2c96dc1ca 100644
--- a/geocoder/index.hpp
+++ b/geocoder/index.hpp
@@ -20,7 +20,7 @@ public:
// that the index was constructed from.
using DocId = std::vector<Doc>::size_type;
- explicit Index(Hierarchy const & hierarchy);
+ explicit Index(Hierarchy const & hierarchy, unsigned int processingThreadsCount = 4);
Doc const & GetDoc(DocId const id) const;
@@ -74,5 +74,7 @@ private:
// Lists of houses grouped by the streets they belong to.
std::unordered_map<DocId, std::vector<DocId>> m_buildingsOnStreet;
+
+ unsigned int m_processingThreadsCount;
};
} // namespace geocoder