Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: VladiMihaylenko <vxmihaylenko@gmail.com> 2018-09-28 17:36:37 +0300
committer: Vladimir Byko-Ianko <bykoianko@gmail.com> 2018-10-03 14:41:03 +0300
commit: c2cf27c33fe1781a71ed009e9cf57e760490620c (patch)
tree: 2734cee91090a9b6def485960e1e456419fa8cf3 /track_analyzing
parent: 424037a4bb0513ea4a8ef82cf929dec6877a7023 (diff)
Review fixes and parallel matching
Diffstat (limited to 'track_analyzing')
-rw-r--r-- track_analyzing/track_analyzer/cmd_match.cpp 115
-rw-r--r-- track_analyzing/track_analyzer/cmd_table.cpp 139
-rw-r--r-- track_analyzing/track_analyzer/track_analyzer.cpp 9
-rw-r--r-- track_analyzing/utils.hpp 5
4 files changed, 217 insertions, 51 deletions
diff --git a/track_analyzing/track_analyzer/cmd_match.cpp b/track_analyzing/track_analyzer/cmd_match.cpp
index cdb6ef1fef..2b0a2de3c7 100644
--- a/track_analyzing/track_analyzer/cmd_match.cpp
+++ b/track_analyzing/track_analyzer/cmd_match.cpp
@@ -8,20 +8,31 @@
#include "storage/storage.hpp"
+#include "coding/file_name_utils.hpp"
+#include "coding/file_reader.hpp"
+#include "coding/file_writer.hpp"
+#include "coding/zlib.hpp"
+
#include "platform/platform.hpp"
+#include "base/assert.hpp"
#include "base/logging.hpp"
#include "base/timer.hpp"
+#include <algorithm>
#include <memory>
#include <string>
+#include <thread>
using namespace routing;
using namespace std;
+using namespace storage;
using namespace track_analyzing;
namespace
{
+using Iter = typename vector<string>::iterator;
+
void MatchTracks(MwmToTracks const & mwmToTracks, storage::Storage const & storage,
NumMwmIds const & numMwmIds, MwmToMatchedTracks & mwmToMatchedTracks)
{
@@ -78,11 +89,8 @@ void MatchTracks(MwmToTracks const & mwmToTracks, storage::Storage const & stora
namespace track_analyzing
{
-void CmdMatch(string const & logFile, string const & trackFile)
+void CmdMatch(string const & logFile, string const & trackFile, shared_ptr<NumMwmIds> numMwmIds, Storage & storage)
{
- LOG(LINFO, ("Matching", logFile));
- shared_ptr<NumMwmIds> numMwmIds;
- storage::Storage storage;
MwmToTracks mwmToTracks;
ParseTracks(logFile, numMwmIds, storage, mwmToTracks);
@@ -94,4 +102,103 @@ void CmdMatch(string const & logFile, string const & trackFile)
serializer.Serialize(mwmToMatchedTracks, writer);
LOG(LINFO, ("Matched tracks were saved to", trackFile));
}
+
+void CmdMatch(string const & logFile, string const & trackFile)
+{
+ LOG(LINFO, ("Matching", logFile));
+ shared_ptr<NumMwmIds> numMwmIds;
+ Storage storage;
+ CmdMatch(logFile, trackFile, numMwmIds, storage);
+}
+
+void UnzipAndMatch(Iter begin, Iter end, string const & trackExt, shared_ptr<NumMwmIds> numMwmIds)
+{
+ Storage storage;
+ for (auto it = begin; it != end; ++it)
+ {
+ auto & file = *it;
+ string data;
+ try
+ {
+ auto const r = GetPlatform().GetReader(file);
+ r->ReadAsString(data);
+ }
+ catch (FileReader::ReadException const & e)
+ {
+ LOG(LWARNING, (e.what()));
+ continue;
+ }
+
+ using Inflate = coding::ZLib::Inflate;
+ Inflate inflate(Inflate::Format::GZip);
+ string track;
+ inflate(data.data(), data.size(), back_inserter(track));
+ base::GetNameWithoutExt(file);
+ try
+ {
+ FileWriter w(file);
+ w.Write(track.data(), track.size());
+ }
+ catch (FileWriter::WriteException const & e)
+ {
+ LOG(LWARNING, (e.what()));
+ continue;
+ }
+
+ CmdMatch(file, file + trackExt, numMwmIds, storage);
+ FileWriter::DeleteFileX(file);
+ }
+}
+
+void CmdMatchDir(string const & logDir, string const & trackExt)
+{
+ Platform::EFileType fileType = Platform::FILE_TYPE_UNKNOWN;
+ Platform::EError const result = Platform::GetFileType(logDir, fileType);
+
+ if (result == Platform::ERR_FILE_DOES_NOT_EXIST)
+ {
+ LOG(LINFO, ("Directory doesn't exist", logDir));
+ return;
+ }
+
+ if (result != Platform::ERR_OK)
+ {
+ LOG(LINFO, ("Can't get file type for", logDir));
+ return;
+ }
+
+ if (fileType != Platform::FILE_TYPE_DIRECTORY)
+ {
+ LOG(LINFO, (logDir, "is not a directory."));
+ return;
+ }
+
+ Platform::FilesList filesList;
+ Platform::GetFilesRecursively(logDir, filesList);
+ if (filesList.empty())
+ {
+ LOG(LINFO, (logDir, "is empty."));
+ return;
+ }
+
+ shared_ptr<NumMwmIds> numMwmIds;
+ auto const size = filesList.size();
+ auto const hardwareConcurrency = static_cast<size_t>(thread::hardware_concurrency());
+ CHECK_GREATER(hardwareConcurrency, 0, ("No available threads."));
+ LOG(LINFO, ("Number of available threads =", hardwareConcurrency));
+ auto const threadsCount = min(size, hardwareConcurrency);
+ auto const blockSize = size / threadsCount;
+ vector<thread> threads(threadsCount - 1);
+ auto begin = filesList.begin();
+ for (size_t i = 0; i < threadsCount - 1; ++i)
+ {
+ auto end = begin + blockSize;
+ threads[i] = thread(UnzipAndMatch, begin, end, trackExt, numMwmIds);
+ begin = end;
+ }
+
+ UnzipAndMatch(begin, filesList.end(), trackExt, numMwmIds);
+ for (auto & t : threads)
+ t.join();
+}
} // namespace track_analyzing
diff --git a/track_analyzing/track_analyzer/cmd_table.cpp b/track_analyzing/track_analyzer/cmd_table.cpp
index 8f94005867..9da281dcd2 100644
--- a/track_analyzing/track_analyzer/cmd_table.cpp
+++ b/track_analyzing/track_analyzer/cmd_table.cpp
@@ -20,10 +20,18 @@
#include "coding/file_name_utils.hpp"
#include "coding/file_reader.hpp"
+#include "base/assert.hpp"
#include "base/sunrise_sunset.hpp"
#include "base/timer.hpp"
+#include <cstdint>
#include <iostream>
+#include <limits>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
#include "defines.hpp"
@@ -68,11 +76,29 @@ public:
m_hwtags.push_back(classif().GetTypeByPath(speedForType.m_types));
for (auto const & surface : CarModel::GetSurfaces())
- m_surfaceTags.push_back(classif().GetTypeByPath(vector<string>(surface.m_types, surface.m_types + 2)));
+ m_surfaceTags.push_back(classif().GetTypeByPath(surface.m_types));
}
struct Type
{
+ bool operator<(Type const & rhs) const
+ {
+ if (m_hwType != rhs.m_hwType)
+ return m_hwType < rhs.m_hwType;
+
+ return m_surfaceType < rhs.m_surfaceType;
+ }
+
+ bool operator==(Type const & rhs) const
+ {
+ return m_hwType == rhs.m_hwType && m_surfaceType == rhs.m_surfaceType;
+ }
+
+ bool operator!=(Type const & rhs) const
+ {
+ return !(*this == rhs);
+ }
+
uint32_t m_hwType = 0;
uint32_t m_surfaceType = 0;
};
@@ -109,6 +135,27 @@ private:
struct RoadInfo
{
+ bool operator==(RoadInfo const & rhs) const
+ {
+ return m_type == rhs.m_type && m_isCityRoad == rhs.m_isCityRoad && m_isOneWay == rhs.m_isOneWay;
+ }
+
+ bool operator!=(RoadInfo const & rhs) const
+ {
+ return !(*this == rhs);
+ }
+
+ bool operator<(RoadInfo const & rhs) const
+ {
+ if (m_type != rhs.m_type)
+ return m_type < rhs.m_type;
+
+ if (m_isCityRoad != rhs.m_isCityRoad)
+ return !m_isCityRoad;
+
+ return !m_isOneWay;
+ }
+
CarModelTypes::Type m_type;
bool m_isCityRoad = false;
bool m_isOneWay = false;
@@ -119,37 +166,43 @@ class MoveType final
public:
MoveType() = default;
- MoveType(RoadInfo const & roadType, traffic::SpeedGroup speedGroup)
- : m_roadInfo(roadType), m_speedGroup(speedGroup)
+ MoveType(RoadInfo const & roadType, traffic::SpeedGroup speedGroup, bool isDayTime)
+ : m_roadInfo(roadType), m_speedGroup(speedGroup), m_isDayTime(isDayTime)
{
}
bool operator==(MoveType const & rhs) const
{
- return m_roadInfo.m_type.m_hwType == rhs.m_roadInfo.m_type.m_hwType &&
- m_roadInfo.m_type.m_surfaceType == rhs.m_roadInfo.m_type.m_surfaceType &&
- m_speedGroup == rhs.m_speedGroup;
+ return m_roadInfo == rhs.m_roadInfo && m_speedGroup == rhs.m_speedGroup && m_isDayTime == rhs.m_isDayTime;
}
bool operator<(MoveType const & rhs) const
{
- if (m_roadInfo.m_type.m_hwType != rhs.m_roadInfo.m_type.m_hwType)
- return m_roadInfo.m_type.m_hwType < rhs.m_roadInfo.m_type.m_hwType;
+ if (m_roadInfo != rhs.m_roadInfo)
+ return m_roadInfo < rhs.m_roadInfo;
- if (m_roadInfo.m_type.m_surfaceType != rhs.m_roadInfo.m_type.m_surfaceType)
- return m_roadInfo.m_type.m_surfaceType < rhs.m_roadInfo.m_type.m_surfaceType;
+ if (m_speedGroup != rhs.m_speedGroup)
+ return m_speedGroup < rhs.m_speedGroup;
- return m_speedGroup < rhs.m_speedGroup;
+ return !m_isDayTime;
}
- string ToString() const
+ bool IsValid() const
+ {
+ // In order to collect cleaner data we don't use speed group lower than G5.
+ return m_roadInfo.m_type.m_hwType != 0 &&
+ m_roadInfo.m_type.m_surfaceType != 0 &&
+ m_speedGroup == traffic::SpeedGroup::G5;
+ }
+
+ string GetSummary() const
{
ostringstream out;
- out << TypeToString(m_roadInfo.m_type.m_hwType) << " " <<
- TypeToString(m_roadInfo.m_type.m_surfaceType) << " " <<
- m_roadInfo.m_isCityRoad << " " <<
- m_roadInfo.m_isOneWay << " " <<
- traffic::DebugPrint(m_speedGroup);
+ out << TypeToString(m_roadInfo.m_type.m_hwType) << ","
+ << TypeToString(m_roadInfo.m_type.m_surfaceType) << ","
+ << m_roadInfo.m_isCityRoad << ","
+ << m_roadInfo.m_isOneWay << ","
+ << m_isDayTime;
return out.str();
}
@@ -157,6 +210,7 @@ public:
private:
RoadInfo m_roadInfo;
traffic::SpeedGroup m_speedGroup = traffic::SpeedGroup::Unknown;
+ bool m_isDayTime;
};
class SpeedInfo final
@@ -170,8 +224,12 @@ public:
void Add(SpeedInfo const & rhs) { Add(rhs.m_totalDistance, rhs.m_totalTime); }
- double GetDistance() const { return m_totalDistance; }
- uint64_t GetTime() const { return m_totalTime; }
+ string GetSummary() const
+ {
+ ostringstream out;
+ out << m_totalDistance << "," << m_totalTime << "," << CalcSpeedKMpH(m_totalDistance, m_totalTime);
+ return out.str();
+ }
private:
double m_totalDistance = 0.0;
@@ -181,7 +239,9 @@ private:
class MoveTypeAggregator final
{
public:
- void Add(MoveType const moveType, MatchedTrack::const_iterator begin,
+ MoveTypeAggregator(string const & mwmName) : m_mwmName(mwmName) {}
+
+ void Add(MoveType && moveType, MatchedTrack::const_iterator begin,
MatchedTrack::const_iterator end, Geometry & geometry)
{
if (begin + 1 >= end)
@@ -201,18 +261,12 @@ public:
string GetSummary() const
{
ostringstream out;
- out << std::fixed << std::setprecision(1);
-
- bool firstIteration = true;
- for (auto it : m_moveInfos)
+ for (auto const & it : m_moveInfos)
{
- SpeedInfo const & speedInfo = it.second;
- if (firstIteration)
- firstIteration = false;
- else
- out << " * ";
+ if (!it.first.IsValid())
+ continue;
- out << it.first.ToString() << " " << speedInfo.GetDistance() << " " << speedInfo.GetTime();
+ out << m_mwmName << "," << it.first.GetSummary() << "," << it.second.GetSummary() << '\n';
}
return out.str();
@@ -220,6 +274,7 @@ public:
private:
map<MoveType, SpeedInfo> m_moveInfos;
+ string const & m_mwmName;
};
class MatchedTrackPointToMoveType final
@@ -234,8 +289,10 @@ public:
MoveType GetMoveType(MatchedTrackPoint const & point)
{
+ auto const & dataPoint = point.GetDataPoint();
return MoveType(GetRoadInfo(point.GetSegment().GetFeatureId()),
- static_cast<traffic::SpeedGroup>(point.GetDataPoint().m_traffic));
+ static_cast<traffic::SpeedGroup>(dataPoint.m_traffic),
+ DayTimeToBool(GetDayTime(dataPoint.m_timestamp, dataPoint.m_latLon.lat, dataPoint.m_latLon.lon)));
}
private:
@@ -268,8 +325,7 @@ namespace track_analyzing
void CmdTagsTable(string const & filepath, string const & trackExtension, StringFilter mwmFilter,
StringFilter userFilter)
{
- cout << "mwm,user,track idx,is day,length in meters,time in seconds,speed in km/h,"
- "track summary(hw type surface type is_city_road is_one_way traffic distance time)" << endl;
+ cout << "mwm,hw type,surface type,is city road,is one way,is day,distance,time,speed km/h\n";
storage::Storage storage;
storage.RegisterAllLocalMaps(false /* enableDiffs */);
@@ -297,13 +353,7 @@ void CmdTagsTable(string const & filepath, string const & trackExtension, String
if (track.size() <= 1)
continue;
- auto const & dataPoint = track.front().GetDataPoint();
- uint64_t const start = dataPoint.m_timestamp;
- uint64_t const timeElapsed = track.back().GetDataPoint().m_timestamp - start;
- double const length = CalcTrackLength(track, geometry);
- double const speed = CalcSpeedKMpH(length, timeElapsed);
-
- MoveTypeAggregator aggregator;
+ MoveTypeAggregator aggregator(mwmName);
for (auto subTrackBegin = track.begin(); subTrackBegin != track.end();)
{
@@ -313,14 +363,13 @@ void CmdTagsTable(string const & filepath, string const & trackExtension, String
pointToMoveType.GetMoveType(*subTrackEnd) == moveType)
++subTrackEnd;
- aggregator.Add(moveType, subTrackBegin, subTrackEnd, geometry);
+ aggregator.Add(move(moveType), subTrackBegin, subTrackEnd, geometry);
subTrackBegin = subTrackEnd;
}
- cout << mwmName << "," << user << "," << trackIdx << ","
- << DayTimeToBool(GetDayTime(start, dataPoint.m_latLon.lat, dataPoint.m_latLon.lon))
- << "," << length << "," << timeElapsed << ","
- << speed << "," << aggregator.GetSummary() << endl;
+ auto const summary = aggregator.GetSummary();
+ if (!summary.empty())
+ cout << summary;
}
}
};
diff --git a/track_analyzing/track_analyzer/track_analyzer.cpp b/track_analyzing/track_analyzer/track_analyzer.cpp
index 392f4b4e38..f624da2f26 100644
--- a/track_analyzing/track_analyzer/track_analyzer.cpp
+++ b/track_analyzing/track_analyzer/track_analyzer.cpp
@@ -30,6 +30,8 @@ DEFINE_string_ext(cmd, "",
"match - based on raw logs gathers points to tracks and matches them to "
"features. To use the tool raw logs should be taken from \"trafin\" production "
"project in gz files and extracted.\n"
+ "match_dir - the same as match but applies to the directory with raw logs in gz format."
+ "Process files in several threads.\n"
"unmatched_tracks - based on raw logs gathers points to tracks\n"
"and save tracks to csv. Track points save as lat, log, timestamp in seconds\n"
"tracks - prints track statistics\n"
@@ -80,6 +82,8 @@ void CmdCppTrack(string const & trackFile, string const & mwmName, string const
size_t trackIdx);
// Match raw gps logs to tracks.
void CmdMatch(string const & logFile, string const & trackFile);
+// The same as match but applies for the directory with raw logs.
+void CmdMatchDir(string const & logDir, string const & trackExt);
// Parse |logFile| and save tracks (mwm name, aloha id, lats, lons, timestamps in seconds in csv).
void CmdUnmatchedTracks(string const & logFile, string const & trackFileCsv);
// Print aggregated tracks to csv table.
@@ -111,6 +115,11 @@ int main(int argc, char ** argv)
string const & logFile = Checked_in();
CmdMatch(logFile, FLAGS_out.empty() ? logFile + ".track" : FLAGS_out);
}
+ if (cmd == "match_dir")
+ {
+ string const & logDir = Checked_in();
+ CmdMatchDir(logDir, FLAGS_track_extension);
+ }
else if (cmd == "unmatched_tracks")
{
string const & logFile = Checked_in();
diff --git a/track_analyzing/utils.hpp b/track_analyzing/utils.hpp
index 89d1f5e030..91b650cd93 100644
--- a/track_analyzing/utils.hpp
+++ b/track_analyzing/utils.hpp
@@ -11,6 +11,7 @@
#include "platform/platform.hpp"
+#include <algorithm>
#include <cstdint>
#include <functional>
#include <memory>
@@ -41,7 +42,7 @@ void ForTracksSortedByMwmName(MwmToTracks const & mwmToTracks, routing::NumMwmId
mwmNames.reserve(mwmToTracks.size());
for (auto const & it : mwmToTracks)
mwmNames.push_back(numMwmIds.GetFile(it.first).GetName());
- sort(mwmNames.begin(), mwmNames.end());
+ std::sort(mwmNames.begin(), mwmNames.end());
for (auto const & mwmName : mwmNames)
{
@@ -54,6 +55,6 @@ void ForTracksSortedByMwmName(MwmToTracks const & mwmToTracks, routing::NumMwmId
void ForEachTrackFile(
std::string const & filepath, std::string const & extension,
- shared_ptr<routing::NumMwmIds> numMwmIds,
+ std::shared_ptr<routing::NumMwmIds> numMwmIds,
std::function<void(std::string const & filename, MwmToMatchedTracks const &)> && toDo);
} // namespace track_analyzing