Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  base/string_utils.cpp  44
-rw-r--r--  base/string_utils.hpp  89
-rw-r--r--  generator/booking_dataset.cpp  203
-rw-r--r--  generator/booking_dataset.hpp  6
4 files changed, 163 insertions, 179 deletions
diff --git a/base/string_utils.cpp b/base/string_utils.cpp
index 22dfad3030..de986d77a3 100644
--- a/base/string_utils.cpp
+++ b/base/string_utils.cpp
@@ -11,7 +11,6 @@
namespace strings
{
-
bool UniString::IsEqualAscii(char const * s) const
{
return (size() == strlen(s) && equal(begin(), end(), s));
@@ -71,7 +70,6 @@ bool to_uint(char const * start, unsigned int & i, int base /*= 10*/)
return IntegerCheck(start, stop, v, i);
}
-
bool to_uint64(char const * s, uint64_t & i)
{
char * stop;
@@ -183,33 +181,21 @@ void NormalizeDigits(UniString & us)
namespace
{
- char ascii_to_lower(char in)
- {
- char const diff = 'z' - 'Z';
- static_assert(diff == 'a' - 'A', "");
- static_assert(diff > 0, "");
-
- if (in >= 'A' && in <= 'Z')
- return (in + diff);
- return in;
- }
-}
-
-void AsciiToLower(string & s)
+char ascii_to_lower(char in)
{
- transform(s.begin(), s.end(), s.begin(), &ascii_to_lower);
-}
+ char const diff = 'z' - 'Z';
+ static_assert(diff == 'a' - 'A', "");
+ static_assert(diff > 0, "");
-void Trim(string & s)
-{
- boost::trim(s);
+ if (in >= 'A' && in <= 'Z')
+ return (in + diff);
+ return in;
}
-
-void Trim(string & s, char const * anyOf)
-{
- boost::trim_if(s, boost::is_any_of(anyOf));
}
+void AsciiToLower(string & s) { transform(s.begin(), s.end(), s.begin(), &ascii_to_lower); }
+void Trim(string & s) { boost::trim(s); }
+void Trim(string & s, char const * anyOf) { boost::trim_if(s, boost::is_any_of(anyOf)); }
bool EqualNoCase(string const & s1, string const & s2)
{
return MakeLowerCase(s1) == MakeLowerCase(s2);
@@ -238,9 +224,7 @@ bool IsASCIIString(string const & str)
}
bool IsASCIIDigit(UniChar c) { return c >= '0' && c <= '9'; }
-
bool IsASCIILatin(UniChar c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); }
-
bool StartsWith(UniString const & s, UniString const & p)
{
if (p.size() > s.size())
@@ -253,11 +237,7 @@ bool StartsWith(UniString const & s, UniString const & p)
return true;
}
-bool StartsWith(string const & s1, char const * s2)
-{
- return (s1.compare(0, strlen(s2), s2) == 0);
-}
-
+bool StartsWith(string const & s1, char const * s2) { return (s1.compare(0, strlen(s2), s2) == 0); }
bool EndsWith(string const & s1, char const * s2)
{
size_t const n = s1.size();
@@ -344,4 +324,4 @@ bool AlmostEqual(string const & str1, string const & str2, size_t mismatchedCoun
return false;
}
-} // namespace strings
+} // namespace strings
diff --git a/base/string_utils.hpp b/base/string_utils.hpp
index ca5449ae2b..85396b7f19 100644
--- a/base/string_utils.hpp
+++ b/base/string_utils.hpp
@@ -15,18 +15,22 @@
/// All methods work with strings in utf-8 format
namespace strings
{
-
typedef uint32_t UniChar;
-//typedef buffer_vector<UniChar, 32> UniString;
+// typedef buffer_vector<UniChar, 32> UniString;
/// Make new type, not typedef. Need to specialize DebugPrint.
class UniString : public buffer_vector<UniChar, 32>
{
typedef buffer_vector<UniChar, 32> BaseT;
+
public:
UniString() {}
explicit UniString(size_t n, UniChar c = UniChar()) : BaseT(n, c) {}
- template <class IterT> UniString(IterT b, IterT e) : BaseT(b, e) {}
+ template <class IterT>
+ UniString(IterT b, IterT e)
+ : BaseT(b, e)
+ {
+ }
bool IsEqualAscii(char const * s) const;
@@ -83,10 +87,7 @@ bool IsASCIIString(string const & str);
bool IsASCIIDigit(UniChar c);
bool IsASCIILatin(UniChar c);
-inline string DebugPrint(UniString const & s)
-{
- return ToUtf8(s);
-}
+inline string DebugPrint(UniString const & s) { return ToUtf8(s); }
template <typename DelimFuncT, typename UniCharIterT = UniString::const_iterator>
class TokenizeIterator
@@ -117,14 +118,14 @@ class TokenizeIterator
public:
/// @warning string S must be not temporary!
TokenizeIterator(string const & s, DelimFuncT const & delimFunc)
- : m_beg(s.begin()), m_end(s.begin()), m_finish(s.end()), m_delimFunc(delimFunc)
+ : m_beg(s.begin()), m_end(s.begin()), m_finish(s.end()), m_delimFunc(delimFunc)
{
move();
}
/// @warning unistring S must be not temporary!
TokenizeIterator(UniString const & s, DelimFuncT const & delimFunc)
- : m_beg(s.begin()), m_end(s.begin()), m_finish(s.end()), m_delimFunc(delimFunc)
+ : m_beg(s.begin()), m_end(s.begin()), m_finish(s.end()), m_delimFunc(delimFunc)
{
move();
}
@@ -137,12 +138,12 @@ public:
string operator*() const
{
- ASSERT( m_beg != m_finish, ("dereferencing of empty iterator") );
+ ASSERT(m_beg != m_finish, ("dereferencing of empty iterator"));
return string(m_beg.base(), m_end.base());
}
operator bool() const { return m_beg != m_finish; }
-
+
TokenizeIterator & operator++()
{
move();
@@ -159,11 +160,7 @@ public:
return !copy;
}
- UniString GetUniString() const
- {
- return UniString(m_beg, m_end);
- }
-
+ UniString GetUniString() const { return UniString(m_beg, m_end); }
/// Same as operator bool() in expression it == end(...)
bool operator==(TokenizeIterator const &) { return !(*this); }
/// Same as operator bool() in expression it != end(...)
@@ -173,6 +170,7 @@ public:
class SimpleDelimiter
{
UniString m_delims;
+
public:
SimpleDelimiter(char const * delimChars);
// Used in TokenizeIterator to allow past the end iterator construction.
@@ -181,8 +179,8 @@ public:
bool operator()(UniChar c) const;
};
-typedef TokenizeIterator<SimpleDelimiter,
- ::utf8::unchecked::iterator<string::const_iterator> > SimpleTokenizer;
+typedef TokenizeIterator<SimpleDelimiter, ::utf8::unchecked::iterator<string::const_iterator>>
+ SimpleTokenizer;
template <typename TFunctor>
void Tokenize(string const & str, char const * delims, TFunctor && f)
@@ -198,7 +196,8 @@ void Tokenize(string const & str, char const * delims, TFunctor && f)
/// @return code of last symbol in string or 0 if s is empty
UniChar LastUniChar(string const & s);
-template <class T, size_t N, class TT> bool IsInArray(T (&arr) [N], TT const & t)
+template <class T, size_t N, class TT>
+bool IsInArray(T(&arr)[N], TT const & t)
{
for (size_t i = 0; i < N; ++i)
if (arr[i] == t)
@@ -214,10 +213,17 @@ bool to_uint64(char const * s, uint64_t & i);
bool to_int64(char const * s, int64_t & i);
bool to_double(char const * s, double & d);
-inline bool is_number(string const & s) { int64_t dummy; return to_int64(s.c_str(), dummy); }
+inline bool is_number(string const & s)
+{
+ int64_t dummy;
+ return to_int64(s.c_str(), dummy);
+}
inline bool to_int(string const & s, int & i, int base = 10) { return to_int(s.c_str(), i, base); }
-inline bool to_uint(string const & s, unsigned int & i, int base = 10) { return to_uint(s.c_str(), i, base); }
+inline bool to_uint(string const & s, unsigned int & i, int base = 10)
+{
+ return to_uint(s.c_str(), i, base);
+}
inline bool to_uint64(string const & s, uint64_t & i) { return to_uint64(s.c_str(), i); }
inline bool to_int64(string const & s, int64_t & i) { return to_int64(s.c_str(), i); }
inline bool to_double(string const & s, double & d) { return to_double(s.c_str(), d); }
@@ -225,17 +231,10 @@ inline bool to_double(string const & s, double & d) { return to_double(s.c_str()
/// @name From numeric to string.
//@{
-inline string to_string(string const & s)
-{
- return s;
-}
-
-inline string to_string(char const * s)
-{
- return s;
-}
-
-template <typename T> string to_string(T t)
+inline string to_string(string const & s) { return s; }
+inline string to_string(char const * s) { return s; }
+template <typename T>
+string to_string(T t)
{
ostringstream ss;
ss << t;
@@ -261,7 +260,8 @@ int UpperBoundOnChars()
return numeric_limits<T>::digits10 + is_signed<T>::value + 1;
}
-template <typename T> char * to_string_digits(char * buf, T i)
+template <typename T>
+char * to_string_digits(char * buf, T i)
{
do
{
@@ -272,7 +272,8 @@ template <typename T> char * to_string_digits(char * buf, T i)
return buf;
}
-template <typename T> string to_string_signed(T i)
+template <typename T>
+string to_string_signed(T i)
{
bool const negative = i < 0;
int const sz = UpperBoundOnChars<T>();
@@ -287,7 +288,8 @@ template <typename T> string to_string_signed(T i)
return string(beg, end - beg);
}
-template <typename T> string to_string_unsigned(T i)
+template <typename T>
+string to_string_unsigned(T i)
{
int const sz = UpperBoundOnChars<T>();
char buf[sz];
@@ -295,19 +297,10 @@ template <typename T> string to_string_unsigned(T i)
char * beg = to_string_digits(end, i);
return string(beg, end - beg);
}
-
-}
-
-inline string to_string(int64_t i)
-{
- return impl::to_string_signed(i);
-}
-
-inline string to_string(uint64_t i)
-{
- return impl::to_string_unsigned(i);
}
+inline string to_string(int64_t i) { return impl::to_string_signed(i); }
+inline string to_string(uint64_t i) { return impl::to_string_unsigned(i); }
/// Use this function to get string with fixed count of
/// "Digits after comma".
string to_string_dac(double d, int dac);
@@ -399,7 +392,7 @@ size_t EditDistance(TIter const & b1, TIter const & e1, TIter const & b2, TIter
namespace std
{
-template <typename ... Args>
+template <typename... Args>
struct iterator_traits<strings::TokenizeIterator<Args...>>
{
using difference_type = std::ptrdiff_t;
@@ -408,4 +401,4 @@ struct iterator_traits<strings::TokenizeIterator<Args...>>
using reference = string;
using iterator_category = std::input_iterator_tag;
};
-} // namespace std
+} // namespace std
diff --git a/generator/booking_dataset.cpp b/generator/booking_dataset.cpp
index f62f52d4aa..d2ac8ca57e 100644
--- a/generator/booking_dataset.cpp
+++ b/generator/booking_dataset.cpp
@@ -14,7 +14,6 @@
namespace generator
{
-
namespace
{
bool CheckForValues(string const & value)
@@ -28,7 +27,7 @@ bool CheckForValues(string const & value)
return false;
}
} // namespace
-
+
BookingDataset::Hotel::Hotel(string const & src)
{
vector<string> rec(FieldsCount());
@@ -39,7 +38,7 @@ BookingDataset::Hotel::Hotel(string const & src)
strings::to_uint(rec[Index(Fields::Id)], id);
strings::to_double(rec[Index(Fields::Latitude)], lat);
strings::to_double(rec[Index(Fields::Longtitude)], lon);
-
+
name = rec[Index(Fields::Name)];
address = rec[Index(Fields::Address)];
@@ -55,13 +54,14 @@ BookingDataset::Hotel::Hotel(string const & src)
ostream & operator<<(ostream & s, BookingDataset::Hotel const & h)
{
- return s << "Name: " << h.name << "\t Address: " << h.address << "\t lat: " << h.lat << " lon: " << h.lon;
+ return s << "Name: " << h.name << "\t Address: " << h.address << "\t lat: " << h.lat
+ << " lon: " << h.lon;
}
BookingDataset::BookingDataset(string const & dataPath)
{
LoadHotels(dataPath);
-
+
size_t counter = 0;
for (auto const & hotel : m_hotels)
{
@@ -73,12 +73,18 @@ BookingDataset::BookingDataset(string const & dataPath)
bool BookingDataset::BookingFilter(OsmElement const & e) const
{
- return Filter(e, [&](OsmElement const & e){ return MatchWithBooking(e); });
+ return Filter(e, [&](OsmElement const & e)
+ {
+ return MatchWithBooking(e);
+ });
}
bool BookingDataset::TourismFilter(OsmElement const & e) const
{
- return Filter(e, [&](OsmElement const & e){ return true; });
+ return Filter(e, [&](OsmElement const & e)
+ {
+ return true;
+ });
}
BookingDataset::Hotel const & BookingDataset::GetHotel(size_t index) const
@@ -91,7 +97,7 @@ vector<size_t> BookingDataset::GetNearestHotels(double lat, double lon, size_t l
double maxDistance /* = 0.0 */) const
{
namespace bgi = boost::geometry::index;
-
+
vector<size_t> indexes;
for_each(bgi::qbegin(m_rtree, bgi::nearest(TPoint(lat, lon), limit)), bgi::qend(m_rtree),
[&](TValue const & v)
@@ -100,47 +106,49 @@ vector<size_t> BookingDataset::GetNearestHotels(double lat, double lon, size_t l
double const dist = ms::DistanceOnEarth(lat, lon, hotel.lat, hotel.lon);
if (maxDistance != 0.0 && dist > maxDistance /* max distance in meters */)
return;
-
+
indexes.emplace_back(v.second);
});
return indexes;
}
-bool BookingDataset::MatchByName(string const & osmName, vector<size_t> const & bookingIndexes) const
+bool BookingDataset::MatchByName(string const & osmName,
+ vector<size_t> const & bookingIndexes) const
{
return false;
-
+
// Match name.
-// vector<strings::UniString> osmTokens;
-// NormalizeAndTokenizeString(name, osmTokens, search::Delimiters());
-//
-// cout << "\n------------- " << name << endl;
-//
-// bool matched = false;
-// for (auto const & index : indexes)
-// {
-// vector<strings::UniString> bookingTokens;
-// NormalizeAndTokenizeString(m_hotels[index].name, bookingTokens, search::Delimiters());
-//
-// map<size_t, vector<pair<size_t, size_t>>> weightPair;
-//
-// for (size_t j = 0; j < osmTokens.size(); ++j)
-// {
-// for (size_t i = 0; i < bookingTokens.size(); ++i)
-// {
-// size_t distance = strings::EditDistance(osmTokens[j].begin(), osmTokens[j].end(),
-// bookingTokens[i].begin(), bookingTokens[i].end());
-// if (distance < 3)
-// weightPair[distance].emplace_back(i, j);
-// }
-// }
-//
-// if (!weightPair.empty())
-// {
-// cout << m_hotels[e.second] << endl;
-// matched = true;
-// }
-// }
+ // vector<strings::UniString> osmTokens;
+ // NormalizeAndTokenizeString(name, osmTokens, search::Delimiters());
+ //
+ // cout << "\n------------- " << name << endl;
+ //
+ // bool matched = false;
+ // for (auto const & index : indexes)
+ // {
+ // vector<strings::UniString> bookingTokens;
+ // NormalizeAndTokenizeString(m_hotels[index].name, bookingTokens, search::Delimiters());
+ //
+ // map<size_t, vector<pair<size_t, size_t>>> weightPair;
+ //
+ // for (size_t j = 0; j < osmTokens.size(); ++j)
+ // {
+ // for (size_t i = 0; i < bookingTokens.size(); ++i)
+ // {
+ // size_t distance = strings::EditDistance(osmTokens[j].begin(), osmTokens[j].end(),
+ // bookingTokens[i].begin(),
+ // bookingTokens[i].end());
+ // if (distance < 3)
+ // weightPair[distance].emplace_back(i, j);
+ // }
+ // }
+ //
+ // if (!weightPair.empty())
+ // {
+ // cout << m_hotels[e.second] << endl;
+ // matched = true;
+ // }
+ // }
}
void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) const
@@ -150,10 +158,10 @@ void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) cons
OsmElement e;
e.type = OsmElement::EntityType::Node;
e.id = 1;
-
+
e.lat = hotel.lat;
e.lon = hotel.lon;
-
+
e.AddTag("name", hotel.name);
e.AddTag("ref:sponsored", strings::to_string(hotel.id));
e.AddTag("website", hotel.descUrl);
@@ -161,56 +169,56 @@ void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) cons
e.AddTag("stars", strings::to_string(hotel.stars));
e.AddTag("price_rate", strings::to_string(hotel.priceCategory));
e.AddTag("addr:full", hotel.address);
-
+
switch (hotel.type)
{
- case 19:
- case 205: e.AddTag("tourism", "motel"); break;
-
- case 21:
- case 206:
- case 212: e.AddTag("tourism", "resort"); break;
-
- case 3:
- case 23:
- case 24:
- case 25:
- case 202:
- case 207:
- case 208:
- case 209:
- case 210:
- case 216:
- case 220:
- case 223: e.AddTag("tourism", "guest_house"); break;
-
- case 14:
- case 204:
- case 213:
- case 218:
- case 219:
- case 226:
- case 222: e.AddTag("tourism", "hotel"); break;
-
- case 211:
- case 224:
- case 228: e.AddTag("tourism", "chalet"); break;
-
- case 13:
- case 225:
- case 203: e.AddTag("tourism", "hostel"); break;
-
- case 215:
- case 221:
- case 227:
- case 2:
- case 201: e.AddTag("tourism", "apartment"); break;
-
- case 214: e.AddTag("tourism", "camp_site"); break;
-
- default: e.AddTag("tourism", "hotel"); break;
+ case 19:
+ case 205: e.AddTag("tourism", "motel"); break;
+
+ case 21:
+ case 206:
+ case 212: e.AddTag("tourism", "resort"); break;
+
+ case 3:
+ case 23:
+ case 24:
+ case 25:
+ case 202:
+ case 207:
+ case 208:
+ case 209:
+ case 210:
+ case 216:
+ case 220:
+ case 223: e.AddTag("tourism", "guest_house"); break;
+
+ case 14:
+ case 204:
+ case 213:
+ case 218:
+ case 219:
+ case 226:
+ case 222: e.AddTag("tourism", "hotel"); break;
+
+ case 211:
+ case 224:
+ case 228: e.AddTag("tourism", "chalet"); break;
+
+ case 13:
+ case 225:
+ case 203: e.AddTag("tourism", "hostel"); break;
+
+ case 215:
+ case 221:
+ case 227:
+ case 2:
+ case 201: e.AddTag("tourism", "apartment"); break;
+
+ case 214: e.AddTag("tourism", "camp_site"); break;
+
+ default: e.AddTag("tourism", "hotel"); break;
}
-
+
fn(&e);
}
}
@@ -218,17 +226,17 @@ void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) cons
void BookingDataset::LoadHotels(string const & path)
{
m_hotels.clear();
-
+
if (path.empty())
return;
-
+
ifstream src(path);
if (!src.is_open())
{
LOG(LERROR, ("Error while opening", path, ":", strerror(errno)));
return;
}
-
+
for (string line; getline(src, line);)
m_hotels.emplace_back(line);
}
@@ -244,20 +252,21 @@ bool BookingDataset::MatchWithBooking(OsmElement const & e) const
break;
}
}
-
+
if (name.empty())
return false;
-
+
// Find 3 nearest values to a point.
auto const indexes = GetNearestHotels(e.lat, e.lon, 3, 150 /* max distance in meters */);
if (indexes.empty())
return false;
-
+
bool matched = MatchByName(name, indexes);
return matched;
}
-bool BookingDataset::Filter(OsmElement const & e, function<bool(OsmElement const &)> const & fn) const
+bool BookingDataset::Filter(OsmElement const & e,
+ function<bool(OsmElement const &)> const & fn) const
{
if (e.type != OsmElement::EntityType::Node)
return false;
diff --git a/generator/booking_dataset.hpp b/generator/booking_dataset.hpp
index d0810e0590..6ce74c7118 100644
--- a/generator/booking_dataset.hpp
+++ b/generator/booking_dataset.hpp
@@ -57,10 +57,12 @@ public:
bool TourismFilter(OsmElement const & e) const;
Hotel const & GetHotel(size_t index) const;
- vector<size_t> GetNearestHotels(double lat, double lon, size_t limit, double maxDistance = 0.0) const;
+ vector<size_t> GetNearestHotels(double lat, double lon, size_t limit,
+ double maxDistance = 0.0) const;
bool MatchByName(string const & osmName, vector<size_t> const & bookingIndexes) const;
void BuildFeatures(function<void(OsmElement *)> const & fn) const;
+
protected:
vector<Hotel> m_hotels;
@@ -77,5 +79,5 @@ protected:
};
ostream & operator<<(ostream & s, BookingDataset::Hotel const & h);
-
+
} // namespace generator