diff options
author | Maxim Pimenov <m@maps.me> | 2018-08-10 13:30:22 +0300 |
---|---|---|
committer | Vladimir Byko-Ianko <bykoianko@gmail.com> | 2018-08-21 15:01:31 +0300 |
commit | fdebef29b0532388f2689d162971a43f379c0cf5 (patch) | |
tree | b31dc8ec3cee3d9df339ecbf90cff633356f9225 /base | |
parent | ecd47090ebfbbba80b1cca18b2621021ff75db50 (diff) |
[base] Replacing osm::Id with GeoObjectId.
Diffstat (limited to 'base')
-rw-r--r-- | base/CMakeLists.txt | 4 | ||||
-rw-r--r-- | base/base_tests/CMakeLists.txt | 2 | ||||
-rw-r--r-- | base/base_tests/geo_object_id_tests.cpp | 44 | ||||
-rw-r--r-- | base/base_tests/osm_id_test.cpp | 24 | ||||
-rw-r--r-- | base/geo_object_id.cpp | 95 | ||||
-rw-r--r-- | base/geo_object_id.hpp | 100 | ||||
-rw-r--r-- | base/osm_id.cpp | 81 | ||||
-rw-r--r-- | base/osm_id.hpp | 47 |
8 files changed, 242 insertions, 155 deletions
diff --git a/base/CMakeLists.txt b/base/CMakeLists.txt index 00fec2ab05..3de26ac947 100644 --- a/base/CMakeLists.txt +++ b/base/CMakeLists.txt @@ -24,6 +24,8 @@ set( exception.cpp exception.hpp fifo_cache.hpp + geo_object_id.cpp + geo_object_id.hpp get_time.hpp gmtime.cpp gmtime.hpp @@ -45,8 +47,6 @@ set( newtype.hpp normalize_unicode.cpp observer_list.hpp - osm_id.cpp - osm_id.hpp pprof.cpp pprof.hpp random.cpp diff --git a/base/base_tests/CMakeLists.txt b/base/base_tests/CMakeLists.txt index 095e83de9a..56257580aa 100644 --- a/base/base_tests/CMakeLists.txt +++ b/base/base_tests/CMakeLists.txt @@ -15,6 +15,7 @@ set( containers_test.cpp control_flow_tests.cpp fifo_cache_test.cpp + geo_object_id_tests.cpp levenshtein_dfa_test.cpp logging_test.cpp math_test.cpp @@ -23,7 +24,6 @@ set( move_to_front_tests.cpp newtype_test.cpp observer_list_test.cpp - osm_id_test.cpp range_iterator_test.cpp ref_counted_tests.cpp regexp_test.cpp diff --git a/base/base_tests/geo_object_id_tests.cpp b/base/base_tests/geo_object_id_tests.cpp new file mode 100644 index 0000000000..9addea3995 --- /dev/null +++ b/base/base_tests/geo_object_id_tests.cpp @@ -0,0 +1,44 @@ +#include "testing/testing.hpp" + +#include "base/geo_object_id.hpp" + +namespace base +{ +UNIT_TEST(GeoObjectId) +{ + GeoObjectId const invalid(GeoObjectId::kInvalid); + TEST_EQUAL(invalid.GetType(), GeoObjectId::Type::Invalid, ()); + + GeoObjectId const node(GeoObjectId::Type::ObsoleteOsmNode, 12345); + TEST_EQUAL(node.GetSerialId(), 12345ULL, ()); + TEST_EQUAL(node.GetType(), GeoObjectId::Type::ObsoleteOsmNode, ()); + TEST_EQUAL(DebugPrint(node), "Osm Node 12345", ()); + + GeoObjectId const way(GeoObjectId::Type::ObsoleteOsmWay, 93245123456332ULL); + TEST_EQUAL(way.GetSerialId(), 93245123456332ULL, ()); + TEST_EQUAL(way.GetType(), GeoObjectId::Type::ObsoleteOsmWay, ()); + TEST_EQUAL(DebugPrint(way), "Osm Way 93245123456332", ()); + + GeoObjectId const relation(GeoObjectId::Type::ObsoleteOsmRelation, 5); + 
TEST_EQUAL(relation.GetSerialId(), 5ULL, ()); + TEST_EQUAL(relation.GetType(), GeoObjectId::Type::ObsoleteOsmRelation, ()); + TEST_EQUAL(DebugPrint(relation), "Osm Relation 5", ()); + + // 2^48 - 1, maximal possible serial id. + GeoObjectId const surrogate(GeoObjectId::Type::OsmSurrogate, 281474976710655ULL); + TEST_EQUAL(surrogate.GetSerialId(), 281474976710655ULL, ()); + TEST_EQUAL(surrogate.GetType(), GeoObjectId::Type::OsmSurrogate, ()); + TEST_EQUAL(DebugPrint(surrogate), "Osm Surrogate 281474976710655", ()); + + // 0 is not prohibited by the encoding even though OSM ids start from 1. + GeoObjectId const booking(GeoObjectId::Type::BookingComNode, 0); + TEST_EQUAL(booking.GetSerialId(), 0, ()); + TEST_EQUAL(booking.GetType(), GeoObjectId::Type::BookingComNode, ()); + TEST_EQUAL(DebugPrint(booking), "Booking.com 0", ()); + + GeoObjectId const fias(GeoObjectId::Type::Fias, 0xf1a5); + TEST_EQUAL(fias.GetSerialId(), 0xf1a5, ()); + TEST_EQUAL(fias.GetType(), GeoObjectId::Type::Fias, ()); + TEST_EQUAL(DebugPrint(fias), "FIAS 61861", ()); +} +} // namespace base diff --git a/base/base_tests/osm_id_test.cpp b/base/base_tests/osm_id_test.cpp deleted file mode 100644 index e5ecaea065..0000000000 --- a/base/base_tests/osm_id_test.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include "testing/testing.hpp" - -#include "base/osm_id.hpp" - -namespace osm -{ -UNIT_TEST(OsmId) -{ - Id const node = Id::Node(12345); - TEST_EQUAL(node.GetOsmId(), 12345ULL, ()); - TEST_EQUAL(node.GetType(), Id::Type::Node, ()); - TEST_EQUAL(DebugPrint(node), "node 12345", ()); - - Id const way = Id::Way(93245123456332ULL); - TEST_EQUAL(way.GetOsmId(), 93245123456332ULL, ()); - TEST_EQUAL(way.GetType(), Id::Type::Way, ()); - TEST_EQUAL(DebugPrint(way), "way 93245123456332", ()); - - Id const relation = Id::Relation(5); - TEST_EQUAL(relation.GetOsmId(), 5ULL, ()); - TEST_EQUAL(relation.GetType(), Id::Type::Relation, ()); - TEST_EQUAL(DebugPrint(relation), "relation 5", ()); -} -} // namespace osm diff --git 
a/base/geo_object_id.cpp b/base/geo_object_id.cpp new file mode 100644 index 0000000000..4749c4442d --- /dev/null +++ b/base/geo_object_id.cpp @@ -0,0 +1,95 @@ +#include "base/geo_object_id.hpp" + +#include "base/assert.hpp" + +#include <sstream> + +namespace +{ +// todo(@m) Uncomment when the transition from osm::id to base::GeoObjectId is complete +// and add assertions about the highest bit. +// The old scheme used the highest bit and the new one does not. +// uint64_t const kTypeMask = 0x7F00000000000000; +uint64_t const kTypeMask = 0xFF00000000000000; +uint64_t const kReservedMask = 0x00FF000000000000; +uint64_t const kSerialMask = 0x0000FFFFFFFFFFFF; +} // namespace + +namespace base +{ +GeoObjectId::GeoObjectId(uint64_t encodedId) : m_encodedId(encodedId) {} + +GeoObjectId::GeoObjectId(GeoObjectId::Type type, uint64_t id) +{ + m_encodedId = (static_cast<uint64_t>(type) << 56) | id; +} + +uint64_t GeoObjectId::GetSerialId() const +{ + ASSERT_NOT_EQUAL(m_encodedId & kTypeMask, 0, ()); + ASSERT_EQUAL(m_encodedId & kReservedMask, 0, ()); + return m_encodedId & kSerialMask; +} + +uint64_t GeoObjectId::GetEncodedId() const { return m_encodedId; } + +GeoObjectId::Type GeoObjectId::GetType() const +{ + ASSERT_EQUAL(m_encodedId & kReservedMask, 0, ()); + uint64_t const typeBits = (m_encodedId & kTypeMask) >> 56; + switch (typeBits) + { + case 0x00: return GeoObjectId::Type::Invalid; + case 0x01: return GeoObjectId::Type::OsmNode; + case 0x02: return GeoObjectId::Type::OsmWay; + case 0x03: return GeoObjectId::Type::OsmRelation; + case 0x04: return GeoObjectId::Type::BookingComNode; + case 0x05: return GeoObjectId::Type::OsmSurrogate; + case 0x06: return GeoObjectId::Type::Fias; + case 0x40: return GeoObjectId::Type::ObsoleteOsmNode; + case 0x80: return GeoObjectId::Type::ObsoleteOsmWay; + case 0xC0: return GeoObjectId::Type::ObsoleteOsmRelation; + } + CHECK_SWITCH(); +} + +GeoObjectId MakeOsmNode(uint64_t id) +{ + return GeoObjectId(GeoObjectId::Type::ObsoleteOsmNode, 
id); +} + +GeoObjectId MakeOsmWay(uint64_t id) +{ + return GeoObjectId(GeoObjectId::Type::ObsoleteOsmWay, id); +} + +GeoObjectId MakeOsmRelation(uint64_t id) +{ + return GeoObjectId(GeoObjectId::Type::ObsoleteOsmRelation, id); +} + +std::string DebugPrint(GeoObjectId::Type const & t) +{ + switch (t) + { + case GeoObjectId::Type::Invalid: return "Invalid"; + case GeoObjectId::Type::OsmNode: return "Osm Node"; + case GeoObjectId::Type::OsmWay: return "Osm Way"; + case GeoObjectId::Type::OsmRelation: return "Osm Relation"; + case GeoObjectId::Type::BookingComNode: return "Booking.com"; + case GeoObjectId::Type::OsmSurrogate: return "Osm Surrogate"; + case GeoObjectId::Type::Fias: return "FIAS"; + case GeoObjectId::Type::ObsoleteOsmNode: return "Osm Node"; + case GeoObjectId::Type::ObsoleteOsmWay: return "Osm Way"; + case GeoObjectId::Type::ObsoleteOsmRelation: return "Osm Relation"; + } + CHECK_SWITCH(); +} + +std::string DebugPrint(GeoObjectId const & id) +{ + std::ostringstream oss; + oss << DebugPrint(id.GetType()) << " " << id.GetSerialId(); + return oss.str(); +} +} // namespace base diff --git a/base/geo_object_id.hpp b/base/geo_object_id.hpp new file mode 100644 index 0000000000..bdc524e824 --- /dev/null +++ b/base/geo_object_id.hpp @@ -0,0 +1,100 @@ +#pragma once + +#include <cstdint> +#include <functional> +#include <string> + +namespace base +{ +// GeoObjectId is used to pack the source of a geographical object together with its +// id within this source (that is, its serial number) into a single 64-bit number. +// +// The bit encoding is as follows: +// 0sss ssss RRRR RRRR xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx +// R - reserved bits +// s - bits for object source +// x - bits for serial number (object id within its source) +// +// The highest byte encodes one of (2^7 - 1) = 127 possible object sources. +// Another byte is reserved and the last 6 bytes leave us with 2^48 possible values that can be +// used for ids within a source. 
+// Typically, the reserved byte will be zero but it may be used in the future if the format changes. +// At the time of writing, OSM has approximately 2^32 different nodes with ids starting +// from one (https://wiki.openstreetmap.org/wiki/Stats) and these are by far the largest +// serial numbers that we use. +// The highest bit is zero so that the resulting number is positive if read as a signed 64-bit +// integer in two's complement notation. This is important for readability in some database systems +// that do not support unsigned integers. An older scheme we used to store OsmIds tried to keep as +// many bits for the serial number as possible and utilized the highest bit which resulted in a lot +// of "negative" numbers. +// +// When all bits are set to zero the GeoObjectId is defined to be invalid. +// +// Another way would be to use separate headers for source and for categories within the source, +// as in OSM->Way->Id instead of OSMWay->Id that we have now but we do not have this many sources +// and the difference does not seem important. Also this would probably touch the highest bit. +class GeoObjectId +{ +public: + // Sources of the objects. + enum class Type : uint8_t + { + Invalid = 0x00, + OsmNode = 0x01, + OsmWay = 0x02, + OsmRelation = 0x03, + BookingComNode = 0x04, + + // Artificial objects that substitute objects not present in OSM for some reason. + // E.g., streets that only exist in addr:street fields of houses but not as separate OSM ways. + OsmSurrogate = 0x05, + + // Federal informational address system. http://www.ifias.ru/ + Fias = 0x06, + + ObsoleteOsmNode = 0x40, + ObsoleteOsmWay = 0x80, + ObsoleteOsmRelation = 0xC0, + }; + + static const uint64_t kInvalid = 0ULL; + + explicit GeoObjectId(uint64_t encodedId = kInvalid); + + GeoObjectId(Type type, uint64_t id); + + // Returns the id that the object has within its source. 
+ uint64_t GetSerialId() const; + + // Returns the encoded value that contains both + // the source type and the serial number. + uint64_t GetEncodedId() const; + + // Returns the source type of the object. + Type GetType() const; + + bool operator<(GeoObjectId const & other) const { return m_encodedId < other.m_encodedId; } + bool operator==(GeoObjectId const & other) const { return m_encodedId == other.m_encodedId; } + bool operator!=(GeoObjectId const & other) const { return !(*this == other); } + bool operator==(uint64_t other) const { return GetSerialId() == other; } + +private: + uint64_t m_encodedId; +}; + +struct HashGeoObjectId : private std::hash<uint64_t> +{ + size_t operator()(GeoObjectId const & id) const + { + return std::hash<uint64_t>::operator()(id.GetSerialId()); + } +}; + +// Helper functions for readability. +GeoObjectId MakeOsmNode(uint64_t id); +GeoObjectId MakeOsmWay(uint64_t id); +GeoObjectId MakeOsmRelation(uint64_t id); + +std::string DebugPrint(GeoObjectId::Type const & t); +std::string DebugPrint(GeoObjectId const & id); +} // namespace base diff --git a/base/osm_id.cpp b/base/osm_id.cpp deleted file mode 100644 index 4fe42ec891..0000000000 --- a/base/osm_id.cpp +++ /dev/null @@ -1,81 +0,0 @@ -#include "base/osm_id.hpp" - -#include "base/assert.hpp" - -#include <sstream> - -namespace -{ -// Use 2 higher bits to encode type. -// Note that the masks are not disjoint. 
-uint64_t const kNode = 0x4000000000000000ULL; -uint64_t const kWay = 0x8000000000000000ULL; -uint64_t const kRelation = 0xC000000000000000ULL; -uint64_t const kTypeMask = 0xC000000000000000ULL; -} // namespace - -namespace osm -{ -Id::Id(uint64_t encodedId) : m_encodedId(encodedId) -{ -} - -Id Id::Node(uint64_t id) -{ - ASSERT_EQUAL(id & kTypeMask, 0, ()); - return Id(id | kNode); -} - -Id Id::Way(uint64_t id) -{ - ASSERT_EQUAL(id & kTypeMask, 0, ()); - return Id(id | kWay); -} - -Id Id::Relation(uint64_t id) -{ - ASSERT_EQUAL(id & kTypeMask, 0, ()); - return Id(id | kRelation); -} - -uint64_t Id::GetOsmId() const -{ - ASSERT_NOT_EQUAL(m_encodedId & kTypeMask, 0, ()); - return m_encodedId & ~kTypeMask; -} - -uint64_t Id::GetEncodedId() const -{ - return m_encodedId; -} - -Id::Type Id::GetType() const -{ - uint64_t const mask = m_encodedId & kTypeMask; - switch (mask) - { - case kNode: return Id::Type::Node; - case kWay: return Id::Type::Way; - case kRelation: return Id::Type::Relation; - } - CHECK_SWITCH(); -} - -std::string DebugPrint(Id::Type const & t) -{ - switch (t) - { - case Id::Type::Node: return "node"; - case Id::Type::Way: return "way"; - case Id::Type::Relation: return "relation"; - } - CHECK_SWITCH(); -} - -std::string DebugPrint(Id const & id) -{ - std::ostringstream oss; - oss << DebugPrint(id.GetType()) << " " << id.GetOsmId(); - return oss.str(); -} -} // namespace osm diff --git a/base/osm_id.hpp b/base/osm_id.hpp deleted file mode 100644 index 515a28dd01..0000000000 --- a/base/osm_id.hpp +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once - -#include <cstdint> -#include <functional> -#include <string> - -namespace osm -{ -class Id -{ -public: - enum class Type - { - Node, - Way, - Relation - }; - - static const uint64_t kInvalid = 0ULL; - - explicit Id(uint64_t encodedId = kInvalid); - - static Id Node(uint64_t osmId); - static Id Way(uint64_t osmId); - static Id Relation(uint64_t osmId); - - uint64_t GetOsmId() const; - uint64_t GetEncodedId() const; 
- Type GetType() const; - - bool operator<(Id const & other) const { return m_encodedId < other.m_encodedId; } - bool operator==(Id const & other) const { return m_encodedId == other.m_encodedId; } - bool operator!=(Id const & other) const { return !(*this == other); } - bool operator==(uint64_t other) const { return GetOsmId() == other; } - -private: - uint64_t m_encodedId; -}; - -struct HashId : private std::hash<uint64_t> -{ - size_t operator()(Id const & id) const { return std::hash<uint64_t>::operator()(id.GetOsmId()); } -}; - -std::string DebugPrint(Id::Type const & t); -std::string DebugPrint(Id const & id); -} // namespace osm |