Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorIlya Zverev <zverik@textual.ru>2016-01-26 17:18:09 +0300
committerIlya Zverev <zverik@textual.ru>2016-06-04 14:22:32 +0300
commite6152683c48681bc02048f17e6c225056d5626d6 (patch)
tree278479b3014164bb15cdffe2d26b76c923770328 /tools
parent2aa8f994bc66418c0b994807cf70dbbd50f4ff19 (diff)
Started work on mwm.py
Diffstat (limited to 'tools')
-rwxr-xr-xtools/python/mwm/dump_mwm.py24
-rw-r--r--tools/python/mwm/mwm.py382
2 files changed, 406 insertions, 0 deletions
diff --git a/tools/python/mwm/dump_mwm.py b/tools/python/mwm/dump_mwm.py
new file mode 100755
index 0000000000..bc86ba53cd
--- /dev/null
+++ b/tools/python/mwm/dump_mwm.py
@@ -0,0 +1,24 @@
#!/usr/bin/python
"""Dump a short summary of the structures stored in an MWM file.

Usage: dump_mwm.py <country.mwm>
"""
import itertools
import os.path
import sys

from mwm import MWM


def main(argv):
    """Print tags, version, header, metadata count and the first features.

    argv is the full argument vector (program name first). Returns the
    process exit code: 1 when the file argument is missing, 0 otherwise.
    """
    if len(argv) < 2:
        print('Dumps some MWM structures.')
        print('Usage: {0} <country.mwm>'.format(argv[0]))
        return 1

    # types.txt is looked up relative to the location of this script.
    types_path = os.path.join(
        os.path.dirname(argv[0]), '..', '..', '..', 'data', 'types.txt')

    # The reader keeps using the file object, so everything that touches
    # `mwm` stays inside the `with` block; the file is closed afterwards.
    with open(argv[1], 'rb') as mwm_file:
        mwm = MWM(mwm_file)
        mwm.read_types(types_path)
        print('Tags:')
        for tag, value in mwm.tags.items():
            print(' {0:<8}: offs {1:9} len {2:8}'.format(tag, value[0], value[1]))
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Version: {0}'.format(mwm.read_version()))
        print('Header: {0}'.format(mwm.read_header()))
        print('Metadata count: {0}'.format(len(mwm.read_metadata())))
        cross = mwm.read_crossmwm()
        if cross:
            print('Outgoing points: {0} incoming: {1}'.format(
                len(cross['out']), len(cross['in'])))
            print('Outgoing regions: {0}'.format(set(cross['neighbours'])))
        for feature in itertools.islice(mwm.iter_features(), 10):
            print(feature)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
diff --git a/tools/python/mwm/mwm.py b/tools/python/mwm/mwm.py
new file mode 100644
index 0000000000..1ad6851331
--- /dev/null
+++ b/tools/python/mwm/mwm.py
@@ -0,0 +1,382 @@
+# MWM Reader Module
+import struct
+import math
+
+# Unprocessed sections: geomN, trgN, idx, sdx (search index), addr (search address), offs (feature offsets), dat (!)
+# Routing sections: mercedes (matrix), daewoo (edge data), infinity (edge id), skoda (shortcuts), chrysler (cross context), ftseg, node2ftseg
+# (these mostly are succinct structures, except chrysler and node2ftseg, so no use trying to load them here)
+
class MWM:
    """Reader for the tagged sections of an MWM file.

    The constructor reads the table of sections ("tags") from the given file
    object. The ``read_*`` methods and ``iter_features`` then seek to a
    section and decode it on demand.

    ``read_header()`` has to be called before anything that converts
    coordinates, because it sets ``coord_size`` and ``base_point``.
    String payloads are returned exactly as read from the file (raw bytes).
    """

    # Language names, indexed by the numeric language code used in the header.
    languages = ["default",
                 "en", "ja", "fr", "ko_rm", "ar", "de", "int_name", "ru", "sv", "zh", "fi", "be", "ka", "ko",
                 "he", "nl", "ga", "ja_rm", "el", "it", "es", "zh_pinyin", "th", "cy", "sr", "uk", "ca", "hu",
                 "hsb", "eu", "fa", "br", "pl", "hy", "kn", "sl", "ro", "sq", "am", "fy", "cs", "gd", "sk",
                 "af", "ja_kana", "lb", "pt", "hr", "fur", "vi", "tr", "bg", "eo", "lt", "la", "kk", "gsw",
                 "et", "ku", "mn", "mk", "lv", "hi"]

    # Metadata field names, indexed by the numeric field type.
    metadata = ["0",
                "cuisine", "open_hours", "phone_number", "fax_number", "stars",
                "operator", "url", "website", "internet", "ele",
                "turn_lanes", "turn_lanes_forward", "turn_lanes_backward", "email", "postcode",
                "wikipedia", "maxspeed", "flats", "height", "min_height",
                "denomination", "building_levels"
                ]

    # struct formats for read_uint(), keyed by byte length.
    # NOTE(review): byte order is now explicitly little-endian; the previous
    # native-order formats gave the same result on x86 hosts. Confirm that
    # the file format is little-endian.
    _UINT_FORMATS = {1: '<B', 2: '<H', 4: '<I', 8: '<Q'}

    # Names for the map type codes stored at the end of the header.
    _MAP_TYPES = {0: 'world', 1: 'worldcoasts', 2: 'country'}

    # Work-in-progress switch: the part of a feature record after the header
    # (non-point geometry, coast cell, OSM ids) is not decoded yet.
    _parse_feature_tail = False

    def __init__(self, f):
        """Wrap the binary file object `f` and read its section table."""
        self.f = f
        self.coord_size = None
        self.base_point = (0, 0)
        self.type_mapping = []
        self.read_info()

    def read_types(self, filename):
        """Append every non-blank line of `filename` (stripped) to type_mapping."""
        with open(filename, 'r') as ft:
            for line in ft:
                stripped = line.strip()
                if stripped:
                    self.type_mapping.append(stripped)

    def read_info(self):
        """Read the section table into self.tags as {name: (offset, length)}.

        The first 8 bytes of the file hold the offset of the table.
        """
        self.f.seek(0)
        self.f.seek(self.read_uint(8))
        cnt = self.read_varuint()
        self.tags = {}
        for _ in range(cnt):
            name = self.read_string(True)
            if not isinstance(name, str):
                # Python 3: bytes -> str, so lookups such as
                # has_tag('header') work. On Python 2 the name already is str.
                name = name.decode('utf-8')
            offset = self.read_varuint()
            length = self.read_varuint()
            self.tags[name] = (offset, length)

    def has_tag(self, tag):
        """Return True if section `tag` exists and is not empty."""
        return tag in self.tags and self.tags[tag][1] > 0

    def seek_tag(self, tag):
        """Move the file position to the start of section `tag`."""
        self.f.seek(self.tags[tag][0])

    def inside_tag(self, tag):
        """Return True while the file position is within section `tag`."""
        pos = self.tag_position(tag)
        return 0 <= pos < self.tags[tag][1]

    def tag_position(self, tag):
        """Return the file position relative to the start of section `tag`."""
        return self.f.tell() - self.tags[tag][0]

    def read_version(self):
        """Reads 'version' section, returns {'fmt': ..., 'version': ...}."""
        self.seek_tag('version')
        self.f.read(4)  # skip prolog
        fmt = self.read_varuint() + 1
        version = self.read_varuint()
        return {'fmt': fmt, 'version': version}

    def read_header(self):
        """Reads 'header' section and sets coord_size and base_point.

        Returns a dict with 'basePoint', 'bounds', 'scales', 'langs' and
        'mapType', or an empty dict when the file has no header section
        (coord_size is then set to a 30-bit default).
        """
        if not self.has_tag('header'):
            # Stub for routing files
            self.coord_size = (1 << 30) - 1
            return {}
        self.seek_tag('header')
        result = {}
        coord_bits = self.read_varuint()
        self.coord_size = (1 << coord_bits) - 1
        self.base_point = self.read_coord(convert=False)
        result['basePoint'] = self.to_4326(self.base_point)
        result['bounds'] = self.read_bounds()
        result['scales'] = self.read_uint_array()
        # Translate known language codes to names. The code itself (not its
        # position in the array) is what must be range-checked; unknown codes
        # are kept as numbers.
        result['langs'] = [
            self.languages[code] if code < len(self.languages) else code
            for code in self.read_uint_array()
        ]
        map_type = self.read_varint()
        result['mapType'] = self._MAP_TYPES.get(
            map_type, 'unknown: {0}'.format(map_type))
        return result

    # COMPLEX READERS

    def read_metadata(self):
        """Reads 'meta' and 'metaidx' sections.

        Returns {feature_id: {field_name: raw_value}}; an empty dict when
        there is no 'metaidx' section.
        """
        if not self.has_tag('metaidx'):
            return {}
        # Metadata format is different since v8
        fmt = self.read_version()['fmt']
        # First, read metaidx, to match featureId <-> metadata offset.
        self.seek_tag('metaidx')
        ftid_by_offset = {}
        while self.inside_tag('metaidx'):
            ftid = self.read_uint(4)
            moffs = self.read_uint(4)
            # Keep the first feature id seen for an offset.
            ftid_by_offset.setdefault(moffs, ftid)
        # Now read metadata records; each is identified by its offset
        # inside the 'meta' section.
        self.seek_tag('meta')
        metadatar = {}
        while self.inside_tag('meta'):
            tag_pos = self.tag_position('meta')
            fields = {}
            if fmt >= 8:
                for _ in range(self.read_varuint()):
                    t = self.read_varuint()
                    t = self.metadata[t] if t < len(self.metadata) else str(t)
                    fields[t] = self.read_string()
            else:
                while True:
                    t = self.read_uint(1)
                    is_last = t & 0x80 > 0  # high bit marks the last field
                    t = t & 0x7f
                    t = self.metadata[t] if t < len(self.metadata) else str(t)
                    size = self.read_uint(1)
                    fields[t] = self.f.read(size)
                    if is_last:
                        break

            if fields and tag_pos in ftid_by_offset:
                metadatar[ftid_by_offset[tag_pos]] = fields
            if self.tag_position('meta') == tag_pos:
                # Nothing was consumed (file shorter than the section
                # claims); stop instead of looping forever.
                break
        return metadatar

    def read_crossmwm(self):
        """Reads 'chrysler' section (cross-mwm routing table).

        Returns a dict with 'in', 'out', 'matrix' and 'neighbours', or an
        empty dict when the section is missing.
        """
        if not self.has_tag('chrysler'):
            return {}
        self.seek_tag('chrysler')
        # Ingoing nodes: array of (nodeId, coord) tuples
        incoming_count = self.read_uint(4)
        incoming = []
        for _ in range(incoming_count):
            node_id = self.read_uint(4)
            point = self.read_coord(False)
            incoming.append((node_id, point))
        # Outgoing nodes: array of (nodeId, coord, outIndex) tuples
        # outIndex is an index in neighbours array
        outgoing_count = self.read_uint(4)
        outgoing = []
        for _ in range(outgoing_count):
            node_id = self.read_uint(4)
            point = self.read_coord(False)
            out_index = self.read_uint(1)
            outgoing.append((node_id, point, out_index))
        # Adjacency matrix: costs of routes for each (incoming, outgoing) tuple
        matrix = [
            [self.read_uint(4) for _ in range(outgoing_count)]
            for _ in range(incoming_count)
        ]
        # List of mwms to which leads each outgoing node
        neighbours_count = self.read_uint(4)
        neighbours = []
        for _ in range(neighbours_count):
            size = self.read_uint(4)
            neighbours.append(self.f.read(size))
        return {'in': incoming, 'out': outgoing, 'matrix': matrix, 'neighbours': neighbours}

    class GeomType:
        # Values of the geometry bits of a feature header (header_bits & 0x60).
        POINT = 0
        LINE = 1 << 5
        AREA = 1 << 6
        POINT_EX = 3 << 5

    class OsmIdCode:
        # Object type marker in the two highest bits of a 64-bit encoded id.
        NODE = 0x4000000000000000
        WAY = 0x8000000000000000
        RELATION = 0xC000000000000000
        # Mask that clears the type bits and leaves the plain id.
        RESET = ~(NODE | WAY | RELATION)

    def _format_osm_id(self, encid):
        """Return an encoded OSM id as 'n123' / 'w123' / 'r123' (or '123')."""
        # RELATION has both type bits set, so it doubles as the mask. Testing
        # each bit separately would report relations as nodes.
        kind = encid & MWM.OsmIdCode.RELATION
        if kind == MWM.OsmIdCode.RELATION:
            typ = 'r'
        elif kind == MWM.OsmIdCode.WAY:
            typ = 'w'
        elif kind == MWM.OsmIdCode.NODE:
            typ = 'n'
        else:
            typ = ''
        return '{0}{1}'.format(typ, encid & MWM.OsmIdCode.RESET)

    def _read_feature_tail(self, feature, geometry, geom_type):
        """Read what follows the header of a feature record (unfinished).

        NOTE(review): not called while _parse_feature_tail is False. The
        exact nesting of the coast cell read and the extent of the disabled
        part should be confirmed against the upstream file.
        """
        if geom_type != MWM.GeomType.POINT:
            polygon_count = self.read_varuint()
            polygons = []
            for _ in range(polygon_count):
                count = self.read_varuint()
                self.f.read(count)  # TODO: decode
            geometry['coordinates'] = polygons
            feature['coastCell'] = self.read_varint()

        # OSM IDs
        count = self.read_varuint()
        feature['osmIds'] = [
            self._format_osm_id(self.read_uint(8)) for _ in range(count)
        ]

    def iter_features(self):
        """Reads 'dat' section, yielding one dict per feature.

        Each dict has 'size', 'header' and 'geometry'. Coordinates are only
        decoded for point features. Raises Exception when more bytes were
        consumed than the record size allows.
        """
        if not self.has_tag('dat'):
            return
        # TODO: read 'offs'?
        self.seek_tag('dat')
        while self.inside_tag('dat'):
            feature = {}
            feature_size = self.read_varuint()
            next_feature = self.f.tell() + feature_size
            feature['size'] = feature_size

            # Header
            header = {}
            header_bits = self.read_uint(1)
            types_count = (header_bits & 0x07) + 1
            has_name = header_bits & 0x08 > 0
            has_layer = header_bits & 0x10 > 0
            has_addinfo = header_bits & 0x80 > 0
            geom_type = header_bits & 0x60
            types = []
            for _ in range(types_count):
                type_id = self.read_varuint()
                if type_id < len(self.type_mapping):
                    types.append(self.type_mapping[type_id])
                else:
                    types.append(str(type_id))
            header['types'] = types
            if has_name:
                header['name'] = self.read_multilang()
            if has_layer:
                header['layer'] = self.read_uint(1)
            if has_addinfo:
                # The meaning of the additional info depends on the geometry.
                if geom_type == MWM.GeomType.POINT:
                    header['rank'] = self.read_uint(1)
                elif geom_type == MWM.GeomType.LINE:
                    header['ref'] = self.read_string()
                elif geom_type in (MWM.GeomType.AREA, MWM.GeomType.POINT_EX):
                    header['house'] = self.read_numeric_string()
            feature['header'] = header

            # Geometry
            geometry = {}
            if geom_type in (MWM.GeomType.POINT, MWM.GeomType.POINT_EX):
                geometry['type'] = 'Point'
            elif geom_type == MWM.GeomType.LINE:
                geometry['type'] = 'LineString'
            elif geom_type == MWM.GeomType.AREA:
                geometry['type'] = 'Polygon'
            if geom_type == MWM.GeomType.POINT:
                geometry['coordinates'] = list(self.read_coord())

            # (flipping table emoticon)
            feature['geometry'] = geometry
            if self._parse_feature_tail:
                self._read_feature_tail(feature, geometry, geom_type)

            if self.f.tell() > next_feature:
                raise Exception('Feature parsing error, read too much')
            yield feature
            # Skip whatever part of the record was not decoded.
            self.f.seek(next_feature)

    # BITWISE READERS

    def read_uint(self, bytelen=1):
        """Read a fixed-size unsigned integer of 1, 2, 4 or 8 bytes.

        Raises Exception for any other length and struct.error when the
        file ends early.
        """
        fmt = self._UINT_FORMATS.get(bytelen)
        if fmt is None:
            raise Exception('Bytelen {0} is not supported'.format(bytelen))
        return struct.unpack(fmt, self.f.read(bytelen))[0]

    def read_varuint(self):
        """Read a variable-length unsigned integer (7 bits per byte, low first).

        A set high bit means another byte follows. At end of file the value
        accumulated so far (0 if nothing was read) is returned.
        """
        res = 0
        shift = 0
        while True:
            b = self.f.read(1)
            if not b:
                return res
            # bytearray() yields an int on both Python 2 and Python 3.
            byte = bytearray(b)[0]
            res |= (byte & 0x7F) << shift
            shift += 7
            if byte < 0x80:
                return res

    def read_varint(self):
        """Read a signed integer stored as a varuint with the sign in bit 0.

        NOTE(review): for odd inputs this returns -(u >> 1), whereas standard
        zigzag decoding gives -(u >> 1) - 1; confirm against the writer.
        """
        uint = self.read_varuint()
        res = uint >> 1
        return res if uint & 1 == 0 else -res

    def mwm_unshuffle(self, x):
        """De-interleave the bits of a 32-bit value (series of bit swaps)."""
        x = ((x & 0x22222222) << 1) | ((x >> 1) & 0x22222222) | (x & 0x99999999)
        x = ((x & 0x0C0C0C0C) << 2) | ((x >> 2) & 0x0C0C0C0C) | (x & 0xC3C3C3C3)
        x = ((x & 0x00F000F0) << 4) | ((x >> 4) & 0x00F000F0) | (x & 0xF00FF00F)
        x = ((x & 0x0000FF00) << 8) | ((x >> 8) & 0x0000FF00) | (x & 0xFF0000FF)
        return x

    def mwm_bitwise_split(self, v):
        """Split a 64-bit bit-interleaved value into an (x, y) pair."""
        hi = self.mwm_unshuffle(v >> 32)
        lo = self.mwm_unshuffle(v & 0xFFFFFFFF)
        x = ((hi & 0xFFFF) << 16) | (lo & 0xFFFF)
        y = (hi & 0xFFFF0000) | (lo >> 16)
        return (x, y)

    def read_point(self, packed=True):
        """Reads an unsigned point, returns (x, y).

        A packed point is stored as a varuint, an unpacked one as 8 bytes.
        """
        if packed:
            u = self.read_varuint()
        else:
            u = self.read_uint(8)
        return self.mwm_bitwise_split(u)

    def to_4326(self, point):
        """Convert an integer (x, y) point to (lon, lat) in degrees.

        Raises Exception when coord_size is not set yet (read_header() sets it).
        """
        if self.coord_size is None:
            raise Exception('Call read_header() first.')
        # Float bounds force true division: with integer operands Python 2
        # floor-divides and truncates the result to whole degrees.
        merc_bounds = (-180.0, -180.0, 180.0, 180.0)  # Xmin, Ymin, Xmax, Ymax
        x = point[0] * (merc_bounds[2] - merc_bounds[0]) / self.coord_size + merc_bounds[0]
        y = point[1] * (merc_bounds[3] - merc_bounds[1]) / self.coord_size + merc_bounds[1]
        # Mercator y (in degrees) -> latitude.
        y = 360.0 * math.atan(math.tanh(y * math.pi / 360.0)) / math.pi
        return (x, y)

    def read_coord(self, packed=True, convert=True):
        """Reads a point and offsets it by base_point.

        Returns (lon, lat) in degrees when `convert` is true, otherwise the
        raw integer pair.
        """
        upoint = self.read_point(packed)
        point = (upoint[0] + self.base_point[0], upoint[1] + self.base_point[1])
        return self.to_4326(point) if convert else point

    def read_bounds(self):
        """Reads mercator bounds, returns (min_lon, min_lat, max_lon, max_lat)."""
        rmin = self.read_coord()
        rmax = self.read_coord()
        return (rmin[0], rmin[1], rmax[0], rmax[1])

    def read_string(self, plain=False):
        """Read a length-prefixed string and return its raw bytes.

        The stored varuint is the length itself when `plain` is true and
        length - 1 otherwise.
        """
        length = self.read_varuint() + (0 if plain else 1)
        return self.f.read(length)

    def read_uint_array(self):
        """Read a varuint count followed by that many varuints."""
        length = self.read_varuint()
        return [self.read_varuint() for _ in range(length)]

    def read_numeric_string(self):
        """Read a string that may be stored as a number.

        If the low bit of the leading varuint is set, the remaining bits are
        the number and its decimal form is returned. Otherwise they give
        length - 1 of the raw bytes that follow.
        """
        sz = self.read_varuint()
        if sz & 1 != 0:
            return str(sz >> 1)
        sz = (sz >> 1) + 1
        return self.f.read(sz)

    def read_multilang(self):
        """Read a multi-language name; currently returned undecoded."""
        s = self.read_string()
        # TODO!
        return s