Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorMaksim Andrianov <maksimandrianov1@gmail.com>2019-05-24 15:58:48 +0300
committermpimenov <mpimenov@users.noreply.github.com>2019-05-29 16:58:56 +0300
commit2a97d57399e9d12dc8ce4455de56a89fdcedc949 (patch)
tree12fe8ed181c67dac613c0a00d1e5a8dff82946fc /tools
parentf2edf6d87091d08dd633d10e3393058a8c044f37 (diff)
[python] Added statistics.
Diffstat (limited to 'tools')
-rw-r--r--tools/python/maps_generator/__main__.py26
-rw-r--r--tools/python/maps_generator/generator/env.py6
-rw-r--r--tools/python/maps_generator/generator/gen_tool.py1
-rw-r--r--tools/python/maps_generator/generator/settings.py5
-rw-r--r--tools/python/maps_generator/generator/statistics.py116
-rw-r--r--tools/python/maps_generator/maps_generator.py52
-rw-r--r--tools/python/maps_generator/var/etc/map_generator.ini.default5
-rw-r--r--tools/python/maps_generator/var/etc/stats_types_config.txt59
8 files changed, 255 insertions, 15 deletions
diff --git a/tools/python/maps_generator/__main__.py b/tools/python/maps_generator/__main__.py
index ead17107d2..97a55b9371 100644
--- a/tools/python/maps_generator/__main__.py
+++ b/tools/python/maps_generator/__main__.py
@@ -8,7 +8,8 @@ from .generator.exceptions import ContinueError, SkipError, ValidationError
from .maps_generator import (generate_maps, generate_coasts, reset_to_stage,
ALL_STAGES, stage_download_production_external,
stage_descriptions, stage_ugc, stage_popularity,
- stage_localads, stages_as_string)
+ stage_localads, stage_statistics,
+ stages_as_string)
from .utils.collections import unique
logger = logging.getLogger("maps_generator")
@@ -25,16 +26,16 @@ def parse_options():
nargs="?",
type=str,
help="Continue the last build or specified in CONTINUE from the "
- "last stopped stage.")
+ "last stopped stage.")
parser.add_argument(
"--countries",
type=str,
default="",
help="List of regions, separated by a comma or a semicolon, or path to "
- "file with regions, separated by a line break, for which maps"
- " will be built. The names of the regions can be seen "
- "in omim/data/borders. It is necessary to set names without "
- "any extension.")
+ "file with regions, separated by a line break, for which maps"
+ " will be built. The names of the regions can be seen "
+ "in omim/data/borders. It is necessary to set names without "
+ "any extension.")
parser.add_argument(
"--skip",
type=str,
@@ -58,7 +59,7 @@ def parse_options():
default=False,
action="store_true",
help="Build production maps. In another case, 'osm only maps' are built"
- " - maps without additional data and advertising.")
+ " - maps without additional data and advertising.")
return vars(parser.parse_args())
@@ -131,10 +132,13 @@ def main():
]
options["skip"] = options_skip
if not options["production"]:
- options["skip"] += stages_as_string(stage_download_production_external,
- stage_ugc, stage_popularity,
- stage_descriptions,
- stage_localads)
+ options["skip"] += stages_as_string(
+ stage_download_production_external,
+ stage_ugc, stage_popularity,
+ stage_descriptions,
+ stage_localads,
+ stage_statistics
+ )
if not all(s in ALL_STAGES for s in options["skip"]):
raise SkipError(f"Stages {set(options['skip']) - set(ALL_STAGES)} "
f"not found.")
diff --git a/tools/python/maps_generator/generator/env.py b/tools/python/maps_generator/generator/env.py
index 7d0211993b..ad8dca2851 100644
--- a/tools/python/maps_generator/generator/env.py
+++ b/tools/python/maps_generator/generator/env.py
@@ -153,6 +153,12 @@ class Env:
return path
@property
+def stats_path(self):
+    """Return the path of the "stats" directory located under out_path.
+
+    The path is handed to _create_if_not_exist before being returned
+    (presumably this creates the directory when missing; confirm in Env).
+    """
+    stats_dir = os.path.join(self.out_path, "stats")
+    self._create_if_not_exist(stats_dir)
+    return stats_dir
+
+ @property
def types_path(self):
return os.path.join(self.user_resource_path, "types.txt")
diff --git a/tools/python/maps_generator/generator/gen_tool.py b/tools/python/maps_generator/generator/gen_tool.py
index ca8a05d4e5..1899a12291 100644
--- a/tools/python/maps_generator/generator/gen_tool.py
+++ b/tools/python/maps_generator/generator/gen_tool.py
@@ -37,6 +37,7 @@ class GenTool:
"no_ads": bool,
"preprocess": bool,
"split_by_polygons": bool,
+ "type_statistics": bool,
"planet_version": int,
"booking_data": str,
"brands_data": str,
diff --git a/tools/python/maps_generator/generator/settings.py b/tools/python/maps_generator/generator/settings.py
index b0adebe3cd..8df50f2a38 100644
--- a/tools/python/maps_generator/generator/settings.py
+++ b/tools/python/maps_generator/generator/settings.py
@@ -50,6 +50,8 @@ SUBWAY_URL = ""
FOOD_URL = ""
FOOD_TRANSLATIONS_URL = ""
+STATS_TYPES_CONFIG = ""
+
PLANET = "planet"
GEN_TOOL = "generator_tool"
@@ -120,6 +122,9 @@ FOOD_URL = _get_opt(config, "External", "FOOD_URL", FOOD_URL)
FOOD_TRANSLATIONS_URL = _get_opt(config, "External", "FOOD_TRANSLATIONS_URL",
FOOD_TRANSLATIONS_URL)
+STATS_TYPES_CONFIG = _get_opt_path(config, "Stats", "STATS_TYPES_CONFIG",
+ STATS_TYPES_CONFIG)
+
PLANET_O5M = os.path.join(MAIN_OUT_PATH, PLANET + ".o5m")
PLANET_PBF = os.path.join(MAIN_OUT_PATH, PLANET + ".osm.pbf")
PLANET_COASTS_GEOM_URL = os.path.join(PLANET_COASTS_URL, "latest_coasts.geom")
diff --git a/tools/python/maps_generator/generator/statistics.py b/tools/python/maps_generator/generator/statistics.py
new file mode 100644
index 0000000000..6051e97c51
--- /dev/null
+++ b/tools/python/maps_generator/generator/statistics.py
@@ -0,0 +1,116 @@
+import re
+import os
+import datetime
+from collections import defaultdict
+
+
+# Matches one per-type statistics row: an optional "N. " index, the type name
+# (word characters, ':', '|' and '-') terminated by "|:", followed by the
+# size, count, length (m), area (m²) and names fields.
+# Groups: 1 - type name, 2 - count, 3 - length, 4 - area, 5 - names.
+# The size value is matched but not captured.
+RE_STAT = re.compile(r"(?:\d+\. )?([\w:|-]+?)\|: "
+ r"size = \d+; "
+ r"count = (\d+); "
+ r"length = ([0-9.e+-]+) m; "
+ r"area = ([0-9.e+-]+) m²; "
+ r"names = (\d+)\s*")
+
+# Matches a duration string with optional days ("N " optionally followed by
+# "day, " / "days, "), optional hours and minutes separated by ':', mandatory
+# seconds and an optional fractional part. Named groups: days, hours, minutes,
+# seconds, microseconds (at most the first six fractional digits are captured;
+# further digits are matched and dropped).
+# NOTE(review): looks like the format of str(datetime.timedelta) - confirm
+# against the code that writes the stage logs.
+RE_TIME_DELTA = re.compile(r'^(?:(?P<days>-?\d+) (days?, )?)?'
+ r'((?:(?P<hours>-?\d+):)(?=\d+:\d+))?'
+ r'(?:(?P<minutes>-?\d+):)?'
+ r'(?P<seconds>-?\d+)'
+ r'(?:\.(?P<microseconds>\d{1,6})\d{0,6})?$')
+
+# Matches a log line ending with "Stage <name>: finished in <duration>".
+# Group 1 is any leading text, group 2 the stage name, group 3 the duration.
+RE_FINISH_STAGE = re.compile(r"(.*)Stage (\w+): finished in (.+)$")
+
+
+def read_stat(f):
+    """Parse per-type statistics rows from an iterable of text lines.
+
+    Each line that matches RE_STAT yields one dict with the keys:
+    "name" (type name with '|' replaced by '-'), "cnt" (int), "len" (float),
+    "area" (float) and "names" (int).
+
+    Lines that do not match RE_STAT (for example blank lines or any other
+    output mixed into the file) are skipped instead of raising
+    AttributeError on the missing match object.
+
+    :param f: iterable of lines, e.g. an open text file.
+    :return: list of dicts, in input order.
+    """
+    stats = []
+    for line in f:
+        m = RE_STAT.match(line)
+        if m is None:
+            # Not a statistics row; nothing to extract from it.
+            continue
+        stats.append({
+            "name": m.group(1).replace("|", "-"),
+            "cnt": int(m.group(2)),
+            "len": float(m.group(3)),
+            "area": float(m.group(4)),
+            "names": int(m.group(5))
+        })
+    return stats
+
+
+def read_config(f):
+    """Parse the statistics types config from an iterable of text lines.
+
+    Every non-blank line has the form "<regex>;<measure>;<title>". The line
+    is split on the first two ';' only, so the title may itself contain ';'.
+    Surrounding whitespace of each field is stripped, the regex is compiled
+    and the measure is lower-cased.
+
+    Blank (or whitespace-only) lines are skipped; previously such a line,
+    e.g. a trailing empty line in the file, raised IndexError.
+
+    :param f: iterable of lines, e.g. an open text file.
+    :return: list of [compiled_regex, measure, title] lists, in input order.
+    """
+    config = []
+    for line in f:
+        if not line.strip():
+            continue
+        columns = [c.strip() for c in line.split(";", 2)]
+        columns[0] = re.compile(columns[0])
+        columns[1] = columns[1].lower()
+        config.append(columns)
+    return config
+
+
+def process_stat(config, stats):
+    """Aggregate parsed type statistics according to the config rows.
+
+    For every config row the regex (row[0]) is matched, anchored at the
+    start, against the "name" of each stats entry, and one field of the
+    matching entries is summed: "len", "area", "names" (for the measure
+    "cnt_names") or "cnt" (for any other measure).
+
+    :param config: rows as produced by read_config.
+    :param stats: entries as produced by read_stat.
+    :return: dict keyed by str(compiled_regex) + measure with the sums.
+    """
+    # Measure name -> key of the stats entry to accumulate.
+    field_by_measure = {"len": "len", "area": "area", "cnt_names": "names"}
+    totals = {}
+    for row in config:
+        pattern, measure = row[0], row[1]
+        field = field_by_measure.get(measure, "cnt")
+        totals[str(pattern) + measure] = sum(
+            entry[field] for entry in stats if pattern.match(entry["name"])
+        )
+    return totals
+
+
+def format_res(res, typ):
+    """Pair a quantity with the unit label of its measure.
+
+    :param res: the aggregated value, returned unchanged.
+    :param typ: measure name; "len" and "area" have their own units, any
+        other value gets the unit used for counts.
+    :return: tuple (res, unit).
+    """
+    unit = "м" if typ == "len" else "м²" if typ == "area" else "шт."
+    return res, unit
+
+
+def make_stats(config_path, stats_path):
+    """Build the report rows for one statistics file.
+
+    Reads the types config and the statistics file, aggregates the
+    statistics per config row and returns one dict per config row, in
+    config order, with the keys "type" (the row title), "quantity" and
+    "unit".
+
+    Both files are opened as UTF-8 explicitly: the config carries non-ASCII
+    titles and the statistics rows contain "m²", so relying on the locale
+    default encoding fails or mis-decodes on non-UTF-8 systems.
+    NOTE(review): assumes the statistics file is written in UTF-8 - confirm
+    against the tool that produces it.
+
+    :param config_path: path to the types config file.
+    :param stats_path: path to the file with per-type statistics rows.
+    :return: list of {"type", "quantity", "unit"} dicts.
+    """
+    with open(config_path, encoding="utf-8") as f:
+        config = read_config(f)
+    with open(stats_path, encoding="utf-8") as f:
+        stats = process_stat(config, read_stat(f))
+    lines = []
+    for param in config:
+        # Same key format as the one produced by process_stat.
+        k = str(param[0]) + param[1]
+        quantity, unit = format_res(stats[k], param[1])
+        lines.append({"type": param[2], "quantity": quantity, "unit": unit})
+    return lines
+
+
+def parse_time(time_str):
+    """Convert a duration string into a datetime.timedelta.
+
+    The string is matched against RE_TIME_DELTA; every named group that is
+    present and non-empty is converted to int and passed to timedelta as the
+    keyword argument of the same name.
+
+    The fractional part is a decimal fraction of a second, so it is
+    right-padded with zeros to six digits before conversion: ".5" means
+    500000 microseconds, not 5.
+
+    :param time_str: text such as "1 day, 2:03:04.000005" or "0:00:01".
+    :return: datetime.timedelta, or None when the string does not match.
+    """
+    m = RE_TIME_DELTA.match(time_str)
+    if not m:
+        return None
+    time_params = {}
+    for name, param in m.groupdict().items():
+        if not param:
+            continue
+        if name == "microseconds":
+            param = param.ljust(6, "0")
+        time_params[name] = int(param)
+    return datetime.timedelta(**time_params)
+
+
+def get_stages_info(log_path):
+    """Collect stage durations from every file in the log directory.
+
+    Each line matching RE_FINISH_STAGE contributes one entry: the stage name
+    mapped to the duration parsed by parse_time. Entries from files whose
+    name starts with "stage_" go to result["stages"]; entries from any other
+    file go to result["countries"][<file name up to the first '.'>].
+
+    :param log_path: directory containing the log files.
+    :return: nested defaultdict with the "stages" and "countries" sections
+        that were encountered.
+    """
+    info = defaultdict(lambda: defaultdict(dict))
+    for log_name in os.listdir(log_path):
+        is_stage_log = log_name.startswith("stage_")
+        with open(os.path.join(log_path, log_name)) as log:
+            for row in log:
+                found = RE_FINISH_STAGE.match(row)
+                if found:
+                    stage_name = found.group(2)
+                    elapsed = parse_time(found.group(3))
+                    if is_stage_log:
+                        info["stages"][stage_name] = elapsed
+                    else:
+                        country = log_name.split(".")[0]
+                        info["countries"][country][stage_name] = elapsed
+    return info
diff --git a/tools/python/maps_generator/maps_generator.py b/tools/python/maps_generator/maps_generator.py
index 4018a56863..78e15d7868 100644
--- a/tools/python/maps_generator/maps_generator.py
+++ b/tools/python/maps_generator/maps_generator.py
@@ -3,7 +3,10 @@ import os
import shutil
from functools import partial
from multiprocessing.pool import ThreadPool
+from collections import defaultdict
import multiprocessing
+import json
+import datetime
from descriptions.descriptions_downloader import (check_and_get_checker,
download_from_wikipedia_tags,
@@ -22,6 +25,7 @@ from .generator.env import (planet_lock_file, build_lock_file,
from .generator.exceptions import (ContinueError, BadExitStatusError,
wait_and_raise_if_fail)
from .generator.gen_tool import run_gen_tool
+from .generator.statistics import make_stats, get_stages_info
from .utils.file import is_verified, download_file, make_tarfile
logger = logging.getLogger("maps_generator")
@@ -238,8 +242,9 @@ def stage_external_resources(env):
for ttf_file in resources:
shutil.copy2(ttf_file, env.intermediate_path)
- shutil.copy2(os.path.join(env.user_resource_path, "WorldCoasts_obsolete.mwm"),
- env.mwm_path)
+ shutil.copy2(
+ os.path.join(env.user_resource_path, "WorldCoasts_obsolete.mwm"),
+ env.mwm_path)
for file in os.listdir(env.mwm_path):
if file.startswith(WORLD_NAME) and file.endswith(".mwm"):
@@ -260,6 +265,46 @@ def stage_localads(env):
@stage
+def stage_statistics(env):
+    """Gather type statistics and stage timings and write them to stats.json.
+
+    For every mwm name that is not in WORLDS_NAMES the generator tool is run
+    with type_statistics=True, its output is stored in
+    "<draft_path>/<country>.stat" and converted to report rows by make_stats
+    using settings.STATS_TYPES_CONFIG. Stage durations are then read from
+    env.log_path with get_stages_info. The combined result is dumped as JSON
+    to "<stats_path>/stats.json".
+
+    :param env: build environment object providing the paths and tools used
+        here.
+    """
+    result = defaultdict(lambda: defaultdict(dict))
+
+    @country_stage_log
+    def stage_mwm_statistics(env, country, **kwargs):
+        stats_tmp = os.path.join(env.draft_path, f"{country}.stat")
+        with open(stats_tmp, "w") as f:
+            maps_stages.run_gen_tool_with_recovery_country(
+                env,
+                env.gen_tool,
+                out=f,
+                err=env.get_subprocess_out(country),
+                data_path=env.mwm_path,
+                user_resource_path=env.user_resource_path,
+                type_statistics=True,
+                output=country,
+                **kwargs
+            )
+        result["countries"][country]["types"] = \
+            make_stats(settings.STATS_TYPES_CONFIG, stats_tmp)
+
+    mwms = env.get_mwm_names()
+    countries = [name for name in mwms if name not in WORLDS_NAMES]
+    with ThreadPool() as pool:
+        pool.map(partial(stage_mwm_statistics, env), countries)
+    stages_info = get_stages_info(env.log_path)
+    result["stages"] = stages_info["stages"]
+    for c in stages_info["countries"]:
+        result["countries"][c]["stages"] = stages_info["countries"][c]
+
+    def default(o):
+        # Durations are stored in their str() form. Any other unsupported
+        # type must raise: returning None would silently write null.
+        if isinstance(o, datetime.timedelta):
+            return str(o)
+        raise TypeError(
+            f"Object of type {type(o).__name__} is not JSON serializable")
+
+    # ensure_ascii=False writes non-ASCII titles verbatim, so the file
+    # encoding must not depend on the locale.
+    stats_file = os.path.join(env.stats_path, "stats.json")
+    with open(stats_file, "w", encoding="utf-8") as f:
+        json.dump(result, f, ensure_ascii=False, sort_keys=True,
+                  indent=2, default=default)
+
+
+@stage
def stage_cleanup(env):
osm2ft_path = os.path.join(env.out_path, "osm2ft")
os.makedirs(osm2ft_path, exist_ok=True)
@@ -283,7 +328,7 @@ STAGES = [s.__name__ for s in
stage_download_and_convert_planet, stage_update_planet,
stage_coastline, stage_preprocess, stage_features, stage_mwm,
stage_descriptions, stage_countries_txt, stage_external_resources,
- stage_localads, stage_cleanup)]
+ stage_localads, stage_statistics, stage_cleanup)]
ALL_STAGES = STAGES + COUNTRIES_STAGES
@@ -343,6 +388,7 @@ def generate_maps(env):
stage_countries_txt(env)
stage_external_resources(env)
stage_localads(env)
+ stage_statistics(env)
stage_cleanup(env)
diff --git a/tools/python/maps_generator/var/etc/map_generator.ini.default b/tools/python/maps_generator/var/etc/map_generator.ini.default
index 2fc56d6361..593781d5a0 100644
--- a/tools/python/maps_generator/var/etc/map_generator.ini.default
+++ b/tools/python/maps_generator/var/etc/map_generator.ini.default
@@ -32,5 +32,8 @@ OSM_TOOLS_PATH: ~/osmctools
# POPULARITY_URL:
# SUBWAY_URL:
# FOOD_URL:
-# FOOD_TRANSLATIONS_URL:
+# FOOD_TRANSLATIONS_URL:
+
+[Stats]
+STATS_TYPES_CONFIG: ${Developer:OMIM_PATH}/tools/python/maps_generator/var/etc/stats_types_config.txt
diff --git a/tools/python/maps_generator/var/etc/stats_types_config.txt b/tools/python/maps_generator/var/etc/stats_types_config.txt
new file mode 100644
index 0000000000..3026e9f86e
--- /dev/null
+++ b/tools/python/maps_generator/var/etc/stats_types_config.txt
@@ -0,0 +1,59 @@
+barrier-(fence|gate);len;Заборы
+building;cnt;Здания
+(amenity|shop|historic)-.*;cnt;POI
+(amenity|shop|historic)-.*;cnt_names;POI c именами
+amenity-(cafe|restaurant|fast_food).*;cnt;Кафе и рестораны
+amenity-(pub|bar);cnt;Бары и пабы
+amenity-kindergarten;cnt;Детские сады
+amenity-(school|university|college);cnt;Школы и университеты
+amenity-parking.*;cnt;Автостоянки
+amenity-parking.*;area;Автостоянки
+amenity-pharmacy;cnt;Аптеки
+amenity-place_of_worship.*;cnt;Храмы
+amenity-(hospital|doctors);cnt;Больницы и поликлиники
+amenity-toilets;cnt;Туалеты
+amenity-(waste_disposal|recycling);cnt;Мусорные баки
+highway-(motorway|trunk|primary|secondary|tertiary|residential|unclassified|service|track|living_street)(_link)?(-.*)?;len;Автодорожная сеть
+highway-(footway|path|pedestrian|steps).*;len;Пешеходные дорожки
+highway-.*-bridge;len;Мосты
+highway-.*-tunnel;len;Туннели
+highway-(footway|path|steps)-bridge;len;Пешеходные мосты
+highway-(footway|path|steps)-tunnel;len;Пешеходные туннели
+highway-steps.*;len;Лестницы
+highway-speed_camera;cnt;Камеры контроля скорости
+internet_access-wlan;cnt;Точки доступа Wi-Fi
+leisure-(pitch|stadium|playing_fields|track|sports_centre).*;cnt;Спортплощадки и комплексы
+leisure-playground;cnt;Детские площадки
+man_made-lighthouse;cnt;Маяки
+man_made-windmill;cnt;Ветряные мельницы
+man_made-pipeline.*;len;Трубопроводы
+natural-beach;cnt;Пляжи
+natural-tree;cnt;Отдельностоящие деревья
+natural-waterfall;cnt;Водопады
+piste:type.*;len;Лыжни
+place-(city.*|town|village|hamlet);cnt;Населённые пункты
+place-island;cnt;Острова
+power-(minor_)?line.*;len;Линии электропередачи
+power-(pole|tower);cnt;Опоры ЛЭП
+railway-(rail|monorail|light_rail|narrow_gauge|preserved|siding|spur|yard|disused|incline).*;len;Железные дороги
+railway-.*-(bridge|tunnel);len;Железнодорожные мосты и туннели
+railway-(razed|abandoned).*;len;Снятые ветки ж/д
+railway-narrow_gauge.*;len;Узкоколейные ж/д
+railway-tram(-.*)?;len;Трамвайные пути
+railway-(halt|station);cnt;Станции железной дороги
+railway-subway.*;len;Линии метро
+highway-bus_stop|railway-tram_stop;cnt;Остановки наземного транспорта
+shop-bakery;cnt;Пекарни
+shop-books;cnt;Книжные магазины
+shop-clothes;cnt;Магазины одежды
+shop-shoes;cnt;Магазины обуви
+shop-(convenience|supermarket);cnt;Продуктовые магазины
+shop-florist;cnt;Цветочные салоны
+shop-(hairdresser|beauty);cnt;Парикмахерские и салоны красоты
+tourism-(guest_house|hos?tel|motel);cnt;Гостиницы и хостелы
+tourism-(attraction|viewpoint);cnt;Достопримечательности и точки обзора
+waterway-(canal|river|stream)(-.*)?;len;Реки, каналы и ручьи
+landuse-cemetery.*;area;Кладбища
+leisure-park.*;area;Парки
+natural-beach;area;Пляжи
+sponsored-booking;cnt;Booking отели