Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/tools
diff options
context:
space:
mode:
author: greshilov <slovaricheg@gmail.com> 2018-07-23 17:39:51 +0300
committer: mpimenov <mpimenov@users.noreply.github.com> 2018-07-26 16:50:25 +0300
commit: d8aa7b532a2cb22fe2599a39d5375ea40ba55d2d (patch)
tree: ee9216b791e6afc908d98e60e8ccfecead88258e /tools
parent: 7a53568ea1c2c2572b05ca473c313156d84e9c7d (diff)
[python] Update mwm_downloader
- Fix redirect - Review fixes
Diffstat (limited to 'tools')
-rwxr-xr-x tools/python/mwm_downloader.py 125
1 files changed, 79 insertions, 46 deletions
diff --git a/tools/python/mwm_downloader.py b/tools/python/mwm_downloader.py
index fbee37dac3..daa70e5b85 100755
--- a/tools/python/mwm_downloader.py
+++ b/tools/python/mwm_downloader.py
@@ -1,65 +1,97 @@
#!/usr/bin/env python3
import argparse
+import itertools
+import json
+import logging
import os
import re
-import time
+import random
+import socket
import sys
+import time
import urllib.request
import urllib.parse
import urllib.error
-import logging
+
from multiprocessing.pool import ThreadPool
+from pathlib import Path
+
+ROOT = Path(__file__).parent.absolute()
+OMIM_ROOT = ROOT / '..' / '..'
+DEFAULT_DOWNLOAD_DIRECTORY = OMIM_ROOT / 'data'
+COUNTRIES_TXT = OMIM_ROOT / 'data' / 'countries.txt'
+TIMEOUT = 3
-DIRECT_MAP_URL = 'http://direct.mapswithme.com/direct/'
-ROOT = os.path.dirname(os.path.realpath(__file__))
-DEFAULT_DOWNLOAD_DIRECTORY = os.path.join(os.path.dirname(os.path.dirname(ROOT)), 'data')
-MAP_REVISION_REGEXP = re.compile(r'(\d{6})')
+URL_PATTERN = 'http://{prefix}.mapswithme.com/direct/{version}/{name}.mwm'
+MAP_SERVERS = ('maps-dl-ru1', 'maps-dl-ru2', 'maps-dl-ru3', 'maps-dl-ams1',
+ 'maps-dl-eu2', 'maps-dl-us1')
MWM_NAME_REGEXP = re.compile(r'"(\S+\.mwm)"')
logger = logging.getLogger(__name__)
-def parse_html_by_regexp(url, regexp):
- def parse_line(line):
- return regexp.search(line.decode('utf-8'))
-
- with urllib.request.urlopen(url) as http_response_lines:
- filtered_lines = filter(None, map(parse_line, http_response_lines))
- for line in filtered_lines:
- yield line.group(1)
+def country_names_generator(country_obj):
+ """
+ See more info about 'countries.txt' file format in 'omim/storage/country.cpp'.
+ """
+ if 'g' in country_obj:
+ yield from country_names_generator(country_obj['g'])
+ elif 's' in country_obj:
+ yield country_obj['id']
+ elif type(country_obj) is list:
+ for country in country_obj:
+ yield from country_names_generator(country)
-def get_mwm_versions():
- return map(int, parse_html_by_regexp(DIRECT_MAP_URL, MAP_REVISION_REGEXP))
+def is_redirected(url):
+ response = urllib.request.urlopen(url)
+ return response.url != url
-def get_mwm_names(version_num):
- version_url = '{base}{version}'.format(base=DIRECT_MAP_URL, version=version_num)
- return map(urllib.parse.unquote, parse_html_by_regexp(version_url, MWM_NAME_REGEXP))
+def download_file(url, filename, network_attempts=3):
+ try:
+ if is_redirected(url):
+ logging.error('Maps server not found in \'{url}\''.format(url=url))
+ return False
+ with urllib.request.urlopen(url, timeout=TIMEOUT) as response:
+ content = response.read()
+ with open(filename, 'wb') as f:
+ f.write(content)
-def download_file(arg_tuple, attempts=3):
- url, filename = arg_tuple
- try:
- urllib.request.urlretrieve(url, filename=filename)
except urllib.error.HTTPError as err:
if 400 <= err.code < 500:
- logger.error('URL \'{url}\' is not found'.format(url=url), exc_info=True)
+ logger.error('URL \'{url}\' is not found'.format(url=url))
return False
- except PermissionError:
- logger.error('Can\'t write file {filename}: permission denied'.format(filename=filename), exc_info=True)
+ except (PermissionError, FileNotFoundError) as e:
+ logger.error('Can\'t write file {filename}: {error}'.format(filename=filename, error=e))
return False
- except urllib.error.URLError:
- if attempts:
- time.sleep(3)
- return download_file(arg_tuple, attempts - 1)
- logger.error('File {filename} is not loaded'.format(filename=filename), exc_info=True)
+ except (urllib.error.URLError, socket.timeout):
+ if network_attempts > 0:
+ time.sleep(TIMEOUT)
+ return download_file(url, filename, network_attempts - 1)
+ logger.error('File {filename} is not loaded'.format(filename=filename))
return False
+
return True
+def download_map(arg_tuple):
+ version, mwm_name, filename = arg_tuple
+ mwm_name = urllib.parse.quote(mwm_name)
+ random_prefix_generator = (random.choice(MAP_SERVERS) for _ in range(0, len(MAP_SERVERS)))
+
+ for server_prefix in random_prefix_generator:
+ url = URL_PATTERN.format(prefix=server_prefix, version=version, name=mwm_name)
+ if download_file(url, filename):
+ return True
+ else:
+ logging.error('Failed to load mwm \'{}\' from server \'{}\''.format(mwm_name, server_prefix))
+ return False
+
+
def progress_bar(total, progress):
"""
Displays or updates a console progress bar.
@@ -83,46 +115,47 @@ def progress_bar(total, progress):
def download_mwm_list(version=None, threads=8, folder=None, mwm_prefix_list=None, quiet=True):
- versions = list(get_mwm_versions())
+ try:
+ countries = json.load(open(str(COUNTRIES_TXT), 'r'))
+ except (OSError, json.decoder.JSONDecodeError) as e:
+ logging.error('File \'omim/data/countries.txt\' is corrupted.', exc_info=True)
+ exit(1)
- if version is None:
- # Get latest version by default
- version = max(versions)
- elif version not in versions:
- logger.error('Maps with version {version} is not found in {url}'.format(version=version, url=DIRECT_MAP_URL))
- return
+ mwm_names = country_names_generator(countries)
- mwm_names = get_mwm_names(version)
if mwm_prefix_list is not None:
mwm_regexp = re.compile('|'.join(mwm_prefix_list))
mwm_to_download = filter(lambda m: mwm_regexp.match(m), mwm_names)
else:
mwm_to_download = mwm_names
+ if version is None:
+ version = countries['v']
+
if folder is None:
folder = '{base}/{version}'.format(base=DEFAULT_DOWNLOAD_DIRECTORY, version=version)
os.makedirs(folder, exist_ok=True)
- def generate_args(mwm_name):
- unescaped_name = urllib.parse.quote(mwm_name)
- url = '{base}{version}/{mwm_name}'.format(base=DIRECT_MAP_URL, version=version, mwm_name=unescaped_name)
- filename = '{folder}/{mwm_name}'.format(folder=folder, mwm_name=mwm_name)
- return url, filename
+ def filename(mwm_name):
+ return '{folder}/{mwm_name}.mwm'.format(folder=folder, mwm_name=mwm_name)
- mwm_args = list(map(generate_args, mwm_to_download))
+ mwm_args = [(version, mwm_name, filename(mwm_name)) for mwm_name in mwm_to_download]
# No sense to show progress bar for one map
if len(mwm_args) < 2:
quiet = True
pool = ThreadPool(processes=threads)
- for num, _ in enumerate(pool.imap_unordered(download_file, mwm_args)):
+ for num, _ in enumerate(pool.imap_unordered(download_map, mwm_args)):
if not quiet:
progress_bar(len(mwm_args), num + 1)
if __name__ == '__main__':
+ if sys.version_info < (3, 4):
+ raise RuntimeError('This script requires Python 3.4+')
+
parser = argparse.ArgumentParser(description='Script to download \'.mwm\' files in multiple threads')
parser.add_argument('-v', '--version', help='Map version number e.g. 180126', type=int, default=None)
parser.add_argument('-t', '--threads', help='Threads count', type=int, default=8)