Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/tools
diff options
context:
space:
mode:
author: Maksim Andrianov <maksimandrianov1@gmail.com>, 2019-05-07 13:52:09 +0300
committer: Maksim Andrianov <maksimandrianov1@gmail.com>, 2019-05-08 11:24:49 +0300
commit: 949f4d6dfc15701a506d75ddaef179b4724ad9b3 (patch)
tree: 98edca6ddeb0b42b1b97d72b349e7e5bc65081cf /tools
parent: 15cec734e2f5ca73fae3627233044cbd0f98ceef (diff)
Review fixes
Diffstat (limited to 'tools')
-rw-r--r-- tools/python/post_generation/__main__.py 19
-rwxr-xr-x tools/python/post_generation/hierarchy_to_countries.py 101
-rwxr-xr-x tools/python/post_generation/localads_mwm_to_csv.py 7
-rwxr-xr-x tools/unix/generate_planet.sh 4
4 files changed, 64 insertions, 67 deletions
diff --git a/tools/python/post_generation/__main__.py b/tools/python/post_generation/__main__.py
index 6f7d4cf111..c004c7deb9 100644
--- a/tools/python/post_generation/__main__.py
+++ b/tools/python/post_generation/__main__.py
@@ -9,9 +9,9 @@ from .localads_mwm_to_csv import create_csv
class PostGeneration:
def __init__(self):
parser = argparse.ArgumentParser(
- description="Post generation instruments",
+ description="Post-generation instruments",
usage="""post_generation <command> [<args>]
-The most commonly used post_generation commands are:
+The post_generation commands are:
localads_mwm_to_csv Prepares CSV files for uploading to localads database from mwm files.
hierarchy_to_countries Produces countries.txt from hierarchy.txt.
""")
@@ -42,17 +42,16 @@ The most commonly used post_generation commands are:
help="path to omim/data/types.txt")
parser.add_argument("--threads",
type=int,
+ default=1,
help="number of threads to process files")
- parser.add_argument("--version", type=int, help="override mwm version")
- parser.add_argument("--debug",
- action="store_true",
- help="debug parse_mwm call")
+ parser.add_argument("--mwm_version", type=int, required=True,
+ help="Mwm version")
args = parser.parse_args(sys.argv[2:])
if not args.osm2ft:
args.osm2ft = args.mwm
create_csv(args.output, args.mwm, args.osm2ft, args.types,
- args.version, args.threads, args.debug)
+ args.mwm_version, args.threads)
@staticmethod
def hierarchy_to_countries():
@@ -67,15 +66,15 @@ The most commonly used post_generation commands are:
help="old_vs_new.csv file")
parser.add_argument("--osm", required=True,
help="borders_vs_osm.csv file")
- parser.add_argument("--version", type=int, default=151231,
- help="Version")
+ parser.add_argument("--mwm_version", type=int, required=True,
+ help="Mwm version")
parser.add_argument("-o", "--output", required=True,
help="Output countries.txt file (default is stdout)")
args = parser.parse_args(sys.argv[2:])
countries_json = hierarchy_to_countries_(args.old, args.osm,
args.hierarchy,
args.target,
- args.version)
+ args.mwm_version)
if args.output:
with open(args.output, "w") as f:
f.write(countries_json)
diff --git a/tools/python/post_generation/hierarchy_to_countries.py b/tools/python/post_generation/hierarchy_to_countries.py
index 66dddbb92b..c5683b6252 100755
--- a/tools/python/post_generation/hierarchy_to_countries.py
+++ b/tools/python/post_generation/hierarchy_to_countries.py
@@ -4,7 +4,7 @@
#
# Sample lines:
# Iran;Q794;ir;fa
-# Iran_South;Q794-South
+# Iran_South;Q794-South
#
# Number of leading spaces mean hierarchy depth. In above case, Iran_South is inside Iran.
# Then follows a semicolon-separated list:
@@ -12,6 +12,7 @@
# 2. Region name template using wikidata Qxxx codes and predefined strings
# 3. Country ISO code (used for flags in the legacy format)
# 4. Comma-separated list of language ISO codes for the region
+
import base64
import hashlib
import json
@@ -22,7 +23,7 @@ import re
class CountryDict(dict):
def __init__(self, *args, **kwargs):
dict.__init__(self, *args, **kwargs)
- self.order = ["id", "n", "f", "v", "c", "s", "sha1_base64", "rs", "g"]
+ self.order = ["id", "n", "v", "c", "s", "sha1_base64", "rs", "g"]
def __iter__(self):
for key in self.order:
@@ -37,7 +38,7 @@ class CountryDict(dict):
yield (key, self.__getitem__(key))
-def get_hash(path, name):
+def get_mwm_hash(path, name):
filename = os.path.join(path, f"{name}.mwm")
h = hashlib.sha1()
with open(filename, "rb") as f:
@@ -46,7 +47,7 @@ def get_hash(path, name):
return str(base64.b64encode(h.digest()), "utf-8")
-def get_size(path, name):
+def get_mwm_size(path, name):
filename = os.path.join(path, f"{name}.mwm")
return os.path.getsize(filename)
@@ -82,14 +83,14 @@ def parse_old_vs_new(old_vs_new_csv_path):
if not old_vs_new_csv_path:
return oldvs
- with open(old_vs_new_csv_path, "r") as f:
+ with open(old_vs_new_csv_path) as f:
for line in f:
m = re.match(r"(.+?)\t(.+)", line.strip())
- if m:
- if m.group(2) in oldvs:
- oldvs[m.group(2)].append(m.group(1))
- else:
- oldvs[m.group(2)] = [m.group(1)]
+ assert m
+ if m.group(2) in oldvs:
+ oldvs[m.group(2)].append(m.group(1))
+ else:
+ oldvs[m.group(2)] = [m.group(1)]
return oldvs
@@ -98,60 +99,60 @@ def parse_borders_vs_osm(borders_vs_osm_csv_path):
if not borders_vs_osm_csv_path:
return vsosm
- with open(borders_vs_osm_csv_path, "r") as f:
+ with open(borders_vs_osm_csv_path) as f:
for line in f:
- m = re.match(r"^(.+?)\t(\d)\t(.+?)$", line.strip())
- if m:
- if m.group(1) in vsosm:
- vsosm[m.group(1)].append(m.group(3))
- else:
- vsosm[m.group(1)] = [m.group(3)]
+ m = re.match(r"(.+)\t(\d)\t(.+)", line.strip())
+ assert m
+ if m.group(1) in vsosm:
+ vsosm[m.group(1)].append(m.group(3))
+ else:
+ vsosm[m.group(1)] = [m.group(3)]
return vsosm
def hierarchy_to_countries(old_vs_new_csv_path, borders_vs_osm_csv_path,
hierarchy_path, target_path, version):
+
+ def fill_last(last, stack):
+ name = last["id"]
+ last["s"] = get_mwm_size(target_path, name)
+ last["sha1_base64"] = get_mwm_hash(target_path, name)
+ if last["s"] >= 0:
+ stack[-1]["g"].append(last)
+
oldvs = parse_old_vs_new(old_vs_new_csv_path)
vsosm = parse_borders_vs_osm(borders_vs_osm_csv_path)
stack = [CountryDict(v=version, nameattr="Countries", g=[])]
last = None
- with open(hierarchy_path, "r") as f:
+ with open(hierarchy_path) as f:
for line in f:
- m = re.match("( *)(.+?)\n", line)
- if m:
- depth = len(m.group(1))
- if last is not None:
- lastd = last["d"]
- del last["d"]
- if lastd < depth:
- # last is a group
- last["g"] = []
- stack.append(last)
- else:
- name = last["f" if "f" in last else "id"]
- last["s"] = get_size(target_path, name)
- last["sha1_base64"] = get_hash(target_path, name)
- if last["s"] >= 0:
- stack[-1]["g"].append(last)
- while depth < len(stack) - 1:
- # group ended, add it to higher group
- g = stack.pop()
- if len(g["g"]) > 0:
- stack[-1]["g"].append(g)
- items = m.group(2).split(";")
- last = CountryDict({"id": items[0], "d": depth})
- if items[0] in oldvs:
- last["old"] = oldvs[items[0]]
- if items[0] in vsosm:
- last["affiliations"] = vsosm[items[0]]
+ m = re.match("( *).+", line)
+ assert m
+ depth = len(m.group(1))
+ if last is not None:
+ lastd = last["d"]
+ del last["d"]
+ if lastd < depth:
+ # last is a group
+ last["g"] = []
+ stack.append(last)
+ else:
+ fill_last(last, stack)
+ while depth < len(stack) - 1:
+ # group ended, add it to higher group
+ g = stack.pop()
+ if len(g["g"]) > 0:
+ stack[-1]["g"].append(g)
+ items = m.group(2).split(";")
+ last = CountryDict({"id": items[0], "d": depth})
+ if items[0] in oldvs:
+ last["old"] = oldvs[items[0]]
+ if items[0] in vsosm:
+ last["affiliations"] = vsosm[items[0]]
# the last line is always a file
del last["d"]
- name = last["f" if "f" in last else "id"]
- last["s"] = get_size(target_path, name)
- last["sha1_base64"] = get_hash(target_path, name)
- if last["s"] >= 0:
- stack[-1]["g"].append(last)
+ fill_last(last, stack)
while len(stack) > 1:
g = stack.pop()
if len(g["g"]) > 0:
diff --git a/tools/python/post_generation/localads_mwm_to_csv.py b/tools/python/post_generation/localads_mwm_to_csv.py
index caa0b77d7a..af779a09c5 100755
--- a/tools/python/post_generation/localads_mwm_to_csv.py
+++ b/tools/python/post_generation/localads_mwm_to_csv.py
@@ -80,7 +80,7 @@ def write_csv(output_dir, qtype):
mapping = QUEUES[qtype].get()
-def create_csv(output, mwm_path, osm2ft_path, types, version, threads, debug=False):
+def create_csv(output, mwm_path, osm2ft_path, types, version, threads):
if not os.path.isdir(output):
os.mkdir(output)
@@ -97,10 +97,7 @@ def create_csv(output, mwm_path, osm2ft_path, types, version, threads, debug=Fal
logging.error("Cannot find %s", osm2ft_name)
sys.exit(2)
parse_mwm_args = (os.path.join(mwm_path, mwm_name), osm2ft_name, version, types)
- if debug:
- parse_mwm(*parse_mwm_args)
- else:
- pool.apply_async(parse_mwm, parse_mwm_args)
+ pool.apply_async(parse_mwm, parse_mwm_args)
pool.close()
pool.join()
for queue in QUEUES.values():
diff --git a/tools/unix/generate_planet.sh b/tools/unix/generate_planet.sh
index 0d388b98de..1f5e867bf7 100755
--- a/tools/unix/generate_planet.sh
+++ b/tools/unix/generate_planet.sh
@@ -635,7 +635,7 @@ fi
if [ "$MODE" == "resources" ]; then
putmode "Step 8: Updating resource lists"
# Update countries list
- $PYTHON36 -m $POST_GENERATION_MODULE hierarchy_to_countries --target "$TARGET" --hierarchy "$DATA_PATH/hierarchy.txt" --version "$COUNTRIES_VERSION" \
+ $PYTHON36 -m $POST_GENERATION_MODULE hierarchy_to_countries --target "$TARGET" --hierarchy "$DATA_PATH/hierarchy.txt" --mwm_version "$COUNTRIES_VERSION" \
--old "$DATA_PATH/old_vs_new.csv" --osm "$DATA_PATH/borders_vs_osm.csv" --output "$TARGET/countries.txt" >> "$PLANET_LOG" 2>&1
# A quick fix: chmodding to a+rw all generated files
@@ -674,7 +674,7 @@ if [ -n "${LOCALADS-}" ]; then
LOCALADS_LOG="$LOG_PATH/localads.log"
LOCALADS_PATH="$INTDIR/localads"
mkdir -p "$LOCALADS_PATH"
- $PYTHON36 -m "$POST_GENERATION_MODULE" localads_mwm_to_csv "$TARGET" --osm2ft "$INTDIR" --version "$COUNTRIES_VERSION" --types "$DATA_PATH/types.txt" --output "$LOCALADS_PATH" >> "$LOCALADS_LOG" 2>&1
+ $PYTHON36 -m "$POST_GENERATION_MODULE" localads_mwm_to_csv "$TARGET" --osm2ft "$INTDIR" --mwm_version "$COUNTRIES_VERSION" --types "$DATA_PATH/types.txt" --output "$LOCALADS_PATH" >> "$LOCALADS_LOG" 2>&1
LOCALADS_ARCHIVE="localads_$COUNTRIES_VERSION.tgz"
cd "$LOCALADS_PATH"
tar -czf "$LOCALADS_ARCHIVE" *.csv >> "$LOCALADS_LOG" 2>&1