Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
author: Ilya Zverev <zverik@textual.ru> 2017-06-09 20:49:37 +0300
committer: therearesomewhocallmetim <dtv@mapswithme.com> 2017-11-09 13:43:55 +0300
commit: 3dc17dfbb84b24332f25a2ad4dff661b03fe08e5 (patch)
tree: 1d30ce7c7e8124d1146560b2486c8dd4bf06439e /tools
parent: e05e50ee87339abd10bea07eef2aea61bea0c089 (diff)
[python] Update strings converters
Diffstat (limited to 'tools')
-rwxr-xr-x tools/python/categories_merge_language.py 29
-rwxr-xr-x tools/python/convert_strings.py 114
-rwxr-xr-x tools/python/strings_to_csv.py 117
3 files changed, 128 insertions, 132 deletions
diff --git a/tools/python/categories_merge_language.py b/tools/python/categories_merge_language.py
index 58a865fe52..1cf5c89c77 100755
--- a/tools/python/categories_merge_language.py
+++ b/tools/python/categories_merge_language.py
@@ -2,45 +2,44 @@
import sys
import os
def _flush_group(group, trans):
    """Return the output lines for one category's translation entries.

    ``group`` is a list of ``[lang, text]`` pairs.  When ``trans`` is set,
    the first synonym of the English entry is rewritten to
    ``'<english> - <trans>'``; every other entry is emitted unchanged.
    """
    merged = []
    for entry in group:
        # Test the entry being processed, not whichever line was parsed last.
        if trans and entry[0] == 'en':
            parts = entry[1].split('|')
            parts[0] = '{} - {}'.format(parts[0], trans)
            entry = [entry[0], '|'.join(parts)]
        merged.append(':'.join(entry))
    return merged


def merge_language(lines, lang):
    """Yield the lines of categories.txt with ``lang`` merged into English.

    ``lines`` is any iterable of text lines.  Blank lines and lines starting
    with ``#`` separate groups and are passed through (stripped).  The first
    line of each group is its title and is passed through as well; all other
    lines must look like ``lang:text``.

    Raises:
        Exception: if a line inside a group has no ``:`` separator.
    """
    group = []
    trans = None
    # An explicit flag is required: "group is empty" is also true for the
    # first translation line, which must be collected rather than echoed.
    expect_title = True
    for line in (raw.strip() for raw in lines):
        if not line or line[0] == '#':
            for merged in _flush_group(group, trans):
                yield merged
            group = []
            trans = None
            expect_title = True
            yield line
        elif expect_title:
            expect_title = False
            yield line
        else:
            if ':' not in line:
                raise Exception('Line {} is not a translation line'.format(line))
            # Split once so a ':' inside the translation text is preserved.
            entry = line.split(':', 1)
            group.append(entry)
            if entry[0] == lang:
                trans = entry[1].split('|')[0]
    # The file may end without a trailing separator; do not lose that group.
    for merged in _flush_group(group, trans):
        yield merged


def main():
    """Command-line entry point: print the merged categories to stdout."""
    path = os.path.join(os.path.dirname(sys.argv[0]), '..', '..', 'data', 'categories.txt')
    if len(sys.argv) < 2:
        print('Merges some language in categories.txt with English')
        print('Usage: {} <lang> [path_to_categories.txt]'.format(sys.argv[0]))
        print('Default path to categories: {}'.format(path))
        if not os.path.exists(path):
            print('Warning: default path to categories.txt will fail')
        sys.exit(1)

    lang = sys.argv[1]
    if len(sys.argv) > 2:
        path = sys.argv[2]

    with open(path, 'r') as f:
        for out_line in merge_language(f, lang):
            print(out_line)


if __name__ == '__main__':
    main()
diff --git a/tools/python/convert_strings.py b/tools/python/convert_strings.py
new file mode 100755
index 0000000000..8276191914
--- /dev/null
+++ b/tools/python/convert_strings.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+import argparse
+import csv
+import sys
+
+
def langs_order(lang):
    """Return a sort key that puts 'en' first, 'ru' second, the rest by code.

    The keys '0' and '1' sort before any alphabetic language code, so the
    remaining languages follow in plain string order.
    """
    if lang == 'en':
        return '0'
    if lang == 'ru':
        return '1'
    return lang
+
+
def read_strings(fin):
    """Parse a strings.txt-style stream into ``(title, translations)`` pairs.

    ``fin`` is an iterable of text lines.  A line starting with ``[`` opens a
    new section whose title is the line with surrounding brackets and spaces
    removed.  ``key = value`` lines inside a section are collected into a
    dict.  Blank lines, lines before the first section and sections without
    any ``key = value`` line are skipped.

    Yields:
        tuple: ``(title, {key: value, ...})`` for each non-empty section.
    """
    title = None
    translations = {}
    for line in fin:
        line = line.strip()
        if not line:
            continue
        if line.startswith('['):
            if translations:
                yield title, translations
            title = line.strip('[ ]')
            translations = {}
        elif '=' in line and title:
            # Split on the first '=' only: the translation may contain '='.
            lang, text = line.split('=', 1)
            translations[lang.strip()] = text.strip()
    if translations:
        yield title, translations
+
+
def from_csv(fin, fout, delim):
    """Convert a CSV table into strings.txt sections.

    The first CSV row is the header: column 0 is the key column and every
    other column is named after a language.  Each following row becomes a
    ``[key]`` section with one ``lang = text`` line per non-empty cell,
    followed by a blank line.

    Args:
        fin: iterable of CSV text lines.
        fout: writable text stream.
        delim: one-character CSV delimiter.
    """
    reader = csv.reader(fin, delimiter=delim)
    header = next(reader, None)
    if header is None:
        # Empty input: nothing to convert.
        return
    languages = [name.strip() for name in header[1:]]
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; they carry no key.
            continue
        fout.write('[{}]\n'.format(row[0]))
        # zip() ignores cells beyond the header instead of raising IndexError.
        for lang, cell in zip(languages, row[1:]):
            text = cell.strip()
            if text:
                fout.write('{} = {}\n'.format(lang, text))
        fout.write('\n')
+
+
def to_csv(fin, fout, delim, langs):
    """Convert strings.txt sections into a CSV table.

    The header row is ``Key`` followed by the language codes; each section
    becomes one row with an empty cell for every missing translation.

    Args:
        fin: iterable of strings.txt lines (parsed with ``read_strings``).
        fout: writable text stream for the CSV output.
        delim: one-character CSV delimiter.
        langs: list of language codes to use as columns, or ``None`` to
            autodetect them from the input (sorted with ``langs_order``).
    """
    writer = csv.writer(fout, delimiter=delim)

    def write_row(title, translations, columns):
        writer.writerow([title] + [translations.get(lang, '') for lang in columns])

    if langs is not None:
        # Columns are known up front, so rows can be streamed.
        writer.writerow(['Key'] + list(langs))
        for title, trans in read_strings(fin):
            write_row(title, trans, langs)
        return

    # Autodetect: the whole input is needed before the header can be written.
    strings = list(read_strings(fin))
    found = set()
    for _, trans in strings:
        # Language codes are the dict keys; the values are translated texts.
        found.update(trans)

    columns = sorted(found, key=langs_order)
    writer.writerow(['Key'] + columns)
    for title, trans in strings:
        write_row(title, trans, columns)
+
+
def from_categories(fin, fout):
    """Convert categories.txt to strings.txt (not implemented).

    Raises:
        NotImplementedError: always.  It is a subclass of ``Exception``, so
            callers catching ``Exception`` are unaffected.
    """
    raise NotImplementedError('This conversion has not been implemented yet.')
+
+
def to_categories(fin, fout):
    """Convert strings.txt sections into categories.txt groups.

    Each section is written as its title line, then one ``lang:^text`` line
    per translation (ordered with ``langs_order``), then a blank line.

    Args:
        fin: iterable of strings.txt lines (parsed with ``read_strings``).
        fout: writable text stream.
    """
    for title, trans in read_strings(fin):
        fout.write('{}\n'.format(title))
        for lang in sorted(trans, key=langs_order):
            fout.write('{}:^{}\n'.format(lang, trans[lang]))
        fout.write('\n')
+
+
if __name__ == '__main__':
    # Command-line entry point: parse options, pick a conversion, run it.
    parser = argparse.ArgumentParser(description='Converts between strings.txt, csv files and categories.txt.')
    parser.add_argument('input', type=argparse.FileType('r'), help='Input file')
    parser.add_argument('-o', '--output', default='-', help='Output file, "-" for stdout')
    parser.add_argument('-d', '--delimiter', default=',', help='CSV delimiter')
    parser.add_argument('-l', '--langs', help='List of langs for csv: empty for default, "?" to autodetect, comma-separated for a list')
    parser.add_argument('--csv2s', action='store_true', help='CSV -> TXT')
    parser.add_argument('--s2csv', action='store_true', help='TXT -> CSV')
    parser.add_argument('--cat2s', action='store_true', help='Categories -> TXT')
    parser.add_argument('--s2cat', action='store_true', help='TXT -> Categories')
    options = parser.parse_args()

    # Validate before opening the output so a usage error cannot truncate it.
    if not (options.csv2s or options.s2csv or options.cat2s or options.s2cat):
        parser.error('Please select a conversion direction.')

    if not options.langs:
        langs = 'en ru ar cs da nl fi fr de hu id it ja ko nb pl pt ro es sv th tr uk vi zh-Hans zh-Hant he sk'.split()
    elif options.langs == '?':
        langs = None
    else:
        langs = options.langs.split(',')

    fout = sys.stdout if options.output == '-' else open(options.output, 'w')
    try:
        if options.csv2s:
            from_csv(options.input, fout, options.delimiter)
        elif options.s2csv:
            to_csv(options.input, fout, options.delimiter, langs)
        elif options.cat2s:
            from_categories(options.input, fout)
        else:
            to_categories(options.input, fout)
    finally:
        # Close what this script opened, but leave the standard streams alone.
        if options.input is not sys.stdin:
            options.input.close()
        if fout is not sys.stdout:
            fout.close()
diff --git a/tools/python/strings_to_csv.py b/tools/python/strings_to_csv.py
deleted file mode 100755
index e4221c9ed8..0000000000
--- a/tools/python/strings_to_csv.py
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/usr/bin/env python
-import argparse
-import csv
-import sys
-
-
-class HeaderProcessor:
- def lang_from_csv(self, column):
- return column
-
- def lang_to_csv(self, column):
- return column
-
- def section_from_csv(self, title):
- return title
-
- def section_to_csv(self, title):
- return title
-
-
-def from_csv(fin, fout, proc, delim):
- r = csv.reader(fin, delimiter=delim)
- header = next(r)
- for i in range(1, len(header)):
- header[i] = proc.lang_from_csv(header[i])
- for row in r:
- fout.write('[{}]\n'.format(proc.section_from_csv(row[0])))
- for i in range(1, len(row)):
- if len(row[i].strip()) > 0:
- fout.write('{} = {}\n'.format(header[i], row[i]))
- fout.write('\n')
-
-
-def to_csv(fin, fout, proc, delim, langs):
- def write_line(writer, title, translations, langs):
- row = [title]
- for l in langs:
- row.append('' if l not in translations else translations[l])
- writer.writerow(row)
-
- strings = []
- w = csv.writer(fout, delimiter=delim)
- if langs is not None:
- w.writerow([''] + langs)
- curtitle = None
- curtrans = {}
- for line in fin:
- line = line.strip()
- if len(line) == 0:
- continue
- elif line[0] == '[':
- if curtrans:
- if langs is None:
- strings.append((curtitle, curtrans))
- else:
- write_line(w, curtitle, curtrans, langs)
- curtitle = line[1:-1]
- curtrans = {}
- elif '=' in line and curtitle:
- lang, trans = (x.strip() for x in line.split('='))
- curtrans[lang] = trans
- if curtrans:
- if langs is None:
- strings.append((curtitle, curtrans))
- else:
- write_line(w, curtitle, curtrans, langs)
-
- # If we don't have langs, build a list and print
- if langs is None:
- l = set()
- for s in strings:
- l.update(list(s[1].values()))
-
- def str_sort(k):
- if k == 'en':
- return '0'
- elif k == 'ru':
- return '1'
- return l
- l = sorted(l, key=str_sort)
- for s in strings:
- write_line(w, s[0], s[1], l)
-
-
-if __name__ == '__main__':
- processors = {
- 'default': HeaderProcessor()
- }
-
- parser = argparse.ArgumentParser(description='Coverts string.txt to csv and back')
- parser.add_argument('input', type=argparse.FileType('r'), help='Input file')
- parser.add_argument('-o', '--output', default='-', help='Output file, "-" for stdout')
- parser.add_argument('-d', '--delimiter', default=',', help='CSV delimiter')
- parser.add_argument('-l', '--langs', help='List of langs for csv: empty for default, "?" to autodetect, comma-separated for a list')
- parser.add_argument('-p', '--processor', default='default', help='Name of a header processor ({})'.format(','.join(processors.keys())))
- parser.add_argument('--csv-in', action='store_true', help='CSV -> TXT')
- parser.add_argument('--csv-out', action='store_true', help='TXT -> CSV')
- options = parser.parse_args()
-
- fout = sys.stdout if options.output == '-' else open(options.output, 'w')
- if options.csv_in:
- csv_in = True
- elif options.csv_out:
- csv_in = False
- else:
- raise ValueError('Autodetection is not implemented yet.')
-
- if csv_in:
- from_csv(options.input, fout, processors[options.processor], options.delimiter)
- else:
- if not options.langs:
- langs = 'en ru ar cs da nl fi fr de hu id it ja ko nb pl pt ro es sv th tr uk vi zh-Hans zh-Hant he sk'.split()
- elif options.langs == '?':
- langs = None
- else:
- langs = options.langs.split(',')
- to_csv(options.input, fout, processors[options.processor], options.delimiter, langs)