From 3dc17dfbb84b24332f25a2ad4dff661b03fe08e5 Mon Sep 17 00:00:00 2001 From: Ilya Zverev Date: Fri, 9 Jun 2017 20:49:37 +0300 Subject: [python] Update strings converters --- tools/python/categories_merge_language.py | 29 ++++---- tools/python/convert_strings.py | 114 +++++++++++++++++++++++++++++ tools/python/strings_to_csv.py | 117 ------------------------------ 3 files changed, 128 insertions(+), 132 deletions(-) create mode 100755 tools/python/convert_strings.py delete mode 100755 tools/python/strings_to_csv.py (limited to 'tools') diff --git a/tools/python/categories_merge_language.py b/tools/python/categories_merge_language.py index 58a865fe52..1cf5c89c77 100755 --- a/tools/python/categories_merge_language.py +++ b/tools/python/categories_merge_language.py @@ -2,45 +2,44 @@ import sys import os +path = os.path.join(os.path.dirname(sys.argv[0]), '..', '..', 'data', 'categories.txt') if len(sys.argv) < 2: print('Merges some language in categories.txt with English') print('Usage: {} [path_to_categories.txt]'.format(sys.argv[0])) + print('Default path to categories: {}'.format(path)) + if not os.path.exists(path): + print('Warning: default path to categories.txt will fail') sys.exit(1) lang = sys.argv[1] if len(sys.argv) > 2: path = sys.argv[2] -else: - path = os.path.join(os.path.dirname(sys.argv[0]), '..', '..', 'data', 'categories.txt') with open(path, 'r') as f: - first = True langs = [] trans = None def flush_langs(): - for l in langs: + for lang in langs: if trans and l[0] == 'en': - parts = l[1].split('|') + parts = lang[1].split('|') parts[0] = '{} - {}'.format(parts[0], trans) - l[1] = '|'.join(parts) - print(':'.join(l)) + lang[1] = '|'.join(parts) + print(':'.join(lang)) - for line in f: - if len(line.strip()) == 0 or line[0] == '#': + for line in map(str.strip, f): + if len(line) == 0 or line[0] == '#': if langs: flush_langs() langs = [] trans = None - print(line.strip()) - first = True - elif first: - print(line.strip()) - first = False + print(line) + elif not langs: + print(line) else: if ':' not in line: raise Exception('Line {} is not a translation line'.format(line)) - l = line.strip().split(':') + l = line.split(':') langs.append(l) if l[0] == lang: trans = l[1].split('|')[0] diff --git a/tools/python/convert_strings.py b/tools/python/convert_strings.py new file mode 100755 index 0000000000..8276191914 --- /dev/null +++ b/tools/python/convert_strings.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python +import argparse +import csv +import sys + + +def langs_order(lang): + if lang == 'en': + return '0' + elif lang == 'ru': + return '1' + return lang + + +def read_strings(fin): + curtitle = None + curtrans = {} + for line in filter(None, map(str.strip, fin)): + if line[0].startswith('['): + if curtrans: + yield curtitle, curtrans + curtitle = line.strip('[ ]') + curtrans = {} + elif '=' in line and curtitle: + lang, trans = (x.strip() for x in line.split('=')) + curtrans[lang] = trans + if curtrans: + yield curtitle, curtrans + + +def from_csv(fin, fout, delim): + r = csv.reader(fin, delimiter=delim) + header = next(r) + for row in r: + fout.write('[{}]\n'.format(row[0])) + for i, col in enumerate(map(str.strip, row)): + if len(col) > 0 and i > 0: + fout.write('{} = {}\n'.format(header[i], col)) + fout.write('\n') + + +def to_csv(fin, fout, delim, langs): + def write_line(writer, title, translations, langs): + row = [title] + for lang in langs: + row.append('' if lang not in translations else translations[lang]) + writer.writerow(row) + + w = csv.writer(fout, delimiter=delim) + if langs is not None: + w.writerow(['Key'] + langs) + + strings = [] + for title, trans in read_strings(fin): + if langs is None: + strings.append((title, trans)) + else: + write_line(w, title, trans, langs) + + # If we don't have langs, build a list and print + if langs is None: + langs = set() + for s in strings: + langs.update(list(s[1].values())) + + langs = sorted(langs, key=langs_order) + w.writerow(['Key'] + langs) + for s in strings: + write_line(w, s[0], s[1], langs) + + +def from_categories(fin, fout): + raise Exception('This conversion has not been implemented yet.') + + +def to_categories(fin, fout): + for title, trans in read_strings(fin): + fout.write('{}\n'.format(title)) + for lang in sorted(trans.keys(), key=langs_order): + fout.write('{}:^{}\n'.format(lang, trans[lang])) + fout.write('\n') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Converts between strings.txt, csv files and categories.txt.') + parser.add_argument('input', type=argparse.FileType('r'), help='Input file') + parser.add_argument('-o', '--output', default='-', help='Output file, "-" for stdout') + parser.add_argument('-d', '--delimiter', default=',', help='CSV delimiter') + parser.add_argument('-l', '--langs', help='List of langs for csv: empty for default, "?" to autodetect, comma-separated for a list') + parser.add_argument('--csv2s', action='store_true', help='CSV -> TXT') + parser.add_argument('--s2csv', action='store_true', help='TXT -> CSV') + parser.add_argument('--cat2s', action='store_true', help='Categories -> TXT') + parser.add_argument('--s2cat', action='store_true', help='TXT -> Categories') + options = parser.parse_args() + + fout = sys.stdout if options.output == '-' else open(options.output, 'w') + + if not options.langs: + langs = 'en ru ar cs da nl fi fr de hu id it ja ko nb pl pt ro es sv th tr uk vi zh-Hans zh-Hant he sk'.split() + elif options.langs == '?': + langs = None + else: + langs = options.langs.split(',') + + if options.csv2s: + from_csv(options.input, fout, options.delimiter) + elif options.s2csv: + to_csv(options.input, fout, options.delimiter, langs) + elif options.cat2s: + from_categories(options.input, fout) + elif options.s2cat: + to_categories(options.input, fout) + else: + raise ValueError('Please select a conversion direction.') diff --git a/tools/python/strings_to_csv.py b/tools/python/strings_to_csv.py deleted file mode 100755 index e4221c9ed8..0000000000 --- a/tools/python/strings_to_csv.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python -import argparse -import csv -import sys - - -class HeaderProcessor: - def lang_from_csv(self, column): - return column - - def lang_to_csv(self, column): - return column - - def section_from_csv(self, title): - return title - - def section_to_csv(self, title): - return title - - -def from_csv(fin, fout, proc, delim): - r = csv.reader(fin, delimiter=delim) - header = next(r) - for i in range(1, len(header)): - header[i] = proc.lang_from_csv(header[i]) - for row in r: - fout.write('[{}]\n'.format(proc.section_from_csv(row[0]))) - for i in range(1, len(row)): - if len(row[i].strip()) > 0: - fout.write('{} = {}\n'.format(header[i], row[i])) - fout.write('\n') - - -def to_csv(fin, fout, proc, delim, langs): - def write_line(writer, title, translations, langs): - row = [title] - for l in langs: - row.append('' if l not in translations else translations[l]) - writer.writerow(row) - - strings = [] - w = csv.writer(fout, delimiter=delim) - if langs is not None: - w.writerow([''] + langs) - curtitle = None - curtrans = {} - for line in fin: - line = line.strip() - if len(line) == 0: - continue - elif line[0] == '[': - if curtrans: - if langs is None: - strings.append((curtitle, curtrans)) - else: - write_line(w, curtitle, curtrans, langs) - curtitle = line[1:-1] - curtrans = {} - elif '=' in line and curtitle: - lang, trans = (x.strip() for x in line.split('=')) - curtrans[lang] = trans - if curtrans: - if langs is None: - strings.append((curtitle, curtrans)) - else: - write_line(w, curtitle, curtrans, langs) - - # If we don't have langs, build a list and print - if langs is None: - l = set() - for s in strings: - l.update(list(s[1].values())) - - def str_sort(k): - if k == 'en': - return '0' - elif k == 'ru': - return '1' - return l - l = sorted(l, key=str_sort) - for s in strings: - write_line(w, s[0], s[1], l) - - -if __name__ == '__main__': - processors = { - 'default': HeaderProcessor() - } - - parser = argparse.ArgumentParser(description='Coverts string.txt to csv and back') - parser.add_argument('input', type=argparse.FileType('r'), help='Input file') - parser.add_argument('-o', '--output', default='-', help='Output file, "-" for stdout') - parser.add_argument('-d', '--delimiter', default=',', help='CSV delimiter') - parser.add_argument('-l', '--langs', help='List of langs for csv: empty for default, "?" to autodetect, comma-separated for a list') - parser.add_argument('-p', '--processor', default='default', help='Name of a header processor ({})'.format(','.join(processors.keys()))) - parser.add_argument('--csv-in', action='store_true', help='CSV -> TXT') - parser.add_argument('--csv-out', action='store_true', help='TXT -> CSV') - options = parser.parse_args() - - fout = sys.stdout if options.output == '-' else open(options.output, 'w') - if options.csv_in: - csv_in = True - elif options.csv_out: - csv_in = False - else: - raise ValueError('Autodetection is not implemented yet.') - - if csv_in: - from_csv(options.input, fout, processors[options.processor], options.delimiter) - else: - if not options.langs: - langs = 'en ru ar cs da nl fi fr de hu id it ja ko nb pl pt ro es sv th tr uk vi zh-Hans zh-Hant he sk'.split() - elif options.langs == '?': - langs = None - else: - langs = options.langs.split(',') - to_csv(options.input, fout, processors[options.processor], options.delimiter, langs) -- cgit v1.2.3