diff options
author | Vladimir Byko-Ianko <v.bykoianko@corp.mail.ru> | 2015-07-22 09:03:33 +0300 |
---|---|---|
committer | Alex Zolotarev <alex@maps.me> | 2015-09-23 02:57:18 +0300 |
commit | 04016db2b6d6cf70d65a19fd3b7bf7578e49de20 (patch) | |
tree | 1e523301ad6605456173d83a1980394df39e2e95 /sound/tts | |
parent | b74c4e9c1638866b4159d1502f0555d8e7e442d7 (diff) |
Python script for generating twine file form csv.
Diffstat (limited to 'sound/tts')
-rwxr-xr-x | sound/tts/sound_csv_to_sound_txt.py | 73 |
1 files changed, 73 insertions, 0 deletions
diff --git a/sound/tts/sound_csv_to_sound_txt.py b/sound/tts/sound_csv_to_sound_txt.py new file mode 100755 index 0000000000..9af25751b2 --- /dev/null +++ b/sound/tts/sound_csv_to_sound_txt.py @@ -0,0 +1,73 @@ +#!/usr/bin/python + +import csv +import os.path + +from optparse import OptionParser + +ID_COLUMN = 1 +MIN_PROCESSED_COLUMN = 2 +MAX_PROCESSED_COLUMN = 30 + +def parse_args(): + opt_parser = OptionParser(usage="It's a tool for converting text voice messages from csv to twine input format." + + "As a source shall be taken a csv file from\n" + + "https://docs.google.com/spreadsheets/d/1gJsSzFpp2B3xnSx-RjjQ3Do66lQDhCxtfEnQo7Vrkw0/edit#gid=150382014\n" + + "The output shall be put to omim/sound/tts/sound.txt. As another output file the tool generates languages.txt." + + "languages.txt contains all available languages in csv.\n" + + "Example: python %prog path_to_sound.csv path_to_sound.txt path_to_languages.txt", + version="%prog 1.0") + + (options, args) = opt_parser.parse_args() + + if len(args) != 3: + opt_parser.error("Wrong number of arguments.") + return args + + +def run(): + args = parse_args() + + input_name = args[0] + twine_name = args[1] + languages_name = args[2] + + print("Converting sound.csv to sound.txt (input of twine)") + if not os.path.isfile(input_name): + print("Error. CSV file not found. Please check the usage.\n") + return + + txt_file = open(twine_name, 'w') + with open(input_name, 'rb') as csvfile: + txt_file.write('[[sound]]\n') + csv_reader = csv.reader(csvfile, delimiter=',', quotechar='\n') + + languages = {} + csv_reader.next() +# A row with language names (like en, ru and so on) is located on the second line. + language_row = csv_reader.next() + languages_file = open(languages_name, 'w') + for idx, lang in enumerate(language_row): + if (idx >= MIN_PROCESSED_COLUMN + and idx < MAX_PROCESSED_COLUMN and lang != ''): + languages[idx] = lang + languages_file.write(lang + ' ') + languages_file.close() + csv_reader.next() +# Translation follows starting from the 4th line in the table. + for row in csv_reader: + if row[ID_COLUMN] != '': + txt_file.write(' [' + row[ID_COLUMN] + ']\n') + for column_idx, translation in enumerate(row): + if (column_idx >= MIN_PROCESSED_COLUMN + and column_idx < MAX_PROCESSED_COLUMN and column_idx in languages): + txt_file.write(' ' + languages[column_idx] + ' = ' + translation + '\n') + txt_file.write('\n') + + csvfile.close() + txt_file.close() + print('Done. Check ' + twine_name + ' and ' + languages_name + ' for the result.') + + +if __name__ == "__main__": + run() |