diff options
Diffstat (limited to 'release/scripts')
-rw-r--r-- | release/scripts/modules/bl_i18n_utils/bl_process_msg.py | 307 | ||||
-rwxr-xr-x | release/scripts/modules/bl_i18n_utils/check_po.py | 119 | ||||
-rwxr-xr-x | release/scripts/modules/bl_i18n_utils/clean_po.py | 27 | ||||
-rwxr-xr-x | release/scripts/modules/bl_i18n_utils/import_po_from_branches.py | 56 | ||||
-rwxr-xr-x | release/scripts/modules/bl_i18n_utils/merge_po.py | 105 | ||||
-rw-r--r-- | release/scripts/modules/bl_i18n_utils/settings.py | 74 | ||||
-rwxr-xr-x | release/scripts/modules/bl_i18n_utils/update_branches.py | 39 | ||||
-rwxr-xr-x | release/scripts/modules/bl_i18n_utils/update_languages_menu.py | 12 | ||||
-rwxr-xr-x | release/scripts/modules/bl_i18n_utils/update_mo.py | 10 | ||||
-rwxr-xr-x | release/scripts/modules/bl_i18n_utils/update_po.py | 59 | ||||
-rwxr-xr-x | release/scripts/modules/bl_i18n_utils/update_pot.py | 79 | ||||
-rwxr-xr-x | release/scripts/modules/bl_i18n_utils/update_trunk.py | 28 | ||||
-rw-r--r-- | release/scripts/modules/bl_i18n_utils/utils.py | 875 |
13 files changed, 1097 insertions, 693 deletions
diff --git a/release/scripts/modules/bl_i18n_utils/bl_process_msg.py b/release/scripts/modules/bl_i18n_utils/bl_process_msg.py index 7e9266d0530..531ad2faf50 100644 --- a/release/scripts/modules/bl_i18n_utils/bl_process_msg.py +++ b/release/scripts/modules/bl_i18n_utils/bl_process_msg.py @@ -23,27 +23,41 @@ # You should not directly use this script, rather use update_msg.py! import os +import re +import collections +import copy # XXX Relative import does not work here when used from Blender... from bl_i18n_utils import settings +import bpy -#classes = set() - +print(dir(settings)) SOURCE_DIR = settings.SOURCE_DIR CUSTOM_PY_UI_FILES = [os.path.abspath(os.path.join(SOURCE_DIR, p)) for p in settings.CUSTOM_PY_UI_FILES] FILE_NAME_MESSAGES = settings.FILE_NAME_MESSAGES -COMMENT_PREFIX = settings.COMMENT_PREFIX -CONTEXT_PREFIX = settings.CONTEXT_PREFIX +MSG_COMMENT_PREFIX = settings.MSG_COMMENT_PREFIX +MSG_CONTEXT_PREFIX = settings.MSG_CONTEXT_PREFIX CONTEXT_DEFAULT = settings.CONTEXT_DEFAULT +#CONTEXT_DEFAULT = bpy.app.i18n.contexts.default # XXX Not yet! :) UNDOC_OPS_STR = settings.UNDOC_OPS_STR NC_ALLOWED = settings.WARN_MSGID_NOT_CAPITALIZED_ALLOWED +##### Utils ##### + +# check for strings like ": %d" +ignore_reg = re.compile(r"^(?:[-*.()/\\+:%xWXYZ0-9]|%d|%f|%s|%r|\s)*$") +filter_message = ignore_reg.match + + def check(check_ctxt, messages, key, msgsrc): + """ + Performs a set of checks over the given key (context, message)... 
+ """ if check_ctxt is None: return multi_rnatip = check_ctxt.get("multi_rnatip") @@ -73,7 +87,79 @@ def check(check_ctxt, messages, key, msgsrc): undoc_ops.add(key) +def print_warnings(check_ctxt, messages): + if check_ctxt is not None: + print("WARNINGS:") + keys = set() + for c in check_ctxt.values(): + keys |= c + # XXX Temp, see below + keys -= check_ctxt["multi_rnatip"] + for key in keys: + if key in check_ctxt["undoc_ops"]: + print("\tThe following operators are undocumented:") + else: + print("\t“{}”|“{}”:".format(*key)) + if key in check_ctxt["multi_lines"]: + print("\t\t-> newline in this message!") + if key in check_ctxt["not_capitalized"]: + print("\t\t-> message not capitalized!") + if key in check_ctxt["end_point"]: + print("\t\t-> message with endpoint!") + # XXX Hide this one for now, too much false positives. +# if key in check_ctxt["multi_rnatip"]: +# print("\t\t-> tip used in several RNA items") + if key in check_ctxt["py_in_rna"]: + print("\t\t-> RNA message also used in py UI code:") + print("\t\t{}".format("\n\t\t".join(messages[key]))) + + +def enable_addons(addons={}, support={}, disable=False): + """ + Enable (or disable) addons based either on a set of names, or a set of 'support' types. + Returns the list of all affected addons (as fake modules)! 
+ """ + import addon_utils + import bpy + + userpref = bpy.context.user_preferences + used_ext = {ext.module for ext in userpref.addons} + + ret = [mod for mod in addon_utils.modules(addon_utils.addons_fake_modules) + if ((addons and mod.__name__ in addons) or + (not addons and addon_utils.module_bl_info(mod)["support"] in support))] + + for mod in ret: + module_name = mod.__name__ + if disable: + if module_name not in used_ext: + continue + print(" Disabling module ", module_name) + bpy.ops.wm.addon_disable(module=module_name) + else: + if module_name in used_ext: + continue + print(" Enabling module ", module_name) + bpy.ops.wm.addon_enable(module=module_name) + + # XXX There are currently some problems with bpy/rna... + # *Very* tricky to solve! + # So this is a hack to make all newly added operator visible by + # bpy.types.OperatorProperties.__subclasses__() + for cat in dir(bpy.ops): + cat = getattr(bpy.ops, cat) + for op in dir(cat): + getattr(cat, op).get_rna() + + return ret + + +##### RNA ##### + def dump_messages_rna(messages, check_ctxt): + """ + Dump into messages dict all RNA-defined UI messages (labels en tooltips). + """ import bpy def classBlackList(): @@ -257,20 +343,17 @@ def dump_messages_rna(messages, check_ctxt): # Parse everything (recursively parsing from bpy_struct "class"...). processed = process_cls_list(type(bpy.context).__base__.__subclasses__()) print("{} classes processed!".format(processed)) -# import pickle -# global classes -# classes = {str(c) for c in classes} -# with open("/home/i7deb64/Bureau/tpck_2", "wb") as f: -# pickle.dump(classes, f, protocol=0) from bpy_extras.keyconfig_utils import KM_HIERARCHY walk_keymap_hierarchy(KM_HIERARCHY, "KM_HIERARCHY") -def dump_messages_pytext(messages, check_ctxt): - """ dumps text inlined in the python user interface: eg. +##### Python source code ##### +def dump_py_messages_from_files(messages, check_ctxt, files): + """ + Dump text inlined in the python files given, e.g. 
'My Name' in: layout.prop("someprop", text="My Name") """ import ast @@ -278,7 +361,6 @@ def dump_messages_pytext(messages, check_ctxt): # ------------------------------------------------------------------------- # Gather function names - import bpy # key: func_id # val: [(arg_kw, arg_pos), (arg_kw, arg_pos), ...] func_translate_args = {} @@ -290,15 +372,12 @@ def dump_messages_pytext(messages, check_ctxt): # E.g. we don’t want to get strings inside subscripts (blah["foo"])! stopper_nodes = {ast.Subscript, } + # For now only consider functions from UILayout... for func_id, func in bpy.types.UILayout.bl_rna.functions.items(): - # check it has a 'text' argument + # check it has one or more arguments as defined in translate_kw for (arg_pos, (arg_kw, arg)) in enumerate(func.parameters.items()): - if ((arg_kw in translate_kw) and - (arg.is_output is False) and - (arg.type == 'STRING')): - - func_translate_args.setdefault(func_id, []).append((arg_kw, - arg_pos)) + if ((arg_kw in translate_kw) and (arg.is_output is False) and (arg.type == 'STRING')): + func_translate_args.setdefault(func_id, []).append((arg_kw, arg_pos)) # print(func_translate_args) check_ctxt_py = None @@ -308,19 +387,20 @@ def dump_messages_pytext(messages, check_ctxt): "not_capitalized": check_ctxt["not_capitalized"], "end_point": check_ctxt["end_point"]} - # ------------------------------------------------------------------------- - # Function definitions - + # Helper function def extract_strings(fp_rel, node): - """ Recursively get strings, needed in case we have "Blah" + "Blah", - passed as an argument in that case it wont evaluate to a string. - However, break on some kind of stopper nodes, like e.g. Subscript. """ - + Recursively get strings, needed in case we have "Blah" + "Blah", passed as an argument in that case it won't + evaluate to a string. However, break on some kind of stopper nodes, like e.g. Subscript. 
+ """ if type(node) == ast.Str: eval_str = ast.literal_eval(node) if eval_str: - key = (CONTEXT_DEFAULT, eval_str) + # Parse optional context included in string! + if bpy.app.i18n.context_sep in eval_str: + key = eval_str.split(bpy.app.i18n.context_sep, 1) + else: + key = (CONTEXT_DEFAULT, eval_str) msgsrc = "{}:{}".format(fp_rel, node.lineno) check(check_ctxt_py, messages, key, msgsrc) messages.setdefault(key, []).append(msgsrc) @@ -330,10 +410,9 @@ def dump_messages_pytext(messages, check_ctxt): if type(nd) not in stopper_nodes: extract_strings(fp_rel, nd) - def extract_strings_from_file(fp): - filedata = open(fp, 'r', encoding="utf8") - root_node = ast.parse(filedata.read(), fp, 'exec') - filedata.close() + for fp in files: + with open(fp, 'r', encoding="utf8") as filedata: + root_node = ast.parse(filedata.read(), fp, 'exec') fp_rel = os.path.relpath(fp, SOURCE_DIR) @@ -361,72 +440,90 @@ def dump_messages_pytext(messages, check_ctxt): if kw.arg == arg_kw: extract_strings(fp_rel, kw.value) - # ------------------------------------------------------------------------- - # Dump Messages - mod_dir = os.path.join(SOURCE_DIR, - "release", - "scripts", - "startup", - "bl_ui") +def dump_py_messages(messages, check_ctxt, addons): + mod_dir = os.path.join(SOURCE_DIR, "release", "scripts", "startup", "bl_ui") - files = [os.path.join(mod_dir, fn) - for fn in sorted(os.listdir(mod_dir)) - if not fn.startswith("_") - if fn.endswith("py") - ] + files = [os.path.join(mod_dir, fn) for fn in sorted(os.listdir(mod_dir)) + if not fn.startswith("_") if fn.endswith("py")] # Dummy Cycles has its py addon in its own dir! files += CUSTOM_PY_UI_FILES - for fp in files: - extract_strings_from_file(fp) + # Add all addons we support in main translation file! 
+ for mod in addons: + fn = mod.__file__ + if os.path.basename(fn) == "__init__.py": + mod_dir = os.path.dirname(fn) + files += [fn for fn in sorted(os.listdir(mod_dir)) + if os.path.isfile(fn) and os.path.splitext(fn)[1] == ".py"] + else: + files.append(fn) + + dump_py_messages_from_files(messages, check_ctxt, files) +##### Main functions! ##### + def dump_messages(do_messages, do_checks): - import collections - import re + messages = getattr(collections, 'OrderedDict', dict)() - def enable_addons(): - """For now, enable all official addons, before extracting msgids.""" - import addon_utils - import bpy + messages[(CONTEXT_DEFAULT, "")] = [] - userpref = bpy.context.user_preferences - used_ext = {ext.module for ext in userpref.addons} - support = {"OFFICIAL"} - # collect the categories that can be filtered on - addons = [(mod, addon_utils.module_bl_info(mod)) for mod in - addon_utils.modules(addon_utils.addons_fake_modules)] + # Enable all wanted addons. + # For now, enable all official addons, before extracting msgids. + addons = enable_addons(support={"OFFICIAL"}) - for mod, info in addons: - module_name = mod.__name__ - if module_name in used_ext or info["support"] not in support: - continue - print(" Enabling module ", module_name) - bpy.ops.wm.addon_enable(module=module_name) + check_ctxt = None + if do_checks: + check_ctxt = {"multi_rnatip": set(), + "multi_lines": set(), + "py_in_rna": set(), + "not_capitalized": set(), + "end_point": set(), + "undoc_ops": set()} + + # get strings from RNA + dump_messages_rna(messages, check_ctxt) - # XXX There are currently some problems with bpy/rna... - # *Very* tricky to solve! - # So this is a hack to make all newly added operator visible by - # bpy.types.OperatorProperties.__subclasses__() - for cat in dir(bpy.ops): - cat = getattr(bpy.ops, cat) - for op in dir(cat): - getattr(cat, op).get_rna() + # get strings from UI layout definitions text="..." 
args + dump_py_messages(messages, check_ctxt, addons) + + del messages[(CONTEXT_DEFAULT, "")] + + print_warnings(check_ctxt, messages) + + if do_messages: + print("Writing messages…") + num_written = 0 + num_filtered = 0 + with open(FILE_NAME_MESSAGES, 'w', encoding="utf8") as message_file: + for (ctx, key), value in messages.items(): + # filter out junk values + if filter_message(key): + num_filtered += 1 + continue + + # Remove newlines in key and values! + message_file.write("\n".join(MSG_COMMENT_PREFIX + msgsrc.replace("\n", "") for msgsrc in value)) + message_file.write("\n") + if ctx: + message_file.write(MSG_CONTEXT_PREFIX + ctx.replace("\n", "") + "\n") + message_file.write(key.replace("\n", "") + "\n") + num_written += 1 + + print("Written {} messages to: {} ({} were filtered out)." + "".format(num_written, FILE_NAME_MESSAGES, num_filtered)) - # check for strings like ": %d" - ignore_reg = re.compile(r"^(?:[-*.()/\\+:%xWXYZ0-9]|%d|%f|%s|%r|\s)*$") - filter_message = ignore_reg.match +def dump_addon_messages(module_name, messages_formats, do_checks): messages = getattr(collections, 'OrderedDict', dict)() messages[(CONTEXT_DEFAULT, "")] = [] - - # Enable all wanted addons. - enable_addons() + minus_messages = copy.deepcopy(messages) check_ctxt = None + minus_check_ctxt = None if do_checks: check_ctxt = {"multi_rnatip": set(), "multi_lines": set(), @@ -434,39 +531,44 @@ def dump_messages(do_messages, do_checks): "not_capitalized": set(), "end_point": set(), "undoc_ops": set()} + minus_check_ctxt = copy.deepcopy(check_ctxt) - # get strings from RNA + # Get current addon state (loaded or not): + was_loaded = addon_utils.check(module_name)[1] + + # Enable our addon and get strings from RNA. + enable_addons(addons={module_name}) dump_messages_rna(messages, check_ctxt) + # Now disable our addon, and rescan RNA. + enable_addons(addons={module_name}, disable=True) + dump_messages_rna(minus_messages, minus_check_ctxt) + + # Restore previous state if needed! 
+ if was_loaded: + enable_addons(addons={module_name}) + + # and make the diff! + for key in minus_messages: + if k == (CONTEXT_DEFAULT, ""): + continue + del messages[k] + + if check_ctxt: + for key in check_ctxt: + for warning in minus_check_ctxt[key]: + check_ctxt[key].remove(warning) + + # and we are done with those! + del minus_messages + del minus_check_ctxt + # get strings from UI layout definitions text="..." args dump_messages_pytext(messages, check_ctxt) del messages[(CONTEXT_DEFAULT, "")] - if do_checks: - print("WARNINGS:") - keys = set() - for c in check_ctxt.values(): - keys |= c - # XXX Temp, see below - keys -= check_ctxt["multi_rnatip"] - for key in keys: - if key in check_ctxt["undoc_ops"]: - print("\tThe following operators are undocumented:") - else: - print("\t“{}”|“{}”:".format(*key)) - if key in check_ctxt["multi_lines"]: - print("\t\t-> newline in this message!") - if key in check_ctxt["not_capitalized"]: - print("\t\t-> message not capitalized!") - if key in check_ctxt["end_point"]: - print("\t\t-> message with endpoint!") - # XXX Hide this one for now, too much false positives. 
-# if key in check_ctxt["multi_rnatip"]: -# print("\t\t-> tip used in several RNA items") - if key in check_ctxt["py_in_rna"]: - print("\t\t-> RNA message also used in py UI code:") - print("\t\t{}".format("\n\t\t".join(messages[key]))) + print_warnings if do_messages: print("Writing messages…") @@ -491,6 +593,7 @@ def dump_messages(do_messages, do_checks): "".format(num_written, FILE_NAME_MESSAGES, num_filtered)) + def main(): try: import bpy diff --git a/release/scripts/modules/bl_i18n_utils/check_po.py b/release/scripts/modules/bl_i18n_utils/check_po.py index 2e82047bb95..96f91ee0914 100755 --- a/release/scripts/modules/bl_i18n_utils/check_po.py +++ b/release/scripts/modules/bl_i18n_utils/check_po.py @@ -38,45 +38,47 @@ BRANCHES_DIR = settings.BRANCHES_DIR FILE_NAME_POT = settings.FILE_NAME_POT -def print_diff(ref_messages, messages, states): +def print_diff(ref_msgs, msgs): # Remove comments from messages list! - messages = set(messages.keys()) - states["comm_msg"] - unneeded = (messages - ref_messages) - for msgid in unneeded: - print('\tUnneeded message id "{}"'.format(msgid)) + messages = set(msgs.msgs.keys()) - msgs.comm_msgs + unneeded = (messages - ref_msgs.msgs.keys()) + for msgkey in unneeded: + print('\tUnneeded message context/id "{}"'.format(msgkey)) - missing = (ref_messages - messages) - for msgid in missing: - print('\tMissing message id "{}"'.format(msgid)) + missing = (ref_msgs.msgs.keys() - messages) + for msgkey in missing: + print('\tMissing message context/id "{}"'.format(msgkey)) - for msgid in states["comm_msg"]: - print('\tCommented message id "{}"'.format(msgid)) + for msgid in msgs.comm_msgs: + print('\tCommented message context/id "{}"'.format(msgkey)) - print("\t{} unneeded messages, {} missing messages, {} commented messages." \ - "".format(len(unneeded), len(missing), len(states["comm_msg"]))) + print("\t{} unneeded messages, {} missing messages, {} commented messages." 
+ "".format(len(unneeded), len(missing), len(msgs.comm_msgs))) return 0 -def process_po(ref_messages, po, glob_stats, do_stats, do_messages): +#def process_po(ref_messages, po, glob_stats, do_stats, do_messages): +def process_po(ref_messages, po, do_stats, do_messages): print("Checking {}...".format(po)) ret = 0 - messages, states, stats = utils.parse_messages(po) + messages = utils.I18nMessages(kind='PO', src=po) if do_messages: - t = print_diff(ref_messages, messages, states) + t = print_diff(ref_messages, messages) if t: ret = t if do_stats: print("\tStats:") - t = utils.print_stats(stats, glob_stats, prefix=" ") + t = messages.print_stats(prefix=" ") if t: ret = t - if states["is_broken"]: + if messages.parsing_errors: print("\tERROR! This .po is broken!") ret = 1 return ret +# XXX Quick update for new I18Nfoo objects, need rework! def main(): import argparse parser = argparse.ArgumentParser(description="Check po’s in branches " \ @@ -97,22 +99,21 @@ def main(): if args.pot: global FILE_NAME_POT FILE_NAME_POT = args.pot - glob_stats = {"nbr" : 0.0, - "lvl" : 0.0, - "lvl_ttips" : 0.0, - "lvl_trans_ttips" : 0.0, - "lvl_ttips_in_trans": 0.0, - "lvl_comm" : 0.0, - "nbr_signs" : 0, - "nbr_trans_signs" : 0, - "contexts" : set()} + #glob_stats = {"nbr" : 0.0, + #"lvl" : 0.0, + #"lvl_ttips" : 0.0, + #"lvl_trans_ttips" : 0.0, + #"lvl_ttips_in_trans": 0.0, + #"lvl_comm" : 0.0, + #"nbr_signs" : 0, + #"nbr_trans_signs" : 0, + #"contexts" : set()} ret = 0 pot_messages = None if args.messages: - pot_messages, u1, pot_stats = utils.parse_messages(FILE_NAME_POT) - pot_messages = set(pot_messages.keys()) - glob_stats["nbr_signs"] = pot_stats["nbr_signs"] + pot_messages = utils.I18nMessages(kind='PO', src=FILE_NAME_POT) + #glob_stats["nbr_signs"] = pot_stats["nbr_signs"] if args.langs: for lang in args.langs: @@ -121,16 +122,16 @@ def main(): else: po = os.path.join(BRANCHES_DIR, lang, ".".join((lang, "po"))) if os.path.exists(po): - t = process_po(pot_messages, po, glob_stats, 
- args.stats, args.messages) + #t = process_po(pot_messages, po, glob_stats, args.stats, args.messages) + t = process_po(pot_messages, po, args.stats, args.messages) if t: ret = t elif args.trunk: for po in os.listdir(TRUNK_PO_DIR): if po.endswith(".po"): po = os.path.join(TRUNK_PO_DIR, po) - t = process_po(pot_messages, po, glob_stats, - args.stats, args.messages) + #t = process_po(pot_messages, po, glob_stats, args.stats, args.messages) + t = process_po(pot_messages, po, args.stats, args.messages) if t: ret = t else: @@ -138,35 +139,35 @@ def main(): for po in os.listdir(os.path.join(BRANCHES_DIR, lang)): if po.endswith(".po"): po = os.path.join(BRANCHES_DIR, lang, po) - t = process_po(pot_messages, po, glob_stats, - args.stats, args.messages) + #t = process_po(pot_messages, po, glob_stats, args.stats, args.messages) + t = process_po(pot_messages, po, args.stats, args.messages) if t: ret = t - if args.stats and glob_stats["nbr"] != 0.0: - nbr_contexts = len(glob_stats["contexts"] - {""}) - if nbr_contexts != 1: - if nbr_contexts == 0: - nbr_contexts = "No" - _ctx_txt = "s are" - else: - _ctx_txt = " is" - print("\nAverage stats for all {:.0f} processed files:\n" - " {:>6.1%} done!\n" - " {:>6.1%} of messages are tooltips.\n" - " {:>6.1%} of tooltips are translated.\n" - " {:>6.1%} of translated messages are tooltips.\n" - " {:>6.1%} of messages are commented.\n" - " The org msgids are currently made of {} signs.\n" - " All processed translations are currently made of {} signs.\n" - " {} specific context{} present:\n {}\n" - "".format(glob_stats["nbr"], glob_stats["lvl"] / glob_stats["nbr"], - glob_stats["lvl_ttips"] / glob_stats["nbr"], - glob_stats["lvl_trans_ttips"] / glob_stats["nbr"], - glob_stats["lvl_ttips_in_trans"] / glob_stats["nbr"], - glob_stats["lvl_comm"] / glob_stats["nbr"], glob_stats["nbr_signs"], - glob_stats["nbr_trans_signs"], nbr_contexts, _ctx_txt, - "\n ".join(glob_stats["contexts"]-{""}))) + #if args.stats and glob_stats["nbr"] != 0.0: + 
#nbr_contexts = len(glob_stats["contexts"] - {""}) + #if nbr_contexts != 1: + #if nbr_contexts == 0: + #nbr_contexts = "No" + #_ctx_txt = "s are" + #else: + #_ctx_txt = " is" + #print("\nAverage stats for all {:.0f} processed files:\n" + #" {:>6.1%} done!\n" + #" {:>6.1%} of messages are tooltips.\n" + #" {:>6.1%} of tooltips are translated.\n" + #" {:>6.1%} of translated messages are tooltips.\n" + #" {:>6.1%} of messages are commented.\n" + #" The org msgids are currently made of {} signs.\n" + #" All processed translations are currently made of {} signs.\n" + #" {} specific context{} present:\n {}\n" + #"".format(glob_stats["nbr"], glob_stats["lvl"] / glob_stats["nbr"], + #glob_stats["lvl_ttips"] / glob_stats["nbr"], + #glob_stats["lvl_trans_ttips"] / glob_stats["nbr"], + #glob_stats["lvl_ttips_in_trans"] / glob_stats["nbr"], + #glob_stats["lvl_comm"] / glob_stats["nbr"], glob_stats["nbr_signs"], + #glob_stats["nbr_trans_signs"], nbr_contexts, _ctx_txt, + #"\n ".join(glob_stats["contexts"]-{""}))) return ret diff --git a/release/scripts/modules/bl_i18n_utils/clean_po.py b/release/scripts/modules/bl_i18n_utils/clean_po.py index 2924ad9fb74..da8d25cb9f4 100755 --- a/release/scripts/modules/bl_i18n_utils/clean_po.py +++ b/release/scripts/modules/bl_i18n_utils/clean_po.py @@ -39,30 +39,27 @@ BRANCHES_DIR = settings.BRANCHES_DIR def do_clean(po, strict): print("Cleaning {}...".format(po)) - messages, states, u1 = utils.parse_messages(po) + msgs = utils.I18nMessages(kind='PO', src=po) - if strict and states["is_broken"]: + if strict and msgs.parsing_errors: print("ERROR! 
This .po file is broken!") return 1 - for msgkey in states["comm_msg"]: - del messages[msgkey] - utils.write_messages(po, messages, states["comm_msg"], states["fuzzy_msg"]) - print("Removed {} commented messages.".format(len(states["comm_msg"]))) + nbr_rem = len(msgs.comm_msgs) + for msgkey in msgs.comm_msgs: + del msgs.msgs[msgkey] + msgs.write(kind='PO', dest=po) + print("Removed {} commented messages.".format(nbr_rem)) return 0 def main(): import argparse - parser = argparse.ArgumentParser(description="Clean po’s in branches " \ - "or trunk (i.e. remove " \ - "all commented messages).") - parser.add_argument('-t', '--trunk', action="store_true", - help="Clean po’s in trunk rather than branches.") - parser.add_argument('-s', '--strict', action="store_true", - help="Raise an error if a po is broken.") - parser.add_argument('langs', metavar='ISO_code', nargs='*', - help="Restrict processed languages to those.") + parser = argparse.ArgumentParser(description="Clean po’s in branches or trunk (i.e. 
remove all commented " + "messages).") + parser.add_argument('-t', '--trunk', action="store_true", help="Clean po’s in trunk rather than branches.") + parser.add_argument('-s', '--strict', action="store_true", help="Raise an error if a po is broken.") + parser.add_argument('langs', metavar='ISO_code', nargs='*', help="Restrict processed languages to those.") args = parser.parse_args() ret = 0 diff --git a/release/scripts/modules/bl_i18n_utils/import_po_from_branches.py b/release/scripts/modules/bl_i18n_utils/import_po_from_branches.py index 533dded3c57..f42b45e7d4e 100755 --- a/release/scripts/modules/bl_i18n_utils/import_po_from_branches.py +++ b/release/scripts/modules/bl_i18n_utils/import_po_from_branches.py @@ -26,7 +26,6 @@ import os import shutil import sys import subprocess -from codecs import open try: import settings @@ -48,14 +47,10 @@ PY3 = settings.PYTHON3_EXEC def main(): import argparse - parser = argparse.ArgumentParser(description="Import advanced enough po’s " \ - "from branches to trunk.") - parser.add_argument('-t', '--threshold', type=int, - help="Import threshold, as a percentage.") - parser.add_argument('-s', '--strict', action="store_true", - help="Raise an error if a po is broken.") - parser.add_argument('langs', metavar='ISO_code', nargs='*', - help="Restrict processed languages to those.") + parser = argparse.ArgumentParser(description="Import advanced enough po’s from branches to trunk.") + parser.add_argument('-t', '--threshold', type=float, help="Import threshold, as a percentage.") + parser.add_argument('-s', '--strict', action="store_true", help="Raise an error if a po is broken.") + parser.add_argument('langs', metavar='ISO_code', nargs='*', help="Restrict processed languages to those.") args = parser.parse_args() ret = 0 @@ -70,51 +65,40 @@ def main(): po = os.path.join(BRANCHES_DIR, lang, ".".join((lang, "po"))) if os.path.exists(po): po_is_rtl = os.path.join(BRANCHES_DIR, lang, RTL_PREPROCESS_FILE) - msgs, state, stats = 
utils.parse_messages(po) - tot_msgs = stats["tot_msg"] - trans_msgs = stats["trans_msg"] + msgs = utils.I18nMessages(iso=lang, kind='PO', src=po) lvl = 0.0 - if tot_msgs: - lvl = float(trans_msgs) / float(tot_msgs) + if msgs.nbr_msgs: + lvl = msgs.nbr_trans_msgs / msgs.nbr_msgs if lvl > threshold: - if state["is_broken"] and args.strict: - print("{:<10}: {:>6.1%} done, but BROKEN, skipped." \ - "".format(lang, lvl)) + if msgs.parsing_errors and args.strict: + print("{:<10}: {:>6.1%} done, but BROKEN, skipped.".format(lang, lvl)) ret = 1 else: if os.path.exists(po_is_rtl): - out_po = os.path.join(TRUNK_PO_DIR, - ".".join((lang, "po"))) - out_raw_po = os.path.join(TRUNK_PO_DIR, - "_".join((lang, "raw.po"))) + out_po = os.path.join(TRUNK_PO_DIR, ".".join((lang, "po"))) + out_raw_po = os.path.join(TRUNK_PO_DIR, "_".join((lang, "raw.po"))) keys = [] trans = [] - for k, m in msgs.items(): + for k, m in msgs.msgs.items(): keys.append(k) - trans.append("".join(m["msgstr_lines"])) + trans.append(m.msgstr) trans = rtl_preprocess.log2vis(trans) for k, t in zip(keys, trans): - # Mono-line for now... - msgs[k]["msgstr_lines"] = [t] - utils.write_messages(out_po, msgs, state["comm_msg"], - state["fuzzy_msg"]) + msgs[k].msgstr = t + msgs.write(kinf='PO', dest=out_po) # Also copies org po! shutil.copy(po, out_raw_po) - print("{:<10}: {:>6.1%} done, enough translated " \ - "messages, processed and copied to trunk." \ + print("{:<10}: {:>6.1%} done, enough translated messages, processed and copied to trunk." 
"".format(lang, lvl)) else: shutil.copy(po, TRUNK_PO_DIR) - print("{:<10}: {:>6.1%} done, enough translated " \ - "messages, copied to trunk.".format(lang, lvl)) + print("{:<10}: {:>6.1%} done, enough translated messages, copied to trunk.".format(lang, lvl)) else: - if state["is_broken"] and args.strict: - print("{:<10}: {:>6.1%} done, BROKEN and not enough " \ - "translated messages, skipped".format(lang, lvl)) + if msgs.parsing_errors and args.strict: + print("{:<10}: {:>6.1%} done, BROKEN and not enough translated messages, skipped".format(lang, lvl)) ret = 1 else: - print("{:<10}: {:>6.1%} done, not enough translated " \ - "messages, skipped.".format(lang, lvl)) + print("{:<10}: {:>6.1%} done, not enough translated messages, skipped.".format(lang, lvl)) return ret diff --git a/release/scripts/modules/bl_i18n_utils/merge_po.py b/release/scripts/modules/bl_i18n_utils/merge_po.py index baf67de2e81..610be0f15bd 100755 --- a/release/scripts/modules/bl_i18n_utils/merge_po.py +++ b/release/scripts/modules/bl_i18n_utils/merge_po.py @@ -37,26 +37,22 @@ except: from . import (settings, utils) +# XXX This is a quick hack to make it work with new I18n... objects! To be reworked! 
def main(): import argparse - parser = argparse.ArgumentParser(description="" \ - "Merge one or more .po files into the first dest one.\n" \ - "If a msgkey (msgctxt, msgid) is present in more than " \ - "one merged po, the one in the first file wins, unless " \ - "it’s marked as fuzzy and one later is not.\n" \ - "The fuzzy flag is removed if necessary.\n" \ - "All other comments are never modified.\n" \ - "Commented messages in dst will always remain " \ - "commented, and commented messages are never merged " \ + parser = argparse.ArgumentParser(description="" + "Merge one or more .po files into the first dest one.\n" + "If a msgkey (msgctxt, msgid) is present in more than one merged po, the one in the first file " + "wins, unless it’s marked as fuzzy and one later is not.\n" + "The fuzzy flag is removed if necessary.\n" + "All other comments are never modified.\n" + "Commented messages in dst will always remain commented, and commented messages are never merged " "from sources.") - parser.add_argument('-s', '--stats', action="store_true", - help="Show statistics info.") + parser.add_argument('-s', '--stats', action="store_true", help="Show statistics info.") parser.add_argument('-r', '--replace', action="store_true", help="Replace existing messages of same \"level\" already in dest po.") - parser.add_argument('dst', metavar='dst.po', - help="The dest po into which merge the others.") - parser.add_argument('src', metavar='src.po', nargs='+', - help="The po's to merge into the dst.po one.") + parser.add_argument('dst', metavar='dst.po', help="The dest po into which merge the others.") + parser.add_argument('src', metavar='src.po', nargs='+', help="The po's to merge into the dst.po one.") args = parser.parse_args() ret = 0 @@ -67,89 +63,78 @@ def main(): nbr_added = 0 nbr_unfuzzied = 0 - dst_messages, dst_states, dst_stats = utils.parse_messages(args.dst) - if dst_states["is_broken"]: + dst_msgs = utils.I18nMessages(kind='PO', src=args.dst) + if 
dst_msgs.parsing_errors: print("Dest po is BROKEN, aborting.") return 1 if args.stats: print("Dest po, before merging:") - utils.print_stats(dst_stats, prefix="\t") - # If we don’t want to replace existing valid translations, pre-populate - # done_msgkeys and done_fuzzy_msgkeys. + dst_msgs.print_stats(prefix="\t") + # If we don’t want to replace existing valid translations, pre-populate done_msgkeys and done_fuzzy_msgkeys. if not args.replace: - done_msgkeys = dst_states["trans_msg"].copy() - done_fuzzy_msgkeys = dst_states["fuzzy_msg"].copy() + done_msgkeys = dst_msgs.trans_msgs.copy() + done_fuzzy_msgkeys = dst_msgs.fuzzy_msgs.copy() for po in args.src: - messages, states, stats = utils.parse_messages(po) - if states["is_broken"]: + msgs = utils.I18nMessages(kind='PO', src=po) + if msgs.parsing_errors: print("\tSrc po {} is BROKEN, skipping.".format(po)) ret = 1 continue print("\tMerging {}...".format(po)) if args.stats: print("\t\tMerged po stats:") - utils.print_stats(stats, prefix="\t\t\t") - for msgkey, val in messages.items(): + msgs.print_stats(prefix="\t\t\t") + for msgkey, msg in msgs.msgs.items(): msgctxt, msgid = msgkey # This msgkey has already been completely merged, or is a commented one, # or the new message is commented, skip it. - if msgkey in (done_msgkeys | dst_states["comm_msg"] | states["comm_msg"]): + if msgkey in (done_msgkeys | dst_msgs.comm_msgs | msgs.comm_msgs): continue - is_ttip = utils.is_tooltip(msgid) + is_ttip = msg.is_tooltip # New messages does not yet exists in dest. 
- if msgkey not in dst_messages: - dst_messages[msgkey] = messages[msgkey] - if msgkey in states["fuzzy_msg"]: + if msgkey not in dst_msgs.msgs: + dst_msgs[msgkey] = msgs.msgs[msgkey] + if msgkey in msgs.fuzzy_msgs: done_fuzzy_msgkeys.add(msgkey) - dst_states["fuzzy_msg"].add(msgkey) - elif msgkey in states["trans_msg"]: + dst_msgs.fuzzy_msgs.add(msgkey) + elif msgkey in msgs.trans_msgs: done_msgkeys.add(msgkey) - dst_states["trans_msg"].add(msgkey) - dst_stats["trans_msg"] += 1 - if is_ttip: - dst_stats["trans_ttips"] += 1 + dst_msgs.trans_msgs.add(msgkey) nbr_added += 1 - dst_stats["tot_msg"] += 1 - if is_ttip: - dst_stats["tot_ttips"] += 1 # From now on, the new messages is already in dst. # New message is neither translated nor fuzzy, skip it. - elif msgkey not in (states["trans_msg"] | states["fuzzy_msg"]): + elif msgkey not in (msgs.trans_msgs | msgs.fuzzy_msgs): continue # From now on, the new message is either translated or fuzzy! # The new message is translated. - elif msgkey in states["trans_msg"]: - dst_messages[msgkey]["msgstr_lines"] = messages[msgkey]["msgstr_lines"] + elif msgkey in msgs.trans_msgs: + dst_msgs.msgs[msgkey].msgstr = msg.msgstr done_msgkeys.add(msgkey) done_fuzzy_msgkeys.discard(msgkey) - if msgkey in dst_states["fuzzy_msg"]: - dst_states["fuzzy_msg"].remove(msgkey) + if msgkey in dst_msgs.fuzzy_msgs: + dst_msgs.fuzzy_msgs.remove(msgkey) nbr_unfuzzied += 1 - if msgkey not in dst_states["trans_msg"]: - dst_states["trans_msg"].add(msgkey) - dst_stats["trans_msg"] += 1 - if is_ttip: - dst_stats["trans_ttips"] += 1 + if msgkey not in dst_msgs.trans_msgs: + dst_msgs.trans_msgs.add(msgkey) else: nbr_replaced += 1 nbr_merged += 1 - # The new message is fuzzy, org one is fuzzy too, - # and this msgkey has not yet been merged. 
- elif msgkey not in (dst_states["trans_msg"] | done_fuzzy_msgkeys): - dst_messages[msgkey]["msgstr_lines"] = messages[msgkey]["msgstr_lines"] + # The new message is fuzzy, org one is fuzzy too, and this msgkey has not yet been merged. + elif msgkey not in (dst_msgs.trans_msgs | done_fuzzy_msgkeys): + dst_msgs[msgkey].msgstr = msg.msgstr done_fuzzy_msgkeys.add(msgkey) - dst_states["fuzzy_msg"].add(msgkey) + dst_msgs.fuzzy_msgs.add(msgkey) nbr_merged += 1 nbr_replaced += 1 - utils.write_messages(args.dst, dst_messages, dst_states["comm_msg"], dst_states["fuzzy_msg"]) + dst_msgs.write(kind='PO', dest=args.dst) - print("Merged completed. {} messages were merged (among which {} were replaced), " \ - "{} were added, {} were \"un-fuzzied\"." \ - "".format(nbr_merged, nbr_replaced, nbr_added, nbr_unfuzzied)) + print("Merged completed. {} messages were merged (among which {} were replaced), {} were added, " + "{} were \"un-fuzzied\".".format(nbr_merged, nbr_replaced, nbr_added, nbr_unfuzzied)) if args.stats: + dst_msgs.update_info() print("Final merged po stats:") - utils.print_stats(dst_stats, prefix="\t") + dst_msgs.print_stats(prefix="\t") return ret diff --git a/release/scripts/modules/bl_i18n_utils/settings.py b/release/scripts/modules/bl_i18n_utils/settings.py index 0f09e8238ab..eb5fd4cd0fa 100644 --- a/release/scripts/modules/bl_i18n_utils/settings.py +++ b/release/scripts/modules/bl_i18n_utils/settings.py @@ -96,13 +96,62 @@ IMPORT_MIN_LEVEL = -1 IMPORT_LANGUAGES_SKIP = {'am', 'bg', 'fi', 'el', 'et', 'ko', 'ne', 'pl', 'ro', 'uz', 'uz@cyrillic'} # The comment prefix used in generated messages.txt file. -COMMENT_PREFIX = "#~ " +MSG_COMMENT_PREFIX = "#~ " + +# The comment prefix used in generated messages.txt file. +MSG_CONTEXT_PREFIX = "MSGCTXT:" + +# The default comment prefix used in po's. +PO_COMMENT_PREFIX= "# " # The comment prefix used to mark sources of msgids, in po's. 
-COMMENT_PREFIX_SOURCE = "#: " +PO_COMMENT_PREFIX_SOURCE = "#: " -# The comment prefix used in generated messages.txt file. -CONTEXT_PREFIX = "MSGCTXT:" +# The comment prefix used to mark sources of msgids, in po's. +PO_COMMENT_PREFIX_SOURCE_CUSTOM = "#. :src: " + +# The comment prefix used to comment entries in po's. +PO_COMMENT_PREFIX_MSG= "#~ " + +# The comment prefix used to mark fuzzy msgids, in po's. +PO_COMMENT_FUZZY = "#, fuzzy" + +# The prefix used to define context, in po's. +PO_MSGCTXT = "msgctxt " + +# The prefix used to define msgid, in po's. +PO_MSGID = "msgid " + +# The prefix used to define msgstr, in po's. +PO_MSGSTR = "msgstr " + +# The 'header' key of po files. +PO_HEADER_KEY = ("", "") + +PO_HEADER_MSGSTR = ( + "Project-Id-Version: Blender {blender_ver} (r{blender_rev})\\n\n" + "Report-Msgid-Bugs-To: \\n\n" + "POT-Creation-Date: {time}\\n\n" + "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n\n" + "Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n\n" + "Language-Team: LANGUAGE <LL@li.org>\\n\n" + "Language: {iso}\\n\n" + "MIME-Version: 1.0\\n\n" + "Content-Type: text/plain; charset=UTF-8\\n\n" + "Content-Transfer-Encoding: 8bit\n" +) +PO_HEADER_COMMENT_COPYRIGHT = ( + "# Blender's translation file (po format).\n" + "# Copyright (C) {year} The Blender Foundation.\n" + "# This file is distributed under the same license as the Blender package.\n" + "#\n" +) +PO_HEADER_COMMENT = ( + "# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n" + "#" +) + +TEMPLATE_ISO_ID = "__TEMPLATE__" # Default context. CONTEXT_DEFAULT = "" @@ -121,8 +170,7 @@ PYGETTEXT_ALLOWED_EXTS = {".c", ".cpp", ".cxx", ".hpp", ".hxx", ".h"} PYGETTEXT_MAX_MULTI_CTXT = 16 # Where to search contexts definitions, relative to SOURCE_DIR (defined below). 
-PYGETTEXT_CONTEXTS_DEFSRC = os.path.join("source", "blender", "blenfont", - "BLF_translation.h") +PYGETTEXT_CONTEXTS_DEFSRC = os.path.join("source", "blender", "blenfont", "BLF_translation.h") # Regex to extract contexts defined in BLF_translation.h # XXX Not full-proof, but should be enough here! @@ -146,11 +194,21 @@ _str_base = ( "(?P={_}2)" # And closing quote. ) str_clean_re = _str_base.format(_="g", capt="P<clean>") +_inbetween_str_re = ( + # XXX Strings may have comments between their pieces too, not only spaces! + r"(?:\s*(?:" + # A C comment + r"/\*.*(?!\*/).\*/|" + # Or a C++ one! + r"//[^\n]*\n" + # And we are done! + r")?)*" +) # Here we have to consider two different cases (empty string and other). _str_whole_re = ( _str_base.format(_="{_}1_", capt=":") + # Optional loop start, this handles "split" strings... - "(?:(?<=[\"'])\\s*(?=[\"'])(?:" + "(?:(?<=[\"'])" + _inbetween_str_re + "(?=[\"'])(?:" + _str_base.format(_="{_}2_", capt=":") + # End of loop. "))*" @@ -231,6 +289,8 @@ WARN_MSGID_NOT_CAPITALIZED_ALLOWED = { } WARN_MSGID_NOT_CAPITALIZED_ALLOWED |= set(lng[2] for lng in LANGUAGES) +PARSER_CACHE_HASH = 'sha1' + ############################################################################### # PATHS diff --git a/release/scripts/modules/bl_i18n_utils/update_branches.py b/release/scripts/modules/bl_i18n_utils/update_branches.py index 4c38a2f71fb..1a22a9835dd 100755 --- a/release/scripts/modules/bl_i18n_utils/update_branches.py +++ b/release/scripts/modules/bl_i18n_utils/update_branches.py @@ -43,42 +43,31 @@ FILE_NAME_POT = settings.FILE_NAME_POT def main(): import argparse - parser = argparse.ArgumentParser(description="" \ - "Update all branches:\n" \ - "* Generate a temp messages.txt file.\n" \ - "* Use it to generate a temp .pot file.\n" \ - "* Use it to update all .po’s in /branches.") - parser.add_argument('--pproc-contexts', action="store_true", - help="Pre-process po’s to avoid having plenty of " - "fuzzy msgids just because a context was " 
- "added/changed!") - parser.add_argument('-c', '--no_checks', default=True, - action="store_false", - help="No checks over UI messages.") + parser = argparse.ArgumentParser(description="Update all branches:\n" + "* Generate a temp messages.txt file.\n" + "* Use it to generate a blender.pot file.\n" + "* Use it to update all .po’s in /branches.") + #parser.add_argument('--pproc-contexts', action="store_true", + #help="Pre-process po’s to avoid having plenty of fuzzy msgids just because a context was " + #"added/changed!") + parser.add_argument('-c', '--no_checks', default=True, action="store_false", help="No checks over UI messages.") parser.add_argument('-a', '--add', action="store_true", - help="Add missing po’s (useful only when one or " \ - "more languages are given!).") - parser.add_argument('langs', metavar='ISO_code', nargs='*', - help="Restrict processed languages to those.") + help="Add missing po’s (useful only when one or more languages are given!).") + parser.add_argument('langs', metavar='ISO_code', nargs='*', help="Restrict processed languages to those.") args = parser.parse_args() ret = 0 # Generate a temp messages file. - dummy, msgfile = tempfile.mkstemp(suffix=".txt", - prefix="blender_messages_") + dummy, msgfile = tempfile.mkstemp(suffix=".txt", prefix="blender_messages_") os.close(dummy) cmd = (PY3, "./update_msg.py", "-o", msgfile) t = subprocess.call(cmd) if t: ret = t - # Generate a temp pot file. - # Back to having a pot file in trunk/po. It's quite useful for translators that want to start + # Generate blender.pot file in trunk/po. It's quite useful for translators that want to start # a new translation and not not want to bother generating their own po from scratch! 
-# dummy, potfile = tempfile.mkstemp(suffix=".pot", -# prefix="blender_pot_") -# os.close(dummy) potfile = FILE_NAME_POT cmd = [PY3, "./update_pot.py", "-i", msgfile, "-o", potfile] if not args.no_checks: @@ -93,8 +82,8 @@ def main(): if args.add: cmd.append("-a") cmd += args.langs - if args.pproc_contexts: - cmd.append("--pproc-contexts") + #if args.pproc_contexts: + #cmd.append("--pproc-contexts") t = subprocess.call(cmd) if t: ret = t diff --git a/release/scripts/modules/bl_i18n_utils/update_languages_menu.py b/release/scripts/modules/bl_i18n_utils/update_languages_menu.py index 9b4cb20fadf..6263f1c1e64 100755 --- a/release/scripts/modules/bl_i18n_utils/update_languages_menu.py +++ b/release/scripts/modules/bl_i18n_utils/update_languages_menu.py @@ -73,11 +73,11 @@ def find_matching_po(languages, stats, forbidden): def main(): import argparse - parser = argparse.ArgumentParser(description="" - "Update 'languages' text file used by Blender at runtime to build translations menu.") + parser = argparse.ArgumentParser(description="Update 'languages' text file used by Blender at runtime to build " + "translations menu.") parser.add_argument('-m', '--min_translation', type=int, default=-100, - help="Minimum level of translation, as a percentage " - "(translations below this are commented out in menu).") + help="Minimum level of translation, as a percentage (translations below this are commented out " + "in menu).") parser.add_argument('langs', metavar='ISO_code', nargs='*', help="Unconditionally exclude those languages from the menu.") args = parser.parse_args() @@ -92,8 +92,8 @@ def main(): for po in os.listdir(TRUNK_PO_DIR): if po.endswith(".po") and not po.endswith("_raw.po"): lang = os.path.basename(po)[:-3] - u1, u2, _stats = utils.parse_messages(os.path.join(TRUNK_PO_DIR, po)) - stats[lang] = _stats["trans_msg"] / _stats["tot_msg"] + msgs = utils.I18nMessages(kind='PO', src=os.path.join(TRUNK_PO_DIR, po)) + stats[lang] = msgs.nbr_trans_msgs / msgs.nbr_msgs # 
Generate languages file used by Blender's i18n system. # First, match all entries in LANGUAGES to a lang in stats, if possible! diff --git a/release/scripts/modules/bl_i18n_utils/update_mo.py b/release/scripts/modules/bl_i18n_utils/update_mo.py index 7f68736593c..ab6fedc87a7 100755 --- a/release/scripts/modules/bl_i18n_utils/update_mo.py +++ b/release/scripts/modules/bl_i18n_utils/update_mo.py @@ -51,11 +51,11 @@ def process_po(po, lang, mo=None): # show stats cmd = (GETTEXT_MSGFMT_EXECUTABLE, - "--statistics", - po, - "-o", - mo or os.path.join(mo_dir, ".".join((DOMAIN, "mo"))), - ) + "--statistics", + po, + "-o", + mo or os.path.join(mo_dir, ".".join((DOMAIN, "mo"))), + ) print("Running ", " ".join(cmd)) ret = subprocess.call(cmd) diff --git a/release/scripts/modules/bl_i18n_utils/update_po.py b/release/scripts/modules/bl_i18n_utils/update_po.py index 4c6495c5cfe..a1fe20f9fce 100755 --- a/release/scripts/modules/bl_i18n_utils/update_po.py +++ b/release/scripts/modules/bl_i18n_utils/update_po.py @@ -41,6 +41,7 @@ TRUNK_PO_DIR = settings.TRUNK_PO_DIR FILE_NAME_POT = settings.FILE_NAME_POT +# XXX Not updated, not sure it's that much useful... def pproc_newcontext_po(po, pot_messages, pot_stats): print("Adding new contexts to {}...".format(po)) messages, state, stats = utils.parse_messages(po) @@ -73,41 +74,39 @@ def pproc_newcontext_po(po, pot_messages, pot_stats): def process_po(po, lang): # update po file - cmd = (GETTEXT_MSGMERGE_EXECUTABLE, - "--update", - "-w", "1", # XXX Ugly hack to prevent msgmerge merging - # short source comments together! - "--no-wrap", - "--backup=none", - "--lang={}".format(lang), - po, - FILE_NAME_POT, - ) - + #cmd = (GETTEXT_MSGMERGE_EXECUTABLE, + #"--update", + #"-w", "1", # XXX Ugly hack to prevent msgmerge merging short source comments together! 
+ #"--no-wrap", + #"--backup=none", + #"--lang={}".format(lang), + #po, + #FILE_NAME_POT, + #) + + pot = utils.I18nMessages(kind='PO', src=FILE_NAME_POT) + msg = utils.I18nMessages(iso=lang, kind='PO', src=po) print("Updating {}...".format(po)) - print("Running ", " ".join(cmd)) - ret = subprocess.call(cmd) + msg.update(pot) + msg.write(kind='PO', dest=po) + #print("Running ", " ".join(cmd)) + #ret = subprocess.call(cmd) print("Finished!\n") - return ret + return 0 def main(): import argparse parser = argparse.ArgumentParser(description="Write out messages.txt " "from Blender.") - parser.add_argument('-t', '--trunk', action="store_true", - help="Update po’s in /trunk/po rather than /branches.") - parser.add_argument('-i', '--input', metavar="File", - help="Input pot file path.") - parser.add_argument('--pproc-contexts', action="store_true", - help="Pre-process po’s to avoid having plenty of " - "fuzzy msgids just because a context was " - "added/changed!") + parser.add_argument('-t', '--trunk', action="store_true", help="Update po’s in /trunk/po rather than /branches.") + parser.add_argument('-i', '--input', metavar="File", help="Input pot file path.") + #parser.add_argument('--pproc-contexts', action="store_true", + #help="Pre-process po’s to avoid having plenty of fuzzy msgids just because a context was " + #"added/changed!") parser.add_argument('-a', '--add', action="store_true", - help="Add missing po’s (useful only when one or " - "more languages are given!).") - parser.add_argument('langs', metavar='ISO_code', nargs='*', - help="Restrict processed languages to those.") + help="Add missing po’s (useful only when one or more languages are given!).") + parser.add_argument('langs', metavar='ISO_code', nargs='*', help="Restrict processed languages to those.") args = parser.parse_args() if args.input: @@ -115,9 +114,11 @@ def main(): FILE_NAME_POT = args.input ret = 0 - if args.pproc_contexts: - _ctxt_proc = pproc_newcontext_po - pot_messages, _a, pot_stats = 
utils.parse_messages(FILE_NAME_POT) + #if args.pproc_contexts: + #_ctxt_proc = pproc_newcontext_po + #pot_messages, _a, pot_stats = utils.parse_messages(FILE_NAME_POT) + if 0: + pass else: _ctxt_proc = lambda a, b, c: 0 pot_messages, pot_stats = None, None diff --git a/release/scripts/modules/bl_i18n_utils/update_pot.py b/release/scripts/modules/bl_i18n_utils/update_pot.py index ecb5d837a09..e05e60937a8 100755 --- a/release/scripts/modules/bl_i18n_utils/update_pot.py +++ b/release/scripts/modules/bl_i18n_utils/update_pot.py @@ -23,6 +23,7 @@ # Update blender.pot file from messages.txt import subprocess +import collections import os import sys import re @@ -41,9 +42,11 @@ except: LANGUAGES_CATEGORIES = settings.LANGUAGES_CATEGORIES LANGUAGES = settings.LANGUAGES -COMMENT_PREFIX = settings.COMMENT_PREFIX -COMMENT_PREFIX_SOURCE = settings.COMMENT_PREFIX_SOURCE -CONTEXT_PREFIX = settings.CONTEXT_PREFIX +PO_COMMENT_PREFIX = settings.PO_COMMENT_PREFIX +PO_COMMENT_PREFIX_SOURCE = settings.PO_COMMENT_PREFIX_SOURCE +PO_COMMENT_PREFIX_SOURCE_CUSTOM = settings.PO_COMMENT_PREFIX_SOURCE_CUSTOM +MSG_COMMENT_PREFIX = settings.MSG_COMMENT_PREFIX +MSG_CONTEXT_PREFIX = settings.MSG_CONTEXT_PREFIX FILE_NAME_MESSAGES = settings.FILE_NAME_MESSAGES FILE_NAME_POT = settings.FILE_NAME_POT SOURCE_DIR = settings.SOURCE_DIR @@ -79,6 +82,10 @@ _clean_str = re.compile(settings.str_clean_re).finditer clean_str = lambda s: "".join(m.group("clean") for m in _clean_str(s)) +def _new_messages(): + return getattr(collections, "OrderedDict", dict)() + + def check_file(path, rel_path, messages): def process_entry(ctxt, msg): # Context. 
@@ -207,20 +214,22 @@ def get_svnrev(): def gen_empty_pot(): + blender_ver = "" blender_rev = get_svnrev().decode() utctime = time.gmtime() time_str = time.strftime("%Y-%m-%d %H:%M+0000", utctime) year_str = time.strftime("%Y", utctime) - return utils.gen_empty_messages(blender_rev, time_str, year_str) + return utils.I18nMessages.gen_empty_messages("__POT__", blender_ver, blender_rev, time_str, year_str) escape_re = tuple(re.compile(r[0]) for r in settings.ESCAPE_RE) escape = lambda s, n: escape_re[n].sub(settings.ESCAPE_RE[n][1], s) -def merge_messages(msgs, states, messages, do_checks, spell_cache): - num_added = num_present = 0 +def merge_messages(msgs, messages, do_checks, spell_cache): + num_added = 0 + num_present = msgs.nbr_msgs for (context, msgid), srcs in messages.items(): if do_checks: err = spell_check(msgid, spell_cache) @@ -233,19 +242,15 @@ def merge_messages(msgs, states, messages, do_checks, spell_cache): for n in range(len(escape_re)): msgid = escape(msgid, n) - srcs = [COMMENT_PREFIX_SOURCE + s for s in srcs] - key = (context, msgid) - if key not in msgs: - msgs[key] = {"msgid_lines": [msgid], - "msgstr_lines": [""], - "comment_lines": srcs, - "msgctxt_lines": [context]} + if key not in msgs.msgs: + msg = utils.I18nMessage([context], [msgid], [""], []) + msg.sources = srcs + msgs.msgs[key] = msg num_added += 1 else: - # We need to merge comments! - msgs[key]["comment_lines"].extend(srcs) - num_present += 1 + # We need to merge sources! + msgs.msgs[key].sources += srcs return num_added, num_present @@ -270,7 +275,7 @@ def main(): print("Running fake py gettext…") # Not using any more xgettext, simpler to do it ourself! 
- messages = utils.new_messages() + messages = _new_messages() py_xgettext(messages) print("Finished, found {} messages.".format(len(messages))) @@ -281,55 +286,49 @@ def main(): spell_cache = set() print("Generating POT file {}…".format(FILE_NAME_POT)) - msgs, states = gen_empty_pot() - tot_messages, _a = merge_messages(msgs, states, messages, - True, spell_cache) + msgs = gen_empty_pot() + tot_messages, _a = merge_messages(msgs, messages, True, spell_cache) # add messages collected automatically from RNA print("\tMerging RNA messages from {}…".format(FILE_NAME_MESSAGES)) - messages = utils.new_messages() + messages.clear() with open(FILE_NAME_MESSAGES, encoding="utf-8") as f: srcs = [] context = "" for line in f: line = utils.stripeol(line) - if line.startswith(COMMENT_PREFIX): - srcs.append(line[len(COMMENT_PREFIX):].strip()) - elif line.startswith(CONTEXT_PREFIX): - context = line[len(CONTEXT_PREFIX):].strip() + if line.startswith(MSG_COMMENT_PREFIX): + srcs.append(line[len(MSG_COMMENT_PREFIX):].strip()) + elif line.startswith(MSG_CONTEXT_PREFIX): + context = line[len(MSG_CONTEXT_PREFIX):].strip() else: key = (context, line) messages[key] = srcs srcs = [] context = "" - num_added, num_present = merge_messages(msgs, states, messages, - True, spell_cache) + num_added, num_present = merge_messages(msgs, messages, True, spell_cache) tot_messages += num_added - print("\tMerged {} messages ({} were already present)." 
- "".format(num_added, num_present)) + print("\tMerged {} messages ({} were already present).".format(num_added, num_present)) print("\tAdding languages labels...") - messages = {(CONTEXT_DEFAULT, lng[1]): - ("Languages’ labels from bl_i18n_utils/settings.py",) - for lng in LANGUAGES} - messages.update({(CONTEXT_DEFAULT, cat[1]): - ("Language categories’ labels from bl_i18n_utils/settings.py",) - for cat in LANGUAGES_CATEGORIES}) - num_added, num_present = merge_messages(msgs, states, messages, - True, spell_cache) + messages.clear() + messages.update(((CONTEXT_DEFAULT, lng[1]), ("Languages’ labels from bl_i18n_utils/settings.py",)) + for lng in LANGUAGES) + messages.update(((CONTEXT_DEFAULT, cat[1]), ("Language categories’ labels from bl_i18n_utils/settings.py",)) + for cat in LANGUAGES_CATEGORIES) + num_added, num_present = merge_messages(msgs, messages, True, spell_cache) tot_messages += num_added print("\tAdded {} language messages.".format(num_added)) # Write back all messages into blender.pot. - utils.write_messages(FILE_NAME_POT, msgs, states["comm_msg"], - states["fuzzy_msg"]) + msgs.write('PO', FILE_NAME_POT) if SPELL_CACHE and spell_cache: with open(SPELL_CACHE, 'wb') as f: pickle.dump(spell_cache, f) - print("Finished, total: {} messages!".format(tot_messages - 1)) + print("Finished, total: {} messages!".format(tot_messages)) return 0 diff --git a/release/scripts/modules/bl_i18n_utils/update_trunk.py b/release/scripts/modules/bl_i18n_utils/update_trunk.py index b84a227ae0a..d7d1a9741cb 100755 --- a/release/scripts/modules/bl_i18n_utils/update_trunk.py +++ b/release/scripts/modules/bl_i18n_utils/update_trunk.py @@ -25,8 +25,7 @@ # * Copy po’s from branches advanced enough. # * Clean po’s in trunk. # * Compile po’s in trunk in mo’s, keeping track of those failing. -# * Remove po’s, mo’s (and their dir’s) that failed to compile or -# are no more present in trunk. 
+# * Remove po’s, mo’s (and their dir’s) that failed to compile or are no more present in trunk. import subprocess import os @@ -50,25 +49,6 @@ LANGUAGES_FILE = settings.LANGUAGES_FILE PY3 = settings.PYTHON3_EXEC -def find_matching_po(languages, stats): - """Match languages defined in LANGUAGES setting to relevant po, if possible!""" - ret = [] - for uid, label, org_key in languages: - key = org_key - if key not in stats: - # Try to simplify the key (eg from es_ES to es). - if '_' in org_key: - key = org_key[0:org_key.index('_')] - if '@' in org_key: - key = key + org_key[org_key.index('@'):] - if key in stats: - ret.append((stats[key], uid, label, org_key)) - else: - # Mark invalid entries, so that we can put them in the languages file, - # but commented! - ret.append((0.0, -uid, label, org_key)) - return ret - def main(): import argparse parser = argparse.ArgumentParser(description="" @@ -99,7 +79,8 @@ def main(): os.remove(po) # Copy po’s from branches. - cmd = [PY3, "./import_po_from_branches.py", "-s"] + #cmd = [PY3, "./import_po_from_branches.py", "-s"] + cmd = [PY3, "./import_po_from_branches.py"] if args.threshold is not None: cmd += ["-t", str(args.threshold)] if args.langs: @@ -124,7 +105,8 @@ def main(): if args.langs and lang not in args.langs: continue - cmd = [PY3, "./clean_po.py", "-t", "-s", lang] + #cmd = [PY3, "./clean_po.py", "-t", "-s", lang] + cmd = [PY3, "./clean_po.py", "-t", lang] t = subprocess.call(cmd) if t: ret = t diff --git a/release/scripts/modules/bl_i18n_utils/utils.py b/release/scripts/modules/bl_i18n_utils/utils.py index 9481f750092..ff5ed4f93d7 100644 --- a/release/scripts/modules/bl_i18n_utils/utils.py +++ b/release/scripts/modules/bl_i18n_utils/utils.py @@ -20,357 +20,660 @@ # Some misc utilities... 
+import collections +import copy import os +import re import sys -import collections from bl_i18n_utils import settings -COMMENT_PREFIX = settings.COMMENT_PREFIX +PO_COMMENT_PREFIX = settings.PO_COMMENT_PREFIX +PO_COMMENT_PREFIX_MSG = settings.PO_COMMENT_PREFIX_MSG +PO_COMMENT_PREFIX_SOURCE = settings.PO_COMMENT_PREFIX_SOURCE +PO_COMMENT_PREFIX_SOURCE_CUSTOM = settings.PO_COMMENT_PREFIX_SOURCE_CUSTOM +PO_COMMENT_FUZZY = settings.PO_COMMENT_FUZZY +PO_MSGCTXT = settings.PO_MSGCTXT +PO_MSGID = settings.PO_MSGID +PO_MSGSTR = settings.PO_MSGSTR + +PO_HEADER_KEY = settings.PO_HEADER_KEY +PO_HEADER_COMMENT = settings.PO_HEADER_COMMENT +PO_HEADER_COMMENT_COPYRIGHT = settings.PO_HEADER_COMMENT_COPYRIGHT +PO_HEADER_MSGSTR = settings.PO_HEADER_MSGSTR + +PARSER_CACHE_HASH = settings.PARSER_CACHE_HASH + WARN_NC = settings.WARN_MSGID_NOT_CAPITALIZED NC_ALLOWED = settings.WARN_MSGID_NOT_CAPITALIZED_ALLOWED +PARSER_CACHE_HASH = settings.PARSER_CACHE_HASH +##### Misc Utils ##### + def stripeol(s): return s.rstrip("\n\r") -# XXX For now, we assume that all messages > 30 chars are tooltips! -def is_tooltip(msgid): - return len(msgid) > 30 +_valid_po_path_re = re.compile(r"^\S+:[0-9]+$") +def is_valid_po_path(path): + return bool(_valid_po_path_re.match(path)) + + +class I18nMessage: + """ + Internal representation of a message. 
+ """ + __slots__ = ("msgctxt_lines", "msgid_lines", "msgstr_lines", "comment_lines", "is_fuzzy", "is_commented") + + def __init__(self, msgctxt_lines=[], msgid_lines=[], msgstr_lines=[], comment_lines=[], + is_commented=False, is_fuzzy=False): + self.msgctxt_lines = msgctxt_lines + self.msgid_lines = msgid_lines + self.msgstr_lines = msgstr_lines + self.comment_lines = comment_lines + self.is_fuzzy = is_fuzzy + self.is_commented = is_commented + + def _get_msgctxt(self): + return ("".join(self.msgctxt_lines)).replace("\\n", "\n") + def _set_msgctxt(self, ctxt): + self.msgctxt_lines = [ctxt] + msgctxt = property(_get_msgctxt, _set_msgctxt) + + def _get_msgid(self): + return ("".join(self.msgid_lines)).replace("\\n", "\n") + def _set_msgid(self, msgid): + self.msgid_lines = [msgid] + msgid = property(_get_msgid, _set_msgid) + + def _get_msgstr(self): + return ("".join(self.msgstr_lines)).replace("\\n", "\n") + def _set_msgstr(self, msgstr): + self.msgstr_lines = [msgstr] + msgstr = property(_get_msgstr, _set_msgstr) + + def _get_sources(self): + lstrip1 = len(PO_COMMENT_PREFIX_SOURCE) + lstrip2 = len(PO_COMMENT_PREFIX_SOURCE_CUSTOM) + return ([l[lstrip1:] for l in self.comment_lines if l.startswith(PO_COMMENT_PREFIX_SOURCE)] + + [l[lstrip2:] for l in self.comment_lines if l.startswith(PO_COMMENT_PREFIX_SOURCE_CUSTOM)]) + def _set_sources(self, sources): + # list.copy() is not available in py3.2 ... 
+ cmmlines = [] + cmmlines[:] = self.comment_lines + for l in cmmlines: + if l.startswith(PO_COMMENT_PREFIX_SOURCE) or l.startswith(PO_COMMENT_PREFIX_SOURCE_CUSTOM): + self.comment_lines.remove(l) + lines_src = [] + lines_src_custom = [] + for src in sources: + if is_valid_po_path(src): + lines_src.append(PO_COMMENT_PREFIX_SOURCE + src) + else: + lines_src_custom.append(PO_COMMENT_PREFIX_SOURCE_CUSTOM + src) + self.comment_lines += lines_src_custom + lines_src + sources = property(_get_sources, _set_sources) + + def _get_is_tooltip(self): + # XXX For now, we assume that all messages > 30 chars are tooltips! + return len(self.msgid) > 30 + is_tooltip = property(_get_is_tooltip) + + def normalize(self, max_len=80): + """ + Normalize this message, call this before exporting it... + Currently normalize msgctxt, msgid and msgstr lines to given max_len (if below 1, make them single line). + """ + max_len -= 2 # The two quotes! + # We do not need the full power of textwrap... We just split first at escaped new lines, then into each line + # if needed... No word splitting, nor fancy spaces handling! 
+ def _wrap(text, max_len, init_len): + if len(text) + init_len < max_len: + return [text] + lines = text.splitlines() + ret = [] + for l in lines: + tmp = [] + cur_len = 0 + words = l.split(' ') + for w in words: + cur_len += len(w) + 1 + if cur_len > (max_len - 1) and tmp: + ret.append(" ".join(tmp) + " ") + del tmp[:] + cur_len = len(w) + 1 + tmp.append(w) + if tmp: + ret.append(" ".join(tmp)) + return ret + if max_len < 1: + self.msgctxt_lines = self.msgctxt.replace("\n", "\\n\n").splitlines() + self.msgid_lines = self.msgid.replace("\n", "\\n\n").splitlines() + self.msgstr_lines = self.msgstr.replace("\n", "\\n\n").splitlines() + else: + init_len = len(PO_MSGCTXT) + 1 + if self.is_commented: + init_len += len(PO_COMMENT_PREFIX_MSG) + self.msgctxt_lines = _wrap(self.msgctxt.replace("\n", "\\n\n"), max_len, init_len) + init_len = len(PO_MSGID) + 1 + if self.is_commented: + init_len += len(PO_COMMENT_PREFIX_MSG) + self.msgid_lines = _wrap(self.msgid.replace("\n", "\\n\n"), max_len, init_len) -def new_messages(): - return getattr(collections, 'OrderedDict', dict)() + init_len = len(PO_MSGSTR) + 1 + if self.is_commented: + init_len += len(PO_COMMENT_PREFIX_MSG) + self.msgstr_lines = _wrap(self.msgstr.replace("\n", "\\n\n"), max_len, init_len) -def parse_messages(fname): +class I18nMessages: """ - Returns a tupple (messages, states, stats). - messages is an odereddict of dicts - {(ctxt, msgid): {msgid_lines:, msgstr_lines:, - comment_lines:, msgctxt_lines:}}. - states is a dict of three sets of (msgid, ctxt), and a boolean flag - indicating the .po is somewhat broken - {trans_msg:, fuzzy_msg:, comm_msg:, is_broken:}. - stats is a dict of values - {tot_msg:, trans_msg:, tot_ttips:, trans_ttips:, comm_msg:, - nbr_signs:, nbr_trans_signs:, contexts: set()}. - Note: This function will silently "arrange" mis-formated entries, thus - using afterward write_messages() should always produce a po-valid file, - though not correct! 
class I18nMessages:
    """
    Internal representation of messages for one language (iso code), with additional stats info.
    """

    # Avoid parsing again!
    # Keys should be (pseudo) file-names, values are tuples (hash, messages mapping).
    # Note: only used by po parser currently!
    _parser_cache = {}

    def __init__(self, iso="__POT__", kind=None, key=None, src=None):
        """
        Create a message set for the given iso code.
        If both kind and src are given, src is parsed right away with the parser registered under kind.
        """
        self.iso = iso
        self.msgs = self._new_messages()
        self.trans_msgs = set()
        self.fuzzy_msgs = set()
        self.comm_msgs = set()
        self.ttip_msgs = set()
        self.contexts = set()
        self.nbr_msgs = 0
        self.nbr_trans_msgs = 0
        self.nbr_ttips = 0
        self.nbr_trans_ttips = 0
        self.nbr_comm_msgs = 0
        self.nbr_signs = 0
        self.nbr_trans_signs = 0
        self.parsing_errors = []
        if kind and src:
            self.parse(kind, key, src)
        self.update_info()

    @staticmethod
    def _new_messages():
        """Return a new empty messages mapping (an OrderedDict when available, else a plain dict)."""
        return getattr(collections, 'OrderedDict', dict)()

    @classmethod
    def gen_empty_messages(cls, iso, blender_ver, blender_rev, time, year, default_copyright=True):
        """Generate an empty I18nMessages object (only header is present!)."""
        msgstr = PO_HEADER_MSGSTR.format(blender_ver=str(blender_ver), blender_rev=int(blender_rev),
                                         time=str(time), iso=str(iso))
        comment = ""
        if default_copyright:
            comment = PO_HEADER_COMMENT_COPYRIGHT.format(year=str(year))
        comment = comment + PO_HEADER_COMMENT

        msgs = cls(iso=iso)
        msgs.msgs[PO_HEADER_KEY] = I18nMessage([], [""], [msgstr], [comment], False, True)
        msgs.update_info()

        return msgs

    def normalize(self, max_len=80):
        """Call normalize(max_len) on every message of this set."""
        for msg in self.msgs.values():
            msg.normalize(max_len)

    def merge(self, replace=False, *args):
        """Placeholder, does nothing yet."""
        pass

    def update(self, ref, use_similar=0.8, keep_old_commented=True):
        """
        Update this I18nMessages with the ref one. Source comments from ref completely replace current ones.
        If use_similar is not 0.0, it will try to match new messages in ref with an existing one (the existing
        msgstr is then reused and the message is flagged as fuzzy). Messages no more found in ref will be marked
        as commented if keep_old_commented is True, or removed.
        ref itself is never modified: messages only found in ref are copied before being stored or edited.
        """
        import difflib
        similar_pool = {}
        if use_similar > 0.0:
            # msgid -> set of existing (msgctxt, msgid) keys sharing that msgid.
            for key in self.msgs:
                similar_pool.setdefault(key[1], set()).add(key)

        msgs = self._new_messages()
        for key, msg in ref.msgs.items():
            if key in self.msgs:
                msgs[key] = self.msgs[key]
                msgs[key].sources = msg.sources
                continue

            skey = None
            if use_similar > 0.0:
                # try to find some close key in existing messages...
                close = difflib.get_close_matches(key[1], similar_pool, n=1, cutoff=use_similar)
                if close:
                    close_msgid = close[0]
                    # Try to get the same context, else just get one...
                    skey = (key[0], close_msgid)
                    if skey not in similar_pool[close_msgid]:
                        skey = tuple(similar_pool[close_msgid])[0]
            # Work on a copy: storing ref's own object and editing it below would silently alter ref,
            # which may be reused to update other languages.
            msgs[key] = copy.deepcopy(msg)
            if skey:
                msgs[key].msgstr = self.msgs[skey].msgstr
                msgs[key].is_fuzzy = True
        # Add back all "old" and already commented messages as commented ones, if required.
        if keep_old_commented:
            for key, msg in self.msgs.items():
                if key not in msgs:
                    msgs[key] = msg
                    msgs[key].is_commented = True
        # And finalize the update!
        self.msgs = msgs

    def update_info(self):
        """
        Recompute the key sets (translated, fuzzy, commented, tooltips), the contexts and the counters
        from the current content of self.msgs. The header entry (PO_HEADER_KEY) is ignored by the sets.
        """
        self.trans_msgs.clear()
        self.fuzzy_msgs.clear()
        self.comm_msgs.clear()
        self.ttip_msgs.clear()
        self.contexts.clear()
        self.nbr_signs = 0
        self.nbr_trans_signs = 0
        for key, msg in self.msgs.items():
            if key == PO_HEADER_KEY:
                continue
            if msg.is_commented:
                self.comm_msgs.add(key)
            else:
                if msg.msgstr:
                    self.trans_msgs.add(key)
                if msg.is_fuzzy:
                    self.fuzzy_msgs.add(key)
                if msg.is_tooltip:
                    self.ttip_msgs.add(key)
                self.contexts.add(key[0])
                self.nbr_signs += len(msg.msgid)
                self.nbr_trans_signs += len(msg.msgstr)
        self.nbr_msgs = len(self.msgs)
        self.nbr_trans_msgs = len(self.trans_msgs)
        self.nbr_ttips = len(self.ttip_msgs)
        self.nbr_trans_ttips = len(self.ttip_msgs & self.trans_msgs)
        self.nbr_comm_msgs = len(self.comm_msgs)

    def print_stats(self, prefix=""):
        """
        Print out some stats about an I18nMessages object.
        """
        lvl = 0.0
        lvl_ttips = 0.0
        lvl_comm = 0.0
        lvl_trans_ttips = 0.0
        lvl_ttips_in_trans = 0.0
        if self.nbr_msgs > 0:
            lvl = float(self.nbr_trans_msgs) / float(self.nbr_msgs)
            lvl_ttips = float(self.nbr_ttips) / float(self.nbr_msgs)
            lvl_comm = float(self.nbr_comm_msgs) / float(self.nbr_msgs + self.nbr_comm_msgs)
        if self.nbr_ttips > 0:
            lvl_trans_ttips = float(self.nbr_trans_ttips) / float(self.nbr_ttips)
        if self.nbr_trans_msgs > 0:
            lvl_ttips_in_trans = float(self.nbr_trans_ttips) / float(self.nbr_trans_msgs)

        lines = ("",
                 "{:>6.1%} done! ({} translated messages over {}).\n"
                 "".format(lvl, self.nbr_trans_msgs, self.nbr_msgs),
                 "{:>6.1%} of messages are tooltips ({} over {}).\n"
                 "".format(lvl_ttips, self.nbr_ttips, self.nbr_msgs),
                 "{:>6.1%} of tooltips are translated ({} over {}).\n"
                 "".format(lvl_trans_ttips, self.nbr_trans_ttips, self.nbr_ttips),
                 "{:>6.1%} of translated messages are tooltips ({} over {}).\n"
                 "".format(lvl_ttips_in_trans, self.nbr_trans_ttips, self.nbr_trans_msgs),
                 "{:>6.1%} of messages are commented ({} over {}).\n"
                 "".format(lvl_comm, self.nbr_comm_msgs, self.nbr_comm_msgs + self.nbr_msgs),
                 "This translation is currently made of {} signs.\n".format(self.nbr_trans_signs))
        print(prefix.join(lines))

    def parse(self, kind, key, src):
        """
        Parse src with the parser registered under kind, print the collected parsing errors (if any),
        then refresh the stats.
        """
        del self.parsing_errors[:]
        self.parsers[kind](self, src, key)
        if self.parsing_errors:
            print("WARNING! Errors while parsing {}:".format(key))
            for line, error in self.parsing_errors:
                print(" Around line {}: {}".format(line, error))
            print("The parser solved them as well as it could...")
        self.update_info()

    def parse_messages_from_po(self, src, key=None):
        """
        Parse a po file.
        src is either the path of an existing file, or the po text itself. key identifies the parsed data in
        the parser cache (defaults to src when src is an existing path).
        Problems found while parsing are appended to self.parsing_errors as (line number, description).
        Note: This function will silently "arrange" mis-formated entries, thus using afterward write_messages() should
              always produce a po-valid file, though not correct!
        """
        import hashlib

        reading_msgctxt = False
        reading_msgid = False
        reading_msgstr = False
        is_commented = False
        is_fuzzy = False
        msgctxt_lines = []
        msgid_lines = []
        msgstr_lines = []
        comment_lines = []

        # Helper function
        def finalize_message(line_nr):
            nonlocal reading_msgctxt, reading_msgid, reading_msgstr, is_commented, is_fuzzy
            nonlocal msgctxt_lines, msgid_lines, msgstr_lines, comment_lines

            # Nothing read since last message (leading or repeated empty lines): do not create a bogus
            # empty-key entry, it would make the real header look like a duplicate later on.
            if not (msgctxt_lines or msgid_lines or msgstr_lines):
                return

            msgid = "".join(msgid_lines)
            msgctxt = "".join(msgctxt_lines)
            msgkey = (msgctxt, msgid)

            # Never allow overriding existing msgid/msgctxt pairs!
            if msgkey in self.msgs:
                self.parsing_errors.append((line_nr,
                                            "{} context/msgid is already in current messages!".format(msgkey)))
            else:
                self.msgs[msgkey] = I18nMessage(msgctxt_lines, msgid_lines, msgstr_lines, comment_lines,
                                                is_commented, is_fuzzy)

            # Let's clean up and get ready for next message!
            # This must also happen for a rejected duplicate, else its lines leak into the next message.
            reading_msgctxt = reading_msgid = reading_msgstr = False
            is_commented = is_fuzzy = False
            msgctxt_lines = []
            msgid_lines = []
            msgstr_lines = []
            comment_lines = []

        # try to use src as file name...
        if os.path.exists(src):
            if not key:
                key = src
            with open(src, 'r', encoding="utf-8") as f:
                src = f.read()

        # Try to use values from cache!
        curr_hash = None
        if key and key in self._parser_cache:
            old_hash, msgs = self._parser_cache[key]
            curr_hash = hashlib.new(PARSER_CACHE_HASH, src.encode()).digest()
            if curr_hash == old_hash:
                self.msgs = copy.deepcopy(msgs)  # we might edit self.msgs!
                return

        _comm_msgctxt = PO_COMMENT_PREFIX_MSG + PO_MSGCTXT
        _len_msgctxt = len(PO_MSGCTXT + '"')
        _len_comm_msgctxt = len(_comm_msgctxt + '"')
        _comm_msgid = PO_COMMENT_PREFIX_MSG + PO_MSGID
        _len_msgid = len(PO_MSGID + '"')
        _len_comm_msgid = len(_comm_msgid + '"')
        _comm_msgstr = PO_COMMENT_PREFIX_MSG + PO_MSGSTR
        _len_msgstr = len(PO_MSGSTR + '"')
        _len_comm_msgstr = len(_comm_msgstr + '"')
        _len_comm_str = len(PO_COMMENT_PREFIX_MSG + '"')

        # Main loop over all lines in src...
        for line_nr, line in enumerate(src.splitlines(), 1):
            if line == "":
                finalize_message(line_nr)

            elif line.startswith((PO_MSGCTXT, _comm_msgctxt)):
                reading_msgctxt = True
                if line.startswith(PO_COMMENT_PREFIX_MSG):
                    is_commented = True
                    line = line[_len_comm_msgctxt:-1]
                else:
                    line = line[_len_msgctxt:-1]
                msgctxt_lines.append(line)

            elif line.startswith((PO_MSGID, _comm_msgid)):
                reading_msgid = True
                if line.startswith(PO_COMMENT_PREFIX_MSG):
                    if not is_commented and reading_msgctxt:
                        self.parsing_errors.append((line_nr, "commented msgid following regular msgctxt"))
                    is_commented = True
                    line = line[_len_comm_msgid:-1]
                else:
                    line = line[_len_msgid:-1]
                reading_msgctxt = False
                msgid_lines.append(line)

            elif line.startswith((PO_MSGSTR, _comm_msgstr)):
                if not reading_msgid:
                    self.parsing_errors.append((line_nr, "msgstr without a prior msgid"))
                else:
                    reading_msgid = False
                reading_msgstr = True
                if line.startswith(PO_COMMENT_PREFIX_MSG):
                    line = line[_len_comm_msgstr:-1]
                    if not is_commented:
                        self.parsing_errors.append((line_nr, "commented msgstr following regular msgid"))
                else:
                    line = line[_len_msgstr:-1]
                    if is_commented:
                        self.parsing_errors.append((line_nr, "regular msgstr following commented msgid"))
                msgstr_lines.append(line)

            elif line.startswith(PO_COMMENT_PREFIX[0]):
                if line.startswith(PO_COMMENT_PREFIX_MSG):
                    # Continuation string of a commented message.
                    if reading_msgctxt:
                        if is_commented:
                            msgctxt_lines.append(line[_len_comm_str:-1])
                        else:
                            msgctxt_lines.append(line)
                            self.parsing_errors.append((line_nr, "commented string while reading regular msgctxt"))
                    elif reading_msgid:
                        if is_commented:
                            msgid_lines.append(line[_len_comm_str:-1])
                        else:
                            msgid_lines.append(line)
                            self.parsing_errors.append((line_nr, "commented string while reading regular msgid"))
                    elif reading_msgstr:
                        if is_commented:
                            msgstr_lines.append(line[_len_comm_str:-1])
                        else:
                            msgstr_lines.append(line)
                            self.parsing_errors.append((line_nr, "commented string while reading regular msgstr"))
                else:
                    if reading_msgctxt or reading_msgid or reading_msgstr:
                        self.parsing_errors.append((line_nr,
                                                    "commented string within msgctxt, msgid or msgstr scope, ignored"))
                    elif line.startswith(PO_COMMENT_FUZZY):
                        is_fuzzy = True
                    else:
                        comment_lines.append(line)

            else:
                # Regular continuation string, strip its quotes.
                if reading_msgctxt:
                    msgctxt_lines.append(line[1:-1])
                elif reading_msgid:
                    msgid_lines.append(line[1:-1])
                elif reading_msgstr:
                    msgstr_lines.append(line[1:-1])
                else:
                    self.parsing_errors.append((line_nr, "regular string outside msgctxt, msgid or msgstr scope"))

        # If no final empty line, last message is not finalized!
        if reading_msgstr:
            finalize_message(line_nr)

        if key:
            if not curr_hash:
                curr_hash = hashlib.new(PARSER_CACHE_HASH, src.encode()).digest()
            # Cache a private copy, else later edits of self.msgs would also alter the cached data
            # while its hash still matches the unmodified source.
            self._parser_cache[key] = (curr_hash, copy.deepcopy(self.msgs))

    def write(self, kind, dest):
        """Write the messages to dest with the writer registered under kind."""
        self.writers[kind](self, dest)

    @staticmethod
    def _po_entry(keyword, prefix, lines, text):
        """
        Return the po text of one msgctxt/msgid/msgstr entry.
        keyword is the (possibly comment-prefixed) po keyword, prefix the comment prefix put in front of
        continuation strings, lines the normalized lines of the entry and text its single-line value.
        """
        if len(lines) > 1:
            # Multi-lines entries start with an empty string, followed by one string per line.
            return "".join(("\n", keyword, "\"\"\n", prefix, "\"",
                            ("\"\n" + prefix + "\"").join(lines),
                            "\""))
        return "\n" + keyword + "\"" + text + "\""

    def write_messages_to_po(self, fname):
        """
        Write messages in fname po file.
        """
        self.normalize(max_len=0)  # No wrapping for now...
        with open(fname, 'w', encoding="utf-8") as f:
            for msg in self.msgs.values():
                f.write("\n".join(msg.comment_lines))
                # Only mark as fuzzy if msgstr is not empty!
                if msg.is_fuzzy and msg.msgstr:
                    f.write("\n" + PO_COMMENT_FUZZY)
                _p = PO_COMMENT_PREFIX_MSG if msg.is_commented else ""
                chunks = []
                if msg.msgctxt:
                    chunks.append(self._po_entry(_p + PO_MSGCTXT, _p, msg.msgctxt_lines, msg.msgctxt))
                chunks.append(self._po_entry(_p + PO_MSGID, _p, msg.msgid_lines, msg.msgid))
                chunks.append(self._po_entry(_p + PO_MSGSTR, _p, msg.msgstr_lines, msg.msgstr))
                chunks.append("\n\n")
                f.write("".join(chunks))

    parsers = {
        "PO": parse_messages_from_po,
#        "PYTUPLE": parse_messages_from_pytuple,
    }

    writers = {
        "PO": write_messages_to_po,
        #"PYDICT": write_messages_to_pydict,
    }


class I18n:
    """
    Internal representation of a whole translation set.
    """

    def __init__(self, src):
        # NOTE(review): src is accepted but not used here.
        self.trans = {}
        self.update_info()

    def update_info(self):
        """
        Recompute the global stats from all I18nMessages stored in self.trans (keyed by iso code).
        The lvl_* attributes are sums of per-translation ratios, print_stats() divides them by nbr_trans.
        """
        self.nbr_trans = 0
        self.lvl = 0.0
        self.lvl_ttips = 0.0
        self.lvl_trans_ttips = 0.0
        self.lvl_ttips_in_trans = 0.0
        self.lvl_comm = 0.0
        self.nbr_signs = 0
        self.nbr_trans_signs = 0
        self.contexts = set()

        if TEMPLATE_ISO_ID in self.trans:
            self.nbr_trans = len(self.trans) - 1
            self.nbr_signs = self.trans[TEMPLATE_ISO_ID].nbr_signs
        else:
            self.nbr_trans = len(self.trans)
        for iso, msgs in self.trans.items():
            msgs.update_info()
            if self.nbr_signs == 0:
                self.nbr_signs = msgs.nbr_signs
            self.nbr_trans_signs += msgs.nbr_trans_signs
            self.contexts |= msgs.contexts
            # The template is not counted in nbr_trans, so it must not feed the summed ratios either.
            if iso == TEMPLATE_ISO_ID:
                continue
            if msgs.nbr_msgs > 0:
                self.lvl += float(msgs.nbr_trans_msgs) / float(msgs.nbr_msgs)
                self.lvl_ttips += float(msgs.nbr_ttips) / float(msgs.nbr_msgs)
                self.lvl_comm += float(msgs.nbr_comm_msgs) / float(msgs.nbr_msgs + msgs.nbr_comm_msgs)
            # Accumulate (not overwrite), these two are averaged like the other levels.
            if msgs.nbr_ttips > 0:
                self.lvl_trans_ttips += float(msgs.nbr_trans_ttips) / float(msgs.nbr_ttips)
            if msgs.nbr_trans_msgs > 0:
                self.lvl_ttips_in_trans += float(msgs.nbr_trans_ttips) / float(msgs.nbr_trans_msgs)

    def print_stats(self, prefix="", print_msgs=True):
        """
        Print out some stats about an I18n object.
        If print_msgs is True, it will also print all its translations' stats.
        """
        if print_msgs:
            msgs_prefix = prefix + " "
            for key, msgs in self.trans.items():
                if key == TEMPLATE_ISO_ID:
                    continue
                print(prefix + key + ":")
                msgs.print_stats(prefix=msgs_prefix)
                print(prefix)

        specific_contexts = self.contexts - {CONTEXT_DEFAULT}
        nbr_contexts = len(specific_contexts)
        if nbr_contexts != 1:
            if nbr_contexts == 0:
                nbr_contexts = "No"
            _ctx_txt = "s are"
        else:
            _ctx_txt = " is"
        # All summed levels are 0.0 when there is no translation, so dividing by 1 is harmless then.
        nbr_trans = self.nbr_trans or 1
        lines = ("",
                 "Average stats for all {} translations:\n".format(self.nbr_trans),
                 " {:>6.1%} done!\n".format(self.lvl / nbr_trans),
                 " {:>6.1%} of messages are tooltips.\n".format(self.lvl_ttips / nbr_trans),
                 " {:>6.1%} of tooltips are translated.\n".format(self.lvl_trans_ttips / nbr_trans),
                 " {:>6.1%} of translated messages are tooltips.\n".format(self.lvl_ttips_in_trans / nbr_trans),
                 " {:>6.1%} of messages are commented.\n".format(self.lvl_comm / nbr_trans),
                 " The org msgids are currently made of {} signs.\n".format(self.nbr_signs),
                 " All processed translations are currently made of {} signs.\n".format(self.nbr_trans_signs),
                 " {} specific context{} present:\n {}\n"
                 "".format(nbr_contexts, _ctx_txt, "\n ".join(specific_contexts)),
                 "\n")
        print(prefix.join(lines))


##### Parsers #####

#def parse_messages_from_pytuple(self, src, key=None):
    #"""
    #Returns a dict of tuples similar to the one returned by parse_messages_from_po (one per language, plus a 'pot'
    #one keyed as '__POT__').
    #"""
    ## src may be either a string to be interpreted as py code, or a real tuple!
+ #if isinstance(src, str): + #src = eval(src) +# + #curr_hash = None + #if key and key in _parser_cache: + #old_hash, ret = _parser_cache[key] + #import hashlib + #curr_hash = hashlib.new(PARSER_CACHE_HASH, str(src).encode()).digest() + #if curr_hash == old_hash: + #return ret +# + #pot = new_messages() + #states = gen_states() + #stats = gen_stats() + #ret = {"__POT__": (pot, states, stats)} + #for msg in src: + #key = msg[0] + #messages[msgkey] = gen_message(msgid_lines, msgstr_lines, comment_lines, msgctxt_lines) + #pot[key] = gen_message(msgid_lines=[key[1]], msgstr_lines=[ + #for lang, trans, (is_fuzzy, comments) in msg[2:]: + #if trans and not is_fuzzy: + #i18n_dict.setdefault(lang, dict())[key] = trans +# + #if key: + #if not curr_hash: + #import hashlib + #curr_hash = hashlib.new(PARSER_CACHE_HASH, str(src).encode()).digest() + #_parser_cache[key] = (curr_hash, val) + #return ret
\ No newline at end of file |