Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'release/scripts/modules/bl_i18n_utils/utils.py')
-rw-r--r--release/scripts/modules/bl_i18n_utils/utils.py875
1 file changed, 589 insertions, 286 deletions
diff --git a/release/scripts/modules/bl_i18n_utils/utils.py b/release/scripts/modules/bl_i18n_utils/utils.py
index 9481f750092..ff5ed4f93d7 100644
--- a/release/scripts/modules/bl_i18n_utils/utils.py
+++ b/release/scripts/modules/bl_i18n_utils/utils.py
@@ -20,357 +20,660 @@
# Some misc utilities...
+import collections
+import copy
import os
+import re
import sys
-import collections
from bl_i18n_utils import settings
-COMMENT_PREFIX = settings.COMMENT_PREFIX
+PO_COMMENT_PREFIX = settings.PO_COMMENT_PREFIX
+PO_COMMENT_PREFIX_MSG = settings.PO_COMMENT_PREFIX_MSG
+PO_COMMENT_PREFIX_SOURCE = settings.PO_COMMENT_PREFIX_SOURCE
+PO_COMMENT_PREFIX_SOURCE_CUSTOM = settings.PO_COMMENT_PREFIX_SOURCE_CUSTOM
+PO_COMMENT_FUZZY = settings.PO_COMMENT_FUZZY
+PO_MSGCTXT = settings.PO_MSGCTXT
+PO_MSGID = settings.PO_MSGID
+PO_MSGSTR = settings.PO_MSGSTR
+
+PO_HEADER_KEY = settings.PO_HEADER_KEY
+PO_HEADER_COMMENT = settings.PO_HEADER_COMMENT
+PO_HEADER_COMMENT_COPYRIGHT = settings.PO_HEADER_COMMENT_COPYRIGHT
+PO_HEADER_MSGSTR = settings.PO_HEADER_MSGSTR
+
+PARSER_CACHE_HASH = settings.PARSER_CACHE_HASH
+
WARN_NC = settings.WARN_MSGID_NOT_CAPITALIZED
NC_ALLOWED = settings.WARN_MSGID_NOT_CAPITALIZED_ALLOWED
+PARSER_CACHE_HASH = settings.PARSER_CACHE_HASH
+##### Misc Utils #####
+
def stripeol(s):
return s.rstrip("\n\r")
-# XXX For now, we assume that all messages > 30 chars are tooltips!
-def is_tooltip(msgid):
- return len(msgid) > 30
+_valid_po_path_re = re.compile(r"^\S+:[0-9]+$")
+def is_valid_po_path(path):
+ return bool(_valid_po_path_re.match(path))
+
+
+class I18nMessage:
+ """
+ Internal representation of a message.
+ """
+ __slots__ = ("msgctxt_lines", "msgid_lines", "msgstr_lines", "comment_lines", "is_fuzzy", "is_commented")
+
+ def __init__(self, msgctxt_lines=[], msgid_lines=[], msgstr_lines=[], comment_lines=[],
+ is_commented=False, is_fuzzy=False):
+ self.msgctxt_lines = msgctxt_lines
+ self.msgid_lines = msgid_lines
+ self.msgstr_lines = msgstr_lines
+ self.comment_lines = comment_lines
+ self.is_fuzzy = is_fuzzy
+ self.is_commented = is_commented
+
+ def _get_msgctxt(self):
+ return ("".join(self.msgctxt_lines)).replace("\\n", "\n")
+ def _set_msgctxt(self, ctxt):
+ self.msgctxt_lines = [ctxt]
+ msgctxt = property(_get_msgctxt, _set_msgctxt)
+
+ def _get_msgid(self):
+ return ("".join(self.msgid_lines)).replace("\\n", "\n")
+ def _set_msgid(self, msgid):
+ self.msgid_lines = [msgid]
+ msgid = property(_get_msgid, _set_msgid)
+
+ def _get_msgstr(self):
+ return ("".join(self.msgstr_lines)).replace("\\n", "\n")
+ def _set_msgstr(self, msgstr):
+ self.msgstr_lines = [msgstr]
+ msgstr = property(_get_msgstr, _set_msgstr)
+
+ def _get_sources(self):
+ lstrip1 = len(PO_COMMENT_PREFIX_SOURCE)
+ lstrip2 = len(PO_COMMENT_PREFIX_SOURCE_CUSTOM)
+ return ([l[lstrip1:] for l in self.comment_lines if l.startswith(PO_COMMENT_PREFIX_SOURCE)] +
+ [l[lstrip2:] for l in self.comment_lines if l.startswith(PO_COMMENT_PREFIX_SOURCE_CUSTOM)])
+ def _set_sources(self, sources):
+ # list.copy() is not available in py3.2 ...
+ cmmlines = []
+ cmmlines[:] = self.comment_lines
+ for l in cmmlines:
+ if l.startswith(PO_COMMENT_PREFIX_SOURCE) or l.startswith(PO_COMMENT_PREFIX_SOURCE_CUSTOM):
+ self.comment_lines.remove(l)
+ lines_src = []
+ lines_src_custom = []
+ for src in sources:
+ if is_valid_po_path(src):
+ lines_src.append(PO_COMMENT_PREFIX_SOURCE + src)
+ else:
+ lines_src_custom.append(PO_COMMENT_PREFIX_SOURCE_CUSTOM + src)
+ self.comment_lines += lines_src_custom + lines_src
+ sources = property(_get_sources, _set_sources)
+
+ def _get_is_tooltip(self):
+ # XXX For now, we assume that all messages > 30 chars are tooltips!
+ return len(self.msgid) > 30
+ is_tooltip = property(_get_is_tooltip)
+
+ def normalize(self, max_len=80):
+ """
+ Normalize this message, call this before exporting it...
+ Currently normalize msgctxt, msgid and msgstr lines to given max_len (if below 1, make them single line).
+ """
+ max_len -= 2 # The two quotes!
+ # We do not need the full power of textwrap... We just split first at escaped new lines, then into each line
+ # if needed... No word splitting, nor fancy spaces handling!
+ def _wrap(text, max_len, init_len):
+ if len(text) + init_len < max_len:
+ return [text]
+ lines = text.splitlines()
+ ret = []
+ for l in lines:
+ tmp = []
+ cur_len = 0
+ words = l.split(' ')
+ for w in words:
+ cur_len += len(w) + 1
+ if cur_len > (max_len - 1) and tmp:
+ ret.append(" ".join(tmp) + " ")
+ del tmp[:]
+ cur_len = len(w) + 1
+ tmp.append(w)
+ if tmp:
+ ret.append(" ".join(tmp))
+ return ret
+ if max_len < 1:
+ self.msgctxt_lines = self.msgctxt.replace("\n", "\\n\n").splitlines()
+ self.msgid_lines = self.msgid.replace("\n", "\\n\n").splitlines()
+ self.msgstr_lines = self.msgstr.replace("\n", "\\n\n").splitlines()
+ else:
+ init_len = len(PO_MSGCTXT) + 1
+ if self.is_commented:
+ init_len += len(PO_COMMENT_PREFIX_MSG)
+ self.msgctxt_lines = _wrap(self.msgctxt.replace("\n", "\\n\n"), max_len, init_len)
+ init_len = len(PO_MSGID) + 1
+ if self.is_commented:
+ init_len += len(PO_COMMENT_PREFIX_MSG)
+ self.msgid_lines = _wrap(self.msgid.replace("\n", "\\n\n"), max_len, init_len)
-def new_messages():
- return getattr(collections, 'OrderedDict', dict)()
+ init_len = len(PO_MSGSTR) + 1
+ if self.is_commented:
+ init_len += len(PO_COMMENT_PREFIX_MSG)
+ self.msgstr_lines = _wrap(self.msgstr.replace("\n", "\\n\n"), max_len, init_len)
-def parse_messages(fname):
+class I18nMessages:
"""
- Returns a tupple (messages, states, stats).
- messages is an odereddict of dicts
- {(ctxt, msgid): {msgid_lines:, msgstr_lines:,
- comment_lines:, msgctxt_lines:}}.
- states is a dict of three sets of (msgid, ctxt), and a boolean flag
- indicating the .po is somewhat broken
- {trans_msg:, fuzzy_msg:, comm_msg:, is_broken:}.
- stats is a dict of values
- {tot_msg:, trans_msg:, tot_ttips:, trans_ttips:, comm_msg:,
- nbr_signs:, nbr_trans_signs:, contexts: set()}.
- Note: This function will silently "arrange" mis-formated entries, thus
- using afterward write_messages() should always produce a po-valid file,
- though not correct!
+ Internal representation of messages for one language (iso code), with additional stats info.
"""
- tot_messages = 0
- tot_tooltips = 0
- trans_messages = 0
- trans_tooltips = 0
- comm_messages = 0
- nbr_signs = 0
- nbr_trans_signs = 0
- contexts = set()
- reading_msgid = False
- reading_msgstr = False
- reading_msgctxt = False
- reading_comment = False
- is_translated = False
- is_fuzzy = False
- is_commented = False
- is_broken = False
- msgid_lines = []
- msgstr_lines = []
- msgctxt_lines = []
- comment_lines = []
-
- messages = new_messages()
- translated_messages = set()
- fuzzy_messages = set()
- commented_messages = set()
-
- def clean_vars():
- nonlocal reading_msgid, reading_msgstr, reading_msgctxt, \
- reading_comment, is_fuzzy, is_translated, is_commented, \
- msgid_lines, msgstr_lines, msgctxt_lines, comment_lines
- reading_msgid = reading_msgstr = reading_msgctxt = \
- reading_comment = False
- is_tooltip = is_fuzzy = is_translated = is_commented = False
+
+ # Avoid parsing again!
+ # Keys should be (pseudo) file-names, values are tuples (hash, I18nMessages)
+ # Note: only used by po parser currently!
+ _parser_cache = {}
+
+ def __init__(self, iso="__POT__", kind=None, key=None, src=None):
+ self.iso = iso
+ self.msgs = self._new_messages()
+ self.trans_msgs = set()
+ self.fuzzy_msgs = set()
+ self.comm_msgs = set()
+ self.ttip_msgs = set()
+ self.contexts = set()
+ self.nbr_msgs = 0
+ self.nbr_trans_msgs = 0
+ self.nbr_ttips = 0
+ self.nbr_trans_ttips = 0
+ self.nbr_comm_msgs = 0
+ self.nbr_signs = 0
+ self.nbr_trans_signs = 0
+ self.parsing_errors = []
+ if kind and src:
+ self.parse(kind, key, src)
+ self.update_info()
+
+ @staticmethod
+ def _new_messages():
+ return getattr(collections, 'OrderedDict', dict)()
+
+ @classmethod
+ def gen_empty_messages(cls, iso, blender_ver, blender_rev, time, year, default_copyright=True):
+ """Generate an empty I18nMessages object (only header is present!)."""
+ msgstr = PO_HEADER_MSGSTR.format(blender_ver=str(blender_ver), blender_rev=int(blender_rev),
+ time=str(time), iso=str(iso))
+ comment = ""
+ if default_copyright:
+ comment = PO_HEADER_COMMENT_COPYRIGHT.format(year=str(year))
+ comment = comment + PO_HEADER_COMMENT
+
+ msgs = cls(iso=iso)
+ msgs.msgs[PO_HEADER_KEY] = I18nMessage([], [""], [msgstr], [comment], False, True)
+ msgs.update_info()
+
+ return msgs
+
+ def normalize(self, max_len=80):
+ for msg in self.msgs.values():
+ msg.normalize(max_len)
+
+ def merge(self, replace=False, *args):
+ pass
+
+ def update(self, ref, use_similar=0.8, keep_old_commented=True):
+ """
+ Update this I18nMessage with the ref one. Translations from ref are never used. Source comments from ref
+ completely replace current ones. If use_similar is not 0.0, it will try to match new messages in ref with an
+    existing one. Messages no longer found in ref will be marked as commented if keep_old_commented is True,
+ or removed.
+ """
+ import difflib
+ similar_pool = {}
+ if use_similar > 0.0:
+ for key in self.msgs:
+ similar_pool.setdefault(key[1], set()).add(key)
+
+ msgs = self._new_messages()
+ for (key, msg) in ref.msgs.items():
+ if key in self.msgs:
+ msgs[key] = self.msgs[key]
+ msgs[key].sources = msg.sources
+ else:
+ skey = None
+ if use_similar > 0.0:
+ # try to find some close key in existing messages...
+ tmp = difflib.get_close_matches(key[1], similar_pool, n=1, cutoff=use_similar)
+ if tmp:
+ tmp = tmp[0]
+ # Try to get the same context, else just get one...
+ skey = (key[0], tmp)
+ if skey not in similar_pool[tmp]:
+ skey = tuple(similar_pool[tmp])[0]
+ msgs[key] = msg
+ if skey:
+ msgs[key].msgstr = self.msgs[skey].msgstr
+ msgs[key].is_fuzzy = True
+ # Add back all "old" and already commented messages as commented ones, if required.
+ if keep_old_commented:
+ for key, msg in self.msgs.items():
+ if key not in msgs:
+ msgs[key] = msg
+ msgs[key].is_commented = True
+ # And finalize the update!
+ self.msgs = msgs
+
+ def update_info(self):
+ self.trans_msgs.clear()
+ self.fuzzy_msgs.clear()
+ self.comm_msgs.clear()
+ self.ttip_msgs.clear()
+ self.contexts.clear()
+ self.nbr_signs = 0
+ self.nbr_trans_signs = 0
+ for key, msg in self.msgs.items():
+ if key == PO_HEADER_KEY:
+ continue
+ if msg.is_commented:
+ self.comm_msgs.add(key)
+ else:
+ if msg.msgstr:
+ self.trans_msgs.add(key)
+ if msg.is_fuzzy:
+ self.fuzzy_msgs.add(key)
+ if msg.is_tooltip:
+ self.ttip_msgs.add(key)
+ self.contexts.add(key[0])
+ self.nbr_signs += len(msg.msgid)
+ self.nbr_trans_signs += len(msg.msgstr)
+ self.nbr_msgs = len(self.msgs)
+ self.nbr_trans_msgs = len(self.trans_msgs)
+ self.nbr_ttips = len(self.ttip_msgs)
+ self.nbr_trans_ttips = len(self.ttip_msgs & self.trans_msgs)
+ self.nbr_comm_msgs = len(self.comm_msgs)
+
+ def print_stats(self, prefix=""):
+ """
+ Print out some stats about an I18nMessages object.
+ """
+ lvl = 0.0
+ lvl_ttips = 0.0
+ lvl_comm = 0.0
+ lvl_trans_ttips = 0.0
+ lvl_ttips_in_trans = 0.0
+ if self.nbr_msgs > 0:
+ lvl = float(self.nbr_trans_msgs) / float(self.nbr_msgs)
+ lvl_ttips = float(self.nbr_ttips) / float(self.nbr_msgs)
+ lvl_comm = float(self.nbr_comm_msgs) / float(self.nbr_msgs + self.nbr_comm_msgs)
+ if self.nbr_ttips > 0:
+ lvl_trans_ttips = float(self.nbr_trans_ttips) / float(self.nbr_ttips)
+ if self.nbr_trans_msgs > 0:
+ lvl_ttips_in_trans = float(self.nbr_trans_ttips) / float(self.nbr_trans_msgs)
+
+ lines = ("",
+ "{:>6.1%} done! ({} translated messages over {}).\n"
+ "".format(lvl, self.nbr_trans_msgs, self.nbr_msgs),
+ "{:>6.1%} of messages are tooltips ({} over {}).\n"
+ "".format(lvl_ttips, self.nbr_ttips, self.nbr_msgs),
+ "{:>6.1%} of tooltips are translated ({} over {}).\n"
+ "".format(lvl_trans_ttips, self.nbr_trans_ttips, self.nbr_ttips),
+ "{:>6.1%} of translated messages are tooltips ({} over {}).\n"
+ "".format(lvl_ttips_in_trans, self.nbr_trans_ttips, self.nbr_trans_msgs),
+ "{:>6.1%} of messages are commented ({} over {}).\n"
+ "".format(lvl_comm, self.nbr_comm_msgs, self.nbr_comm_msgs + self.nbr_msgs),
+ "This translation is currently made of {} signs.\n".format(self.nbr_trans_signs))
+ print(prefix.join(lines))
+
+ def parse(self, kind, key, src):
+ del self.parsing_errors[:]
+ self.parsers[kind](self, src, key)
+ if self.parsing_errors:
+ print("WARNING! Errors while parsing {}:".format(key))
+ for line, error in self.parsing_errors:
+ print(" Around line {}: {}".format(line, error))
+ print("The parser solved them as well as it could...")
+ self.update_info()
+
+ def parse_messages_from_po(self, src, key=None):
+ """
+ Parse a po file.
+    Note: This function will silently "arrange" mis-formatted entries, thus using write_messages() afterward should
+ always produce a po-valid file, though not correct!
+ """
+ reading_msgid = False
+ reading_msgstr = False
+ reading_msgctxt = False
+ reading_comment = False
+ is_commented = False
+ is_fuzzy = False
+ msgctxt_lines = []
msgid_lines = []
msgstr_lines = []
- msgctxt_lines = []
comment_lines = []
- def finalize_message():
- nonlocal reading_msgid, reading_msgstr, reading_msgctxt, \
- reading_comment, is_fuzzy, is_translated, is_commented, \
- msgid_lines, msgstr_lines, msgctxt_lines, comment_lines, \
- messages, translated_messages, fuzzy_messages, \
- commented_messages, \
- tot_messages, tot_tooltips, trans_messages, trans_tooltips, \
- comm_messages, nbr_signs, nbr_trans_signs, contexts
-
- msgid = "".join(msgid_lines)
- msgctxt = "".join(msgctxt_lines)
- msgkey = (msgctxt, msgid)
- is_ttip = is_tooltip(msgid)
-
- # Never allow overriding existing msgid/msgctxt pairs!
- if msgkey in messages:
- clean_vars()
- return
-
- nbr_signs += len(msgid)
- if is_commented:
- commented_messages.add(msgkey)
- elif is_fuzzy:
- fuzzy_messages.add(msgkey)
- elif is_translated:
- translated_messages.add(msgkey)
- nbr_trans_signs += len("".join(msgstr_lines))
- messages[msgkey] = {"msgid_lines" : msgid_lines,
- "msgstr_lines" : msgstr_lines,
- "comment_lines": comment_lines,
- "msgctxt_lines": msgctxt_lines}
-
- if is_commented:
- comm_messages += 1
- else:
- tot_messages += 1
- if is_ttip:
- tot_tooltips += 1
- if not is_fuzzy and is_translated:
- trans_messages += 1
- if is_ttip:
- trans_tooltips += 1
- if msgctxt not in contexts:
- contexts.add(msgctxt)
-
- clean_vars()
-
- with open(fname, 'r', encoding="utf-8") as f:
- for line_nr, line in enumerate(f):
- line = stripeol(line)
+ # Helper function
+ def finalize_message(self, line_nr):
+ nonlocal reading_msgid, reading_msgstr, reading_msgctxt, reading_comment
+ nonlocal is_commented, is_fuzzy, msgid_lines, msgstr_lines, msgctxt_lines, comment_lines
+
+ msgid = "".join(msgid_lines)
+ msgctxt = "".join(msgctxt_lines)
+ msgkey = (msgctxt, msgid)
+
+ # Never allow overriding existing msgid/msgctxt pairs!
+ if msgkey in self.msgs:
+ self.parsing_errors.append((line_nr, "{} context/msgid is already in current messages!".format(msgkey)))
+ return
+
+ self.msgs[msgkey] = I18nMessage(msgctxt_lines, msgid_lines, msgstr_lines, comment_lines,
+ is_commented, is_fuzzy)
+
+ # Let's clean up and get ready for next message!
+ reading_msgid = reading_msgstr = reading_msgctxt = reading_comment = False
+ is_commented = is_fuzzy = False
+ msgctxt_lines = []
+ msgid_lines = []
+ msgstr_lines = []
+ comment_lines = []
+
+ # try to use src as file name...
+ if os.path.exists(src):
+ if not key:
+ key = src
+ with open(src, 'r', encoding="utf-8") as f:
+ src = f.read()
+
+ # Try to use values from cache!
+ curr_hash = None
+ if key and key in self._parser_cache:
+ old_hash, msgs = self._parser_cache[key]
+ import hashlib
+ curr_hash = hashlib.new(PARSER_CACHE_HASH, src.encode()).digest()
+ if curr_hash == old_hash:
+ self.msgs = copy.deepcopy(msgs) # we might edit self.msgs!
+ return
+
+ _comm_msgctxt = PO_COMMENT_PREFIX_MSG + PO_MSGCTXT
+ _len_msgctxt = len(PO_MSGCTXT + '"')
+ _len_comm_msgctxt = len(_comm_msgctxt + '"')
+ _comm_msgid = PO_COMMENT_PREFIX_MSG + PO_MSGID
+ _len_msgid = len(PO_MSGID + '"')
+ _len_comm_msgid = len(_comm_msgid + '"')
+ _comm_msgstr = PO_COMMENT_PREFIX_MSG + PO_MSGSTR
+ _len_msgstr = len(PO_MSGSTR + '"')
+ _len_comm_msgstr = len(_comm_msgstr + '"')
+ _len_comm_str = len(PO_COMMENT_PREFIX_MSG + '"')
+
+ # Main loop over all lines in src...
+ for line_nr, line in enumerate(src.splitlines()):
if line == "":
- finalize_message()
+ finalize_message(self, line_nr)
- elif line.startswith("msgctxt") or \
- line.startswith("".join((COMMENT_PREFIX, "msgctxt"))):
+ elif line.startswith(PO_MSGCTXT) or line.startswith(_comm_msgctxt):
reading_comment = False
reading_ctxt = True
- if line.startswith(COMMENT_PREFIX):
+ if line.startswith(PO_COMMENT_PREFIX_MSG):
is_commented = True
- line = line[9 + len(COMMENT_PREFIX):-1]
+ line = line[_len_comm_msgctxt:-1]
else:
- line = line[9:-1]
+ line = line[_len_msgctxt:-1]
msgctxt_lines.append(line)
- elif line.startswith("msgid") or \
- line.startswith("".join((COMMENT_PREFIX, "msgid"))):
+ elif line.startswith(PO_MSGID) or line.startswith(_comm_msgid):
reading_comment = False
reading_msgid = True
- if line.startswith(COMMENT_PREFIX):
+ if line.startswith(PO_COMMENT_PREFIX_MSG):
+ if not is_commented and reading_ctxt:
+ self.parsing_errors.append((line_nr, "commented msgid following regular msgctxt"))
is_commented = True
- line = line[7 + len(COMMENT_PREFIX):-1]
+ line = line[_len_comm_msgid:-1]
else:
- line = line[7:-1]
+ line = line[_len_msgid:-1]
+ reading_ctxt = False
msgid_lines.append(line)
- elif line.startswith("msgstr") or \
- line.startswith("".join((COMMENT_PREFIX, "msgstr"))):
+ elif line.startswith(PO_MSGSTR) or line.startswith(_comm_msgstr):
if not reading_msgid:
- is_broken = True
+ self.parsing_errors.append((line_nr, "msgstr without a prior msgid"))
else:
reading_msgid = False
reading_msgstr = True
- if line.startswith(COMMENT_PREFIX):
- line = line[8 + len(COMMENT_PREFIX):-1]
+ if line.startswith(PO_COMMENT_PREFIX_MSG):
+ line = line[_len_comm_msgstr:-1]
if not is_commented:
- is_broken = True
+ self.parsing_errors.append((line_nr, "commented msgstr following regular msgid"))
else:
- line = line[8:-1]
+ line = line[_len_msgstr:-1]
if is_commented:
- is_broken = True
+ self.parsing_errors.append((line_nr, "regular msgstr following commented msgid"))
msgstr_lines.append(line)
- if line:
- is_translated = True
- elif line.startswith("#"):
- if reading_msgid:
- if is_commented:
- msgid_lines.append(line[1 + len(COMMENT_PREFIX):-1])
- else:
- msgid_lines.append(line)
- is_broken = True
- elif reading_msgstr:
- if is_commented:
- msgstr_lines.append(line[1 + len(COMMENT_PREFIX):-1])
- else:
- msgstr_lines.append(line)
- is_broken = True
+ elif line.startswith(PO_COMMENT_PREFIX[0]):
+ if line.startswith(PO_COMMENT_PREFIX_MSG):
+ if reading_msgctxt:
+ if is_commented:
+ msgctxt_lines.append(line[_len_comm_str:-1])
+ else:
+ msgctxt_lines.append(line)
+ self.parsing_errors.append((line_nr, "commented string while reading regular msgctxt"))
+ elif reading_msgid:
+ if is_commented:
+ msgid_lines.append(line[_len_comm_str:-1])
+ else:
+ msgid_lines.append(line)
+ self.parsing_errors.append((line_nr, "commented string while reading regular msgid"))
+ elif reading_msgstr:
+ if is_commented:
+ msgstr_lines.append(line[_len_comm_str:-1])
+ else:
+ msgstr_lines.append(line)
+ self.parsing_errors.append((line_nr, "commented string while reading regular msgstr"))
else:
- if line.startswith("#, fuzzy"):
+ if reading_msgctxt or reading_msgid or reading_msgstr:
+ self.parsing_errors.append((line_nr,
+ "commented string within msgctxt, msgid or msgstr scope, ignored"))
+ elif line.startswith(PO_COMMENT_FUZZY):
is_fuzzy = True
else:
comment_lines.append(line)
reading_comment = True
else:
- if reading_msgid:
+ if reading_msgctxt:
+ msgctxt_lines.append(line[1:-1])
+ elif reading_msgid:
msgid_lines.append(line[1:-1])
elif reading_msgstr:
line = line[1:-1]
msgstr_lines.append(line)
- if not is_translated and line:
- is_translated = True
else:
- is_broken = True
+ self.parsing_errors.append((line_nr, "regular string outside msgctxt, msgid or msgstr scope"))
+ print(line)
# If no final empty line, last message is not finalized!
if reading_msgstr:
- finalize_message()
-
- return (messages,
- {"trans_msg": translated_messages,
- "fuzzy_msg": fuzzy_messages,
- "comm_msg" : commented_messages,
- "is_broken": is_broken},
- {"tot_msg" : tot_messages,
- "trans_msg" : trans_messages,
- "tot_ttips" : tot_tooltips,
- "trans_ttips" : trans_tooltips,
- "comm_msg" : comm_messages,
- "nbr_signs" : nbr_signs,
- "nbr_trans_signs": nbr_trans_signs,
- "contexts" : contexts})
-
-
-def write_messages(fname, messages, commented, fuzzy):
- "Write in fname file the content of messages (similar to parse_messages " \
- "returned values). commented and fuzzy are two sets containing msgid. " \
- "Returns the number of written messages."
- num = 0
- with open(fname, 'w', encoding="utf-8") as f:
- for msgkey, val in messages.items():
- msgctxt, msgid = msgkey
- f.write("\n".join(val["comment_lines"]))
- # Only mark as fuzzy if msgstr is not empty!
- if msgkey in fuzzy and "".join(val["msgstr_lines"]):
- f.write("\n#, fuzzy")
- if msgkey in commented:
- if msgctxt:
- f.write("\n{}msgctxt \"".format(COMMENT_PREFIX))
- f.write("\"\n{}\"".format(COMMENT_PREFIX).join(
- val["msgctxt_lines"]))
- f.write("\"")
- f.write("\n{}msgid \"".format(COMMENT_PREFIX))
- f.write("\"\n{}\"".format(COMMENT_PREFIX).join(
- val["msgid_lines"]))
- f.write("\"\n{}msgstr \"".format(COMMENT_PREFIX))
- f.write("\"\n{}\"".format(COMMENT_PREFIX).join(
- val["msgstr_lines"]))
- f.write("\"\n\n")
- else:
- if msgctxt:
- f.write("\nmsgctxt \"")
- f.write("\"\n\"".join(val["msgctxt_lines"]))
- f.write("\"")
- f.write("\nmsgid \"")
- f.write("\"\n\"".join(val["msgid_lines"]))
- f.write("\"\nmsgstr \"")
- f.write("\"\n\"".join(val["msgstr_lines"]))
- f.write("\"\n\n")
- num += 1
- return num
-
-
-def gen_empty_messages(blender_rev, time_str, year_str):
- """Generate an empty messages & state data (only header if present!)."""
- header_key = ("", "")
-
- messages = new_messages()
- messages[header_key] = {
- "msgid_lines": [""],
- "msgctxt_lines": [],
- "msgstr_lines": [
- "Project-Id-Version: Blender r{}\\n"
- "".format(blender_rev),
- "Report-Msgid-Bugs-To: \\n",
- "POT-Creation-Date: {}\\n"
- "".format(time_str),
- "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n",
- "Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n",
- "Language-Team: LANGUAGE <LL@li.org>\\n",
- "Language: \\n",
- "MIME-Version: 1.0\\n",
- "Content-Type: text/plain; charset=UTF-8\\n",
- "Content-Transfer-Encoding: 8bit\\n"
- ],
- "comment_lines": [
- "# Blender's translation file (po format).",
- "# Copyright (C) {} The Blender Foundation."
- "".format(year_str),
- "# This file is distributed under the same "
- "# license as the Blender package.",
- "# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.",
- "#",
- ],
- }
+ finalize_message(self, line_nr)
+
+ if key:
+ if not curr_hash:
+ import hashlib
+ curr_hash = hashlib.new(PARSER_CACHE_HASH, src.encode()).digest()
+ self._parser_cache[key] = (curr_hash, self.msgs)
+
+ def write(self, kind, dest):
+ self.writers[kind](self, dest)
+
+ def write_messages_to_po(self, fname):
+ """
+ Write messages in fname po file.
+ """
+ self.normalize(max_len=0) # No wrapping for now...
+ with open(fname, 'w', encoding="utf-8") as f:
+ for msg in self.msgs.values():
+ f.write("\n".join(msg.comment_lines))
+ # Only mark as fuzzy if msgstr is not empty!
+ if msg.is_fuzzy and msg.msgstr:
+ f.write("\n" + PO_COMMENT_FUZZY)
+ _p = PO_COMMENT_PREFIX_MSG if msg.is_commented else ""
+ _pmsgctxt = _p + PO_MSGCTXT
+ _pmsgid = _p + PO_MSGID
+ _pmsgstr = _p + PO_MSGSTR
+ chunks = []
+ if msg.msgctxt:
+ if len(msg.msgctxt_lines) > 1:
+ chunks += [
+ "\n" + _pmsgctxt + "\"\"\n" + _p + "\"",
+ ("\"\n" + _p + "\"").join(msg.msgctxt_lines),
+ "\"",
+ ]
+ else:
+ chunks += ["\n" + _pmsgctxt + "\"" + msg.msgctxt + "\""]
+ if len(msg.msgid_lines) > 1:
+ print(msg.msgid_lines)
+ chunks += [
+ "\n" + _pmsgid + "\"\"\n" + _p + "\"",
+ ("\"\n" + _p + "\"").join(msg.msgid_lines),
+ "\"",
+ ]
+ else:
+ chunks += ["\n" + _pmsgid + "\"" + msg.msgid + "\""]
+ if len(msg.msgstr_lines) > 1:
+ chunks += [
+ "\n" + _pmsgstr + "\"\"\n" + _p + "\"",
+ ("\"\n" + _p + "\"").join(msg.msgstr_lines),
+ "\"",
+ ]
+ else:
+ chunks += ["\n" + _pmsgstr + "\"" + msg.msgstr + "\""]
+ chunks += ["\n\n"]
+ f.write("".join(chunks))
- states = {"trans_msg": set(),
- "fuzzy_msg": {header_key},
- "comm_msg": set(),
- "is_broken": False}
+ parsers = {
+ "PO": parse_messages_from_po,
+# "PYTUPLE": parse_messages_from_pytuple,
+ }
- return messages, states
+ writers = {
+ "PO": write_messages_to_po,
+ #"PYDICT": write_messages_to_pydict,
+ }
-def print_stats(stats, glob_stats=None, prefix=""):
+class I18n:
"""
- Print out some stats about a po file.
- glob_stats is for making global stats over several po's.
+ Internal representation of a whole translation set.
"""
- tot_msgs = stats["tot_msg"]
- trans_msgs = stats["trans_msg"]
- tot_ttips = stats["tot_ttips"]
- trans_ttips = stats["trans_ttips"]
- comm_msgs = stats["comm_msg"]
- nbr_signs = stats["nbr_signs"]
- nbr_trans_signs = stats["nbr_trans_signs"]
- contexts = stats["contexts"]
- lvl = lvl_ttips = lvl_trans_ttips = lvl_ttips_in_trans = lvl_comm = 0.0
-
- if tot_msgs > 0:
- lvl = float(trans_msgs) / float(tot_msgs)
- lvl_ttips = float(tot_ttips) / float(tot_msgs)
- lvl_comm = float(comm_msgs) / float(tot_msgs+comm_msgs)
- if tot_ttips > 0:
- lvl_trans_ttips = float(trans_ttips) / float(tot_ttips)
- if trans_msgs > 0:
- lvl_ttips_in_trans = float(trans_ttips) / float(trans_msgs)
-
- if glob_stats:
- glob_stats["nbr"] += 1.0
- glob_stats["lvl"] += lvl
- glob_stats["lvl_ttips"] += lvl_ttips
- glob_stats["lvl_trans_ttips"] += lvl_trans_ttips
- glob_stats["lvl_ttips_in_trans"] += lvl_ttips_in_trans
- glob_stats["lvl_comm"] += lvl_comm
- glob_stats["nbr_trans_signs"] += nbr_trans_signs
- if glob_stats["nbr_signs"] == 0:
- glob_stats["nbr_signs"] = nbr_signs
- glob_stats["contexts"] |= contexts
-
- lines = ("",
- "{:>6.1%} done! ({} translated messages over {}).\n"
- "".format(lvl, trans_msgs, tot_msgs),
- "{:>6.1%} of messages are tooltips ({} over {}).\n"
- "".format(lvl_ttips, tot_ttips, tot_msgs),
- "{:>6.1%} of tooltips are translated ({} over {}).\n"
- "".format(lvl_trans_ttips, trans_ttips, tot_ttips),
- "{:>6.1%} of translated messages are tooltips ({} over {}).\n"
- "".format(lvl_ttips_in_trans, trans_ttips, trans_msgs),
- "{:>6.1%} of messages are commented ({} over {}).\n"
- "".format(lvl_comm, comm_msgs, comm_msgs + tot_msgs),
- "This translation is currently made of {} signs.\n"
- "".format(nbr_trans_signs))
- print(prefix.join(lines))
- return 0
+
+ def __init__(self, src):
+ self.trans = {}
+ self.update_info()
+
+ def update_info(self):
+ self.nbr_trans = 0
+ self.lvl = 0.0
+ self.lvl_ttips = 0.0
+ self.lvl_trans_ttips = 0.0
+ self.lvl_ttips_in_trans = 0.0
+ self.lvl_comm = 0.0
+ self.nbr_signs = 0
+ self.nbr_trans_signs = 0
+ self.contexts = set()
+
+ if TEMPLATE_ISO_ID in self.trans:
+ self.nbr_trans = len(self.trans) - 1
+ self.nbr_signs = self.trans[TEMPLATE_ISO_ID].nbr_signs
+ else:
+ self.nbr_trans = len(self.trans)
+ for iso, msgs in self.trans.items():
+ msgs.update_info()
+ if msgs.nbr_msgs > 0:
+ self.lvl += float(msgs.nbr_trans_msgs) / float(msgs.nbr_msgs)
+ self.lvl_ttips += float(msgs.nbr_ttips) / float(msgs.nbr_msgs)
+ self.lvl_comm += float(msgs.nbr_comm_msgs) / float(msgs.nbr_msgs + msgs.nbr_comm_msgs)
+ if msgs.nbr_ttips > 0:
+ self.lvl_trans_ttips = float(msgs.nbr_trans_ttips) / float(msgs.nbr_ttips)
+ if msgs.nbr_trans_msgs > 0:
+ self.lvl_ttips_in_trans = float(msgs.nbr_trans_ttips) / float(msgs.nbr_trans_msgs)
+ if self.nbr_signs == 0:
+ self.nbr_signs = msgs.nbr_signs
+ self.nbr_trans_signs += msgs.nbr_trans_signs
+ self.contexts |= msgs.contexts
+
+ def print_stats(self, prefix="", print_msgs=True):
+ """
+ Print out some stats about an I18n object.
+ If print_msgs is True, it will also print all its translations' stats.
+ """
+ if print_msgs:
+ msgs_prefix = prefix + " "
+ for key, msgs in self.trans:
+ if key == TEMPLATE_ISO_ID:
+ continue
+ print(prefix + key + ":")
+ msgs.print_stats(prefix=msgs_prefix)
+ print(prefix)
+
+ nbr_contexts = len(self.contexts - {CONTEXT_DEFAULT})
+ if nbr_contexts != 1:
+ if nbr_contexts == 0:
+ nbr_contexts = "No"
+ _ctx_txt = "s are"
+ else:
+ _ctx_txt = " is"
+ lines = ("",
+ "Average stats for all {} translations:\n".format(self.nbr_trans),
+ " {:>6.1%} done!\n".format(self.lvl / self.nbr_trans),
+ " {:>6.1%} of messages are tooltips.\n".format(self.lvl_ttips / self.nbr_trans),
+ " {:>6.1%} of tooltips are translated.\n".format(self.lvl_trans_ttips / self.nbr_trans),
+ " {:>6.1%} of translated messages are tooltips.\n".format(self.lvl_ttips_in_trans / self.nbr_trans),
+ " {:>6.1%} of messages are commented.\n".format(self.lvl_comm / self.nbr_trans),
+ " The org msgids are currently made of {} signs.\n".format(self.nbr_signs),
+ " All processed translations are currently made of {} signs.\n".format(self.nbr_trans_signs),
+ " {} specific context{} present:\n {}\n"
+ "".format(self.nbr_contexts, _ctx_txt, "\n ".join(self.contexts - {CONTEXT_DEFAULT})),
+ "\n")
+ print(prefix.join(lines))
+
+
+##### Parsers #####
+
+#def parse_messages_from_pytuple(self, src, key=None):
+ #"""
+ #Returns a dict of tuples similar to the one returned by parse_messages_from_po (one per language, plus a 'pot'
+ #one keyed as '__POT__').
+ #"""
+ ## src may be either a string to be interpreted as py code, or a real tuple!
+ #if isinstance(src, str):
+ #src = eval(src)
+#
+ #curr_hash = None
+ #if key and key in _parser_cache:
+ #old_hash, ret = _parser_cache[key]
+ #import hashlib
+ #curr_hash = hashlib.new(PARSER_CACHE_HASH, str(src).encode()).digest()
+ #if curr_hash == old_hash:
+ #return ret
+#
+ #pot = new_messages()
+ #states = gen_states()
+ #stats = gen_stats()
+ #ret = {"__POT__": (pot, states, stats)}
+ #for msg in src:
+ #key = msg[0]
+ #messages[msgkey] = gen_message(msgid_lines, msgstr_lines, comment_lines, msgctxt_lines)
+ #pot[key] = gen_message(msgid_lines=[key[1]], msgstr_lines=[
+ #for lang, trans, (is_fuzzy, comments) in msg[2:]:
+ #if trans and not is_fuzzy:
+ #i18n_dict.setdefault(lang, dict())[key] = trans
+#
+ #if key:
+ #if not curr_hash:
+ #import hashlib
+ #curr_hash = hashlib.new(PARSER_CACHE_HASH, str(src).encode()).digest()
+ #_parser_cache[key] = (curr_hash, val)
+ #return ret \ No newline at end of file