Big refactor of bl_i18n_utils, switching to an object-based model. Still a bit wip.

Notes: * Most tools here have been updated, only a few (unused :p ) features should be missing. But some updates are rough for now, and performances are globally worse currently, will address this later (this is only really noticeable when you work over the whole repo, anyway ;) ). * This update breaks "edit translations" addon, will fix it later, once core (i.e. this module) is considered stable again!
author: Bastien Montagne <montagne29@wanadoo.fr> 2013-01-12 20:49:06 +0400
committer: Bastien Montagne <montagne29@wanadoo.fr> 2013-01-12 20:49:06 +0400
commit: 55e3912f257a61ee32d75e7c8489ae4465975c49 (patch)
tree: 6e9254b95a7cbaf358254e8c18d2cc51c8a8b19e /release/scripts/modules/bl_i18n_utils/utils.py
parent: 68d83f41400468b4c15b2201e8138b60756d1f30 (diff)
1 files changed, 589 insertions, 286 deletions
diff --git a/release/scripts/modules/bl_i18n_utils/utils.py b/release/scripts/modules/bl_i18n_utils/utils.py
index 9481f750092..ff5ed4f93d7 100644
--- a/release/scripts/modules/bl_i18n_utils/utils.py
+++ b/release/scripts/modules/bl_i18n_utils/utils.py
@@ -20,357 +20,660 @@
 
 # Some misc utilities...
 
+import collections
+import copy
 import os
+import re
 import sys
-import collections
 
 from bl_i18n_utils import settings
 
 
-COMMENT_PREFIX = settings.COMMENT_PREFIX
+PO_COMMENT_PREFIX = settings.PO_COMMENT_PREFIX
+PO_COMMENT_PREFIX_MSG = settings.PO_COMMENT_PREFIX_MSG
+PO_COMMENT_PREFIX_SOURCE = settings.PO_COMMENT_PREFIX_SOURCE
+PO_COMMENT_PREFIX_SOURCE_CUSTOM = settings.PO_COMMENT_PREFIX_SOURCE_CUSTOM
+PO_COMMENT_FUZZY = settings.PO_COMMENT_FUZZY
+PO_MSGCTXT = settings.PO_MSGCTXT
+PO_MSGID = settings.PO_MSGID
+PO_MSGSTR = settings.PO_MSGSTR
+
+PO_HEADER_KEY = settings.PO_HEADER_KEY
+PO_HEADER_COMMENT = settings.PO_HEADER_COMMENT
+PO_HEADER_COMMENT_COPYRIGHT = settings.PO_HEADER_COMMENT_COPYRIGHT
+PO_HEADER_MSGSTR = settings.PO_HEADER_MSGSTR
+
+PARSER_CACHE_HASH = settings.PARSER_CACHE_HASH
+
 WARN_NC = settings.WARN_MSGID_NOT_CAPITALIZED
 NC_ALLOWED = settings.WARN_MSGID_NOT_CAPITALIZED_ALLOWED
+PARSER_CACHE_HASH = settings.PARSER_CACHE_HASH
 
 
+##### Misc Utils #####
+
 def stripeol(s):
     return s.rstrip("\n\r")
 
 
-# XXX For now, we assume that all messages > 30 chars are tooltips!
-def is_tooltip(msgid):
-    return len(msgid) > 30
+_valid_po_path_re = re.compile(r"^\S+:[0-9]+$")
+def is_valid_po_path(path):
+    return bool(_valid_po_path_re.match(path))
+
+
+class I18nMessage:
+    """
+    Internal representation of a message.
+    """
+    __slots__ = ("msgctxt_lines", "msgid_lines", "msgstr_lines", "comment_lines", "is_fuzzy", "is_commented")
+
+    def __init__(self, msgctxt_lines=[], msgid_lines=[], msgstr_lines=[], comment_lines=[],
+                 is_commented=False, is_fuzzy=False):
+        self.msgctxt_lines = msgctxt_lines
+        self.msgid_lines = msgid_lines
+        self.msgstr_lines = msgstr_lines
+        self.comment_lines = comment_lines
+        self.is_fuzzy = is_fuzzy
+        self.is_commented = is_commented
+
+    def _get_msgctxt(self):
+        return ("".join(self.msgctxt_lines)).replace("\\n", "\n")
+    def _set_msgctxt(self, ctxt):
+        self.msgctxt_lines = [ctxt]
+    msgctxt = property(_get_msgctxt, _set_msgctxt)
+
+    def _get_msgid(self):
+        return ("".join(self.msgid_lines)).replace("\\n", "\n")
+    def _set_msgid(self, msgid):
+        self.msgid_lines = [msgid]
+    msgid = property(_get_msgid, _set_msgid)
+
+    def _get_msgstr(self):
+        return ("".join(self.msgstr_lines)).replace("\\n", "\n")
+    def _set_msgstr(self, msgstr):
+        self.msgstr_lines = [msgstr]
+    msgstr = property(_get_msgstr, _set_msgstr)
+
+    def _get_sources(self):
+        lstrip1 = len(PO_COMMENT_PREFIX_SOURCE)
+        lstrip2 = len(PO_COMMENT_PREFIX_SOURCE_CUSTOM)
+        return ([l[lstrip1:] for l in self.comment_lines if l.startswith(PO_COMMENT_PREFIX_SOURCE)] +
+                [l[lstrip2:] for l in self.comment_lines if l.startswith(PO_COMMENT_PREFIX_SOURCE_CUSTOM)])
+    def _set_sources(self, sources):
+        # list.copy() is not available in py3.2 ...
+        cmmlines = []
+        cmmlines[:] = self.comment_lines
+        for l in cmmlines:
+            if l.startswith(PO_COMMENT_PREFIX_SOURCE) or l.startswith(PO_COMMENT_PREFIX_SOURCE_CUSTOM):
+                self.comment_lines.remove(l)
+        lines_src = []
+        lines_src_custom = []
+        for src in  sources:
+            if is_valid_po_path(src):
+                lines_src.append(PO_COMMENT_PREFIX_SOURCE + src)
+            else:
+                lines_src_custom.append(PO_COMMENT_PREFIX_SOURCE_CUSTOM + src)
+        self.comment_lines += lines_src_custom + lines_src
+    sources = property(_get_sources, _set_sources)
+
+    def _get_is_tooltip(self):
+        # XXX For now, we assume that all messages > 30 chars are tooltips!
+        return len(self.msgid) > 30
+    is_tooltip = property(_get_is_tooltip)
+
+    def normalize(self, max_len=80):
+        """
+        Normalize this message, call this before exporting it...
+        Currently normalize msgctxt, msgid and msgstr lines to given max_len (if below 1, make them single line).
+        """
+        max_len -= 2  # The two quotes!
+        # We do not need the full power of textwrap... We just split first at escaped new lines, then into each line
+        # if needed... No word splitting, nor fancy spaces handling!
+        def _wrap(text, max_len, init_len):
+            if len(text) + init_len < max_len:
+                return [text]
+            lines = text.splitlines()
+            ret = []
+            for l in lines:
+                tmp = []
+                cur_len = 0
+                words = l.split(' ')
+                for w in words:
+                    cur_len += len(w) + 1
+                    if cur_len > (max_len - 1) and tmp:
+                        ret.append(" ".join(tmp) + " ")
+                        del tmp[:]
+                        cur_len = len(w) + 1
+                    tmp.append(w)
+                if tmp:
+                    ret.append(" ".join(tmp))
+            return ret
+        if max_len < 1:
+            self.msgctxt_lines = self.msgctxt.replace("\n", "\\n\n").splitlines()
+            self.msgid_lines = self.msgid.replace("\n", "\\n\n").splitlines()
+            self.msgstr_lines = self.msgstr.replace("\n", "\\n\n").splitlines()
+        else:
+            init_len = len(PO_MSGCTXT) + 1
+            if self.is_commented:
+                init_len += len(PO_COMMENT_PREFIX_MSG)
+            self.msgctxt_lines = _wrap(self.msgctxt.replace("\n", "\\n\n"), max_len, init_len)
 
+            init_len = len(PO_MSGID) + 1
+            if self.is_commented:
+                init_len += len(PO_COMMENT_PREFIX_MSG)
+            self.msgid_lines = _wrap(self.msgid.replace("\n", "\\n\n"), max_len, init_len)
 
-def new_messages():
-    return getattr(collections, 'OrderedDict', dict)()
+            init_len = len(PO_MSGSTR) + 1
+            if self.is_commented:
+                init_len += len(PO_COMMENT_PREFIX_MSG)
+            self.msgstr_lines = _wrap(self.msgstr.replace("\n", "\\n\n"), max_len, init_len)
 
 
-def parse_messages(fname):
+class I18nMessages:
     """
-    Returns a tupple (messages, states, stats).
-    messages is an odereddict of dicts
-        {(ctxt, msgid): {msgid_lines:, msgstr_lines:,
-                         comment_lines:, msgctxt_lines:}}.
-    states is a dict of three sets of (msgid, ctxt), and a boolean flag
-        indicating the .po is somewhat broken
-        {trans_msg:, fuzzy_msg:, comm_msg:, is_broken:}.
-    stats is a dict of values
-        {tot_msg:, trans_msg:, tot_ttips:, trans_ttips:, comm_msg:,
-         nbr_signs:, nbr_trans_signs:, contexts: set()}.
-    Note: This function will silently "arrange" mis-formated entries, thus
-        using afterward write_messages() should always produce a po-valid file,
-        though not correct!
+    Internal representation of messages for one language (iso code), with additional stats info.
     """
-    tot_messages = 0
-    tot_tooltips = 0
-    trans_messages = 0
-    trans_tooltips = 0
-    comm_messages = 0
-    nbr_signs = 0
-    nbr_trans_signs = 0
-    contexts = set()
-    reading_msgid = False
-    reading_msgstr = False
-    reading_msgctxt = False
-    reading_comment = False
-    is_translated = False
-    is_fuzzy = False
-    is_commented = False
-    is_broken = False
-    msgid_lines = []
-    msgstr_lines = []
-    msgctxt_lines = []
-    comment_lines = []
-
-    messages = new_messages()
-    translated_messages = set()
-    fuzzy_messages = set()
-    commented_messages = set()
-
-    def clean_vars():
-        nonlocal reading_msgid, reading_msgstr, reading_msgctxt, \
-                 reading_comment, is_fuzzy, is_translated, is_commented, \
-                 msgid_lines, msgstr_lines, msgctxt_lines, comment_lines
-        reading_msgid = reading_msgstr = reading_msgctxt = \
-                        reading_comment = False
-        is_tooltip = is_fuzzy = is_translated = is_commented = False
+
+    # Avoid parsing again!
+    # Keys should be (pseudo) file-names, values are tuples (hash, I18nMessages)
+    # Note: only used by po parser currently!
+    _parser_cache = {}
+
+    def __init__(self, iso="__POT__", kind=None, key=None, src=None):
+        self.iso = iso
+        self.msgs = self._new_messages()
+        self.trans_msgs = set()
+        self.fuzzy_msgs = set()
+        self.comm_msgs = set()
+        self.ttip_msgs = set()
+        self.contexts = set()
+        self.nbr_msgs = 0
+        self.nbr_trans_msgs = 0
+        self.nbr_ttips = 0
+        self.nbr_trans_ttips = 0
+        self.nbr_comm_msgs = 0
+        self.nbr_signs = 0
+        self.nbr_trans_signs = 0
+        self.parsing_errors = []
+        if kind and src:
+            self.parse(kind, key, src)
+        self.update_info()
+
+    @staticmethod
+    def _new_messages():
+        return getattr(collections, 'OrderedDict', dict)()
+
+    @classmethod
+    def gen_empty_messages(cls, iso, blender_ver, blender_rev, time, year, default_copyright=True):
+        """Generate an empty I18nMessages object (only header is present!)."""
+        msgstr = PO_HEADER_MSGSTR.format(blender_ver=str(blender_ver), blender_rev=int(blender_rev),
+                                         time=str(time), iso=str(iso))
+        comment = ""
+        if default_copyright:
+            comment = PO_HEADER_COMMENT_COPYRIGHT.format(year=str(year))
+        comment = comment + PO_HEADER_COMMENT
+
+        msgs = cls(iso=iso)
+        msgs.msgs[PO_HEADER_KEY] = I18nMessage([], [""], [msgstr], [comment], False, True)
+        msgs.update_info()
+
+        return msgs
+
+    def normalize(self, max_len=80):
+        for msg in self.msgs.values():
+            msg.normalize(max_len)
+
+    def merge(self, replace=False, *args):
+        pass
+
+    def update(self, ref, use_similar=0.8, keep_old_commented=True):
+        """
+        Update this I18nMessage with the ref one. Translations from ref are never used. Source comments from ref
+        completely replace current ones. If use_similar is not 0.0, it will try to match new messages in ref with an
+        existing one. Messages no more found in ref will be marked as commented if keep_old_commented is True,
+        or removed.
+        """
+        import difflib
+        similar_pool = {}
+        if use_similar > 0.0:
+            for key in self.msgs:
+                similar_pool.setdefault(key[1], set()).add(key)
+
+        msgs = self._new_messages()
+        for (key, msg) in ref.msgs.items():
+            if key in self.msgs:
+                msgs[key] = self.msgs[key]
+                msgs[key].sources = msg.sources
+            else:
+                skey = None
+                if use_similar > 0.0:
+                    # try to find some close key in existing messages...
+                    tmp = difflib.get_close_matches(key[1], similar_pool, n=1, cutoff=use_similar)
+                    if tmp:
+                        tmp = tmp[0]
+                        # Try to get the same context, else just get one...
+                        skey = (key[0], tmp)
+                        if skey not in similar_pool[tmp]:
+                            skey = tuple(similar_pool[tmp])[0]
+                msgs[key] = msg
+                if skey:
+                    msgs[key].msgstr = self.msgs[skey].msgstr
+                    msgs[key].is_fuzzy = True
+        # Add back all "old" and already commented messages as commented ones, if required.
+        if keep_old_commented:
+            for key, msg in self.msgs.items():
+                if key not in msgs:
+                    msgs[key] = msg
+                    msgs[key].is_commented = True
+        # And finalize the update!
+        self.msgs = msgs
+
+    def update_info(self):
+        self.trans_msgs.clear()
+        self.fuzzy_msgs.clear()
+        self.comm_msgs.clear()
+        self.ttip_msgs.clear()
+        self.contexts.clear()
+        self.nbr_signs = 0
+        self.nbr_trans_signs = 0
+        for key, msg in self.msgs.items():
+            if key == PO_HEADER_KEY:
+                continue
+            if msg.is_commented:
+                self.comm_msgs.add(key)
+            else:
+                if msg.msgstr:
+                    self.trans_msgs.add(key)
+                if msg.is_fuzzy:
+                    self.fuzzy_msgs.add(key)
+                if msg.is_tooltip:
+                    self.ttip_msgs.add(key)
+                self.contexts.add(key[0])
+                self.nbr_signs += len(msg.msgid)
+                self.nbr_trans_signs += len(msg.msgstr)
+        self.nbr_msgs = len(self.msgs)
+        self.nbr_trans_msgs = len(self.trans_msgs)
+        self.nbr_ttips = len(self.ttip_msgs)
+        self.nbr_trans_ttips = len(self.ttip_msgs & self.trans_msgs)
+        self.nbr_comm_msgs = len(self.comm_msgs)
+
+    def print_stats(self, prefix=""):
+        """
+        Print out some stats about an I18nMessages object.
+        """
+        lvl = 0.0
+        lvl_ttips = 0.0
+        lvl_comm = 0.0
+        lvl_trans_ttips = 0.0
+        lvl_ttips_in_trans = 0.0
+        if self.nbr_msgs > 0:
+            lvl = float(self.nbr_trans_msgs) / float(self.nbr_msgs)
+            lvl_ttips = float(self.nbr_ttips) / float(self.nbr_msgs)
+            lvl_comm = float(self.nbr_comm_msgs) / float(self.nbr_msgs + self.nbr_comm_msgs)
+        if self.nbr_ttips > 0:
+            lvl_trans_ttips = float(self.nbr_trans_ttips) / float(self.nbr_ttips)
+        if self.nbr_trans_msgs > 0:
+            lvl_ttips_in_trans = float(self.nbr_trans_ttips) / float(self.nbr_trans_msgs)
+
+        lines = ("",
+                 "{:>6.1%} done! ({} translated messages over {}).\n"
+                 "".format(lvl, self.nbr_trans_msgs, self.nbr_msgs),
+                 "{:>6.1%} of messages are tooltips ({} over {}).\n"
+                 "".format(lvl_ttips, self.nbr_ttips, self.nbr_msgs),
+                 "{:>6.1%} of tooltips are translated ({} over {}).\n"
+                 "".format(lvl_trans_ttips, self.nbr_trans_ttips, self.nbr_ttips),
+                 "{:>6.1%} of translated messages are tooltips ({} over {}).\n"
+                 "".format(lvl_ttips_in_trans, self.nbr_trans_ttips, self.nbr_trans_msgs),
+                 "{:>6.1%} of messages are commented ({} over {}).\n"
+                 "".format(lvl_comm, self.nbr_comm_msgs, self.nbr_comm_msgs + self.nbr_msgs),
+                 "This translation is currently made of {} signs.\n".format(self.nbr_trans_signs))
+        print(prefix.join(lines))
+
+    def parse(self, kind, key, src):
+        del self.parsing_errors[:]
+        self.parsers[kind](self, src, key)
+        if self.parsing_errors:
+            print("WARNING! Errors while parsing {}:".format(key))
+            for line, error in self.parsing_errors:
+                print("    Around line {}: {}".format(line, error))
+            print("The parser solved them as well as it could...")
+        self.update_info()
+
+    def parse_messages_from_po(self, src, key=None):
+        """
+        Parse a po file.
+        Note: This function will silently "arrange" mis-formated entries, thus using afterward write_messages() should
+              always produce a po-valid file, though not correct!
+        """
+        reading_msgid = False
+        reading_msgstr = False
+        reading_msgctxt = False
+        reading_comment = False
+        is_commented = False
+        is_fuzzy = False
+        msgctxt_lines = []
         msgid_lines = []
         msgstr_lines = []
-        msgctxt_lines = []
         comment_lines = []
 
-    def finalize_message():
-        nonlocal reading_msgid, reading_msgstr, reading_msgctxt, \
-                 reading_comment, is_fuzzy, is_translated, is_commented, \
-                 msgid_lines, msgstr_lines, msgctxt_lines, comment_lines, \
-                 messages, translated_messages, fuzzy_messages, \
-                 commented_messages, \
-                 tot_messages, tot_tooltips, trans_messages, trans_tooltips, \
-                 comm_messages, nbr_signs, nbr_trans_signs, contexts
-
-        msgid = "".join(msgid_lines)
-        msgctxt = "".join(msgctxt_lines)
-        msgkey = (msgctxt, msgid)
-        is_ttip = is_tooltip(msgid)
-
-        # Never allow overriding existing msgid/msgctxt pairs!
-        if msgkey in messages:
-            clean_vars()
-            return
-
-        nbr_signs += len(msgid)
-        if is_commented:
-            commented_messages.add(msgkey)
-        elif is_fuzzy:
-            fuzzy_messages.add(msgkey)
-        elif is_translated:
-            translated_messages.add(msgkey)
-            nbr_trans_signs += len("".join(msgstr_lines))
-        messages[msgkey] = {"msgid_lines"  : msgid_lines,
-                            "msgstr_lines" : msgstr_lines,
-                            "comment_lines": comment_lines,
-                            "msgctxt_lines": msgctxt_lines}
-
-        if is_commented:
-            comm_messages += 1
-        else:
-            tot_messages += 1
-            if is_ttip:
-                tot_tooltips += 1
-            if not is_fuzzy and is_translated:
-                trans_messages += 1
-                if is_ttip:
-                    trans_tooltips += 1
-            if msgctxt not in contexts:
-                contexts.add(msgctxt)
-
-        clean_vars()
-
-    with open(fname, 'r', encoding="utf-8") as f:
-        for line_nr, line in enumerate(f):
-            line = stripeol(line)
+        # Helper function
+        def finalize_message(self, line_nr):
+            nonlocal reading_msgid, reading_msgstr, reading_msgctxt, reading_comment
+            nonlocal is_commented, is_fuzzy, msgid_lines, msgstr_lines, msgctxt_lines, comment_lines
+
+            msgid = "".join(msgid_lines)
+            msgctxt = "".join(msgctxt_lines)
+            msgkey = (msgctxt, msgid)
+
+            # Never allow overriding existing msgid/msgctxt pairs!
+            if msgkey in self.msgs:
+                self.parsing_errors.append((line_nr, "{} context/msgid is already in current messages!".format(msgkey)))
+                return
+
+            self.msgs[msgkey] = I18nMessage(msgctxt_lines, msgid_lines, msgstr_lines, comment_lines,
+                                            is_commented, is_fuzzy)
+
+            # Let's clean up and get ready for next message!
+            reading_msgid = reading_msgstr = reading_msgctxt = reading_comment = False
+            is_commented = is_fuzzy = False
+            msgctxt_lines = []
+            msgid_lines = []
+            msgstr_lines = []
+            comment_lines = []
+
+        # try to use src as file name...
+        if os.path.exists(src):
+            if not key:
+                key = src
+            with open(src, 'r', encoding="utf-8") as f:
+                src = f.read()
+
+        # Try to use values from cache!
+        curr_hash = None
+        if key and key in self._parser_cache:
+            old_hash, msgs = self._parser_cache[key]
+            import hashlib
+            curr_hash = hashlib.new(PARSER_CACHE_HASH, src.encode()).digest()
+            if curr_hash == old_hash:
+                self.msgs = copy.deepcopy(msgs)  # we might edit self.msgs!
+                return
+
+        _comm_msgctxt = PO_COMMENT_PREFIX_MSG + PO_MSGCTXT
+        _len_msgctxt = len(PO_MSGCTXT + '"')
+        _len_comm_msgctxt = len(_comm_msgctxt + '"')
+        _comm_msgid = PO_COMMENT_PREFIX_MSG + PO_MSGID
+        _len_msgid = len(PO_MSGID + '"')
+        _len_comm_msgid = len(_comm_msgid + '"')
+        _comm_msgstr = PO_COMMENT_PREFIX_MSG + PO_MSGSTR
+        _len_msgstr = len(PO_MSGSTR + '"')
+        _len_comm_msgstr = len(_comm_msgstr + '"')
+        _len_comm_str = len(PO_COMMENT_PREFIX_MSG + '"')
+
+        # Main loop over all lines in src...
+        for line_nr, line in enumerate(src.splitlines()):
             if line == "":
-                finalize_message()
+                finalize_message(self, line_nr)
 
-            elif line.startswith("msgctxt") or \
-                 line.startswith("".join((COMMENT_PREFIX, "msgctxt"))):
+            elif line.startswith(PO_MSGCTXT) or line.startswith(_comm_msgctxt):
                 reading_comment = False
                 reading_ctxt = True
-                if line.startswith(COMMENT_PREFIX):
+                if line.startswith(PO_COMMENT_PREFIX_MSG):
                     is_commented = True
-                    line = line[9 + len(COMMENT_PREFIX):-1]
+                    line = line[_len_comm_msgctxt:-1]
                 else:
-                    line = line[9:-1]
+                    line = line[_len_msgctxt:-1]
                 msgctxt_lines.append(line)
 
-            elif line.startswith("msgid") or \
-                 line.startswith("".join((COMMENT_PREFIX, "msgid"))):
+            elif line.startswith(PO_MSGID) or line.startswith(_comm_msgid):
                 reading_comment = False
                 reading_msgid = True
-                if line.startswith(COMMENT_PREFIX):
+                if line.startswith(PO_COMMENT_PREFIX_MSG):
+                    if not is_commented and reading_ctxt:
+                        self.parsing_errors.append((line_nr, "commented msgid following regular msgctxt"))
                     is_commented = True
-                    line = line[7 + len(COMMENT_PREFIX):-1]
+                    line = line[_len_comm_msgid:-1]
                 else:
-                    line = line[7:-1]
+                    line = line[_len_msgid:-1]
+                reading_ctxt = False
                 msgid_lines.append(line)
 
-            elif line.startswith("msgstr") or \
-                 line.startswith("".join((COMMENT_PREFIX, "msgstr"))):
+            elif line.startswith(PO_MSGSTR) or line.startswith(_comm_msgstr):
                 if not reading_msgid:
-                    is_broken = True
+                    self.parsing_errors.append((line_nr, "msgstr without a prior msgid"))
                 else:
                     reading_msgid = False
                 reading_msgstr = True
-                if line.startswith(COMMENT_PREFIX):
-                    line = line[8 + len(COMMENT_PREFIX):-1]
+                if line.startswith(PO_COMMENT_PREFIX_MSG):
+                    line = line[_len_comm_msgstr:-1]
                     if not is_commented:
-                        is_broken = True
+                        self.parsing_errors.append((line_nr, "commented msgstr following regular msgid"))
                 else:
-                    line = line[8:-1]
+                    line = line[_len_msgstr:-1]
                     if is_commented:
-                        is_broken = True
+                        self.parsing_errors.append((line_nr, "regular msgstr following commented msgid"))
                 msgstr_lines.append(line)
-                if line:
-                    is_translated = True
 
-            elif line.startswith("#"):
-                if reading_msgid:
-                    if is_commented:
-                        msgid_lines.append(line[1 + len(COMMENT_PREFIX):-1])
-                    else:
-                        msgid_lines.append(line)
-                        is_broken = True
-                elif reading_msgstr:
-                    if is_commented:
-                        msgstr_lines.append(line[1 + len(COMMENT_PREFIX):-1])
-                    else:
-                        msgstr_lines.append(line)
-                        is_broken = True
+            elif line.startswith(PO_COMMENT_PREFIX[0]):
+                if line.startswith(PO_COMMENT_PREFIX_MSG):
+                    if reading_msgctxt:
+                        if is_commented:
+                            msgctxt_lines.append(line[_len_comm_str:-1])
+                        else:
+                            msgctxt_lines.append(line)
+                            self.parsing_errors.append((line_nr, "commented string while reading regular msgctxt"))
+                    elif reading_msgid:
+                        if is_commented:
+                            msgid_lines.append(line[_len_comm_str:-1])
+                        else:
+                            msgid_lines.append(line)
+                            self.parsing_errors.append((line_nr, "commented string while reading regular msgid"))
+                    elif reading_msgstr:
+                        if is_commented:
+                            msgstr_lines.append(line[_len_comm_str:-1])
+                        else:
+                            msgstr_lines.append(line)
+                            self.parsing_errors.append((line_nr, "commented string while reading regular msgstr"))
                 else:
-                    if line.startswith("#, fuzzy"):
+                    if reading_msgctxt or reading_msgid or reading_msgstr:
+                        self.parsing_errors.append((line_nr,
+                                                    "commented string within msgctxt, msgid or msgstr scope, ignored"))
+                    elif line.startswith(PO_COMMENT_FUZZY):
                         is_fuzzy = True
                     else:
                         comment_lines.append(line)
                     reading_comment = True
 
             else:
-                if reading_msgid:
+                if reading_msgctxt:
+                    msgctxt_lines.append(line[1:-1])
+                elif reading_msgid:
                     msgid_lines.append(line[1:-1])
                 elif reading_msgstr:
                     line = line[1:-1]
                     msgstr_lines.append(line)
-                    if not is_translated and line:
-                        is_translated = True
                 else:
-                    is_broken = True
+                    self.parsing_errors.append((line_nr, "regular string outside msgctxt, msgid or msgstr scope"))
+                    print(line)
 
         # If no final empty line, last message is not finalized!
         if reading_msgstr:
-            finalize_message()
-
-    return (messages,
-            {"trans_msg": translated_messages,
-             "fuzzy_msg": fuzzy_messages,
-             "comm_msg" : commented_messages,
-             "is_broken": is_broken},
-            {"tot_msg"        : tot_messages,
-             "trans_msg"      : trans_messages,
-             "tot_ttips"      : tot_tooltips,
-             "trans_ttips"    : trans_tooltips,
-             "comm_msg"       : comm_messages,
-             "nbr_signs"      : nbr_signs,
-             "nbr_trans_signs": nbr_trans_signs,
-             "contexts"       : contexts})
-
-
-def write_messages(fname, messages, commented, fuzzy):
-    "Write in fname file the content of messages (similar to parse_messages " \
-    "returned values). commented and fuzzy are two sets containing msgid. " \
-    "Returns the number of written messages."
-    num = 0
-    with open(fname, 'w', encoding="utf-8") as f:
-        for msgkey, val in messages.items():
-            msgctxt, msgid = msgkey
-            f.write("\n".join(val["comment_lines"]))
-            # Only mark as fuzzy if msgstr is not empty!
-            if msgkey in fuzzy and "".join(val["msgstr_lines"]):
-                f.write("\n#, fuzzy")
-            if msgkey in commented:
-                if msgctxt:
-                    f.write("\n{}msgctxt \"".format(COMMENT_PREFIX))
-                    f.write("\"\n{}\"".format(COMMENT_PREFIX).join(
-                                       val["msgctxt_lines"]))
-                    f.write("\"")
-                f.write("\n{}msgid \"".format(COMMENT_PREFIX))
-                f.write("\"\n{}\"".format(COMMENT_PREFIX).join(
-                                   val["msgid_lines"]))
-                f.write("\"\n{}msgstr \"".format(COMMENT_PREFIX))
-                f.write("\"\n{}\"".format(COMMENT_PREFIX).join(
-                                   val["msgstr_lines"]))
-                f.write("\"\n\n")
-            else:
-                if msgctxt:
-                    f.write("\nmsgctxt \"")
-                    f.write("\"\n\"".join(val["msgctxt_lines"]))
-                    f.write("\"")
-                f.write("\nmsgid \"")
-                f.write("\"\n\"".join(val["msgid_lines"]))
-                f.write("\"\nmsgstr \"")
-                f.write("\"\n\"".join(val["msgstr_lines"]))
-                f.write("\"\n\n")
-            num += 1
-    return num
-
-
-def gen_empty_messages(blender_rev, time_str, year_str):
-    """Generate an empty messages & state data (only header if present!)."""
-    header_key = ("", "")
-
-    messages = new_messages()
-    messages[header_key] = {
-        "msgid_lines": [""],
-        "msgctxt_lines": [],
-        "msgstr_lines": [
-            "Project-Id-Version: Blender r{}\\n"
-            "".format(blender_rev),
-            "Report-Msgid-Bugs-To: \\n",
-            "POT-Creation-Date: {}\\n"
-            "".format(time_str),
-            "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n",
-            "Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n",
-            "Language-Team: LANGUAGE <LL@li.org>\\n",
-            "Language: \\n",
-            "MIME-Version: 1.0\\n",
-            "Content-Type: text/plain; charset=UTF-8\\n",
-            "Content-Transfer-Encoding: 8bit\\n"
-        ],
-        "comment_lines": [
-            "# Blender's translation file (po format).",
-            "# Copyright (C) {} The Blender Foundation."
-            "".format(year_str),
-            "# This file is distributed under the same "
-            "# license as the Blender package.",
-            "# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.",
-            "#",
-        ],
-    }
+            finalize_message(self, line_nr)
+
+        if key:
+            if not curr_hash:
+                import hashlib
+                curr_hash = hashlib.new(PARSER_CACHE_HASH, src.encode()).digest()
+            self._parser_cache[key] = (curr_hash, self.msgs)
+
+    def write(self, kind, dest):
+        self.writers[kind](self, dest)
+
+    def write_messages_to_po(self, fname):
+        """
+        Write messages in fname po file.
+        """
+        self.normalize(max_len=0)  # No wrapping for now...
+        with open(fname, 'w', encoding="utf-8") as f:
+            for msg in self.msgs.values():
+                f.write("\n".join(msg.comment_lines))
+                # Only mark as fuzzy if msgstr is not empty!
+                if msg.is_fuzzy and msg.msgstr:
+                    f.write("\n" + PO_COMMENT_FUZZY)
+                _p = PO_COMMENT_PREFIX_MSG if msg.is_commented else ""
+                _pmsgctxt = _p + PO_MSGCTXT
+                _pmsgid = _p + PO_MSGID
+                _pmsgstr = _p + PO_MSGSTR
+                chunks = []
+                if msg.msgctxt:
+                    if len(msg.msgctxt_lines) > 1:
+                        chunks += [
+                            "\n" + _pmsgctxt + "\"\"\n" + _p + "\"",
+                            ("\"\n" + _p + "\"").join(msg.msgctxt_lines),
+                            "\"",
+                        ]
+                    else:
+                        chunks += ["\n" + _pmsgctxt + "\"" + msg.msgctxt + "\""]
+                if len(msg.msgid_lines) > 1:
+                    print(msg.msgid_lines)
+                    chunks += [
+                        "\n" + _pmsgid + "\"\"\n" + _p + "\"",
+                        ("\"\n" + _p + "\"").join(msg.msgid_lines),
+                        "\"",
+                    ]
+                else:
+                    chunks += ["\n" + _pmsgid + "\"" + msg.msgid + "\""]
+                if len(msg.msgstr_lines) > 1:
+                    chunks += [
+                        "\n" + _pmsgstr + "\"\"\n" + _p + "\"",
+                        ("\"\n" + _p + "\"").join(msg.msgstr_lines),
+                        "\"",
+                    ]
+                else:
+                    chunks += ["\n" + _pmsgstr + "\"" + msg.msgstr + "\""]
+                chunks += ["\n\n"]
+                f.write("".join(chunks))
 
-    states = {"trans_msg": set(),
-              "fuzzy_msg": {header_key},
-              "comm_msg": set(),
-              "is_broken": False}
+    parsers = {
+        "PO": parse_messages_from_po,
+#        "PYTUPLE": parse_messages_from_pytuple,
+    }
 
-    return messages, states
+    writers = {
+        "PO": write_messages_to_po,
+        #"PYDICT": write_messages_to_pydict,
+    }
 
 
-def print_stats(stats, glob_stats=None, prefix=""):
+class I18n:
     """
-    Print out some stats about a po file.
-    glob_stats is for making global stats over several po's.
+    Internal representation of a whole translation set.
     """
-    tot_msgs        = stats["tot_msg"]
-    trans_msgs      = stats["trans_msg"]
-    tot_ttips       = stats["tot_ttips"]
-    trans_ttips     = stats["trans_ttips"]
-    comm_msgs       = stats["comm_msg"]
-    nbr_signs       = stats["nbr_signs"]
-    nbr_trans_signs = stats["nbr_trans_signs"]
-    contexts        = stats["contexts"]
-    lvl = lvl_ttips = lvl_trans_ttips = lvl_ttips_in_trans = lvl_comm = 0.0
-
-    if tot_msgs > 0:
-        lvl = float(trans_msgs) / float(tot_msgs)
-        lvl_ttips = float(tot_ttips) / float(tot_msgs)
-        lvl_comm = float(comm_msgs) / float(tot_msgs+comm_msgs)
-    if tot_ttips > 0:
-        lvl_trans_ttips = float(trans_ttips) / float(tot_ttips)
-    if trans_msgs > 0:
-        lvl_ttips_in_trans = float(trans_ttips) / float(trans_msgs)
-
-    if glob_stats:
-        glob_stats["nbr"]                += 1.0
-        glob_stats["lvl"]                += lvl
-        glob_stats["lvl_ttips"]          += lvl_ttips
-        glob_stats["lvl_trans_ttips"]    += lvl_trans_ttips
-        glob_stats["lvl_ttips_in_trans"] += lvl_ttips_in_trans
-        glob_stats["lvl_comm"]           += lvl_comm
-        glob_stats["nbr_trans_signs"]    += nbr_trans_signs
-        if glob_stats["nbr_signs"] == 0:
-            glob_stats["nbr_signs"] = nbr_signs
-        glob_stats["contexts"] |= contexts
-
-    lines = ("",
-             "{:>6.1%} done! ({} translated messages over {}).\n"
-             "".format(lvl, trans_msgs, tot_msgs),
-             "{:>6.1%} of messages are tooltips ({} over {}).\n"
-             "".format(lvl_ttips, tot_ttips, tot_msgs),
-             "{:>6.1%} of tooltips are translated ({} over {}).\n"
-             "".format(lvl_trans_ttips, trans_ttips, tot_ttips),
-             "{:>6.1%} of translated messages are tooltips ({} over {}).\n"
-             "".format(lvl_ttips_in_trans, trans_ttips, trans_msgs),
-             "{:>6.1%} of messages are commented ({} over {}).\n"
-             "".format(lvl_comm, comm_msgs, comm_msgs + tot_msgs),
-             "This translation is currently made of {} signs.\n"
-             "".format(nbr_trans_signs))
-    print(prefix.join(lines))
-    return 0
+
+    def __init__(self, src):
+        self.trans = {}
+        self.update_info()
+
+    def update_info(self):
+        self.nbr_trans = 0
+        self.lvl = 0.0
+        self.lvl_ttips = 0.0
+        self.lvl_trans_ttips = 0.0
+        self.lvl_ttips_in_trans = 0.0
+        self.lvl_comm = 0.0
+        self.nbr_signs = 0
+        self.nbr_trans_signs = 0
+        self.contexts = set()
+
+        if TEMPLATE_ISO_ID in self.trans:
+            self.nbr_trans = len(self.trans) - 1
+            self.nbr_signs = self.trans[TEMPLATE_ISO_ID].nbr_signs
+        else:
+            self.nbr_trans = len(self.trans)
+        for iso, msgs in self.trans.items():
+            msgs.update_info()
+            if msgs.nbr_msgs > 0:
+                self.lvl += float(msgs.nbr_trans_msgs) / float(msgs.nbr_msgs)
+                self.lvl_ttips += float(msgs.nbr_ttips) / float(msgs.nbr_msgs)
+                self.lvl_comm += float(msgs.nbr_comm_msgs) / float(msgs.nbr_msgs + msgs.nbr_comm_msgs)
+            if msgs.nbr_ttips > 0:
+                self.lvl_trans_ttips = float(msgs.nbr_trans_ttips) / float(msgs.nbr_ttips)
+            if msgs.nbr_trans_msgs > 0:
+                self.lvl_ttips_in_trans = float(msgs.nbr_trans_ttips) / float(msgs.nbr_trans_msgs)
+            if self.nbr_signs == 0:
+                self.nbr_signs = msgs.nbr_signs
+            self.nbr_trans_signs += msgs.nbr_trans_signs
+            self.contexts |= msgs.contexts
+
+    def print_stats(self, prefix="", print_msgs=True):
+        """
+        Print out some stats about an I18n object.
+        If print_msgs is True, it will also print all its translations' stats.
+        """
+        if print_msgs:
+            msgs_prefix = prefix + "    "
+            for key, msgs in self.trans:
+                if key == TEMPLATE_ISO_ID:
+                    continue
+                print(prefix + key + ":")
+                msgs.print_stats(prefix=msgs_prefix)
+                print(prefix)
+
+        nbr_contexts = len(self.contexts - {CONTEXT_DEFAULT})
+        if nbr_contexts != 1:
+            if nbr_contexts == 0:
+                nbr_contexts = "No"
+            _ctx_txt = "s are"
+        else:
+            _ctx_txt = " is"
+        lines = ("",
+                 "Average stats for all {} translations:\n".format(self.nbr_trans),
+                 "    {:>6.1%} done!\n".format(self.lvl / self.nbr_trans),
+                 "    {:>6.1%} of messages are tooltips.\n".format(self.lvl_ttips / self.nbr_trans),
+                 "    {:>6.1%} of tooltips are translated.\n".format(self.lvl_trans_ttips / self.nbr_trans),
+                 "    {:>6.1%} of translated messages are tooltips.\n".format(self.lvl_ttips_in_trans / self.nbr_trans),
+                 "    {:>6.1%} of messages are commented.\n".format(self.lvl_comm / self.nbr_trans),
+                 "    The org msgids are currently made of {} signs.\n".format(self.nbr_signs),
+                 "    All processed translations are currently made of {} signs.\n".format(self.nbr_trans_signs),
+                 "    {} specific context{} present:\n            {}\n"
+                 "".format(self.nbr_contexts, _ctx_txt, "\n            ".join(self.contexts - {CONTEXT_DEFAULT})),
+                 "\n")
+        print(prefix.join(lines))
+
+
+##### Parsers #####
+
+#def parse_messages_from_pytuple(self, src, key=None):
+    #"""
+    #Returns a dict of tuples similar to the one returned by parse_messages_from_po (one per language, plus a 'pot'
+    #one keyed as '__POT__').
+    #"""
+    ## src may be either a string to be interpreted as py code, or a real tuple!
+    #if isinstance(src, str):
+        #src = eval(src)
+#
+    #curr_hash = None
+    #if key and key in _parser_cache:
+        #old_hash, ret = _parser_cache[key]
+        #import hashlib
+        #curr_hash = hashlib.new(PARSER_CACHE_HASH, str(src).encode()).digest()
+        #if curr_hash == old_hash:
+            #return ret
+#
+    #pot = new_messages()
+    #states = gen_states()
+    #stats = gen_stats()
+    #ret = {"__POT__": (pot, states, stats)}
+    #for msg in src:
+        #key = msg[0]
+        #messages[msgkey] = gen_message(msgid_lines, msgstr_lines, comment_lines, msgctxt_lines)
+        #pot[key] = gen_message(msgid_lines=[key[1]], msgstr_lines=[
+        #for lang, trans, (is_fuzzy, comments) in msg[2:]:
+            #if trans and not is_fuzzy:
+                #i18n_dict.setdefault(lang, dict())[key] = trans
+#
+    #if key:
+        #if not curr_hash:
+            #import hashlib
+            #curr_hash = hashlib.new(PARSER_CACHE_HASH, str(src).encode()).digest()
+        #_parser_cache[key] = (curr_hash, val)
+    #return ret
+\ No newline at end of file
author	Bastien Montagne <montagne29@wanadoo.fr>	2013-01-12 20:49:06 +0400
committer	Bastien Montagne <montagne29@wanadoo.fr>	2013-01-12 20:49:06 +0400
commit	55e3912f257a61ee32d75e7c8489ae4465975c49 (patch)
tree	6e9254b95a7cbaf358254e8c18d2cc51c8a8b19e /release/scripts/modules/bl_i18n_utils/utils.py
parent	68d83f41400468b4c15b2201e8138b60756d1f30 (diff)