Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Bastien Montagne <montagne29@wanadoo.fr>, 2013-02-12 21:32:54 +0400
committer: Bastien Montagne <montagne29@wanadoo.fr>, 2013-02-12 21:32:54 +0400
commit: f2d9fc7e25e4eb45e42469f8b9d143ceb70e3c3c (patch)
tree: 3663b91fcd3e9ab294213afd4596865fe32b673e /release
parent: dcea2800a74ea2112e3cda5b3ff3035752ecb7ee (diff)
Update how similar-message matching is handled when updating po files from the pot file (gains roughly 20% in heavy-update situations, and saves a good amount of memory!).
Diffstat (limited to 'release')
-rw-r--r-- release/scripts/modules/bl_i18n_utils/bl_process_msg.py | 2
-rw-r--r-- release/scripts/modules/bl_i18n_utils/spell_check_utils.py | 1
-rwxr-xr-x release/scripts/modules/bl_i18n_utils/update_po.py | 11
-rw-r--r-- release/scripts/modules/bl_i18n_utils/utils.py | 111
4 files changed, 96 insertions, 29 deletions
diff --git a/release/scripts/modules/bl_i18n_utils/bl_process_msg.py b/release/scripts/modules/bl_i18n_utils/bl_process_msg.py
index f0adc700a45..5381af8d543 100644
--- a/release/scripts/modules/bl_i18n_utils/bl_process_msg.py
+++ b/release/scripts/modules/bl_i18n_utils/bl_process_msg.py
@@ -387,7 +387,7 @@ def dump_py_messages_from_files(messages, check_ctxt, files):
estr_ls.append(estr)
nds_ls.extend(nds)
ret = _extract_string_merge(estr_ls, nds_ls)
- print(ret)
+ #print(ret)
return ret
def extract_strings_split(node):
diff --git a/release/scripts/modules/bl_i18n_utils/spell_check_utils.py b/release/scripts/modules/bl_i18n_utils/spell_check_utils.py
index 2aa3598e4a1..f2d777d22f1 100644
--- a/release/scripts/modules/bl_i18n_utils/spell_check_utils.py
+++ b/release/scripts/modules/bl_i18n_utils/spell_check_utils.py
@@ -225,6 +225,7 @@ dict_uimsgs = {
"loc", "rot", "pos",
"lorem",
"luma",
+ "mem",
"multicam",
"num",
"ok",
diff --git a/release/scripts/modules/bl_i18n_utils/update_po.py b/release/scripts/modules/bl_i18n_utils/update_po.py
index 6e03226f6d3..eb12782264a 100755
--- a/release/scripts/modules/bl_i18n_utils/update_po.py
+++ b/release/scripts/modules/bl_i18n_utils/update_po.py
@@ -97,10 +97,13 @@ def main():
if os.path.exists(po):
pool_data.append((po, lang, pot_msgs))
- with concurrent.futures.ProcessPoolExecutor() as executor:
- for r in executor.map(process_po, pool_data, timeout=600):
- if r != 0:
- ret = r
+ for r in map(process_po, pool_data):
+ if r != 0:
+ ret = r
+ #with concurrent.futures.ProcessPoolExecutor() as executor:
+ #for r in executor.map(process_po, pool_data, timeout=600):
+ #if r != 0:
+ #ret = r
return ret
diff --git a/release/scripts/modules/bl_i18n_utils/utils.py b/release/scripts/modules/bl_i18n_utils/utils.py
index d04e8bead7a..e74400775ae 100644
--- a/release/scripts/modules/bl_i18n_utils/utils.py
+++ b/release/scripts/modules/bl_i18n_utils/utils.py
@@ -21,6 +21,7 @@
# Some misc utilities...
import collections
+import concurrent.futures
import copy
import os
import re
@@ -61,6 +62,35 @@ def is_valid_po_path(path):
return bool(_valid_po_path_re.match(path))
+def get_best_similar(data):
+ import difflib
+ key, use_similar, similar_pool = data
+
+ # try to find some close key in existing messages...
+ # Optimized code inspired by difflib.get_close_matches (as we only need the best match).
+ # We also consider to never make a match when len differs more than -len_key / 2, +len_key * 2 (which is valid
+ # as long as use_similar is not below ~0.7).
+ # Gives an overall ~20% of improvement!
+ #tmp = difflib.get_close_matches(key[1], similar_pool, n=1, cutoff=use_similar)
+ #if tmp:
+ #tmp = tmp[0]
+ tmp = None
+ s = difflib.SequenceMatcher()
+ s.set_seq2(key[1])
+ len_key = len(key[1])
+ min_len = len_key // 2
+ max_len = len_key * 2
+ for x in similar_pool:
+ if min_len < len(x) < max_len:
+ s.set_seq1(x)
+ if s.real_quick_ratio() >= use_similar and s.quick_ratio() >= use_similar:
+ sratio = s.ratio()
+ if sratio >= use_similar:
+ tmp = x
+ use_similar = sratio
+ return key, tmp
+
+
class I18nMessage:
"""
Internal representation of a message.
@@ -233,40 +263,73 @@ class I18nMessages:
existing one. Messages no more found in ref will be marked as commented if keep_old_commented is True,
or removed.
"""
- import difflib
similar_pool = {}
if use_similar > 0.0:
for key, msg in self.msgs.items():
if msg.msgstr: # No need to waste time with void translations!
similar_pool.setdefault(key[1], set()).add(key)
- msgs = self._new_messages()
- for (key, msg) in ref.msgs.items():
- if key in self.msgs:
- msgs[key] = self.msgs[key]
- msgs[key].sources = msg.sources
- else:
- skey = None
- if use_similar > 0.0:
- # try to find some close key in existing messages...
- tmp = difflib.get_close_matches(key[1], similar_pool, n=1, cutoff=use_similar)
- if tmp:
- tmp = tmp[0]
+ msgs = self._new_messages().fromkeys(ref.msgs.keys())
+ ref_keys = set(ref.msgs.keys())
+ org_keys = set(self.msgs.keys())
+ new_keys = ref_keys - org_keys
+ removed_keys = org_keys - ref_keys
+
+ print(new_keys, "\n\n", removed_keys)
+
+ # First process keys present in both org and ref messages.
+ for key in ref_keys - new_keys:
+ msg, refmsg = self.msgs[key], ref.msgs[key]
+ msg.sources = refmsg.sources
+ msg.is_commented = refmsg.is_commented
+ msg.is_fuzzy = refmsg.is_fuzzy
+ msgs[key] = msg
+
+ # Next process new keys.
+ if use_similar > 0.0:
+ with concurrent.futures.ProcessPoolExecutor() as exctr:
+ for key, msgid in exctr.map(get_best_similar,
+ tuple((nk, use_similar, tuple(similar_pool.keys())) for nk in new_keys)):
+ if msgid:
# Try to get the same context, else just get one...
- skey = (key[0], tmp)
- if skey not in similar_pool[tmp]:
- skey = tuple(similar_pool[tmp])[0]
- msgs[key] = msg
- if skey:
- msgs[key].msgstr = self.msgs[skey].msgstr
- msgs[key].is_fuzzy = True
+ skey = (key[0], msgid)
+ if skey not in similar_pool[msgid]:
+ skey = tuple(similar_pool[msgid])[0]
+ # We keep org translation and comments, and mark message as fuzzy.
+ msg, refmsg = copy.deepcopy(self.msgs[skey]), ref.msgs[key]
+ msg.msgctxt = refmsg.msgctxt
+ msg.msgid = refmsg.msgid
+ msg.sources = refmsg.sources
+ msg.is_fuzzy = True
+ msg.is_commented = refmsg.is_commented
+ msgs[key] = msg
+ else:
+ msgs[key] = ref.msgs[key]
+ else:
+ for key in new_keys:
+ msgs[key] = ref.msgs[key]
+
# Add back all "old" and already commented messages as commented ones, if required
# (and translation was not void!).
if keep_old_commented:
- for key, msg in self.msgs.items():
- if key not in msgs and msg.msgstr:
- msgs[key] = msg
- msgs[key].is_commented = True
+ for key in removed_keys:
+ msgs[key] = self.msgs[key]
+ msgs[key].is_commented = True
+ msgs[key].sources = []
+
+ # Special 'meta' message, change project ID version and pot creation date...
+ key = ("", "")
+ rep = []
+ markers = ("Project-Id-Version:", "POT-Creation-Date:")
+ for mrk in markers:
+ for rl in ref.msgs[key].msgstr_lines:
+ if rl.startswith(mrk):
+ for idx, ml in enumerate(msgs[key].msgstr_lines):
+ if ml.startswith(mrk):
+ rep.append((idx, rl))
+ for idx, txt in rep:
+ msgs[key].msgstr_lines[idx] = txt
+
# And finalize the update!
self.msgs = msgs