#!/usr/bin/python3

# ***** BEGIN GPL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# ***** END GPL LICENSE BLOCK *****

# <pep8 compliant>

# Provenance: release/scripts/modules/bl_i18n_utils/rtl_utils.py, as added by
# commit 2c348d003e007fc168ae7a8d60b9d59792270ade (Bastien Montagne,
# 2013-02-24, "Big i18n tools update, I/II.").
# Commit notes: everything is still a bit raw and sometimes hackish; not
# every feature is implemented yet; a bunch of cleanup is still needed; the
# doc needs to be updated too.

# Preprocess right-to-left languages.
# You can use it either standalone, or through import_po_from_branches or
# update_trunk.
#
# Notes: This has been tested on Linux, not 100% sure it will work nicely on
#        Windows or OS X.
#        This uses ctypes, as there is no py3 binding for fribidi currently.
#        This implies you only need the compiled C library to run it.
#        Finally, note that it handles some formatting/escape codes (like
#        \", %s, %x12, %.4f, etc.), protecting them from ugly (evil) fribidi,
#        which seems completely unaware of such things (as unicode is...).

import sys
import ctypes
import re


# Values below mirror the C header of fribidi.

#define FRIBIDI_MASK_NEUTRAL    0x00000040L    /* Is neutral */
FRIBIDI_PAR_ON = 0x00000040


#define FRIBIDI_FLAG_SHAPE_MIRRORING    0x00000001
#define FRIBIDI_FLAG_REORDER_NSM        0x00000002

#define FRIBIDI_FLAG_SHAPE_ARAB_PRES    0x00000100
#define FRIBIDI_FLAG_SHAPE_ARAB_LIGA    0x00000200
#define FRIBIDI_FLAG_SHAPE_ARAB_CONSOLE 0x00000400

#define FRIBIDI_FLAG_REMOVE_BIDI        0x00010000
#define FRIBIDI_FLAG_REMOVE_JOINING     0x00020000
#define FRIBIDI_FLAG_REMOVE_SPECIALS    0x00040000

#define FRIBIDI_FLAGS_DEFAULT        ( \
#    FRIBIDI_FLAG_SHAPE_MIRRORING    | \
#    FRIBIDI_FLAG_REORDER_NSM        | \
#    FRIBIDI_FLAG_REMOVE_SPECIALS    )

#define FRIBIDI_FLAGS_ARABIC         ( \
#    FRIBIDI_FLAG_SHAPE_ARAB_PRES    | \
#    FRIBIDI_FLAG_SHAPE_ARAB_LIGA    )

FRIBIDI_FLAG_SHAPE_MIRRORING = 0x00000001
FRIBIDI_FLAG_REORDER_NSM = 0x00000002
FRIBIDI_FLAG_REMOVE_SPECIALS = 0x00040000

FRIBIDI_FLAG_SHAPE_ARAB_PRES = 0x00000100
FRIBIDI_FLAG_SHAPE_ARAB_LIGA = 0x00000200

FRIBIDI_FLAGS_DEFAULT = (FRIBIDI_FLAG_SHAPE_MIRRORING |
                         FRIBIDI_FLAG_REORDER_NSM |
                         FRIBIDI_FLAG_REMOVE_SPECIALS)

FRIBIDI_FLAGS_ARABIC = FRIBIDI_FLAG_SHAPE_ARAB_PRES | FRIBIDI_FLAG_SHAPE_ARAB_LIGA


# Matches a "menu" entry separator such as "%x12|".
MENU_DETECT_REGEX = re.compile(r"%x\d+\|")

# Unicode directional control characters used to wrap protected sequences.
_LRE = "\u202A"  # Left-to-right embedding.
_PDF = "\u202C"  # Pop directional formatting.
_LRO = "\u202D"  # Left-to-right override.

# Most likely incomplete, but seems to cover current needs.
_FORMAT_CODES = frozenset("tslfd")
# Note that '.' counts as a "digit" here, so that "%.4f" is handled.
_DIGITS = frozenset(".0123456789")


##### Kernel processing funcs. #####
def _format_seq_len(msg, idx, ln):
    """
    Return the length of the escape/format sequence starting at msg[idx],
    or 1 when no such sequence starts there (i.e. a single plain char).
    ln must be len(msg).
    """
    char = msg[idx]

    # \" or \'
    if idx < (ln - 1) and char == '\\' and msg[idx + 1] in "\"'":
        return 2

    # All remaining sequences start with a percent sign.
    if char != '%':
        return 1

    # %x12|
    if idx < (ln - 2) and msg[idx + 1] == 'x' and msg[idx + 2] in _DIGITS:
        dlt = 2
        while (idx + dlt) < ln and msg[idx + dlt] in _DIGITS:
            dlt += 1
        # The trailing pipe is optional, but protected too when present.
        if (idx + dlt) < ln and msg[idx + dlt] == '|':
            dlt += 1
        return dlt

    # %.4f
    if idx < (ln - 3) and msg[idx + 1] in _DIGITS:
        dlt = 2
        while (idx + dlt) < ln and msg[idx + dlt] in _DIGITS:
            dlt += 1
        # Only a real format sequence if a known conversion code follows.
        if (idx + dlt) < ln and msg[idx + dlt] in _FORMAT_CODES:
            return dlt + 1
        return 1

    # %s
    if idx < (ln - 1) and msg[idx + 1] in _FORMAT_CODES:
        return 2

    return 1


def protect_format_seq(msg):
    r"""
    Find some specific escaping/formatting sequences (like \", %s, %x12|,
    %.4f, etc.) and protect them from any modification, by wrapping each of
    them between LRE (U+202A) and PDF (U+202C) control chars.

    "Menu" messages (those containing something like "%x12|") additionally
    get a single leading LRE, unless they already start with LRE or LRO.

    msg is the message to protect; empty or None messages are returned as is.
    Returns the protected message as a new string.
    """
    if not msg:
        return msg

    # An ugly "menu" message, just force it whole LRE if not yet done.
    if MENU_DETECT_REGEX.search(msg) and msg[0] not in {_LRE, _LRO}:
        msg = _LRE + msg

    ret = []
    idx = 0
    ln = len(msg)
    while idx < ln:
        dlt = _format_seq_len(msg, idx, ln)
        chunk = msg[idx:idx + dlt]
        # Only multi-char sequences need protection; plain chars go as is.
        ret.append(_LRE + chunk + _PDF if dlt > 1 else chunk)
        idx += dlt

    return "".join(ret)


def log2vis(msgs, settings):
    """
    Globally mimics deprecated fribidi_log2vis.

    msgs should be an iterable of messages to rtl-process.
    settings must expose a FRIBIDI_LIB attribute, which is handed to
    ctypes.CDLL to load the compiled fribidi library.

    This is a generator: it yields one processed string per input message,
    in the same order. The library is only loaded once iteration starts.
    """
    fbd = ctypes.CDLL(settings.FRIBIDI_LIB)

    for msg in msgs:
        msg = protect_format_seq(msg)

        # Buffer modified in place by fribidi; drop its trailing NUL from
        # the length handed to the library.
        fbc_str = ctypes.create_unicode_buffer(msg)
        ln = len(fbc_str) - 1
        btypes = (ctypes.c_int * ln)()
        embed_lvl = (ctypes.c_uint8 * ln)()
        pbase_dir = ctypes.c_int(FRIBIDI_PAR_ON)
        jtypes = (ctypes.c_uint8 * ln)()
        flags = FRIBIDI_FLAGS_DEFAULT | FRIBIDI_FLAGS_ARABIC

        # Find out direction of each char.
        fbd.fribidi_get_bidi_types(fbc_str, ln, ctypes.byref(btypes))

        fbd.fribidi_get_par_embedding_levels(btypes, ln,
                                             ctypes.byref(pbase_dir),
                                             embed_lvl)

        # Joinings for arabic chars.
        fbd.fribidi_get_joining_types(fbc_str, ln, jtypes)
        fbd.fribidi_join_arabic(btypes, ln, embed_lvl, jtypes)

        # Final Shaping!
        fbd.fribidi_shape(flags, embed_lvl, ln, jtypes, fbc_str)

        # And now, the reordering.
        # Note that here, we expect a single line, so no need to do
        # fancy things...
        fbd.fribidi_reorder_line(flags, btypes, ln, 0, pbase_dir, embed_lvl,
                                 fbc_str, None)

        yield fbc_str.value