From 9c01830a901898402bcabe777fa155534729f239 Mon Sep 17 00:00:00 2001 From: elfmz Date: Tue, 11 Oct 2022 23:21:32 +0300 Subject: initial utils for visual string length improvements --- WinPort/CMakeLists.txt | 1 - WinPort/src/Backend/TTY/IsUnstableWidthChar.cpp | 415 ----------------- WinPort/src/Backend/TTY/IsUnstableWidthChar_mk.cpp | 72 --- WinPort/src/Backend/TTY/TTYBackend.cpp | 4 +- far2l/src/mix/strmix.cpp | 115 +---- utils/CMakeLists.txt | 2 + utils/include/utils.h | 12 + utils/src/CharClasses.cpp | 507 +++++++++++++++++++++ utils/src/CharClasses_mk.cpp | 110 +++++ utils/src/StrVisual.cpp | 105 +++++ 10 files changed, 757 insertions(+), 586 deletions(-) delete mode 100644 WinPort/src/Backend/TTY/IsUnstableWidthChar.cpp delete mode 100644 WinPort/src/Backend/TTY/IsUnstableWidthChar_mk.cpp create mode 100644 utils/src/CharClasses.cpp create mode 100644 utils/src/CharClasses_mk.cpp create mode 100644 utils/src/StrVisual.cpp diff --git a/WinPort/CMakeLists.txt b/WinPort/CMakeLists.txt index 2467b440..62d8320b 100644 --- a/WinPort/CMakeLists.txt +++ b/WinPort/CMakeLists.txt @@ -36,7 +36,6 @@ src/Backend/TTY/TTYOutput.cpp src/Backend/TTY/TTYFar2lClipboardBackend.cpp src/Backend/TTY/TTYNegotiateFar2l.cpp src/Backend/TTY/TTYXGlue.cpp -src/Backend/TTY/IsUnstableWidthChar.cpp ) add_library (WinPort ${SOURCES}) diff --git a/WinPort/src/Backend/TTY/IsUnstableWidthChar.cpp b/WinPort/src/Backend/TTY/IsUnstableWidthChar.cpp deleted file mode 100644 index 636266c3..00000000 --- a/WinPort/src/Backend/TTY/IsUnstableWidthChar.cpp +++ /dev/null @@ -1,415 +0,0 @@ -// this file autogenerated by IsUnstableWidthChar_mk.cpp - -#include - -bool IsUnstableWidthChar(wchar_t c) -{ - return (c >= 0x300 && c <= 0x36f) - || (c >= 0x483 && c <= 0x487) - || (c >= 0x591 && c <= 0x5bd) - || (c == 0x5bf) - || (c == 0x5c1 || c == 0x5c2) - || (c == 0x5c4 || c == 0x5c5) - || (c == 0x5c7) - || (c >= 0x610 && c <= 0x61a) - || (c == 0x620) - || (c >= 0x622 && c <= 0x65f) - || (c >= 0x66e && c <= 0x673) - || (c >= 0x675 && c <= 0x6d3) - || (c >= 0x6d5 && c <= 0x6dc) - || (c >= 0x6df && c <= 0x6e4) - || (c == 0x6e7 || c == 0x6e8) - || (c >= 0x6ea && c <= 0x6ef) - || (c >= 0x6fa && c <= 0x6fc) - || (c == 0x6ff) - || (c >= 0x710 && c <= 0x74a) - || (c >= 0x74d && c <= 0x77f) - || (c >= 0x7a6 && c <= 0x7b0) - || (c >= 0x7ca && c <= 0x7f3) - || (c == 0x7fa) - || (c == 0x7fd) - || (c >= 0x816 && c <= 0x819) - || (c >= 0x81b && c <= 0x823) - || (c >= 0x825 && c <= 0x827) - || (c >= 0x829 && c <= 0x82d) - || (c >= 0x840 && c <= 0x85b) - || (c == 0x860) - || (c >= 0x862 && c <= 0x865) - || (c >= 0x867 && c <= 0x86a) - || (c >= 0x8a0 && c <= 0x8ac) - || (c >= 0x8ae && c <= 0x8b4) - || (c >= 0x8b6 && c <= 0x8c7) - || (c >= 0x8d3 && c <= 0x8e1) - || (c >= 0x8e3 && c <= 0x903) - || (c >= 0x93a && c <= 0x93c) - || (c >= 0x93e && c <= 0x94f) - || (c >= 0x951 && c <= 0x957) - || (c == 0x962 || c == 0x963) - || (c >= 0x981 && c <= 0x983) - || (c == 0x9bc) - || (c >= 0x9be && c <= 0x9c4) - || (c == 0x9c7 || c == 0x9c8) - || (c >= 0x9cb && c <= 0x9cd) - || (c == 0x9d7) - || (c == 0x9e2 || c == 0x9e3) - || (c == 0x9fe) - || (c >= 0xa01 && c <= 0xa03) - || (c == 0xa3c) - || (c >= 0xa3e && c <= 0xa42) - || (c == 0xa47 || c == 0xa48) - || (c >= 0xa4b && c <= 0xa4d) - || (c == 0xa51) - || (c == 0xa70 || c == 0xa71) - || (c == 0xa75) - || (c >= 0xa81 && c <= 0xa83) - || (c == 0xabc) - || (c >= 0xabe && c <= 0xac5) - || (c >= 0xac7 && c <= 0xac9) - || (c >= 0xacb && c <= 0xacd) - || (c == 0xae2 || c == 0xae3) - || (c >= 0xafa && c <= 0xaff) - || (c >= 0xb01 && c <= 0xb03) - || (c == 0xb3c) - || (c >= 0xb3e && c <= 0xb44) - || (c == 0xb47 || c == 0xb48) - || (c >= 0xb4b && c <= 0xb4d) - || (c >= 0xb55 && c <= 0xb57) - || (c == 0xb62 || c == 0xb63) - || (c == 0xb82) - || (c >= 0xbbe && c <= 0xbc2) - || (c >= 0xbc6 && c <= 0xbc8) - || (c >= 0xbca && c <= 0xbcd) - || (c == 0xbd7) - || (c >= 0xc00 && c <= 0xc04) - || (c >= 0xc3e && c <= 0xc44) - || (c >= 0xc46 && c <= 0xc48) - || (c >= 0xc4a && c <= 0xc4d) - || (c == 0xc55 || c == 0xc56) - || (c == 0xc62 || c == 0xc63) - || (c >= 0xc81 && c <= 0xc83) - || (c == 0xcbc) - || (c >= 0xcbe && c <= 0xcc4) - || (c >= 0xcc6 && c <= 0xcc8) - || (c >= 0xcca && c <= 0xccd) - || (c == 0xcd5 || c == 0xcd6) - || (c == 0xce2 || c == 0xce3) - || (c >= 0xd00 && c <= 0xd03) - || (c == 0xd3b || c == 0xd3c) - || (c >= 0xd3e && c <= 0xd44) - || (c >= 0xd46 && c <= 0xd48) - || (c >= 0xd4a && c <= 0xd4d) - || (c == 0xd57) - || (c == 0xd62 || c == 0xd63) - || (c >= 0xd81 && c <= 0xd83) - || (c == 0xdca) - || (c >= 0xdcf && c <= 0xdd4) - || (c == 0xdd6) - || (c >= 0xdd8 && c <= 0xddf) - || (c == 0xdf2 || c == 0xdf3) - || (c == 0xe31) - || (c >= 0xe34 && c <= 0xe3a) - || (c >= 0xe47 && c <= 0xe4e) - || (c == 0xeb1) - || (c >= 0xeb4 && c <= 0xebc) - || (c >= 0xec8 && c <= 0xecd) - || (c == 0xf18 || c == 0xf19) - || (c == 0xf35) - || (c == 0xf37) - || (c == 0xf39) - || (c == 0xf3e || c == 0xf3f) - || (c >= 0xf71 && c <= 0xf84) - || (c == 0xf86 || c == 0xf87) - || (c >= 0xf8d && c <= 0xf97) - || (c >= 0xf99 && c <= 0xfbc) - || (c == 0xfc6) - || (c >= 0x102b && c <= 0x103e) - || (c >= 0x1056 && c <= 0x1059) - || (c >= 0x105e && c <= 0x1060) - || (c >= 0x1062 && c <= 0x1064) - || (c >= 0x1067 && c <= 0x106d) - || (c >= 0x1071 && c <= 0x1074) - || (c >= 0x1082 && c <= 0x108d) - || (c == 0x108f) - || (c >= 0x109a && c <= 0x109d) - || (c >= 0x1100 && c <= 0x115f) - || (c >= 0x135d && c <= 0x135f) - || (c >= 0x1712 && c <= 0x1714) - || (c >= 0x1732 && c <= 0x1734) - || (c == 0x1752 || c == 0x1753) - || (c == 0x1772 || c == 0x1773) - || (c >= 0x17b4 && c <= 0x17d3) - || (c == 0x17dd) - || (c == 0x1807) - || (c >= 0x180a && c <= 0x180d) - || (c >= 0x1820 && c <= 0x1878) - || (c >= 0x1885 && c <= 0x18aa) - || (c >= 0x1920 && c <= 0x192b) - || (c >= 0x1930 && c <= 0x193b) - || (c >= 0x1a17 && c <= 0x1a1b) - || (c >= 0x1a55 && c <= 0x1a5e) - || (c >= 0x1a60 && c <= 0x1a7c) - || (c == 0x1a7f) - || (c >= 0x1ab0 && c <= 0x1b04) - || (c >= 0x1b34 && c <= 0x1b44) - || (c >= 0x1b6b && c <= 0x1b73) - || (c >= 0x1b80 && c <= 0x1b82) - || (c >= 0x1ba1 && c <= 0x1bad) - || (c >= 0x1be6 && c <= 0x1bf3) - || (c >= 0x1c24 && c <= 0x1c37) - || (c >= 0x1cd0 && c <= 0x1cd2) - || (c >= 0x1cd4 && c <= 0x1ce8) - || (c == 0x1ced) - || (c == 0x1cf4) - || (c >= 0x1cf7 && c <= 0x1cf9) - || (c >= 0x1dc0 && c <= 0x1dff) - || (c == 0x200d) - || (c >= 0x20d0 && c <= 0x20ff) - || (c >= 0x2190 && c <= 0x243f) - || (c >= 0x25a0 && c <= 0x26ff) - || (c == 0x2705) - || (c == 0x270a || c == 0x270b) - || (c == 0x2728) - || (c == 0x274c) - || (c == 0x274e) - || (c >= 0x2753 && c <= 0x2755) - || (c == 0x2757) - || (c >= 0x2795 && c <= 0x2797) - || (c == 0x27b0) - || (c == 0x27bf) - || (c == 0x2b1b || c == 0x2b1c) - || (c == 0x2b50) - || (c == 0x2b55) - || (c >= 0x2cef && c <= 0x2cf1) - || (c == 0x2d7f) - || (c >= 0x2de0 && c <= 0x2dff) - || (c >= 0x2e80 && c <= 0x2e99) - || (c >= 0x2e9b && c <= 0x2ef3) - || (c >= 0x2f00 && c <= 0x2fd5) - || (c >= 0x2ff0 && c <= 0x2ffb) - || (c >= 0x3000 && c <= 0x303e) - || (c >= 0x3041 && c <= 0x3096) - || (c >= 0x3099 && c <= 0x30ff) - || (c >= 0x3105 && c <= 0x312f) - || (c >= 0x3131 && c <= 0x318e) - || (c >= 0x3190 && c <= 0x31e3) - || (c >= 0x31f0 && c <= 0x321e) - || (c >= 0x3220 && c <= 0x3247) - || (c >= 0x3250 && c <= 0x4dbf) - || (c >= 0x4e00 && c <= 0xa48c) - || (c >= 0xa490 && c <= 0xa4c6) - || (c == 0xa66f) - || (c >= 0xa674 && c <= 0xa67d) - || (c == 0xa69e || c == 0xa69f) - || (c == 0xa6f0 || c == 0xa6f1) - || (c == 0xa802) - || (c == 0xa806) - || (c == 0xa80b) - || (c >= 0xa823 && c <= 0xa827) - || (c == 0xa82c) - || (c >= 0xa840 && c <= 0xa872) - || (c == 0xa880 || c == 0xa881) - || (c >= 0xa8b4 && c <= 0xa8c5) - || (c >= 0xa8e0 && c <= 0xa8f1) - || (c == 0xa8ff) - || (c >= 0xa926 && c <= 0xa92d) - || (c >= 0xa947 && c <= 0xa953) - || (c >= 0xa960 && c <= 0xa97c) - || (c >= 0xa980 && c <= 0xa983) - || (c >= 0xa9b3 && c <= 0xa9c0) - || (c == 0xa9e5) - || (c >= 0xaa29 && c <= 0xaa36) - || (c == 0xaa43) - || (c == 0xaa4c || c == 0xaa4d) - || (c >= 0xaa7b && c <= 0xaa7d) - || (c == 0xaab0) - || (c >= 0xaab2 && c <= 0xaab4) - || (c == 0xaab7 || c == 0xaab8) - || (c == 0xaabe || c == 0xaabf) - || (c == 0xaac1) - || (c >= 0xaaeb && c <= 0xaaef) - || (c == 0xaaf5 || c == 0xaaf6) - || (c >= 0xabe3 && c <= 0xabea) - || (c == 0xabec || c == 0xabed) - || (c >= 0xac00 && c <= 0xd7a3) - || (c >= 0xd800 && c <= 0xdfff) - || (c >= 0xf900 && c <= 0xfaff) - || (c == 0xfb1e) - || (c >= 0xfe00 && c <= 0xfe19) - || (c >= 0xfe20 && c <= 0xfe52) - || (c >= 0xfe54 && c <= 0xfe66) - || (c >= 0xfe68 && c <= 0xfe6b) - || (c >= 0xff01 && c <= 0xff60) - || (c >= 0xffe0 && c <= 0xffe6) - || (c == 0x101fd) - || (c == 0x102e0) - || (c >= 0x10376 && c <= 0x1037a) - || (c >= 0x10a01 && c <= 0x10a03) - || (c == 0x10a05 || c == 0x10a06) - || (c >= 0x10a0c && c <= 0x10a0f) - || (c >= 0x10a38 && c <= 0x10a3a) - || (c == 0x10a3f) - || (c >= 0x10ac0 && c <= 0x10ac5) - || (c == 0x10ac7) - || (c == 0x10ac9 || c == 0x10aca) - || (c >= 0x10acd && c <= 0x10ae1) - || (c >= 0x10ae4 && c <= 0x10ae6) - || (c >= 0x10aeb && c <= 0x10aef) - || (c >= 0x10b80 && c <= 0x10b91) - || (c >= 0x10ba9 && c <= 0x10bae) - || (c >= 0x10d00 && c <= 0x10d27) - || (c == 0x10eab || c == 0x10eac) - || (c >= 0x10f30 && c <= 0x10f44) - || (c >= 0x10f46 && c <= 0x10f54) - || (c == 0x10fb0) - || (c >= 0x10fb2 && c <= 0x10fb6) - || (c >= 0x10fb8 && c <= 0x10fbf) - || (c >= 0x10fc1 && c <= 0x10fc4) - || (c >= 0x10fc9 && c <= 0x10fcb) - || (c >= 0x11000 && c <= 0x11002) - || (c >= 0x11038 && c <= 0x11046) - || (c >= 0x1107f && c <= 0x11082) - || (c >= 0x110b0 && c <= 0x110ba) - || (c >= 0x11100 && c <= 0x11102) - || (c >= 0x11127 && c <= 0x11134) - || (c == 0x11145 || c == 0x11146) - || (c == 0x11173) - || (c >= 0x11180 && c <= 0x11182) - || (c >= 0x111b3 && c <= 0x111c0) - || (c >= 0x111c9 && c <= 0x111cc) - || (c == 0x111ce || c == 0x111cf) - || (c >= 0x1122c && c <= 0x11237) - || (c == 0x1123e) - || (c >= 0x112df && c <= 0x112ea) - || (c >= 0x11300 && c <= 0x11303) - || (c == 0x1133b || c == 0x1133c) - || (c >= 0x1133e && c <= 0x11344) - || (c == 0x11347 || c == 0x11348) - || (c >= 0x1134b && c <= 0x1134d) - || (c == 0x11357) - || (c == 0x11362 || c == 0x11363) - || (c >= 0x11366 && c <= 0x1136c) - || (c >= 0x11370 && c <= 0x11374) - || (c >= 0x11435 && c <= 0x11446) - || (c == 0x1145e) - || (c >= 0x114b0 && c <= 0x114c3) - || (c >= 0x115af && c <= 0x115b5) - || (c >= 0x115b8 && c <= 0x115c0) - || (c == 0x115dc || c == 0x115dd) - || (c >= 0x11630 && c <= 0x11640) - || (c >= 0x116ab && c <= 0x116b7) - || (c >= 0x1171d && c <= 0x1172b) - || (c >= 0x1182c && c <= 0x1183a) - || (c >= 0x11930 && c <= 0x11935) - || (c == 0x11937 || c == 0x11938) - || (c >= 0x1193b && c <= 0x1193e) - || (c == 0x11940) - || (c == 0x11942 || c == 0x11943) - || (c >= 0x119d1 && c <= 0x119d7) - || (c >= 0x119da && c <= 0x119e0) - || (c == 0x119e4) - || (c >= 0x11a01 && c <= 0x11a0a) - || (c >= 0x11a33 && c <= 0x11a39) - || (c >= 0x11a3b && c <= 0x11a3e) - || (c == 0x11a47) - || (c >= 0x11a51 && c <= 0x11a5b) - || (c >= 0x11a8a && c <= 0x11a99) - || (c >= 0x11c2f && c <= 0x11c36) - || (c >= 0x11c38 && c <= 0x11c3f) - || (c >= 0x11c92 && c <= 0x11ca7) - || (c >= 0x11ca9 && c <= 0x11cb6) - || (c >= 0x11d31 && c <= 0x11d36) - || (c == 0x11d3a) - || (c == 0x11d3c || c == 0x11d3d) - || (c >= 0x11d3f && c <= 0x11d45) - || (c == 0x11d47) - || (c >= 0x11d8a && c <= 0x11d8e) - || (c == 0x11d90 || c == 0x11d91) - || (c >= 0x11d93 && c <= 0x11d97) - || (c >= 0x11ef3 && c <= 0x11ef6) - || (c >= 0x16af0 && c <= 0x16af4) - || (c >= 0x16b30 && c <= 0x16b36) - || (c == 0x16f4f) - || (c >= 0x16f51 && c <= 0x16f87) - || (c >= 0x16f8f && c <= 0x16f92) - || (c >= 0x16fe0 && c <= 0x16fe4) - || (c == 0x16ff0 || c == 0x16ff1) - || (c >= 0x17000 && c <= 0x187f7) - || (c >= 0x18800 && c <= 0x18cd5) - || (c >= 0x18d00 && c <= 0x18d08) - || (c >= 0x1b000 && c <= 0x1b11e) - || (c >= 0x1b150 && c <= 0x1b152) - || (c >= 0x1b164 && c <= 0x1b167) - || (c >= 0x1b170 && c <= 0x1b2fb) - || (c == 0x1bc9d || c == 0x1bc9e) - || (c >= 0x1d165 && c <= 0x1d169) - || (c >= 0x1d16d && c <= 0x1d172) - || (c >= 0x1d17b && c <= 0x1d182) - || (c >= 0x1d185 && c <= 0x1d18b) - || (c >= 0x1d1aa && c <= 0x1d1ad) - || (c >= 0x1d242 && c <= 0x1d244) - || (c >= 0x1da00 && c <= 0x1da36) - || (c >= 0x1da3b && c <= 0x1da6c) - || (c == 0x1da75) - || (c == 0x1da84) - || (c >= 0x1da9b && c <= 0x1da9f) - || (c >= 0x1daa1 && c <= 0x1daaf) - || (c >= 0x1e000 && c <= 0x1e006) - || (c >= 0x1e008 && c <= 0x1e018) - || (c >= 0x1e01b && c <= 0x1e021) - || (c == 0x1e023 || c == 0x1e024) - || (c >= 0x1e026 && c <= 0x1e02a) - || (c >= 0x1e130 && c <= 0x1e136) - || (c >= 0x1e2ec && c <= 0x1e2ef) - || (c >= 0x1e8d0 && c <= 0x1e8d6) - || (c >= 0x1e900 && c <= 0x1e94a) - || (c == 0x1f004) - || (c == 0x1f0cf) - || (c == 0x1f18e) - || (c >= 0x1f191 && c <= 0x1f19a) - || (c >= 0x1f200 && c <= 0x1f202) - || (c >= 0x1f210 && c <= 0x1f23b) - || (c >= 0x1f240 && c <= 0x1f248) - || (c == 0x1f250 || c == 0x1f251) - || (c >= 0x1f260 && c <= 0x1f265) - || (c >= 0x1f300 && c <= 0x1f320) - || (c >= 0x1f32d && c <= 0x1f335) - || (c >= 0x1f337 && c <= 0x1f37c) - || (c >= 0x1f37e && c <= 0x1f393) - || (c >= 0x1f3a0 && c <= 0x1f3ca) - || (c >= 0x1f3cf && c <= 0x1f3d3) - || (c >= 0x1f3e0 && c <= 0x1f3f0) - || (c == 0x1f3f4) - || (c >= 0x1f3f8 && c <= 0x1f43e) - || (c == 0x1f440) - || (c >= 0x1f442 && c <= 0x1f4fc) - || (c >= 0x1f4ff && c <= 0x1f53d) - || (c >= 0x1f54b && c <= 0x1f54e) - || (c >= 0x1f550 && c <= 0x1f567) - || (c == 0x1f57a) - || (c == 0x1f595 || c == 0x1f596) - || (c == 0x1f5a4) - || (c >= 0x1f5fb && c <= 0x1f64f) - || (c >= 0x1f680 && c <= 0x1f6c5) - || (c == 0x1f6cc) - || (c >= 0x1f6d0 && c <= 0x1f6d2) - || (c >= 0x1f6d5 && c <= 0x1f6d7) - || (c == 0x1f6eb || c == 0x1f6ec) - || (c >= 0x1f6f4 && c <= 0x1f6fc) - || (c >= 0x1f7e0 && c <= 0x1f7eb) - || (c >= 0x1f90c && c <= 0x1f93a) - || (c >= 0x1f93c && c <= 0x1f945) - || (c >= 0x1f947 && c <= 0x1f978) - || (c >= 0x1f97a && c <= 0x1f9cb) - || (c >= 0x1f9cd && c <= 0x1f9ff) - || (c >= 0x1fa70 && c <= 0x1fa74) - || (c >= 0x1fa78 && c <= 0x1fa7a) - || (c >= 0x1fa80 && c <= 0x1fa86) - || (c >= 0x1fa90 && c <= 0x1faa8) - || (c >= 0x1fab0 && c <= 0x1fab6) - || (c >= 0x1fac0 && c <= 0x1fac2) - || (c >= 0x1fad0 && c <= 0x1fad6) - || (c >= 0x20000 && c <= 0x2fffd) - || (c >= 0x30000 && c <= 0x3fffd) - || (c >= 0xe0100 && c <= 0xe01ef) - ; -} diff --git a/WinPort/src/Backend/TTY/IsUnstableWidthChar_mk.cpp b/WinPort/src/Backend/TTY/IsUnstableWidthChar_mk.cpp deleted file mode 100644 index e5684611..00000000 --- a/WinPort/src/Backend/TTY/IsUnstableWidthChar_mk.cpp +++ /dev/null @@ -1,72 +0,0 @@ -#include "unicode/uchar.h" -#include "unicode/utypes.h" -#include "unicode/stringoptions.h" -#include "stdio.h" - -/// Usage: -/// g++ -O2 ./IsUnstableWidthChar_mk.cpp -o /tmp/IsUnstableWidthChar_mk -licuuc && /tmp/IsUnstableWidthChar_mk > IsUnstableWidthChar.cpp - -static bool IsUnstableWidthBlock(int block) -{ - return block == UBLOCK_ARROWS - || block == UBLOCK_MATHEMATICAL_OPERATORS - || block == UBLOCK_MISCELLANEOUS_TECHNICAL - || block == UBLOCK_CONTROL_PICTURES - || block == UBLOCK_GEOMETRIC_SHAPES - || block == UBLOCK_MISCELLANEOUS_SYMBOLS - || block == UBLOCK_COMBINING_DIACRITICAL_MARKS - || block == UBLOCK_COMBINING_MARKS_FOR_SYMBOLS - || block == UBLOCK_COMBINING_HALF_MARKS - || block == UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT - || block == UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED - ; -} - -int main() -{ -// printf("%u\n", u_getIntPropertyValue(0xcbe, UCHAR_GENERAL_CATEGORY)); -// return -1; - UChar32 c, last = 0x10ffff; - UChar32 unstable_start = 0; - bool first = true; - printf("// this file autogenerated by IsUnstableWidthChar_mk.cpp\n\n"); - printf("#include \n\n"); - printf("bool IsUnstableWidthChar(wchar_t c)\n"); - printf("{\n"); - printf("\treturn "); - for (c = 1; c <= last + 1; ++c) { - bool unstable = false; - if (c <= last) { - const auto width = u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH); - const auto jt = u_getIntPropertyValue(c, UCHAR_JOINING_TYPE); - const auto cat = u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY); - unstable = (width == U_EA_FULLWIDTH || width == U_EA_WIDE - || (jt != U_JT_NON_JOINING && jt != U_JT_TRANSPARENT) - || cat == U_NON_SPACING_MARK || cat == U_COMBINING_SPACING_MARK || cat == U_SURROGATE - || IsUnstableWidthBlock(u_getIntPropertyValue(c, UCHAR_BLOCK))); - } - - if (unstable) { - if (!unstable_start) { - unstable_start = c; - } - - } else if (unstable_start) { - if (first) { - first = false; - } else { - printf("\t || "); - } - if (unstable_start + 2 == c) { - printf("(c == 0x%x || c == 0x%x)\n", (unsigned int)unstable_start, (unsigned int)c - 1); - } else if (unstable_start + 1 < c) { - printf("(c >= 0x%x && c <= 0x%x)\n", (unsigned int)unstable_start, (unsigned int)c - 1); - } else { - printf("(c == 0x%x)\n", (unsigned int)unstable_start); - } - unstable_start = 0; - } - } - printf("\t;\n"); - printf("}\n"); -} diff --git a/WinPort/src/Backend/TTY/TTYBackend.cpp b/WinPort/src/Backend/TTY/TTYBackend.cpp index 7c893d83..62c52d4e 100644 --- a/WinPort/src/Backend/TTY/TTYBackend.cpp +++ b/WinPort/src/Backend/TTY/TTYBackend.cpp @@ -28,8 +28,6 @@ #include "FarTTY.h" #include "../FSClipboardBackend.h" -bool IsUnstableWidthChar(wchar_t c); - static volatile long s_terminal_size_change_id = 0; static TTYBackend * g_vtb = nullptr; @@ -386,7 +384,7 @@ bool TTYBackend::IsUnstableWidthCharCached(wchar_t c) { const size_t h = size_t(c) % ARRAYSIZE(_stable_width_chars_cache); if (_stable_width_chars_cache[h] != c) { - if (IsUnstableWidthChar(c)) { + if (IsCharUnstableWidth(c)) { return true; } _stable_width_chars_cache[h] = c; diff --git a/far2l/src/mix/strmix.cpp b/far2l/src/mix/strmix.cpp index 1e422050..190f9cd2 100644 --- a/far2l/src/mix/strmix.cpp +++ b/far2l/src/mix/strmix.cpp @@ -225,20 +225,13 @@ wchar_t* WINAPI TruncStrFromEnd(wchar_t *Str,int MaxLength) { assert(MaxLength >= 0); - MaxLength=Max(0, MaxLength); + MaxLength = Max(0, MaxLength); - if (Str) - { - int Length = StrLength(Str); - - if (Length > MaxLength) - { - if (MaxLength>3) - wmemcpy(Str+MaxLength-3, L"...", 3); - - Str[MaxLength]=0; - } - } + const size_t Len = StrLength(Str); + size_t n = Len; + StrVisualTruncateRight(Str, n, MaxLength); + assert(n <= Len); + Str[n] = 0; return Str; } @@ -248,27 +241,13 @@ wchar_t* WINAPI TruncStr(wchar_t *Str,int MaxLength) { assert(MaxLength >= 0); - MaxLength=Max(0, MaxLength); + MaxLength = Max(0, MaxLength); - if (Str) - { - int Length=StrLength(Str); - - if (MaxLength<0) - MaxLength=0; - - if (Length > MaxLength) - { - if (MaxLength>3) - { - wchar_t *MovePos = Str+Length-MaxLength+3; - wmemmove(Str+3, MovePos, StrLength(MovePos)+1); - wmemcpy(Str,L"...",3); - } - - Str[MaxLength]=0; - } - } + const size_t Len = StrLength(Str); + size_t n = Len; + StrVisualTruncateLeft(Str, n, MaxLength); + assert(n <= Len); + Str[n] = 0; return Str; } @@ -286,31 +265,13 @@ wchar_t* TruncStrFromCenter(wchar_t *Str, int MaxLength) { assert(MaxLength >= 0); - MaxLength=Max(0, MaxLength); - - if (Str) - { - int Length = StrLength(Str); - - if (MaxLength < 0) - MaxLength=0; - - if (Length > MaxLength) - { - const int DotsLen = 3; - - if (MaxLength > DotsLen) - { - int Len1 = (MaxLength - DotsLen) / 2; - int Len2 = MaxLength - DotsLen - Len1; - wmemcpy(Str + Len1, L"...", DotsLen); - wmemmove(Str + Len1 + DotsLen, Str + Length - Len2, Len2); - } - - Str[MaxLength] = 0; - } - } + MaxLength = Max(0, MaxLength); + const size_t Len = StrLength(Str); + size_t n = Len; + StrVisualTruncateCenter(Str, n, MaxLength); + assert(n <= Len); + Str[n] = 0; return Str; } @@ -324,44 +285,8 @@ FARString& TruncStrFromCenter(FARString &strStr, int MaxLength) wchar_t* WINAPI TruncPathStr(wchar_t *Str, int MaxLength) { - assert(MaxLength >= 0); - - MaxLength=Max(0, MaxLength); - - if (Str) - { - int nLength = (int)wcslen(Str); - - if ((MaxLength > 0) && (nLength > MaxLength) && (nLength >= 2)) - { - wchar_t *lpStart = nullptr; - -/* if (*Str && (Str[1] == L':') && IsSlash(Str[2])) - lpStart = Str+3; - else*/ - { - if ((Str[0] == GOOD_SLASH) && (Str[1] == GOOD_SLASH)) - { - if ((lpStart = const_cast(FirstSlash(Str+2))) ) - { - wchar_t *lpStart2=lpStart; - - if ((lpStart-Str < nLength) && ((lpStart=const_cast(FirstSlash(lpStart2+1))))) - lpStart++; - } - } - } - - if (!lpStart || (lpStart-Str > MaxLength-5)) - return TruncStr(Str, MaxLength); - - wchar_t *lpInPos = lpStart+3+(nLength-MaxLength); - wmemmove(lpStart+3, lpInPos, (wcslen(lpInPos)+1)); - wmemcpy(lpStart, L"...", 3); - } - } - - return Str; + // TODO + return TruncStr(Str, MaxLength); } diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 44333da9..224e5644 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -30,6 +30,8 @@ set(SOURCES src/TestPath.cpp src/PipeIPC.cpp src/PathParts.cpp + src/CharClasses.cpp + src/StrVisual.cpp ) add_library (utils ${SOURCES}) diff --git a/utils/include/utils.h b/utils/include/utils.h index cb0b7e03..20163a34 100644 --- a/utils/include/utils.h +++ b/utils/include/utils.h @@ -332,3 +332,15 @@ template } #define DBGLINE fprintf(stderr, "%d %d @%s\n", getpid(), __LINE__, __FILE__) + +bool IsCharFullWidth(wchar_t c); +bool IsCharPrefix(wchar_t c); +bool IsCharSuffix(wchar_t c); +bool IsCharXxxfix(wchar_t c); + +bool IsCharUnstableWidth(wchar_t c); + +size_t StrVisualLength(const wchar_t *pwz, size_t n); +void StrVisualTruncateLeft(wchar_t *pwz, size_t &n, size_t vl_max); +void StrVisualTruncateRight(wchar_t *pwz, size_t &n, size_t vl_max); +void StrVisualTruncateCenter(wchar_t *pwz, size_t &n, size_t vl_max); diff --git a/utils/src/CharClasses.cpp b/utils/src/CharClasses.cpp new file mode 100644 index 00000000..3a0b4142 --- /dev/null +++ b/utils/src/CharClasses.cpp @@ -0,0 +1,507 @@ +// this file autogenerated by IsUnstableWidthChar_mk.cpp + +#include + +bool IsCharFullWidth(wchar_t c) +{ + return (c >= 0x20 && c <= 0x7e) + || (c >= 0xa1 && c <= 0xa8) + || (c == 0xaa) + || (c >= 0xac && c <= 0xb4) + || (c >= 0xb6 && c <= 0xba) + || (c >= 0xbc && c <= 0xbf) + || (c == 0xc6) + || (c == 0xd0) + || (c == 0xd7 || c == 0xd8) + || (c >= 0xde && c <= 0xe1) + || (c == 0xe6) + || (c >= 0xe8 && c <= 0xea) + || (c == 0xec || c == 0xed) + || (c == 0xf0) + || (c == 0xf2 || c == 0xf3) + || (c >= 0xf7 && c <= 0xfa) + || (c == 0xfc) + || (c == 0xfe) + || (c == 0x101) + || (c == 0x111) + || (c == 0x113) + || (c == 0x11b) + || (c == 0x126 || c == 0x127) + || (c == 0x12b) + || (c >= 0x131 && c <= 0x133) + || (c == 0x138) + || (c >= 0x13f && c <= 0x142) + || (c == 0x144) + || (c >= 0x148 && c <= 0x14b) + || (c == 0x14d) + || (c == 0x152 || c == 0x153) + || (c == 0x166 || c == 0x167) + || (c == 0x16b) + || (c == 0x1ce) + || (c == 0x1d0) + || (c == 0x1d2) + || (c == 0x1d4) + || (c == 0x1d6) + || (c == 0x1d8) + || (c == 0x1da) + || (c == 0x1dc) + || (c == 0x251) + || (c == 0x261) + || (c == 0x2c4) + || (c == 0x2c7) + || (c >= 0x2c9 && c <= 0x2cb) + || (c == 0x2cd) + || (c == 0x2d0) + || (c >= 0x2d8 && c <= 0x2db) + || (c == 0x2dd) + || (c == 0x2df) + || (c >= 0x300 && c <= 0x36f) + || (c >= 0x391 && c <= 0x3a1) + || (c >= 0x3a3 && c <= 0x3a9) + || (c >= 0x3b1 && c <= 0x3c1) + || (c >= 0x3c3 && c <= 0x3c9) + || (c == 0x401) + || (c >= 0x410 && c <= 0x44f) + || (c == 0x451) + || (c >= 0x1100 && c <= 0x115f) + || (c == 0x2010) + || (c >= 0x2013 && c <= 0x2016) + || (c == 0x2018 || c == 0x2019) + || (c == 0x201c || c == 0x201d) + || (c >= 0x2020 && c <= 0x2022) + || (c >= 0x2024 && c <= 0x2027) + || (c == 0x2030) + || (c == 0x2032 || c == 0x2033) + || (c == 0x2035) + || (c == 0x203b) + || (c == 0x203e) + || (c == 0x2074) + || (c == 0x207f) + || (c >= 0x2081 && c <= 0x2084) + || (c == 0x20a9) + || (c == 0x20ac) + || (c == 0x2103) + || (c == 0x2105) + || (c == 0x2109) + || (c == 0x2113) + || (c == 0x2116) + || (c == 0x2121 || c == 0x2122) + || (c == 0x2126) + || (c == 0x212b) + || (c == 0x2153 || c == 0x2154) + || (c >= 0x215b && c <= 0x215e) + || (c >= 0x2160 && c <= 0x216b) + || (c >= 0x2170 && c <= 0x2179) + || (c == 0x2189) + || (c >= 0x2190 && c <= 0x243f) + || (c >= 0x2460 && c <= 0x24e9) + || (c >= 0x24eb && c <= 0x254b) + || (c >= 0x2550 && c <= 0x2573) + || (c >= 0x2580 && c <= 0x258f) + || (c >= 0x2592 && c <= 0x2595) + || (c >= 0x25a0 && c <= 0x26ff) + || (c == 0x2705) + || (c == 0x270a || c == 0x270b) + || (c == 0x2728) + || (c == 0x273d) + || (c == 0x274c) + || (c == 0x274e) + || (c >= 0x2753 && c <= 0x2755) + || (c == 0x2757) + || (c >= 0x2776 && c <= 0x277f) + || (c >= 0x2795 && c <= 0x2797) + || (c == 0x27b0) + || (c == 0x27bf) + || (c >= 0x27e6 && c <= 0x27ed) + || (c == 0x2985 || c == 0x2986) + || (c == 0x2b1b || c == 0x2b1c) + || (c == 0x2b50) + || (c >= 0x2b55 && c <= 0x2b59) + || (c >= 0x2e80 && c <= 0x2e99) + || (c >= 0x2e9b && c <= 0x2ef3) + || (c >= 0x2f00 && c <= 0x2fd5) + || (c >= 0x2ff0 && c <= 0x2ffb) + || (c >= 0x3000 && c <= 0x303e) + || (c >= 0x3041 && c <= 0x3096) + || (c >= 0x3099 && c <= 0x30ff) + || (c >= 0x3105 && c <= 0x312e) + || (c >= 0x3131 && c <= 0x318e) + || (c >= 0x3190 && c <= 0x31ba) + || (c >= 0x31c0 && c <= 0x31e3) + || (c >= 0x31f0 && c <= 0x321e) + || (c >= 0x3220 && c <= 0x32fe) + || (c >= 0x3300 && c <= 0x4dbf) + || (c >= 0x4e00 && c <= 0xa48c) + || (c >= 0xa490 && c <= 0xa4c6) + || (c >= 0xa960 && c <= 0xa97c) + || (c >= 0xac00 && c <= 0xd7a3) + || (c >= 0xe000 && c <= 0xfaff) + || (c >= 0xfe00 && c <= 0xfe19) + || (c >= 0xfe30 && c <= 0xfe52) + || (c >= 0xfe54 && c <= 0xfe66) + || (c >= 0xfe68 && c <= 0xfe6b) + || (c >= 0xff01 && c <= 0xffbe) + || (c >= 0xffc2 && c <= 0xffc7) + || (c >= 0xffca && c <= 0xffcf) + || (c >= 0xffd2 && c <= 0xffd7) + || (c >= 0xffda && c <= 0xffdc) + || (c >= 0xffe0 && c <= 0xffe6) + || (c >= 0xffe8 && c <= 0xffee) + || (c == 0xfffd) + || (c == 0x16fe0 || c == 0x16fe1) + || (c >= 0x17000 && c <= 0x187ec) + || (c >= 0x18800 && c <= 0x18af2) + || (c >= 0x1b000 && c <= 0x1b11e) + || (c >= 0x1b170 && c <= 0x1b2fb) + || (c == 0x1f004) + || (c == 0x1f0cf) + || (c >= 0x1f100 && c <= 0x1f10a) + || (c >= 0x1f110 && c <= 0x1f12d) + || (c >= 0x1f130 && c <= 0x1f169) + || (c >= 0x1f170 && c <= 0x1f1ac) + || (c >= 0x1f200 && c <= 0x1f202) + || (c >= 0x1f210 && c <= 0x1f23b) + || (c >= 0x1f240 && c <= 0x1f248) + || (c == 0x1f250 || c == 0x1f251) + || (c >= 0x1f260 && c <= 0x1f265) + || (c >= 0x1f300 && c <= 0x1f320) + || (c >= 0x1f32d && c <= 0x1f335) + || (c >= 0x1f337 && c <= 0x1f37c) + || (c >= 0x1f37e && c <= 0x1f393) + || (c >= 0x1f3a0 && c <= 0x1f3ca) + || (c >= 0x1f3cf && c <= 0x1f3d3) + || (c >= 0x1f3e0 && c <= 0x1f3f0) + || (c == 0x1f3f4) + || (c >= 0x1f3f8 && c <= 0x1f43e) + || (c == 0x1f440) + || (c >= 0x1f442 && c <= 0x1f4fc) + || (c >= 0x1f4ff && c <= 0x1f53d) + || (c >= 0x1f54b && c <= 0x1f54e) + || (c >= 0x1f550 && c <= 0x1f567) + || (c == 0x1f57a) + || (c == 0x1f595 || c == 0x1f596) + || (c == 0x1f5a4) + || (c >= 0x1f5fb && c <= 0x1f64f) + || (c >= 0x1f680 && c <= 0x1f6c5) + || (c == 0x1f6cc) + || (c >= 0x1f6d0 && c <= 0x1f6d2) + || (c == 0x1f6eb || c == 0x1f6ec) + || (c >= 0x1f6f4 && c <= 0x1f6f8) + || (c >= 0x1f910 && c <= 0x1f93e) + || (c >= 0x1f940 && c <= 0x1f94c) + || (c >= 0x1f950 && c <= 0x1f96b) + || (c >= 0x1f980 && c <= 0x1f997) + || (c == 0x1f9c0) + || (c >= 0x1f9d0 && c <= 0x1f9e6) + || (c >= 0x20000 && c <= 0x2fffd) + || (c >= 0x30000 && c <= 0x3fffd) + || (c >= 0xe0100 && c <= 0xe01ef) + || (c >= 0xf0000 && c <= 0xffffd) + || (c >= 0x100000 && c <= 0x10fffd) + ; +} + +bool IsCharPrefix(wchar_t c) +{ + return (c >= 0xd800 && c <= 0xdfff) + ; +} + +bool IsCharSuffix(wchar_t c) +{ + return (c >= 0x300 && c <= 0x36f) + || (c >= 0x483 && c <= 0x487) + || (c >= 0x591 && c <= 0x5bd) + || (c == 0x5bf) + || (c == 0x5c1 || c == 0x5c2) + || (c == 0x5c4 || c == 0x5c5) + || (c == 0x5c7) + || (c >= 0x610 && c <= 0x61a) + || (c == 0x620) + || (c >= 0x622 && c <= 0x65f) + || (c >= 0x66e && c <= 0x673) + || (c >= 0x675 && c <= 0x6d3) + || (c >= 0x6d5 && c <= 0x6dc) + || (c >= 0x6df && c <= 0x6e4) + || (c == 0x6e7 || c == 0x6e8) + || (c >= 0x6ea && c <= 0x6ef) + || (c >= 0x6fa && c <= 0x6fc) + || (c == 0x6ff) + || (c >= 0x710 && c <= 0x74a) + || (c >= 0x74d && c <= 0x77f) + || (c >= 0x7a6 && c <= 0x7b0) + || (c >= 0x7ca && c <= 0x7f3) + || (c == 0x7fa) + || (c >= 0x816 && c <= 0x819) + || (c >= 0x81b && c <= 0x823) + || (c >= 0x825 && c <= 0x827) + || (c >= 0x829 && c <= 0x82d) + || (c >= 0x840 && c <= 0x855) + || (c >= 0x859 && c <= 0x85b) + || (c == 0x860) + || (c >= 0x862 && c <= 0x865) + || (c >= 0x867 && c <= 0x86a) + || (c >= 0x8a0 && c <= 0x8ac) + || (c >= 0x8ae && c <= 0x8b4) + || (c >= 0x8b6 && c <= 0x8bd) + || (c >= 0x8d4 && c <= 0x8e1) + || (c >= 0x8e3 && c <= 0x903) + || (c >= 0x93a && c <= 0x93c) + || (c >= 0x93e && c <= 0x94f) + || (c >= 0x951 && c <= 0x957) + || (c == 0x962 || c == 0x963) + || (c >= 0x981 && c <= 0x983) + || (c == 0x9bc) + || (c >= 0x9be && c <= 0x9c4) + || (c == 0x9c7 || c == 0x9c8) + || (c >= 0x9cb && c <= 0x9cd) + || (c == 0x9d7) + || (c == 0x9e2 || c == 0x9e3) + || (c >= 0xa01 && c <= 0xa03) + || (c == 0xa3c) + || (c >= 0xa3e && c <= 0xa42) + || (c == 0xa47 || c == 0xa48) + || (c >= 0xa4b && c <= 0xa4d) + || (c == 0xa51) + || (c == 0xa70 || c == 0xa71) + || (c == 0xa75) + || (c >= 0xa81 && c <= 0xa83) + || (c == 0xabc) + || (c >= 0xabe && c <= 0xac5) + || (c >= 0xac7 && c <= 0xac9) + || (c >= 0xacb && c <= 0xacd) + || (c == 0xae2 || c == 0xae3) + || (c >= 0xafa && c <= 0xaff) + || (c >= 0xb01 && c <= 0xb03) + || (c == 0xb3c) + || (c >= 0xb3e && c <= 0xb44) + || (c == 0xb47 || c == 0xb48) + || (c >= 0xb4b && c <= 0xb4d) + || (c == 0xb56 || c == 0xb57) + || (c == 0xb62 || c == 0xb63) + || (c == 0xb82) + || (c >= 0xbbe && c <= 0xbc2) + || (c >= 0xbc6 && c <= 0xbc8) + || (c >= 0xbca && c <= 0xbcd) + || (c == 0xbd7) + || (c >= 0xc00 && c <= 0xc03) + || (c >= 0xc3e && c <= 0xc44) + || (c >= 0xc46 && c <= 0xc48) + || (c >= 0xc4a && c <= 0xc4d) + || (c == 0xc55 || c == 0xc56) + || (c == 0xc62 || c == 0xc63) + || (c >= 0xc81 && c <= 0xc83) + || (c == 0xcbc) + || (c >= 0xcbe && c <= 0xcc4) + || (c >= 0xcc6 && c <= 0xcc8) + || (c >= 0xcca && c <= 0xccd) + || (c == 0xcd5 || c == 0xcd6) + || (c == 0xce2 || c == 0xce3) + || (c >= 0xd00 && c <= 0xd03) + || (c == 0xd3b || c == 0xd3c) + || (c >= 0xd3e && c <= 0xd44) + || (c >= 0xd46 && c <= 0xd48) + || (c >= 0xd4a && c <= 0xd4d) + || (c == 0xd57) + || (c == 0xd62 || c == 0xd63) + || (c == 0xd82 || c == 0xd83) + || (c == 0xdca) + || (c >= 0xdcf && c <= 0xdd4) + || (c == 0xdd6) + || (c >= 0xdd8 && c <= 0xddf) + || (c == 0xdf2 || c == 0xdf3) + || (c == 0xe31) + || (c >= 0xe34 && c <= 0xe3a) + || (c >= 0xe47 && c <= 0xe4e) + || (c == 0xeb1) + || (c >= 0xeb4 && c <= 0xeb9) + || (c == 0xebb || c == 0xebc) + || (c >= 0xec8 && c <= 0xecd) + || (c == 0xf18 || c == 0xf19) + || (c == 0xf35) + || (c == 0xf37) + || (c == 0xf39) + || (c == 0xf3e || c == 0xf3f) + || (c >= 0xf71 && c <= 0xf84) + || (c == 0xf86 || c == 0xf87) + || (c >= 0xf8d && c <= 0xf97) + || (c >= 0xf99 && c <= 0xfbc) + || (c == 0xfc6) + || (c >= 0x102b && c <= 0x103e) + || (c >= 0x1056 && c <= 0x1059) + || (c >= 0x105e && c <= 0x1060) + || (c >= 0x1062 && c <= 0x1064) + || (c >= 0x1067 && c <= 0x106d) + || (c >= 0x1071 && c <= 0x1074) + || (c >= 0x1082 && c <= 0x108d) + || (c == 0x108f) + || (c >= 0x109a && c <= 0x109d) + || (c >= 0x135d && c <= 0x135f) + || (c >= 0x1712 && c <= 0x1714) + || (c >= 0x1732 && c <= 0x1734) + || (c == 0x1752 || c == 0x1753) + || (c == 0x1772 || c == 0x1773) + || (c >= 0x17b4 && c <= 0x17d3) + || (c == 0x17dd) + || (c == 0x1807) + || (c >= 0x180a && c <= 0x180d) + || (c >= 0x1820 && c <= 0x1877) + || (c >= 0x1885 && c <= 0x18aa) + || (c >= 0x1920 && c <= 0x192b) + || (c >= 0x1930 && c <= 0x193b) + || (c >= 0x1a17 && c <= 0x1a1b) + || (c >= 0x1a55 && c <= 0x1a5e) + || (c >= 0x1a60 && c <= 0x1a7c) + || (c == 0x1a7f) + || (c >= 0x1ab0 && c <= 0x1b04) + || (c >= 0x1b34 && c <= 0x1b44) + || (c >= 0x1b6b && c <= 0x1b73) + || (c >= 0x1b80 && c <= 0x1b82) + || (c >= 0x1ba1 && c <= 0x1bad) + || (c >= 0x1be6 && c <= 0x1bf3) + || (c >= 0x1c24 && c <= 0x1c37) + || (c >= 0x1cd0 && c <= 0x1cd2) + || (c >= 0x1cd4 && c <= 0x1ce8) + || (c == 0x1ced) + || (c >= 0x1cf2 && c <= 0x1cf4) + || (c >= 0x1cf7 && c <= 0x1cf9) + || (c >= 0x1dc0 && c <= 0x1dff) + || (c == 0x200d) + || (c >= 0x20d0 && c <= 0x20ff) + || (c >= 0x2190 && c <= 0x243f) + || (c >= 0x25a0 && c <= 0x26ff) + || (c >= 0x2cef && c <= 0x2cf1) + || (c == 0x2d7f) + || (c >= 0x2de0 && c <= 0x2dff) + || (c >= 0x302a && c <= 0x302f) + || (c == 0x3099 || c == 0x309a) + || (c == 0xa66f) + || (c >= 0xa674 && c <= 0xa67d) + || (c == 0xa69e || c == 0xa69f) + || (c == 0xa6f0 || c == 0xa6f1) + || (c == 0xa802) + || (c == 0xa806) + || (c == 0xa80b) + || (c >= 0xa823 && c <= 0xa827) + || (c >= 0xa840 && c <= 0xa872) + || (c == 0xa880 || c == 0xa881) + || (c >= 0xa8b4 && c <= 0xa8c5) + || (c >= 0xa8e0 && c <= 0xa8f1) + || (c >= 0xa926 && c <= 0xa92d) + || (c >= 0xa947 && c <= 0xa953) + || (c >= 0xa980 && c <= 0xa983) + || (c >= 0xa9b3 && c <= 0xa9c0) + || (c == 0xa9e5) + || (c >= 0xaa29 && c <= 0xaa36) + || (c == 0xaa43) + || (c == 0xaa4c || c == 0xaa4d) + || (c >= 0xaa7b && c <= 0xaa7d) + || (c == 0xaab0) + || (c >= 0xaab2 && c <= 0xaab4) + || (c == 0xaab7 || c == 0xaab8) + || (c == 0xaabe || c == 0xaabf) + || (c == 0xaac1) + || (c >= 0xaaeb && c <= 0xaaef) + || (c == 0xaaf5 || c == 0xaaf6) + || (c >= 0xabe3 && c <= 0xabea) + || (c == 0xabec || c == 0xabed) + || (c == 0xfb1e) + || (c >= 0xfe00 && c <= 0xfe0f) + || (c >= 0xfe20 && c <= 0xfe2f) + || (c == 0x101fd) + || (c == 0x102e0) + || (c >= 0x10376 && c <= 0x1037a) + || (c >= 0x10a01 && c <= 0x10a03) + || (c == 0x10a05 || c == 0x10a06) + || (c >= 0x10a0c && c <= 0x10a0f) + || (c >= 0x10a38 && c <= 0x10a3a) + || (c == 0x10a3f) + || (c >= 0x10ac0 && c <= 0x10ac5) + || (c == 0x10ac7) + || (c == 0x10ac9 || c == 0x10aca) + || (c >= 0x10acd && c <= 0x10ae1) + || (c >= 0x10ae4 && c <= 0x10ae6) + || (c >= 0x10aeb && c <= 0x10aef) + || (c >= 0x10b80 && c <= 0x10b91) + || (c >= 0x10ba9 && c <= 0x10bae) + || (c >= 0x11000 && c <= 0x11002) + || (c >= 0x11038 && c <= 0x11046) + || (c >= 0x1107f && c <= 0x11082) + || (c >= 0x110b0 && c <= 0x110ba) + || (c >= 0x11100 && c <= 0x11102) + || (c >= 0x11127 && c <= 0x11134) + || (c == 0x11173) + || (c >= 0x11180 && c <= 0x11182) + || (c >= 0x111b3 && c <= 0x111c0) + || (c >= 0x111ca && c <= 0x111cc) + || (c >= 0x1122c && c <= 0x11237) + || (c == 0x1123e) + || (c >= 0x112df && c <= 0x112ea) + || (c >= 0x11300 && c <= 0x11303) + || (c == 0x1133c) + || (c >= 0x1133e && c <= 0x11344) + || (c == 0x11347 || c == 0x11348) + || (c >= 0x1134b && c <= 0x1134d) + || (c == 0x11357) + || (c == 0x11362 || c == 0x11363) + || (c >= 0x11366 && c <= 0x1136c) + || (c >= 0x11370 && c <= 0x11374) + || (c >= 0x11435 && c <= 0x11446) + || (c >= 0x114b0 && c <= 0x114c3) + || (c >= 0x115af && c <= 0x115b5) + || (c >= 0x115b8 && c <= 0x115c0) + || (c == 0x115dc || c == 0x115dd) + || (c >= 0x11630 && c <= 0x11640) + || (c >= 0x116ab && c <= 0x116b7) + || (c >= 0x1171d && c <= 0x1172b) + || (c >= 0x11a01 && c <= 0x11a0a) + || (c >= 0x11a33 && c <= 0x11a39) + || (c >= 0x11a3b && c <= 0x11a3e) + || (c == 0x11a47) + || (c >= 0x11a51 && c <= 0x11a5b) + || (c >= 0x11a8a && c <= 0x11a99) + || (c >= 0x11c2f && c <= 0x11c36) + || (c >= 0x11c38 && c <= 0x11c3f) + || (c >= 0x11c92 && c <= 0x11ca7) + || (c >= 0x11ca9 && c <= 0x11cb6) + || (c >= 0x11d31 && c <= 0x11d36) + || (c == 0x11d3a) + || (c == 0x11d3c || c == 0x11d3d) + || (c >= 0x11d3f && c <= 0x11d45) + || (c == 0x11d47) + || (c >= 0x16af0 && c <= 0x16af4) + || (c >= 0x16b30 && c <= 0x16b36) + || (c >= 0x16f51 && c <= 0x16f7e) + || (c >= 0x16f8f && c <= 0x16f92) + || (c == 0x1bc9d || c == 0x1bc9e) + || (c >= 0x1d165 && c <= 0x1d169) + || (c >= 0x1d16d && c <= 0x1d172) + || (c >= 0x1d17b && c <= 0x1d182) + || (c >= 0x1d185 && c <= 0x1d18b) + || (c >= 0x1d1aa && c <= 0x1d1ad) + || (c >= 0x1d242 && c <= 0x1d244) + || (c >= 0x1da00 && c <= 0x1da36) + || (c >= 0x1da3b && c <= 0x1da6c) + || (c == 0x1da75) + || (c == 0x1da84) + || (c >= 0x1da9b && c <= 0x1da9f) + || (c >= 0x1daa1 && c <= 0x1daaf) + || (c >= 0x1e000 && c <= 0x1e006) + || (c >= 0x1e008 && c <= 0x1e018) + || (c >= 0x1e01b && c <= 0x1e021) + || (c == 0x1e023 || c == 0x1e024) + || (c >= 0x1e026 && c <= 0x1e02a) + || (c >= 0x1e8d0 && c <= 0x1e8d6) + || (c >= 0x1e900 && c <= 0x1e94a) + || (c >= 0xe0100 && c <= 0xe01ef) + ; +} + +bool IsCharXxxfix(wchar_t c) +{ + return IsCharPrefix(c) || IsCharSuffix(c); +} +bool IsCharUnstableWidth(wchar_t c) +{ + return IsCharFullWidth(c) || IsCharXxxfix(c); +} diff --git a/utils/src/CharClasses_mk.cpp b/utils/src/CharClasses_mk.cpp new file mode 100644 index 00000000..cc78f61f --- /dev/null +++ b/utils/src/CharClasses_mk.cpp @@ -0,0 +1,110 @@ +#include "unicode/uchar.h" +#include "unicode/utypes.h" +#include "unicode/stringoptions.h" +#include "stdio.h" + +/// Usage: +/// g++ -O2 ./CharClasses_mk.cpp -o /tmp/CharClasses_mk -licuuc && /tmp/CharClasses_mk > CharClasses.cpp + +static bool IsUnstableWidthBlock(int block) +{ + return block == UBLOCK_ARROWS + || block == UBLOCK_MATHEMATICAL_OPERATORS + || block == UBLOCK_MISCELLANEOUS_TECHNICAL + || block == UBLOCK_CONTROL_PICTURES + || block == UBLOCK_GEOMETRIC_SHAPES + || block == UBLOCK_MISCELLANEOUS_SYMBOLS + || block == UBLOCK_COMBINING_DIACRITICAL_MARKS + || block == UBLOCK_COMBINING_MARKS_FOR_SYMBOLS + || block == UBLOCK_COMBINING_HALF_MARKS + || block == UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT + || block == UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED + ; +} + +template + static void WriteFunc(const char *name, FN fn) +{ + UChar32 c, last = 0x10ffff; + UChar32 start = 0; + bool first = true; + printf("bool %s(wchar_t c)\n", name); + printf("{\n"); + printf("\treturn "); + for (c = 1; c <= last + 1; ++c) { + const bool matched = (c <= last) && fn(c); + if (matched) { + if (!start) { + start = c; + } + + } else if (start) { + if (first) { + first = false; + } else { + printf("\t || "); + } + if (start + 2 == c) { + printf("(c == 0x%x || c == 0x%x)\n", (unsigned int)start, (unsigned int)c - 1); + } else if (start + 1 < c) { + printf("(c >= 0x%x && c <= 0x%x)\n", (unsigned int)start, (unsigned int)c - 1); + } else { + printf("(c == 0x%x)\n", (unsigned int)start); + } + start = 0; + } + } + printf("\t;\n"); + printf("}\n\n"); +} + +int main() +{ +// printf("%u\n", u_getIntPropertyValue(0xcbe, UCHAR_GENERAL_CATEGORY)); +// return -1; + UChar32 c, last = 0x10ffff; + UChar32 unstable_start = 0; + bool first = true; + printf("// this file autogenerated by IsUnstableWidthChar_mk.cpp\n\n"); + printf("#include \n\n"); + + WriteFunc("IsCharFullWidth", [](wchar_t c)->bool { + const auto block = u_getIntPropertyValue(c, UCHAR_BLOCK); + return u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH) != 0 + || block == UBLOCK_ARROWS + || block == UBLOCK_MATHEMATICAL_OPERATORS + || block == UBLOCK_MISCELLANEOUS_TECHNICAL + || block == UBLOCK_CONTROL_PICTURES + || block == UBLOCK_GEOMETRIC_SHAPES + || block == UBLOCK_MISCELLANEOUS_SYMBOLS; + }); + + WriteFunc("IsCharPrefix", [](wchar_t c)->bool { + const auto cat = u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY); + return (cat == U_SURROGATE); + }); + + WriteFunc("IsCharSuffix", [](wchar_t c)->bool { + const auto block = u_getIntPropertyValue(c, UCHAR_BLOCK); + const auto jt = u_getIntPropertyValue(c, UCHAR_JOINING_TYPE); + const auto cat = u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY); + return ( (jt != U_JT_NON_JOINING && jt != U_JT_TRANSPARENT) + || cat == U_NON_SPACING_MARK || cat == U_COMBINING_SPACING_MARK + || IsUnstableWidthBlock(u_getIntPropertyValue(c, UCHAR_BLOCK)) + || block == UBLOCK_COMBINING_DIACRITICAL_MARKS + || block == UBLOCK_COMBINING_MARKS_FOR_SYMBOLS + || block == UBLOCK_COMBINING_HALF_MARKS + || block == UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT + || block == UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED); + }); + + printf("bool IsCharXxxfix(wchar_t c)\n"); + printf("{\n"); + printf("\treturn IsCharPrefix(c) || IsCharSuffix(c);\n"); + printf("}\n"); + + printf("bool IsCharUnstableWidth(wchar_t c)\n"); + printf("{\n"); + printf("\treturn IsCharFullWidth(c) || IsCharXxxfix(c);\n"); + printf("}\n"); +} diff --git a/utils/src/StrVisual.cpp b/utils/src/StrVisual.cpp new file mode 100644 index 00000000..0ded4932 --- /dev/null +++ b/utils/src/StrVisual.cpp @@ -0,0 +1,105 @@ +#include "utils.h" + +size_t StrVisualLength(const wchar_t *pwz, size_t n) +{ + size_t out = 0; + for (size_t i = 0; i < n; ++i) { + if (IsCharFullWidth(*pwz)) { + out+= 2; + } else if ((i == n - 1 || !IsCharPrefix(pwz[i])) && (i == 0 || !IsCharSuffix(pwz[i]))) { + ++out; + } + } + return out; +} + +void StrVisualTruncateLeft(wchar_t *pwz, size_t &n, size_t vl_max) +{ + size_t vl = StrVisualLength(pwz, n); + if (vl <= vl_max || n < 3) { + return; + } + + for (size_t ofs = 3; ofs < n; ++ofs) { + if (!IsCharXxxfix(pwz[ofs]) && StrVisualLength(pwz + ofs, n - ofs) + 3 <= vl_max) { + n-= ofs; + wmemmove(pwz + 3, pwz + ofs, n); + n+= 3; + wmemcpy(pwz, L"...", 3); + return; + } + } + wcsncpy(pwz, L"...", vl_max); + n = vl_max; +} + +void StrVisualTruncateRight(wchar_t *pwz, size_t &n, size_t vl_max) +{ + size_t vl = StrVisualLength(pwz, n); + if (vl <= vl_max || n < 3) { + return; + } + + n-= 3; // pre-reserve space for ... + do { + while (n > 0 && IsCharXxxfix(pwz[n - 1])) { + --n; + } + if (n == 0) { + break; + } + --n; + } while (StrVisualLength(pwz, n) + 3 > vl_max); + + wmemcpy(&pwz[n], L"...", 3); + n+= 3; +} + +void StrVisualTruncateCenter(wchar_t *pwz, size_t &n, size_t vl_max) +{ + size_t vl = StrVisualLength(pwz, n); + if (vl <= vl_max || n < 3) { + return; + } + + auto cut_start = n / 2; + if (cut_start > 0) { + --cut_start; + } + if (cut_start > 0) { + --cut_start; + } + while (cut_start > 0 && IsCharXxxfix(pwz[cut_start])) { + --cut_start; + } + auto cut_end = cut_start + 3; + while (cut_end < n && IsCharXxxfix(pwz[cut_end])) { + ++cut_end; + } + + while (StrVisualLength(pwz, cut_start) + StrVisualLength(pwz + cut_end, n - cut_end) + 3 > vl_max) { + if (cut_start > 0) { + --cut_start; + while (cut_start > 0 && IsCharXxxfix(pwz[cut_start])) { + --cut_start; + } + if (StrVisualLength(pwz, cut_start) + StrVisualLength(pwz + cut_end, n - cut_end) + 3 <= vl_max) { + break; + } + } + if (cut_end < n) { + ++cut_end; + while (cut_end < n && IsCharXxxfix(pwz[cut_end])) { + ++cut_end; + } + } + } + + wmemmove(&pwz[cut_start + 3], &pwz[cut_end], n - cut_end); + wmemcpy(&pwz[cut_start], L"...", 3); + n-= (cut_end - cut_start); + n+= 3; +} + + + -- cgit v1.2.3