diff options
author | elfmz <fenix1905@tut.by> | 2022-10-11 23:21:32 +0300 |
---|---|---|
committer | elfmz <fenix1905@tut.by> | 2022-10-12 00:18:27 +0300 |
commit | 9c01830a901898402bcabe777fa155534729f239 (patch) | |
tree | efadb3c690b4c663d92135050a0a2f731fc011ad | |
parent | 83a268c783d50999c6b3e95fcdb9301e0b1a2288 (diff) |
initial utils for visual string length improvements (branch: str-visual)
-rw-r--r-- | WinPort/CMakeLists.txt | 1 | ||||
-rw-r--r-- | WinPort/src/Backend/TTY/IsUnstableWidthChar_mk.cpp | 72 | ||||
-rw-r--r-- | WinPort/src/Backend/TTY/TTYBackend.cpp | 4 | ||||
-rw-r--r-- | far2l/src/mix/strmix.cpp | 115 | ||||
-rw-r--r-- | utils/CMakeLists.txt | 2 | ||||
-rw-r--r-- | utils/include/utils.h | 12 | ||||
-rw-r--r-- | utils/src/CharClasses.cpp (renamed from WinPort/src/Backend/TTY/IsUnstableWidthChar.cpp) | 374 | ||||
-rw-r--r-- | utils/src/CharClasses_mk.cpp | 110 | ||||
-rw-r--r-- | utils/src/StrVisual.cpp | 105 |
9 files changed, 483 insertions, 312 deletions
diff --git a/WinPort/CMakeLists.txt b/WinPort/CMakeLists.txt index 2467b440..62d8320b 100644 --- a/WinPort/CMakeLists.txt +++ b/WinPort/CMakeLists.txt @@ -36,7 +36,6 @@ src/Backend/TTY/TTYOutput.cpp src/Backend/TTY/TTYFar2lClipboardBackend.cpp src/Backend/TTY/TTYNegotiateFar2l.cpp src/Backend/TTY/TTYXGlue.cpp -src/Backend/TTY/IsUnstableWidthChar.cpp ) add_library (WinPort ${SOURCES}) diff --git a/WinPort/src/Backend/TTY/IsUnstableWidthChar_mk.cpp b/WinPort/src/Backend/TTY/IsUnstableWidthChar_mk.cpp deleted file mode 100644 index e5684611..00000000 --- a/WinPort/src/Backend/TTY/IsUnstableWidthChar_mk.cpp +++ /dev/null @@ -1,72 +0,0 @@ -#include "unicode/uchar.h" -#include "unicode/utypes.h" -#include "unicode/stringoptions.h" -#include "stdio.h" - -/// Usage: -/// g++ -O2 ./IsUnstableWidthChar_mk.cpp -o /tmp/IsUnstableWidthChar_mk -licuuc && /tmp/IsUnstableWidthChar_mk > IsUnstableWidthChar.cpp - -static bool IsUnstableWidthBlock(int block) -{ - return block == UBLOCK_ARROWS - || block == UBLOCK_MATHEMATICAL_OPERATORS - || block == UBLOCK_MISCELLANEOUS_TECHNICAL - || block == UBLOCK_CONTROL_PICTURES - || block == UBLOCK_GEOMETRIC_SHAPES - || block == UBLOCK_MISCELLANEOUS_SYMBOLS - || block == UBLOCK_COMBINING_DIACRITICAL_MARKS - || block == UBLOCK_COMBINING_MARKS_FOR_SYMBOLS - || block == UBLOCK_COMBINING_HALF_MARKS - || block == UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT - || block == UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED - ; -} - -int main() -{ -// printf("%u\n", u_getIntPropertyValue(0xcbe, UCHAR_GENERAL_CATEGORY)); -// return -1; - UChar32 c, last = 0x10ffff; - UChar32 unstable_start = 0; - bool first = true; - printf("// this file autogenerated by IsUnstableWidthChar_mk.cpp\n\n"); - printf("#include <wchar.h>\n\n"); - printf("bool IsUnstableWidthChar(wchar_t c)\n"); - printf("{\n"); - printf("\treturn "); - for (c = 1; c <= last + 1; ++c) { - bool unstable = false; - if (c <= last) { - const auto width = u_getIntPropertyValue(c, 
UCHAR_EAST_ASIAN_WIDTH); - const auto jt = u_getIntPropertyValue(c, UCHAR_JOINING_TYPE); - const auto cat = u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY); - unstable = (width == U_EA_FULLWIDTH || width == U_EA_WIDE - || (jt != U_JT_NON_JOINING && jt != U_JT_TRANSPARENT) - || cat == U_NON_SPACING_MARK || cat == U_COMBINING_SPACING_MARK || cat == U_SURROGATE - || IsUnstableWidthBlock(u_getIntPropertyValue(c, UCHAR_BLOCK))); - } - - if (unstable) { - if (!unstable_start) { - unstable_start = c; - } - - } else if (unstable_start) { - if (first) { - first = false; - } else { - printf("\t || "); - } - if (unstable_start + 2 == c) { - printf("(c == 0x%x || c == 0x%x)\n", (unsigned int)unstable_start, (unsigned int)c - 1); - } else if (unstable_start + 1 < c) { - printf("(c >= 0x%x && c <= 0x%x)\n", (unsigned int)unstable_start, (unsigned int)c - 1); - } else { - printf("(c == 0x%x)\n", (unsigned int)unstable_start); - } - unstable_start = 0; - } - } - printf("\t;\n"); - printf("}\n"); -} diff --git a/WinPort/src/Backend/TTY/TTYBackend.cpp b/WinPort/src/Backend/TTY/TTYBackend.cpp index 7c893d83..62c52d4e 100644 --- a/WinPort/src/Backend/TTY/TTYBackend.cpp +++ b/WinPort/src/Backend/TTY/TTYBackend.cpp @@ -28,8 +28,6 @@ #include "FarTTY.h" #include "../FSClipboardBackend.h" -bool IsUnstableWidthChar(wchar_t c); - static volatile long s_terminal_size_change_id = 0; static TTYBackend * g_vtb = nullptr; @@ -386,7 +384,7 @@ bool TTYBackend::IsUnstableWidthCharCached(wchar_t c) { const size_t h = size_t(c) % ARRAYSIZE(_stable_width_chars_cache); if (_stable_width_chars_cache[h] != c) { - if (IsUnstableWidthChar(c)) { + if (IsCharUnstableWidth(c)) { return true; } _stable_width_chars_cache[h] = c; diff --git a/far2l/src/mix/strmix.cpp b/far2l/src/mix/strmix.cpp index 1e422050..190f9cd2 100644 --- a/far2l/src/mix/strmix.cpp +++ b/far2l/src/mix/strmix.cpp @@ -225,20 +225,13 @@ wchar_t* WINAPI TruncStrFromEnd(wchar_t *Str,int MaxLength) { assert(MaxLength >= 0); - MaxLength=Max(0, 
MaxLength); + MaxLength = Max(0, MaxLength); - if (Str) - { - int Length = StrLength(Str); - - if (Length > MaxLength) - { - if (MaxLength>3) - wmemcpy(Str+MaxLength-3, L"...", 3); - - Str[MaxLength]=0; - } - } + const size_t Len = StrLength(Str); + size_t n = Len; + StrVisualTruncateRight(Str, n, MaxLength); + assert(n <= Len); + Str[n] = 0; return Str; } @@ -248,27 +241,13 @@ wchar_t* WINAPI TruncStr(wchar_t *Str,int MaxLength) { assert(MaxLength >= 0); - MaxLength=Max(0, MaxLength); + MaxLength = Max(0, MaxLength); - if (Str) - { - int Length=StrLength(Str); - - if (MaxLength<0) - MaxLength=0; - - if (Length > MaxLength) - { - if (MaxLength>3) - { - wchar_t *MovePos = Str+Length-MaxLength+3; - wmemmove(Str+3, MovePos, StrLength(MovePos)+1); - wmemcpy(Str,L"...",3); - } - - Str[MaxLength]=0; - } - } + const size_t Len = StrLength(Str); + size_t n = Len; + StrVisualTruncateLeft(Str, n, MaxLength); + assert(n <= Len); + Str[n] = 0; return Str; } @@ -286,31 +265,13 @@ wchar_t* TruncStrFromCenter(wchar_t *Str, int MaxLength) { assert(MaxLength >= 0); - MaxLength=Max(0, MaxLength); - - if (Str) - { - int Length = StrLength(Str); - - if (MaxLength < 0) - MaxLength=0; - - if (Length > MaxLength) - { - const int DotsLen = 3; - - if (MaxLength > DotsLen) - { - int Len1 = (MaxLength - DotsLen) / 2; - int Len2 = MaxLength - DotsLen - Len1; - wmemcpy(Str + Len1, L"...", DotsLen); - wmemmove(Str + Len1 + DotsLen, Str + Length - Len2, Len2); - } - - Str[MaxLength] = 0; - } - } + MaxLength = Max(0, MaxLength); + const size_t Len = StrLength(Str); + size_t n = Len; + StrVisualTruncateCenter(Str, n, MaxLength); + assert(n <= Len); + Str[n] = 0; return Str; } @@ -324,44 +285,8 @@ FARString& TruncStrFromCenter(FARString &strStr, int MaxLength) wchar_t* WINAPI TruncPathStr(wchar_t *Str, int MaxLength) { - assert(MaxLength >= 0); - - MaxLength=Max(0, MaxLength); - - if (Str) - { - int nLength = (int)wcslen(Str); - - if ((MaxLength > 0) && (nLength > MaxLength) && (nLength >= 2)) - { 
- wchar_t *lpStart = nullptr; - -/* if (*Str && (Str[1] == L':') && IsSlash(Str[2])) - lpStart = Str+3; - else*/ - { - if ((Str[0] == GOOD_SLASH) && (Str[1] == GOOD_SLASH)) - { - if ((lpStart = const_cast<wchar_t*>(FirstSlash(Str+2))) ) - { - wchar_t *lpStart2=lpStart; - - if ((lpStart-Str < nLength) && ((lpStart=const_cast<wchar_t*>(FirstSlash(lpStart2+1))))) - lpStart++; - } - } - } - - if (!lpStart || (lpStart-Str > MaxLength-5)) - return TruncStr(Str, MaxLength); - - wchar_t *lpInPos = lpStart+3+(nLength-MaxLength); - wmemmove(lpStart+3, lpInPos, (wcslen(lpInPos)+1)); - wmemcpy(lpStart, L"...", 3); - } - } - - return Str; + // TODO + return TruncStr(Str, MaxLength); } diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 44333da9..224e5644 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -30,6 +30,8 @@ set(SOURCES src/TestPath.cpp src/PipeIPC.cpp src/PathParts.cpp + src/CharClasses.cpp + src/StrVisual.cpp ) add_library (utils ${SOURCES}) diff --git a/utils/include/utils.h b/utils/include/utils.h index cb0b7e03..20163a34 100644 --- a/utils/include/utils.h +++ b/utils/include/utils.h @@ -332,3 +332,15 @@ template <typename ARRAY_T, class CHAR_T> } #define DBGLINE fprintf(stderr, "%d %d @%s\n", getpid(), __LINE__, __FILE__) + +bool IsCharFullWidth(wchar_t c); +bool IsCharPrefix(wchar_t c); +bool IsCharSuffix(wchar_t c); +bool IsCharXxxfix(wchar_t c); + +bool IsCharUnstableWidth(wchar_t c); + +size_t StrVisualLength(const wchar_t *pwz, size_t n); +void StrVisualTruncateLeft(wchar_t *pwz, size_t &n, size_t vl_max); +void StrVisualTruncateRight(wchar_t *pwz, size_t &n, size_t vl_max); +void StrVisualTruncateCenter(wchar_t *pwz, size_t &n, size_t vl_max); diff --git a/WinPort/src/Backend/TTY/IsUnstableWidthChar.cpp b/utils/src/CharClasses.cpp index 636266c3..3a0b4142 100644 --- a/WinPort/src/Backend/TTY/IsUnstableWidthChar.cpp +++ b/utils/src/CharClasses.cpp @@ -2,7 +2,212 @@ #include <wchar.h> -bool IsUnstableWidthChar(wchar_t c) +bool 
IsCharFullWidth(wchar_t c) +{ + return (c >= 0x20 && c <= 0x7e) + || (c >= 0xa1 && c <= 0xa8) + || (c == 0xaa) + || (c >= 0xac && c <= 0xb4) + || (c >= 0xb6 && c <= 0xba) + || (c >= 0xbc && c <= 0xbf) + || (c == 0xc6) + || (c == 0xd0) + || (c == 0xd7 || c == 0xd8) + || (c >= 0xde && c <= 0xe1) + || (c == 0xe6) + || (c >= 0xe8 && c <= 0xea) + || (c == 0xec || c == 0xed) + || (c == 0xf0) + || (c == 0xf2 || c == 0xf3) + || (c >= 0xf7 && c <= 0xfa) + || (c == 0xfc) + || (c == 0xfe) + || (c == 0x101) + || (c == 0x111) + || (c == 0x113) + || (c == 0x11b) + || (c == 0x126 || c == 0x127) + || (c == 0x12b) + || (c >= 0x131 && c <= 0x133) + || (c == 0x138) + || (c >= 0x13f && c <= 0x142) + || (c == 0x144) + || (c >= 0x148 && c <= 0x14b) + || (c == 0x14d) + || (c == 0x152 || c == 0x153) + || (c == 0x166 || c == 0x167) + || (c == 0x16b) + || (c == 0x1ce) + || (c == 0x1d0) + || (c == 0x1d2) + || (c == 0x1d4) + || (c == 0x1d6) + || (c == 0x1d8) + || (c == 0x1da) + || (c == 0x1dc) + || (c == 0x251) + || (c == 0x261) + || (c == 0x2c4) + || (c == 0x2c7) + || (c >= 0x2c9 && c <= 0x2cb) + || (c == 0x2cd) + || (c == 0x2d0) + || (c >= 0x2d8 && c <= 0x2db) + || (c == 0x2dd) + || (c == 0x2df) + || (c >= 0x300 && c <= 0x36f) + || (c >= 0x391 && c <= 0x3a1) + || (c >= 0x3a3 && c <= 0x3a9) + || (c >= 0x3b1 && c <= 0x3c1) + || (c >= 0x3c3 && c <= 0x3c9) + || (c == 0x401) + || (c >= 0x410 && c <= 0x44f) + || (c == 0x451) + || (c >= 0x1100 && c <= 0x115f) + || (c == 0x2010) + || (c >= 0x2013 && c <= 0x2016) + || (c == 0x2018 || c == 0x2019) + || (c == 0x201c || c == 0x201d) + || (c >= 0x2020 && c <= 0x2022) + || (c >= 0x2024 && c <= 0x2027) + || (c == 0x2030) + || (c == 0x2032 || c == 0x2033) + || (c == 0x2035) + || (c == 0x203b) + || (c == 0x203e) + || (c == 0x2074) + || (c == 0x207f) + || (c >= 0x2081 && c <= 0x2084) + || (c == 0x20a9) + || (c == 0x20ac) + || (c == 0x2103) + || (c == 0x2105) + || (c == 0x2109) + || (c == 0x2113) + || (c == 0x2116) + || (c == 0x2121 || c == 0x2122) + || (c == 
0x2126) + || (c == 0x212b) + || (c == 0x2153 || c == 0x2154) + || (c >= 0x215b && c <= 0x215e) + || (c >= 0x2160 && c <= 0x216b) + || (c >= 0x2170 && c <= 0x2179) + || (c == 0x2189) + || (c >= 0x2190 && c <= 0x243f) + || (c >= 0x2460 && c <= 0x24e9) + || (c >= 0x24eb && c <= 0x254b) + || (c >= 0x2550 && c <= 0x2573) + || (c >= 0x2580 && c <= 0x258f) + || (c >= 0x2592 && c <= 0x2595) + || (c >= 0x25a0 && c <= 0x26ff) + || (c == 0x2705) + || (c == 0x270a || c == 0x270b) + || (c == 0x2728) + || (c == 0x273d) + || (c == 0x274c) + || (c == 0x274e) + || (c >= 0x2753 && c <= 0x2755) + || (c == 0x2757) + || (c >= 0x2776 && c <= 0x277f) + || (c >= 0x2795 && c <= 0x2797) + || (c == 0x27b0) + || (c == 0x27bf) + || (c >= 0x27e6 && c <= 0x27ed) + || (c == 0x2985 || c == 0x2986) + || (c == 0x2b1b || c == 0x2b1c) + || (c == 0x2b50) + || (c >= 0x2b55 && c <= 0x2b59) + || (c >= 0x2e80 && c <= 0x2e99) + || (c >= 0x2e9b && c <= 0x2ef3) + || (c >= 0x2f00 && c <= 0x2fd5) + || (c >= 0x2ff0 && c <= 0x2ffb) + || (c >= 0x3000 && c <= 0x303e) + || (c >= 0x3041 && c <= 0x3096) + || (c >= 0x3099 && c <= 0x30ff) + || (c >= 0x3105 && c <= 0x312e) + || (c >= 0x3131 && c <= 0x318e) + || (c >= 0x3190 && c <= 0x31ba) + || (c >= 0x31c0 && c <= 0x31e3) + || (c >= 0x31f0 && c <= 0x321e) + || (c >= 0x3220 && c <= 0x32fe) + || (c >= 0x3300 && c <= 0x4dbf) + || (c >= 0x4e00 && c <= 0xa48c) + || (c >= 0xa490 && c <= 0xa4c6) + || (c >= 0xa960 && c <= 0xa97c) + || (c >= 0xac00 && c <= 0xd7a3) + || (c >= 0xe000 && c <= 0xfaff) + || (c >= 0xfe00 && c <= 0xfe19) + || (c >= 0xfe30 && c <= 0xfe52) + || (c >= 0xfe54 && c <= 0xfe66) + || (c >= 0xfe68 && c <= 0xfe6b) + || (c >= 0xff01 && c <= 0xffbe) + || (c >= 0xffc2 && c <= 0xffc7) + || (c >= 0xffca && c <= 0xffcf) + || (c >= 0xffd2 && c <= 0xffd7) + || (c >= 0xffda && c <= 0xffdc) + || (c >= 0xffe0 && c <= 0xffe6) + || (c >= 0xffe8 && c <= 0xffee) + || (c == 0xfffd) + || (c == 0x16fe0 || c == 0x16fe1) + || (c >= 0x17000 && c <= 0x187ec) + || (c >= 0x18800 && c 
<= 0x18af2) + || (c >= 0x1b000 && c <= 0x1b11e) + || (c >= 0x1b170 && c <= 0x1b2fb) + || (c == 0x1f004) + || (c == 0x1f0cf) + || (c >= 0x1f100 && c <= 0x1f10a) + || (c >= 0x1f110 && c <= 0x1f12d) + || (c >= 0x1f130 && c <= 0x1f169) + || (c >= 0x1f170 && c <= 0x1f1ac) + || (c >= 0x1f200 && c <= 0x1f202) + || (c >= 0x1f210 && c <= 0x1f23b) + || (c >= 0x1f240 && c <= 0x1f248) + || (c == 0x1f250 || c == 0x1f251) + || (c >= 0x1f260 && c <= 0x1f265) + || (c >= 0x1f300 && c <= 0x1f320) + || (c >= 0x1f32d && c <= 0x1f335) + || (c >= 0x1f337 && c <= 0x1f37c) + || (c >= 0x1f37e && c <= 0x1f393) + || (c >= 0x1f3a0 && c <= 0x1f3ca) + || (c >= 0x1f3cf && c <= 0x1f3d3) + || (c >= 0x1f3e0 && c <= 0x1f3f0) + || (c == 0x1f3f4) + || (c >= 0x1f3f8 && c <= 0x1f43e) + || (c == 0x1f440) + || (c >= 0x1f442 && c <= 0x1f4fc) + || (c >= 0x1f4ff && c <= 0x1f53d) + || (c >= 0x1f54b && c <= 0x1f54e) + || (c >= 0x1f550 && c <= 0x1f567) + || (c == 0x1f57a) + || (c == 0x1f595 || c == 0x1f596) + || (c == 0x1f5a4) + || (c >= 0x1f5fb && c <= 0x1f64f) + || (c >= 0x1f680 && c <= 0x1f6c5) + || (c == 0x1f6cc) + || (c >= 0x1f6d0 && c <= 0x1f6d2) + || (c == 0x1f6eb || c == 0x1f6ec) + || (c >= 0x1f6f4 && c <= 0x1f6f8) + || (c >= 0x1f910 && c <= 0x1f93e) + || (c >= 0x1f940 && c <= 0x1f94c) + || (c >= 0x1f950 && c <= 0x1f96b) + || (c >= 0x1f980 && c <= 0x1f997) + || (c == 0x1f9c0) + || (c >= 0x1f9d0 && c <= 0x1f9e6) + || (c >= 0x20000 && c <= 0x2fffd) + || (c >= 0x30000 && c <= 0x3fffd) + || (c >= 0xe0100 && c <= 0xe01ef) + || (c >= 0xf0000 && c <= 0xffffd) + || (c >= 0x100000 && c <= 0x10fffd) + ; +} + +bool IsCharPrefix(wchar_t c) +{ + return (c >= 0xd800 && c <= 0xdfff) + ; +} + +bool IsCharSuffix(wchar_t c) { return (c >= 0x300 && c <= 0x36f) || (c >= 0x483 && c <= 0x487) @@ -27,19 +232,19 @@ bool IsUnstableWidthChar(wchar_t c) || (c >= 0x7a6 && c <= 0x7b0) || (c >= 0x7ca && c <= 0x7f3) || (c == 0x7fa) - || (c == 0x7fd) || (c >= 0x816 && c <= 0x819) || (c >= 0x81b && c <= 0x823) || (c >= 0x825 && c <= 
0x827) || (c >= 0x829 && c <= 0x82d) - || (c >= 0x840 && c <= 0x85b) + || (c >= 0x840 && c <= 0x855) + || (c >= 0x859 && c <= 0x85b) || (c == 0x860) || (c >= 0x862 && c <= 0x865) || (c >= 0x867 && c <= 0x86a) || (c >= 0x8a0 && c <= 0x8ac) || (c >= 0x8ae && c <= 0x8b4) - || (c >= 0x8b6 && c <= 0x8c7) - || (c >= 0x8d3 && c <= 0x8e1) + || (c >= 0x8b6 && c <= 0x8bd) + || (c >= 0x8d4 && c <= 0x8e1) || (c >= 0x8e3 && c <= 0x903) || (c >= 0x93a && c <= 0x93c) || (c >= 0x93e && c <= 0x94f) @@ -52,7 +257,6 @@ bool IsUnstableWidthChar(wchar_t c) || (c >= 0x9cb && c <= 0x9cd) || (c == 0x9d7) || (c == 0x9e2 || c == 0x9e3) - || (c == 0x9fe) || (c >= 0xa01 && c <= 0xa03) || (c == 0xa3c) || (c >= 0xa3e && c <= 0xa42) @@ -73,14 +277,14 @@ bool IsUnstableWidthChar(wchar_t c) || (c >= 0xb3e && c <= 0xb44) || (c == 0xb47 || c == 0xb48) || (c >= 0xb4b && c <= 0xb4d) - || (c >= 0xb55 && c <= 0xb57) + || (c == 0xb56 || c == 0xb57) || (c == 0xb62 || c == 0xb63) || (c == 0xb82) || (c >= 0xbbe && c <= 0xbc2) || (c >= 0xbc6 && c <= 0xbc8) || (c >= 0xbca && c <= 0xbcd) || (c == 0xbd7) - || (c >= 0xc00 && c <= 0xc04) + || (c >= 0xc00 && c <= 0xc03) || (c >= 0xc3e && c <= 0xc44) || (c >= 0xc46 && c <= 0xc48) || (c >= 0xc4a && c <= 0xc4d) @@ -100,7 +304,7 @@ bool IsUnstableWidthChar(wchar_t c) || (c >= 0xd4a && c <= 0xd4d) || (c == 0xd57) || (c == 0xd62 || c == 0xd63) - || (c >= 0xd81 && c <= 0xd83) + || (c == 0xd82 || c == 0xd83) || (c == 0xdca) || (c >= 0xdcf && c <= 0xdd4) || (c == 0xdd6) @@ -110,7 +314,8 @@ bool IsUnstableWidthChar(wchar_t c) || (c >= 0xe34 && c <= 0xe3a) || (c >= 0xe47 && c <= 0xe4e) || (c == 0xeb1) - || (c >= 0xeb4 && c <= 0xebc) + || (c >= 0xeb4 && c <= 0xeb9) + || (c == 0xebb || c == 0xebc) || (c >= 0xec8 && c <= 0xecd) || (c == 0xf18 || c == 0xf19) || (c == 0xf35) @@ -131,7 +336,6 @@ bool IsUnstableWidthChar(wchar_t c) || (c >= 0x1082 && c <= 0x108d) || (c == 0x108f) || (c >= 0x109a && c <= 0x109d) - || (c >= 0x1100 && c <= 0x115f) || (c >= 0x135d && c <= 0x135f) || (c 
>= 0x1712 && c <= 0x1714) || (c >= 0x1732 && c <= 0x1734) @@ -141,7 +345,7 @@ bool IsUnstableWidthChar(wchar_t c) || (c == 0x17dd) || (c == 0x1807) || (c >= 0x180a && c <= 0x180d) - || (c >= 0x1820 && c <= 0x1878) + || (c >= 0x1820 && c <= 0x1877) || (c >= 0x1885 && c <= 0x18aa) || (c >= 0x1920 && c <= 0x192b) || (c >= 0x1930 && c <= 0x193b) @@ -159,44 +363,18 @@ bool IsUnstableWidthChar(wchar_t c) || (c >= 0x1cd0 && c <= 0x1cd2) || (c >= 0x1cd4 && c <= 0x1ce8) || (c == 0x1ced) - || (c == 0x1cf4) + || (c >= 0x1cf2 && c <= 0x1cf4) || (c >= 0x1cf7 && c <= 0x1cf9) || (c >= 0x1dc0 && c <= 0x1dff) || (c == 0x200d) || (c >= 0x20d0 && c <= 0x20ff) || (c >= 0x2190 && c <= 0x243f) || (c >= 0x25a0 && c <= 0x26ff) - || (c == 0x2705) - || (c == 0x270a || c == 0x270b) - || (c == 0x2728) - || (c == 0x274c) - || (c == 0x274e) - || (c >= 0x2753 && c <= 0x2755) - || (c == 0x2757) - || (c >= 0x2795 && c <= 0x2797) - || (c == 0x27b0) - || (c == 0x27bf) - || (c == 0x2b1b || c == 0x2b1c) - || (c == 0x2b50) - || (c == 0x2b55) || (c >= 0x2cef && c <= 0x2cf1) || (c == 0x2d7f) || (c >= 0x2de0 && c <= 0x2dff) - || (c >= 0x2e80 && c <= 0x2e99) - || (c >= 0x2e9b && c <= 0x2ef3) - || (c >= 0x2f00 && c <= 0x2fd5) - || (c >= 0x2ff0 && c <= 0x2ffb) - || (c >= 0x3000 && c <= 0x303e) - || (c >= 0x3041 && c <= 0x3096) - || (c >= 0x3099 && c <= 0x30ff) - || (c >= 0x3105 && c <= 0x312f) - || (c >= 0x3131 && c <= 0x318e) - || (c >= 0x3190 && c <= 0x31e3) - || (c >= 0x31f0 && c <= 0x321e) - || (c >= 0x3220 && c <= 0x3247) - || (c >= 0x3250 && c <= 0x4dbf) - || (c >= 0x4e00 && c <= 0xa48c) - || (c >= 0xa490 && c <= 0xa4c6) + || (c >= 0x302a && c <= 0x302f) + || (c == 0x3099 || c == 0x309a) || (c == 0xa66f) || (c >= 0xa674 && c <= 0xa67d) || (c == 0xa69e || c == 0xa69f) @@ -205,15 +383,12 @@ bool IsUnstableWidthChar(wchar_t c) || (c == 0xa806) || (c == 0xa80b) || (c >= 0xa823 && c <= 0xa827) - || (c == 0xa82c) || (c >= 0xa840 && c <= 0xa872) || (c == 0xa880 || c == 0xa881) || (c >= 0xa8b4 && c <= 0xa8c5) 
|| (c >= 0xa8e0 && c <= 0xa8f1) - || (c == 0xa8ff) || (c >= 0xa926 && c <= 0xa92d) || (c >= 0xa947 && c <= 0xa953) - || (c >= 0xa960 && c <= 0xa97c) || (c >= 0xa980 && c <= 0xa983) || (c >= 0xa9b3 && c <= 0xa9c0) || (c == 0xa9e5) @@ -230,16 +405,9 @@ bool IsUnstableWidthChar(wchar_t c) || (c == 0xaaf5 || c == 0xaaf6) || (c >= 0xabe3 && c <= 0xabea) || (c == 0xabec || c == 0xabed) - || (c >= 0xac00 && c <= 0xd7a3) - || (c >= 0xd800 && c <= 0xdfff) - || (c >= 0xf900 && c <= 0xfaff) || (c == 0xfb1e) - || (c >= 0xfe00 && c <= 0xfe19) - || (c >= 0xfe20 && c <= 0xfe52) - || (c >= 0xfe54 && c <= 0xfe66) - || (c >= 0xfe68 && c <= 0xfe6b) - || (c >= 0xff01 && c <= 0xff60) - || (c >= 0xffe0 && c <= 0xffe6) + || (c >= 0xfe00 && c <= 0xfe0f) + || (c >= 0xfe20 && c <= 0xfe2f) || (c == 0x101fd) || (c == 0x102e0) || (c >= 0x10376 && c <= 0x1037a) @@ -256,32 +424,21 @@ bool IsUnstableWidthChar(wchar_t c) || (c >= 0x10aeb && c <= 0x10aef) || (c >= 0x10b80 && c <= 0x10b91) || (c >= 0x10ba9 && c <= 0x10bae) - || (c >= 0x10d00 && c <= 0x10d27) - || (c == 0x10eab || c == 0x10eac) - || (c >= 0x10f30 && c <= 0x10f44) - || (c >= 0x10f46 && c <= 0x10f54) - || (c == 0x10fb0) - || (c >= 0x10fb2 && c <= 0x10fb6) - || (c >= 0x10fb8 && c <= 0x10fbf) - || (c >= 0x10fc1 && c <= 0x10fc4) - || (c >= 0x10fc9 && c <= 0x10fcb) || (c >= 0x11000 && c <= 0x11002) || (c >= 0x11038 && c <= 0x11046) || (c >= 0x1107f && c <= 0x11082) || (c >= 0x110b0 && c <= 0x110ba) || (c >= 0x11100 && c <= 0x11102) || (c >= 0x11127 && c <= 0x11134) - || (c == 0x11145 || c == 0x11146) || (c == 0x11173) || (c >= 0x11180 && c <= 0x11182) || (c >= 0x111b3 && c <= 0x111c0) - || (c >= 0x111c9 && c <= 0x111cc) - || (c == 0x111ce || c == 0x111cf) + || (c >= 0x111ca && c <= 0x111cc) || (c >= 0x1122c && c <= 0x11237) || (c == 0x1123e) || (c >= 0x112df && c <= 0x112ea) || (c >= 0x11300 && c <= 0x11303) - || (c == 0x1133b || c == 0x1133c) + || (c == 0x1133c) || (c >= 0x1133e && c <= 0x11344) || (c == 0x11347 || c == 0x11348) || (c >= 
0x1134b && c <= 0x1134d) @@ -290,7 +447,6 @@ bool IsUnstableWidthChar(wchar_t c) || (c >= 0x11366 && c <= 0x1136c) || (c >= 0x11370 && c <= 0x11374) || (c >= 0x11435 && c <= 0x11446) - || (c == 0x1145e) || (c >= 0x114b0 && c <= 0x114c3) || (c >= 0x115af && c <= 0x115b5) || (c >= 0x115b8 && c <= 0x115c0) @@ -298,15 +454,6 @@ bool IsUnstableWidthChar(wchar_t c) || (c >= 0x11630 && c <= 0x11640) || (c >= 0x116ab && c <= 0x116b7) || (c >= 0x1171d && c <= 0x1172b) - || (c >= 0x1182c && c <= 0x1183a) - || (c >= 0x11930 && c <= 0x11935) - || (c == 0x11937 || c == 0x11938) - || (c >= 0x1193b && c <= 0x1193e) - || (c == 0x11940) - || (c == 0x11942 || c == 0x11943) - || (c >= 0x119d1 && c <= 0x119d7) - || (c >= 0x119da && c <= 0x119e0) - || (c == 0x119e4) || (c >= 0x11a01 && c <= 0x11a0a) || (c >= 0x11a33 && c <= 0x11a39) || (c >= 0x11a3b && c <= 0x11a3e) @@ -322,24 +469,10 @@ bool IsUnstableWidthChar(wchar_t c) || (c == 0x11d3c || c == 0x11d3d) || (c >= 0x11d3f && c <= 0x11d45) || (c == 0x11d47) - || (c >= 0x11d8a && c <= 0x11d8e) - || (c == 0x11d90 || c == 0x11d91) - || (c >= 0x11d93 && c <= 0x11d97) - || (c >= 0x11ef3 && c <= 0x11ef6) || (c >= 0x16af0 && c <= 0x16af4) || (c >= 0x16b30 && c <= 0x16b36) - || (c == 0x16f4f) - || (c >= 0x16f51 && c <= 0x16f87) + || (c >= 0x16f51 && c <= 0x16f7e) || (c >= 0x16f8f && c <= 0x16f92) - || (c >= 0x16fe0 && c <= 0x16fe4) - || (c == 0x16ff0 || c == 0x16ff1) - || (c >= 0x17000 && c <= 0x187f7) - || (c >= 0x18800 && c <= 0x18cd5) - || (c >= 0x18d00 && c <= 0x18d08) - || (c >= 0x1b000 && c <= 0x1b11e) - || (c >= 0x1b150 && c <= 0x1b152) - || (c >= 0x1b164 && c <= 0x1b167) - || (c >= 0x1b170 && c <= 0x1b2fb) || (c == 0x1bc9d || c == 0x1bc9e) || (c >= 0x1d165 && c <= 0x1d169) || (c >= 0x1d16d && c <= 0x1d172) @@ -358,58 +491,17 @@ bool IsUnstableWidthChar(wchar_t c) || (c >= 0x1e01b && c <= 0x1e021) || (c == 0x1e023 || c == 0x1e024) || (c >= 0x1e026 && c <= 0x1e02a) - || (c >= 0x1e130 && c <= 0x1e136) - || (c >= 0x1e2ec && c <= 0x1e2ef) 
|| (c >= 0x1e8d0 && c <= 0x1e8d6) || (c >= 0x1e900 && c <= 0x1e94a) - || (c == 0x1f004) - || (c == 0x1f0cf) - || (c == 0x1f18e) - || (c >= 0x1f191 && c <= 0x1f19a) - || (c >= 0x1f200 && c <= 0x1f202) - || (c >= 0x1f210 && c <= 0x1f23b) - || (c >= 0x1f240 && c <= 0x1f248) - || (c == 0x1f250 || c == 0x1f251) - || (c >= 0x1f260 && c <= 0x1f265) - || (c >= 0x1f300 && c <= 0x1f320) - || (c >= 0x1f32d && c <= 0x1f335) - || (c >= 0x1f337 && c <= 0x1f37c) - || (c >= 0x1f37e && c <= 0x1f393) - || (c >= 0x1f3a0 && c <= 0x1f3ca) - || (c >= 0x1f3cf && c <= 0x1f3d3) - || (c >= 0x1f3e0 && c <= 0x1f3f0) - || (c == 0x1f3f4) - || (c >= 0x1f3f8 && c <= 0x1f43e) - || (c == 0x1f440) - || (c >= 0x1f442 && c <= 0x1f4fc) - || (c >= 0x1f4ff && c <= 0x1f53d) - || (c >= 0x1f54b && c <= 0x1f54e) - || (c >= 0x1f550 && c <= 0x1f567) - || (c == 0x1f57a) - || (c == 0x1f595 || c == 0x1f596) - || (c == 0x1f5a4) - || (c >= 0x1f5fb && c <= 0x1f64f) - || (c >= 0x1f680 && c <= 0x1f6c5) - || (c == 0x1f6cc) - || (c >= 0x1f6d0 && c <= 0x1f6d2) - || (c >= 0x1f6d5 && c <= 0x1f6d7) - || (c == 0x1f6eb || c == 0x1f6ec) - || (c >= 0x1f6f4 && c <= 0x1f6fc) - || (c >= 0x1f7e0 && c <= 0x1f7eb) - || (c >= 0x1f90c && c <= 0x1f93a) - || (c >= 0x1f93c && c <= 0x1f945) - || (c >= 0x1f947 && c <= 0x1f978) - || (c >= 0x1f97a && c <= 0x1f9cb) - || (c >= 0x1f9cd && c <= 0x1f9ff) - || (c >= 0x1fa70 && c <= 0x1fa74) - || (c >= 0x1fa78 && c <= 0x1fa7a) - || (c >= 0x1fa80 && c <= 0x1fa86) - || (c >= 0x1fa90 && c <= 0x1faa8) - || (c >= 0x1fab0 && c <= 0x1fab6) - || (c >= 0x1fac0 && c <= 0x1fac2) - || (c >= 0x1fad0 && c <= 0x1fad6) - || (c >= 0x20000 && c <= 0x2fffd) - || (c >= 0x30000 && c <= 0x3fffd) || (c >= 0xe0100 && c <= 0xe01ef) ; } + +bool IsCharXxxfix(wchar_t c) +{ + return IsCharPrefix(c) || IsCharSuffix(c); +} +bool IsCharUnstableWidth(wchar_t c) +{ + return IsCharFullWidth(c) || IsCharXxxfix(c); +} diff --git a/utils/src/CharClasses_mk.cpp b/utils/src/CharClasses_mk.cpp new file mode 100644 index 00000000..cc78f61f 
--- /dev/null +++ b/utils/src/CharClasses_mk.cpp @@ -0,0 +1,110 @@ +#include "unicode/uchar.h" +#include "unicode/utypes.h" +#include "unicode/stringoptions.h" +#include "stdio.h" + +/// Usage: +/// g++ -O2 ./CharClasses_mk.cpp -o /tmp/CharClasses_mk -licuuc && /tmp/CharClasses_mk > CharClasses.cpp + +static bool IsUnstableWidthBlock(int block) +{ + return block == UBLOCK_ARROWS + || block == UBLOCK_MATHEMATICAL_OPERATORS + || block == UBLOCK_MISCELLANEOUS_TECHNICAL + || block == UBLOCK_CONTROL_PICTURES + || block == UBLOCK_GEOMETRIC_SHAPES + || block == UBLOCK_MISCELLANEOUS_SYMBOLS + || block == UBLOCK_COMBINING_DIACRITICAL_MARKS + || block == UBLOCK_COMBINING_MARKS_FOR_SYMBOLS + || block == UBLOCK_COMBINING_HALF_MARKS + || block == UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT + || block == UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED + ; +} + +template <class FN> + static void WriteFunc(const char *name, FN fn) +{ + UChar32 c, last = 0x10ffff; + UChar32 start = 0; + bool first = true; + printf("bool %s(wchar_t c)\n", name); + printf("{\n"); + printf("\treturn "); + for (c = 1; c <= last + 1; ++c) { + const bool matched = (c <= last) && fn(c); + if (matched) { + if (!start) { + start = c; + } + + } else if (start) { + if (first) { + first = false; + } else { + printf("\t || "); + } + if (start + 2 == c) { + printf("(c == 0x%x || c == 0x%x)\n", (unsigned int)start, (unsigned int)c - 1); + } else if (start + 1 < c) { + printf("(c >= 0x%x && c <= 0x%x)\n", (unsigned int)start, (unsigned int)c - 1); + } else { + printf("(c == 0x%x)\n", (unsigned int)start); + } + start = 0; + } + } + printf("\t;\n"); + printf("}\n\n"); +} + +int main() +{ +// printf("%u\n", u_getIntPropertyValue(0xcbe, UCHAR_GENERAL_CATEGORY)); +// return -1; + UChar32 c, last = 0x10ffff; + UChar32 unstable_start = 0; + bool first = true; + printf("// this file autogenerated by IsUnstableWidthChar_mk.cpp\n\n"); + printf("#include <wchar.h>\n\n"); + + WriteFunc("IsCharFullWidth", [](wchar_t c)->bool { 
+ const auto block = u_getIntPropertyValue(c, UCHAR_BLOCK); + return u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH) != 0 + || block == UBLOCK_ARROWS + || block == UBLOCK_MATHEMATICAL_OPERATORS + || block == UBLOCK_MISCELLANEOUS_TECHNICAL + || block == UBLOCK_CONTROL_PICTURES + || block == UBLOCK_GEOMETRIC_SHAPES + || block == UBLOCK_MISCELLANEOUS_SYMBOLS; + }); + + WriteFunc("IsCharPrefix", [](wchar_t c)->bool { + const auto cat = u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY); + return (cat == U_SURROGATE); + }); + + WriteFunc("IsCharSuffix", [](wchar_t c)->bool { + const auto block = u_getIntPropertyValue(c, UCHAR_BLOCK); + const auto jt = u_getIntPropertyValue(c, UCHAR_JOINING_TYPE); + const auto cat = u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY); + return ( (jt != U_JT_NON_JOINING && jt != U_JT_TRANSPARENT) + || cat == U_NON_SPACING_MARK || cat == U_COMBINING_SPACING_MARK + || IsUnstableWidthBlock(u_getIntPropertyValue(c, UCHAR_BLOCK)) + || block == UBLOCK_COMBINING_DIACRITICAL_MARKS + || block == UBLOCK_COMBINING_MARKS_FOR_SYMBOLS + || block == UBLOCK_COMBINING_HALF_MARKS + || block == UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT + || block == UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED); + }); + + printf("bool IsCharXxxfix(wchar_t c)\n"); + printf("{\n"); + printf("\treturn IsCharPrefix(c) || IsCharSuffix(c);\n"); + printf("}\n"); + + printf("bool IsCharUnstableWidth(wchar_t c)\n"); + printf("{\n"); + printf("\treturn IsCharFullWidth(c) || IsCharXxxfix(c);\n"); + printf("}\n"); +} diff --git a/utils/src/StrVisual.cpp b/utils/src/StrVisual.cpp new file mode 100644 index 00000000..0ded4932 --- /dev/null +++ b/utils/src/StrVisual.cpp @@ -0,0 +1,105 @@ +#include "utils.h" + +size_t StrVisualLength(const wchar_t *pwz, size_t n) +{ + size_t out = 0; + for (size_t i = 0; i < n; ++i) { + if (IsCharFullWidth(*pwz)) { + out+= 2; + } else if ((i == n - 1 || !IsCharPrefix(pwz[i])) && (i == 0 || !IsCharSuffix(pwz[i]))) { + ++out; + } + } + return out; +} + +void 
StrVisualTruncateLeft(wchar_t *pwz, size_t &n, size_t vl_max) +{ + size_t vl = StrVisualLength(pwz, n); + if (vl <= vl_max || n < 3) { + return; + } + + for (size_t ofs = 3; ofs < n; ++ofs) { + if (!IsCharXxxfix(pwz[ofs]) && StrVisualLength(pwz + ofs, n - ofs) + 3 <= vl_max) { + n-= ofs; + wmemmove(pwz + 3, pwz + ofs, n); + n+= 3; + wmemcpy(pwz, L"...", 3); + return; + } + } + wcsncpy(pwz, L"...", vl_max); + n = vl_max; +} + +void StrVisualTruncateRight(wchar_t *pwz, size_t &n, size_t vl_max) +{ + size_t vl = StrVisualLength(pwz, n); + if (vl <= vl_max || n < 3) { + return; + } + + n-= 3; // pre-reserve space for ... + do { + while (n > 0 && IsCharXxxfix(pwz[n - 1])) { + --n; + } + if (n == 0) { + break; + } + --n; + } while (StrVisualLength(pwz, n) + 3 > vl_max); + + wmemcpy(&pwz[n], L"...", 3); + n+= 3; +} + +void StrVisualTruncateCenter(wchar_t *pwz, size_t &n, size_t vl_max) +{ + size_t vl = StrVisualLength(pwz, n); + if (vl <= vl_max || n < 3) { + return; + } + + auto cut_start = n / 2; + if (cut_start > 0) { + --cut_start; + } + if (cut_start > 0) { + --cut_start; + } + while (cut_start > 0 && IsCharXxxfix(pwz[cut_start])) { + --cut_start; + } + auto cut_end = cut_start + 3; + while (cut_end < n && IsCharXxxfix(pwz[cut_end])) { + ++cut_end; + } + + while (StrVisualLength(pwz, cut_start) + StrVisualLength(pwz + cut_end, n - cut_end) + 3 > vl_max) { + if (cut_start > 0) { + --cut_start; + while (cut_start > 0 && IsCharXxxfix(pwz[cut_start])) { + --cut_start; + } + if (StrVisualLength(pwz, cut_start) + StrVisualLength(pwz + cut_end, n - cut_end) + 3 <= vl_max) { + break; + } + } + if (cut_end < n) { + ++cut_end; + while (cut_end < n && IsCharXxxfix(pwz[cut_end])) { + ++cut_end; + } + } + } + + wmemmove(&pwz[cut_start + 3], &pwz[cut_end], n - cut_end); + wmemcpy(&pwz[cut_start], L"...", 3); + n-= (cut_end - cut_start); + n+= 3; +} + + + |