1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
|
/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
#include <_ansi.h>
#include <wctype.h>
#include <stdint.h>
//#include <errno.h>
#include "local.h"
/*
struct caseconv_entry describes the case conversion behaviour
of a range of Unicode characters.
It was designed to be compact for a minimal table size.
The range is first...first + diff.
Conversion behaviour for a character c in the respective range:
mode == TOLO towlower (c) = c + delta
mode == TOUP towupper (c) = c + delta
mode == TOBOTH (titling case characters)
towlower (c) = c + 1
towupper (c) = c - 1
mode == TO1 capital/small letters are alternating
delta == EVENCAP even codes are capital
delta == ODDCAP odd codes are capital
(this correlates with an even/odd first range value
as of Unicode 10.0 but we do not rely on this)
As of Unicode 10.0, the following field lengths are sufficient
first: 17 bits
diff: 8 bits
delta: 17 bits
mode: 2 bits
The reserve of 4 bits (to limit the struct to 6 bytes)
is currently added to the 'first' field;
should a future Unicode version make it necessary to expand the others,
the 'first' field could be reduced as needed, or larger ranges could
be split up (reduce limit max=255 e.g. to max=127 or max=63 in
script mkcaseconv, check increasing table size).
*/
/* Conversion mode of a table entry (stored in its 2-bit 'mode' field).  */
enum {
  TO1 = 0,	/* capital and small letters alternate within the range */
  TOLO,		/* towlower (c) = c + delta */
  TOUP,		/* towupper (c) = c + delta */
  TOBOTH	/* titling case: towlower (c) = c + 1, towupper (c) = c - 1 */
};
/* For mode TO1 the 'delta' field holds one of these parity selectors.  */
enum {
  EVENCAP = 0,	/* even codes are capital */
  ODDCAP	/* odd codes are capital */
};
/* One entry per range of code points sharing the same case conversion
   behaviour; the initializers are included from caseconv.t.
   The table is only ever read, so it is declared const.  */
static const struct caseconv_entry {
  uint_least32_t first: 21;	/* first code point of the range */
  uint_least8_t diff: 8;	/* last code point of the range is first + diff */
  uint_least8_t mode: 2;	/* TO1, TOLO, TOUP or TOBOTH */
  /* Offset added to c for TOLO/TOUP; EVENCAP or ODDCAP for TO1.
     This field must be signed: converting to upper case usually moves
     to a lower code (e.g. 'a' -> 'A' is -32), and an unsigned bit-field
     would wrap such a negative delta into a large positive value, so
     that c + delta yields a wrong character.  */
  int_least32_t delta: 17;
} __attribute__ ((packed))
caseconv_table [] = {
#include "caseconv.t"
};
/* Inclusive bounds of the code point range described by table entry ce.
   The argument and the whole expansion are parenthesized so that the
   macros also work with expressions such as *ptr or ptr[i] and inside
   larger expressions.  */
#define first(ce) ((ce).first)
#define last(ce) ((ce).first + (ce).diff)
/* Binary search in the interval table: locate the entry whose range
   first...first + diff contains ucs.
   'max' is the index of the last table entry (not the entry count).
   Returns a pointer to the matching entry, or a null pointer if ucs
   is not covered by any entry.  */
static const struct caseconv_entry *
bisearch (wint_t ucs, const struct caseconv_entry *table, int max)
{
  int lo = 0;
  int hi = max;

  /* quick rejection: ucs lies outside the span covered by the whole table */
  if (ucs < first(table[0]) || ucs > last(table[hi]))
    return 0;

  while (lo <= hi)
    {
      int probe = (lo + hi) / 2;

      if (ucs < first(table[probe]))
	hi = probe - 1;		/* continue in the lower half */
      else if (ucs > last(table[probe]))
	lo = probe + 1;		/* continue in the upper half */
      else
	return &table[probe];	/* first <= ucs <= last */
    }

  /* ucs falls into a gap between two ranges */
  return 0;
}
/* Map code point c to lower case according to caseconv_table.
   Returns c unchanged if no table entry covers c or if the covering
   entry defines no lower-case conversion for c.  */
static wint_t
toulower (wint_t c)
{
  int last_index = sizeof(caseconv_table) / sizeof(*caseconv_table) - 1;
  const struct caseconv_entry *entry = bisearch (c, caseconv_table, last_index);

  if (!entry)
    return c;

  if (entry->mode == TOLO)
    return c + entry->delta;

  /* titling case character: the small letter is the next code */
  if (entry->mode == TOBOTH)
    return c + 1;

  if (entry->mode == TO1)
    {
      /* alternating capital/small letters: a capital is followed
	 directly by its small letter; delta tells which parity is capital */
      int odd = (c & 1) != 0;

      if ((entry->delta == EVENCAP && !odd)
	  || (entry->delta == ODDCAP && odd))
	return c + 1;
    }

  /* TOUP entry, or c already is the small letter of a TO1 pair */
  return c;
}
/* Map code point c to upper case according to caseconv_table.
   Returns c unchanged if no table entry covers c or if the covering
   entry defines no upper-case conversion for c.  */
static wint_t
touupper (wint_t c)
{
  int last_index = sizeof(caseconv_table) / sizeof(*caseconv_table) - 1;
  const struct caseconv_entry *entry = bisearch (c, caseconv_table, last_index);

  if (!entry)
    return c;

  if (entry->mode == TOUP)
    return c + entry->delta;

  /* titling case character: the capital letter is the previous code */
  if (entry->mode == TOBOTH)
    return c - 1;

  if (entry->mode == TO1)
    {
      /* alternating capital/small letters: a small letter directly
	 follows its capital; delta tells which parity is capital,
	 so the opposite parity is small */
      int odd = (c & 1) != 0;

      if ((entry->delta == EVENCAP && odd)
	  || (entry->delta == ODDCAP && !odd))
	return c - 1;
    }

  /* TOLO entry, or c already is the capital letter of a TO1 pair */
  return c;
}
/* Apply the case mapping selected by w to wide character c in the
   given locale.
   c is first passed through _jp2uc_l (presumably converting the
   locale's wide character encoding to a Unicode code point; see its
   definition), the mapping is looked up on that value, and a changed
   result is passed back through _uc2jp_l.
   Returns c itself if w is neither WCT_TOLOWER nor WCT_TOUPPER or if
   the mapping leaves the character unchanged.  */
wint_t
towctrans_l (wint_t c, wctrans_t w, struct __locale_t *locale)
{
  wint_t ucs = _jp2uc_l (c, locale);
  wint_t mapped;

  /* Unsupported mapping descriptor: return the input unchanged.
     The errno setting that was previously involved by delegating to
     towctrans is deliberately skipped; it was causing trouble
     (cygwin crash) and there is no errno specified for towctrans.  */
  if (w != WCT_TOLOWER && w != WCT_TOUPPER)
    return c;

  mapped = (w == WCT_TOLOWER) ? toulower (ucs) : touupper (ucs);

  /* convert back only if the mapping changed something; otherwise
     hand back the caller's original value */
  return (mapped == ucs) ? c : _uc2jp_l (mapped, locale);
}
|