]>
Commit | Line | Data |
---|---|---|
55985355 | 1 | /* Transliteration using the locale's data. |
2b778ceb | 2 | Copyright (C) 2000-2021 Free Software Foundation, Inc. |
55985355 UD |
3 | This file is part of the GNU C Library. |
4 | Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
55985355 UD |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 14 | Lesser General Public License for more details. |
55985355 | 15 | |
41bdb6e2 | 16 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 | 17 | License along with the GNU C Library; if not, see |
5a82c748 | 18 | <https://www.gnu.org/licenses/>. */ |
55985355 | 19 | |
d6204268 | 20 | #include <assert.h> |
f1d5c60d | 21 | #include <dlfcn.h> |
d6204268 | 22 | #include <search.h> |
55985355 | 23 | #include <stdint.h> |
d6204268 | 24 | #include <string.h> |
7884bf47 | 25 | #include <stdlib.h> |
55985355 | 26 | |
ec999b8e | 27 | #include <libc-lock.h> |
55985355 UD |
28 | #include "gconv_int.h" |
29 | #include "../locale/localeinfo.h" | |
30 | ||
31 | ||
32 | int | |
f1d5c60d UD |
33 | __gconv_transliterate (struct __gconv_step *step, |
34 | struct __gconv_step_data *step_data, | |
35 | const unsigned char *inbufstart, | |
36 | const unsigned char **inbufp, | |
37 | const unsigned char *inbufend, | |
38 | unsigned char **outbufstart, size_t *irreversible) | |
55985355 UD |
39 | { |
40 | /* Find out about the locale's transliteration. */ | |
f1d5c60d | 41 | uint_fast32_t size; |
17427edd UD |
42 | const uint32_t *from_idx; |
43 | const uint32_t *from_tbl; | |
44 | const uint32_t *to_idx; | |
45 | const uint32_t *to_tbl; | |
46 | const uint32_t *winbuf; | |
47 | const uint32_t *winbufend; | |
f1d5c60d UD |
48 | uint_fast32_t low; |
49 | uint_fast32_t high; | |
55985355 | 50 | |
d5055a20 | 51 | /* The input buffer. There are actually 4-byte values. */ |
17427edd UD |
52 | winbuf = (const uint32_t *) *inbufp; |
53 | winbufend = (const uint32_t *) inbufend; | |
d5055a20 | 54 | |
1911b455 UD |
55 | __gconv_fct fct = step->__fct; |
56 | #ifdef PTR_DEMANGLE | |
57 | if (step->__shlib_handle != NULL) | |
58 | PTR_DEMANGLE (fct); | |
59 | #endif | |
60 | ||
55985355 UD |
61 | /* If there is no transliteration information in the locale don't do |
62 | anything and return the error. */ | |
04fbc779 | 63 | size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE); |
55985355 | 64 | if (size == 0) |
1d96d74d | 65 | goto no_rules; |
55985355 | 66 | |
f1d5c60d | 67 | /* Get the rest of the values. */ |
17427edd UD |
68 | from_idx = |
69 | (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX); | |
70 | from_tbl = | |
71 | (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL); | |
72 | to_idx = | |
73 | (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX); | |
74 | to_tbl = | |
75 | (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL); | |
f1d5c60d | 76 | |
f1d5c60d UD |
77 | /* Test whether there is enough input. */ |
78 | if (winbuf + 1 > winbufend) | |
79 | return (winbuf == winbufend | |
80 | ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); | |
81 | ||
14ef9c18 DL |
82 | /* The array starting at FROM_IDX contains indices to the string table |
83 | in FROM_TBL. The indices are sorted wrt to the strings. I.e., we | |
f1d5c60d UD |
84 | are doing binary search. */ |
85 | low = 0; | |
86 | high = size; | |
87 | while (low < high) | |
88 | { | |
89 | uint_fast32_t med = (low + high) / 2; | |
90 | uint32_t idx; | |
91 | int cnt; | |
92 | ||
93 | /* Compare the string at this index with the string at the current | |
94 | position in the input buffer. */ | |
95 | idx = from_idx[med]; | |
96 | cnt = 0; | |
97 | do | |
98 | { | |
99 | if (from_tbl[idx + cnt] != winbuf[cnt]) | |
100 | /* Does not match. */ | |
101 | break; | |
102 | ++cnt; | |
103 | } | |
104 | while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend); | |
105 | ||
106 | if (cnt > 0 && from_tbl[idx + cnt] == L'\0') | |
107 | { | |
108 | /* Found a matching input sequence. Now try to convert the | |
109 | possible replacements. */ | |
110 | uint32_t idx2 = to_idx[med]; | |
111 | ||
112 | do | |
113 | { | |
114 | /* Determine length of replacement. */ | |
115 | uint_fast32_t len = 0; | |
116 | int res; | |
117 | const unsigned char *toinptr; | |
403cb8a1 | 118 | unsigned char *outptr; |
f1d5c60d UD |
119 | |
120 | while (to_tbl[idx2 + len] != L'\0') | |
121 | ++len; | |
122 | ||
123 | /* Try this input text. */ | |
124 | toinptr = (const unsigned char *) &to_tbl[idx2]; | |
403cb8a1 | 125 | outptr = *outbufstart; |
1911b455 | 126 | res = DL_CALL_FCT (fct, |
f1d5c60d UD |
127 | (step, step_data, &toinptr, |
128 | (const unsigned char *) &to_tbl[idx2 + len], | |
403cb8a1 | 129 | &outptr, NULL, 0, 0)); |
f1d5c60d UD |
130 | if (res != __GCONV_ILLEGAL_INPUT) |
131 | { | |
132 | /* If the conversion succeeds we have to increment the | |
133 | input buffer. */ | |
134 | if (res == __GCONV_EMPTY_INPUT) | |
135 | { | |
136 | *inbufp += cnt * sizeof (uint32_t); | |
137 | ++*irreversible; | |
a8e4c924 | 138 | res = __GCONV_OK; |
f1d5c60d | 139 | } |
1b14353e UD |
140 | /* Do not increment the output pointer if we could not |
141 | store the entire output. */ | |
142 | if (res != __GCONV_FULL_OUTPUT) | |
143 | *outbufstart = outptr; | |
f1d5c60d UD |
144 | |
145 | return res; | |
146 | } | |
147 | ||
148 | /* Next replacement. */ | |
149 | idx2 += len + 1; | |
150 | } | |
151 | while (to_tbl[idx2] != L'\0'); | |
152 | ||
153 | /* Nothing found, continue searching. */ | |
154 | } | |
a8e4c924 UD |
155 | else if (cnt > 0) |
156 | /* This means that the input buffer contents matches a prefix of | |
157 | an entry. Since we cannot match it unless we get more input, | |
158 | we will tell the caller about it. */ | |
159 | return __GCONV_INCOMPLETE_INPUT; | |
f1d5c60d UD |
160 | |
161 | if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt]) | |
04fbc779 | 162 | low = med + 1; |
f1d5c60d | 163 | else |
04fbc779 | 164 | high = med; |
f1d5c60d UD |
165 | } |
166 | ||
1d96d74d | 167 | no_rules: |
a8e4c924 UD |
168 | /* Maybe the character is supposed to be ignored. */ |
169 | if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN) != 0) | |
170 | { | |
171 | int n = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN); | |
17427edd UD |
172 | const uint32_t *ranges = |
173 | (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE); | |
174 | const uint32_t wc = *(const uint32_t *) (*inbufp); | |
a8e4c924 UD |
175 | int i; |
176 | ||
177 | /* Test whether there is enough input. */ | |
178 | if (winbuf + 1 > winbufend) | |
179 | return (winbuf == winbufend | |
180 | ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); | |
181 | ||
182 | for (i = 0; i < n; ranges += 3, ++i) | |
183 | if (ranges[0] <= wc && wc <= ranges[1] | |
184 | && (wc - ranges[0]) % ranges[2] == 0) | |
185 | { | |
186 | /* Matches the range. Ignore it. */ | |
187 | *inbufp += 4; | |
188 | ++*irreversible; | |
189 | return __GCONV_OK; | |
190 | } | |
191 | else if (wc < ranges[0]) | |
192 | /* There cannot be any other matching range since they are | |
193 | sorted. */ | |
194 | break; | |
195 | } | |
196 | ||
197 | /* One last chance: use the default replacement. */ | |
fb46e8d2 | 198 | if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN) != 0) |
1d96d74d | 199 | { |
17427edd | 200 | const uint32_t *default_missing = (const uint32_t *) |
fb46e8d2 | 201 | _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING); |
1d96d74d UD |
202 | const unsigned char *toinptr = (const unsigned char *) default_missing; |
203 | uint32_t len = _NL_CURRENT_WORD (LC_CTYPE, | |
204 | _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN); | |
403cb8a1 | 205 | unsigned char *outptr; |
1d96d74d UD |
206 | int res; |
207 | ||
a8e4c924 UD |
208 | /* Test whether there is enough input. */ |
209 | if (winbuf + 1 > winbufend) | |
210 | return (winbuf == winbufend | |
211 | ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); | |
212 | ||
403cb8a1 | 213 | outptr = *outbufstart; |
1911b455 | 214 | res = DL_CALL_FCT (fct, |
1d96d74d UD |
215 | (step, step_data, &toinptr, |
216 | (const unsigned char *) (default_missing + len), | |
403cb8a1 | 217 | &outptr, NULL, 0, 0)); |
1d96d74d UD |
218 | |
219 | if (res != __GCONV_ILLEGAL_INPUT) | |
220 | { | |
221 | /* If the conversion succeeds we have to increment the | |
222 | input buffer. */ | |
223 | if (res == __GCONV_EMPTY_INPUT) | |
224 | { | |
a8e4c924 | 225 | /* This worked but is not reversible. */ |
1d96d74d | 226 | ++*irreversible; |
a8e4c924 UD |
227 | *inbufp += 4; |
228 | res = __GCONV_OK; | |
1d96d74d | 229 | } |
403cb8a1 | 230 | *outbufstart = outptr; |
1d96d74d UD |
231 | |
232 | return res; | |
233 | } | |
234 | } | |
235 | ||
f1d5c60d | 236 | /* Haven't found a match. */ |
55985355 UD |
237 | return __GCONV_ILLEGAL_INPUT; |
238 | } | |
ba7b4d29 | 239 | libc_hidden_def (__gconv_transliterate) |