]> git.ipfire.org Git - thirdparty/glibc.git/blame - iconv/gconv_trans.c
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / iconv / gconv_trans.c
CommitLineData
55985355 1/* Transliteration using the locale's data.
bfff8b1b 2 Copyright (C) 2000-2017 Free Software Foundation, Inc.
55985355
UD
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000.
5
6 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
55985355
UD
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 14 Lesser General Public License for more details.
55985355 15
41bdb6e2 16 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
55985355 19
d6204268 20#include <assert.h>
f1d5c60d 21#include <dlfcn.h>
d6204268 22#include <search.h>
55985355 23#include <stdint.h>
d6204268 24#include <string.h>
7884bf47 25#include <stdlib.h>
55985355 26
ec999b8e 27#include <libc-lock.h>
55985355
UD
28#include "gconv_int.h"
29#include "../locale/localeinfo.h"
30
31
32int
f1d5c60d
UD
33__gconv_transliterate (struct __gconv_step *step,
34 struct __gconv_step_data *step_data,
35 const unsigned char *inbufstart,
36 const unsigned char **inbufp,
37 const unsigned char *inbufend,
38 unsigned char **outbufstart, size_t *irreversible)
55985355
UD
39{
40 /* Find out about the locale's transliteration. */
f1d5c60d 41 uint_fast32_t size;
17427edd
UD
42 const uint32_t *from_idx;
43 const uint32_t *from_tbl;
44 const uint32_t *to_idx;
45 const uint32_t *to_tbl;
46 const uint32_t *winbuf;
47 const uint32_t *winbufend;
f1d5c60d
UD
48 uint_fast32_t low;
49 uint_fast32_t high;
55985355 50
d5055a20 51 /* The input buffer. There are actually 4-byte values. */
17427edd
UD
52 winbuf = (const uint32_t *) *inbufp;
53 winbufend = (const uint32_t *) inbufend;
d5055a20 54
1911b455
UD
55 __gconv_fct fct = step->__fct;
56#ifdef PTR_DEMANGLE
57 if (step->__shlib_handle != NULL)
58 PTR_DEMANGLE (fct);
59#endif
60
55985355
UD
61 /* If there is no transliteration information in the locale don't do
62 anything and return the error. */
04fbc779 63 size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE);
55985355 64 if (size == 0)
1d96d74d 65 goto no_rules;
55985355 66
f1d5c60d 67 /* Get the rest of the values. */
17427edd
UD
68 from_idx =
69 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
70 from_tbl =
71 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
72 to_idx =
73 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
74 to_tbl =
75 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL);
f1d5c60d 76
f1d5c60d
UD
77 /* Test whether there is enough input. */
78 if (winbuf + 1 > winbufend)
79 return (winbuf == winbufend
80 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
81
82 /* The array starting at FROM_IDX contains indeces to the string table
83 in FROM_TBL. The indeces are sorted wrt to the strings. I.e., we
84 are doing binary search. */
85 low = 0;
86 high = size;
87 while (low < high)
88 {
89 uint_fast32_t med = (low + high) / 2;
90 uint32_t idx;
91 int cnt;
92
93 /* Compare the string at this index with the string at the current
94 position in the input buffer. */
95 idx = from_idx[med];
96 cnt = 0;
97 do
98 {
99 if (from_tbl[idx + cnt] != winbuf[cnt])
100 /* Does not match. */
101 break;
102 ++cnt;
103 }
104 while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend);
105
106 if (cnt > 0 && from_tbl[idx + cnt] == L'\0')
107 {
108 /* Found a matching input sequence. Now try to convert the
109 possible replacements. */
110 uint32_t idx2 = to_idx[med];
111
112 do
113 {
114 /* Determine length of replacement. */
115 uint_fast32_t len = 0;
116 int res;
117 const unsigned char *toinptr;
403cb8a1 118 unsigned char *outptr;
f1d5c60d
UD
119
120 while (to_tbl[idx2 + len] != L'\0')
121 ++len;
122
123 /* Try this input text. */
124 toinptr = (const unsigned char *) &to_tbl[idx2];
403cb8a1 125 outptr = *outbufstart;
1911b455 126 res = DL_CALL_FCT (fct,
f1d5c60d
UD
127 (step, step_data, &toinptr,
128 (const unsigned char *) &to_tbl[idx2 + len],
403cb8a1 129 &outptr, NULL, 0, 0));
f1d5c60d
UD
130 if (res != __GCONV_ILLEGAL_INPUT)
131 {
132 /* If the conversion succeeds we have to increment the
133 input buffer. */
134 if (res == __GCONV_EMPTY_INPUT)
135 {
136 *inbufp += cnt * sizeof (uint32_t);
137 ++*irreversible;
a8e4c924 138 res = __GCONV_OK;
f1d5c60d 139 }
1b14353e
UD
140 /* Do not increment the output pointer if we could not
141 store the entire output. */
142 if (res != __GCONV_FULL_OUTPUT)
143 *outbufstart = outptr;
f1d5c60d
UD
144
145 return res;
146 }
147
148 /* Next replacement. */
149 idx2 += len + 1;
150 }
151 while (to_tbl[idx2] != L'\0');
152
153 /* Nothing found, continue searching. */
154 }
a8e4c924
UD
155 else if (cnt > 0)
156 /* This means that the input buffer contents matches a prefix of
157 an entry. Since we cannot match it unless we get more input,
158 we will tell the caller about it. */
159 return __GCONV_INCOMPLETE_INPUT;
f1d5c60d
UD
160
161 if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
04fbc779 162 low = med + 1;
f1d5c60d 163 else
04fbc779 164 high = med;
f1d5c60d
UD
165 }
166
1d96d74d 167 no_rules:
a8e4c924
UD
168 /* Maybe the character is supposed to be ignored. */
169 if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN) != 0)
170 {
171 int n = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN);
17427edd
UD
172 const uint32_t *ranges =
173 (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE);
174 const uint32_t wc = *(const uint32_t *) (*inbufp);
a8e4c924
UD
175 int i;
176
177 /* Test whether there is enough input. */
178 if (winbuf + 1 > winbufend)
179 return (winbuf == winbufend
180 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
181
182 for (i = 0; i < n; ranges += 3, ++i)
183 if (ranges[0] <= wc && wc <= ranges[1]
184 && (wc - ranges[0]) % ranges[2] == 0)
185 {
186 /* Matches the range. Ignore it. */
187 *inbufp += 4;
188 ++*irreversible;
189 return __GCONV_OK;
190 }
191 else if (wc < ranges[0])
192 /* There cannot be any other matching range since they are
193 sorted. */
194 break;
195 }
196
197 /* One last chance: use the default replacement. */
fb46e8d2 198 if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN) != 0)
1d96d74d 199 {
17427edd 200 const uint32_t *default_missing = (const uint32_t *)
fb46e8d2 201 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING);
1d96d74d
UD
202 const unsigned char *toinptr = (const unsigned char *) default_missing;
203 uint32_t len = _NL_CURRENT_WORD (LC_CTYPE,
204 _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN);
403cb8a1 205 unsigned char *outptr;
1d96d74d
UD
206 int res;
207
a8e4c924
UD
208 /* Test whether there is enough input. */
209 if (winbuf + 1 > winbufend)
210 return (winbuf == winbufend
211 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
212
403cb8a1 213 outptr = *outbufstart;
1911b455 214 res = DL_CALL_FCT (fct,
1d96d74d
UD
215 (step, step_data, &toinptr,
216 (const unsigned char *) (default_missing + len),
403cb8a1 217 &outptr, NULL, 0, 0));
1d96d74d
UD
218
219 if (res != __GCONV_ILLEGAL_INPUT)
220 {
221 /* If the conversion succeeds we have to increment the
222 input buffer. */
223 if (res == __GCONV_EMPTY_INPUT)
224 {
a8e4c924 225 /* This worked but is not reversible. */
1d96d74d 226 ++*irreversible;
a8e4c924
UD
227 *inbufp += 4;
228 res = __GCONV_OK;
1d96d74d 229 }
403cb8a1 230 *outbufstart = outptr;
1d96d74d
UD
231
232 return res;
233 }
234 }
235
f1d5c60d 236 /* Haven't found a match. */
55985355
UD
237 return __GCONV_ILLEGAL_INPUT;
238}
ba7b4d29 239libc_hidden_def (__gconv_transliterate)