]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/s390/s390-64/utf16-utf32-z9.c
[PATCH] S390: Fix remaining ONE_DIRECTION warning messages
[thirdparty/glibc.git] / sysdeps / s390 / s390-64 / utf16-utf32-z9.c
1 /* Conversion between UTF-16 and UTF-32 BE/internal.
2
3 This module uses the Z9-109 variants of the Convert Unicode
4 instructions.
5 Copyright (C) 1997-2014 Free Software Foundation, Inc.
6
7 Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
8 Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997.
9
10 Thanks to Daniel Appich who covered the relevant performance work
11 in his diploma thesis.
12
13 This is free software; you can redistribute it and/or
14 modify it under the terms of the GNU Lesser General Public
15 License as published by the Free Software Foundation; either
16 version 2.1 of the License, or (at your option) any later version.
17
18 This is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 Lesser General Public License for more details.
22
23 You should have received a copy of the GNU Lesser General Public
24 License along with the GNU C Library; if not, see
25 <http://www.gnu.org/licenses/>. */
26
27 #include <dlfcn.h>
28 #include <stdint.h>
29 #include <unistd.h>
30 #include <dl-procinfo.h>
31 #include <gconv.h>
32
33 /* UTF-32 big endian byte order mark. */
34 #define BOM_UTF32 0x0000feffu
35
36 /* UTF-16 big endian byte order mark. */
37 #define BOM_UTF16 0xfeff
38
/* Settings consumed by <iconv/skeleton.c>.  This file supplies its own
   gconv_init and gconv_end, hence DEFINE_INIT and DEFINE_FINI are 0.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0

/* Byte counts per character.  The "from" side is UTF-16 (2 bytes, or 4
   for a surrogate pair); the "to" side is UTF-32 (always 4 bytes).  */
#define MIN_NEEDED_FROM		2
#define MAX_NEEDED_FROM		4
#define MIN_NEEDED_TO		4

/* Names of the two generated conversion loops and the test that selects
   between them.  Both directions live in this one module.  */
#define FROM_LOOP		from_utf16_loop
#define TO_LOOP			to_utf16_loop
#define FROM_DIRECTION		(dir == from_utf16)
#define ONE_DIRECTION		0

/* Fetch the per-step settings stored by gconv_init and, on the very
   first invocation of a non-internal conversion that asked for it,
   write the byte order mark of the target encoding.  Returns
   __GCONV_FULL_OUTPUT from the enclosing function when the mark does
   not fit into the output buffer.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct utf16_data *) step->__data)->dir; \
  int emit_bom = ((struct utf16_data *) step->__data)->emit_bom; \
 \
  if (emit_bom && !data->__internal_use \
      && data->__invocation_counter == 0) \
    { \
      if (dir != to_utf16) \
        { \
          /* Target is UTF-32: the mark occupies four bytes.  */ \
          if (__glibc_unlikely (outbuf + 4 > outend)) \
            return __GCONV_FULL_OUTPUT; \
 \
          put32u (outbuf, BOM_UTF32); \
          outbuf += 4; \
        } \
      else \
        { \
          /* Target is UTF-16: the mark occupies two bytes.  */ \
          if (__glibc_unlikely (outbuf + 2 > outend)) \
            return __GCONV_FULL_OUTPUT; \
 \
          put16u (outbuf, BOM_UTF16); \
          outbuf += 2; \
        } \
    }
74
/* Which way a conversion step converts.  illegal_dir marks a step whose
   from/to names this module does not handle.  */
enum direction
{
  illegal_dir = 0,
  to_utf16 = 1,
  from_utf16 = 2
};

/* Per-step state allocated by gconv_init and released by gconv_end.  */
struct utf16_data
{
  /* Direction selected for this step.  */
  enum direction dir;
  /* Nonzero if a byte order mark has to be written first.  */
  int emit_bom;
};
88
89
90 extern int gconv_init (struct __gconv_step *step);
91 int
92 gconv_init (struct __gconv_step *step)
93 {
94 /* Determine which direction. */
95 struct utf16_data *new_data;
96 enum direction dir = illegal_dir;
97 int emit_bom;
98 int result;
99
100 emit_bom = (__strcasecmp (step->__to_name, "UTF-32//") == 0
101 || __strcasecmp (step->__to_name, "UTF-16//") == 0);
102
103 if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0
104 && (__strcasecmp (step->__to_name, "UTF-32//") == 0
105 || __strcasecmp (step->__to_name, "UTF-32BE//") == 0
106 || __strcasecmp (step->__to_name, "INTERNAL") == 0))
107 {
108 dir = from_utf16;
109 }
110 else if ((__strcasecmp (step->__to_name, "UTF-16//") == 0
111 || __strcasecmp (step->__to_name, "UTF-16BE//") == 0)
112 && (__strcasecmp (step->__from_name, "UTF-32BE//") == 0
113 || __strcasecmp (step->__from_name, "INTERNAL") == 0))
114 {
115 dir = to_utf16;
116 }
117
118 result = __GCONV_NOCONV;
119 if (dir != illegal_dir)
120 {
121 new_data = (struct utf16_data *) malloc (sizeof (struct utf16_data));
122
123 result = __GCONV_NOMEM;
124 if (new_data != NULL)
125 {
126 new_data->dir = dir;
127 new_data->emit_bom = emit_bom;
128 step->__data = new_data;
129
130 if (dir == from_utf16)
131 {
132 step->__min_needed_from = MIN_NEEDED_FROM;
133 step->__max_needed_from = MIN_NEEDED_FROM;
134 step->__min_needed_to = MIN_NEEDED_TO;
135 step->__max_needed_to = MIN_NEEDED_TO;
136 }
137 else
138 {
139 step->__min_needed_from = MIN_NEEDED_TO;
140 step->__max_needed_from = MIN_NEEDED_TO;
141 step->__min_needed_to = MIN_NEEDED_FROM;
142 step->__max_needed_to = MIN_NEEDED_FROM;
143 }
144
145 step->__stateful = 0;
146
147 result = __GCONV_OK;
148 }
149 }
150
151 return result;
152 }
153
154
155 extern void gconv_end (struct __gconv_step *data);
156 void
157 gconv_end (struct __gconv_step *data)
158 {
159 free (data->__data);
160 }
161
/* Hardware conversion step shared by both directions.

   INSTRUCTION is the assembler text of the conversion instruction; its
   operands %0 and %1 are the output and input address registers.  The
   addresses and remaining lengths are pinned to registers 8/9 (input)
   and 10/11 (output).  The instruction is re-issued for as long as
   `jo' branches back, then the condition code is fetched with `ipm'.

   Afterwards inptr and outptr are advanced to where the instruction
   stopped.  Condition code 1 is reported as __GCONV_FULL_OUTPUT and
   condition code 2 as __GCONV_ILLEGAL_INPUT; both leave the enclosing
   loop via `break'.  Any other condition code falls through to the
   caller.  */
#define HARDWARE_CONVERT(INSTRUCTION) \
  { \
    register const unsigned char *hw_src asm ("8") = inptr; \
    register unsigned long long hw_src_len asm ("9") = inend - inptr; \
    register unsigned char *hw_dst asm ("10") = outptr; \
    register unsigned long long hw_dst_len asm ("11") = outend - outptr; \
    uint64_t cond = 0; \
 \
    asm volatile (".machine push \n\t" \
                  ".machine \"z9-109\" \n\t" \
                  "0: " INSTRUCTION " \n\t" \
                  ".machine pop \n\t" \
                  " jo 0b \n\t" \
                  " ipm %2 \n" \
                  : "+a" (hw_dst), "+a" (hw_src), "+d" (cond), \
                    "+d" (hw_dst_len), "+d" (hw_src_len) \
                  : \
                  : "cc", "memory"); \
 \
    /* Publish the progress made by the instruction.  */ \
    inptr = hw_src; \
    outptr = hw_dst; \
 \
    /* Move the condition code delivered by ipm down to the low bits.  */ \
    cond >>= 28; \
 \
    if (cond == 1) \
      { \
        result = __GCONV_FULL_OUTPUT; \
        break; \
      } \
    if (cond == 2) \
      { \
        result = __GCONV_ILLEGAL_INPUT; \
        break; \
      } \
  }
198
199 /* Conversion function from UTF-16 to UTF-32 internal/BE. */
200
201 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
202 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
203 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
204 #define LOOPFCT FROM_LOOP
205 /* The software routine is copied from utf-16.c (minus bytes
206 swapping). */
207 #define BODY \
208 { \
209 /* The hardware instruction currently fails to report an error for \
210 isolated low surrogates so we have to disable the instruction \
211 until this gets resolved. */ \
212 if (0) /* (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) */ \
213 { \
214 HARDWARE_CONVERT ("cu24 %0, %1, 1"); \
215 if (inptr != inend) \
216 { \
217 /* Check if the third byte is \
218 a valid start of a UTF-16 surrogate. */ \
219 if (inend - inptr == 3 && (inptr[3] & 0xfc) != 0xdc) \
220 STANDARD_FROM_LOOP_ERR_HANDLER (3); \
221 \
222 result = __GCONV_INCOMPLETE_INPUT; \
223 break; \
224 } \
225 continue; \
226 } \
227 \
228 uint16_t u1 = get16 (inptr); \
229 \
230 if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff) \
231 { \
232 /* No surrogate. */ \
233 put32 (outptr, u1); \
234 inptr += 2; \
235 } \
236 else \
237 { \
238 /* An isolated low-surrogate was found. This has to be \
239 considered ill-formed. */ \
240 if (__glibc_unlikely (u1 >= 0xdc00)) \
241 { \
242 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
243 } \
244 /* It's a surrogate character. At least the first word says \
245 it is. */ \
246 if (__glibc_unlikely (inptr + 4 > inend)) \
247 { \
248 /* We don't have enough input for another complete input \
249 character. */ \
250 result = __GCONV_INCOMPLETE_INPUT; \
251 break; \
252 } \
253 \
254 inptr += 2; \
255 uint16_t u2 = get16 (inptr); \
256 if (__builtin_expect (u2 < 0xdc00, 0) \
257 || __builtin_expect (u2 > 0xdfff, 0)) \
258 { \
259 /* This is no valid second word for a surrogate. */ \
260 inptr -= 2; \
261 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
262 } \
263 \
264 put32 (outptr, ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00)); \
265 inptr += 2; \
266 } \
267 outptr += 4; \
268 }
269 #define LOOP_NEED_FLAGS
270 #include <iconv/loop.c>
271
272 /* Conversion from UTF-32 internal/BE to UTF-16. */
273
274 #define MIN_NEEDED_INPUT MIN_NEEDED_TO
275 #define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
276 #define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
277 #define LOOPFCT TO_LOOP
278 /* The software routine is copied from utf-16.c (minus bytes
279 swapping). */
280 #define BODY \
281 { \
282 if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
283 { \
284 HARDWARE_CONVERT ("cu42 %0, %1"); \
285 \
286 if (inptr != inend) \
287 { \
288 result = __GCONV_INCOMPLETE_INPUT; \
289 break; \
290 } \
291 continue; \
292 } \
293 \
294 uint32_t c = get32 (inptr); \
295 \
296 if (__builtin_expect (c <= 0xd7ff, 1) \
297 || (c >=0xdc00 && c <= 0xffff)) \
298 { \
299 /* Two UTF-16 chars. */ \
300 put16 (outptr, c); \
301 } \
302 else if (__builtin_expect (c >= 0x10000, 1) \
303 && __builtin_expect (c <= 0x10ffff, 1)) \
304 { \
305 /* Four UTF-16 chars. */ \
306 uint16_t zabcd = ((c & 0x1f0000) >> 16) - 1; \
307 uint16_t out; \
308 \
309 /* Generate a surrogate character. */ \
310 if (__glibc_unlikely (outptr + 4 > outend)) \
311 { \
312 /* Overflow in the output buffer. */ \
313 result = __GCONV_FULL_OUTPUT; \
314 break; \
315 } \
316 \
317 out = 0xd800; \
318 out |= (zabcd & 0xff) << 6; \
319 out |= (c >> 10) & 0x3f; \
320 put16 (outptr, out); \
321 outptr += 2; \
322 \
323 out = 0xdc00; \
324 out |= c & 0x3ff; \
325 put16 (outptr, out); \
326 } \
327 else \
328 { \
329 STANDARD_TO_LOOP_ERR_HANDLER (4); \
330 } \
331 outptr += 2; \
332 inptr += 4; \
333 }
334 #define LOOP_NEED_FLAGS
335 #include <iconv/loop.c>
336
337 #include <iconv/skeleton.c>