]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/s390/s390-64/utf16-utf32-z9.c
[PATCH] S390: Fix remaining ONE_DIRECTION warning messages
[thirdparty/glibc.git] / sysdeps / s390 / s390-64 / utf16-utf32-z9.c
CommitLineData
f957edde
AK
1/* Conversion between UTF-16 and UTF-32 BE/internal.
2
3 This module uses the Z9-109 variants of the Convert Unicode
4 instructions.
d4697bc9 5 Copyright (C) 1997-2014 Free Software Foundation, Inc.
f957edde
AK
6
7 Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
8 Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997.
9
10 Thanks to Daniel Appich who covered the relevant performance work
11 in his diploma thesis.
12
13 This is free software; you can redistribute it and/or
14 modify it under the terms of the GNU Lesser General Public
15 License as published by the Free Software Foundation; either
16 version 2.1 of the License, or (at your option) any later version.
17
18 This is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 Lesser General Public License for more details.
22
23 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
24 License along with the GNU C Library; if not, see
25 <http://www.gnu.org/licenses/>. */
f957edde
AK
26
27#include <dlfcn.h>
28#include <stdint.h>
29#include <unistd.h>
30#include <dl-procinfo.h>
31#include <gconv.h>
32
/* Byte order marks written in front of the converted text, in big
   endian byte order.  */
#define BOM_UTF32 0x0000feffu
#define BOM_UTF16 0xfeff

/* Settings read by <iconv/skeleton.c>, which is included at the end of
   this file.  The FROM side is UTF-16 (2 to 4 bytes per character),
   the TO side is UTF-32 (always 4 bytes).  */
#define DEFINE_INIT 0
#define DEFINE_FINI 0
#define MIN_NEEDED_FROM 2
#define MAX_NEEDED_FROM 4
#define MIN_NEEDED_TO 4
#define FROM_LOOP from_utf16_loop
#define TO_LOOP to_utf16_loop
#define FROM_DIRECTION (dir == from_utf16)
#define ONE_DIRECTION 0

/* Fetch the per-step settings stored by gconv_init and, on the very
   first non-internal invocation, write the byte order mark if one was
   requested.  Returns __GCONV_FULL_OUTPUT from the enclosing function
   when the mark does not fit into the output buffer.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct utf16_data *) step->__data)->dir; \
  int emit_bom = ((struct utf16_data *) step->__data)->emit_bom; \
  \
  if (emit_bom && !data->__internal_use && data->__invocation_counter == 0) \
    { \
      if (dir != to_utf16) \
        { \
          /* Producing UTF-32: the mark takes four bytes.  */ \
          if (__glibc_unlikely (outbuf + 4 > outend)) \
            return __GCONV_FULL_OUTPUT; \
          \
          put32u (outbuf, BOM_UTF32); \
          outbuf += 4; \
        } \
      else \
        { \
          /* Producing UTF-16: the mark takes two bytes.  */ \
          if (__glibc_unlikely (outbuf + 2 > outend)) \
            return __GCONV_FULL_OUTPUT; \
          \
          put16u (outbuf, BOM_UTF16); \
          outbuf += 2; \
        } \
    }
74
/* Which way a conversion step translates.  gconv_init starts from
   illegal_dir and only accepts the step once it has been replaced by
   one of the two real directions.  */
enum direction
{
  illegal_dir = 0,
  to_utf16 = 1,
  from_utf16 = 2
};

/* Per-step settings; gconv_init allocates one of these and hangs it
   off step->__data, PREPARE_LOOP reads it back.  */
struct utf16_data
{
  /* Direction chosen for this step.  */
  enum direction dir;
  /* Nonzero if a byte order mark is written before the first output.  */
  int emit_bom;
};
88
89
90extern int gconv_init (struct __gconv_step *step);
91int
92gconv_init (struct __gconv_step *step)
93{
94 /* Determine which direction. */
95 struct utf16_data *new_data;
96 enum direction dir = illegal_dir;
97 int emit_bom;
98 int result;
99
100 emit_bom = (__strcasecmp (step->__to_name, "UTF-32//") == 0
101 || __strcasecmp (step->__to_name, "UTF-16//") == 0);
102
103 if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0
104 && (__strcasecmp (step->__to_name, "UTF-32//") == 0
105 || __strcasecmp (step->__to_name, "UTF-32BE//") == 0
89749d19 106 || __strcasecmp (step->__to_name, "INTERNAL") == 0))
f957edde
AK
107 {
108 dir = from_utf16;
109 }
110 else if ((__strcasecmp (step->__to_name, "UTF-16//") == 0
111 || __strcasecmp (step->__to_name, "UTF-16BE//") == 0)
112 && (__strcasecmp (step->__from_name, "UTF-32BE//") == 0
113 || __strcasecmp (step->__from_name, "INTERNAL") == 0))
114 {
115 dir = to_utf16;
116 }
117
118 result = __GCONV_NOCONV;
119 if (dir != illegal_dir)
120 {
121 new_data = (struct utf16_data *) malloc (sizeof (struct utf16_data));
122
123 result = __GCONV_NOMEM;
124 if (new_data != NULL)
125 {
126 new_data->dir = dir;
127 new_data->emit_bom = emit_bom;
128 step->__data = new_data;
129
130 if (dir == from_utf16)
131 {
132 step->__min_needed_from = MIN_NEEDED_FROM;
133 step->__max_needed_from = MIN_NEEDED_FROM;
134 step->__min_needed_to = MIN_NEEDED_TO;
135 step->__max_needed_to = MIN_NEEDED_TO;
136 }
137 else
138 {
139 step->__min_needed_from = MIN_NEEDED_TO;
140 step->__max_needed_from = MIN_NEEDED_TO;
141 step->__min_needed_to = MIN_NEEDED_FROM;
142 step->__max_needed_to = MIN_NEEDED_FROM;
143 }
144
145 step->__stateful = 0;
146
147 result = __GCONV_OK;
148 }
149 }
150
151 return result;
152}
153
154
/* Tear down a conversion step: release whatever is stored in
   DATA->__data (the struct utf16_data set up by gconv_init).  */
extern void gconv_end (struct __gconv_step *data);
void
gconv_end (struct __gconv_step *data)
{
  void *step_private = data->__data;

  free (step_private);
}
161
/* Hardware conversion loop, shared by both directions.

   INSTRUCTION is the assembler template of the convert instruction to
   issue; inside it %0 names the output pointer register and %1 the
   input pointer register.  Pointers and remaining lengths are bound to
   registers 8/9 (input) and 10/11 (output).  The instruction is
   re-issued for as long as "jo" branches back, then the condition code
   is fetched with "ipm".

   Afterwards inptr and outptr hold the updated register values.  A
   condition code of 1 sets RESULT to __GCONV_FULL_OUTPUT and a
   condition code of 2 sets it to __GCONV_ILLEGAL_INPUT; both execute
   "break", so the macro has to be expanded directly inside the
   conversion loop.  Any other condition code falls through.  */
#define HARDWARE_CONVERT(INSTRUCTION) \
  { \
    register const unsigned char *hw_src asm ("8") = inptr; \
    register unsigned long long hw_src_len asm ("9") = inend - inptr; \
    register unsigned char *hw_dst asm ("10") = outptr; \
    register unsigned long long hw_dst_len asm ("11") = outend - outptr; \
    uint64_t cond_code = 0; \
    \
    asm volatile (".machine push \n\t" \
                  ".machine \"z9-109\" \n\t" \
                  "0: " INSTRUCTION " \n\t" \
                  ".machine pop \n\t" \
                  " jo 0b \n\t" \
                  " ipm %2 \n" \
                  : "+a" (hw_dst), "+a" (hw_src), "+d" (cond_code), \
                    "+d" (hw_dst_len), "+d" (hw_src_len) \
                  : \
                  : "cc", "memory"); \
    \
    inptr = hw_src; \
    outptr = hw_dst; \
    /* Only the bits from position 28 upwards are examined.  */ \
    cond_code = cond_code >> 28; \
    \
    if (cond_code == 2) \
      { \
        result = __GCONV_ILLEGAL_INPUT; \
        break; \
      } \
    if (cond_code == 1) \
      { \
        result = __GCONV_FULL_OUTPUT; \
        break; \
      } \
  }
198
199/* Conversion function from UTF-16 to UTF-32 internal/BE. */
200
201#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
202#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
203#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
204#define LOOPFCT FROM_LOOP
205/* The software routine is copied from utf-16.c (minus bytes
206 swapping). */
207#define BODY \
208 { \
a3dc4658
AK
209 /* The hardware instruction currently fails to report an error for \
210 isolated low surrogates so we have to disable the instruction \
211 until this gets resolved. */ \
212 if (0) /* (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) */ \
f957edde
AK
213 { \
214 HARDWARE_CONVERT ("cu24 %0, %1, 1"); \
215 if (inptr != inend) \
216 { \
217 /* Check if the third byte is \
218 a valid start of a UTF-16 surrogate. */ \
219 if (inend - inptr == 3 && (inptr[3] & 0xfc) != 0xdc) \
220 STANDARD_FROM_LOOP_ERR_HANDLER (3); \
221 \
222 result = __GCONV_INCOMPLETE_INPUT; \
223 break; \
224 } \
225 continue; \
226 } \
227 \
228 uint16_t u1 = get16 (inptr); \
229 \
230 if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff) \
231 { \
232 /* No surrogate. */ \
233 put32 (outptr, u1); \
234 inptr += 2; \
235 } \
236 else \
237 { \
a3dc4658
AK
238 /* An isolated low-surrogate was found. This has to be \
239 considered ill-formed. */ \
a1ffb40e 240 if (__glibc_unlikely (u1 >= 0xdc00)) \
a3dc4658
AK
241 { \
242 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
243 } \
f957edde
AK
244 /* It's a surrogate character. At least the first word says \
245 it is. */ \
a1ffb40e 246 if (__glibc_unlikely (inptr + 4 > inend)) \
f957edde
AK
247 { \
248 /* We don't have enough input for another complete input \
249 character. */ \
250 result = __GCONV_INCOMPLETE_INPUT; \
251 break; \
252 } \
253 \
254 inptr += 2; \
255 uint16_t u2 = get16 (inptr); \
256 if (__builtin_expect (u2 < 0xdc00, 0) \
257 || __builtin_expect (u2 > 0xdfff, 0)) \
258 { \
259 /* This is no valid second word for a surrogate. */ \
260 inptr -= 2; \
261 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
262 } \
263 \
264 put32 (outptr, ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00)); \
265 inptr += 2; \
266 } \
267 outptr += 4; \
268 }
269#define LOOP_NEED_FLAGS
270#include <iconv/loop.c>
271
272/* Conversion from UTF-32 internal/BE to UTF-16. */
273
274#define MIN_NEEDED_INPUT MIN_NEEDED_TO
275#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
276#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
277#define LOOPFCT TO_LOOP
278/* The software routine is copied from utf-16.c (minus bytes
279 swapping). */
280#define BODY \
281 { \
282 if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
283 { \
284 HARDWARE_CONVERT ("cu42 %0, %1"); \
285 \
286 if (inptr != inend) \
287 { \
288 result = __GCONV_INCOMPLETE_INPUT; \
289 break; \
290 } \
291 continue; \
292 } \
293 \
294 uint32_t c = get32 (inptr); \
295 \
296 if (__builtin_expect (c <= 0xd7ff, 1) \
297 || (c >=0xdc00 && c <= 0xffff)) \
298 { \
299 /* Two UTF-16 chars. */ \
300 put16 (outptr, c); \
301 } \
302 else if (__builtin_expect (c >= 0x10000, 1) \
303 && __builtin_expect (c <= 0x10ffff, 1)) \
304 { \
305 /* Four UTF-16 chars. */ \
306 uint16_t zabcd = ((c & 0x1f0000) >> 16) - 1; \
307 uint16_t out; \
308 \
309 /* Generate a surrogate character. */ \
a1ffb40e 310 if (__glibc_unlikely (outptr + 4 > outend)) \
f957edde
AK
311 { \
312 /* Overflow in the output buffer. */ \
313 result = __GCONV_FULL_OUTPUT; \
314 break; \
315 } \
316 \
317 out = 0xd800; \
318 out |= (zabcd & 0xff) << 6; \
319 out |= (c >> 10) & 0x3f; \
320 put16 (outptr, out); \
321 outptr += 2; \
322 \
323 out = 0xdc00; \
324 out |= c & 0x3ff; \
325 put16 (outptr, out); \
326 } \
327 else \
328 { \
329 STANDARD_TO_LOOP_ERR_HANDLER (4); \
330 } \
331 outptr += 2; \
332 inptr += 4; \
333 }
334#define LOOP_NEED_FLAGS
335#include <iconv/loop.c>
336
337#include <iconv/skeleton.c>