]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/s390/utf8-utf16-z9.c
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / s390 / utf8-utf16-z9.c
CommitLineData
df6cc7ee 1/* Conversion between UTF-8 and UTF-16 - s390 version.
f957edde
AK
2
3 This module uses the Z9-109 variants of the Convert Unicode
4 instructions.
04277e02 5 Copyright (C) 1997-2019 Free Software Foundation, Inc.
f957edde
AK
6
7 Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
8 Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997.
9
10 Thanks to Daniel Appich who covered the relevant performance work
11 in his diploma thesis.
12
13 This is free software; you can redistribute it and/or
14 modify it under the terms of the GNU Lesser General Public
15 License as published by the Free Software Foundation; either
16 version 2.1 of the License, or (at your option) any later version.
17
18 This is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 Lesser General Public License for more details.
22
23 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
24 License along with the GNU C Library; if not, see
25 <http://www.gnu.org/licenses/>. */
f957edde
AK
26
27#include <dlfcn.h>
28#include <stdint.h>
29#include <unistd.h>
f957edde 30#include <gconv.h>
df6cc7ee
SL
31#include <string.h>
32
/* Select which versions should be defined depending on support
   for multiarch, vector and used minimum architecture level.

   Each HAVE_* macro is 1 when the corresponding loop function is
   generated further down in this file and 0 otherwise; the
   *_LOOP_DEFAULT macros name the loop function used by default for
   that direction.  */
#ifdef HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT
# define HAVE_FROM_C 0
# define FROM_LOOP_DEFAULT FROM_LOOP_CU
#else
# define HAVE_FROM_C 1
# define FROM_LOOP_DEFAULT FROM_LOOP_C
#endif

/* The software UTF-16 -> UTF-8 loop is always built and is the default.  */
#define HAVE_TO_C 1
#define TO_LOOP_DEFAULT TO_LOOP_C

/* Loop based on the cu12 convert instruction only.  */
#if defined HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT || defined USE_MULTIARCH
# define HAVE_FROM_CU 1
#else
# define HAVE_FROM_CU 0
#endif

/* Loops using vector instructions are only built for multiarch
   builds whose assembler supports them.  */
#if defined HAVE_S390_VX_ASM_SUPPORT && defined USE_MULTIARCH
# define HAVE_FROM_VX 1
# define HAVE_TO_VX 1
# define HAVE_TO_VX_CU 1
#else
# define HAVE_FROM_VX 0
# define HAVE_TO_VX 0
# define HAVE_TO_VX_CU 0
#endif

/* Appends the vector register NR to an inline asm clobber list (note
   the leading comma).  Expands to nothing if the compiler does not
   know the vector register names.  */
#if defined HAVE_S390_VX_GCC_SUPPORT
# define ASM_CLOBBER_VR(NR) , NR
#else
# define ASM_CLOBBER_VR(NR)
#endif

/* On s390x this expands to nothing.  Otherwise it emits an llgfr of
   the asm operand REG onto itself so that a 32-bit size_t value can be
   used by the 64-bit length compares in the vector loops below.  */
#if defined __s390x__
# define CONVERT_32BIT_SIZE_T(REG)
#else
# define CONVERT_32BIT_SIZE_T(REG) "llgfr %" #REG ",%" #REG "\n\t"
#endif
73
/* Defines for skeleton.c.

   The "FROM" side of this module is UTF-8 (1 to 4 bytes per
   character), the "TO" side is UTF-16 (2 or 4 bytes per character).
   gconv_init and gconv_end are provided by this file, hence
   DEFINE_INIT and DEFINE_FINI are 0.  */
#define DEFINE_INIT 0
#define DEFINE_FINI 0
#define MIN_NEEDED_FROM 1
#define MAX_NEEDED_FROM 4
#define MIN_NEEDED_TO 2
#define MAX_NEEDED_TO 4
#define FROM_LOOP FROM_LOOP_DEFAULT
#define TO_LOOP TO_LOOP_DEFAULT
/* DIR is the local variable set up by PREPARE_LOOP.  */
#define FROM_DIRECTION (dir == from_utf8)
#define ONE_DIRECTION 0


/* UTF-16 big endian byte order mark.  */
#define BOM_UTF16 0xfeff
f957edde
AK
89
/* Direction of the transformation.  */
enum direction
{
  /* Charset pair not handled by this module.  */
  illegal_dir,
  /* UTF-16BE -> UTF-8.  */
  to_utf8,
  /* UTF-8 -> UTF-16 / UTF-16BE.  */
  from_utf8
};
97
/* Per-step private data, allocated in gconv_init and stored in
   step->__data.  */
struct utf8_data
{
  /* Conversion direction selected for this step.  */
  enum direction dir;
  /* Nonzero if the target charset name was "UTF-16//", in which case
     PREPARE_LOOP writes a byte order mark first.  */
  int emit_bom;
};
103
104
105extern int gconv_init (struct __gconv_step *step);
106int
107gconv_init (struct __gconv_step *step)
108{
109 /* Determine which direction. */
110 struct utf8_data *new_data;
111 enum direction dir = illegal_dir;
112 int emit_bom;
113 int result;
114
115 emit_bom = (__strcasecmp (step->__to_name, "UTF-16//") == 0);
116
117 if (__strcasecmp (step->__from_name, "ISO-10646/UTF8/") == 0
118 && (__strcasecmp (step->__to_name, "UTF-16//") == 0
119 || __strcasecmp (step->__to_name, "UTF-16BE//") == 0))
120 {
121 dir = from_utf8;
122 }
123 else if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0
124 && __strcasecmp (step->__to_name, "ISO-10646/UTF8/") == 0)
125 {
126 dir = to_utf8;
127 }
128
129 result = __GCONV_NOCONV;
130 if (dir != illegal_dir)
131 {
132 new_data = (struct utf8_data *) malloc (sizeof (struct utf8_data));
133
134 result = __GCONV_NOMEM;
135 if (new_data != NULL)
136 {
137 new_data->dir = dir;
138 new_data->emit_bom = emit_bom;
139 step->__data = new_data;
140
141 if (dir == from_utf8)
142 {
143 step->__min_needed_from = MIN_NEEDED_FROM;
144 step->__max_needed_from = MIN_NEEDED_FROM;
145 step->__min_needed_to = MIN_NEEDED_TO;
146 step->__max_needed_to = MIN_NEEDED_TO;
147 }
148 else
149 {
150 step->__min_needed_from = MIN_NEEDED_TO;
151 step->__max_needed_from = MIN_NEEDED_TO;
152 step->__min_needed_to = MIN_NEEDED_FROM;
153 step->__max_needed_to = MIN_NEEDED_FROM;
154 }
155
156 step->__stateful = 0;
157
158 result = __GCONV_OK;
159 }
160 }
161
162 return result;
163}
164
165
/* Module teardown entry point: releases the private data block that
   gconv_init stored in DATA->__data.  */
extern void gconv_end (struct __gconv_step *data);
void
gconv_end (struct __gconv_step *data)
{
  void *step_private = data->__data;

  free (step_private);
}
172
/* The macro for the hardware loop.  This is used for both
   directions.

   INSTRUCTION is the asm text of the convert instruction; in it %0
   refers to the output pointer and %1 to the input pointer.  The
   pointers and lengths are pinned to registers 8/9 (input, input
   length) and 10/11 (output, output length).

   The instruction is re-issued as long as "jo" branches.  Afterwards
   the condition code is fetched with ipm and moved down into the low
   bits of CC: 1 is reported as __GCONV_FULL_OUTPUT, 2 as
   __GCONV_ILLEGAL_INPUT, any other value leaves RESULT untouched.
   INPTR and OUTPTR of the enclosing loop are advanced to where the
   instruction stopped.  */
#define HARDWARE_CONVERT(INSTRUCTION) \
  { \
    register const unsigned char* pInput __asm__ ("8") = inptr; \
    register size_t inlen __asm__ ("9") = inend - inptr; \
    register unsigned char* pOutput __asm__ ("10") = outptr; \
    register size_t outlen __asm__("11") = outend - outptr; \
    unsigned long cc = 0; \
 \
    __asm__ __volatile__ (".machine push \n\t" \
			  ".machine \"z9-109\" \n\t" \
			  ".machinemode \"zarch_nohighgprs\"\n\t" \
			  "0: " INSTRUCTION " \n\t" \
			  ".machine pop \n\t" \
			  " jo 0b \n\t" \
			  " ipm %2 \n" \
			  : "+a" (pOutput), "+a" (pInput), "+d" (cc), \
			    "+d" (outlen), "+d" (inlen) \
			  : \
			  : "cc", "memory"); \
 \
    inptr = pInput; \
    outptr = pOutput; \
    cc >>= 28; \
 \
    if (cc == 1) \
      { \
	result = __GCONV_FULL_OUTPUT; \
      } \
    else if (cc == 2) \
      { \
	result = __GCONV_ILLEGAL_INPUT; \
      } \
  }
208
5bd11b19
SL
/* Per-call setup code.  Reads the direction and the BOM flag from the
   step's private data.  On the first invocation of a non-internal
   conversion with the BOM flag set, a big endian UTF-16 byte order
   mark is written to OUTBUF; if fewer than two bytes are free the
   enclosing function returns __GCONV_FULL_OUTPUT.
   STEP, DATA, OUTBUF and OUTEND are names of the code this macro is
   expanded into.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct utf8_data *) step->__data)->dir; \
  int emit_bom = ((struct utf8_data *) step->__data)->emit_bom; \
 \
  if (emit_bom && !data->__internal_use \
      && data->__invocation_counter == 0) \
    { \
      /* Emit the UTF-16 Byte Order Mark. */ \
      if (__glibc_unlikely (outbuf + 2 > outend)) \
	return __GCONV_FULL_OUTPUT; \
 \
      put16u (outbuf, BOM_UTF16); \
      outbuf += 2; \
    }
223
/* Conversion function from UTF-8 to UTF-16.

   ASM is the hardware conversion code (HARDWARE_CONVERT or
   HW_FROM_VX).  After it ran, the loop is left if all input was
   consumed or the output buffer is full.  Otherwise the bytes
   following INPTR are scanned (at most up to index 4) for the first
   one that is not a continuation byte (10xxxxxx).  If only
   continuation bytes follow up to INEND and RESULT is still
   __GCONV_EMPTY_INPUT, the trailing character is reported as
   incomplete.  In all other cases the standard error handler is
   invoked with the number of bytes I to skip.  */
#define BODY_FROM_HW(ASM) \
  { \
    ASM; \
    if (__glibc_likely (inptr == inend) \
	|| result == __GCONV_FULL_OUTPUT) \
      break; \
 \
    int i; \
    for (i = 1; inptr + i < inend && i < 5; ++i) \
      if ((inptr[i] & 0xc0) != 0x80) \
	break; \
 \
    if (__glibc_likely (inptr + i == inend \
			&& result == __GCONV_EMPTY_INPUT)) \
      { \
	result = __GCONV_INCOMPLETE_INPUT; \
	break; \
      } \
    STANDARD_FROM_LOOP_ERR_HANDLER (i); \
  }
245
df6cc7ee
SL
#if HAVE_FROM_VX == 1
/* UTF-8 -> UTF-16 conversion using vector instructions for runs of
   1-byte characters and cu12 for everything else.

   While at least 16 input bytes and 32 output bytes are available, 16
   bytes are loaded and, if all of them are <= 0x7f, widened to 16
   UTF-16 units and stored.  If a byte > 0x7f is found, the 1-byte
   characters preceding it are stored and cu12 takes over at that
   position.  When cu12 ends with "jo" taken, the vector loop is tried
   again; otherwise RESULT is set to __GCONV_FULL_OUTPUT or
   __GCONV_ILLEGAL_INPUT according to the condition code and INPTR and
   OUTPTR are updated.  */
# define HW_FROM_VX \
  { \
    register const unsigned char* pInput asm ("8") = inptr; \
    register size_t inlen asm ("9") = inend - inptr; \
    register unsigned char* pOutput asm ("10") = outptr; \
    register size_t outlen asm("11") = outend - outptr; \
    unsigned long tmp, tmp2, tmp3; \
    asm volatile (".machine push\n\t" \
		  ".machine \"z13\"\n\t" \
		  ".machinemode \"zarch_nohighgprs\"\n\t" \
		  " vrepib %%v30,0x7f\n\t" /* For compare > 0x7f. */ \
		  " vrepib %%v31,0x20\n\t" \
		  CONVERT_32BIT_SIZE_T ([R_INLEN]) \
		  CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \
		  /* Loop which handles UTF-8 chars <=0x7f. */ \
		  "0: clgijl %[R_INLEN],16,20f\n\t" \
		  " clgijl %[R_OUTLEN],32,20f\n\t" \
		  "1: vl %%v16,0(%[R_IN])\n\t" \
		  " vstrcbs %%v17,%%v16,%%v30,%%v31\n\t" \
		  " jno 10f\n\t" /* Jump away if not all bytes are 1byte \
				    UTF8 chars. */ \
		  /* Enlarge to UTF-16. */ \
		  " vuplhb %%v18,%%v16\n\t" \
		  " la %[R_IN],16(%[R_IN])\n\t" \
		  " vupllb %%v19,%%v16\n\t" \
		  " aghi %[R_INLEN],-16\n\t" \
		  /* Store 32 bytes to buf_out. */ \
		  " vstm %%v18,%%v19,0(%[R_OUT])\n\t" \
		  " aghi %[R_OUTLEN],-32\n\t" \
		  " la %[R_OUT],32(%[R_OUT])\n\t" \
		  " clgijl %[R_INLEN],16,20f\n\t" \
		  " clgijl %[R_OUTLEN],32,20f\n\t" \
		  " j 1b\n\t" \
		  "10:\n\t" \
		  /* At least one byte is > 0x7f. \
		     Store the preceding 1-byte chars. */ \
		  " vlgvb %[R_TMP],%%v17,7\n\t" \
		  " sllk %[R_TMP2],%[R_TMP],1\n\t" /* Compute highest \
						      index to store. */ \
		  " llgfr %[R_TMP3],%[R_TMP2]\n\t" \
		  " ahi %[R_TMP2],-1\n\t" \
		  " jl 20f\n\t" \
		  " vuplhb %%v18,%%v16\n\t" \
		  " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \
		  " ahi %[R_TMP2],-16\n\t" \
		  " jl 11f\n\t" \
		  " vupllb %%v19,%%v16\n\t" \
		  " vstl %%v19,%[R_TMP2],16(%[R_OUT])\n\t" \
		  "11: \n\t" /* Update pointers. */ \
		  " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \
		  " slgr %[R_INLEN],%[R_TMP]\n\t" \
		  " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \
		  " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \
		  /* Handle multibyte utf8-char with convert instruction. */ \
		  "20: cu12 %[R_OUT],%[R_IN],1\n\t" \
		  " jo 0b\n\t" /* Try vector implementation again. */ \
		  " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \
		  " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. */ \
		  ".machine pop" \
		  : /* outputs */ [R_IN] "+a" (pInput) \
		    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \
		    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \
		    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \
		    , [R_RES] "+d" (result) \
		  : /* inputs */ \
		    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \
		    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \
		  : /* clobber list */ "memory", "cc" \
		    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
		    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
		    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \
		  ); \
    inptr = pInput; \
    outptr = pOutput; \
  }
/* The shared post-processing of BODY_FROM_HW handles incomplete and
   illegal input left over by the asm.  */
# define BODY_FROM_VX BODY_FROM_HW (HW_FROM_VX)

/* Generate loop-function with hardware vector and utf-convert instructions. */
# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
# define MAX_NEEDED_INPUT MAX_NEEDED_FROM
# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
# define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
# define FROM_LOOP_VX __from_utf8_loop_vx
# define LOOPFCT FROM_LOOP_VX
# define LOOP_NEED_FLAGS
# define BODY BODY_FROM_VX
# include <iconv/loop.c>
#else
/* No vector variant is built.  */
# define FROM_LOOP_VX NULL
#endif /* HAVE_FROM_VX != 1 */
337
#if HAVE_FROM_CU == 1
/* UTF-8 -> UTF-16 loop body that relies on the cu12 instruction alone.
   In the instruction text %0 is the output and %1 the input operand of
   HARDWARE_CONVERT.  */
# define BODY_FROM_ETF3EH BODY_FROM_HW (HARDWARE_CONVERT ("cu12 %0, %1, 1"))

/* Generate loop-function with hardware utf-convert instruction. */
# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
# define MAX_NEEDED_INPUT MAX_NEEDED_FROM
# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
# define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
# define FROM_LOOP_CU __from_utf8_loop_etf3eh
# define LOOPFCT FROM_LOOP_CU
# define LOOP_NEED_FLAGS
# define BODY BODY_FROM_ETF3EH
# include <iconv/loop.c>
#else
/* No cu12-only variant is built.  */
# define FROM_LOOP_CU NULL
#endif /* HAVE_FROM_CU != 1 */
354
#if HAVE_FROM_C == 1
/* The software implementation is based on the code in gconv_simple.c.

   Converts one UTF-8 character per invocation.  The lead byte selects
   the expected sequence length CNT (1 to 4).  Sequences of up to
   three bytes are decoded into CH and stored as a single 16-bit unit
   at the end of the body.  Four byte sequences are converted into a
   high/low surrogate pair and leave the body via "continue".  Lead
   bytes that match no length class, bad trail bytes, overlong 3-byte
   forms, encoded surrogates and out-of-range 4-byte forms go through
   the standard error handler; a valid prefix cut off by INEND yields
   __GCONV_INCOMPLETE_INPUT.  */
# define BODY_FROM_C \
  { \
    /* Next input byte. */ \
    uint16_t ch = *inptr; \
 \
    if (__glibc_likely (ch < 0x80)) \
      { \
	/* One byte sequence. */ \
	++inptr; \
      } \
    else \
      { \
	uint_fast32_t cnt; \
	uint_fast32_t i; \
 \
	if (ch >= 0xc2 && ch < 0xe0) \
	  { \
	    /* We expect two bytes. The first byte cannot be 0xc0 \
	       or 0xc1, otherwise the wide character could have been \
	       represented using a single byte. */ \
	    cnt = 2; \
	    ch &= 0x1f; \
	  } \
	else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
	  { \
	    /* We expect three bytes. */ \
	    cnt = 3; \
	    ch &= 0x0f; \
	  } \
	else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
	  { \
	    /* We expect four bytes. */ \
	    cnt = 4; \
	    ch &= 0x07; \
	  } \
	else \
	  { \
	    /* Search the end of this ill-formed UTF-8 character. This \
	       is the next byte with (x & 0xc0) != 0x80. */ \
	    i = 0; \
	    do \
	      ++i; \
	    while (inptr + i < inend \
		   && (*(inptr + i) & 0xc0) == 0x80 \
		   && i < 5); \
 \
	  errout: \
	    STANDARD_FROM_LOOP_ERR_HANDLER (i); \
	  } \
 \
	if (__glibc_unlikely (inptr + cnt > inend)) \
	  { \
	    /* We don't have enough input. But before we report \
	       that check that all the bytes are correct. */ \
	    for (i = 1; inptr + i < inend; ++i) \
	      if ((inptr[i] & 0xc0) != 0x80) \
		break; \
 \
	    if (__glibc_likely (inptr + i == inend)) \
	      { \
		result = __GCONV_INCOMPLETE_INPUT; \
		break; \
	      } \
 \
	    goto errout; \
	  } \
 \
	if (cnt == 4) \
	  { \
	    /* For 4 byte UTF-8 chars two UTF-16 chars (high and \
	       low) are needed. */ \
	    uint16_t zabcd, high, low; \
 \
	    if (__glibc_unlikely (outptr + 4 > outend)) \
	      { \
		/* Overflow in the output buffer. */ \
		result = __GCONV_FULL_OUTPUT; \
		break; \
	      } \
 \
	    /* Check if tail-bytes >= 0x80, < 0xc0. */ \
	    for (i = 1; i < cnt; ++i) \
	      { \
		if ((inptr[i] & 0xc0) != 0x80) \
		  /* This is an illegal encoding. */ \
		  goto errout; \
	      } \
 \
	    /* See Principles of Operations cu12. */ \
	    zabcd = (((inptr[0] & 0x7) << 2) | \
		     ((inptr[1] & 0x30) >> 4)) - 1; \
 \
	    /* z-bit must be zero after subtracting 1. */ \
	    if (zabcd & 0x10) \
	      STANDARD_FROM_LOOP_ERR_HANDLER (4) \
 \
	    high = (uint16_t)(0xd8 << 8); /* high surrogate id */ \
	    high |= zabcd << 6; /* abcd bits */ \
	    high |= (inptr[1] & 0xf) << 2; /* efgh bits */ \
	    high |= (inptr[2] & 0x30) >> 4; /* ij bits */ \
 \
	    low = (uint16_t)(0xdc << 8); /* low surrogate id */ \
	    low |= ((uint16_t)inptr[2] & 0xc) << 6; /* kl bits */ \
	    low |= (inptr[2] & 0x3) << 6; /* mn bits */ \
	    low |= inptr[3] & 0x3f; /* opqrst bits */ \
 \
	    put16 (outptr, high); \
	    outptr += 2; \
	    put16 (outptr, low); \
	    outptr += 2; \
	    inptr += 4; \
	    continue; \
	  } \
	else \
	  { \
	    /* Read the possible remaining bytes. */ \
	    for (i = 1; i < cnt; ++i) \
	      { \
		uint16_t byte = inptr[i]; \
 \
		if ((byte & 0xc0) != 0x80) \
		  /* This is an illegal encoding. */ \
		  break; \
 \
		ch <<= 6; \
		ch |= byte & 0x3f; \
	      } \
 \
	    /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
	       If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
	       have been represented with fewer than cnt bytes. */ \
	    if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
		/* Do not accept UTF-16 surrogates. */ \
		|| (ch >= 0xd800 && ch <= 0xdfff)) \
	      { \
		/* This is an illegal encoding. */ \
		goto errout; \
	      } \
 \
	    inptr += cnt; \
	  } \
      } \
    /* Now adjust the pointers and store the result. */ \
    *((uint16_t *) outptr) = ch; \
    outptr += sizeof (uint16_t); \
  }

/* Generate loop-function with software implementation. */
# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
# define MAX_NEEDED_INPUT MAX_NEEDED_FROM
# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
# define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
# define FROM_LOOP_C __from_utf8_loop_c
# define LOOPFCT FROM_LOOP_C
# define LOOP_NEED_FLAGS
# define BODY BODY_FROM_C
# include <iconv/loop.c>
#else
/* No software variant is built.  */
# define FROM_LOOP_C NULL
#endif /* HAVE_FROM_C != 1 */
5bd11b19 517
f957edde
AK
518/* Conversion from UTF-16 to UTF-8. */
519
#if HAVE_TO_C == 1
/* The software routine is based on the functionality of the S/390
   hardware instruction (cu21) as described in the Principles of
   Operation.

   Converts one UTF-16 character per invocation.  Values up to 0x7f
   become one byte, up to 0x7ff two bytes, all other non-surrogate
   values three bytes.  A high surrogate (0xd800..0xdbff) must be
   followed by a low surrogate (0xdc00..0xdfff); the pair becomes four
   bytes.  A lone low surrogate, or a high surrogate followed by
   something else, goes through the standard error handler.  A high
   surrogate cut off by INEND yields __GCONV_INCOMPLETE_INPUT.  */
# define BODY_TO_C \
  { \
    uint16_t c = get16 (inptr); \
 \
    if (__glibc_likely (c <= 0x007f)) \
      { \
	/* Single byte UTF-8 char. */ \
	*outptr = c & 0xff; \
	outptr++; \
      } \
    else if (c >= 0x0080 && c <= 0x07ff) \
      { \
	/* Two byte UTF-8 char. */ \
 \
	if (__glibc_unlikely (outptr + 2 > outend)) \
	  { \
	    /* Overflow in the output buffer. */ \
	    result = __GCONV_FULL_OUTPUT; \
	    break; \
	  } \
 \
	outptr[0] = 0xc0; \
	outptr[0] |= c >> 6; \
 \
	outptr[1] = 0x80; \
	outptr[1] |= c & 0x3f; \
 \
	outptr += 2; \
      } \
    else if ((c >= 0x0800 && c <= 0xd7ff) || c > 0xdfff) \
      { \
	/* Three byte UTF-8 char. */ \
 \
	if (__glibc_unlikely (outptr + 3 > outend)) \
	  { \
	    /* Overflow in the output buffer. */ \
	    result = __GCONV_FULL_OUTPUT; \
	    break; \
	  } \
	outptr[0] = 0xe0; \
	outptr[0] |= c >> 12; \
 \
	outptr[1] = 0x80; \
	outptr[1] |= (c >> 6) & 0x3f; \
 \
	outptr[2] = 0x80; \
	outptr[2] |= c & 0x3f; \
 \
	outptr += 3; \
      } \
    else if (c >= 0xd800 && c <= 0xdbff) \
      { \
	/* Four byte UTF-8 char. */ \
	uint16_t low, uvwxy; \
 \
	if (__glibc_unlikely (outptr + 4 > outend)) \
	  { \
	    /* Overflow in the output buffer. */ \
	    result = __GCONV_FULL_OUTPUT; \
	    break; \
	  } \
	if (__glibc_unlikely (inptr + 4 > inend)) \
	  { \
	    result = __GCONV_INCOMPLETE_INPUT; \
	    break; \
	  } \
 \
	inptr += 2; \
	low = get16 (inptr); \
 \
	if ((low & 0xfc00) != 0xdc00) \
	  { \
	    /* Point back at the high surrogate before reporting. */ \
	    inptr -= 2; \
	    STANDARD_TO_LOOP_ERR_HANDLER (2); \
	  } \
	uvwxy = ((c >> 6) & 0xf) + 1; \
	outptr[0] = 0xf0; \
	outptr[0] |= uvwxy >> 2; \
 \
	outptr[1] = 0x80; \
	outptr[1] |= (uvwxy << 4) & 0x30; \
	outptr[1] |= (c >> 2) & 0x0f; \
 \
	outptr[2] = 0x80; \
	outptr[2] |= (c & 0x03) << 4; \
	outptr[2] |= (low >> 6) & 0x0f; \
 \
	outptr[3] = 0x80; \
	outptr[3] |= low & 0x3f; \
 \
	outptr += 4; \
      } \
    else \
      { \
	STANDARD_TO_LOOP_ERR_HANDLER (2); \
      } \
    /* Skip the (last) UTF-16 unit that was just converted. */ \
    inptr += 2; \
  }

/* Generate loop-function with software implementation. */
# define MIN_NEEDED_INPUT MIN_NEEDED_TO
# define MAX_NEEDED_INPUT MAX_NEEDED_TO
# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
# define TO_LOOP_C __to_utf8_loop_c
# define LOOPFCT TO_LOOP_C
# define BODY BODY_TO_C
# define LOOP_NEED_FLAGS
# include <iconv/loop.c>
#else
/* No software variant is built.  */
# define TO_LOOP_C NULL
#endif /* HAVE_TO_C != 1 */
636
#if HAVE_TO_VX == 1
/* UTF-16 -> UTF-8 conversion implemented with vector instructions and
   plain integer code (no cu21).

   While at least 32 input bytes and 16 output bytes are available, 16
   UTF-16 units are loaded and, if all are <= 0x7f, packed to 16 bytes
   and stored.  Otherwise the 1-byte characters preceding the first
   larger unit are stored and the remaining units are converted one at
   a time by the scalar code at labels 20 to 34, which handles 1, 2, 3
   and 4 byte UTF-8 results.  The asm sets RESULT itself to
   __GCONV_FULL_OUTPUT, __GCONV_INCOMPLETE_INPUT or
   __GCONV_ILLEGAL_INPUT.  Only in the illegal input case with input
   left over is the standard error handler invoked afterwards.  */
# define BODY_TO_VX \
  { \
    size_t inlen = inend - inptr; \
    size_t outlen = outend - outptr; \
    unsigned long tmp, tmp2, tmp3; \
    asm volatile (".machine push\n\t" \
		  ".machine \"z13\"\n\t" \
		  ".machinemode \"zarch_nohighgprs\"\n\t" \
		  /* Setup to check for values <= 0x7f. */ \
		  " larl %[R_TMP],9f\n\t" \
		  " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \
		  CONVERT_32BIT_SIZE_T ([R_INLEN]) \
		  CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \
		  /* Loop which handles UTF-16 chars <=0x7f. */ \
		  "0: clgijl %[R_INLEN],32,2f\n\t" \
		  " clgijl %[R_OUTLEN],16,2f\n\t" \
		  "1: vlm %%v16,%%v17,0(%[R_IN])\n\t" \
		  " lghi %[R_TMP2],0\n\t" \
		  /* Check for > 1byte UTF-8 chars. */ \
		  " vstrchs %%v19,%%v16,%%v30,%%v31\n\t" \
		  " jno 10f\n\t" /* Jump away if not all bytes are 1byte \
				    UTF8 chars. */ \
		  " vstrchs %%v19,%%v17,%%v30,%%v31\n\t" \
		  " jno 11f\n\t" /* Jump away if not all bytes are 1byte \
				    UTF8 chars. */ \
		  /* Shorten to UTF-8. */ \
		  " vpkh %%v18,%%v16,%%v17\n\t" \
		  " la %[R_IN],32(%[R_IN])\n\t" \
		  " aghi %[R_INLEN],-32\n\t" \
		  /* Store 16 bytes to buf_out. */ \
		  " vst %%v18,0(%[R_OUT])\n\t" \
		  " aghi %[R_OUTLEN],-16\n\t" \
		  " la %[R_OUT],16(%[R_OUT])\n\t" \
		  " clgijl %[R_INLEN],32,2f\n\t" \
		  " clgijl %[R_OUTLEN],16,2f\n\t" \
		  " j 1b\n\t" \
		  /* Setup to check for ch > 0x7f. (v30, v31) */ \
		  "9: .short 0x7f,0x7f,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
		  " .short 0x2000,0x2000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
		  /* At least one byte is > 0x7f. \
		     Store the preceding 1-byte chars. */ \
		  "11: lghi %[R_TMP2],16\n\t" /* match was found in v17. */ \
		  "10:\n\t" \
		  " vlgvb %[R_TMP],%%v19,7\n\t" \
		  /* Shorten to UTF-8. */ \
		  " vpkh %%v18,%%v16,%%v17\n\t" \
		  " ar %[R_TMP],%[R_TMP2]\n\t" /* Number of in bytes. */ \
		  " srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes. */ \
		  " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store. */ \
		  " jl 13f\n\t" \
		  " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \
		  /* Update pointers. */ \
		  " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \
		  " slgr %[R_INLEN],%[R_TMP]\n\t" \
		  " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \
		  " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \
		  "13: \n\t" \
		  /* Calculate remaining uint16_t values in loaded vrs. */ \
		  " lghi %[R_TMP2],16\n\t" \
		  " slgr %[R_TMP2],%[R_TMP3]\n\t" \
		  " llh %[R_TMP],0(%[R_IN])\n\t" \
		  " aghi %[R_INLEN],-2\n\t" \
		  " j 22f\n\t" \
		  /* Handle remaining bytes. */ \
		  "2: \n\t" \
		  /* Zero, one or more bytes available? */ \
		  " clgfi %[R_INLEN],1\n\t" \
		  " locghie %[R_RES],%[RES_IN_FULL]\n\t" /* Only one byte. */ \
		  " jle 99f\n\t" /* End if less than two bytes. */ \
		  /* Calculate remaining uint16_t values in inptr. */ \
		  " srlg %[R_TMP2],%[R_INLEN],1\n\t" \
		  /* Handle multibyte utf8-char. */ \
		  "20: llh %[R_TMP],0(%[R_IN])\n\t" \
		  " aghi %[R_INLEN],-2\n\t" \
		  /* Test if ch is 1-byte UTF-8 char. */ \
		  "21: clijh %[R_TMP],0x7f,22f\n\t" \
		  /* Handle 1-byte UTF-8 char. */ \
		  "31: slgfi %[R_OUTLEN],1\n\t" \
		  " jl 90f \n\t" \
		  " stc %[R_TMP],0(%[R_OUT])\n\t" \
		  " la %[R_IN],2(%[R_IN])\n\t" \
		  " la %[R_OUT],1(%[R_OUT])\n\t" \
		  " brctg %[R_TMP2],20b\n\t" \
		  " j 0b\n\t" /* Switch to vx-loop. */ \
		  /* Test if ch is 2-byte UTF-8 char. */ \
		  "22: clfi %[R_TMP],0x7ff\n\t" \
		  " jh 23f\n\t" \
		  /* Handle 2-byte UTF-8 char. */ \
		  "32: slgfi %[R_OUTLEN],2\n\t" \
		  " jl 90f \n\t" \
		  " llill %[R_TMP3],0xc080\n\t" \
		  " la %[R_IN],2(%[R_IN])\n\t" \
		  " risbgn %[R_TMP3],%[R_TMP],51,55,2\n\t" /* 1. byte. */ \
		  " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 2. byte. */ \
		  " sth %[R_TMP3],0(%[R_OUT])\n\t" \
		  " la %[R_OUT],2(%[R_OUT])\n\t" \
		  " brctg %[R_TMP2],20b\n\t" \
		  " j 0b\n\t" /* Switch to vx-loop. */ \
		  /* Test if ch is 3-byte UTF-8 char. */ \
		  "23: clfi %[R_TMP],0xd7ff\n\t" \
		  " jh 24f\n\t" \
		  /* Handle 3-byte UTF-8 char. */ \
		  "33: slgfi %[R_OUTLEN],3\n\t" \
		  " jl 90f \n\t" \
		  " llilf %[R_TMP3],0xe08080\n\t" \
		  " la %[R_IN],2(%[R_IN])\n\t" \
		  " risbgn %[R_TMP3],%[R_TMP],44,47,4\n\t" /* 1. byte. */ \
		  " risbgn %[R_TMP3],%[R_TMP],50,55,2\n\t" /* 2. byte. */ \
		  " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 3. byte. */ \
		  " stcm %[R_TMP3],7,0(%[R_OUT])\n\t" \
		  " la %[R_OUT],3(%[R_OUT])\n\t" \
		  " brctg %[R_TMP2],20b\n\t" \
		  " j 0b\n\t" /* Switch to vx-loop. */ \
		  /* Test if ch is 4-byte UTF-8 char. */ \
		  "24: clfi %[R_TMP],0xdfff\n\t" \
		  " jh 33b\n\t" /* Handle this 3-byte UTF-8 char. */ \
		  " clfi %[R_TMP],0xdbff\n\t" \
		  " locghih %[R_RES],%[RES_IN_ILL]\n\t" \
		  " jh 99f\n\t" /* Jump away if this is a low surrogate \
				   without a preceding high surrogate. */ \
		  /* Handle 4-byte UTF-8 char. */ \
		  "34: slgfi %[R_OUTLEN],4\n\t" \
		  " jl 90f \n\t" \
		  " slgfi %[R_INLEN],2\n\t" \
		  " locghil %[R_RES],%[RES_IN_FULL]\n\t" \
		  " jl 99f\n\t" /* Jump away if low surrogate is missing. */ \
		  " llilf %[R_TMP3],0xf0808080\n\t" \
		  " aghi %[R_TMP],0x40\n\t" \
		  " risbgn %[R_TMP3],%[R_TMP],37,39,16\n\t" /* 1. byte: uvw */ \
		  " risbgn %[R_TMP3],%[R_TMP],42,43,14\n\t" /* 2. byte: xy */ \
		  " risbgn %[R_TMP3],%[R_TMP],44,47,14\n\t" /* 2. byte: efgh */ \
		  " risbgn %[R_TMP3],%[R_TMP],50,51,12\n\t" /* 3. byte: ij */ \
		  " llh %[R_TMP],2(%[R_IN])\n\t" /* Load low surrogate. */ \
		  " risbgn %[R_TMP3],%[R_TMP],52,55,2\n\t" /* 3. byte: klmn */ \
		  " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 4. byte: opqrst */ \
		  " nilf %[R_TMP],0xfc00\n\t" \
		  " clfi %[R_TMP],0xdc00\n\t" /* Check if it starts with 0xdc00. */ \
		  " locghine %[R_RES],%[RES_IN_ILL]\n\t" \
		  " jne 99f\n\t" /* Jump away if low surrogate is invalid. */ \
		  " st %[R_TMP3],0(%[R_OUT])\n\t" \
		  " la %[R_IN],4(%[R_IN])\n\t" \
		  " la %[R_OUT],4(%[R_OUT])\n\t" \
		  " aghi %[R_TMP2],-2\n\t" \
		  " jh 20b\n\t" \
		  " j 0b\n\t" /* Switch to vx-loop. */ \
		  /* Exit with __GCONV_FULL_OUTPUT. */ \
		  "90: lghi %[R_RES],%[RES_OUT_FULL]\n\t" \
		  "99: \n\t" \
		  ".machine pop" \
		  : /* outputs */ [R_IN] "+a" (inptr) \
		    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (outptr) \
		    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \
		    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \
		    , [R_RES] "+d" (result) \
		  : /* inputs */ \
		    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \
		    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \
		    , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT) \
		  : /* clobber list */ "memory", "cc" \
		    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
		    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
		    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \
		  ); \
    if (__glibc_likely (inptr == inend) \
	|| result != __GCONV_ILLEGAL_INPUT) \
      break; \
 \
    STANDARD_TO_LOOP_ERR_HANDLER (2); \
  }

/* Generate loop-function with vector implementation. */
# define MIN_NEEDED_INPUT MIN_NEEDED_TO
# define MAX_NEEDED_INPUT MAX_NEEDED_TO
# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
# define TO_LOOP_VX __to_utf8_loop_vx
# define LOOPFCT TO_LOOP_VX
# define BODY BODY_TO_VX
# define LOOP_NEED_FLAGS
# include <iconv/loop.c>
#else
/* No vector variant is built.  */
# define TO_LOOP_VX NULL
#endif /* HAVE_TO_VX != 1 */
821
a37b5daa
SL
#if HAVE_TO_VX_CU == 1
/* UTF-16 -> UTF-8 conversion using vector instructions for runs of
   units <= 0x7f and the cu21 instruction for everything else.

   The vector part matches BODY_TO_VX: 16 UTF-16 units are packed to
   16 bytes while all of them are <= 0x7f.  At the first larger unit,
   or when fewer than 32 input / 16 output bytes remain, cu21
   continues at the current position.  When cu21 ends with "jo" taken
   the vector loop is tried again; otherwise RESULT is set from the
   condition code.  The C code after the asm classifies leftover
   input as incomplete (odd trailing byte or lone high surrogate at
   the end) or hands it to the standard error handler.  */
#define BODY_TO_VX_CU \
  { \
    register const unsigned char* pInput asm ("8") = inptr; \
    register size_t inlen asm ("9") = inend - inptr; \
    register unsigned char* pOutput asm ("10") = outptr; \
    register size_t outlen asm ("11") = outend - outptr; \
    unsigned long tmp, tmp2, tmp3; \
    asm volatile (".machine push\n\t" \
		  ".machine \"z13\"\n\t" \
		  ".machinemode \"zarch_nohighgprs\"\n\t" \
		  /* Setup to check for values <= 0x7f. */ \
		  " larl %[R_TMP],9f\n\t" \
		  " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \
		  CONVERT_32BIT_SIZE_T ([R_INLEN]) \
		  CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \
		  /* Loop which handles UTF-16 chars <=0x7f. */ \
		  "0: clgijl %[R_INLEN],32,20f\n\t" \
		  " clgijl %[R_OUTLEN],16,20f\n\t" \
		  "1: vlm %%v16,%%v17,0(%[R_IN])\n\t" \
		  " lghi %[R_TMP2],0\n\t" \
		  /* Check for > 1byte UTF-8 chars. */ \
		  " vstrchs %%v19,%%v16,%%v30,%%v31\n\t" \
		  " jno 10f\n\t" /* Jump away if not all bytes are 1byte \
				    UTF8 chars. */ \
		  " vstrchs %%v19,%%v17,%%v30,%%v31\n\t" \
		  " jno 11f\n\t" /* Jump away if not all bytes are 1byte \
				    UTF8 chars. */ \
		  /* Shorten to UTF-8. */ \
		  " vpkh %%v18,%%v16,%%v17\n\t" \
		  " la %[R_IN],32(%[R_IN])\n\t" \
		  " aghi %[R_INLEN],-32\n\t" \
		  /* Store 16 bytes to buf_out. */ \
		  " vst %%v18,0(%[R_OUT])\n\t" \
		  " aghi %[R_OUTLEN],-16\n\t" \
		  " la %[R_OUT],16(%[R_OUT])\n\t" \
		  " clgijl %[R_INLEN],32,20f\n\t" \
		  " clgijl %[R_OUTLEN],16,20f\n\t" \
		  " j 1b\n\t" \
		  /* Setup to check for ch > 0x7f. (v30, v31) */ \
		  "9: .short 0x7f,0x7f,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
		  " .short 0x2000,0x2000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
		  /* At least one byte is > 0x7f. \
		     Store the preceding 1-byte chars. */ \
		  "11: lghi %[R_TMP2],16\n\t" /* match was found in v17. */ \
		  "10: vlgvb %[R_TMP],%%v19,7\n\t" \
		  /* Shorten to UTF-8. */ \
		  " vpkh %%v18,%%v16,%%v17\n\t" \
		  " ar %[R_TMP],%[R_TMP2]\n\t" /* Number of in bytes. */ \
		  " srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes. */ \
		  " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store. */ \
		  " jl 20f\n\t" \
		  " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \
		  /* Update pointers. */ \
		  " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \
		  " slgr %[R_INLEN],%[R_TMP]\n\t" \
		  " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \
		  " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \
		  /* Handles UTF16 surrogates with convert instruction. */ \
		  "20: cu21 %[R_OUT],%[R_IN],1\n\t" \
		  " jo 0b\n\t" /* Try vector implementation again. */ \
		  " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \
		  " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. */ \
		  ".machine pop" \
		  : /* outputs */ [R_IN] "+a" (pInput) \
		    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \
		    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \
		    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \
		    , [R_RES] "+d" (result) \
		  : /* inputs */ \
		    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \
		    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \
		  : /* clobber list */ "memory", "cc" \
		    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
		    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
		    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \
		  ); \
    inptr = pInput; \
    outptr = pOutput; \
 \
    if (__glibc_likely (inlen == 0) \
	|| result == __GCONV_FULL_OUTPUT) \
      break; \
    if (inlen == 1) \
      { \
	/* Input does not contain a complete utf16 character. */ \
	result = __GCONV_INCOMPLETE_INPUT; \
	break; \
      } \
    else if (result != __GCONV_ILLEGAL_INPUT) \
      { \
	/* Input is >= 2 and < 4 bytes (as cu21 would have processed \
	   a possible next utf16 character) and not illegal. \
	   => we have a single high surrogate at end of input. */ \
	result = __GCONV_INCOMPLETE_INPUT; \
	break; \
      } \
 \
    STANDARD_TO_LOOP_ERR_HANDLER (2); \
  }

/* Generate loop-function with vector and utf-convert instructions. */
# define MIN_NEEDED_INPUT MIN_NEEDED_TO
# define MAX_NEEDED_INPUT MAX_NEEDED_TO
# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
# define TO_LOOP_VX_CU __to_utf8_loop_vx_cu
# define LOOPFCT TO_LOOP_VX_CU
# define BODY BODY_TO_VX_CU
# define LOOP_NEED_FLAGS
# include <iconv/loop.c>
#else
/* No vector + cu21 variant is built.  */
# define TO_LOOP_VX_CU NULL
#endif /* HAVE_TO_VX_CU != 1 */
936
df6cc7ee
SL
937/* This file also exists in sysdeps/s390/multiarch/ which
938 generates ifunc resolvers for FROM/TO_LOOP functions
939 and includes iconv/skeleton.c afterwards. */
940#if ! defined USE_MULTIARCH
941# include <iconv/skeleton.c>
942#endif