]> git.ipfire.org Git - thirdparty/glibc.git/blob - stdio-common/vfscanf.c
Update.
[thirdparty/glibc.git] / stdio-common / vfscanf.c
1 /* Copyright (C) 1991-2002, 2003, 2004 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, write to the Free
16 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17 02111-1307 USA. */
18
19 #include <assert.h>
20 #include <errno.h>
21 #include <limits.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stdio.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <wchar.h>
29 #include <wctype.h>
30 #include <bits/libc-lock.h>
31 #include <locale/localeinfo.h>
32
/* LONGLONG expands to `long long' when compiled by GNU C (HAVE_LONGLONG
   is defined as well in that case) and to plain `long' with any other
   compiler.  */
33 #ifdef __GNUC__
34 # define HAVE_LONGLONG
35 # define LONGLONG long long
36 #else
37 # define LONGLONG long
38 #endif
39
40 /* Determine whether we have to handle `long long' at all.
   need_longlong is 0 when LONG_MAX equals LONG_LONG_MAX and 1 otherwise.
   It is used as the first operand of `&&' in the conversion code, so the
   `long long' branches are constant-false when it is 0.  */
41 #if LONG_MAX == LONG_LONG_MAX
42 # define need_longlong 0
43 #else
44 # define need_longlong 1
45 #endif
46
47 /* Determine whether we have to handle `long'.
   need_long is 0 when INT_MAX equals LONG_MAX and 1 otherwise; like
   need_longlong it guards the `long' branches of the conversion code.  */
48 #if INT_MAX == LONG_MAX
49 # define need_long 0
50 #else
51 # define need_long 1
52 #endif
53
54 /* These are the flag bits collected in `flags' while one conversion
   specification of the format string is parsed. */
55 #define LONG 0x001 /* l: long or double */
56 #define LONGDBL 0x002 /* L, q or ll: long long or long double */
57 #define SHORT 0x004 /* h: short */
58 #define SUPPRESS 0x008 /* *: suppress assignment */
59 #define POINTER 0x010 /* weird %p pointer (`fake hex') */
60 #define NOSKIP 0x020 /* do not skip blanks */
61 #define WIDTH 0x040 /* width was given */
62 #define GROUP 0x080 /* ': group numbers */
63 #define MALLOC 0x100 /* a (only before s, S or [): malloc strings */
64 #define CHAR 0x200 /* hh: char */
65 #define I18N 0x400 /* I: use locale's digits */
66
67
68 #include <locale/localeinfo.h>
69 #include <libioP.h>
70 #include <libio.h>
71
/* Within this file `va_list' is spelled as libio's _IO_va_list, the type
   in which the scanning function receives its argument list.  */
72 #undef va_list
73 #define va_list _IO_va_list
74
/* Character-type abstraction.  The same source is compiled as the
   wide-character scanner when COMPILE_WSCANF is defined and as the narrow
   scanner otherwise; the macros below hide the difference.

   ungetc, ungetc_not_eof and inchar are not self-contained: they use the
   identifiers `c', `read_in', `inchar_errno' and (for inchar) `s' from the
   scope in which they are expanded.  */
75 #ifdef COMPILE_WSCANF
/* Push C back onto S and decrement read_in, unless C is WEOF.  */
76 # define ungetc(c, s) ((void) (c == WEOF \
77 || (--read_in, \
78 INTUSE(_IO_sputbackwc) (s, c))))
/* Same, but without the WEOF test.  */
79 # define ungetc_not_eof(c, s) ((void) (--read_in, \
80 INTUSE(_IO_sputbackwc) (s, c)))
/* Read the next wide character into `c' and yield it.  On success read_in
   is incremented; on WEOF the current errno is saved in inchar_errno.
   Once `c' is WEOF no further read is attempted: errno is restored from
   inchar_errno and WEOF is yielded again.  */
81 # define inchar() (c == WEOF ? ((errno = inchar_errno), WEOF) \
82 : ((c = _IO_getwc_unlocked (s)), \
83 (void) (c != WEOF \
84 ? ++read_in \
85 : (size_t) (inchar_errno = errno)), c))
86
/* Copy and classification primitives for wide characters.  */
87 # define MEMCPY(d, s, n) __wmemcpy (d, s, n)
88 # define ISSPACE(Ch) iswspace (Ch)
89 # define ISDIGIT(Ch) iswdigit (Ch)
90 # define ISXDIGIT(Ch) iswxdigit (Ch)
91 # define TOLOWER(Ch) towlower (Ch)
/* Request wide orientation for S; return WEOF from the enclosing function
   unless _IO_fwide reports 1.  */
92 # define ORIENT if (_IO_fwide (s, 1) != 1) return WEOF
/* Route the numeric conversion helpers to their wide-character
   counterparts.  */
93 # define __strtoll_internal __wcstoll_internal
94 # define __strtoull_internal __wcstoull_internal
95 # define __strtol_internal __wcstol_internal
96 # define __strtoul_internal __wcstoul_internal
97 # define __strtold_internal __wcstold_internal
98 # define __strtod_internal __wcstod_internal
99 # define __strtof_internal __wcstof_internal
100
/* L_ gives a character or string literal the L prefix; CHAR_T, UCHAR_T
   and WINT_T are the character types of this build.  */
101 # define L_(Str) L##Str
102 # define CHAR_T wchar_t
103 # define UCHAR_T unsigned int
104 # define WINT_T wint_t
/* From here on EOF means WEOF, so code shared by both builds can compare
   against EOF.  */
105 # undef EOF
106 # define EOF WEOF
107 #else
/* Narrow versions of the same three primitives.  The character handed to
   _IO_sputbackc is converted to unsigned char.  */
108 # define ungetc(c, s) ((void) ((int) c == EOF \
109 || (--read_in, \
110 INTUSE(_IO_sputbackc) (s, (unsigned char) c))))
111 # define ungetc_not_eof(c, s) ((void) (--read_in, \
112 INTUSE(_IO_sputbackc) (s, (unsigned char) c)))
113 # define inchar() (c == EOF ? ((errno = inchar_errno), EOF) \
114 : ((c = _IO_getc_unlocked (s)), \
115 (void) (c != EOF \
116 ? ++read_in \
117 : (size_t) (inchar_errno = errno)), c))
/* Copy and classification primitives for bytes.  The classification
   macros use the locale object `loc' of the expanding scope.  */
118 # define MEMCPY(d, s, n) memcpy (d, s, n)
119 # define ISSPACE(Ch) __isspace_l (Ch, loc)
120 # define ISDIGIT(Ch) __isdigit_l (Ch, loc)
121 # define ISXDIGIT(Ch) __isxdigit_l (Ch, loc)
122 # define TOLOWER(Ch) __tolower_l ((unsigned char) (Ch), loc)
/* Request byte orientation for S, but only when _IO_vtable_offset (s) is
   0; return EOF from the enclosing function unless _IO_fwide reports -1.  */
123 # define ORIENT if (_IO_vtable_offset (s) == 0 \
124 && _IO_fwide (s, -1) != -1) \
125 return EOF
126
/* L_ leaves literals unchanged; the character types are the plain byte
   types.  */
127 # define L_(Str) Str
128 # define CHAR_T char
129 # define UCHAR_T unsigned char
130 # define WINT_T int
131 #endif
132
/* Error exits.  Each macro records what went wrong in variables of the
   expanding function (`errval', `done', errno) and then jumps to that
   function's `errout' label.  */

/* Encoding error: errval becomes 4 and errno is set to EILSEQ.  */
133 #define encode_error() do { \
134 errval = 4; \
135 __set_errno (EILSEQ); \
136 goto errout; \
137 } while (0)
/* Conversion (matching) error: errval becomes 2; `done' is left alone.  */
138 #define conv_error() do { \
139 errval = 2; \
140 goto errout; \
141 } while (0)
/* Input error: errval becomes 1, and `done' becomes EOF if it is still 0
   at this point.  */
142 #define input_error() do { \
143 errval = 1; \
144 if (done == 0) done = EOF; \
145 goto errout; \
146 } while (0)
/* Out of memory: errno is set to ENOMEM and `done' becomes EOF
   unconditionally; errval is not touched.  */
147 #define memory_error() do { \
148 __set_errno (ENOMEM); \
149 done = EOF; \
150 goto errout; \
151 } while (0)
/* Validate the arguments of the enclosing function before any input is
   read.  CHECK_FILE (s, EOF) runs first; then the enclosing function
   returns EOF with errno set to EBADF if S has _IO_NO_READS set, or
   returns EOF after MAYBE_SET_EINVAL if FORMAT is NULL.  */
152 #define ARGCHECK(s, format) \
153 do \
154 { \
155 /* Check file argument for consistency. */ \
156 CHECK_FILE (s, EOF); \
157 if (s->_flags & _IO_NO_READS) \
158 { \
159 __set_errno (EBADF); \
160 return EOF; \
161 } \
162 else if (format == NULL) \
163 { \
164 MAYBE_SET_EINVAL; \
165 return EOF; \
166 } \
167 } while (0)
/* LOCK_STREAM passes _IO_funlockfile and S to
   __libc_cleanup_region_start and then locks S with _IO_flockfile.
   UNLOCK_STREAM unlocks S and calls __libc_cleanup_region_end (0).
   Both expand to two plain statements instead of a do/while block.
   NOTE(review): presumably the cleanup-region macros open and close a
   scope that has to span the caller's code, which would rule out such a
   wrapper -- confirm against bits/libc-lock.h.  The two macros must be
   used as a matching pair in the same function.  */
168 #define LOCK_STREAM(S) \
169 __libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \
170 _IO_flockfile (S)
171 #define UNLOCK_STREAM(S) \
172 _IO_funlockfile (S); \
173 __libc_cleanup_region_end (0)
174
175
176 /* Read formatted input from S according to the format string
177 FORMAT, using the argument list in ARG.
178 Return the number of assignments made, or -1 for an input error. */
179 #ifdef COMPILE_WSCANF
180 int
181 _IO_vfwscanf (s, format, argptr, errp)
182 _IO_FILE *s;
183 const wchar_t *format;
184 _IO_va_list argptr;
185 int *errp;
186 #else
187 int
188 _IO_vfscanf (s, format, argptr, errp)
189 _IO_FILE *s;
190 const char *format;
191 _IO_va_list argptr;
192 int *errp;
193 #endif
194 {
195 va_list arg;
196 register const CHAR_T *f = format;
197 register UCHAR_T fc; /* Current character of the format. */
198 register WINT_T done = 0; /* Assignments done. */
199 register size_t read_in = 0; /* Chars read in. */
200 register WINT_T c = 0; /* Last char read. */
201 register int width; /* Maximum field width. */
202 register int flags; /* Modifiers for current format element. */
203 int errval = 0;
204 #ifndef COMPILE_WSCANF
205 __locale_t loc = _NL_CURRENT_LOCALE;
206 struct locale_data *const curctype = loc->__locales[LC_CTYPE];
207 #endif
208
209 /* Errno of last failed inchar call. */
210 int inchar_errno = 0;
211 /* Status for reading F-P nums. */
212 char got_dot, got_e, negative;
213 /* If a [...] is a [^...]. */
214 CHAR_T not_in;
215 #define exp_char not_in
216 /* Base for integral numbers. */
217 int base;
218 /* Signedness for integral numbers. */
219 int number_signed;
220 #define is_hexa number_signed
221 /* Decimal point character. */
222 #ifdef COMPILE_WSCANF
223 wint_t decimal;
224 #else
225 const char *decimal;
226 #endif
227 /* The thousands character of the current locale. */
228 #ifdef COMPILE_WSCANF
229 wint_t thousands;
230 #else
231 const char *thousands;
232 #endif
233 /* State for the conversions. */
234 mbstate_t state;
235 /* Integral holding variables. */
236 union
237 {
238 long long int q;
239 unsigned long long int uq;
240 long int l;
241 unsigned long int ul;
242 } num;
243 /* Character-buffer pointer. */
244 char *str = NULL;
245 wchar_t *wstr = NULL;
246 char **strptr = NULL;
247 ssize_t strsize = 0;
248 /* We must not react to white space in the format immediately because
249 it can be matched even if no character is available in the input
250 stream anymore. */
251 int skip_space = 0;
252 /* Nonzero if we are reading a pointer. */
253 int read_pointer;
254 /* Workspace. */
255 CHAR_T *tw; /* Temporary pointer. */
256 CHAR_T *wp = NULL; /* Workspace. */
257 size_t wpmax = 0; /* Maximal size of workspace. */
258 size_t wpsize; /* Currently used bytes in workspace. */
259 #define ADDW(Ch) \
260 do \
261 { \
262 if (wpsize == wpmax) \
263 { \
264 CHAR_T *old = wp; \
265 wpmax = (UCHAR_MAX + 1 > 2 * wpmax ? UCHAR_MAX + 1 : 2 * wpmax); \
266 wp = (CHAR_T *) alloca (wpmax * sizeof (wchar_t)); \
267 if (old != NULL) \
268 MEMCPY (wp, old, wpsize); \
269 } \
270 wp[wpsize++] = (Ch); \
271 } \
272 while (0)
273
274 #ifdef __va_copy
275 __va_copy (arg, argptr);
276 #else
277 arg = (va_list) argptr;
278 #endif
279
280 #ifdef ORIENT
281 ORIENT;
282 #endif
283
284 ARGCHECK (s, format);
285
286 {
287 #ifndef COMPILE_WSCANF
288 struct locale_data *const curnumeric = loc->__locales[LC_NUMERIC];
289 #endif
290
291 /* Figure out the decimal point character. */
292 #ifdef COMPILE_WSCANF
293 decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC);
294 #else
295 decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string;
296 #endif
297 /* Figure out the thousands separator character. */
298 #ifdef COMPILE_WSCANF
299 thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC);
300 #else
301 thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string;
302 if (*thousands == '\0')
303 thousands = NULL;
304 #endif
305 }
306
307 /* Lock the stream. */
308 LOCK_STREAM (s);
309
310
311 #ifndef COMPILE_WSCANF
312 /* From now on we use `state' to convert the format string. */
313 memset (&state, '\0', sizeof (state));
314 #endif
315
316 /* Run through the format string. */
317 while (*f != '\0')
318 {
319 unsigned int argpos;
320 /* Extract the next argument, which is of type TYPE.
321 For a %N$... spec, this is the Nth argument from the beginning;
322 otherwise it is the next argument after the state now in ARG. */
323 #ifdef __va_copy
324 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
325 ({ unsigned int pos = argpos; \
326 va_list arg; \
327 __va_copy (arg, argptr); \
328 while (--pos > 0) \
329 (void) va_arg (arg, void *); \
330 va_arg (arg, type); \
331 }))
332 #else
333 # if 0
334 /* XXX Possible optimization. */
335 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
336 ({ va_list arg = (va_list) argptr; \
337 arg = (va_list) ((char *) arg \
338 + (argpos - 1) \
339 * __va_rounded_size (void *)); \
340 va_arg (arg, type); \
341 }))
342 # else
343 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
344 ({ unsigned int pos = argpos; \
345 va_list arg = (va_list) argptr; \
346 while (--pos > 0) \
347 (void) va_arg (arg, void *); \
348 va_arg (arg, type); \
349 }))
350 # endif
351 #endif
352
353 #ifndef COMPILE_WSCANF
354 if (!isascii ((unsigned char) *f))
355 {
356 /* Non-ASCII, may be a multibyte. */
357 int len = __mbrlen (f, strlen (f), &state);
358 if (len > 0)
359 {
360 do
361 {
362 c = inchar ();
363 if (c == EOF)
364 input_error ();
365 else if (c != (unsigned char) *f++)
366 {
367 ungetc_not_eof (c, s);
368 conv_error ();
369 }
370 }
371 while (--len > 0);
372 continue;
373 }
374 }
375 #endif
376
377 fc = *f++;
378 if (fc != '%')
379 {
380 /* Remember to skip spaces. */
381 if (ISSPACE (fc))
382 {
383 skip_space = 1;
384 continue;
385 }
386
387 /* Read a character. */
388 c = inchar ();
389
390 /* Characters other than format specs must just match. */
391 if (c == EOF)
392 input_error ();
393
394 /* We saw white space char as the last character in the format
395 string. Now it's time to skip all leading white space. */
396 if (skip_space)
397 {
398 while (ISSPACE (c))
399 if (inchar () == EOF)
400 input_error ();
401 skip_space = 0;
402 }
403
404 if (c != fc)
405 {
406 ungetc (c, s);
407 conv_error ();
408 }
409
410 continue;
411 }
412
413 /* This is the start of the conversion string. */
414 flags = 0;
415
416 /* Not yet decided whether we read a pointer or not. */
417 read_pointer = 0;
418
419 /* Initialize state of modifiers. */
420 argpos = 0;
421
422 /* Prepare temporary buffer. */
423 wpsize = 0;
424
425 /* Check for a positional parameter specification. */
426 if (ISDIGIT ((UCHAR_T) *f))
427 {
428 argpos = (UCHAR_T) *f++ - L_('0');
429 while (ISDIGIT ((UCHAR_T) *f))
430 argpos = argpos * 10 + ((UCHAR_T) *f++ - L_('0'));
431 if (*f == L_('$'))
432 ++f;
433 else
434 {
435 /* Oops; that was actually the field width. */
436 width = argpos;
437 flags |= WIDTH;
438 argpos = 0;
439 goto got_width;
440 }
441 }
442
443 /* Check for the assignment-suppressing, the number grouping flag,
444 and the signal to use the locale's digit representation. */
445 while (*f == L_('*') || *f == L_('\'') || *f == L_('I'))
446 switch (*f++)
447 {
448 case L_('*'):
449 flags |= SUPPRESS;
450 break;
451 case L_('\''):
452 flags |= GROUP;
453 break;
454 case L_('I'):
455 flags |= I18N;
456 break;
457 }
458
459 /* We have seen width. */
460 if (ISDIGIT ((UCHAR_T) *f))
461 flags |= WIDTH;
462
463 /* Find the maximum field width. */
464 width = 0;
465 while (ISDIGIT ((UCHAR_T) *f))
466 {
467 width *= 10;
468 width += (UCHAR_T) *f++ - L_('0');
469 }
470 got_width:
471 if (width == 0)
472 width = -1;
473
474 /* Check for type modifiers. */
475 switch (*f++)
476 {
477 case L_('h'):
478 /* ints are short ints or chars. */
479 if (*f == L_('h'))
480 {
481 ++f;
482 flags |= CHAR;
483 }
484 else
485 flags |= SHORT;
486 break;
487 case L_('l'):
488 if (*f == L_('l'))
489 {
490 /* A double `l' is equivalent to an `L'. */
491 ++f;
492 flags |= LONGDBL | LONG;
493 }
494 else
495 /* ints are long ints. */
496 flags |= LONG;
497 break;
498 case L_('q'):
499 case L_('L'):
500 /* doubles are long doubles, and ints are long long ints. */
501 flags |= LONGDBL | LONG;
502 break;
503 case L_('a'):
504 /* The `a' is used as a flag only if followed by `s', `S' or
505 `['. */
506 if (*f != L_('s') && *f != L_('S') && *f != L_('['))
507 {
508 --f;
509 break;
510 }
511 /* String conversions (%s, %[) take a `char **'
512 arg and fill it in with a malloc'd pointer. */
513 flags |= MALLOC;
514 break;
515 case L_('z'):
516 if (need_longlong && sizeof (size_t) > sizeof (unsigned long int))
517 flags |= LONGDBL;
518 else if (sizeof (size_t) > sizeof (unsigned int))
519 flags |= LONG;
520 break;
521 case L_('j'):
522 if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int))
523 flags |= LONGDBL;
524 else if (sizeof (uintmax_t) > sizeof (unsigned int))
525 flags |= LONG;
526 break;
527 case L_('t'):
528 if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int))
529 flags |= LONGDBL;
530 else if (sizeof (ptrdiff_t) > sizeof (int))
531 flags |= LONG;
532 break;
533 default:
534 /* Not a recognized modifier. Backup. */
535 --f;
536 break;
537 }
538
539 /* End of the format string? */
540 if (*f == L_('\0'))
541 conv_error ();
542
543 /* Find the conversion specifier. */
544 fc = *f++;
545 if (skip_space || (fc != L_('[') && fc != L_('c')
546 && fc != L_('C') && fc != L_('n')
547 && fc != L_('%')))
548 {
549 /* Eat whitespace. */
550 int save_errno = errno;
551 errno = 0;
552 do
553 if (inchar () == EOF && errno == EINTR)
554 input_error ();
555 while (ISSPACE (c));
556 errno = save_errno;
557 ungetc (c, s);
558 skip_space = 0;
559 }
560
561 switch (fc)
562 {
563 case L_('%'): /* Must match a literal '%'. */
564 c = inchar ();
565 if (c == EOF)
566 input_error ();
567 if (c != fc)
568 {
569 ungetc_not_eof (c, s);
570 conv_error ();
571 }
572 break;
573
574 case L_('n'): /* Answer number of assignments done. */
575 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
576 with the 'n' conversion specifier. */
577 if (!(flags & SUPPRESS))
578 {
579 /* Don't count the read-ahead. */
580 if (need_longlong && (flags & LONGDBL))
581 *ARG (long long int *) = read_in;
582 else if (need_long && (flags & LONG))
583 *ARG (long int *) = read_in;
584 else if (flags & SHORT)
585 *ARG (short int *) = read_in;
586 else if (!(flags & CHAR))
587 *ARG (int *) = read_in;
588 else
589 *ARG (char *) = read_in;
590
591 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
592 /* We have a severe problem here. The ISO C standard
593 contradicts itself in explaining the effect of the %n
594 format in `scanf'. While in ISO C:1990 and the ISO C
595 Amendment 1:1995 the result is described as
596
597 Execution of a %n directive does not affect the
598 assignment count returned at the completion of
599 execution of the f(w)scanf function.
600
601 in ISO C Corrigendum 1:1994 the following was added:
602
603 Subclause 7.9.6.2
604 Add the following fourth example:
605 In:
606 #include <stdio.h>
607 int d1, d2, n1, n2, i;
608 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
609 the value 123 is assigned to d1 and the value 3 to n1.
610 Because %n can never get an input failure the value
611 of 3 is also assigned to n2. The value of d2 is not
612 affected. The value 3 is assigned to i.
613
614 We go for now with the historically correct code from ISO C,
615 i.e., we don't count the %n assignments. If it ever
616 should prove to be wrong just remove the #ifdef above. */
617 ++done;
618 #endif
619 }
620 break;
621
622 case L_('c'): /* Match characters. */
623 if ((flags & LONG) == 0)
624 {
625 if (!(flags & SUPPRESS))
626 {
627 str = ARG (char *);
628 if (str == NULL)
629 conv_error ();
630 }
631
632 c = inchar ();
633 if (c == EOF)
634 input_error ();
635
636 if (width == -1)
637 width = 1;
638
639 #ifdef COMPILE_WSCANF
640 /* We have to convert the wide character(s) into multibyte
641 characters and store the result. */
642 memset (&state, '\0', sizeof (state));
643
644 do
645 {
646 size_t n;
647
648 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
649 if (n == (size_t) -1)
650 /* No valid wide character. */
651 input_error ();
652
653 /* Increment the output pointer. Even if we don't
654 write anything. */
655 str += n;
656 }
657 while (--width > 0 && inchar () != EOF);
658 #else
659 if (!(flags & SUPPRESS))
660 {
661 do
662 *str++ = c;
663 while (--width > 0 && inchar () != EOF);
664 }
665 else
666 while (--width > 0 && inchar () != EOF);
667 #endif
668
669 if (!(flags & SUPPRESS))
670 ++done;
671
672 break;
673 }
674 /* FALLTHROUGH */
675 case L_('C'):
676 if (!(flags & SUPPRESS))
677 {
678 wstr = ARG (wchar_t *);
679 if (wstr == NULL)
680 conv_error ();
681 }
682
683 c = inchar ();
684 if (c == EOF)
685 input_error ();
686
687 #ifdef COMPILE_WSCANF
688 /* Just store the incoming wide characters. */
689 if (!(flags & SUPPRESS))
690 {
691 do
692 *wstr++ = c;
693 while (--width > 0 && inchar () != EOF);
694 }
695 else
696 while (--width > 0 && inchar () != EOF);
697 #else
698 {
699 /* We have to convert the multibyte input sequence to wide
700 characters. */
701 char buf[1];
702 mbstate_t cstate;
703
704 memset (&cstate, '\0', sizeof (cstate));
705
706 do
707 {
708 /* This is what we present the mbrtowc function first. */
709 buf[0] = c;
710
711 while (1)
712 {
713 size_t n;
714
715 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
716 buf, 1, &cstate);
717
718 if (n == (size_t) -2)
719 {
720 /* Possibly correct character, just not enough
721 input. */
722 if (inchar () == EOF)
723 encode_error ();
724
725 buf[0] = c;
726 continue;
727 }
728
729 if (n != 1)
730 encode_error ();
731
732 /* We have a match. */
733 break;
734 }
735
736 /* Advance the result pointer. */
737 ++wstr;
738 }
739 while (--width > 0 && inchar () != EOF);
740 }
741 #endif
742
743 if (!(flags & SUPPRESS))
744 ++done;
745
746 break;
747
748 case L_('s'): /* Read a string. */
749 if (!(flags & LONG))
750 {
751 #define STRING_ARG(Str, Type) \
752 do if (!(flags & SUPPRESS)) \
753 { \
754 if (flags & MALLOC) \
755 { \
756 /* The string is to be stored in a malloc'd buffer. */ \
757 strptr = ARG (char **); \
758 if (strptr == NULL) \
759 conv_error (); \
760 /* Allocate an initial buffer. */ \
761 strsize = 100; \
762 *strptr = (char *) malloc (strsize * sizeof (Type)); \
763 Str = (Type *) *strptr; \
764 } \
765 else \
766 Str = ARG (Type *); \
767 if (Str == NULL) \
768 conv_error (); \
769 } while (0)
770 STRING_ARG (str, char);
771
772 c = inchar ();
773 if (c == EOF)
774 input_error ();
775
776 #ifdef COMPILE_WSCANF
777 memset (&state, '\0', sizeof (state));
778 #endif
779
780 do
781 {
782 if (ISSPACE (c))
783 {
784 ungetc_not_eof (c, s);
785 break;
786 }
787
788 #ifdef COMPILE_WSCANF
789 /* This is quite complicated. We have to convert the
790 wide characters into multibyte characters and then
791 store them. */
792 {
793 size_t n;
794
795 if (!(flags & SUPPRESS) && (flags & MALLOC)
796 && str + MB_CUR_MAX >= *strptr + strsize)
797 {
798 /* We have to enlarge the buffer if the `a' flag
799 was given. */
800 size_t strleng = str - *strptr;
801 char *newstr;
802
803 newstr = (char *) realloc (*strptr, strsize * 2);
804 if (newstr == NULL)
805 {
806 /* Can't allocate that much. Last-ditch
807 effort. */
808 newstr = (char *) realloc (*strptr,
809 strleng + MB_CUR_MAX);
810 if (newstr == NULL)
811 {
812 /* We lose. Oh well. Terminate the
813 string and stop converting,
814 so at least we don't skip any input. */
815 ((char *) (*strptr))[strleng] = '\0';
816 ++done;
817 conv_error ();
818 }
819 else
820 {
821 *strptr = newstr;
822 str = newstr + strleng;
823 strsize = strleng + MB_CUR_MAX;
824 }
825 }
826 else
827 {
828 *strptr = newstr;
829 str = newstr + strleng;
830 strsize *= 2;
831 }
832 }
833
834 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c,
835 &state);
836 if (n == (size_t) -1)
837 encode_error ();
838
839 assert (n <= MB_CUR_MAX);
840 str += n;
841 }
842 #else
843 /* This is easy. */
844 if (!(flags & SUPPRESS))
845 {
846 *str++ = c;
847 if ((flags & MALLOC)
848 && (char *) str == *strptr + strsize)
849 {
850 /* Enlarge the buffer. */
851 str = (char *) realloc (*strptr, 2 * strsize);
852 if (str == NULL)
853 {
854 /* Can't allocate that much. Last-ditch
855 effort. */
856 str = (char *) realloc (*strptr, strsize + 1);
857 if (str == NULL)
858 {
859 /* We lose. Oh well. Terminate the
860 string and stop converting,
861 so at least we don't skip any input. */
862 ((char *) (*strptr))[strsize - 1] = '\0';
863 ++done;
864 conv_error ();
865 }
866 else
867 {
868 *strptr = (char *) str;
869 str += strsize;
870 ++strsize;
871 }
872 }
873 else
874 {
875 *strptr = (char *) str;
876 str += strsize;
877 strsize *= 2;
878 }
879 }
880 }
881 #endif
882 }
883 while ((width <= 0 || --width > 0) && inchar () != EOF);
884
885 if (!(flags & SUPPRESS))
886 {
887 #ifdef COMPILE_WSCANF
888 /* We have to emit the code to get into the initial
889 state. */
890 char buf[MB_LEN_MAX];
891 size_t n = __wcrtomb (buf, L'\0', &state);
892 if (n > 0 && (flags & MALLOC)
893 && str + n >= *strptr + strsize)
894 {
895 /* Enlarge the buffer. */
896 size_t strleng = str - *strptr;
897 char *newstr;
898
899 newstr = (char *) realloc (*strptr, strleng + n + 1);
900 if (newstr == NULL)
901 {
902 /* We lose. Oh well. Terminate the string
903 and stop converting, so at least we don't
904 skip any input. */
905 ((char *) (*strptr))[strleng] = '\0';
906 ++done;
907 conv_error ();
908 }
909 else
910 {
911 *strptr = newstr;
912 str = newstr + strleng;
913 strsize = strleng + n + 1;
914 }
915 }
916
917 str = __mempcpy (str, buf, n);
918 #endif
919 *str++ = '\0';
920
921 if ((flags & MALLOC) && str - *strptr != strsize)
922 {
923 char *cp = (char *) realloc (*strptr, str - *strptr);
924 if (cp != NULL)
925 *strptr = cp;
926 }
927
928 ++done;
929 }
930 break;
931 }
932 /* FALLTHROUGH */
933
934 case L_('S'):
935 {
936 #ifndef COMPILE_WSCANF
937 mbstate_t cstate;
938 #endif
939
940 /* Wide character string. */
941 STRING_ARG (wstr, wchar_t);
942
943 c = inchar ();
944 if (c == EOF)
945 input_error ();
946
947 #ifndef COMPILE_WSCANF
948 memset (&cstate, '\0', sizeof (cstate));
949 #endif
950
951 do
952 {
953 if (ISSPACE (c))
954 {
955 ungetc_not_eof (c, s);
956 break;
957 }
958
959 #ifdef COMPILE_WSCANF
960 /* This is easy. */
961 if (!(flags & SUPPRESS))
962 {
963 *wstr++ = c;
964 if ((flags & MALLOC)
965 && wstr == (wchar_t *) *strptr + strsize)
966 {
967 /* Enlarge the buffer. */
968 wstr = (wchar_t *) realloc (*strptr,
969 (2 * strsize)
970 * sizeof (wchar_t));
971 if (wstr == NULL)
972 {
973 /* Can't allocate that much. Last-ditch
974 effort. */
975 wstr = (wchar_t *) realloc (*strptr,
976 (strsize + 1)
977 * sizeof (wchar_t));
978 if (wstr == NULL)
979 {
980 /* We lose. Oh well. Terminate the string
981 and stop converting, so at least we don't
982 skip any input. */
983 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
984 ++done;
985 conv_error ();
986 }
987 else
988 {
989 *strptr = (char *) wstr;
990 wstr += strsize;
991 ++strsize;
992 }
993 }
994 else
995 {
996 *strptr = (char *) wstr;
997 wstr += strsize;
998 strsize *= 2;
999 }
1000 }
1001 }
1002 #else
1003 {
1004 char buf[1];
1005
1006 buf[0] = c;
1007
1008 while (1)
1009 {
1010 size_t n;
1011
1012 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
1013 buf, 1, &cstate);
1014
1015 if (n == (size_t) -2)
1016 {
1017 /* Possibly correct character, just not enough
1018 input. */
1019 if (inchar () == EOF)
1020 encode_error ();
1021
1022 buf[0] = c;
1023 continue;
1024 }
1025
1026 if (n != 1)
1027 encode_error ();
1028
1029 /* We have a match. */
1030 ++wstr;
1031 break;
1032 }
1033
1034 if (!(flags & SUPPRESS) && (flags & MALLOC)
1035 && wstr == (wchar_t *) *strptr + strsize)
1036 {
1037 /* Enlarge the buffer. */
1038 wstr = (wchar_t *) realloc (*strptr,
1039 (2 * strsize
1040 * sizeof (wchar_t)));
1041 if (wstr == NULL)
1042 {
1043 /* Can't allocate that much. Last-ditch effort. */
1044 wstr = (wchar_t *) realloc (*strptr,
1045 ((strsize + 1)
1046 * sizeof (wchar_t)));
1047 if (wstr == NULL)
1048 {
1049 /* We lose. Oh well. Terminate the
1050 string and stop converting, so at
1051 least we don't skip any input. */
1052 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1053 ++done;
1054 conv_error ();
1055 }
1056 else
1057 {
1058 *strptr = (char *) wstr;
1059 wstr += strsize;
1060 ++strsize;
1061 }
1062 }
1063 else
1064 {
1065 *strptr = (char *) wstr;
1066 wstr += strsize;
1067 strsize *= 2;
1068 }
1069 }
1070 }
1071 #endif
1072 }
1073 while ((width <= 0 || --width > 0) && inchar () != EOF);
1074
1075 if (!(flags & SUPPRESS))
1076 {
1077 *wstr++ = L'\0';
1078
1079 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1080 {
1081 wchar_t *cp = (wchar_t *) realloc (*strptr,
1082 ((wstr
1083 - (wchar_t *) *strptr)
1084 * sizeof(wchar_t)));
1085 if (cp != NULL)
1086 *strptr = (char *) cp;
1087 }
1088
1089 ++done;
1090 }
1091 }
1092 break;
1093
1094 case L_('x'): /* Hexadecimal integer. */
1095 case L_('X'): /* Ditto. */
1096 base = 16;
1097 number_signed = 0;
1098 goto number;
1099
1100 case L_('o'): /* Octal integer. */
1101 base = 8;
1102 number_signed = 0;
1103 goto number;
1104
1105 case L_('u'): /* Unsigned decimal integer. */
1106 base = 10;
1107 number_signed = 0;
1108 goto number;
1109
1110 case L_('d'): /* Signed decimal integer. */
1111 base = 10;
1112 number_signed = 1;
1113 goto number;
1114
1115 case L_('i'): /* Generic number. */
1116 base = 0;
1117 number_signed = 1;
1118
1119 number:
1120 c = inchar ();
1121 if (c == EOF)
1122 input_error ();
1123
1124 /* Check for a sign. */
1125 if (c == L_('-') || c == L_('+'))
1126 {
1127 ADDW (c);
1128 if (width > 0)
1129 --width;
1130 c = inchar ();
1131 }
1132
1133 /* Look for a leading indication of base. */
1134 if (width != 0 && c == L_('0'))
1135 {
1136 if (width > 0)
1137 --width;
1138
1139 ADDW (c);
1140 c = inchar ();
1141
1142 if (width != 0 && TOLOWER (c) == L_('x'))
1143 {
1144 if (base == 0)
1145 base = 16;
1146 if (base == 16)
1147 {
1148 if (width > 0)
1149 --width;
1150 c = inchar ();
1151 }
1152 }
1153 else if (base == 0)
1154 base = 8;
1155 }
1156
1157 if (base == 0)
1158 base = 10;
1159
1160 if (base == 10 && (flags & I18N) != 0)
1161 {
1162 int from_level;
1163 int to_level;
1164 int level;
1165 #ifdef COMPILE_WSCANF
1166 const wchar_t *wcdigits[10];
1167 #else
1168 const char *mbdigits[10];
1169 #endif
1170 int n;
1171
1172 from_level = 0;
1173 #ifdef COMPILE_WSCANF
1174 to_level = _NL_CURRENT_WORD (LC_CTYPE,
1175 _NL_CTYPE_INDIGITS_WC_LEN) - 1;
1176 #else
1177 to_level = (uint32_t) curctype->values[_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN)].word - 1;
1178 #endif
1179
1180 /* Read the number into workspace. */
1181 while (c != EOF && width != 0)
1182 {
1183 /* In this round we get the pointer to the digit strings
1184 and also perform the first round of comparisons. */
1185 for (n = 0; n < 10; ++n)
1186 {
1187 /* Get the string for the digits with value N. */
1188 #ifdef COMPILE_WSCANF
1189 wcdigits[n] = (const wchar_t *)
1190 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1191 wcdigits[n] += from_level;
1192
1193 if (c == (wint_t) *wcdigits[n])
1194 {
1195 to_level = from_level;
1196 break;
1197 }
1198
1199 /* Advance the pointer to the next string. */
1200 ++wcdigits[n];
1201 #else
1202 const char *cmpp;
1203 int avail = width > 0 ? width : INT_MAX;
1204
1205 mbdigits[n]
1206 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1207
1208 for (level = 0; level < from_level; level++)
1209 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1210
1211 cmpp = mbdigits[n];
1212 while ((unsigned char) *cmpp == c && avail > 0)
1213 {
1214 if (*++cmpp == '\0')
1215 break;
1216 else
1217 {
1218 if ((c = inchar ()) == EOF)
1219 break;
1220 --avail;
1221 }
1222 }
1223
1224 if (*cmpp == '\0')
1225 {
1226 if (width > 0)
1227 width = avail;
1228 to_level = from_level;
1229 break;
1230 }
1231
1232 /* We are pushing all read characters back. */
1233 if (cmpp > mbdigits[n])
1234 {
1235 ungetc (c, s);
1236 while (--cmpp > mbdigits[n])
1237 ungetc_not_eof ((unsigned char) *cmpp, s);
1238 c = (unsigned char) *cmpp;
1239 }
1240
1241 /* Advance the pointer to the next string. */
1242 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1243 #endif
1244 }
1245
1246 if (n == 10)
1247 {
1248 /* Have not yet found the digit. */
1249 for (level = from_level + 1; level <= to_level; ++level)
1250 {
1251 /* Search all ten digits of this level. */
1252 for (n = 0; n < 10; ++n)
1253 {
1254 #ifdef COMPILE_WSCANF
1255 if (c == (wint_t) *wcdigits[n])
1256 break;
1257
1258 /* Advance the pointer to the next string. */
1259 ++wcdigits[n];
1260 #else
1261 const char *cmpp;
1262 int avail = width > 0 ? width : INT_MAX;
1263
1264 cmpp = mbdigits[n];
1265 while ((unsigned char) *cmpp == c && avail > 0)
1266 {
1267 if (*++cmpp == '\0')
1268 break;
1269 else
1270 {
1271 if ((c = inchar ()) == EOF)
1272 break;
1273 --avail;
1274 }
1275 }
1276
1277 if (*cmpp == '\0')
1278 {
1279 if (width > 0)
1280 width = avail;
1281 break;
1282 }
1283
1284 /* We are pushing all read characters back. */
1285 if (cmpp > mbdigits[n])
1286 {
1287 ungetc (c, s);
1288 while (--cmpp > mbdigits[n])
1289 ungetc_not_eof ((unsigned char) *cmpp, s);
1290 c = (unsigned char) *cmpp;
1291 }
1292
1293 /* Advance the pointer to the next string. */
1294 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1295 #endif
1296 }
1297
1298 if (n < 10)
1299 {
1300 /* Found it. */
1301 from_level = level;
1302 to_level = level;
1303 break;
1304 }
1305 }
1306 }
1307
1308 if (n < 10)
1309 c = L_('0') + n;
1310 else if ((flags & GROUP)
1311 #ifdef COMPILE_WSCANF
1312 && thousands != L'\0'
1313 #else
1314 && thousands != NULL
1315 #endif
1316 )
1317 {
1318 /* Try matching against the thousands separator. */
1319 #ifdef COMPILE_WSCANF
1320 if (c != thousands)
1321 break;
1322 #else
1323 const char *cmpp = thousands;
1324 int avail = width > 0 ? width : INT_MAX;
1325
1326 while ((unsigned char) *cmpp == c && avail > 0)
1327 {
1328 ADDW (c);
1329 if (*++cmpp == '\0')
1330 break;
1331 else
1332 {
1333 if ((c = inchar ()) == EOF)
1334 break;
1335 --avail;
1336 }
1337 }
1338
1339 if (*cmpp != '\0')
1340 {
1341 /* We are pushing all read characters back. */
1342 if (cmpp > thousands)
1343 {
1344 wpsize -= cmpp - thousands;
1345 ungetc (c, s);
1346 while (--cmpp > thousands)
1347 ungetc_not_eof ((unsigned char) *cmpp, s);
1348 c = (unsigned char) *cmpp;
1349 }
1350 break;
1351 }
1352
1353 if (width > 0)
1354 width = avail;
1355
1356 /* The last thousands character will be added back by
1357 the ADDW below. */
1358 --wpsize;
1359 #endif
1360 }
1361 else
1362 break;
1363
1364 ADDW (c);
1365 if (width > 0)
1366 --width;
1367
1368 c = inchar ();
1369 }
1370 }
1371 else
1372 /* Read the number into workspace. */
1373 while (c != EOF && width != 0)
1374 {
1375 if (base == 16)
1376 {
1377 if (!ISXDIGIT (c))
1378 break;
1379 }
1380 else if (!ISDIGIT (c) || (int) (c - L_('0')) >= base)
1381 {
1382 if (base == 10 && (flags & GROUP)
1383 #ifdef COMPILE_WSCANF
1384 && thousands != L'\0'
1385 #else
1386 && thousands != NULL
1387 #endif
1388 )
1389 {
1390 /* Try matching against the thousands separator. */
1391 #ifdef COMPILE_WSCANF
1392 if (c != thousands)
1393 break;
1394 #else
1395 const char *cmpp = thousands;
1396 int avail = width > 0 ? width : INT_MAX;
1397
1398 while ((unsigned char) *cmpp == c && avail > 0)
1399 {
1400 ADDW (c);
1401 if (*++cmpp == '\0')
1402 break;
1403 else
1404 {
1405 if ((c = inchar ()) == EOF)
1406 break;
1407 --avail;
1408 }
1409 }
1410
1411 if (*cmpp != '\0')
1412 {
1413 /* We are pushing all read characters back. */
1414 if (cmpp > thousands)
1415 {
1416 wpsize -= cmpp - thousands;
1417 ungetc (c, s);
1418 while (--cmpp > thousands)
1419 ungetc_not_eof ((unsigned char) *cmpp, s);
1420 c = (unsigned char) *cmpp;
1421 }
1422 break;
1423 }
1424
1425 if (width > 0)
1426 width = avail;
1427
1428 /* The last thousands character will be added back by
1429 the ADDW below. */
1430 --wpsize;
1431 #endif
1432 }
1433 else
1434 break;
1435 }
1436 ADDW (c);
1437 if (width > 0)
1438 --width;
1439
1440 c = inchar ();
1441 }
1442
1443 if (wpsize == 0
1444 || (wpsize == 1 && (wp[0] == L_('+') || wp[0] == L_('-'))))
1445 {
1446 /* There was no number. If we are supposed to read a pointer
1447 we must recognize "(nil)" as well. */
1448 if (wpsize == 0 && read_pointer && (width < 0 || width >= 0)
1449 && c == '('
1450 && TOLOWER (inchar ()) == L_('n')
1451 && TOLOWER (inchar ()) == L_('i')
1452 && TOLOWER (inchar ()) == L_('l')
1453 && inchar () == L_(')'))
1454 /* We must produce the value of a NULL pointer. A single
1455 '0' digit is enough. */
1456 ADDW (L_('0'));
1457 else
1458 {
1459 /* The last read character is not part of the number
1460 anymore. */
1461 ungetc (c, s);
1462
1463 conv_error ();
1464 }
1465 }
1466 else
1467 /* The just read character is not part of the number anymore. */
1468 ungetc (c, s);
1469
1470 /* Convert the number. */
1471 ADDW (L_('\0'));
1472 if (need_longlong && (flags & LONGDBL))
1473 {
1474 if (number_signed)
1475 num.q = __strtoll_internal (wp, &tw, base, flags & GROUP);
1476 else
1477 num.uq = __strtoull_internal (wp, &tw, base, flags & GROUP);
1478 }
1479 else
1480 {
1481 if (number_signed)
1482 num.l = __strtol_internal (wp, &tw, base, flags & GROUP);
1483 else
1484 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP);
1485 }
1486 if (wp == tw)
1487 conv_error ();
1488
1489 if (!(flags & SUPPRESS))
1490 {
1491 if (! number_signed)
1492 {
1493 if (need_longlong && (flags & LONGDBL))
1494 *ARG (unsigned LONGLONG int *) = num.uq;
1495 else if (need_long && (flags & LONG))
1496 *ARG (unsigned long int *) = num.ul;
1497 else if (flags & SHORT)
1498 *ARG (unsigned short int *)
1499 = (unsigned short int) num.ul;
1500 else if (!(flags & CHAR))
1501 *ARG (unsigned int *) = (unsigned int) num.ul;
1502 else
1503 *ARG (unsigned char *) = (unsigned char) num.ul;
1504 }
1505 else
1506 {
1507 if (need_longlong && (flags & LONGDBL))
1508 *ARG (LONGLONG int *) = num.q;
1509 else if (need_long && (flags & LONG))
1510 *ARG (long int *) = num.l;
1511 else if (flags & SHORT)
1512 *ARG (short int *) = (short int) num.l;
1513 else if (!(flags & CHAR))
1514 *ARG (int *) = (int) num.l;
1515 else
1516 *ARG (signed char *) = (signed char) num.ul;
1517 }
1518 ++done;
1519 }
1520 break;
1521
1522 case L_('e'): /* Floating-point numbers. */
1523 case L_('E'):
1524 case L_('f'):
1525 case L_('F'):
1526 case L_('g'):
1527 case L_('G'):
1528 case L_('a'):
1529 case L_('A'):
1530 c = inchar ();
1531 if (c == EOF)
1532 input_error ();
1533
1534 /* Check for a sign. */
1535 if (c == L_('-') || c == L_('+'))
1536 {
1537 negative = c == L_('-');
1538 if (width == 0 || inchar () == EOF)
1539 /* EOF is only an input error before we read any chars. */
1540 conv_error ();
1541 if (! ISDIGIT (c) && TOLOWER (c) != L_('i')
1542 && TOLOWER (c) != L_('n'))
1543 {
1544 #ifdef COMPILE_WSCANF
1545 if (c != decimal)
1546 {
1547 /* This is no valid number. */
1548 ungetc (c, s);
1549 conv_error ();
1550 }
1551 #else
1552 /* Match against the decimal point. At this point
1553 we are taking advantage of the fact that we can
1554 push more than one character back. This is
1555 (almost) never necessary since the decimal point
1556 string hopefully never contains more than one
1557 byte. */
1558 const char *cmpp = decimal;
1559 int avail = width > 0 ? width : INT_MAX;
1560
1561 while ((unsigned char) *cmpp == c && avail > 0)
1562 if (*++cmpp == '\0')
1563 break;
1564 else
1565 {
1566 if (inchar () == EOF)
1567 break;
1568 --avail;
1569 }
1570
1571 if (*cmpp != '\0')
1572 {
1573 /* This is no valid number. */
1574 while (1)
1575 {
1576 ungetc (c, s);
1577 if (cmpp == decimal)
1578 break;
1579 c = (unsigned char) *--cmpp;
1580 }
1581
1582 conv_error ();
1583 }
1584 if (width > 0)
1585 width = avail;
1586 #endif
1587 }
1588 if (width > 0)
1589 --width;
1590 }
1591 else
1592 negative = 0;
1593
1594 /* Take care for the special arguments "nan" and "inf". */
1595 if (TOLOWER (c) == L_('n'))
1596 {
1597 /* Maybe "nan". */
1598 ADDW (c);
1599 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('a'))
1600 conv_error ();
1601 if (width > 0)
1602 --width;
1603 ADDW (c);
1604 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('n'))
1605 conv_error ();
1606 if (width > 0)
1607 --width;
1608 ADDW (c);
1609 /* It is "nan". */
1610 goto scan_float;
1611 }
1612 else if (TOLOWER (c) == L_('i'))
1613 {
1614 /* Maybe "inf" or "infinity". */
1615 ADDW (c);
1616 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('n'))
1617 conv_error ();
1618 if (width > 0)
1619 --width;
1620 ADDW (c);
1621 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('f'))
1622 conv_error ();
1623 if (width > 0)
1624 --width;
1625 ADDW (c);
1626 /* It is at least "inf". */
1627 if (width != 0 && inchar () != EOF)
1628 {
1629 if (TOLOWER (c) == L_('i'))
1630 {
1631 if (width > 0)
1632 --width;
1633 /* Now we have to read the rest as well. */
1634 ADDW (c);
1635 if (width == 0 || inchar () == EOF
1636 || TOLOWER (c) != L_('n'))
1637 conv_error ();
1638 if (width > 0)
1639 --width;
1640 ADDW (c);
1641 if (width == 0 || inchar () == EOF
1642 || TOLOWER (c) != L_('i'))
1643 conv_error ();
1644 if (width > 0)
1645 --width;
1646 ADDW (c);
1647 if (width == 0 || inchar () == EOF
1648 || TOLOWER (c) != L_('t'))
1649 conv_error ();
1650 if (width > 0)
1651 --width;
1652 ADDW (c);
1653 if (width == 0 || inchar () == EOF
1654 || TOLOWER (c) != L_('y'))
1655 conv_error ();
1656 if (width > 0)
1657 --width;
1658 ADDW (c);
1659 }
1660 else
1661 /* Never mind. */
1662 ungetc (c, s);
1663 }
1664 goto scan_float;
1665 }
1666
1667 is_hexa = 0;
1668 exp_char = L_('e');
1669 if (width != 0 && c == L_('0'))
1670 {
1671 ADDW (c);
1672 c = inchar ();
1673 if (width > 0)
1674 --width;
1675 if (width != 0 && TOLOWER (c) == L_('x'))
1676 {
1677 /* It is a number in hexadecimal format. */
1678 ADDW (c);
1679
1680 is_hexa = 1;
1681 exp_char = L_('p');
1682
1683 /* Grouping is not allowed. */
1684 flags &= ~GROUP;
1685 c = inchar ();
1686 if (width > 0)
1687 --width;
1688 }
1689 }
1690
1691 got_dot = got_e = 0;
1692 do
1693 {
1694 if (ISDIGIT (c))
1695 ADDW (c);
1696 else if (!got_e && is_hexa && ISXDIGIT (c))
1697 ADDW (c);
1698 else if (got_e && wp[wpsize - 1] == exp_char
1699 && (c == L_('-') || c == L_('+')))
1700 ADDW (c);
1701 else if (wpsize > 0 && !got_e
1702 && (CHAR_T) TOLOWER (c) == exp_char)
1703 {
1704 ADDW (exp_char);
1705 got_e = got_dot = 1;
1706 }
1707 else
1708 {
1709 #ifdef COMPILE_WSCANF
1710 if (! got_dot && c == decimal)
1711 {
1712 ADDW (c);
1713 got_dot = 1;
1714 }
1715 else if ((flags & GROUP) != 0 && thousands != L'\0'
1716 && ! got_dot && c == thousands)
1717 ADDW (c);
1718 else
1719 {
1720 /* The last read character is not part of the number
1721 anymore. */
1722 ungetc (c, s);
1723 break;
1724 }
1725 #else
1726 const char *cmpp = decimal;
1727 int avail = width > 0 ? width : INT_MAX;
1728
1729 if (! got_dot)
1730 {
1731 while ((unsigned char) *cmpp == c && avail > 0)
1732 if (*++cmpp == '\0')
1733 break;
1734 else
1735 {
1736 if (inchar () == EOF)
1737 break;
1738 --avail;
1739 }
1740 }
1741
1742 if (*cmpp == '\0')
1743 {
1744 /* Add all the characters. */
1745 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
1746 ADDW ((unsigned char) *cmpp);
1747 if (width > 0)
1748 width = avail;
1749 got_dot = 1;
1750 }
1751 else
1752 {
1753 /* Figure out whether it is a thousands separator.
1754 There is one problem: we possibly read more than
1755 one character. We cannot push them back but since
1756 we know that parts of the `decimal' string matched,
1757 we can compare against it. */
1758 const char *cmp2p = thousands;
1759
1760 if ((flags & GROUP) != 0 && thousands != NULL
1761 && ! got_dot)
1762 {
1763 while (cmp2p - thousands < cmpp - decimal
1764 && *cmp2p == decimal[cmp2p - thousands])
1765 ++cmp2p;
1766 if (cmp2p - thousands == cmpp - decimal)
1767 {
1768 while ((unsigned char) *cmp2p == c && avail > 0)
1769 if (*++cmp2p == '\0')
1770 break;
1771 else
1772 {
1773 if (inchar () == EOF)
1774 break;
1775 --avail;
1776 }
1777 }
1778 }
1779
1780 if (cmp2p != NULL && *cmp2p == '\0')
1781 {
1782 /* Add all the characters. */
1783 for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
1784 ADDW ((unsigned char) *cmpp);
1785 if (width > 0)
1786 width = avail;
1787 }
1788 else
1789 {
1790 /* The last read character is not part of the number
1791 anymore. */
1792 ungetc (c, s);
1793 break;
1794 }
1795 }
1796 #endif
1797 }
1798 if (width > 0)
1799 --width;
1800 }
1801 while (width != 0 && inchar () != EOF);
1802
1803 /* Have we read any character? If we try to read a number
1804 in hexadecimal notation and we have read only the `0x'
1805 prefix or no exponent this is an error. */
1806 if (wpsize == 0 || (is_hexa && (wpsize == 2 || ! got_e)))
1807 conv_error ();
1808
1809 scan_float:
1810 /* Convert the number. */
1811 ADDW (L_('\0'));
1812 if (flags & LONGDBL)
1813 {
1814 long double d = __strtold_internal (wp, &tw, flags & GROUP);
1815 if (!(flags & SUPPRESS) && tw != wp)
1816 *ARG (long double *) = negative ? -d : d;
1817 }
1818 else if (flags & LONG)
1819 {
1820 double d = __strtod_internal (wp, &tw, flags & GROUP);
1821 if (!(flags & SUPPRESS) && tw != wp)
1822 *ARG (double *) = negative ? -d : d;
1823 }
1824 else
1825 {
1826 float d = __strtof_internal (wp, &tw, flags & GROUP);
1827 if (!(flags & SUPPRESS) && tw != wp)
1828 *ARG (float *) = negative ? -d : d;
1829 }
1830
1831 if (tw == wp)
1832 conv_error ();
1833
1834 if (!(flags & SUPPRESS))
1835 ++done;
1836 break;
1837
1838 case L_('['): /* Character class. */
1839 if (flags & LONG)
1840 STRING_ARG (wstr, wchar_t);
1841 else
1842 STRING_ARG (str, char);
1843
1844 if (*f == L_('^'))
1845 {
1846 ++f;
1847 not_in = 1;
1848 }
1849 else
1850 not_in = 0;
1851
1852 if (width < 0)
1853 /* There is no width given so there is also no limit on the
1854 number of characters we read. Therefore we set width to
1855 a very high value to make the algorithm easier. */
1856 width = INT_MAX;
1857
1858 #ifdef COMPILE_WSCANF
1859 /* Find the beginning and the end of the scanlist. We are not
1860 creating a lookup table since it would have to be too large.
1861 Instead we search each time through the string. This is not
1862 a constant lookup time but who uses this feature deserves to
1863 be punished. */
1864 tw = (wchar_t *) f; /* Marks the beginning. */
1865
1866 if (*f == L']')
1867 ++f;
1868
1869 while ((fc = *f++) != L'\0' && fc != L']');
1870
1871 if (fc == L'\0')
1872 conv_error ();
1873 wp = (wchar_t *) f - 1;
1874 #else
1875 /* Fill WP with byte flags indexed by character.
1876 We will use this flag map for matching input characters. */
1877 if (wpmax < UCHAR_MAX + 1)
1878 {
1879 wpmax = UCHAR_MAX + 1;
1880 wp = (char *) alloca (wpmax);
1881 }
1882 memset (wp, '\0', UCHAR_MAX + 1);
1883
1884 fc = *f;
1885 if (fc == ']' || fc == '-')
1886 {
1887 /* If ] or - appears before any char in the set, it is not
1888 the terminator or separator, but the first char in the
1889 set. */
1890 wp[fc] = 1;
1891 ++f;
1892 }
1893
1894 while ((fc = *f++) != '\0' && fc != ']')
1895 if (fc == '-' && *f != '\0' && *f != ']'
1896 && (unsigned char) f[-2] <= (unsigned char) *f)
1897 {
1898 /* Add all characters from the one before the '-'
1899 up to (but not including) the next format char. */
1900 for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc)
1901 wp[fc] = 1;
1902 }
1903 else
1904 /* Add the character to the flag map. */
1905 wp[fc] = 1;
1906
1907 if (fc == '\0')
1908 conv_error();
1909 #endif
1910
1911 if (flags & LONG)
1912 {
1913 size_t now = read_in;
1914 #ifdef COMPILE_WSCANF
1915 if (inchar () == WEOF)
1916 input_error ();
1917
1918 do
1919 {
1920 wchar_t *runp;
1921
1922 /* Test whether it's in the scanlist. */
1923 runp = tw;
1924 while (runp < wp)
1925 {
1926 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp
1927 && runp != tw
1928 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
1929 {
1930 /* Match against all characters in between the
1931 first and last character of the sequence. */
1932 wchar_t wc;
1933
1934 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
1935 if ((wint_t) wc == c)
1936 break;
1937
1938 if (wc <= runp[1] && !not_in)
1939 break;
1940 if (wc <= runp[1] && not_in)
1941 {
1942 /* The current character is not in the
1943 scanset. */
1944 ungetc (c, s);
1945 goto out;
1946 }
1947
1948 runp += 2;
1949 }
1950 else
1951 {
1952 if ((wint_t) *runp == c && !not_in)
1953 break;
1954 if ((wint_t) *runp == c && not_in)
1955 {
1956 ungetc (c, s);
1957 goto out;
1958 }
1959
1960 ++runp;
1961 }
1962 }
1963
1964 if (runp == wp && !not_in)
1965 {
1966 ungetc (c, s);
1967 goto out;
1968 }
1969
1970 if (!(flags & SUPPRESS))
1971 {
1972 *wstr++ = c;
1973
1974 if ((flags & MALLOC)
1975 && wstr == (wchar_t *) *strptr + strsize)
1976 {
1977 /* Enlarge the buffer. */
1978 wstr = (wchar_t *) realloc (*strptr,
1979 (2 * strsize)
1980 * sizeof (wchar_t));
1981 if (wstr == NULL)
1982 {
1983 /* Can't allocate that much. Last-ditch
1984 effort. */
1985 wstr = (wchar_t *)
1986 realloc (*strptr, (strsize + 1)
1987 * sizeof (wchar_t));
1988 if (wstr == NULL)
1989 {
1990 /* We lose. Oh well. Terminate the string
1991 and stop converting, so at least we don't
1992 skip any input. */
1993 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1994 ++done;
1995 conv_error ();
1996 }
1997 else
1998 {
1999 *strptr = (char *) wstr;
2000 wstr += strsize;
2001 ++strsize;
2002 }
2003 }
2004 else
2005 {
2006 *strptr = (char *) wstr;
2007 wstr += strsize;
2008 strsize *= 2;
2009 }
2010 }
2011 }
2012 }
2013 while (--width > 0 && inchar () != WEOF);
2014 out:
2015 #else
2016 char buf[MB_LEN_MAX];
2017 size_t cnt = 0;
2018 mbstate_t cstate;
2019
2020 if (inchar () == EOF)
2021 input_error ();
2022
2023 memset (&cstate, '\0', sizeof (cstate));
2024
2025 do
2026 {
2027 if (wp[c] == not_in)
2028 {
2029 ungetc_not_eof (c, s);
2030 break;
2031 }
2032
2033 /* This is easy. */
2034 if (!(flags & SUPPRESS))
2035 {
2036 size_t n;
2037
2038 /* Convert it into a wide character. */
2039 buf[0] = c;
2040 n = __mbrtowc (wstr, buf, 1, &cstate);
2041
2042 if (n == (size_t) -2)
2043 {
2044 /* Possibly correct character, just not enough
2045 input. */
2046 ++cnt;
2047 assert (cnt < MB_CUR_MAX);
2048 continue;
2049 }
2050 cnt = 0;
2051
2052 ++wstr;
2053 if ((flags & MALLOC)
2054 && wstr == (wchar_t *) *strptr + strsize)
2055 {
2056 /* Enlarge the buffer. */
2057 wstr = (wchar_t *) realloc (*strptr,
2058 (2 * strsize
2059 * sizeof (wchar_t)));
2060 if (wstr == NULL)
2061 {
2062 /* Can't allocate that much. Last-ditch
2063 effort. */
2064 wstr = (wchar_t *)
2065 realloc (*strptr, ((strsize + 1)
2066 * sizeof (wchar_t)));
2067 if (wstr == NULL)
2068 {
2069 /* We lose. Oh well. Terminate the
2070 string and stop converting,
2071 so at least we don't skip any input. */
2072 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2073 ++done;
2074 conv_error ();
2075 }
2076 else
2077 {
2078 *strptr = (char *) wstr;
2079 wstr += strsize;
2080 ++strsize;
2081 }
2082 }
2083 else
2084 {
2085 *strptr = (char *) wstr;
2086 wstr += strsize;
2087 strsize *= 2;
2088 }
2089 }
2090 }
2091
2092 if (--width <= 0)
2093 break;
2094 }
2095 while (inchar () != EOF);
2096
2097 if (cnt != 0)
2098 /* We stopped in the middle of recognizing another
2099 character. That's a problem. */
2100 encode_error ();
2101 #endif
2102
2103 if (now == read_in)
2104 /* We haven't successfully read any character. */
2105 conv_error ();
2106
2107 if (!(flags & SUPPRESS))
2108 {
2109 *wstr++ = L'\0';
2110
2111 if ((flags & MALLOC)
2112 && wstr - (wchar_t *) *strptr != strsize)
2113 {
2114 wchar_t *cp = (wchar_t *)
2115 realloc (*strptr, ((wstr - (wchar_t *) *strptr)
2116 * sizeof(wchar_t)));
2117 if (cp != NULL)
2118 *strptr = (char *) cp;
2119 }
2120
2121 ++done;
2122 }
2123 }
2124 else
2125 {
2126 size_t now = read_in;
2127
2128 if (inchar () == EOF)
2129 input_error ();
2130
2131 #ifdef COMPILE_WSCANF
2132
2133 memset (&state, '\0', sizeof (state));
2134
2135 do
2136 {
2137 wchar_t *runp;
2138 size_t n;
2139
2140 /* Test whether it's in the scanlist. */
2141 runp = tw;
2142 while (runp < wp)
2143 {
2144 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp
2145 && runp != tw
2146 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2147 {
2148 /* Match against all characters in between the
2149 first and last character of the sequence. */
2150 wchar_t wc;
2151
2152 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2153 if ((wint_t) wc == c)
2154 break;
2155
2156 if (wc <= runp[1] && !not_in)
2157 break;
2158 if (wc <= runp[1] && not_in)
2159 {
2160 /* The current character is not in the
2161 scanset. */
2162 ungetc (c, s);
2163 goto out2;
2164 }
2165
2166 runp += 2;
2167 }
2168 else
2169 {
2170 if ((wint_t) *runp == c && !not_in)
2171 break;
2172 if ((wint_t) *runp == c && not_in)
2173 {
2174 ungetc (c, s);
2175 goto out2;
2176 }
2177
2178 ++runp;
2179 }
2180 }
2181
2182 if (runp == wp && !not_in)
2183 {
2184 ungetc (c, s);
2185 goto out2;
2186 }
2187
2188 if (!(flags & SUPPRESS))
2189 {
2190 if ((flags & MALLOC)
2191 && str + MB_CUR_MAX >= *strptr + strsize)
2192 {
2193 /* Enlarge the buffer. */
2194 size_t strleng = str - *strptr;
2195 char *newstr;
2196
2197 newstr = (char *) realloc (*strptr, 2 * strsize);
2198 if (newstr == NULL)
2199 {
2200 /* Can't allocate that much. Last-ditch
2201 effort. */
2202 newstr = (char *) realloc (*strptr,
2203 strleng + MB_CUR_MAX);
2204 if (newstr == NULL)
2205 {
2206 /* We lose. Oh well. Terminate the string
2207 and stop converting, so at least we don't
2208 skip any input. */
2209 ((char *) (*strptr))[strleng] = '\0';
2210 ++done;
2211 conv_error ();
2212 }
2213 else
2214 {
2215 *strptr = newstr;
2216 str = newstr + strleng;
2217 strsize = strleng + MB_CUR_MAX;
2218 }
2219 }
2220 else
2221 {
2222 *strptr = newstr;
2223 str = newstr + strleng;
2224 strsize *= 2;
2225 }
2226 }
2227 }
2228
2229 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
2230 if (n == (size_t) -1)
2231 encode_error ();
2232
2233 assert (n <= MB_CUR_MAX);
2234 str += n;
2235 }
2236 while (--width > 0 && inchar () != WEOF);
2237 out2:
2238 #else
2239 do
2240 {
2241 if (wp[c] == not_in)
2242 {
2243 ungetc_not_eof (c, s);
2244 break;
2245 }
2246
2247 /* This is easy. */
2248 if (!(flags & SUPPRESS))
2249 {
2250 *str++ = c;
2251 if ((flags & MALLOC)
2252 && (char *) str == *strptr + strsize)
2253 {
2254 /* Enlarge the buffer. */
2255 size_t newsize = 2 * strsize;
2256
2257 allocagain:
2258 str = (char *) realloc (*strptr, newsize);
2259 if (str == NULL)
2260 {
2261 /* Can't allocate that much. Last-ditch
2262 effort. */
2263 if (newsize > strsize + 1)
2264 {
2265 newsize = strsize + 1;
2266 goto allocagain;
2267 }
2268 /* We lose. Oh well. Terminate the
2269 string and stop converting,
2270 so at least we don't skip any input. */
2271 ((char *) (*strptr))[strsize - 1] = '\0';
2272 ++done;
2273 conv_error ();
2274 }
2275 else
2276 {
2277 *strptr = (char *) str;
2278 str += strsize;
2279 strsize = newsize;
2280 }
2281 }
2282 }
2283 }
2284 while (--width > 0 && inchar () != EOF);
2285 #endif
2286
2287 if (now == read_in)
2288 /* We haven't successfully read any character. */
2289 conv_error ();
2290
2291 if (!(flags & SUPPRESS))
2292 {
2293 #ifdef COMPILE_WSCANF
2294 /* We have to emit the code to get into the initial
2295 state. */
2296 char buf[MB_LEN_MAX];
2297 size_t n = __wcrtomb (buf, L'\0', &state);
2298 if (n > 0 && (flags & MALLOC)
2299 && str + n >= *strptr + strsize)
2300 {
2301 /* Enlarge the buffer. */
2302 size_t strleng = str - *strptr;
2303 char *newstr;
2304
2305 newstr = (char *) realloc (*strptr, strleng + n + 1);
2306 if (newstr == NULL)
2307 {
2308 /* We lose. Oh well. Terminate the string
2309 and stop converting, so at least we don't
2310 skip any input. */
2311 ((char *) (*strptr))[strleng] = '\0';
2312 ++done;
2313 conv_error ();
2314 }
2315 else
2316 {
2317 *strptr = newstr;
2318 str = newstr + strleng;
2319 strsize = strleng + n + 1;
2320 }
2321 }
2322
2323 str = __mempcpy (str, buf, n);
2324 #endif
2325 *str++ = '\0';
2326
2327 if ((flags & MALLOC) && str - *strptr != strsize)
2328 {
2329 char *cp = (char *) realloc (*strptr, str - *strptr);
2330 if (cp != NULL)
2331 *strptr = cp;
2332 }
2333
2334 ++done;
2335 }
2336 }
2337 break;
2338
2339 case L_('p'): /* Generic pointer. */
2340 base = 16;
2341 /* A PTR must be the same size as a `long int'. */
2342 flags &= ~(SHORT|LONGDBL);
2343 if (need_long)
2344 flags |= LONG;
2345 number_signed = 0;
2346 read_pointer = 1;
2347 goto number;
2348
2349 default:
2350 /* If this is an unknown format character punt. */
2351 conv_error ();
2352 }
2353 }
2354
2355 /* The last thing we saw in the format string was a white space.
2356 Consume the last white spaces. */
2357 if (skip_space)
2358 {
2359 do
2360 c = inchar ();
2361 while (ISSPACE (c));
2362 ungetc (c, s);
2363 }
2364
2365 errout:
2366 /* Unlock stream. */
2367 UNLOCK_STREAM (s);
2368
2369 if (errp != NULL)
2370 *errp |= errval;
2371
2372 return done;
2373 }
2374
2375 #ifdef COMPILE_WSCANF
/* Wide-character entry point (only built when COMPILE_WSCANF is defined).
   Forwards S, FORMAT and ARGPTR unchanged to _IO_vfwscanf and returns
   that call's result as-is.  The fourth argument is always NULL.
   NOTE(review): presumably that argument is an optional error-flag
   out-parameter which this caller does not need -- confirm against the
   _IO_vfwscanf prototype.  */
2376 int
2377 __vfwscanf (FILE *s, const wchar_t *format, va_list argptr)
2378 {
2379 return _IO_vfwscanf (s, format, argptr, NULL);
2380 }
2381 #else
/* Byte-oriented entry point (built when COMPILE_WSCANF is not defined).
   Forwards S, FORMAT and ARGPTR unchanged to _IO_vfscanf, reached through
   the INTUSE macro, and returns that call's result as-is.  The fourth
   argument is always NULL.
   NOTE(review): INTUSE is defined outside this file; presumably it
   selects a library-internal alias of _IO_vfscanf -- confirm.  The NULL
   is presumably an unused optional error-flag out-parameter -- confirm
   against the _IO_vfscanf prototype.  */
2382 int
2383 __vfscanf (FILE *s, const char *format, va_list argptr)
2384 {
2385 return INTUSE(_IO_vfscanf) (s, format, argptr, NULL);
2386 }
/* NOTE(review): libc_hidden_def is defined elsewhere; presumably it emits
   the hidden-visibility definition used for calls to __vfscanf from
   inside the library -- confirm.  */
2387 libc_hidden_def (__vfscanf)
2388 #endif
2389
/* Symbol aliases for whichever variant this file is being compiled as:
   the wide build pairs __vfwscanf with vfwscanf, the byte build pairs
   __vfscanf with vfscanf and additionally applies INTDEF to _IO_vfscanf.
   NOTE(review): weak_alias and INTDEF are macros defined outside this
   file; presumably weak_alias (a, b) makes `b' a weak alias of `a', and
   INTDEF provides the internal alias that INTUSE refers to -- confirm
   against their definitions.  */
2390 #ifdef COMPILE_WSCANF
2391 weak_alias (__vfwscanf, vfwscanf)
2392 #else
2393 weak_alias (__vfscanf, vfscanf)
2394 INTDEF(_IO_vfscanf)
2395 #endif