]> git.ipfire.org Git - thirdparty/glibc.git/blob - stdio-common/vfscanf.c
* math/math.h [__NO_LONG_DOUBLE_MATH] (__nldbl_nexttowardf): New
[thirdparty/glibc.git] / stdio-common / vfscanf.c
1 /* Copyright (C) 1991-2002, 2003, 2004, 2005, 2006
2 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
19
20 #include <assert.h>
21 #include <errno.h>
22 #include <limits.h>
23 #include <ctype.h>
24 #include <stdarg.h>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <wchar.h>
30 #include <wctype.h>
31 #include <bits/libc-lock.h>
32 #include <locale/localeinfo.h>
33
/* Select the widest integer type used by this file.  When the compiler
   defines __GNUC__, LONGLONG is `long long' and HAVE_LONGLONG is defined;
   otherwise LONGLONG falls back to plain `long'.  */
34 #ifdef __GNUC__
35 # define HAVE_LONGLONG
36 # define LONGLONG long long
37 #else
38 # define LONGLONG long
39 #endif
40
/* need_longlong is 0 when LONG_MAX equals LONG_LONG_MAX (the two types
   have the same range) and 1 otherwise.  It is a compile-time constant
   used in ordinary `if' conditions below.  */
41 /* Determine whether we have to handle `long long' at all. */
42 #if LONG_MAX == LONG_LONG_MAX
43 # define need_longlong 0
44 #else
45 # define need_longlong 1
46 #endif
47
/* need_long is 0 when INT_MAX equals LONG_MAX and 1 otherwise.  */
48 /* Determine whether we have to handle `long'. */
49 #if INT_MAX == LONG_MAX
50 # define need_long 0
51 #else
52 # define need_long 1
53 #endif
54
/* Bit values OR-ed into the per-conversion `flags' variable, which is
   reset to 0 at the start of every conversion specification.  Note that
   the `ll', `q' and `L' modifiers set both LONGDBL and LONG, while `z',
   `j' and `t' may set LONGDBL alone.  */
55 /* Those are flags in the conversion format. */
56 #define LONG 0x001 /* l: long or double */
57 #define LONGDBL 0x002 /* L: long long or long double */
58 #define SHORT 0x004 /* h: short */
59 #define SUPPRESS 0x008 /* *: suppress assignment */
60 #define POINTER 0x010 /* weird %p pointer (`fake hex') */
61 #define NOSKIP 0x020 /* do not skip blanks */
62 #define WIDTH 0x040 /* width was given */
63 #define GROUP 0x080 /* ': group numbers */
64 #define MALLOC 0x100 /* a: malloc strings */
65 #define CHAR 0x200 /* hh: char */
66 #define I18N 0x400 /* I: use locale's digits */
67
68
69 #include <locale/localeinfo.h>
70 #include <libioP.h>
71 #include <libio.h>
72
/* From here on `va_list' names _IO_va_list, the same type as the ARGPTR
   parameter of the scanning function defined below.  */
73 #undef va_list
74 #define va_list _IO_va_list
75
/* Character-type abstraction.  The same function body is compiled for
   wide streams (COMPILE_WSCANF defined) and for byte streams; the macros
   below hide the difference.  ungetc, ungetc_not_eof and inchar are not
   self-contained: they expand to code using the enclosing function's
   locals `c' (last character read), `s' (the stream), `read_in' (count
   of characters consumed) and `inchar_errno'.  */
76 #ifdef COMPILE_WSCANF
/* Push C back onto S and decrement read_in, unless C is WEOF, in which
   case nothing happens.  */
77 # define ungetc(c, s) ((void) (c == WEOF \
78 || (--read_in, \
79 INTUSE(_IO_sputbackwc) (s, c))))
/* Same, but without the WEOF test; the caller must already know that C
   is a real character.  */
80 # define ungetc_not_eof(c, s) ((void) (--read_in, \
81 INTUSE(_IO_sputbackwc) (s, c)))
/* Read the next wide character into `c' and yield it.  If `c' already
   holds WEOF no read is attempted: errno is restored from inchar_errno
   and WEOF is yielded again.  On a successful read read_in is
   incremented; on WEOF the current errno is saved in inchar_errno.  */
82 # define inchar() (c == WEOF ? ((errno = inchar_errno), WEOF) \
83 : ((c = _IO_getwc_unlocked (s)), \
84 (void) (c != WEOF \
85 ? ++read_in \
86 : (size_t) (inchar_errno = errno)), c))
87
/* Copy and classification primitives, wide-character flavour.  */
88 # define MEMCPY(d, s, n) __wmemcpy (d, s, n)
89 # define ISSPACE(Ch) iswspace (Ch)
90 # define ISDIGIT(Ch) iswdigit (Ch)
91 # define ISXDIGIT(Ch) iswxdigit (Ch)
92 # define TOLOWER(Ch) towlower (Ch)
/* Leave the enclosing function with WEOF unless _IO_fwide (s, 1)
   reports 1.  Expands to a bare `if' statement without a trailing
   semicolon.  */
93 # define ORIENT if (_IO_fwide (s, 1) != 1) return WEOF
/* Redirect the byte-string number parsers to their wide-string
   counterparts so the shared code can keep using the __strto* names.  */
94 # define __strtoll_internal __wcstoll_internal
95 # define __strtoull_internal __wcstoull_internal
96 # define __strtol_internal __wcstol_internal
97 # define __strtoul_internal __wcstoul_internal
98 # define __strtold_internal __wcstold_internal
99 # define __strtod_internal __wcstod_internal
100 # define __strtof_internal __wcstof_internal
101
/* L_ turns a literal into a wide literal; CHAR_T/UCHAR_T/WINT_T are the
   character types used for the format string and input.  EOF itself is
   redefined to WEOF so shared code can compare against EOF.  */
102 # define L_(Str) L##Str
103 # define CHAR_T wchar_t
104 # define UCHAR_T unsigned int
105 # define WINT_T wint_t
106 # undef EOF
107 # define EOF WEOF
108 #else
/* Byte-stream flavour of the same macros.  The character is cast to
   unsigned char before being pushed back.  */
109 # define ungetc(c, s) ((void) ((int) c == EOF \
110 || (--read_in, \
111 INTUSE(_IO_sputbackc) (s, (unsigned char) c))))
112 # define ungetc_not_eof(c, s) ((void) (--read_in, \
113 INTUSE(_IO_sputbackc) (s, (unsigned char) c)))
/* Read the next byte into `c' and yield it; same EOF/errno bookkeeping
   as the wide version above.  */
114 # define inchar() (c == EOF ? ((errno = inchar_errno), EOF) \
115 : ((c = _IO_getc_unlocked (s)), \
116 (void) (c != EOF \
117 ? ++read_in \
118 : (size_t) (inchar_errno = errno)), c))
/* Classification goes through the *_l functions with the enclosing
   function's `loc' variable.  Only TOLOWER casts its argument to
   unsigned char.  */
119 # define MEMCPY(d, s, n) memcpy (d, s, n)
120 # define ISSPACE(Ch) __isspace_l (Ch, loc)
121 # define ISDIGIT(Ch) __isdigit_l (Ch, loc)
122 # define ISXDIGIT(Ch) __isxdigit_l (Ch, loc)
123 # define TOLOWER(Ch) __tolower_l ((unsigned char) (Ch), loc)
/* Leave the enclosing function with EOF when _IO_vtable_offset (s) is 0
   and _IO_fwide (s, -1) does not report -1.  */
124 # define ORIENT if (_IO_vtable_offset (s) == 0 \
125 && _IO_fwide (s, -1) != -1) \
126 return EOF
127
128 # define L_(Str) Str
129 # define CHAR_T char
130 # define UCHAR_T unsigned char
131 # define WINT_T int
132 #endif
133
/* Error exits.  Each macro records the failure in locals of the
   enclosing function and jumps to its `errout' label (the label itself
   lies outside the part of the file shown here -- presumably the common
   cleanup path; confirm there).  They are wrapped in do { } while (0)
   so each can be used as a single statement.  */

/* Invalid character encoding: errval = 4 and errno = EILSEQ.  */
134 #define encode_error() do { \
135 errval = 4; \
136 __set_errno (EILSEQ); \
137 goto errout; \
138 } while (0)
/* Matching failure: errval = 2; `done' is left untouched.  */
139 #define conv_error() do { \
140 errval = 2; \
141 goto errout; \
142 } while (0)
/* Input failure: errval = 1, and `done' becomes EOF only if no
   assignment has been counted yet (done == 0).  */
143 #define input_error() do { \
144 errval = 1; \
145 if (done == 0) done = EOF; \
146 goto errout; \
147 } while (0)
/* Allocation failure: errno = ENOMEM and `done' is forced to EOF.
   Unlike the other three, this one does not assign errval.  */
148 #define memory_error() do { \
149 __set_errno (ENOMEM); \
150 done = EOF; \
151 goto errout; \
152 } while (0)
/* Validate the stream S and the format string FORMAT on entry.  Returns
   EOF from the enclosing function when S has _IO_NO_READS set (errno is
   set to EBADF) or when FORMAT is NULL (after MAYBE_SET_EINVAL).
   CHECK_FILE is given EOF as its second argument.  */
153 #define ARGCHECK(s, format) \
154 do \
155 { \
156 /* Check file argument for consistency. */ \
157 CHECK_FILE (s, EOF); \
158 if (s->_flags & _IO_NO_READS) \
159 { \
160 __set_errno (EBADF); \
161 return EOF; \
162 } \
163 else if (format == NULL) \
164 { \
165 MAYBE_SET_EINVAL; \
166 return EOF; \
167 } \
168 } while (0)
/* Lock stream S: open a cleanup region whose handler is _IO_funlockfile
   applied to S, then take the stream lock.  This expands to two
   statements and is not wrapped in do { } while (0), so it must not be
   used as the unbraced body of an `if' or loop.  */
169 #define LOCK_STREAM(S) \
170 __libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \
171 _IO_flockfile (S)
/* Counterpart of LOCK_STREAM: release the lock explicitly, then close
   the cleanup region with argument 0.  Also two statements.
   NOTE(review): presumably must appear in the same block as the
   matching LOCK_STREAM -- confirm against bits/libc-lock.h.  */
172 #define UNLOCK_STREAM(S) \
173 _IO_funlockfile (S); \
174 __libc_cleanup_region_end (0)
175
176
177 /* Read formatted input from S according to the format string
178 FORMAT, using the argument list in ARG.
179 Return the number of assignments made, or -1 for an input error. */
180 #ifdef COMPILE_WSCANF
181 int
182 _IO_vfwscanf (_IO_FILE *s, const wchar_t *format, _IO_va_list argptr,
183 int *errp)
184 #else
185 int
186 _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
187 int *errp)
188 #endif
189 {
190 va_list arg;
191 register const CHAR_T *f = format;
192 register UCHAR_T fc; /* Current character of the format. */
193 register WINT_T done = 0; /* Assignments done. */
194 register size_t read_in = 0; /* Chars read in. */
195 register WINT_T c = 0; /* Last char read. */
196 register int width; /* Maximum field width. */
197 register int flags; /* Modifiers for current format element. */
198 int errval = 0;
199 #ifndef COMPILE_WSCANF
200 __locale_t loc = _NL_CURRENT_LOCALE;
201 struct locale_data *const curctype = loc->__locales[LC_CTYPE];
202 #endif
203
204 /* Errno of last failed inchar call. */
205 int inchar_errno = 0;
206 /* Status for reading F-P nums. */
207 char got_dot, got_e, negative;
208 /* If a [...] is a [^...]. */
209 CHAR_T not_in;
210 #define exp_char not_in
211 /* Base for integral numbers. */
212 int base;
213 /* Signedness for integral numbers. */
214 int number_signed;
215 #define is_hexa number_signed
216 /* Decimal point character. */
217 #ifdef COMPILE_WSCANF
218 wint_t decimal;
219 #else
220 const char *decimal;
221 #endif
222 /* The thousands character of the current locale. */
223 #ifdef COMPILE_WSCANF
224 wint_t thousands;
225 #else
226 const char *thousands;
227 #endif
228 /* State for the conversions. */
229 mbstate_t state;
230 /* Integral holding variables. */
231 union
232 {
233 long long int q;
234 unsigned long long int uq;
235 long int l;
236 unsigned long int ul;
237 } num;
238 /* Character-buffer pointer. */
239 char *str = NULL;
240 wchar_t *wstr = NULL;
241 char **strptr = NULL;
242 ssize_t strsize = 0;
243 /* We must not react to white space immediately because it can
244 possibly be matched even if in the input stream no character is
245 available anymore. */
246 int skip_space = 0;
247 /* Nonzero if we are reading a pointer. */
248 int read_pointer;
249 /* Workspace. */
250 CHAR_T *tw; /* Temporary pointer. */
251 CHAR_T *wp = NULL; /* Workspace. */
252 size_t wpmax = 0; /* Maximal size of workspace. */
253 size_t wpsize; /* Currently used bytes in workspace. */
254 #define ADDW(Ch) \
255 do \
256 { \
257 if (wpsize == wpmax) \
258 { \
259 CHAR_T *old = wp; \
260 wpmax = (UCHAR_MAX + 1 > 2 * wpmax ? UCHAR_MAX + 1 : 2 * wpmax); \
261 wp = (CHAR_T *) alloca (wpmax * sizeof (wchar_t)); \
262 if (old != NULL) \
263 MEMCPY (wp, old, wpsize); \
264 } \
265 wp[wpsize++] = (Ch); \
266 } \
267 while (0)
268
269 #ifdef __va_copy
270 __va_copy (arg, argptr);
271 #else
272 arg = (va_list) argptr;
273 #endif
274
275 #ifdef ORIENT
276 ORIENT;
277 #endif
278
279 ARGCHECK (s, format);
280
281 {
282 #ifndef COMPILE_WSCANF
283 struct locale_data *const curnumeric = loc->__locales[LC_NUMERIC];
284 #endif
285
286 /* Figure out the decimal point character. */
287 #ifdef COMPILE_WSCANF
288 decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC);
289 #else
290 decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string;
291 #endif
292 /* Figure out the thousands separator character. */
293 #ifdef COMPILE_WSCANF
294 thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC);
295 #else
296 thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string;
297 if (*thousands == '\0')
298 thousands = NULL;
299 #endif
300 }
301
302 /* Lock the stream. */
303 LOCK_STREAM (s);
304
305
306 #ifndef COMPILE_WSCANF
307 /* From now on we use `state' to convert the format string. */
308 memset (&state, '\0', sizeof (state));
309 #endif
310
311 /* Run through the format string. */
312 while (*f != '\0')
313 {
314 unsigned int argpos;
315 /* Extract the next argument, which is of type TYPE.
316 For a %N$... spec, this is the Nth argument from the beginning;
317 otherwise it is the next argument after the state now in ARG. */
318 #ifdef __va_copy
319 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
320 ({ unsigned int pos = argpos; \
321 va_list arg; \
322 __va_copy (arg, argptr); \
323 while (--pos > 0) \
324 (void) va_arg (arg, void *); \
325 va_arg (arg, type); \
326 }))
327 #else
328 # if 0
329 /* XXX Possible optimization. */
330 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
331 ({ va_list arg = (va_list) argptr; \
332 arg = (va_list) ((char *) arg \
333 + (argpos - 1) \
334 * __va_rounded_size (void *)); \
335 va_arg (arg, type); \
336 }))
337 # else
338 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
339 ({ unsigned int pos = argpos; \
340 va_list arg = (va_list) argptr; \
341 while (--pos > 0) \
342 (void) va_arg (arg, void *); \
343 va_arg (arg, type); \
344 }))
345 # endif
346 #endif
347
348 #ifndef COMPILE_WSCANF
349 if (!isascii ((unsigned char) *f))
350 {
351 /* Non-ASCII, may be a multibyte. */
352 int len = __mbrlen (f, strlen (f), &state);
353 if (len > 0)
354 {
355 do
356 {
357 c = inchar ();
358 if (c == EOF)
359 input_error ();
360 else if (c != (unsigned char) *f++)
361 {
362 ungetc_not_eof (c, s);
363 conv_error ();
364 }
365 }
366 while (--len > 0);
367 continue;
368 }
369 }
370 #endif
371
372 fc = *f++;
373 if (fc != '%')
374 {
375 /* Remember to skip spaces. */
376 if (ISSPACE (fc))
377 {
378 skip_space = 1;
379 continue;
380 }
381
382 /* Read a character. */
383 c = inchar ();
384
385 /* Characters other than format specs must just match. */
386 if (c == EOF)
387 input_error ();
388
389 /* We saw white space char as the last character in the format
390 string. Now it's time to skip all leading white space. */
391 if (skip_space)
392 {
393 while (ISSPACE (c))
394 if (inchar () == EOF)
395 input_error ();
396 skip_space = 0;
397 }
398
399 if (c != fc)
400 {
401 ungetc (c, s);
402 conv_error ();
403 }
404
405 continue;
406 }
407
408 /* This is the start of the conversion string. */
409 flags = 0;
410
411 /* Not yet decided whether we read a pointer or not. */
412 read_pointer = 0;
413
414 /* Initialize state of modifiers. */
415 argpos = 0;
416
417 /* Prepare temporary buffer. */
418 wpsize = 0;
419
420 /* Check for a positional parameter specification. */
421 if (ISDIGIT ((UCHAR_T) *f))
422 {
423 argpos = (UCHAR_T) *f++ - L_('0');
424 while (ISDIGIT ((UCHAR_T) *f))
425 argpos = argpos * 10 + ((UCHAR_T) *f++ - L_('0'));
426 if (*f == L_('$'))
427 ++f;
428 else
429 {
430 /* Oops; that was actually the field width. */
431 width = argpos;
432 flags |= WIDTH;
433 argpos = 0;
434 goto got_width;
435 }
436 }
437
438 /* Check for the assignment-suppressing, the number grouping flag,
439 and the signal to use the locale's digit representation. */
440 while (*f == L_('*') || *f == L_('\'') || *f == L_('I'))
441 switch (*f++)
442 {
443 case L_('*'):
444 flags |= SUPPRESS;
445 break;
446 case L_('\''):
447 flags |= GROUP;
448 break;
449 case L_('I'):
450 flags |= I18N;
451 break;
452 }
453
454 /* We have seen width. */
455 if (ISDIGIT ((UCHAR_T) *f))
456 flags |= WIDTH;
457
458 /* Find the maximum field width. */
459 width = 0;
460 while (ISDIGIT ((UCHAR_T) *f))
461 {
462 width *= 10;
463 width += (UCHAR_T) *f++ - L_('0');
464 }
465 got_width:
466 if (width == 0)
467 width = -1;
468
469 /* Check for type modifiers. */
470 switch (*f++)
471 {
472 case L_('h'):
473 /* ints are short ints or chars. */
474 if (*f == L_('h'))
475 {
476 ++f;
477 flags |= CHAR;
478 }
479 else
480 flags |= SHORT;
481 break;
482 case L_('l'):
483 if (*f == L_('l'))
484 {
485 /* A double `l' is equivalent to an `L'. */
486 ++f;
487 flags |= LONGDBL | LONG;
488 }
489 else
490 /* ints are long ints. */
491 flags |= LONG;
492 break;
493 case L_('q'):
494 case L_('L'):
495 /* doubles are long doubles, and ints are long long ints. */
496 flags |= LONGDBL | LONG;
497 break;
498 case L_('a'):
499 /* The `a' is used as a flag only if followed by `s', `S' or
500 `['. */
501 if (*f != L_('s') && *f != L_('S') && *f != L_('['))
502 {
503 --f;
504 break;
505 }
506 /* String conversions (%s, %[) take a `char **'
507 arg and fill it in with a malloc'd pointer. */
508 flags |= MALLOC;
509 break;
510 case L_('z'):
511 if (need_longlong && sizeof (size_t) > sizeof (unsigned long int))
512 flags |= LONGDBL;
513 else if (sizeof (size_t) > sizeof (unsigned int))
514 flags |= LONG;
515 break;
516 case L_('j'):
517 if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int))
518 flags |= LONGDBL;
519 else if (sizeof (uintmax_t) > sizeof (unsigned int))
520 flags |= LONG;
521 break;
522 case L_('t'):
523 if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int))
524 flags |= LONGDBL;
525 else if (sizeof (ptrdiff_t) > sizeof (int))
526 flags |= LONG;
527 break;
528 default:
529 /* Not a recognized modifier. Backup. */
530 --f;
531 break;
532 }
533
534 /* End of the format string? */
535 if (*f == L_('\0'))
536 conv_error ();
537
538 /* Find the conversion specifier. */
539 fc = *f++;
540 if (skip_space || (fc != L_('[') && fc != L_('c')
541 && fc != L_('C') && fc != L_('n')))
542 {
543 /* Eat whitespace. */
544 int save_errno = errno;
545 errno = 0;
546 do
547 if (inchar () == EOF && errno == EINTR)
548 input_error ();
549 while (ISSPACE (c));
550 errno = save_errno;
551 ungetc (c, s);
552 skip_space = 0;
553 }
554
555 switch (fc)
556 {
557 case L_('%'): /* Must match a literal '%'. */
558 c = inchar ();
559 if (c == EOF)
560 input_error ();
561 if (c != fc)
562 {
563 ungetc_not_eof (c, s);
564 conv_error ();
565 }
566 break;
567
568 case L_('n'): /* Answer number of assignments done. */
569 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
570 with the 'n' conversion specifier. */
571 if (!(flags & SUPPRESS))
572 {
573 /* Don't count the read-ahead. */
574 if (need_longlong && (flags & LONGDBL))
575 *ARG (long long int *) = read_in;
576 else if (need_long && (flags & LONG))
577 *ARG (long int *) = read_in;
578 else if (flags & SHORT)
579 *ARG (short int *) = read_in;
580 else if (!(flags & CHAR))
581 *ARG (int *) = read_in;
582 else
583 *ARG (char *) = read_in;
584
585 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
586 /* We have a severe problem here. The ISO C standard
587 contradicts itself in explaining the effect of the %n
588 format in `scanf'. While in ISO C:1990 and the ISO C
589 Amendment 1:1995 the result is described as
590
591 Execution of a %n directive does not affect the
592 assignment count returned at the completion of
593 execution of the f(w)scanf function.
594
595 in ISO C Corrigendum 1:1994 the following was added:
596
597 Subclause 7.9.6.2
598 Add the following fourth example:
599 In:
600 #include <stdio.h>
601 int d1, d2, n1, n2, i;
602 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
603 the value 123 is assigned to d1 and the value 3 to n1.
604 Because %n can never get an input failure the value
605 of 3 is also assigned to n2. The value of d2 is not
606 affected. The value 3 is assigned to i.
607
608 We go for now with the historically correct code from ISO C,
609 i.e., we don't count the %n assignments. When it ever
610 should prove to be wrong, just remove the #ifdef above. */
611 ++done;
612 #endif
613 }
614 break;
615
616 case L_('c'): /* Match characters. */
617 if ((flags & LONG) == 0)
618 {
619 if (!(flags & SUPPRESS))
620 {
621 str = ARG (char *);
622 if (str == NULL)
623 conv_error ();
624 }
625
626 c = inchar ();
627 if (c == EOF)
628 input_error ();
629
630 if (width == -1)
631 width = 1;
632
633 #ifdef COMPILE_WSCANF
634 /* We have to convert the wide character(s) into multibyte
635 characters and store the result. */
636 memset (&state, '\0', sizeof (state));
637
638 do
639 {
640 size_t n;
641
642 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
643 if (n == (size_t) -1)
644 /* No valid wide character. */
645 input_error ();
646
647 /* Increment the output pointer. Even if we don't
648 write anything. */
649 str += n;
650 }
651 while (--width > 0 && inchar () != EOF);
652 #else
653 if (!(flags & SUPPRESS))
654 {
655 do
656 *str++ = c;
657 while (--width > 0 && inchar () != EOF);
658 }
659 else
660 while (--width > 0 && inchar () != EOF);
661 #endif
662
663 if (!(flags & SUPPRESS))
664 ++done;
665
666 break;
667 }
668 /* FALLTHROUGH */
669 case L_('C'):
670 if (!(flags & SUPPRESS))
671 {
672 wstr = ARG (wchar_t *);
673 if (wstr == NULL)
674 conv_error ();
675 }
676
677 c = inchar ();
678 if (c == EOF)
679 input_error ();
680
681 #ifdef COMPILE_WSCANF
682 /* Just store the incoming wide characters. */
683 if (!(flags & SUPPRESS))
684 {
685 do
686 *wstr++ = c;
687 while (--width > 0 && inchar () != EOF);
688 }
689 else
690 while (--width > 0 && inchar () != EOF);
691 #else
692 {
693 /* We have to convert the multibyte input sequence to wide
694 characters. */
695 char buf[1];
696 mbstate_t cstate;
697
698 memset (&cstate, '\0', sizeof (cstate));
699
700 do
701 {
702 /* This is what we present the mbrtowc function first. */
703 buf[0] = c;
704
705 while (1)
706 {
707 size_t n;
708
709 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
710 buf, 1, &cstate);
711
712 if (n == (size_t) -2)
713 {
714 /* Possibly correct character, just not enough
715 input. */
716 if (inchar () == EOF)
717 encode_error ();
718
719 buf[0] = c;
720 continue;
721 }
722
723 if (n != 1)
724 encode_error ();
725
726 /* We have a match. */
727 break;
728 }
729
730 /* Advance the result pointer. */
731 ++wstr;
732 }
733 while (--width > 0 && inchar () != EOF);
734 }
735 #endif
736
737 if (!(flags & SUPPRESS))
738 ++done;
739
740 break;
741
742 case L_('s'): /* Read a string. */
743 if (!(flags & LONG))
744 {
745 #define STRING_ARG(Str, Type) \
746 do if (!(flags & SUPPRESS)) \
747 { \
748 if (flags & MALLOC) \
749 { \
750 /* The string is to be stored in a malloc'd buffer. */ \
751 strptr = ARG (char **); \
752 if (strptr == NULL) \
753 conv_error (); \
754 /* Allocate an initial buffer. */ \
755 strsize = 100; \
756 *strptr = (char *) malloc (strsize * sizeof (Type)); \
757 Str = (Type *) *strptr; \
758 } \
759 else \
760 Str = ARG (Type *); \
761 if (Str == NULL) \
762 conv_error (); \
763 } while (0)
764 STRING_ARG (str, char);
765
766 c = inchar ();
767 if (c == EOF)
768 input_error ();
769
770 #ifdef COMPILE_WSCANF
771 memset (&state, '\0', sizeof (state));
772 #endif
773
774 do
775 {
776 if (ISSPACE (c))
777 {
778 ungetc_not_eof (c, s);
779 break;
780 }
781
782 #ifdef COMPILE_WSCANF
783 /* This is quite complicated. We have to convert the
784 wide characters into multibyte characters and then
785 store them. */
786 {
787 size_t n;
788
789 if (!(flags & SUPPRESS) && (flags & MALLOC)
790 && str + MB_CUR_MAX >= *strptr + strsize)
791 {
792 /* We have to enlarge the buffer if the `a' flag
793 was given. */
794 size_t strleng = str - *strptr;
795 char *newstr;
796
797 newstr = (char *) realloc (*strptr, strsize * 2);
798 if (newstr == NULL)
799 {
800 /* Can't allocate that much. Last-ditch
801 effort. */
802 newstr = (char *) realloc (*strptr,
803 strleng + MB_CUR_MAX);
804 if (newstr == NULL)
805 {
806 /* We lose. Oh well. Terminate the
807 string and stop converting,
808 so at least we don't skip any input. */
809 ((char *) (*strptr))[strleng] = '\0';
810 ++done;
811 conv_error ();
812 }
813 else
814 {
815 *strptr = newstr;
816 str = newstr + strleng;
817 strsize = strleng + MB_CUR_MAX;
818 }
819 }
820 else
821 {
822 *strptr = newstr;
823 str = newstr + strleng;
824 strsize *= 2;
825 }
826 }
827
828 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c,
829 &state);
830 if (n == (size_t) -1)
831 encode_error ();
832
833 assert (n <= MB_CUR_MAX);
834 str += n;
835 }
836 #else
837 /* This is easy. */
838 if (!(flags & SUPPRESS))
839 {
840 *str++ = c;
841 if ((flags & MALLOC)
842 && (char *) str == *strptr + strsize)
843 {
844 /* Enlarge the buffer. */
845 str = (char *) realloc (*strptr, 2 * strsize);
846 if (str == NULL)
847 {
848 /* Can't allocate that much. Last-ditch
849 effort. */
850 str = (char *) realloc (*strptr, strsize + 1);
851 if (str == NULL)
852 {
853 /* We lose. Oh well. Terminate the
854 string and stop converting,
855 so at least we don't skip any input. */
856 ((char *) (*strptr))[strsize - 1] = '\0';
857 ++done;
858 conv_error ();
859 }
860 else
861 {
862 *strptr = (char *) str;
863 str += strsize;
864 ++strsize;
865 }
866 }
867 else
868 {
869 *strptr = (char *) str;
870 str += strsize;
871 strsize *= 2;
872 }
873 }
874 }
875 #endif
876 }
877 while ((width <= 0 || --width > 0) && inchar () != EOF);
878
879 if (!(flags & SUPPRESS))
880 {
881 #ifdef COMPILE_WSCANF
882 /* We have to emit the code to get into the initial
883 state. */
884 char buf[MB_LEN_MAX];
885 size_t n = __wcrtomb (buf, L'\0', &state);
886 if (n > 0 && (flags & MALLOC)
887 && str + n >= *strptr + strsize)
888 {
889 /* Enlarge the buffer. */
890 size_t strleng = str - *strptr;
891 char *newstr;
892
893 newstr = (char *) realloc (*strptr, strleng + n + 1);
894 if (newstr == NULL)
895 {
896 /* We lose. Oh well. Terminate the string
897 and stop converting, so at least we don't
898 skip any input. */
899 ((char *) (*strptr))[strleng] = '\0';
900 ++done;
901 conv_error ();
902 }
903 else
904 {
905 *strptr = newstr;
906 str = newstr + strleng;
907 strsize = strleng + n + 1;
908 }
909 }
910
911 str = __mempcpy (str, buf, n);
912 #endif
913 *str++ = '\0';
914
915 if ((flags & MALLOC) && str - *strptr != strsize)
916 {
917 char *cp = (char *) realloc (*strptr, str - *strptr);
918 if (cp != NULL)
919 *strptr = cp;
920 }
921
922 ++done;
923 }
924 break;
925 }
926 /* FALLTHROUGH */
927
928 case L_('S'):
929 {
930 #ifndef COMPILE_WSCANF
931 mbstate_t cstate;
932 #endif
933
934 /* Wide character string. */
935 STRING_ARG (wstr, wchar_t);
936
937 c = inchar ();
938 if (c == EOF)
939 input_error ();
940
941 #ifndef COMPILE_WSCANF
942 memset (&cstate, '\0', sizeof (cstate));
943 #endif
944
945 do
946 {
947 if (ISSPACE (c))
948 {
949 ungetc_not_eof (c, s);
950 break;
951 }
952
953 #ifdef COMPILE_WSCANF
954 /* This is easy. */
955 if (!(flags & SUPPRESS))
956 {
957 *wstr++ = c;
958 if ((flags & MALLOC)
959 && wstr == (wchar_t *) *strptr + strsize)
960 {
961 /* Enlarge the buffer. */
962 wstr = (wchar_t *) realloc (*strptr,
963 (2 * strsize)
964 * sizeof (wchar_t));
965 if (wstr == NULL)
966 {
967 /* Can't allocate that much. Last-ditch
968 effort. */
969 wstr = (wchar_t *) realloc (*strptr,
970 (strsize + 1)
971 * sizeof (wchar_t));
972 if (wstr == NULL)
973 {
974 /* We lose. Oh well. Terminate the string
975 and stop converting, so at least we don't
976 skip any input. */
977 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
978 ++done;
979 conv_error ();
980 }
981 else
982 {
983 *strptr = (char *) wstr;
984 wstr += strsize;
985 ++strsize;
986 }
987 }
988 else
989 {
990 *strptr = (char *) wstr;
991 wstr += strsize;
992 strsize *= 2;
993 }
994 }
995 }
996 #else
997 {
998 char buf[1];
999
1000 buf[0] = c;
1001
1002 while (1)
1003 {
1004 size_t n;
1005
1006 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
1007 buf, 1, &cstate);
1008
1009 if (n == (size_t) -2)
1010 {
1011 /* Possibly correct character, just not enough
1012 input. */
1013 if (inchar () == EOF)
1014 encode_error ();
1015
1016 buf[0] = c;
1017 continue;
1018 }
1019
1020 if (n != 1)
1021 encode_error ();
1022
1023 /* We have a match. */
1024 ++wstr;
1025 break;
1026 }
1027
1028 if (!(flags & SUPPRESS) && (flags & MALLOC)
1029 && wstr == (wchar_t *) *strptr + strsize)
1030 {
1031 /* Enlarge the buffer. */
1032 wstr = (wchar_t *) realloc (*strptr,
1033 (2 * strsize
1034 * sizeof (wchar_t)));
1035 if (wstr == NULL)
1036 {
1037 /* Can't allocate that much. Last-ditch effort. */
1038 wstr = (wchar_t *) realloc (*strptr,
1039 ((strsize + 1)
1040 * sizeof (wchar_t)));
1041 if (wstr == NULL)
1042 {
1043 /* We lose. Oh well. Terminate the
1044 string and stop converting, so at
1045 least we don't skip any input. */
1046 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1047 ++done;
1048 conv_error ();
1049 }
1050 else
1051 {
1052 *strptr = (char *) wstr;
1053 wstr += strsize;
1054 ++strsize;
1055 }
1056 }
1057 else
1058 {
1059 *strptr = (char *) wstr;
1060 wstr += strsize;
1061 strsize *= 2;
1062 }
1063 }
1064 }
1065 #endif
1066 }
1067 while ((width <= 0 || --width > 0) && inchar () != EOF);
1068
1069 if (!(flags & SUPPRESS))
1070 {
1071 *wstr++ = L'\0';
1072
1073 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1074 {
1075 wchar_t *cp = (wchar_t *) realloc (*strptr,
1076 ((wstr
1077 - (wchar_t *) *strptr)
1078 * sizeof(wchar_t)));
1079 if (cp != NULL)
1080 *strptr = (char *) cp;
1081 }
1082
1083 ++done;
1084 }
1085 }
1086 break;
1087
1088 case L_('x'): /* Hexadecimal integer. */
1089 case L_('X'): /* Ditto. */
1090 base = 16;
1091 number_signed = 0;
1092 goto number;
1093
1094 case L_('o'): /* Octal integer. */
1095 base = 8;
1096 number_signed = 0;
1097 goto number;
1098
1099 case L_('u'): /* Unsigned decimal integer. */
1100 base = 10;
1101 number_signed = 0;
1102 goto number;
1103
1104 case L_('d'): /* Signed decimal integer. */
1105 base = 10;
1106 number_signed = 1;
1107 goto number;
1108
1109 case L_('i'): /* Generic number. */
1110 base = 0;
1111 number_signed = 1;
1112
1113 number:
1114 c = inchar ();
1115 if (c == EOF)
1116 input_error ();
1117
1118 /* Check for a sign. */
1119 if (c == L_('-') || c == L_('+'))
1120 {
1121 ADDW (c);
1122 if (width > 0)
1123 --width;
1124 c = inchar ();
1125 }
1126
1127 /* Look for a leading indication of base. */
1128 if (width != 0 && c == L_('0'))
1129 {
1130 if (width > 0)
1131 --width;
1132
1133 ADDW (c);
1134 c = inchar ();
1135
1136 if (width != 0 && TOLOWER (c) == L_('x'))
1137 {
1138 if (base == 0)
1139 base = 16;
1140 if (base == 16)
1141 {
1142 if (width > 0)
1143 --width;
1144 c = inchar ();
1145 }
1146 }
1147 else if (base == 0)
1148 base = 8;
1149 }
1150
1151 if (base == 0)
1152 base = 10;
1153
1154 if (base == 10 && (flags & I18N) != 0)
1155 {
1156 int from_level;
1157 int to_level;
1158 int level;
1159 #ifdef COMPILE_WSCANF
1160 const wchar_t *wcdigits[10];
1161 const wchar_t *wcdigits_extended[10];
1162 #else
1163 const char *mbdigits[10];
1164 const char *mbdigits_extended[10];
1165 #endif
1166 /* "to_inpunct" is a map from ASCII digits to their
1167 equivalent in locale. This is defined for locales
1168 which use an extra digits set. */
1169 wctrans_t map = __wctrans ("to_inpunct");
1170 int n;
1171
1172 from_level = 0;
1173 #ifdef COMPILE_WSCANF
1174 to_level = _NL_CURRENT_WORD (LC_CTYPE,
1175 _NL_CTYPE_INDIGITS_WC_LEN) - 1;
1176 #else
1177 to_level = (uint32_t) curctype->values[_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN)].word - 1;
1178 #endif
1179
1180 /* Get the alternative digit forms if there are any. */
1181 if (__builtin_expect (map != NULL, 0))
1182 {
1183 /* Adding new level for extra digits set in locale file. */
1184 ++to_level;
1185
1186 for (n = 0; n < 10; ++n)
1187 {
1188 #ifdef COMPILE_WSCANF
1189 wcdigits[n] = (const wchar_t *)
1190 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1191
1192 wchar_t *wc_extended = (wchar_t *)
1193 alloca ((to_level + 2) * sizeof (wchar_t));
1194 __wmemcpy (wc_extended, wcdigits[n], to_level);
1195 wc_extended[to_level] = __towctrans (L'0' + n, map);
1196 wc_extended[to_level + 1] = '\0';
1197 wcdigits_extended[n] = wc_extended;
1198 #else
1199 mbdigits[n]
1200 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1201
1202 /* Get the equivalent wide char in map. */
1203 wint_t extra_wcdigit = __towctrans (L'0' + n, map);
1204
1205 /* Convert it to multibyte representation. */
1206 mbstate_t state;
1207 memset (&state, '\0', sizeof (state));
1208
1209 char extra_mbdigit[MB_LEN_MAX];
1210 size_t mblen
1211 = __wcrtomb (extra_mbdigit, extra_wcdigit, &state);
1212
1213 if (mblen == (size_t) -1)
1214 {
1215 /* Ignore this new level. */
1216 map = NULL;
1217 break;
1218 }
1219
1220 /* Calculate the length of mbdigits[n]. */
1221 const char *last_char = mbdigits[n];
1222 for (level = 0; level < to_level; ++level)
1223 last_char = strchr (last_char, '\0') + 1;
1224
1225 size_t mbdigits_len = last_char - mbdigits[n];
1226
1227 /* Allocate memory for extended multibyte digit. */
1228 char *mb_extended;
1229 mb_extended = (char *) alloca (mbdigits_len + mblen + 1);
1230
1231 /* And get the mbdigits + extra_digit string. */
1232 *(char *) __mempcpy (__mempcpy (mb_extended, mbdigits[n],
1233 mbdigits_len),
1234 extra_mbdigit, mblen) = '\0';
1235 mbdigits_extended[n] = mb_extended;
1236 #endif
1237 }
1238 }
1239
1240 /* Read the number into workspace. */
1241 while (c != EOF && width != 0)
1242 {
1243 /* In this round we get the pointer to the digit strings
1244 and also perform the first round of comparisons. */
1245 for (n = 0; n < 10; ++n)
1246 {
1247 /* Get the string for the digits with value N. */
1248 #ifdef COMPILE_WSCANF
1249 if (__builtin_expect (map != NULL, 0))
1250 wcdigits[n] = wcdigits_extended[n];
1251 else
1252 wcdigits[n] = (const wchar_t *)
1253 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1254 wcdigits[n] += from_level;
1255
1256 if (c == (wint_t) *wcdigits[n])
1257 {
1258 to_level = from_level;
1259 break;
1260 }
1261
1262 /* Advance the pointer to the next string. */
1263 ++wcdigits[n];
1264 #else
1265 const char *cmpp;
1266 int avail = width > 0 ? width : INT_MAX;
1267
1268 if (__builtin_expect (map != NULL, 0))
1269 mbdigits[n] = mbdigits_extended[n];
1270 else
1271 mbdigits[n]
1272 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1273
1274 for (level = 0; level < from_level; level++)
1275 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1276
1277 cmpp = mbdigits[n];
1278 while ((unsigned char) *cmpp == c && avail > 0)
1279 {
1280 if (*++cmpp == '\0')
1281 break;
1282 else
1283 {
1284 if ((c = inchar ()) == EOF)
1285 break;
1286 --avail;
1287 }
1288 }
1289
1290 if (*cmpp == '\0')
1291 {
1292 if (width > 0)
1293 width = avail;
1294 to_level = from_level;
1295 break;
1296 }
1297
1298 /* We are pushing all read characters back. */
1299 if (cmpp > mbdigits[n])
1300 {
1301 ungetc (c, s);
1302 while (--cmpp > mbdigits[n])
1303 ungetc_not_eof ((unsigned char) *cmpp, s);
1304 c = (unsigned char) *cmpp;
1305 }
1306
1307 /* Advance the pointer to the next string. */
1308 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1309 #endif
1310 }
1311
1312 if (n == 10)
1313 {
1314 /* Have not yet found the digit. */
1315 for (level = from_level + 1; level <= to_level; ++level)
1316 {
1317 /* Search all ten digits of this level. */
1318 for (n = 0; n < 10; ++n)
1319 {
1320 #ifdef COMPILE_WSCANF
1321 if (c == (wint_t) *wcdigits[n])
1322 break;
1323
1324 /* Advance the pointer to the next string. */
1325 ++wcdigits[n];
1326 #else
1327 const char *cmpp;
1328 int avail = width > 0 ? width : INT_MAX;
1329
1330 cmpp = mbdigits[n];
1331 while ((unsigned char) *cmpp == c && avail > 0)
1332 {
1333 if (*++cmpp == '\0')
1334 break;
1335 else
1336 {
1337 if ((c = inchar ()) == EOF)
1338 break;
1339 --avail;
1340 }
1341 }
1342
1343 if (*cmpp == '\0')
1344 {
1345 if (width > 0)
1346 width = avail;
1347 break;
1348 }
1349
1350 /* We are pushing all read characters back. */
1351 if (cmpp > mbdigits[n])
1352 {
1353 ungetc (c, s);
1354 while (--cmpp > mbdigits[n])
1355 ungetc_not_eof ((unsigned char) *cmpp, s);
1356 c = (unsigned char) *cmpp;
1357 }
1358
1359 /* Advance the pointer to the next string. */
1360 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1361 #endif
1362 }
1363
1364 if (n < 10)
1365 {
1366 /* Found it. */
1367 from_level = level;
1368 to_level = level;
1369 break;
1370 }
1371 }
1372 }
1373
1374 if (n < 10)
1375 c = L_('0') + n;
1376 else if ((flags & GROUP)
1377 #ifdef COMPILE_WSCANF
1378 && thousands != L'\0'
1379 #else
1380 && thousands != NULL
1381 #endif
1382 )
1383 {
1384 /* Try matching against the thousands separator. */
1385 #ifdef COMPILE_WSCANF
1386 if (c != thousands)
1387 break;
1388 #else
1389 const char *cmpp = thousands;
1390 int avail = width > 0 ? width : INT_MAX;
1391
1392 while ((unsigned char) *cmpp == c && avail > 0)
1393 {
1394 ADDW (c);
1395 if (*++cmpp == '\0')
1396 break;
1397 else
1398 {
1399 if ((c = inchar ()) == EOF)
1400 break;
1401 --avail;
1402 }
1403 }
1404
1405 if (*cmpp != '\0')
1406 {
1407 /* We are pushing all read characters back. */
1408 if (cmpp > thousands)
1409 {
1410 wpsize -= cmpp - thousands;
1411 ungetc (c, s);
1412 while (--cmpp > thousands)
1413 ungetc_not_eof ((unsigned char) *cmpp, s);
1414 c = (unsigned char) *cmpp;
1415 }
1416 break;
1417 }
1418
1419 if (width > 0)
1420 width = avail;
1421
1422 /* The last thousands character will be added back by
1423 the ADDW below. */
1424 --wpsize;
1425 #endif
1426 }
1427 else
1428 break;
1429
1430 ADDW (c);
1431 if (width > 0)
1432 --width;
1433
1434 c = inchar ();
1435 }
1436 }
1437 else
1438 /* Read the number into workspace. */
1439 while (c != EOF && width != 0)
1440 {
1441 if (base == 16)
1442 {
1443 if (!ISXDIGIT (c))
1444 break;
1445 }
1446 else if (!ISDIGIT (c) || (int) (c - L_('0')) >= base)
1447 {
1448 if (base == 10 && (flags & GROUP)
1449 #ifdef COMPILE_WSCANF
1450 && thousands != L'\0'
1451 #else
1452 && thousands != NULL
1453 #endif
1454 )
1455 {
1456 /* Try matching against the thousands separator. */
1457 #ifdef COMPILE_WSCANF
1458 if (c != thousands)
1459 break;
1460 #else
1461 const char *cmpp = thousands;
1462 int avail = width > 0 ? width : INT_MAX;
1463
1464 while ((unsigned char) *cmpp == c && avail > 0)
1465 {
1466 ADDW (c);
1467 if (*++cmpp == '\0')
1468 break;
1469 else
1470 {
1471 if ((c = inchar ()) == EOF)
1472 break;
1473 --avail;
1474 }
1475 }
1476
1477 if (*cmpp != '\0')
1478 {
1479 /* We are pushing all read characters back. */
1480 if (cmpp > thousands)
1481 {
1482 wpsize -= cmpp - thousands;
1483 ungetc (c, s);
1484 while (--cmpp > thousands)
1485 ungetc_not_eof ((unsigned char) *cmpp, s);
1486 c = (unsigned char) *cmpp;
1487 }
1488 break;
1489 }
1490
1491 if (width > 0)
1492 width = avail;
1493
1494 /* The last thousands character will be added back by
1495 the ADDW below. */
1496 --wpsize;
1497 #endif
1498 }
1499 else
1500 break;
1501 }
1502 ADDW (c);
1503 if (width > 0)
1504 --width;
1505
1506 c = inchar ();
1507 }
1508
1509 if (wpsize == 0
1510 || (wpsize == 1 && (wp[0] == L_('+') || wp[0] == L_('-'))))
1511 {
1512 /* There was no number. If we are supposed to read a pointer
1513 we must recognize "(nil)" as well. */
1514 if (wpsize == 0 && read_pointer && (width < 0 || width >= 0)
1515 && c == '('
1516 && TOLOWER (inchar ()) == L_('n')
1517 && TOLOWER (inchar ()) == L_('i')
1518 && TOLOWER (inchar ()) == L_('l')
1519 && inchar () == L_(')'))
1520 /* We must produce the value of a NULL pointer. A single
1521 '0' digit is enough. */
1522 ADDW (L_('0'));
1523 else
1524 {
1525 /* The last read character is not part of the number
1526 anymore. */
1527 ungetc (c, s);
1528
1529 conv_error ();
1530 }
1531 }
1532 else
1533 /* The just read character is not part of the number anymore. */
1534 ungetc (c, s);
1535
1536 /* Convert the number. */
1537 ADDW (L_('\0'));
1538 if (need_longlong && (flags & LONGDBL))
1539 {
1540 if (number_signed)
1541 num.q = __strtoll_internal (wp, &tw, base, flags & GROUP);
1542 else
1543 num.uq = __strtoull_internal (wp, &tw, base, flags & GROUP);
1544 }
1545 else
1546 {
1547 if (number_signed)
1548 num.l = __strtol_internal (wp, &tw, base, flags & GROUP);
1549 else
1550 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP);
1551 }
1552 if (wp == tw)
1553 conv_error ();
1554
1555 if (!(flags & SUPPRESS))
1556 {
1557 if (! number_signed)
1558 {
1559 if (need_longlong && (flags & LONGDBL))
1560 *ARG (unsigned LONGLONG int *) = num.uq;
1561 else if (need_long && (flags & LONG))
1562 *ARG (unsigned long int *) = num.ul;
1563 else if (flags & SHORT)
1564 *ARG (unsigned short int *)
1565 = (unsigned short int) num.ul;
1566 else if (!(flags & CHAR))
1567 *ARG (unsigned int *) = (unsigned int) num.ul;
1568 else
1569 *ARG (unsigned char *) = (unsigned char) num.ul;
1570 }
1571 else
1572 {
1573 if (need_longlong && (flags & LONGDBL))
1574 *ARG (LONGLONG int *) = num.q;
1575 else if (need_long && (flags & LONG))
1576 *ARG (long int *) = num.l;
1577 else if (flags & SHORT)
1578 *ARG (short int *) = (short int) num.l;
1579 else if (!(flags & CHAR))
1580 *ARG (int *) = (int) num.l;
1581 else
1582 *ARG (signed char *) = (signed char) num.ul;
1583 }
1584 ++done;
1585 }
1586 break;
1587
1588 case L_('e'): /* Floating-point numbers. */
1589 case L_('E'):
1590 case L_('f'):
1591 case L_('F'):
1592 case L_('g'):
1593 case L_('G'):
1594 case L_('a'):
1595 case L_('A'):
1596 c = inchar ();
1597 if (c == EOF)
1598 input_error ();
1599
1600 got_dot = got_e = 0;
1601
1602 /* Check for a sign. */
1603 if (c == L_('-') || c == L_('+'))
1604 {
1605 negative = c == L_('-');
1606 if (width == 0 || inchar () == EOF)
1607 /* EOF is only an input error before we read any chars. */
1608 conv_error ();
1609 if (! ISDIGIT (c) && TOLOWER (c) != L_('i')
1610 && TOLOWER (c) != L_('n'))
1611 {
1612 #ifdef COMPILE_WSCANF
1613 if (c != decimal)
1614 {
1615 /* This is no valid number. */
1616 ungetc (c, s);
1617 conv_error ();
1618 }
1619 #else
1620 /* Match against the decimal point. At this point
1621 we are taking advantage of the fact that we can
1622 push more than one character back. This is
1623 (almost) never necessary since the decimal point
1624 string hopefully never contains more than one
1625 byte. */
1626 const char *cmpp = decimal;
1627 int avail = width > 0 ? width : INT_MAX;
1628
1629 while ((unsigned char) *cmpp == c && avail-- > 0)
1630 if (*++cmpp == '\0')
1631 break;
1632 else
1633 {
1634 if (inchar () == EOF)
1635 break;
1636 }
1637
1638 if (*cmpp != '\0')
1639 {
1640 /* This is no valid number. */
1641 while (1)
1642 {
1643 ungetc (c, s);
1644 if (cmpp == decimal)
1645 break;
1646 c = (unsigned char) *--cmpp;
1647 }
1648
1649 conv_error ();
1650 }
1651 else
1652 {
1653 /* Add all the characters. */
1654 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
1655 ADDW ((unsigned char) *cmpp);
1656 if (width > 0)
1657 width = avail;
1658 got_dot = 1;
1659
1660 c = inchar ();
1661 }
1662 if (width > 0)
1663 width = avail;
1664 #endif
1665 }
1666 if (width > 0)
1667 --width;
1668 }
1669 else
1670 negative = 0;
1671
1672 /* Take care for the special arguments "nan" and "inf". */
1673 if (TOLOWER (c) == L_('n'))
1674 {
1675 /* Maybe "nan". */
1676 ADDW (c);
1677 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('a'))
1678 conv_error ();
1679 if (width > 0)
1680 --width;
1681 ADDW (c);
1682 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('n'))
1683 conv_error ();
1684 if (width > 0)
1685 --width;
1686 ADDW (c);
1687 /* It is "nan". */
1688 goto scan_float;
1689 }
1690 else if (TOLOWER (c) == L_('i'))
1691 {
1692 /* Maybe "inf" or "infinity". */
1693 ADDW (c);
1694 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('n'))
1695 conv_error ();
1696 if (width > 0)
1697 --width;
1698 ADDW (c);
1699 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('f'))
1700 conv_error ();
1701 if (width > 0)
1702 --width;
1703 ADDW (c);
1704 /* It is at least "inf". */
1705 if (width != 0 && inchar () != EOF)
1706 {
1707 if (TOLOWER (c) == L_('i'))
1708 {
1709 if (width > 0)
1710 --width;
1711 /* Now we have to read the rest as well. */
1712 ADDW (c);
1713 if (width == 0 || inchar () == EOF
1714 || TOLOWER (c) != L_('n'))
1715 conv_error ();
1716 if (width > 0)
1717 --width;
1718 ADDW (c);
1719 if (width == 0 || inchar () == EOF
1720 || TOLOWER (c) != L_('i'))
1721 conv_error ();
1722 if (width > 0)
1723 --width;
1724 ADDW (c);
1725 if (width == 0 || inchar () == EOF
1726 || TOLOWER (c) != L_('t'))
1727 conv_error ();
1728 if (width > 0)
1729 --width;
1730 ADDW (c);
1731 if (width == 0 || inchar () == EOF
1732 || TOLOWER (c) != L_('y'))
1733 conv_error ();
1734 if (width > 0)
1735 --width;
1736 ADDW (c);
1737 }
1738 else
1739 /* Never mind. */
1740 ungetc (c, s);
1741 }
1742 goto scan_float;
1743 }
1744
1745 is_hexa = 0;
1746 exp_char = L_('e');
1747 if (width != 0 && c == L_('0'))
1748 {
1749 ADDW (c);
1750 c = inchar ();
1751 if (width > 0)
1752 --width;
1753 if (width != 0 && TOLOWER (c) == L_('x'))
1754 {
1755 /* It is a number in hexadecimal format. */
1756 ADDW (c);
1757
1758 is_hexa = 1;
1759 exp_char = L_('p');
1760
1761 /* Grouping is not allowed. */
1762 flags &= ~GROUP;
1763 c = inchar ();
1764 if (width > 0)
1765 --width;
1766 }
1767 }
1768
1769 do
1770 {
1771 if (ISDIGIT (c))
1772 ADDW (c);
1773 else if (!got_e && is_hexa && ISXDIGIT (c))
1774 ADDW (c);
1775 else if (got_e && wp[wpsize - 1] == exp_char
1776 && (c == L_('-') || c == L_('+')))
1777 ADDW (c);
1778 else if (wpsize > 0 && !got_e
1779 && (CHAR_T) TOLOWER (c) == exp_char)
1780 {
1781 ADDW (exp_char);
1782 got_e = got_dot = 1;
1783 }
1784 else
1785 {
1786 #ifdef COMPILE_WSCANF
1787 if (! got_dot && c == decimal)
1788 {
1789 ADDW (c);
1790 got_dot = 1;
1791 }
1792 else if ((flags & GROUP) != 0 && thousands != L'\0'
1793 && ! got_dot && c == thousands)
1794 ADDW (c);
1795 else
1796 {
1797 /* The last read character is not part of the number
1798 anymore. */
1799 ungetc (c, s);
1800 break;
1801 }
1802 #else
1803 const char *cmpp = decimal;
1804 int avail = width > 0 ? width : INT_MAX;
1805
1806 if (! got_dot)
1807 {
1808 while ((unsigned char) *cmpp == c && avail > 0)
1809 if (*++cmpp == '\0')
1810 break;
1811 else
1812 {
1813 if (inchar () == EOF)
1814 break;
1815 --avail;
1816 }
1817 }
1818
1819 if (*cmpp == '\0')
1820 {
1821 /* Add all the characters. */
1822 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
1823 ADDW ((unsigned char) *cmpp);
1824 if (width > 0)
1825 width = avail;
1826 got_dot = 1;
1827 }
1828 else
1829 {
1830 /* Figure out whether it is a thousands separator.
1831 There is one problem: we possibly read more than
1832 one character. We cannot push them back but since
1833 we know that parts of the `decimal' string matched,
1834 we can compare against it. */
1835 const char *cmp2p = thousands;
1836
1837 if ((flags & GROUP) != 0 && thousands != NULL
1838 && ! got_dot)
1839 {
1840 while (cmp2p - thousands < cmpp - decimal
1841 && *cmp2p == decimal[cmp2p - thousands])
1842 ++cmp2p;
1843 if (cmp2p - thousands == cmpp - decimal)
1844 {
1845 while ((unsigned char) *cmp2p == c && avail > 0)
1846 if (*++cmp2p == '\0')
1847 break;
1848 else
1849 {
1850 if (inchar () == EOF)
1851 break;
1852 --avail;
1853 }
1854 }
1855 }
1856
1857 if (cmp2p != NULL && *cmp2p == '\0')
1858 {
1859 /* Add all the characters. */
1860 for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
1861 ADDW ((unsigned char) *cmpp);
1862 if (width > 0)
1863 width = avail;
1864 }
1865 else
1866 {
1867 /* The last read character is not part of the number
1868 anymore. */
1869 ungetc (c, s);
1870 break;
1871 }
1872 }
1873 #endif
1874 }
1875 if (width > 0)
1876 --width;
1877 }
1878 while (width != 0 && inchar () != EOF);
1879
1880 /* Have we read any character? If we try to read a number
1881 in hexadecimal notation and we have read only the `0x'
1882 prefix or no exponent this is an error. */
1883 if (wpsize == 0 || (is_hexa && (wpsize == 2 || ! got_e)))
1884 conv_error ();
1885
1886 scan_float:
1887 /* Convert the number. */
1888 ADDW (L_('\0'));
1889 if ((flags & LONGDBL) && !__ldbl_is_dbl)
1890 {
1891 long double d = __strtold_internal (wp, &tw, flags & GROUP);
1892 if (!(flags & SUPPRESS) && tw != wp)
1893 *ARG (long double *) = negative ? -d : d;
1894 }
1895 else if (flags & (LONG | LONGDBL))
1896 {
1897 double d = __strtod_internal (wp, &tw, flags & GROUP);
1898 if (!(flags & SUPPRESS) && tw != wp)
1899 *ARG (double *) = negative ? -d : d;
1900 }
1901 else
1902 {
1903 float d = __strtof_internal (wp, &tw, flags & GROUP);
1904 if (!(flags & SUPPRESS) && tw != wp)
1905 *ARG (float *) = negative ? -d : d;
1906 }
1907
1908 if (tw == wp)
1909 conv_error ();
1910
1911 if (!(flags & SUPPRESS))
1912 ++done;
1913 break;
1914
1915 case L_('['): /* Character class. */
1916 if (flags & LONG)
1917 STRING_ARG (wstr, wchar_t);
1918 else
1919 STRING_ARG (str, char);
1920
1921 if (*f == L_('^'))
1922 {
1923 ++f;
1924 not_in = 1;
1925 }
1926 else
1927 not_in = 0;
1928
1929 if (width < 0)
1930 /* There is no width given so there is also no limit on the
1931 number of characters we read. Therefore we set width to
1932 a very high value to make the algorithm easier. */
1933 width = INT_MAX;
1934
1935 #ifdef COMPILE_WSCANF
1936 /* Find the beginning and the end of the scanlist. We are not
1937 creating a lookup table since it would have to be too large.
1938 Instead we search each time through the string. This is not
1939 a constant lookup time but who uses this feature deserves to
1940 be punished. */
1941 tw = (wchar_t *) f; /* Marks the beginning. */
1942
1943 if (*f == L']')
1944 ++f;
1945
1946 while ((fc = *f++) != L'\0' && fc != L']');
1947
1948 if (fc == L'\0')
1949 conv_error ();
1950 wp = (wchar_t *) f - 1;
1951 #else
1952 /* Fill WP with byte flags indexed by character.
1953 We will use this flag map for matching input characters. */
1954 if (wpmax < UCHAR_MAX + 1)
1955 {
1956 wpmax = UCHAR_MAX + 1;
1957 wp = (char *) alloca (wpmax);
1958 }
1959 memset (wp, '\0', UCHAR_MAX + 1);
1960
1961 fc = *f;
1962 if (fc == ']' || fc == '-')
1963 {
1964 /* If ] or - appears before any char in the set, it is not
1965 the terminator or separator, but the first char in the
1966 set. */
1967 wp[fc] = 1;
1968 ++f;
1969 }
1970
1971 while ((fc = *f++) != '\0' && fc != ']')
1972 if (fc == '-' && *f != '\0' && *f != ']'
1973 && (unsigned char) f[-2] <= (unsigned char) *f)
1974 {
1975 /* Add all characters from the one before the '-'
1976 up to (but not including) the next format char. */
1977 for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc)
1978 wp[fc] = 1;
1979 }
1980 else
1981 /* Add the character to the flag map. */
1982 wp[fc] = 1;
1983
1984 if (fc == '\0')
1985 conv_error();
1986 #endif
1987
1988 if (flags & LONG)
1989 {
1990 size_t now = read_in;
1991 #ifdef COMPILE_WSCANF
1992 if (inchar () == WEOF)
1993 input_error ();
1994
1995 do
1996 {
1997 wchar_t *runp;
1998
1999 /* Test whether it's in the scanlist. */
2000 runp = tw;
2001 while (runp < wp)
2002 {
2003 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp
2004 && runp != tw
2005 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2006 {
2007 /* Match against all characters in between the
2008 first and last character of the sequence. */
2009 wchar_t wc;
2010
2011 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2012 if ((wint_t) wc == c)
2013 break;
2014
2015 if (wc <= runp[1] && !not_in)
2016 break;
2017 if (wc <= runp[1] && not_in)
2018 {
2019 /* The current character is not in the
2020 scanset. */
2021 ungetc (c, s);
2022 goto out;
2023 }
2024
2025 runp += 2;
2026 }
2027 else
2028 {
2029 if ((wint_t) *runp == c && !not_in)
2030 break;
2031 if ((wint_t) *runp == c && not_in)
2032 {
2033 ungetc (c, s);
2034 goto out;
2035 }
2036
2037 ++runp;
2038 }
2039 }
2040
2041 if (runp == wp && !not_in)
2042 {
2043 ungetc (c, s);
2044 goto out;
2045 }
2046
2047 if (!(flags & SUPPRESS))
2048 {
2049 *wstr++ = c;
2050
2051 if ((flags & MALLOC)
2052 && wstr == (wchar_t *) *strptr + strsize)
2053 {
2054 /* Enlarge the buffer. */
2055 wstr = (wchar_t *) realloc (*strptr,
2056 (2 * strsize)
2057 * sizeof (wchar_t));
2058 if (wstr == NULL)
2059 {
2060 /* Can't allocate that much. Last-ditch
2061 effort. */
2062 wstr = (wchar_t *)
2063 realloc (*strptr, (strsize + 1)
2064 * sizeof (wchar_t));
2065 if (wstr == NULL)
2066 {
2067 /* We lose. Oh well. Terminate the string
2068 and stop converting, so at least we don't
2069 skip any input. */
2070 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2071 ++done;
2072 conv_error ();
2073 }
2074 else
2075 {
2076 *strptr = (char *) wstr;
2077 wstr += strsize;
2078 ++strsize;
2079 }
2080 }
2081 else
2082 {
2083 *strptr = (char *) wstr;
2084 wstr += strsize;
2085 strsize *= 2;
2086 }
2087 }
2088 }
2089 }
2090 while (--width > 0 && inchar () != WEOF);
2091 out:
2092 #else
2093 char buf[MB_LEN_MAX];
2094 size_t cnt = 0;
2095 mbstate_t cstate;
2096
2097 if (inchar () == EOF)
2098 input_error ();
2099
2100 memset (&cstate, '\0', sizeof (cstate));
2101
2102 do
2103 {
2104 if (wp[c] == not_in)
2105 {
2106 ungetc_not_eof (c, s);
2107 break;
2108 }
2109
2110 /* This is easy. */
2111 if (!(flags & SUPPRESS))
2112 {
2113 size_t n;
2114
2115 /* Convert it into a wide character. */
2116 buf[0] = c;
2117 n = __mbrtowc (wstr, buf, 1, &cstate);
2118
2119 if (n == (size_t) -2)
2120 {
2121 /* Possibly correct character, just not enough
2122 input. */
2123 ++cnt;
2124 assert (cnt < MB_CUR_MAX);
2125 continue;
2126 }
2127 cnt = 0;
2128
2129 ++wstr;
2130 if ((flags & MALLOC)
2131 && wstr == (wchar_t *) *strptr + strsize)
2132 {
2133 /* Enlarge the buffer. */
2134 wstr = (wchar_t *) realloc (*strptr,
2135 (2 * strsize
2136 * sizeof (wchar_t)));
2137 if (wstr == NULL)
2138 {
2139 /* Can't allocate that much. Last-ditch
2140 effort. */
2141 wstr = (wchar_t *)
2142 realloc (*strptr, ((strsize + 1)
2143 * sizeof (wchar_t)));
2144 if (wstr == NULL)
2145 {
2146 /* We lose. Oh well. Terminate the
2147 string and stop converting,
2148 so at least we don't skip any input. */
2149 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2150 ++done;
2151 conv_error ();
2152 }
2153 else
2154 {
2155 *strptr = (char *) wstr;
2156 wstr += strsize;
2157 ++strsize;
2158 }
2159 }
2160 else
2161 {
2162 *strptr = (char *) wstr;
2163 wstr += strsize;
2164 strsize *= 2;
2165 }
2166 }
2167 }
2168
2169 if (--width <= 0)
2170 break;
2171 }
2172 while (inchar () != EOF);
2173
2174 if (cnt != 0)
2175 /* We stopped in the middle of recognizing another
2176 character. That's a problem. */
2177 encode_error ();
2178 #endif
2179
2180 if (now == read_in)
2181 /* We haven't successfully read any character. */
2182 conv_error ();
2183
2184 if (!(flags & SUPPRESS))
2185 {
2186 *wstr++ = L'\0';
2187
2188 if ((flags & MALLOC)
2189 && wstr - (wchar_t *) *strptr != strsize)
2190 {
2191 wchar_t *cp = (wchar_t *)
2192 realloc (*strptr, ((wstr - (wchar_t *) *strptr)
2193 * sizeof(wchar_t)));
2194 if (cp != NULL)
2195 *strptr = (char *) cp;
2196 }
2197
2198 ++done;
2199 }
2200 }
2201 else
2202 {
2203 size_t now = read_in;
2204
2205 if (inchar () == EOF)
2206 input_error ();
2207
2208 #ifdef COMPILE_WSCANF
2209
2210 memset (&state, '\0', sizeof (state));
2211
2212 do
2213 {
2214 wchar_t *runp;
2215 size_t n;
2216
2217 /* Test whether it's in the scanlist. */
2218 runp = tw;
2219 while (runp < wp)
2220 {
2221 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp
2222 && runp != tw
2223 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2224 {
2225 /* Match against all characters in between the
2226 first and last character of the sequence. */
2227 wchar_t wc;
2228
2229 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2230 if ((wint_t) wc == c)
2231 break;
2232
2233 if (wc <= runp[1] && !not_in)
2234 break;
2235 if (wc <= runp[1] && not_in)
2236 {
2237 /* The current character is not in the
2238 scanset. */
2239 ungetc (c, s);
2240 goto out2;
2241 }
2242
2243 runp += 2;
2244 }
2245 else
2246 {
2247 if ((wint_t) *runp == c && !not_in)
2248 break;
2249 if ((wint_t) *runp == c && not_in)
2250 {
2251 ungetc (c, s);
2252 goto out2;
2253 }
2254
2255 ++runp;
2256 }
2257 }
2258
2259 if (runp == wp && !not_in)
2260 {
2261 ungetc (c, s);
2262 goto out2;
2263 }
2264
2265 if (!(flags & SUPPRESS))
2266 {
2267 if ((flags & MALLOC)
2268 && str + MB_CUR_MAX >= *strptr + strsize)
2269 {
2270 /* Enlarge the buffer. */
2271 size_t strleng = str - *strptr;
2272 char *newstr;
2273
2274 newstr = (char *) realloc (*strptr, 2 * strsize);
2275 if (newstr == NULL)
2276 {
2277 /* Can't allocate that much. Last-ditch
2278 effort. */
2279 newstr = (char *) realloc (*strptr,
2280 strleng + MB_CUR_MAX);
2281 if (newstr == NULL)
2282 {
2283 /* We lose. Oh well. Terminate the string
2284 and stop converting, so at least we don't
2285 skip any input. */
2286 ((char *) (*strptr))[strleng] = '\0';
2287 ++done;
2288 conv_error ();
2289 }
2290 else
2291 {
2292 *strptr = newstr;
2293 str = newstr + strleng;
2294 strsize = strleng + MB_CUR_MAX;
2295 }
2296 }
2297 else
2298 {
2299 *strptr = newstr;
2300 str = newstr + strleng;
2301 strsize *= 2;
2302 }
2303 }
2304 }
2305
2306 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
2307 if (n == (size_t) -1)
2308 encode_error ();
2309
2310 assert (n <= MB_CUR_MAX);
2311 str += n;
2312 }
2313 while (--width > 0 && inchar () != WEOF);
2314 out2:
2315 #else
2316 do
2317 {
2318 if (wp[c] == not_in)
2319 {
2320 ungetc_not_eof (c, s);
2321 break;
2322 }
2323
2324 /* This is easy. */
2325 if (!(flags & SUPPRESS))
2326 {
2327 *str++ = c;
2328 if ((flags & MALLOC)
2329 && (char *) str == *strptr + strsize)
2330 {
2331 /* Enlarge the buffer. */
2332 size_t newsize = 2 * strsize;
2333
2334 allocagain:
2335 str = (char *) realloc (*strptr, newsize);
2336 if (str == NULL)
2337 {
2338 /* Can't allocate that much. Last-ditch
2339 effort. */
2340 if (newsize > strsize + 1)
2341 {
2342 newsize = strsize + 1;
2343 goto allocagain;
2344 }
2345 /* We lose. Oh well. Terminate the
2346 string and stop converting,
2347 so at least we don't skip any input. */
2348 ((char *) (*strptr))[strsize - 1] = '\0';
2349 ++done;
2350 conv_error ();
2351 }
2352 else
2353 {
2354 *strptr = (char *) str;
2355 str += strsize;
2356 strsize = newsize;
2357 }
2358 }
2359 }
2360 }
2361 while (--width > 0 && inchar () != EOF);
2362 #endif
2363
2364 if (now == read_in)
2365 /* We haven't successfully read any character. */
2366 conv_error ();
2367
2368 if (!(flags & SUPPRESS))
2369 {
2370 #ifdef COMPILE_WSCANF
2371 /* We have to emit the code to get into the initial
2372 state. */
2373 char buf[MB_LEN_MAX];
2374 size_t n = __wcrtomb (buf, L'\0', &state);
2375 if (n > 0 && (flags & MALLOC)
2376 && str + n >= *strptr + strsize)
2377 {
2378 /* Enlarge the buffer. */
2379 size_t strleng = str - *strptr;
2380 char *newstr;
2381
2382 newstr = (char *) realloc (*strptr, strleng + n + 1);
2383 if (newstr == NULL)
2384 {
2385 /* We lose. Oh well. Terminate the string
2386 and stop converting, so at least we don't
2387 skip any input. */
2388 ((char *) (*strptr))[strleng] = '\0';
2389 ++done;
2390 conv_error ();
2391 }
2392 else
2393 {
2394 *strptr = newstr;
2395 str = newstr + strleng;
2396 strsize = strleng + n + 1;
2397 }
2398 }
2399
2400 str = __mempcpy (str, buf, n);
2401 #endif
2402 *str++ = '\0';
2403
2404 if ((flags & MALLOC) && str - *strptr != strsize)
2405 {
2406 char *cp = (char *) realloc (*strptr, str - *strptr);
2407 if (cp != NULL)
2408 *strptr = cp;
2409 }
2410
2411 ++done;
2412 }
2413 }
2414 break;
2415
2416 case L_('p'): /* Generic pointer. */
2417 base = 16;
2418 /* A PTR must be the same size as a `long int'. */
2419 flags &= ~(SHORT|LONGDBL);
2420 if (need_long)
2421 flags |= LONG;
2422 number_signed = 0;
2423 read_pointer = 1;
2424 goto number;
2425
2426 default:
2427 /* If this is an unknown format character punt. */
2428 conv_error ();
2429 }
2430 }
2431
2432 /* The last thing we saw in the format string was a white space.
2433 Consume the last white spaces. */
2434 if (skip_space)
2435 {
2436 do
2437 c = inchar ();
2438 while (ISSPACE (c));
2439 ungetc (c, s);
2440 }
2441
2442 errout:
2443 /* Unlock stream. */
2444 UNLOCK_STREAM (s);
2445
2446 if (errp != NULL)
2447 *errp |= errval;
2448
2449 return done;
2450 }
2451
2452 #ifdef COMPILE_WSCANF
/* Wide-character entry point; this branch is compiled only when
   COMPILE_WSCANF is defined.  Forwards S, FORMAT and ARGPTR unchanged
   to _IO_vfwscanf and returns whatever that call returns.

   The trailing NULL fills the extra fourth parameter of the scanner.
   NOTE(review): presumably that parameter is the optional error-flag
   output pointer (the scanner epilogue earlier in this file writes
   through `errp' only when it is non-NULL), so no error flags are
   reported back here -- confirm against the definition of
   _IO_vfwscanf, which lies outside this part of the file.  */
2453 int
2454 __vfwscanf (FILE *s, const wchar_t *format, va_list argptr)
2455 {
2456 return _IO_vfwscanf (s, format, argptr, NULL);
2457 }
/* NOTE(review): ldbl_weak_alias is not defined in this file;
   presumably it makes `vfwscanf' available as a weak alias of
   __vfwscanf -- confirm against the macro definition.  */
2458 ldbl_weak_alias (__vfwscanf, vfwscanf)
2459 #else
/* Narrow-character entry point; this branch is compiled when
   COMPILE_WSCANF is not defined.  Forwards S, FORMAT and ARGPTR
   unchanged to _IO_vfscanf_internal and returns whatever that call
   returns.

   The trailing NULL fills the extra fourth parameter of the scanner.
   NOTE(review): presumably that parameter is the optional error-flag
   output pointer (the scanner epilogue earlier in this file writes
   through `errp' only when it is non-NULL), so no error flags are
   reported back here -- confirm against the definition of
   _IO_vfscanf_internal, which lies outside this part of the file.  */
2460 int
2461 ___vfscanf (FILE *s, const char *format, va_list argptr)
2462 {
2463 return _IO_vfscanf_internal (s, format, argptr, NULL);
2464 }
/* Name bindings for the two functions above.  NOTE(review): the ldbl_*
   macros are not defined in this file; judging by their names they
   presumably bind _IO_vfscanf to _IO_vfscanf_internal and __vfscanf to
   ___vfscanf as strong aliases, emit a hidden definition for
   __vfscanf, and bind vfscanf to ___vfscanf as a weak alias --
   confirm against the macro definitions.  */
2465 ldbl_strong_alias (_IO_vfscanf_internal, _IO_vfscanf)
2466 ldbl_strong_alias (___vfscanf, __vfscanf)
2467 ldbl_hidden_def (___vfscanf, __vfscanf)
2468 ldbl_weak_alias (___vfscanf, vfscanf)
2469 #endif