]> git.ipfire.org Git - thirdparty/glibc.git/blame - stdio-common/vfscanf-internal.c
malloc: set NON_MAIN_ARENA flag for reclaimed memalign chunk (BZ #30101)
[thirdparty/glibc.git] / stdio-common / vfscanf-internal.c
CommitLineData
349718d4 1/* Internal functions for the *scanf* implementation.
6d7e8eda 2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
349718d4
ZW
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
5a82c748 17 <https://www.gnu.org/licenses/>. */
349718d4
ZW
18
19#include <assert.h>
20#include <errno.h>
21#include <limits.h>
22#include <ctype.h>
23#include <stdarg.h>
24#include <stdbool.h>
25#include <stdio.h>
26#include <stdint.h>
27#include <stdlib.h>
28#include <string.h>
29#include <wchar.h>
30#include <wctype.h>
31#include <libc-diag.h>
32#include <libc-lock.h>
33#include <locale/localeinfo.h>
34#include <scratch_buffer.h>
35
/* Select the widest integer type used for conversions.  GNU-compatible
   compilers provide `long long' (and HAVE_LONGLONG is defined to say so);
   any other compiler falls back to plain `long'.  */
#ifndef __GNUC__
# define LONGLONG long
#else
# define HAVE_LONGLONG
# define LONGLONG long long
#endif
42
/* need_longlong is 1 when `long long' has a different maximum than `long'
   and therefore needs its own code paths, and 0 when both types share the
   same maximum.  */
#if LONG_MAX != LONG_LONG_MAX
# define need_longlong 1
#else
# define need_longlong 0
#endif
49
/* need_long is 1 when `long' has a different maximum than `int' and
   therefore needs its own code paths, and 0 when both types share the
   same maximum.  */
#if INT_MAX != LONG_MAX
# define need_long 1
#else
# define need_long 0
#endif
56
/* Bits stored in `flags' while a single conversion specification is
   being parsed and executed.  Each bit is distinct so they can be
   combined freely.  */
#define LONG          0x0001  /* `l' modifier: long or double.  */
#define LONGDBL       0x0002  /* `L' modifier: long long or long double.  */
#define SHORT         0x0004  /* `h' modifier: short.  */
#define SUPPRESS      0x0008  /* `*' flag: suppress the assignment.  */
#define POINTER       0x0010  /* Weird %p pointer (`fake hex').  */
#define NOSKIP        0x0020  /* Do not skip blanks.  */
#define NUMBER_SIGNED 0x0040  /* Signed integer.  */
#define GROUP         0x0080  /* `'' flag: group numbers.  */
#define GNU_MALLOC    0x0100  /* `a' modifier: malloc strings.  */
#define CHAR          0x0200  /* `hh' modifier: char.  */
#define I18N          0x0400  /* `I' flag: use the locale's digits.  */
#define HEXA_FLOAT    0x0800  /* Hexadecimal float.  */
#define READ_POINTER  0x1000  /* This is a pointer value.  */
#define POSIX_MALLOC  0x2000  /* `m' modifier: malloc strings.  */

/* Either of the two "allocate the string for me" modifiers.  */
#define MALLOC        (GNU_MALLOC | POSIX_MALLOC)
73
74#include <locale/localeinfo.h>
75#include <libioP.h>
76
/* Character-type abstraction.  The same scanner body is compiled twice:
   with COMPILE_WSCANF defined it works on wide characters, otherwise on
   bytes.  The macros below hide the difference.

   ungetc, ungetc_not_eof and inchar expand in the scope of the scanning
   function and use its locals `c' (last character read), `read_in'
   (count of characters consumed), `inchar_errno' and the stream `s'.  */
#ifdef COMPILE_WSCANF
/* Character types and literal spelling for the wide variant.  */
# define L_(Str) L##Str
# define CHAR_T wchar_t
# define UCHAR_T unsigned int
# define WINT_T wint_t
# undef EOF
# define EOF WEOF

/* Push C back unless it is the end-of-file marker, and uncount it.  */
# define ungetc(c, s) ((void) (c == WEOF \
                               || (--read_in, \
                                   _IO_sputbackwc (s, c))))
/* Same, for callers that already know C is not WEOF.  */
# define ungetc_not_eof(c, s) ((void) (--read_in, \
                                       _IO_sputbackwc (s, c)))
/* Read the next wide character into `c'.  Once WEOF has been seen no
   further read is attempted; errno is restored from the failed read
   instead.  On a successful read `read_in' is incremented, on failure
   errno is saved in `inchar_errno'.  */
# define inchar() (c == WEOF ? ((errno = inchar_errno), WEOF) \
                   : ((c = _IO_getwc_unlocked (s)), \
                      (void) (c != WEOF \
                              ? ++read_in \
                              : (size_t) (inchar_errno = errno)), c))

/* Character classification.  */
# define ISSPACE(Ch) iswspace (Ch)
# define ISDIGIT(Ch) iswdigit (Ch)
# define ISXDIGIT(Ch) iswxdigit (Ch)
# define TOLOWER(Ch) towlower (Ch)

/* Fail with WEOF unless the stream can be wide oriented.  */
# define ORIENT if (_IO_fwide (s, 1) != 1) return WEOF

/* Route the numeric conversion helpers to their wide counterparts.  */
# define __strtoll_internal __wcstoll_internal
# define __strtoull_internal __wcstoull_internal
# define __strtol_internal __wcstol_internal
# define __strtoul_internal __wcstoul_internal
# define __strtold_internal __wcstold_internal
# define __strtod_internal __wcstod_internal
# define __strtof_internal __wcstof_internal
# if __HAVE_FLOAT128_UNLIKE_LDBL
#  define __strtof128_internal __wcstof128_internal
# endif
#else
/* Character types and literal spelling for the byte variant.  */
# define L_(Str) Str
# define CHAR_T char
# define UCHAR_T unsigned char
# define WINT_T int

/* Push C back unless it is EOF, and uncount it.  */
# define ungetc(c, s) ((void) ((int) c == EOF \
                               || (--read_in, \
                                   _IO_sputbackc (s, (unsigned char) c))))
/* Same, for callers that already know C is not EOF.  */
# define ungetc_not_eof(c, s) ((void) (--read_in, \
                                       _IO_sputbackc (s, (unsigned char) c)))
/* Read the next byte into `c'.  Once EOF has been seen no further read
   is attempted; errno is restored from the failed read instead.  On a
   successful read `read_in' is incremented, on failure errno is saved in
   `inchar_errno'.  */
# define inchar() (c == EOF ? ((errno = inchar_errno), EOF) \
                   : ((c = _IO_getc_unlocked (s)), \
                      (void) (c != EOF \
                              ? ++read_in \
                              : (size_t) (inchar_errno = errno)), c))

/* Character classification, using the locale object `loc' of the
   scanning function.  */
# define ISSPACE(Ch) __isspace_l (Ch, loc)
# define ISDIGIT(Ch) __isdigit_l (Ch, loc)
# define ISXDIGIT(Ch) __isxdigit_l (Ch, loc)
# define TOLOWER(Ch) __tolower_l ((unsigned char) (Ch), loc)

/* Fail with EOF unless the stream can be byte oriented.  The check is
   only made when _IO_vtable_offset (s) is zero.  */
# define ORIENT if (_IO_vtable_offset (s) == 0 \
                    && _IO_fwide (s, -1) != -1) \
                  return EOF
#endif
135
136#include "printf-parse.h" /* Use read_int. */
137
/* Error exits used inside the scanning function.  All three jump to its
   `errout' label.  */

/* The input could not be encoded or decoded: report EILSEQ.  */
#define encode_error() \
  do \
    { \
      __set_errno (EILSEQ); \
      goto errout; \
    } \
  while (0)

/* The input did not match the conversion: leave the count as it is.  */
#define conv_error() \
  do \
    { \
      goto errout; \
    } \
  while (0)

/* Input ran out or failed: the result becomes EOF, but only if nothing
   has been assigned yet.  */
#define input_error() \
  do \
    { \
      if (done == 0) done = EOF; \
      goto errout; \
    } \
  while (0)
/* Record PTR in the scanning function's `ptrs_to_free' list.  The list
   is a chain of fixed-size chunks; when there is no chunk yet, or the
   newest one is full, a fresh chunk is taken from the stack with alloca
   and linked in front before PTR is appended.  */
#define add_ptr_to_free(ptr) \
  do \
    { \
      if (ptrs_to_free == NULL \
          || ptrs_to_free->count == (sizeof (ptrs_to_free->ptrs) \
                                     / sizeof (ptrs_to_free->ptrs[0]))) \
        { \
          struct ptrs_to_free *fresh_chunk = alloca (sizeof (*ptrs_to_free)); \
          fresh_chunk->count = 0; \
          fresh_chunk->next = ptrs_to_free; \
          ptrs_to_free = fresh_chunk; \
        } \
      ptrs_to_free->ptrs[ptrs_to_free->count++] = (ptr); \
    } \
  while (0)
/* Validate the arguments of the scanning function and return EOF from it
   when they are unusable: EBADF for a stream that does not allow reads,
   EINVAL for a null FORMAT.  */
#define ARGCHECK(s, format) \
  do \
    { \
      /* Check file argument for consistency.  */ \
      CHECK_FILE (s, EOF); \
      if (s->_flags & _IO_NO_READS) \
        { \
          __set_errno (EBADF); \
          return EOF; \
        } \
      if (format == NULL) \
        { \
          __set_errno (EINVAL); \
          return EOF; \
        } \
    } \
  while (0)
/* Take the lock of stream S, registering _IO_funlockfile as the cleanup
   action for the region first.  Each of these macros expands to two
   statements and is not wrapped in do/while, so it must not be used as
   the sole body of an unbraced `if'.  LOCK_STREAM must be paired with
   UNLOCK_STREAM.  */
#define LOCK_STREAM(S) \
  __libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \
  _IO_flockfile (S)

/* Release the lock of stream S and close the cleanup region without
   running the registered action again.  */
#define UNLOCK_STREAM(S) \
  _IO_funlockfile (S); \
  __libc_cleanup_region_end (0)
186
/* One chunk of the list built by add_ptr_to_free.  */
struct ptrs_to_free
{
  /* Number of entries of PTRS currently in use.  */
  size_t count;
  /* The chunk that was current before this one, or NULL.  */
  struct ptrs_to_free *next;
  /* The recorded `char **' values.  */
  char **ptrs[32];
};
193
194struct char_buffer {
195 CHAR_T *current;
196 CHAR_T *end;
197 struct scratch_buffer scratch;
198};
199
200/* Returns a pointer to the first CHAR_T object in the buffer. Only
201 valid if char_buffer_add (BUFFER, CH) has been called and
202 char_buffer_error (BUFFER) is false. */
203static inline CHAR_T *
204char_buffer_start (const struct char_buffer *buffer)
205{
206 return (CHAR_T *) buffer->scratch.data;
207}
208
209/* Returns the number of CHAR_T objects in the buffer. Only valid if
210 char_buffer_error (BUFFER) is false. */
211static inline size_t
212char_buffer_size (const struct char_buffer *buffer)
213{
214 return buffer->current - char_buffer_start (buffer);
215}
216
217/* Reinitializes BUFFER->current and BUFFER->end to cover the entire
218 scratch buffer. */
219static inline void
220char_buffer_rewind (struct char_buffer *buffer)
221{
222 buffer->current = char_buffer_start (buffer);
223 buffer->end = buffer->current + buffer->scratch.length / sizeof (CHAR_T);
224}
225
226/* Returns true if a previous call to char_buffer_add (BUFFER, CH)
227 failed. */
228static inline bool
229char_buffer_error (const struct char_buffer *buffer)
230{
231 return __glibc_unlikely (buffer->current == NULL);
232}
233
234/* Slow path for char_buffer_add. */
235static void
236char_buffer_add_slow (struct char_buffer *buffer, CHAR_T ch)
237{
238 if (char_buffer_error (buffer))
239 return;
240 size_t offset = buffer->end - (CHAR_T *) buffer->scratch.data;
241 if (!scratch_buffer_grow_preserve (&buffer->scratch))
242 {
243 buffer->current = NULL;
244 buffer->end = NULL;
245 return;
246 }
247 char_buffer_rewind (buffer);
248 buffer->current += offset;
249 *buffer->current++ = ch;
250}
251
252/* Adds CH to BUFFER. This function does not report any errors, check
253 for them with char_buffer_error. */
254static inline void
255char_buffer_add (struct char_buffer *buffer, CHAR_T ch)
256 __attribute__ ((always_inline));
257static inline void
258char_buffer_add (struct char_buffer *buffer, CHAR_T ch)
259{
260 if (__glibc_unlikely (buffer->current == buffer->end))
261 char_buffer_add_slow (buffer, ch);
262 else
263 *buffer->current++ = ch;
264}
265
266/* Read formatted input from S according to the format string
267 FORMAT, using the argument list in ARG.
268 Return the number of assignments made, or -1 for an input error. */
269#ifdef COMPILE_WSCANF
270int
271__vfwscanf_internal (FILE *s, const wchar_t *format, va_list argptr,
272 unsigned int mode_flags)
273#else
274int
275__vfscanf_internal (FILE *s, const char *format, va_list argptr,
276 unsigned int mode_flags)
277#endif
278{
279 va_list arg;
c0e9ddf5 280 const UCHAR_T *f = (const UCHAR_T *) format;
349718d4
ZW
281 UCHAR_T fc; /* Current character of the format. */
282 WINT_T done = 0; /* Assignments done. */
283 size_t read_in = 0; /* Chars read in. */
284 WINT_T c = 0; /* Last char read. */
285 int width; /* Maximum field width. */
286 int flags; /* Modifiers for current format element. */
287#ifndef COMPILE_WSCANF
288 locale_t loc = _NL_CURRENT_LOCALE;
289 struct __locale_data *const curctype = loc->__locales[LC_CTYPE];
290#endif
291
292 /* Errno of last failed inchar call. */
293 int inchar_errno = 0;
294 /* Status for reading F-P nums. */
295 char got_digit, got_dot, got_e, got_sign;
296 /* If a [...] is a [^...]. */
297 CHAR_T not_in;
298#define exp_char not_in
299 /* Base for integral numbers. */
300 int base;
301 /* Decimal point character. */
302#ifdef COMPILE_WSCANF
303 wint_t decimal;
304#else
305 const char *decimal;
306#endif
307 /* The thousands character of the current locale. */
308#ifdef COMPILE_WSCANF
309 wint_t thousands;
310#else
311 const char *thousands;
312#endif
313 struct ptrs_to_free *ptrs_to_free = NULL;
314 /* State for the conversions. */
315 mbstate_t state;
316 /* Integral holding variables. */
317 union
318 {
319 long long int q;
320 unsigned long long int uq;
321 long int l;
322 unsigned long int ul;
323 } num;
324 /* Character-buffer pointer. */
325 char *str = NULL;
326 wchar_t *wstr = NULL;
327 char **strptr = NULL;
328 ssize_t strsize = 0;
329 /* We must not react on white spaces immediately because they can
330 possibly be matched even if in the input stream no character is
331 available anymore. */
332 int skip_space = 0;
333 /* Workspace. */
334 CHAR_T *tw; /* Temporary pointer. */
335 struct char_buffer charbuf;
336 scratch_buffer_init (&charbuf.scratch);
337
349718d4
ZW
338#ifdef __va_copy
339 __va_copy (arg, argptr);
340#else
341 arg = (va_list) argptr;
342#endif
343
344#ifdef ORIENT
345 ORIENT;
346#endif
347
348 ARGCHECK (s, format);
349
350 {
351#ifndef COMPILE_WSCANF
352 struct __locale_data *const curnumeric = loc->__locales[LC_NUMERIC];
353#endif
354
355 /* Figure out the decimal point character. */
356#ifdef COMPILE_WSCANF
357 decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC);
358#else
359 decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string;
360#endif
361 /* Figure out the thousands separator character. */
362#ifdef COMPILE_WSCANF
363 thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC);
364#else
365 thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string;
366 if (*thousands == '\0')
367 thousands = NULL;
368#endif
369 }
370
371 /* Lock the stream. */
372 LOCK_STREAM (s);
373
374
375#ifndef COMPILE_WSCANF
376 /* From now on we use `state' to convert the format string. */
377 memset (&state, '\0', sizeof (state));
378#endif
379
380 /* Run through the format string. */
381 while (*f != '\0')
382 {
383 unsigned int argpos;
384 /* Extract the next argument, which is of type TYPE.
385 For a %N$... spec, this is the Nth argument from the beginning;
386 otherwise it is the next argument after the state now in ARG. */
387#ifdef __va_copy
c5f65462
JM
388# define ARG(type) (argpos == 0 ? va_arg (arg, type) \
389 : ({ unsigned int pos = argpos; \
390 va_list arg; \
391 __va_copy (arg, argptr); \
392 while (--pos > 0) \
393 (void) va_arg (arg, void *); \
394 va_arg (arg, type); \
395 }))
349718d4
ZW
396#else
397# if 0
398 /* XXX Possible optimization. */
c5f65462
JM
399# define ARG(type) (argpos == 0 ? va_arg (arg, type) \
400 : ({ va_list arg = (va_list) argptr; \
401 arg = (va_list) ((char *) arg \
402 + (argpos - 1) \
403 * __va_rounded_size (void *)); \
404 va_arg (arg, type); \
405 }))
349718d4 406# else
c5f65462
JM
407# define ARG(type) (argpos == 0 ? va_arg (arg, type) \
408 : ({ unsigned int pos = argpos; \
409 va_list arg = (va_list) argptr; \
410 while (--pos > 0) \
411 (void) va_arg (arg, void *); \
412 va_arg (arg, type); \
413 }))
349718d4
ZW
414# endif
415#endif
416
417#ifndef COMPILE_WSCANF
c0e9ddf5 418 if (!isascii (*f))
349718d4
ZW
419 {
420 /* Non-ASCII, may be a multibyte. */
c0e9ddf5
AS
421 int len = __mbrlen ((const char *) f, strlen ((const char *) f),
422 &state);
349718d4
ZW
423 if (len > 0)
424 {
425 do
426 {
427 c = inchar ();
428 if (__glibc_unlikely (c == EOF))
429 input_error ();
c0e9ddf5 430 else if (c != *f++)
349718d4
ZW
431 {
432 ungetc_not_eof (c, s);
433 conv_error ();
434 }
435 }
436 while (--len > 0);
437 continue;
438 }
439 }
440#endif
441
442 fc = *f++;
443 if (fc != '%')
444 {
445 /* Remember to skip spaces. */
446 if (ISSPACE (fc))
447 {
448 skip_space = 1;
449 continue;
450 }
451
452 /* Read a character. */
453 c = inchar ();
454
455 /* Characters other than format specs must just match. */
456 if (__glibc_unlikely (c == EOF))
457 input_error ();
458
459 /* We saw white space char as the last character in the format
460 string. Now it's time to skip all leading white space. */
461 if (skip_space)
462 {
463 while (ISSPACE (c))
464 if (__glibc_unlikely (inchar () == EOF))
465 input_error ();
466 skip_space = 0;
467 }
468
469 if (__glibc_unlikely (c != fc))
470 {
471 ungetc (c, s);
472 conv_error ();
473 }
474
475 continue;
476 }
477
478 /* This is the start of the conversion string. */
479 flags = 0;
480
481 /* Initialize state of modifiers. */
482 argpos = 0;
483
484 /* Prepare temporary buffer. */
485 char_buffer_rewind (&charbuf);
486
487 /* Check for a positional parameter specification. */
c0e9ddf5 488 if (ISDIGIT (*f))
349718d4 489 {
c0e9ddf5 490 argpos = read_int (&f);
349718d4
ZW
491 if (*f == L_('$'))
492 ++f;
493 else
494 {
495 /* Oops; that was actually the field width. */
496 width = argpos;
497 argpos = 0;
498 goto got_width;
499 }
500 }
501
502 /* Check for the assignment-suppressing, the number grouping flag,
503 and the signal to use the locale's digit representation. */
504 while (*f == L_('*') || *f == L_('\'') || *f == L_('I'))
505 switch (*f++)
506 {
507 case L_('*'):
508 flags |= SUPPRESS;
509 break;
510 case L_('\''):
511#ifdef COMPILE_WSCANF
512 if (thousands != L'\0')
513#else
514 if (thousands != NULL)
515#endif
516 flags |= GROUP;
517 break;
518 case L_('I'):
519 flags |= I18N;
520 break;
521 }
522
523 /* Find the maximum field width. */
524 width = 0;
c0e9ddf5
AS
525 if (ISDIGIT (*f))
526 width = read_int (&f);
349718d4
ZW
527 got_width:
528 if (width == 0)
529 width = -1;
530
531 /* Check for type modifiers. */
532 switch (*f++)
533 {
534 case L_('h'):
535 /* ints are short ints or chars. */
536 if (*f == L_('h'))
537 {
538 ++f;
539 flags |= CHAR;
540 }
541 else
542 flags |= SHORT;
543 break;
544 case L_('l'):
545 if (*f == L_('l'))
546 {
547 /* A double `l' is equivalent to an `L'. */
548 ++f;
549 flags |= LONGDBL | LONG;
550 }
551 else
552 /* ints are long ints. */
553 flags |= LONG;
554 break;
555 case L_('q'):
556 case L_('L'):
557 /* doubles are long doubles, and ints are long long ints. */
558 flags |= LONGDBL | LONG;
559 break;
560 case L_('a'):
561 /* The `a' is used as a flag only if followed by `s', `S' or
562 `['. */
563 if (*f != L_('s') && *f != L_('S') && *f != L_('['))
564 {
565 --f;
566 break;
567 }
568 /* In __isoc99_*scanf %as, %aS and %a[ extension is not
569 supported at all. */
570 if (__glibc_likely ((mode_flags & SCANF_ISOC99_A) != 0))
571 {
572 --f;
573 break;
574 }
575 /* String conversions (%s, %[) take a `char **'
576 arg and fill it in with a malloc'd pointer. */
577 flags |= GNU_MALLOC;
578 break;
579 case L_('m'):
580 flags |= POSIX_MALLOC;
581 if (*f == L_('l'))
582 {
583 ++f;
584 flags |= LONG;
585 }
586 break;
587 case L_('z'):
588 if (need_longlong && sizeof (size_t) > sizeof (unsigned long int))
589 flags |= LONGDBL;
590 else if (sizeof (size_t) > sizeof (unsigned int))
591 flags |= LONG;
592 break;
593 case L_('j'):
594 if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int))
595 flags |= LONGDBL;
596 else if (sizeof (uintmax_t) > sizeof (unsigned int))
597 flags |= LONG;
598 break;
599 case L_('t'):
600 if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int))
601 flags |= LONGDBL;
602 else if (sizeof (ptrdiff_t) > sizeof (int))
603 flags |= LONG;
604 break;
605 default:
606 /* Not a recognized modifier. Backup. */
607 --f;
608 break;
609 }
610
611 /* End of the format string? */
612 if (__glibc_unlikely (*f == L_('\0')))
613 conv_error ();
614
615 /* Find the conversion specifier. */
616 fc = *f++;
617 if (skip_space || (fc != L_('[') && fc != L_('c')
618 && fc != L_('C') && fc != L_('n')))
619 {
620 /* Eat whitespace. */
621 int save_errno = errno;
622 __set_errno (0);
623 do
624 /* We add the additional test for EOF here since otherwise
625 inchar will restore the old errno value which might be
626 EINTR but does not indicate an interrupt since nothing
627 was read at this time. */
628 if (__builtin_expect ((c == EOF || inchar () == EOF)
629 && errno == EINTR, 0))
630 input_error ();
631 while (ISSPACE (c));
632 __set_errno (save_errno);
633 ungetc (c, s);
634 skip_space = 0;
635 }
636
637 switch (fc)
638 {
639 case L_('%'): /* Must match a literal '%'. */
640 c = inchar ();
641 if (__glibc_unlikely (c == EOF))
642 input_error ();
643 if (__glibc_unlikely (c != fc))
644 {
645 ungetc_not_eof (c, s);
646 conv_error ();
647 }
648 break;
649
650 case L_('n'): /* Answer number of assignments done. */
651 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
652 with the 'n' conversion specifier. */
653 if (!(flags & SUPPRESS))
654 {
655 /* Don't count the read-ahead. */
656 if (need_longlong && (flags & LONGDBL))
657 *ARG (long long int *) = read_in;
658 else if (need_long && (flags & LONG))
659 *ARG (long int *) = read_in;
660 else if (flags & SHORT)
661 *ARG (short int *) = read_in;
662 else if (!(flags & CHAR))
663 *ARG (int *) = read_in;
664 else
665 *ARG (char *) = read_in;
666
667#ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
668 /* We have a severe problem here. The ISO C standard
669 contradicts itself in explaining the effect of the %n
670 format in `scanf'. While in ISO C:1990 and the ISO C
671 Amendment 1:1995 the result is described as
672
673 Execution of a %n directive does not effect the
674 assignment count returned at the completion of
675 execution of the f(w)scanf function.
676
677 in ISO C Corrigendum 1:1994 the following was added:
678
679 Subclause 7.9.6.2
680 Add the following fourth example:
681 In:
682 #include <stdio.h>
683 int d1, d2, n1, n2, i;
684 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
685 the value 123 is assigned to d1 and the value 3 to n1.
686 Because %n can never get an input failure the value
687 of 3 is also assigned to n2. The value of d2 is not
688 affected. The value 3 is assigned to i.
689
690 We go for now with the historically correct code from ISO C,
691 i.e., we don't count the %n assignments. When it ever
692 should prove to be wrong just remove the #ifdef above. */
693 ++done;
694#endif
695 }
696 break;
697
698 case L_('c'): /* Match characters. */
699 if ((flags & LONG) == 0)
700 {
701 if (width == -1)
702 width = 1;
703
704#define STRING_ARG(Str, Type, Width) \
705 do if (!(flags & SUPPRESS)) \
706 { \
707 if (flags & MALLOC) \
708 { \
709 /* The string is to be stored in a malloc'd buffer. */ \
710 /* For %mS using char ** is actually wrong, but \
711 shouldn't make a difference on any arch glibc \
712 supports and would unnecessarily complicate \
713 things. */ \
714 strptr = ARG (char **); \
715 if (strptr == NULL) \
716 conv_error (); \
717 /* Allocate an initial buffer. */ \
718 strsize = Width; \
719 *strptr = (char *) malloc (strsize * sizeof (Type)); \
720 Str = (Type *) *strptr; \
721 if (Str != NULL) \
722 add_ptr_to_free (strptr); \
723 else if (flags & POSIX_MALLOC) \
724 { \
725 done = EOF; \
726 goto errout; \
727 } \
728 } \
729 else \
730 Str = ARG (Type *); \
731 if (Str == NULL) \
732 conv_error (); \
733 } while (0)
734#ifdef COMPILE_WSCANF
735 STRING_ARG (str, char, 100);
736#else
737 STRING_ARG (str, char, (width > 1024 ? 1024 : width));
738#endif
739
740 c = inchar ();
741 if (__glibc_unlikely (c == EOF))
742 input_error ();
743
744#ifdef COMPILE_WSCANF
745 /* We have to convert the wide character(s) into multibyte
746 characters and store the result. */
747 memset (&state, '\0', sizeof (state));
748
749 do
750 {
751 size_t n;
752
753 if (!(flags & SUPPRESS) && (flags & POSIX_MALLOC)
754 && *strptr + strsize - str <= MB_LEN_MAX)
755 {
756 /* We have to enlarge the buffer if the `m' flag
757 was given. */
758 size_t strleng = str - *strptr;
759 char *newstr;
760
761 newstr = (char *) realloc (*strptr, strsize * 2);
762 if (newstr == NULL)
763 {
764 /* Can't allocate that much. Last-ditch effort. */
765 newstr = (char *) realloc (*strptr,
766 strleng + MB_LEN_MAX);
767 if (newstr == NULL)
768 {
769 /* c can't have `a' flag, only `m'. */
770 done = EOF;
771 goto errout;
772 }
773 else
774 {
775 *strptr = newstr;
776 str = newstr + strleng;
777 strsize = strleng + MB_LEN_MAX;
778 }
779 }
780 else
781 {
782 *strptr = newstr;
783 str = newstr + strleng;
784 strsize *= 2;
785 }
786 }
787
788 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
789 if (__glibc_unlikely (n == (size_t) -1))
790 /* No valid wide character. */
791 input_error ();
792
793 /* Increment the output pointer. Even if we don't
794 write anything. */
795 str += n;
796 }
797 while (--width > 0 && inchar () != EOF);
798#else
799 if (!(flags & SUPPRESS))
800 {
801 do
802 {
803 if ((flags & MALLOC)
804 && (char *) str == *strptr + strsize)
805 {
806 /* Enlarge the buffer. */
807 size_t newsize
808 = strsize
809 + (strsize >= width ? width - 1 : strsize);
810
811 str = (char *) realloc (*strptr, newsize);
812 if (str == NULL)
813 {
814 /* Can't allocate that much. Last-ditch
815 effort. */
816 str = (char *) realloc (*strptr, strsize + 1);
817 if (str == NULL)
818 {
819 /* c can't have `a' flag, only `m'. */
820 done = EOF;
821 goto errout;
822 }
823 else
824 {
825 *strptr = (char *) str;
826 str += strsize;
827 ++strsize;
828 }
829 }
830 else
831 {
832 *strptr = (char *) str;
833 str += strsize;
834 strsize = newsize;
835 }
836 }
837 *str++ = c;
838 }
839 while (--width > 0 && inchar () != EOF);
840 }
841 else
842 while (--width > 0 && inchar () != EOF);
843#endif
844
845 if (!(flags & SUPPRESS))
846 {
847 if ((flags & MALLOC) && str - *strptr != strsize)
848 {
849 char *cp = (char *) realloc (*strptr, str - *strptr);
850 if (cp != NULL)
851 *strptr = cp;
852 }
853 strptr = NULL;
854 ++done;
855 }
856
857 break;
858 }
859 /* FALLTHROUGH */
860 case L_('C'):
861 if (width == -1)
862 width = 1;
863
864 STRING_ARG (wstr, wchar_t, (width > 1024 ? 1024 : width));
865
866 c = inchar ();
867 if (__glibc_unlikely (c == EOF))
868 input_error ();
869
870#ifdef COMPILE_WSCANF
871 /* Just store the incoming wide characters. */
872 if (!(flags & SUPPRESS))
873 {
874 do
875 {
876 if ((flags & MALLOC)
877 && wstr == (wchar_t *) *strptr + strsize)
878 {
879 size_t newsize
880 = strsize + (strsize > width ? width - 1 : strsize);
881 /* Enlarge the buffer. */
882 wstr = (wchar_t *) realloc (*strptr,
883 newsize * sizeof (wchar_t));
884 if (wstr == NULL)
885 {
886 /* Can't allocate that much. Last-ditch effort. */
887 wstr = (wchar_t *) realloc (*strptr,
888 (strsize + 1)
889 * sizeof (wchar_t));
890 if (wstr == NULL)
891 {
892 /* C or lc can't have `a' flag, only `m'
893 flag. */
894 done = EOF;
895 goto errout;
896 }
897 else
898 {
899 *strptr = (char *) wstr;
900 wstr += strsize;
901 ++strsize;
902 }
903 }
904 else
905 {
906 *strptr = (char *) wstr;
907 wstr += strsize;
908 strsize = newsize;
909 }
910 }
911 *wstr++ = c;
912 }
913 while (--width > 0 && inchar () != EOF);
914 }
915 else
916 while (--width > 0 && inchar () != EOF);
917#else
918 {
919 /* We have to convert the multibyte input sequence to wide
920 characters. */
921 char buf[1];
922 mbstate_t cstate;
923
924 memset (&cstate, '\0', sizeof (cstate));
925
926 do
927 {
928 /* This is what we present the mbrtowc function first. */
929 buf[0] = c;
930
931 if (!(flags & SUPPRESS) && (flags & MALLOC)
932 && wstr == (wchar_t *) *strptr + strsize)
933 {
934 size_t newsize
935 = strsize + (strsize > width ? width - 1 : strsize);
936 /* Enlarge the buffer. */
937 wstr = (wchar_t *) realloc (*strptr,
938 newsize * sizeof (wchar_t));
939 if (wstr == NULL)
940 {
941 /* Can't allocate that much. Last-ditch effort. */
942 wstr = (wchar_t *) realloc (*strptr,
943 ((strsize + 1)
944 * sizeof (wchar_t)));
945 if (wstr == NULL)
946 {
947 /* C or lc can't have `a' flag, only `m' flag. */
948 done = EOF;
949 goto errout;
950 }
951 else
952 {
953 *strptr = (char *) wstr;
954 wstr += strsize;
955 ++strsize;
956 }
957 }
958 else
959 {
960 *strptr = (char *) wstr;
961 wstr += strsize;
962 strsize = newsize;
963 }
964 }
965
966 while (1)
967 {
968 size_t n;
969
970 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
971 buf, 1, &cstate);
972
973 if (n == (size_t) -2)
974 {
975 /* Possibly correct character, just not enough
976 input. */
977 if (__glibc_unlikely (inchar () == EOF))
978 encode_error ();
979
980 buf[0] = c;
981 continue;
982 }
983
984 if (__glibc_unlikely (n != 1))
985 encode_error ();
986
987 /* We have a match. */
988 break;
989 }
990
991 /* Advance the result pointer. */
992 ++wstr;
993 }
994 while (--width > 0 && inchar () != EOF);
995 }
996#endif
997
998 if (!(flags & SUPPRESS))
999 {
1000 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1001 {
1002 wchar_t *cp = (wchar_t *) realloc (*strptr,
1003 ((wstr
1004 - (wchar_t *) *strptr)
1005 * sizeof (wchar_t)));
1006 if (cp != NULL)
1007 *strptr = (char *) cp;
1008 }
1009 strptr = NULL;
1010
1011 ++done;
1012 }
1013
1014 break;
1015
1016 case L_('s'): /* Read a string. */
1017 if (!(flags & LONG))
1018 {
1019 STRING_ARG (str, char, 100);
1020
1021 c = inchar ();
1022 if (__glibc_unlikely (c == EOF))
1023 input_error ();
1024
1025#ifdef COMPILE_WSCANF
1026 memset (&state, '\0', sizeof (state));
1027#endif
1028
1029 do
1030 {
1031 if (ISSPACE (c))
1032 {
1033 ungetc_not_eof (c, s);
1034 break;
1035 }
1036
1037#ifdef COMPILE_WSCANF
1038 /* This is quite complicated. We have to convert the
1039 wide characters into multibyte characters and then
1040 store them. */
1041 {
1042 size_t n;
1043
1044 if (!(flags & SUPPRESS) && (flags & MALLOC)
1045 && *strptr + strsize - str <= MB_LEN_MAX)
1046 {
1047 /* We have to enlarge the buffer if the `a' or `m'
1048 flag was given. */
1049 size_t strleng = str - *strptr;
1050 char *newstr;
1051
1052 newstr = (char *) realloc (*strptr, strsize * 2);
1053 if (newstr == NULL)
1054 {
1055 /* Can't allocate that much. Last-ditch
1056 effort. */
1057 newstr = (char *) realloc (*strptr,
1058 strleng + MB_LEN_MAX);
1059 if (newstr == NULL)
1060 {
1061 if (flags & POSIX_MALLOC)
1062 {
1063 done = EOF;
1064 goto errout;
1065 }
1066 /* We lose. Oh well. Terminate the
1067 string and stop converting,
1068 so at least we don't skip any input. */
1069 ((char *) (*strptr))[strleng] = '\0';
1070 strptr = NULL;
1071 ++done;
1072 conv_error ();
1073 }
1074 else
1075 {
1076 *strptr = newstr;
1077 str = newstr + strleng;
1078 strsize = strleng + MB_LEN_MAX;
1079 }
1080 }
1081 else
1082 {
1083 *strptr = newstr;
1084 str = newstr + strleng;
1085 strsize *= 2;
1086 }
1087 }
1088
1089 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c,
1090 &state);
1091 if (__glibc_unlikely (n == (size_t) -1))
1092 encode_error ();
1093
1094 assert (n <= MB_LEN_MAX);
1095 str += n;
1096 }
1097#else
1098 /* This is easy. */
1099 if (!(flags & SUPPRESS))
1100 {
1101 *str++ = c;
1102 if ((flags & MALLOC)
1103 && (char *) str == *strptr + strsize)
1104 {
1105 /* Enlarge the buffer. */
1106 str = (char *) realloc (*strptr, 2 * strsize);
1107 if (str == NULL)
1108 {
1109 /* Can't allocate that much. Last-ditch
1110 effort. */
1111 str = (char *) realloc (*strptr, strsize + 1);
1112 if (str == NULL)
1113 {
1114 if (flags & POSIX_MALLOC)
1115 {
1116 done = EOF;
1117 goto errout;
1118 }
1119 /* We lose. Oh well. Terminate the
1120 string and stop converting,
1121 so at least we don't skip any input. */
1122 ((char *) (*strptr))[strsize - 1] = '\0';
1123 strptr = NULL;
1124 ++done;
1125 conv_error ();
1126 }
1127 else
1128 {
1129 *strptr = (char *) str;
1130 str += strsize;
1131 ++strsize;
1132 }
1133 }
1134 else
1135 {
1136 *strptr = (char *) str;
1137 str += strsize;
1138 strsize *= 2;
1139 }
1140 }
1141 }
1142#endif
1143 }
1144 while ((width <= 0 || --width > 0) && inchar () != EOF);
1145
1146 if (!(flags & SUPPRESS))
1147 {
1148#ifdef COMPILE_WSCANF
1149 /* We have to emit the code to get into the initial
1150 state. */
1151 char buf[MB_LEN_MAX];
1152 size_t n = __wcrtomb (buf, L'\0', &state);
1153 if (n > 0 && (flags & MALLOC)
1154 && str + n >= *strptr + strsize)
1155 {
1156 /* Enlarge the buffer. */
1157 size_t strleng = str - *strptr;
1158 char *newstr;
1159
1160 newstr = (char *) realloc (*strptr, strleng + n + 1);
1161 if (newstr == NULL)
1162 {
1163 if (flags & POSIX_MALLOC)
1164 {
1165 done = EOF;
1166 goto errout;
1167 }
1168 /* We lose. Oh well. Terminate the string
1169 and stop converting, so at least we don't
1170 skip any input. */
1171 ((char *) (*strptr))[strleng] = '\0';
1172 strptr = NULL;
1173 ++done;
1174 conv_error ();
1175 }
1176 else
1177 {
1178 *strptr = newstr;
1179 str = newstr + strleng;
1180 strsize = strleng + n + 1;
1181 }
1182 }
1183
1184 str = __mempcpy (str, buf, n);
1185#endif
1186 *str++ = '\0';
1187
1188 if ((flags & MALLOC) && str - *strptr != strsize)
1189 {
1190 char *cp = (char *) realloc (*strptr, str - *strptr);
1191 if (cp != NULL)
1192 *strptr = cp;
1193 }
1194 strptr = NULL;
1195
1196 ++done;
1197 }
1198 break;
1199 }
1200 /* FALLTHROUGH */
1201
1202 case L_('S'):
1203 {
1204#ifndef COMPILE_WSCANF
1205 mbstate_t cstate;
1206#endif
1207
1208 /* Wide character string. */
1209 STRING_ARG (wstr, wchar_t, 100);
1210
1211 c = inchar ();
1212 if (__builtin_expect (c == EOF, 0))
1213 input_error ();
1214
1215#ifndef COMPILE_WSCANF
1216 memset (&cstate, '\0', sizeof (cstate));
1217#endif
1218
1219 do
1220 {
1221 if (ISSPACE (c))
1222 {
1223 ungetc_not_eof (c, s);
1224 break;
1225 }
1226
1227#ifdef COMPILE_WSCANF
1228 /* This is easy. */
1229 if (!(flags & SUPPRESS))
1230 {
1231 *wstr++ = c;
1232 if ((flags & MALLOC)
1233 && wstr == (wchar_t *) *strptr + strsize)
1234 {
1235 /* Enlarge the buffer. */
1236 wstr = (wchar_t *) realloc (*strptr,
1237 (2 * strsize)
1238 * sizeof (wchar_t));
1239 if (wstr == NULL)
1240 {
1241 /* Can't allocate that much. Last-ditch
1242 effort. */
1243 wstr = (wchar_t *) realloc (*strptr,
1244 (strsize + 1)
1245 * sizeof (wchar_t));
1246 if (wstr == NULL)
1247 {
1248 if (flags & POSIX_MALLOC)
1249 {
1250 done = EOF;
1251 goto errout;
1252 }
1253 /* We lose. Oh well. Terminate the string
1254 and stop converting, so at least we don't
1255 skip any input. */
1256 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1257 strptr = NULL;
1258 ++done;
1259 conv_error ();
1260 }
1261 else
1262 {
1263 *strptr = (char *) wstr;
1264 wstr += strsize;
1265 ++strsize;
1266 }
1267 }
1268 else
1269 {
1270 *strptr = (char *) wstr;
1271 wstr += strsize;
1272 strsize *= 2;
1273 }
1274 }
1275 }
1276#else
1277 {
1278 char buf[1];
1279
1280 buf[0] = c;
1281
1282 while (1)
1283 {
1284 size_t n;
1285
1286 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
1287 buf, 1, &cstate);
1288
1289 if (n == (size_t) -2)
1290 {
1291 /* Possibly correct character, just not enough
1292 input. */
1293 if (__glibc_unlikely (inchar () == EOF))
1294 encode_error ();
1295
1296 buf[0] = c;
1297 continue;
1298 }
1299
1300 if (__glibc_unlikely (n != 1))
1301 encode_error ();
1302
1303 /* We have a match. */
1304 ++wstr;
1305 break;
1306 }
1307
1308 if (!(flags & SUPPRESS) && (flags & MALLOC)
1309 && wstr == (wchar_t *) *strptr + strsize)
1310 {
1311 /* Enlarge the buffer. */
1312 wstr = (wchar_t *) realloc (*strptr,
1313 (2 * strsize
1314 * sizeof (wchar_t)));
1315 if (wstr == NULL)
1316 {
1317 /* Can't allocate that much. Last-ditch effort. */
1318 wstr = (wchar_t *) realloc (*strptr,
1319 ((strsize + 1)
1320 * sizeof (wchar_t)));
1321 if (wstr == NULL)
1322 {
1323 if (flags & POSIX_MALLOC)
1324 {
1325 done = EOF;
1326 goto errout;
1327 }
1328 /* We lose. Oh well. Terminate the
1329 string and stop converting, so at
1330 least we don't skip any input. */
1331 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1332 strptr = NULL;
1333 ++done;
1334 conv_error ();
1335 }
1336 else
1337 {
1338 *strptr = (char *) wstr;
1339 wstr += strsize;
1340 ++strsize;
1341 }
1342 }
1343 else
1344 {
1345 *strptr = (char *) wstr;
1346 wstr += strsize;
1347 strsize *= 2;
1348 }
1349 }
1350 }
1351#endif
1352 }
1353 while ((width <= 0 || --width > 0) && inchar () != EOF);
1354
1355 if (!(flags & SUPPRESS))
1356 {
1357 *wstr++ = L'\0';
1358
1359 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1360 {
1361 wchar_t *cp = (wchar_t *) realloc (*strptr,
1362 ((wstr
1363 - (wchar_t *) *strptr)
c4f50205 1364 * sizeof (wchar_t)));
349718d4
ZW
1365 if (cp != NULL)
1366 *strptr = (char *) cp;
1367 }
1368 strptr = NULL;
1369
1370 ++done;
1371 }
1372 }
1373 break;
1374
1375 case L_('x'): /* Hexadecimal integer. */
1376 case L_('X'): /* Ditto. */
1377 base = 16;
1378 goto number;
1379
1380 case L_('o'): /* Octal integer. */
1381 base = 8;
1382 goto number;
1383
1384 case L_('u'): /* Unsigned decimal integer. */
1385 base = 10;
1386 goto number;
1387
1388 case L_('d'): /* Signed decimal integer. */
1389 base = 10;
1390 flags |= NUMBER_SIGNED;
1391 goto number;
1392
1393 case L_('i'): /* Generic number. */
1394 base = 0;
1395 flags |= NUMBER_SIGNED;
1396
1397 number:
1398 c = inchar ();
1399 if (__glibc_unlikely (c == EOF))
1400 input_error ();
1401
1402 /* Check for a sign. */
1403 if (c == L_('-') || c == L_('+'))
1404 {
1405 char_buffer_add (&charbuf, c);
1406 if (width > 0)
1407 --width;
1408 c = inchar ();
1409 }
1410
1411 /* Look for a leading indication of base. */
1412 if (width != 0 && c == L_('0'))
1413 {
1414 if (width > 0)
1415 --width;
1416
1417 char_buffer_add (&charbuf, c);
1418 c = inchar ();
1419
1420 if (width != 0 && TOLOWER (c) == L_('x'))
1421 {
1422 if (base == 0)
1423 base = 16;
1424 if (base == 16)
1425 {
1426 if (width > 0)
1427 --width;
1428 c = inchar ();
1429 }
1430 }
dee2bea0
JM
1431 else if ((mode_flags & SCANF_ISOC23_BIN_CST) != 0
1432 && base == 0
1433 && width != 0
1434 && TOLOWER (c) == L_('b'))
1435 {
1436 base = 2;
1437 if (width > 0)
1438 --width;
1439 c = inchar ();
1440 }
349718d4
ZW
1441 else if (base == 0)
1442 base = 8;
1443 }
1444
1445 if (base == 0)
1446 base = 10;
1447
1448 if (base == 10 && __builtin_expect ((flags & I18N) != 0, 0))
1449 {
1450 int from_level;
1451 int to_level;
1452 int level;
1453#ifdef COMPILE_WSCANF
1454 const wchar_t *wcdigits[10];
1455 const wchar_t *wcdigits_extended[10];
1456#else
1457 const char *mbdigits[10];
1458 const char *mbdigits_extended[10];
1459#endif
1460 /* "to_inpunct" is a map from ASCII digits to their
1461 equivalent in locale. This is defined for locales
1462 which use an extra digits set. */
1463 wctrans_t map = __wctrans ("to_inpunct");
1464 int n;
1465
1466 from_level = 0;
1467#ifdef COMPILE_WSCANF
1468 to_level = _NL_CURRENT_WORD (LC_CTYPE,
1469 _NL_CTYPE_INDIGITS_WC_LEN) - 1;
1470#else
1471 to_level = (uint32_t) curctype->values[_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN)].word - 1;
1472#endif
1473
1474 /* Get the alternative digit forms if there are any. */
1475 if (__glibc_unlikely (map != NULL))
1476 {
1477 /* Adding new level for extra digits set in locale file. */
1478 ++to_level;
1479
1480 for (n = 0; n < 10; ++n)
1481 {
1482#ifdef COMPILE_WSCANF
1483 wcdigits[n] = (const wchar_t *)
1484 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1485
1486 wchar_t *wc_extended = (wchar_t *)
1487 alloca ((to_level + 2) * sizeof (wchar_t));
1488 __wmemcpy (wc_extended, wcdigits[n], to_level);
1489 wc_extended[to_level] = __towctrans (L'0' + n, map);
1490 wc_extended[to_level + 1] = '\0';
1491 wcdigits_extended[n] = wc_extended;
1492#else
1493 mbdigits[n]
1494 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1495
1496 /* Get the equivalent wide char in map. */
1497 wint_t extra_wcdigit = __towctrans (L'0' + n, map);
1498
1499 /* Convert it to multibyte representation. */
1500 mbstate_t state;
1501 memset (&state, '\0', sizeof (state));
1502
1503 char extra_mbdigit[MB_LEN_MAX];
1504 size_t mblen
1505 = __wcrtomb (extra_mbdigit, extra_wcdigit, &state);
1506
1507 if (mblen == (size_t) -1)
1508 {
1509 /* Ignore this new level. */
1510 map = NULL;
1511 break;
1512 }
1513
1514 /* Calculate the length of mbdigits[n]. */
1515 const char *last_char = mbdigits[n];
1516 for (level = 0; level < to_level; ++level)
1517 last_char = strchr (last_char, '\0') + 1;
1518
1519 size_t mbdigits_len = last_char - mbdigits[n];
1520
1521 /* Allocate memory for extended multibyte digit. */
1522 char *mb_extended;
1523 mb_extended = (char *) alloca (mbdigits_len + mblen + 1);
1524
1525 /* And get the mbdigits + extra_digit string. */
1526 *(char *) __mempcpy (__mempcpy (mb_extended, mbdigits[n],
1527 mbdigits_len),
1528 extra_mbdigit, mblen) = '\0';
1529 mbdigits_extended[n] = mb_extended;
1530#endif
1531 }
1532 }
1533
1534 /* Read the number into workspace. */
1535 while (c != EOF && width != 0)
1536 {
1537 /* In this round we get the pointer to the digit strings
1538 and also perform the first round of comparisons. */
1539 for (n = 0; n < 10; ++n)
1540 {
1541 /* Get the string for the digits with value N. */
1542#ifdef COMPILE_WSCANF
1543
1544 /* wcdigits_extended[] is fully set in the loop
1545 above, but the test for "map != NULL" is done
1546 inside the loop here and outside the loop there. */
1547 DIAG_PUSH_NEEDS_COMMENT;
1548 DIAG_IGNORE_NEEDS_COMMENT (4.7, "-Wmaybe-uninitialized");
1549
1550 if (__glibc_unlikely (map != NULL))
1551 wcdigits[n] = wcdigits_extended[n];
1552 else
1553 wcdigits[n] = (const wchar_t *)
1554 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1555 wcdigits[n] += from_level;
1556
1557 DIAG_POP_NEEDS_COMMENT;
1558
1559 if (c == (wint_t) *wcdigits[n])
1560 {
1561 to_level = from_level;
1562 break;
1563 }
1564
1565 /* Advance the pointer to the next string. */
1566 ++wcdigits[n];
1567#else
1568 const char *cmpp;
1569 int avail = width > 0 ? width : INT_MAX;
1570
1571 if (__glibc_unlikely (map != NULL))
1572 mbdigits[n] = mbdigits_extended[n];
1573 else
1574 mbdigits[n]
1575 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1576
1577 for (level = 0; level < from_level; level++)
1578 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1579
1580 cmpp = mbdigits[n];
1581 while ((unsigned char) *cmpp == c && avail >= 0)
1582 {
1583 if (*++cmpp == '\0')
1584 break;
1585 else
1586 {
1587 if (avail == 0 || inchar () == EOF)
1588 break;
1589 --avail;
1590 }
1591 }
1592
1593 if (*cmpp == '\0')
1594 {
1595 if (width > 0)
1596 width = avail;
1597 to_level = from_level;
1598 break;
1599 }
1600
1601 /* We are pushing all read characters back. */
1602 if (cmpp > mbdigits[n])
1603 {
1604 ungetc (c, s);
1605 while (--cmpp > mbdigits[n])
1606 ungetc_not_eof ((unsigned char) *cmpp, s);
1607 c = (unsigned char) *cmpp;
1608 }
1609
1610 /* Advance the pointer to the next string. */
1611 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1612#endif
1613 }
1614
1615 if (n == 10)
1616 {
1617 /* Have not yet found the digit. */
1618 for (level = from_level + 1; level <= to_level; ++level)
1619 {
1620 /* Search all ten digits of this level. */
1621 for (n = 0; n < 10; ++n)
1622 {
1623#ifdef COMPILE_WSCANF
1624 if (c == (wint_t) *wcdigits[n])
1625 break;
1626
1627 /* Advance the pointer to the next string. */
1628 ++wcdigits[n];
1629#else
1630 const char *cmpp;
1631 int avail = width > 0 ? width : INT_MAX;
1632
1633 cmpp = mbdigits[n];
1634 while ((unsigned char) *cmpp == c && avail >= 0)
1635 {
1636 if (*++cmpp == '\0')
1637 break;
1638 else
1639 {
1640 if (avail == 0 || inchar () == EOF)
1641 break;
1642 --avail;
1643 }
1644 }
1645
1646 if (*cmpp == '\0')
1647 {
1648 if (width > 0)
1649 width = avail;
1650 break;
1651 }
1652
1653 /* We are pushing all read characters back. */
1654 if (cmpp > mbdigits[n])
1655 {
1656 ungetc (c, s);
1657 while (--cmpp > mbdigits[n])
1658 ungetc_not_eof ((unsigned char) *cmpp, s);
1659 c = (unsigned char) *cmpp;
1660 }
1661
1662 /* Advance the pointer to the next string. */
1663 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1664#endif
1665 }
1666
1667 if (n < 10)
1668 {
1669 /* Found it. */
1670 from_level = level;
1671 to_level = level;
1672 break;
1673 }
1674 }
1675 }
1676
1677 if (n < 10)
1678 c = L_('0') + n;
1679 else if (flags & GROUP)
1680 {
1681 /* Try matching against the thousands separator. */
1682#ifdef COMPILE_WSCANF
1683 if (c != thousands)
1684 break;
1685#else
1686 const char *cmpp = thousands;
1687 int avail = width > 0 ? width : INT_MAX;
1688
1689 while ((unsigned char) *cmpp == c && avail >= 0)
1690 {
1691 char_buffer_add (&charbuf, c);
1692 if (*++cmpp == '\0')
1693 break;
1694 else
1695 {
1696 if (avail == 0 || inchar () == EOF)
1697 break;
1698 --avail;
1699 }
1700 }
1701
1702 if (char_buffer_error (&charbuf))
1703 {
1704 __set_errno (ENOMEM);
1705 done = EOF;
1706 goto errout;
1707 }
1708
1709 if (*cmpp != '\0')
1710 {
1711 /* We are pushing all read characters back. */
1712 if (cmpp > thousands)
1713 {
1714 charbuf.current -= cmpp - thousands;
1715 ungetc (c, s);
1716 while (--cmpp > thousands)
1717 ungetc_not_eof ((unsigned char) *cmpp, s);
1718 c = (unsigned char) *cmpp;
1719 }
1720 break;
1721 }
1722
1723 if (width > 0)
1724 width = avail;
1725
1726 /* The last thousands character will be added back by
1727 the char_buffer_add below. */
1728 --charbuf.current;
1729#endif
1730 }
1731 else
1732 break;
1733
1734 char_buffer_add (&charbuf, c);
1735 if (width > 0)
1736 --width;
1737
1738 c = inchar ();
1739 }
1740 }
1741 else
1742 /* Read the number into workspace. */
1743 while (c != EOF && width != 0)
1744 {
1745 if (base == 16)
1746 {
1747 if (!ISXDIGIT (c))
1748 break;
1749 }
1750 else if (!ISDIGIT (c) || (int) (c - L_('0')) >= base)
1751 {
1752 if (base == 10 && (flags & GROUP))
1753 {
1754 /* Try matching against the thousands separator. */
1755#ifdef COMPILE_WSCANF
1756 if (c != thousands)
1757 break;
1758#else
1759 const char *cmpp = thousands;
1760 int avail = width > 0 ? width : INT_MAX;
1761
1762 while ((unsigned char) *cmpp == c && avail >= 0)
1763 {
1764 char_buffer_add (&charbuf, c);
1765 if (*++cmpp == '\0')
1766 break;
1767 else
1768 {
1769 if (avail == 0 || inchar () == EOF)
1770 break;
1771 --avail;
1772 }
1773 }
1774
1775 if (char_buffer_error (&charbuf))
1776 {
1777 __set_errno (ENOMEM);
1778 done = EOF;
1779 goto errout;
1780 }
1781
1782 if (*cmpp != '\0')
1783 {
1784 /* We are pushing all read characters back. */
1785 if (cmpp > thousands)
1786 {
1787 charbuf.current -= cmpp - thousands;
1788 ungetc (c, s);
1789 while (--cmpp > thousands)
1790 ungetc_not_eof ((unsigned char) *cmpp, s);
1791 c = (unsigned char) *cmpp;
1792 }
1793 break;
1794 }
1795
1796 if (width > 0)
1797 width = avail;
1798
1799 /* The last thousands character will be added back by
1800 the char_buffer_add below. */
1801 --charbuf.current;
1802#endif
1803 }
1804 else
1805 break;
1806 }
1807 char_buffer_add (&charbuf, c);
1808 if (width > 0)
1809 --width;
1810
1811 c = inchar ();
1812 }
1813
1814 if (char_buffer_error (&charbuf))
1815 {
1816 __set_errno (ENOMEM);
1817 done = EOF;
1818 goto errout;
1819 }
1820
1821 if (char_buffer_size (&charbuf) == 0
1822 || (char_buffer_size (&charbuf) == 1
1823 && (char_buffer_start (&charbuf)[0] == L_('+')
1824 || char_buffer_start (&charbuf)[0] == L_('-'))))
1825 {
1826 /* There was no number. If we are supposed to read a pointer
1827 we must recognize "(nil)" as well. */
1828 if (__builtin_expect (char_buffer_size (&charbuf) == 0
1829 && (flags & READ_POINTER)
1830 && (width < 0 || width >= 5)
1831 && c == '('
1832 && TOLOWER (inchar ()) == L_('n')
1833 && TOLOWER (inchar ()) == L_('i')
1834 && TOLOWER (inchar ()) == L_('l')
1835 && inchar () == L_(')'), 1))
1836 /* We must produce the value of a NULL pointer. A single
1837 '0' digit is enough. */
1838 char_buffer_add (&charbuf, L_('0'));
1839 else
1840 {
1841 /* The last read character is not part of the number
1842 anymore. */
1843 ungetc (c, s);
1844
1845 conv_error ();
1846 }
1847 }
1848 else
1849 /* The just read character is not part of the number anymore. */
1850 ungetc (c, s);
1851
1852 /* Convert the number. */
1853 char_buffer_add (&charbuf, L_('\0'));
1854 if (char_buffer_error (&charbuf))
1855 {
1856 __set_errno (ENOMEM);
1857 done = EOF;
1858 goto errout;
1859 }
1860 if (need_longlong && (flags & LONGDBL))
1861 {
1862 if (flags & NUMBER_SIGNED)
1863 num.q = __strtoll_internal
1864 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1865 else
1866 num.uq = __strtoull_internal
1867 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1868 }
1869 else
1870 {
1871 if (flags & NUMBER_SIGNED)
1872 num.l = __strtol_internal
1873 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1874 else
1875 num.ul = __strtoul_internal
1876 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1877 }
1878 if (__glibc_unlikely (char_buffer_start (&charbuf) == tw))
1879 conv_error ();
1880
1881 if (!(flags & SUPPRESS))
1882 {
1883 if (flags & NUMBER_SIGNED)
1884 {
1885 if (need_longlong && (flags & LONGDBL))
1886 *ARG (LONGLONG int *) = num.q;
1887 else if (need_long && (flags & LONG))
1888 *ARG (long int *) = num.l;
1889 else if (flags & SHORT)
1890 *ARG (short int *) = (short int) num.l;
1891 else if (!(flags & CHAR))
1892 *ARG (int *) = (int) num.l;
1893 else
1894 *ARG (signed char *) = (signed char) num.ul;
1895 }
1896 else
1897 {
1898 if (need_longlong && (flags & LONGDBL))
1899 *ARG (unsigned LONGLONG int *) = num.uq;
1900 else if (need_long && (flags & LONG))
1901 *ARG (unsigned long int *) = num.ul;
1902 else if (flags & SHORT)
1903 *ARG (unsigned short int *)
1904 = (unsigned short int) num.ul;
1905 else if (!(flags & CHAR))
1906 *ARG (unsigned int *) = (unsigned int) num.ul;
1907 else
1908 *ARG (unsigned char *) = (unsigned char) num.ul;
1909 }
1910 ++done;
1911 }
1912 break;
1913
1914 case L_('e'): /* Floating-point numbers. */
1915 case L_('E'):
1916 case L_('f'):
1917 case L_('F'):
1918 case L_('g'):
1919 case L_('G'):
1920 case L_('a'):
1921 case L_('A'):
1922 c = inchar ();
1923 if (width > 0)
1924 --width;
1925 if (__glibc_unlikely (c == EOF))
1926 input_error ();
1927
1928 got_digit = got_dot = got_e = got_sign = 0;
1929
1930 /* Check for a sign. */
1931 if (c == L_('-') || c == L_('+'))
1932 {
1933 got_sign = 1;
1934 char_buffer_add (&charbuf, c);
1935 if (__glibc_unlikely (width == 0 || inchar () == EOF))
1936 /* EOF is only an input error before we read any chars. */
1937 conv_error ();
1938 if (width > 0)
1939 --width;
1940 }
1941
1942 /* Take care for the special arguments "nan" and "inf". */
1943 if (TOLOWER (c) == L_('n'))
1944 {
1945 /* Maybe "nan". */
1946 char_buffer_add (&charbuf, c);
1947 if (__builtin_expect (width == 0
1948 || inchar () == EOF
1949 || TOLOWER (c) != L_('a'), 0))
1950 conv_error ();
1951 if (width > 0)
1952 --width;
1953 char_buffer_add (&charbuf, c);
1954 if (__builtin_expect (width == 0
1955 || inchar () == EOF
1956 || TOLOWER (c) != L_('n'), 0))
1957 conv_error ();
1958 if (width > 0)
1959 --width;
1960 char_buffer_add (&charbuf, c);
1961 /* It is "nan". */
1962 goto scan_float;
1963 }
1964 else if (TOLOWER (c) == L_('i'))
1965 {
1966 /* Maybe "inf" or "infinity". */
1967 char_buffer_add (&charbuf, c);
1968 if (__builtin_expect (width == 0
1969 || inchar () == EOF
1970 || TOLOWER (c) != L_('n'), 0))
1971 conv_error ();
1972 if (width > 0)
1973 --width;
1974 char_buffer_add (&charbuf, c);
1975 if (__builtin_expect (width == 0
1976 || inchar () == EOF
1977 || TOLOWER (c) != L_('f'), 0))
1978 conv_error ();
1979 if (width > 0)
1980 --width;
1981 char_buffer_add (&charbuf, c);
1982 /* It is at least "inf". */
1983 if (width != 0 && inchar () != EOF)
1984 {
1985 if (TOLOWER (c) == L_('i'))
1986 {
1987 if (width > 0)
1988 --width;
1989 /* Now we have to read the rest as well. */
1990 char_buffer_add (&charbuf, c);
1991 if (__builtin_expect (width == 0
1992 || inchar () == EOF
1993 || TOLOWER (c) != L_('n'), 0))
1994 conv_error ();
1995 if (width > 0)
1996 --width;
1997 char_buffer_add (&charbuf, c);
1998 if (__builtin_expect (width == 0
1999 || inchar () == EOF
2000 || TOLOWER (c) != L_('i'), 0))
2001 conv_error ();
2002 if (width > 0)
2003 --width;
2004 char_buffer_add (&charbuf, c);
2005 if (__builtin_expect (width == 0
2006 || inchar () == EOF
2007 || TOLOWER (c) != L_('t'), 0))
2008 conv_error ();
2009 if (width > 0)
2010 --width;
2011 char_buffer_add (&charbuf, c);
2012 if (__builtin_expect (width == 0
2013 || inchar () == EOF
2014 || TOLOWER (c) != L_('y'), 0))
2015 conv_error ();
2016 if (width > 0)
2017 --width;
2018 char_buffer_add (&charbuf, c);
2019 }
2020 else
2021 /* Never mind. */
2022 ungetc (c, s);
2023 }
2024 goto scan_float;
2025 }
2026
2027 exp_char = L_('e');
2028 if (width != 0 && c == L_('0'))
2029 {
2030 char_buffer_add (&charbuf, c);
2031 c = inchar ();
2032 if (width > 0)
2033 --width;
2034 if (width != 0 && TOLOWER (c) == L_('x'))
2035 {
2036 /* It is a number in hexadecimal format. */
2037 char_buffer_add (&charbuf, c);
2038
2039 flags |= HEXA_FLOAT;
2040 exp_char = L_('p');
2041
2042 /* Grouping is not allowed. */
2043 flags &= ~GROUP;
2044 c = inchar ();
2045 if (width > 0)
2046 --width;
2047 }
2048 else
2049 got_digit = 1;
2050 }
2051
2052 while (1)
2053 {
2054 if (char_buffer_error (&charbuf))
2055 {
2056 __set_errno (ENOMEM);
2057 done = EOF;
2058 goto errout;
2059 }
2060 if (ISDIGIT (c))
2061 {
2062 char_buffer_add (&charbuf, c);
2063 got_digit = 1;
2064 }
2065 else if (!got_e && (flags & HEXA_FLOAT) && ISXDIGIT (c))
2066 {
2067 char_buffer_add (&charbuf, c);
2068 got_digit = 1;
2069 }
2070 else if (got_e && charbuf.current[-1] == exp_char
2071 && (c == L_('-') || c == L_('+')))
2072 char_buffer_add (&charbuf, c);
2073 else if (got_digit && !got_e
2074 && (CHAR_T) TOLOWER (c) == exp_char)
2075 {
2076 char_buffer_add (&charbuf, exp_char);
2077 got_e = got_dot = 1;
2078 }
2079 else
2080 {
2081#ifdef COMPILE_WSCANF
2082 if (! got_dot && c == decimal)
2083 {
2084 char_buffer_add (&charbuf, c);
2085 got_dot = 1;
2086 }
2087 else if ((flags & GROUP) != 0 && ! got_dot && c == thousands)
2088 char_buffer_add (&charbuf, c);
2089 else
2090 {
2091 /* The last read character is not part of the number
2092 anymore. */
2093 ungetc (c, s);
2094 break;
2095 }
2096#else
2097 const char *cmpp = decimal;
2098 int avail = width > 0 ? width : INT_MAX;
2099
2100 if (! got_dot)
2101 {
2102 while ((unsigned char) *cmpp == c && avail >= 0)
2103 if (*++cmpp == '\0')
2104 break;
2105 else
2106 {
2107 if (avail == 0 || inchar () == EOF)
2108 break;
2109 --avail;
2110 }
2111 }
2112
2113 if (*cmpp == '\0')
2114 {
2115 /* Add all the characters. */
2116 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
2117 char_buffer_add (&charbuf, (unsigned char) *cmpp);
2118 if (width > 0)
2119 width = avail;
2120 got_dot = 1;
2121 }
2122 else
2123 {
2124 /* Figure out whether it is a thousands separator.
2125 There is one problem: we possibly read more than
2126 one character. We cannot push them back but since
2127 we know that parts of the `decimal' string matched,
2128 we can compare against it. */
2129 const char *cmp2p = thousands;
2130
2131 if ((flags & GROUP) != 0 && ! got_dot)
2132 {
2133 while (cmp2p - thousands < cmpp - decimal
2134 && *cmp2p == decimal[cmp2p - thousands])
2135 ++cmp2p;
2136 if (cmp2p - thousands == cmpp - decimal)
2137 {
2138 while ((unsigned char) *cmp2p == c && avail >= 0)
2139 if (*++cmp2p == '\0')
2140 break;
2141 else
2142 {
2143 if (avail == 0 || inchar () == EOF)
2144 break;
2145 --avail;
2146 }
2147 }
2148 }
2149
2150 if (cmp2p != NULL && *cmp2p == '\0')
2151 {
2152 /* Add all the characters. */
2153 for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
2154 char_buffer_add (&charbuf, (unsigned char) *cmpp);
2155 if (width > 0)
2156 width = avail;
2157 }
2158 else
2159 {
2160 /* The last read character is not part of the number
2161 anymore. */
2162 ungetc (c, s);
2163 break;
2164 }
2165 }
2166#endif
2167 }
2168
2169 if (width == 0 || inchar () == EOF)
2170 break;
2171
2172 if (width > 0)
2173 --width;
2174 }
2175
2176 if (char_buffer_error (&charbuf))
2177 {
2178 __set_errno (ENOMEM);
2179 done = EOF;
2180 goto errout;
2181 }
2182
2183 wctrans_t map;
2184 if (__builtin_expect ((flags & I18N) != 0, 0)
2185 /* Hexadecimal floats make no sense, fixing localized
2186 digits with ASCII letters. */
2187 && !(flags & HEXA_FLOAT)
2188 /* Minimum requirement. */
2189 && (char_buffer_size (&charbuf) == got_sign || got_dot)
2190 && (map = __wctrans ("to_inpunct")) != NULL)
2191 {
2192 /* Reget the first character. */
2193 inchar ();
2194
2195 /* Localized digits, decimal points, and thousands
2196 separator. */
2197 wint_t wcdigits[12];
2198
2199 /* First get decimal equivalent to check if we read it
2200 or not. */
2201 wcdigits[11] = __towctrans (L'.', map);
2202
2203 /* If we have not read any character or have just read
2204 locale decimal point which matches the decimal point
2205 for localized FP numbers, then we may have localized
2206 digits. Note, we test GOT_DOT above. */
2207#ifdef COMPILE_WSCANF
2208 if (char_buffer_size (&charbuf) == got_sign
2209 || (char_buffer_size (&charbuf) == got_sign + 1
2210 && wcdigits[11] == decimal))
2211#else
2212 char mbdigits[12][MB_LEN_MAX + 1];
2213
2214 mbstate_t state;
2215 memset (&state, '\0', sizeof (state));
2216
2217 bool match_so_far = char_buffer_size (&charbuf) == got_sign;
2218 size_t mblen = __wcrtomb (mbdigits[11], wcdigits[11], &state);
2219 if (mblen != (size_t) -1)
2220 {
2221 mbdigits[11][mblen] = '\0';
2222 match_so_far |=
2223 (char_buffer_size (&charbuf) == strlen (decimal) + got_sign
2224 && strcmp (decimal, mbdigits[11]) == 0);
2225 }
2226 else
2227 {
2228 size_t decimal_len = strlen (decimal);
2229 /* This should always be the case but the data comes
2230 from a file. */
2231 if (decimal_len <= MB_LEN_MAX)
2232 {
2233 match_so_far |= (char_buffer_size (&charbuf)
2234 == decimal_len + got_sign);
2235 memcpy (mbdigits[11], decimal, decimal_len + 1);
2236 }
2237 else
2238 match_so_far = false;
2239 }
2240
2241 if (match_so_far)
2242#endif
2243 {
2244 bool have_locthousands = (flags & GROUP) != 0;
2245
2246 /* Now get the digits and the thousands-sep equivalents. */
2247 for (int n = 0; n < 11; ++n)
2248 {
2249 if (n < 10)
2250 wcdigits[n] = __towctrans (L'0' + n, map);
2251 else if (n == 10)
2252 {
2253 wcdigits[10] = __towctrans (L',', map);
2254 have_locthousands &= wcdigits[10] != L'\0';
2255 }
2256
2257#ifndef COMPILE_WSCANF
2258 memset (&state, '\0', sizeof (state));
2259
2260 size_t mblen = __wcrtomb (mbdigits[n], wcdigits[n],
2261 &state);
2262 if (mblen == (size_t) -1)
2263 {
2264 if (n == 10)
2265 {
2266 if (have_locthousands)
2267 {
2268 size_t thousands_len = strlen (thousands);
2269 if (thousands_len <= MB_LEN_MAX)
2270 memcpy (mbdigits[10], thousands,
2271 thousands_len + 1);
2272 else
2273 have_locthousands = false;
2274 }
2275 }
2276 else
2277 /* Ignore checking against localized digits. */
2278 goto no_i18nflt;
2279 }
2280 else
2281 mbdigits[n][mblen] = '\0';
2282#endif
2283 }
2284
2285 /* Start checking against localized digits, if
2286 conversion is done correctly. */
2287 while (1)
2288 {
2289 if (char_buffer_error (&charbuf))
2290 {
2291 __set_errno (ENOMEM);
2292 done = EOF;
2293 goto errout;
2294 }
2295 if (got_e && charbuf.current[-1] == exp_char
2296 && (c == L_('-') || c == L_('+')))
2297 char_buffer_add (&charbuf, c);
2298 else if (char_buffer_size (&charbuf) > got_sign && !got_e
2299 && (CHAR_T) TOLOWER (c) == exp_char)
2300 {
2301 char_buffer_add (&charbuf, exp_char);
2302 got_e = got_dot = 1;
2303 }
2304 else
2305 {
2306 /* Check against localized digits, decimal point,
2307 and thousands separator. */
2308 int n;
2309 for (n = 0; n < 12; ++n)
2310 {
2311#ifdef COMPILE_WSCANF
2312 if (c == wcdigits[n])
2313 {
2314 if (n < 10)
2315 char_buffer_add (&charbuf, L_('0') + n);
2316 else if (n == 11 && !got_dot)
2317 {
2318 char_buffer_add (&charbuf, decimal);
2319 got_dot = 1;
2320 }
2321 else if (n == 10 && have_locthousands
2322 && ! got_dot)
2323 char_buffer_add (&charbuf, thousands);
2324 else
2325 /* The last read character is not part
2326 of the number anymore. */
2327 n = 12;
2328
2329 break;
2330 }
2331#else
2332 const char *cmpp = mbdigits[n];
2333 int avail = width > 0 ? width : INT_MAX;
2334
2335 while ((unsigned char) *cmpp == c && avail >= 0)
2336 if (*++cmpp == '\0')
2337 break;
2338 else
2339 {
2340 if (avail == 0 || inchar () == EOF)
2341 break;
2342 --avail;
2343 }
2344 if (*cmpp == '\0')
2345 {
2346 if (width > 0)
2347 width = avail;
2348
2349 if (n < 10)
2350 char_buffer_add (&charbuf, L_('0') + n);
2351 else if (n == 11 && !got_dot)
2352 {
2353 /* Add all the characters. */
2354 for (cmpp = decimal; *cmpp != '\0';
2355 ++cmpp)
2356 char_buffer_add (&charbuf,
2357 (unsigned char) *cmpp);
2358
2359 got_dot = 1;
2360 }
2361 else if (n == 10 && (flags & GROUP) != 0
2362 && ! got_dot)
2363 {
2364 /* Add all the characters. */
2365 for (cmpp = thousands; *cmpp != '\0';
2366 ++cmpp)
2367 char_buffer_add (&charbuf,
2368 (unsigned char) *cmpp);
2369 }
2370 else
2371 /* The last read character is not part
2372 of the number anymore. */
2373 n = 12;
2374
2375 break;
2376 }
2377
2378 /* We are pushing all read characters back. */
2379 if (cmpp > mbdigits[n])
2380 {
2381 ungetc (c, s);
2382 while (--cmpp > mbdigits[n])
2383 ungetc_not_eof ((unsigned char) *cmpp, s);
2384 c = (unsigned char) *cmpp;
2385 }
2386#endif
2387 }
2388
2389 if (n >= 12)
2390 {
2391 /* The last read character is not part
2392 of the number anymore. */
2393 ungetc (c, s);
2394 break;
2395 }
2396 }
2397
2398 if (width == 0 || inchar () == EOF)
2399 break;
2400
2401 if (width > 0)
2402 --width;
2403 }
2404 }
2405
2406#ifndef COMPILE_WSCANF
2407 no_i18nflt:
2408 ;
2409#endif
2410 }
2411
2412 if (char_buffer_error (&charbuf))
2413 {
2414 __set_errno (ENOMEM);
2415 done = EOF;
2416 goto errout;
2417 }
2418
2419 /* Have we read any character? If we try to read a number
2420 in hexadecimal notation and we have read only the `0x'
2421 prefix this is an error. */
2422 if (__glibc_unlikely (char_buffer_size (&charbuf) == got_sign
2423 || ((flags & HEXA_FLOAT)
2424 && (char_buffer_size (&charbuf)
2425 == 2 + got_sign))))
2426 conv_error ();
2427
2428 scan_float:
2429 /* Convert the number. */
2430 char_buffer_add (&charbuf, L_('\0'));
2431 if (char_buffer_error (&charbuf))
2432 {
2433 __set_errno (ENOMEM);
2434 done = EOF;
2435 goto errout;
2436 }
10446f5d
GG
2437#if __HAVE_FLOAT128_UNLIKE_LDBL
2438 if ((flags & LONGDBL) \
2439 && (mode_flags & SCANF_LDBL_USES_FLOAT128) != 0)
2440 {
2441 _Float128 d = __strtof128_internal
2442 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2443 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2444 *ARG (_Float128 *) = d;
2445 }
2446 else
2447#endif
349718d4
ZW
2448 if ((flags & LONGDBL) \
2449 && __glibc_likely ((mode_flags & SCANF_LDBL_IS_DBL) == 0))
2450 {
2451 long double d = __strtold_internal
2452 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2453 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2454 *ARG (long double *) = d;
2455 }
2456 else if (flags & (LONG | LONGDBL))
2457 {
2458 double d = __strtod_internal
2459 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2460 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2461 *ARG (double *) = d;
2462 }
2463 else
2464 {
2465 float d = __strtof_internal
2466 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2467 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2468 *ARG (float *) = d;
2469 }
2470
2471 if (__glibc_unlikely (tw == char_buffer_start (&charbuf)))
2472 conv_error ();
2473
2474 if (!(flags & SUPPRESS))
2475 ++done;
2476 break;
2477
2478 case L_('['): /* Character class. */
2479 if (flags & LONG)
2480 STRING_ARG (wstr, wchar_t, 100);
2481 else
2482 STRING_ARG (str, char, 100);
2483
2484 if (*f == L_('^'))
2485 {
2486 ++f;
2487 not_in = 1;
2488 }
2489 else
2490 not_in = 0;
2491
349718d4
ZW
2492
2493#ifdef COMPILE_WSCANF
2494 /* Find the beginning and the end of the scanlist. We are not
2495 creating a lookup table since it would have to be too large.
2496 Instead we search each time through the string. This is not
2497 a constant lookup time but who uses this feature deserves to
2498 be punished. */
2499 tw = (wchar_t *) f; /* Marks the beginning. */
2500
2501 if (*f == L']')
2502 ++f;
2503
2504 while ((fc = *f++) != L'\0' && fc != L']');
2505
2506 if (__glibc_unlikely (fc == L'\0'))
2507 conv_error ();
2508 wchar_t *twend = (wchar_t *) f - 1;
2509#else
2510 /* Fill WP with byte flags indexed by character.
2511 We will use this flag map for matching input characters. */
2512 if (!scratch_buffer_set_array_size
2513 (&charbuf.scratch, UCHAR_MAX + 1, 1))
2514 {
2515 done = EOF;
2516 goto errout;
2517 }
2518 memset (charbuf.scratch.data, '\0', UCHAR_MAX + 1);
2519
2520 fc = *f;
2521 if (fc == ']' || fc == '-')
2522 {
2523 /* If ] or - appears before any char in the set, it is not
2524 the terminator or separator, but the first char in the
2525 set. */
2526 ((char *)charbuf.scratch.data)[fc] = 1;
2527 ++f;
2528 }
2529
2530 while ((fc = *f++) != '\0' && fc != ']')
c0e9ddf5 2531 if (fc == '-' && *f != '\0' && *f != ']' && f[-2] <= *f)
349718d4
ZW
2532 {
2533 /* Add all characters from the one before the '-'
2534 up to (but not including) the next format char. */
c0e9ddf5 2535 for (fc = f[-2]; fc < *f; ++fc)
349718d4
ZW
2536 ((char *)charbuf.scratch.data)[fc] = 1;
2537 }
2538 else
2539 /* Add the character to the flag map. */
2540 ((char *)charbuf.scratch.data)[fc] = 1;
2541
2542 if (__glibc_unlikely (fc == '\0'))
2543 conv_error();
2544#endif
2545
2546 if (flags & LONG)
2547 {
2548 size_t now = read_in;
2549#ifdef COMPILE_WSCANF
2550 if (__glibc_unlikely (inchar () == WEOF))
2551 input_error ();
2552
2553 do
2554 {
2555 wchar_t *runp;
2556
2557 /* Test whether it's in the scanlist. */
2558 runp = tw;
2559 while (runp < twend)
2560 {
2561 if (runp[0] == L'-' && runp[1] != '\0'
2562 && runp + 1 != twend
2563 && runp != tw
2564 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2565 {
2566 /* Match against all characters in between the
2567 first and last character of the sequence. */
2568 wchar_t wc;
2569
2570 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2571 if ((wint_t) wc == c)
2572 break;
2573
2574 if (wc <= runp[1] && !not_in)
2575 break;
2576 if (wc <= runp[1] && not_in)
2577 {
2578 /* The current character is not in the
2579 scanset. */
2580 ungetc (c, s);
2581 goto out;
2582 }
2583
2584 runp += 2;
2585 }
2586 else
2587 {
2588 if ((wint_t) *runp == c && !not_in)
2589 break;
2590 if ((wint_t) *runp == c && not_in)
2591 {
2592 ungetc (c, s);
2593 goto out;
2594 }
2595
2596 ++runp;
2597 }
2598 }
2599
2600 if (runp == twend && !not_in)
2601 {
2602 ungetc (c, s);
2603 goto out;
2604 }
2605
2606 if (!(flags & SUPPRESS))
2607 {
2608 *wstr++ = c;
2609
2610 if ((flags & MALLOC)
2611 && wstr == (wchar_t *) *strptr + strsize)
2612 {
2613 /* Enlarge the buffer. */
2614 wstr = (wchar_t *) realloc (*strptr,
2615 (2 * strsize)
2616 * sizeof (wchar_t));
2617 if (wstr == NULL)
2618 {
2619 /* Can't allocate that much. Last-ditch
2620 effort. */
2621 wstr = (wchar_t *)
2622 realloc (*strptr, (strsize + 1)
2623 * sizeof (wchar_t));
2624 if (wstr == NULL)
2625 {
2626 if (flags & POSIX_MALLOC)
2627 {
2628 done = EOF;
2629 goto errout;
2630 }
2631 /* We lose. Oh well. Terminate the string
2632 and stop converting, so at least we don't
2633 skip any input. */
2634 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2635 strptr = NULL;
2636 ++done;
2637 conv_error ();
2638 }
2639 else
2640 {
2641 *strptr = (char *) wstr;
2642 wstr += strsize;
2643 ++strsize;
2644 }
2645 }
2646 else
2647 {
2648 *strptr = (char *) wstr;
2649 wstr += strsize;
2650 strsize *= 2;
2651 }
2652 }
2653 }
2654 }
b03e4d7b 2655 while ((width < 0 || --width > 0) && inchar () != WEOF);
349718d4
ZW
2656 out:
2657#else
2658 char buf[MB_LEN_MAX];
2659 size_t cnt = 0;
2660 mbstate_t cstate;
2661
2662 if (__glibc_unlikely (inchar () == EOF))
2663 input_error ();
2664
2665 memset (&cstate, '\0', sizeof (cstate));
2666
2667 do
2668 {
2669 if (((char *) charbuf.scratch.data)[c] == not_in)
2670 {
2671 ungetc_not_eof (c, s);
2672 break;
2673 }
2674
2675 /* This is easy. */
2676 if (!(flags & SUPPRESS))
2677 {
2678 size_t n;
2679
2680 /* Convert it into a wide character. */
2681 buf[0] = c;
2682 n = __mbrtowc (wstr, buf, 1, &cstate);
2683
2684 if (n == (size_t) -2)
2685 {
2686 /* Possibly correct character, just not enough
2687 input. */
2688 ++cnt;
2689 assert (cnt < MB_LEN_MAX);
2690 continue;
2691 }
2692 cnt = 0;
2693
2694 ++wstr;
2695 if ((flags & MALLOC)
2696 && wstr == (wchar_t *) *strptr + strsize)
2697 {
2698 /* Enlarge the buffer. */
2699 wstr = (wchar_t *) realloc (*strptr,
2700 (2 * strsize
2701 * sizeof (wchar_t)));
2702 if (wstr == NULL)
2703 {
2704 /* Can't allocate that much. Last-ditch
2705 effort. */
2706 wstr = (wchar_t *)
2707 realloc (*strptr, ((strsize + 1)
2708 * sizeof (wchar_t)));
2709 if (wstr == NULL)
2710 {
2711 if (flags & POSIX_MALLOC)
2712 {
2713 done = EOF;
2714 goto errout;
2715 }
2716 /* We lose. Oh well. Terminate the
2717 string and stop converting,
2718 so at least we don't skip any input. */
2719 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2720 strptr = NULL;
2721 ++done;
2722 conv_error ();
2723 }
2724 else
2725 {
2726 *strptr = (char *) wstr;
2727 wstr += strsize;
2728 ++strsize;
2729 }
2730 }
2731 else
2732 {
2733 *strptr = (char *) wstr;
2734 wstr += strsize;
2735 strsize *= 2;
2736 }
2737 }
2738 }
2739
b03e4d7b 2740 if (width >= 0 && --width <= 0)
349718d4
ZW
2741 break;
2742 }
2743 while (inchar () != EOF);
2744
2745 if (__glibc_unlikely (cnt != 0))
2746 /* We stopped in the middle of recognizing another
2747 character. That's a problem. */
2748 encode_error ();
2749#endif
2750
2751 if (__glibc_unlikely (now == read_in))
2752 /* We haven't successfully read any character. */
2753 conv_error ();
2754
2755 if (!(flags & SUPPRESS))
2756 {
2757 *wstr++ = L'\0';
2758
2759 if ((flags & MALLOC)
2760 && wstr - (wchar_t *) *strptr != strsize)
2761 {
2762 wchar_t *cp = (wchar_t *)
2763 realloc (*strptr, ((wstr - (wchar_t *) *strptr)
c4f50205 2764 * sizeof (wchar_t)));
349718d4
ZW
2765 if (cp != NULL)
2766 *strptr = (char *) cp;
2767 }
2768 strptr = NULL;
2769
2770 ++done;
2771 }
2772 }
2773 else
2774 {
2775 size_t now = read_in;
2776
2777 if (__glibc_unlikely (inchar () == EOF))
2778 input_error ();
2779
2780#ifdef COMPILE_WSCANF
2781
2782 memset (&state, '\0', sizeof (state));
2783
2784 do
2785 {
2786 wchar_t *runp;
2787 size_t n;
2788
2789 /* Test whether it's in the scanlist. */
2790 runp = tw;
2791 while (runp < twend)
2792 {
2793 if (runp[0] == L'-' && runp[1] != '\0'
2794 && runp + 1 != twend
2795 && runp != tw
2796 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2797 {
2798 /* Match against all characters in between the
2799 first and last character of the sequence. */
2800 wchar_t wc;
2801
2802 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2803 if ((wint_t) wc == c)
2804 break;
2805
2806 if (wc <= runp[1] && !not_in)
2807 break;
2808 if (wc <= runp[1] && not_in)
2809 {
2810 /* The current character is not in the
2811 scanset. */
2812 ungetc (c, s);
2813 goto out2;
2814 }
2815
2816 runp += 2;
2817 }
2818 else
2819 {
2820 if ((wint_t) *runp == c && !not_in)
2821 break;
2822 if ((wint_t) *runp == c && not_in)
2823 {
2824 ungetc (c, s);
2825 goto out2;
2826 }
2827
2828 ++runp;
2829 }
2830 }
2831
2832 if (runp == twend && !not_in)
2833 {
2834 ungetc (c, s);
2835 goto out2;
2836 }
2837
2838 if (!(flags & SUPPRESS))
2839 {
2840 if ((flags & MALLOC)
2841 && *strptr + strsize - str <= MB_LEN_MAX)
2842 {
2843 /* Enlarge the buffer. */
2844 size_t strleng = str - *strptr;
2845 char *newstr;
2846
2847 newstr = (char *) realloc (*strptr, 2 * strsize);
2848 if (newstr == NULL)
2849 {
2850 /* Can't allocate that much. Last-ditch
2851 effort. */
2852 newstr = (char *) realloc (*strptr,
2853 strleng + MB_LEN_MAX);
2854 if (newstr == NULL)
2855 {
2856 if (flags & POSIX_MALLOC)
2857 {
2858 done = EOF;
2859 goto errout;
2860 }
2861 /* We lose. Oh well. Terminate the string
2862 and stop converting, so at least we don't
2863 skip any input. */
2864 ((char *) (*strptr))[strleng] = '\0';
2865 strptr = NULL;
2866 ++done;
2867 conv_error ();
2868 }
2869 else
2870 {
2871 *strptr = newstr;
2872 str = newstr + strleng;
2873 strsize = strleng + MB_LEN_MAX;
2874 }
2875 }
2876 else
2877 {
2878 *strptr = newstr;
2879 str = newstr + strleng;
2880 strsize *= 2;
2881 }
2882 }
2883 }
2884
2885 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
2886 if (__glibc_unlikely (n == (size_t) -1))
2887 encode_error ();
2888
2889 assert (n <= MB_LEN_MAX);
2890 str += n;
2891 }
b03e4d7b 2892 while ((width < 0 || --width > 0) && inchar () != WEOF);
349718d4
ZW
2893 out2:
2894#else
2895 do
2896 {
2897 if (((char *) charbuf.scratch.data)[c] == not_in)
2898 {
2899 ungetc_not_eof (c, s);
2900 break;
2901 }
2902
2903 /* This is easy. */
2904 if (!(flags & SUPPRESS))
2905 {
2906 *str++ = c;
2907 if ((flags & MALLOC)
2908 && (char *) str == *strptr + strsize)
2909 {
2910 /* Enlarge the buffer. */
2911 size_t newsize = 2 * strsize;
2912
2913 allocagain:
2914 str = (char *) realloc (*strptr, newsize);
2915 if (str == NULL)
2916 {
2917 /* Can't allocate that much. Last-ditch
2918 effort. */
2919 if (newsize > strsize + 1)
2920 {
2921 newsize = strsize + 1;
2922 goto allocagain;
2923 }
2924 if (flags & POSIX_MALLOC)
2925 {
2926 done = EOF;
2927 goto errout;
2928 }
2929 /* We lose. Oh well. Terminate the
2930 string and stop converting,
2931 so at least we don't skip any input. */
2932 ((char *) (*strptr))[strsize - 1] = '\0';
2933 strptr = NULL;
2934 ++done;
2935 conv_error ();
2936 }
2937 else
2938 {
2939 *strptr = (char *) str;
2940 str += strsize;
2941 strsize = newsize;
2942 }
2943 }
2944 }
2945 }
b03e4d7b 2946 while ((width < 0 || --width > 0) && inchar () != EOF);
349718d4
ZW
2947#endif
2948
2949 if (__glibc_unlikely (now == read_in))
2950 /* We haven't successfully read any character. */
2951 conv_error ();
2952
2953 if (!(flags & SUPPRESS))
2954 {
2955#ifdef COMPILE_WSCANF
2956 /* We have to emit the code to get into the initial
2957 state. */
2958 char buf[MB_LEN_MAX];
2959 size_t n = __wcrtomb (buf, L'\0', &state);
2960 if (n > 0 && (flags & MALLOC)
2961 && str + n >= *strptr + strsize)
2962 {
2963 /* Enlarge the buffer. */
2964 size_t strleng = str - *strptr;
2965 char *newstr;
2966
2967 newstr = (char *) realloc (*strptr, strleng + n + 1);
2968 if (newstr == NULL)
2969 {
2970 if (flags & POSIX_MALLOC)
2971 {
2972 done = EOF;
2973 goto errout;
2974 }
2975 /* We lose. Oh well. Terminate the string
2976 and stop converting, so at least we don't
2977 skip any input. */
2978 ((char *) (*strptr))[strleng] = '\0';
2979 strptr = NULL;
2980 ++done;
2981 conv_error ();
2982 }
2983 else
2984 {
2985 *strptr = newstr;
2986 str = newstr + strleng;
2987 strsize = strleng + n + 1;
2988 }
2989 }
2990
2991 str = __mempcpy (str, buf, n);
2992#endif
2993 *str++ = '\0';
2994
2995 if ((flags & MALLOC) && str - *strptr != strsize)
2996 {
2997 char *cp = (char *) realloc (*strptr, str - *strptr);
2998 if (cp != NULL)
2999 *strptr = cp;
3000 }
3001 strptr = NULL;
3002
3003 ++done;
3004 }
3005 }
3006 break;
3007
3008 case L_('p'): /* Generic pointer. */
3009 base = 16;
3010 /* A PTR must be the same size as a `long int'. */
3011 flags &= ~(SHORT|LONGDBL);
3012 if (need_long)
3013 flags |= LONG;
3014 flags |= READ_POINTER;
3015 goto number;
3016
3017 default:
3018 /* If this is an unknown format character punt. */
3019 conv_error ();
3020 }
3021 }
3022
3023 /* The last thing we saw in the format string was a white space.
3024 Consume the last white spaces. */
3025 if (skip_space)
3026 {
3027 do
3028 c = inchar ();
3029 while (ISSPACE (c));
3030 ungetc (c, s);
3031 }
3032
3033 errout:
3034 /* Unlock stream. */
3035 UNLOCK_STREAM (s);
3036
3037 scratch_buffer_free (&charbuf.scratch);
3038
3039 if (__glibc_unlikely (done == EOF))
3040 {
3041 if (__glibc_unlikely (ptrs_to_free != NULL))
3042 {
3043 struct ptrs_to_free *p = ptrs_to_free;
3044 while (p != NULL)
3045 {
3046 for (size_t cnt = 0; cnt < p->count; ++cnt)
3047 {
3048 free (*p->ptrs[cnt]);
3049 *p->ptrs[cnt] = NULL;
3050 }
3051 p = p->next;
3052 ptrs_to_free = p;
3053 }
3054 }
3055 }
3056 else if (__glibc_unlikely (strptr != NULL))
3057 {
3058 free (*strptr);
3059 *strptr = NULL;
3060 }
3061 return done;
3062}