]> git.ipfire.org Git - thirdparty/glibc.git/blame - stdio-common/vfscanf.c
Update.
[thirdparty/glibc.git] / stdio-common / vfscanf.c
CommitLineData
390955cb 1/* Copyright (C) 1991,92,93,94,95,96,97,98,99 Free Software Foundation, Inc.
2c6fe0bd 2 This file is part of the GNU C Library.
28f540f4 3
2c6fe0bd
UD
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
28f540f4 8
2c6fe0bd
UD
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
28f540f4 13
2c6fe0bd
UD
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU C Library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
28f540f4 18
28f540f4
RM
19#include <errno.h>
20#include <limits.h>
21#include <ctype.h>
22#include <stdarg.h>
23#include <stdio.h>
e852e889 24#include <stdint.h>
28f540f4
RM
25#include <stdlib.h>
26#include <string.h>
8d8c6efa 27#include <wchar.h>
2c6fe0bd 28#include <wctype.h>
5107cf1d 29#include <bits/libc-lock.h>
14ea22e9 30#include <locale/localeinfo.h>
28f540f4 31
/* Pick the type spelled by LONGLONG: with GCC it is long long and
   HAVE_LONGLONG is defined as well; with any other compiler LONGLONG
   falls back to plain long and HAVE_LONGLONG stays undefined. */
28f540f4 32#ifdef __GNUC__
14ea22e9
UD
33# define HAVE_LONGLONG
34# define LONGLONG long long
28f540f4 35#else
14ea22e9 36# define LONGLONG long
28f540f4
RM
37#endif
38
96d0213e
UD
39/* Determine whether we have to handle `long long' at all. */
/* need_longlong is a compile-time 0 when LONG_MAX equals LONG_LONG_MAX and
   1 otherwise.  The scanning code tests it together with the LONGDBL flag,
   so with a value of 0 the separate long long integer paths are never
   taken and the ll, q and L modifiers set LONG instead of LONGDBL. */
40#if LONG_MAX == LONG_LONG_MAX
41# define need_longlong 0
42#else
43# define need_longlong 1
44#endif
45
96aa2d94 46/* These are the flags of the conversion format.  Each one is a distinct bit; they are OR-ed into the local variable flags, which is reset to 0 at the start of every conversion specification. */
0793d348
RM
47# define LONG 0x001 /* l: long or double */
48# define LONGDBL 0x002 /* L: long long or long double */
49# define SHORT 0x004 /* h: short */
50# define SUPPRESS 0x008 /* *: suppress assignment */
51# define POINTER 0x010 /* weird %p pointer (`fake hex') */
52# define NOSKIP 0x020 /* do not skip blanks */
53# define WIDTH 0x040 /* width was given */
54# define GROUP 0x080 /* ': group numbers */
55# define MALLOC 0x100 /* a: malloc strings */
cc3fa755 56# define CHAR 0x200 /* hh: char */
96aa2d94
RM
57
58
05be689b
RM
59#ifdef USE_IN_LIBIO
60# include <libioP.h>
61# include <libio.h>
62
a68b0d31 63# undef va_list
96aa2d94 64# define va_list _IO_va_list
c764b9a7
UD
/* Push the character C back onto the stream S with _IO_sputbackc and take
   it out of the read_in count again.  Nothing at all happens when C is EOF.
   The result is cast to void, so the macro yields no value.  The argument
   c is used unparenthesized, so pass a simple expression only. */
65# define ungetc(c, s) ((void) ((int) c == EOF \
66 || (--read_in, \
67 _IO_sputbackc (s, (unsigned char) c))))
b112c02f
UD
/* Read the next character from the stream s into the local variable c and
   yield it, counting it in read_in unless it is EOF.  Once c holds EOF no
   further read is attempted and EOF is yielded again.  Relies on the
   locals c, s and read_in of the enclosing function. */
68# define inchar() (c == EOF ? EOF \
69 : ((c = _IO_getc_unlocked (s)), \
70 (void) (c != EOF && ++read_in), c))
2c6fe0bd
UD
/* The four macros below abort the scan: each one executes a return
   statement, so they can only be used inside a function that has s, done
   and errp in scope.  All of them unlock the stream and call
   __libc_cleanup_end (0) before returning. */
/* Encoding error: set bit 4 in *errp (if errp is not NULL), set errno to
   EILSEQ and return the number of assignments done so far. */
71# define encode_error() do { \
72 if (errp != NULL) *errp |= 4; \
73 _IO_funlockfile (s); \
c764b9a7 74 __libc_cleanup_end (0); \
2c6fe0bd
UD
75 __set_errno (EILSEQ); \
76 return done; \
77 } while (0)
7c713e28
RM
/* Conversion (matching) error: set bit 2 in *errp (if errp is not NULL)
   and return the number of assignments done so far. */
78# define conv_error() do { \
79 if (errp != NULL) *errp |= 2; \
7c713e28 80 _IO_funlockfile (s); \
c764b9a7 81 __libc_cleanup_end (0); \
7c713e28
RM
82 return done; \
83 } while (0)
/* Input error: set bit 1 in *errp (if errp is not NULL) and return done
   when it is nonzero, otherwise EOF (GNU ?: extension). */
84# define input_error() do { \
85 _IO_funlockfile (s); \
86 if (errp != NULL) *errp |= 1; \
c764b9a7 87 __libc_cleanup_end (0); \
7c713e28
RM
88 return done ?: EOF; \
89 } while (0)
/* Out of memory: set errno to ENOMEM and return EOF regardless of how
   many assignments were already done.  Unlike the other three this one
   does not touch *errp. */
90# define memory_error() do { \
91 _IO_funlockfile (s); \
c4029823 92 __set_errno (ENOMEM); \
c764b9a7 93 __libc_cleanup_end (0); \
7c713e28
RM
94 return EOF; \
95 } while (0)
/* Validate the arguments of the scanning function and return EOF from the
   enclosing function when they are unusable: CHECK_FILE is applied to the
   stream first, a stream with _IO_NO_READS set yields errno EBADF, and a
   NULL format goes through MAYBE_SET_EINVAL.  Nothing has been locked yet
   when this runs, so the early returns do not unlock anything. */
96# define ARGCHECK(s, format) \
97 do \
98 { \
99 /* Check file argument for consistency. */ \
100 CHECK_FILE (s, EOF); \
f8b87ef0
UD
101 if (s->_flags & _IO_NO_READS) \
102 { \
103 __set_errno (EBADF); \
104 return EOF; \
105 } \
106 else if (format == NULL) \
107 { \
108 MAYBE_SET_EINVAL; \
109 return EOF; \
110 } \
96aa2d94 111 } while (0)
edf5b2d7
UD
/* Register _IO_funlockfile as cleanup handler for the stream S and then
   lock S.  The expansion is two separate statements that are not wrapped
   in do/while, so use it only as a full statement of its own, never as the
   unbraced body of an if or a loop. */
112# define LOCK_STREAM(S) \
113 __libc_cleanup_region_start ((void (*) (void *)) &_IO_funlockfile, (S)); \
114 _IO_flockfile (S)
c0fb8a56
UD
/* Counterpart of LOCK_STREAM: unlock S and close the cleanup region
   without running the handler (argument 0).  Also expands to two
   statements. */
115# define UNLOCK_STREAM(S) \
116 _IO_funlockfile (S); \
117 __libc_cleanup_region_end (0)
96aa2d94 118#else
/* Non-libio variants.  ungetc first takes the character out of the read_in
   count (unless it is EOF) and then calls the real ungetc; the macro name
   is not expanded again inside its own replacement.  inchar reads the next
   character with getc into the local c, counts it in read_in unless it is
   EOF, and keeps yielding EOF without reading once c holds EOF. */
d705269e 119# define ungetc(c, s) ((void) (c != EOF && --read_in), ungetc (c, s))
b112c02f 120# define inchar() (c == EOF ? EOF \
a588b67d 121 : ((c = getc (s)), (void) (c != EOF && ++read_in), c))
/* Non-libio variants of the abort macros.  Each one executes a return
   statement and therefore only works inside a function that has s and
   done in scope.  There is no errp in this configuration, so no error
   bits are recorded. */
/* Encoding error: set errno to EILSEQ and return the assignments done. */
2c6fe0bd 122# define encode_error() do { \
ebe3b3eb 123 funlockfile (s); \
2c6fe0bd
UD
124 __set_errno (EILSEQ); \
125 return done; \
126 } while (0)
7c713e28
RM
/* Conversion (matching) error: return the assignments done so far. */
127# define conv_error() do { \
128 funlockfile (s); \
7c713e28
RM
129 return done; \
130 } while (0)
/* Input error: return done when it is nonzero, otherwise EOF (GNU ?:
   extension). */
131# define input_error() do { \
132 funlockfile (s); \
133 return done ?: EOF; \
134 } while (0)
/* Out of memory: set errno to ENOMEM and return EOF. */
135# define memory_error() do { \
136 funlockfile (s); \
c4029823 137 __set_errno (ENOMEM); \
7c713e28
RM
138 return EOF; \
139 } while (0)
/* Validate the arguments of the scanning function and return EOF from the
   enclosing function when they are unusable: errno is set to EBADF when
   __validfp rejects the stream or the stream is not opened for reading,
   and to EINVAL when format is NULL. */
140# define ARGCHECK(s, format) \
141 do \
142 { \
143 /* Check file argument for consistency. */ \
f8b87ef0
UD
144 if (!__validfp (s) || !s->__mode.__read) \
145 { \
146 __set_errno (EBADF); \
147 return EOF; \
148 } \
149 else if (format == NULL) \
7c713e28 150 { \
c4029823 151 __set_errno (EINVAL); \
7c713e28
RM
152 return EOF; \
153 } \
96aa2d94 154 } while (0)
edf5b2d7
UD
/* Stream locking for the non-libio build.  The #if 1 branch is the one
   that is compiled: it turns flockfile, funlockfile, LOCK_STREAM and
   UNLOCK_STREAM into empty expansions, so no locking happens at all in
   this configuration.  The #else branch holds the intended implementation
   (cleanup region plus __flockfile / __funlockfile) and is currently dead
   code. */
155#if 1
156 /* XXX For now !!! */
aa1075ea
RM
157# define flockfile(S) /* nothing */
158# define funlockfile(S) /* nothing */
edf5b2d7 159# define LOCK_STREAM(S)
c0fb8a56 160# define UNLOCK_STREAM(S)
edf5b2d7
UD
161#else
162# define LOCK_STREAM(S) \
163 __libc_cleanup_region_start (&__funlockfile, (S)); \
164 __flockfile (S)
c0fb8a56
UD
165# define UNLOCK_STREAM(S) \
166 __funlockfile (S); \
167 __libc_cleanup_region_end (0)
edf5b2d7 168#endif
96aa2d94 169#endif
28f540f4
RM
170
171
172/* Read formatted input from S according to the format string
173 FORMAT, using the argument list in ARG.
174 Return the number of assignments made, or -1 for an input error. */
96aa2d94 175#ifdef USE_IN_LIBIO
28f540f4 176int
96aa2d94
RM
177_IO_vfscanf (s, format, argptr, errp)
178 _IO_FILE *s;
179 const char *format;
180 _IO_va_list argptr;
181 int *errp;
182#else
183int
42d2676e 184__vfscanf (FILE *s, const char *format, va_list argptr)
96aa2d94 185#endif
28f540f4 186{
4cca6b86 187 va_list arg;
96aa2d94 188 register const char *f = format;
b17178fc 189 register unsigned char fc; /* Current character of the format. */
28f540f4
RM
190 register size_t done = 0; /* Assignments done. */
191 register size_t read_in = 0; /* Chars read in. */
f8b87ef0 192 register int c = 0; /* Last char read. */
28f540f4 193 register int width; /* Maximum field width. */
0793d348
RM
194 register int flags; /* Modifiers for current format element. */
195
28f540f4 196 /* Status for reading F-P nums. */
01cdeca0 197 char got_dot, got_e, negative;
28f540f4
RM
198 /* If a [...] is a [^...]. */
199 char not_in;
377a515b 200#define exp_char not_in
28f540f4
RM
201 /* Base for integral numbers. */
202 int base;
203 /* Signedness for integral numbers. */
204 int number_signed;
377a515b 205#define is_hexa number_signed
77a58cad
RM
206 /* Decimal point character. */
207 wchar_t decimal;
0793d348
RM
208 /* The thousands character of the current locale. */
209 wchar_t thousands;
8d8c6efa
UD
210 /* State for the conversions. */
211 mbstate_t state;
28f540f4 212 /* Integral holding variables. */
f0bf9cb9
RM
213 union
214 {
215 long long int q;
216 unsigned long long int uq;
217 long int l;
218 unsigned long int ul;
219 } num;
28f540f4 220 /* Character-buffer pointer. */
ba1ffaa1 221 char *str = NULL;
2c6fe0bd 222 wchar_t *wstr = NULL;
ba1ffaa1
UD
223 char **strptr = NULL;
224 size_t strsize = 0;
05be689b
RM
225 /* We must not react on white spaces immediately because they can
226 possibly be matched even if in the input stream no character is
227 available anymore. */
228 int skip_space = 0;
c53a89d4
UD
229 /* Nonzero if we are reading a pointer. */
230 int read_pointer;
28f540f4 231 /* Workspace. */
77a58cad
RM
232 char *tw; /* Temporary pointer. */
233 char *wp = NULL; /* Workspace. */
77a58cad 234 size_t wpmax = 0; /* Maximal size of workspace. */
05be689b 235 size_t wpsize; /* Currently used bytes in workspace. */
77a58cad
RM
236#define ADDW(Ch) \
237 do \
238 { \
239 if (wpsize == wpmax) \
240 { \
241 char *old = wp; \
b17178fc 242 wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax; \
77a58cad 243 wp = (char *) alloca (wpmax); \
05be689b 244 if (old != NULL) \
77a58cad
RM
245 memcpy (wp, old, wpsize); \
246 } \
247 wp[wpsize++] = (Ch); \
248 } \
249 while (0)
28f540f4 250
4cca6b86
UD
251#ifdef __va_copy
252 __va_copy (arg, argptr);
253#else
254 arg = (va_list) argptr;
255#endif
256
96aa2d94 257 ARGCHECK (s, format);
28f540f4
RM
258
259 /* Figure out the decimal point character. */
8d8c6efa
UD
260 memset (&state, '\0', sizeof (state));
261 if (__mbrtowc (&decimal, _NL_CURRENT (LC_NUMERIC, DECIMAL_POINT),
262 strlen (_NL_CURRENT (LC_NUMERIC, DECIMAL_POINT)), &state)
263 <= 0)
933e73fa 264 decimal = (wchar_t) *_NL_CURRENT (LC_NUMERIC, DECIMAL_POINT);
0793d348 265 /* Figure out the thousands separator character. */
8d8c6efa
UD
266 memset (&state, '\0', sizeof (state));
267 if (__mbrtowc (&thousands, _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP),
268 strlen (_NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP)),
269 &state) <= 0)
0793d348 270 thousands = (wchar_t) *_NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
28f540f4 271
7c713e28 272 /* Lock the stream. */
edf5b2d7 273 LOCK_STREAM (s);
aa1075ea 274
8d8c6efa
UD
275
276 /* From now on we use `state' to convert the format string. */
277 memset (&state, '\0', sizeof (state));
278
28f540f4
RM
279 /* Run through the format string. */
280 while (*f != '\0')
281 {
be69ea41
RM
282 unsigned int argpos;
283 /* Extract the next argument, which is of type TYPE.
284 For a %N$... spec, this is the Nth argument from the beginning;
285 otherwise it is the next argument after the state now in ARG. */
4cca6b86 286#ifdef __va_copy
96aa2d94 287# define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
4cca6b86
UD
288 ({ unsigned int pos = argpos; \
289 va_list arg; \
290 __va_copy (arg, argptr); \
291 while (--pos > 0) \
292 (void) va_arg (arg, void *); \
293 va_arg (arg, type); \
294 }))
295#else
296# if 0
297 /* XXX Possible optimization. */
298# define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
96aa2d94
RM
299 ({ va_list arg = (va_list) argptr; \
300 arg = (va_list) ((char *) arg \
301 + (argpos - 1) \
302 * __va_rounded_size (void *)); \
303 va_arg (arg, type); \
304 }))
4cca6b86
UD
305# else
306# define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
be69ea41
RM
307 ({ unsigned int pos = argpos; \
308 va_list arg = (va_list) argptr; \
309 while (--pos > 0) \
310 (void) va_arg (arg, void *); \
311 va_arg (arg, type); \
312 }))
4cca6b86 313# endif
96aa2d94 314#endif
be69ea41 315
f0bf9cb9 316 if (!isascii (*f))
28f540f4
RM
317 {
318 /* Non-ASCII, may be a multibyte. */
8d8c6efa 319 int len = __mbrlen (f, strlen (f), &state);
28f540f4
RM
320 if (len > 0)
321 {
3867ee64
RM
322 do
323 {
324 c = inchar ();
325 if (c == EOF)
326 input_error ();
327 else if (c != *f++)
328 {
329 ungetc (c, s);
330 conv_error ();
331 }
332 }
333 while (--len > 0);
28f540f4
RM
334 continue;
335 }
336 }
337
338 fc = *f++;
339 if (fc != '%')
340 {
05be689b
RM
341 /* Remember to skip spaces. */
342 if (isspace (fc))
343 {
344 skip_space = 1;
345 continue;
346 }
347
3867ee64
RM
348 /* Read a character. */
349 c = inchar ();
350
28f540f4
RM
351 /* Characters other than format specs must just match. */
352 if (c == EOF)
96aa2d94 353 input_error ();
05be689b 354
2114e10a
RM
355 /* We saw white space char as the last character in the format
356 string. Now it's time to skip all leading white space. */
05be689b 357 if (skip_space)
28f540f4 358 {
28f540f4 359 while (isspace (c))
f8b87ef0
UD
360 if (inchar () == EOF && errno == EINTR)
361 conv_error ();
05be689b 362 skip_space = 0;
28f540f4 363 }
05be689b 364
3867ee64
RM
365 if (c != fc)
366 {
367 ungetc (c, s);
368 conv_error ();
369 }
05be689b 370
28f540f4
RM
371 continue;
372 }
373
05be689b 374 /* This is the start of the conversion string. */
96aa2d94 375 flags = 0;
96aa2d94 376
c53a89d4
UD
377 /* Not yet decided whether we read a pointer or not. */
378 read_pointer = 0;
379
be69ea41
RM
380 /* Initialize state of modifiers. */
381 argpos = 0;
be69ea41 382
05be689b
RM
383 /* Prepare temporary buffer. */
384 wpsize = 0;
385
be69ea41
RM
386 /* Check for a positional parameter specification. */
387 if (isdigit (*f))
388 {
389 argpos = *f++ - '0';
390 while (isdigit (*f))
391 argpos = argpos * 10 + (*f++ - '0');
392 if (*f == '$')
393 ++f;
394 else
395 {
396 /* Oops; that was actually the field width. */
397 width = argpos;
0793d348 398 flags |= WIDTH;
be69ea41
RM
399 argpos = 0;
400 goto got_width;
401 }
402 }
403
6d52618b 404 /* Check for the assignment-suppressing and the number grouping flag. */
f0bf9cb9
RM
405 while (*f == '*' || *f == '\'')
406 switch (*f++)
407 {
408 case '*':
0793d348 409 flags |= SUPPRESS;
f0bf9cb9
RM
410 break;
411 case '\'':
0793d348 412 flags |= GROUP;
f0bf9cb9
RM
413 break;
414 }
415
96aa2d94
RM
416 /* We have seen width. */
417 if (isdigit (*f))
418 flags |= WIDTH;
96aa2d94 419
28f540f4
RM
420 /* Find the maximum field width. */
421 width = 0;
96aa2d94 422 while (isdigit (*f))
28f540f4
RM
423 {
424 width *= 10;
425 width += *f++ - '0';
426 }
be69ea41 427 got_width:
28f540f4
RM
428 if (width == 0)
429 width = -1;
430
431 /* Check for type modifiers. */
c3966b88
UD
432 switch (*f++)
433 {
434 case 'h':
435 /* ints are short ints or chars. */
436 if (*f == 'h')
437 {
438 ++f;
439 flags |= CHAR;
440 }
441 else
442 flags |= SHORT;
443 break;
444 case 'l':
445 if (*f == 'l')
446 {
447 /* A double `l' is equivalent to an `L'. */
448 ++f;
40cbb1a7 449 flags |= need_longlong ? LONGDBL : LONG;
c3966b88
UD
450 }
451 else
452 /* ints are long ints. */
453 flags |= LONG;
454 break;
455 case 'q':
456 case 'L':
457 /* doubles are long doubles, and ints are long long ints. */
40cbb1a7 458 flags |= need_longlong ? LONGDBL : LONG;
c3966b88
UD
459 break;
460 case 'a':
461 /* The `a' is used as a flag only if followed by `s', `S' or
462 `['. */
463 if (*f != 's' && *f != 'S' && *f != '[')
464 {
465 --f;
466 break;
467 }
468 /* String conversions (%s, %[) take a `char **'
469 arg and fill it in with a malloc'd pointer. */
470 flags |= MALLOC;
471 break;
472 case 'z':
96d0213e 473 if (need_longlong && sizeof (size_t) > sizeof (unsigned long int))
c3966b88
UD
474 flags |= LONGDBL;
475 else if (sizeof (size_t) > sizeof (unsigned int))
476 flags |= LONG;
477 break;
478 case 'j':
96d0213e 479 if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int))
c3966b88
UD
480 flags |= LONGDBL;
481 else if (sizeof (uintmax_t) > sizeof (unsigned int))
482 flags |= LONG;
483 break;
484 case 't':
96d0213e 485 if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int))
c3966b88
UD
486 flags |= LONGDBL;
487 else if (sizeof (ptrdiff_t) > sizeof (int))
488 flags |= LONG;
489 break;
490 default:
491 /* Not a recognized modifier. Backup. */
492 --f;
493 break;
494 }
28f540f4
RM
495
496 /* End of the format string? */
497 if (*f == '\0')
96aa2d94 498 conv_error ();
28f540f4
RM
499
500 /* Find the conversion specifier. */
28f540f4 501 fc = *f++;
f8b87ef0 502 if (skip_space || (fc != '[' && fc != 'c' && fc != 'C' && fc != 'n'))
05be689b
RM
503 {
504 /* Eat whitespace. */
14ea22e9
UD
505 int save_errno = errno;
506 errno = 0;
3867ee64 507 do
14ea22e9 508 if (inchar () == EOF && errno == EINTR)
f8b87ef0 509 input_error ();
3867ee64 510 while (isspace (c));
14ea22e9 511 errno = save_errno;
3867ee64 512 ungetc (c, s);
05be689b
RM
513 skip_space = 0;
514 }
515
28f540f4
RM
516 switch (fc)
517 {
518 case '%': /* Must match a literal '%'. */
14ea22e9 519 c = inchar ();
dc30f461
UD
520 if (c == EOF)
521 input_error ();
28f540f4 522 if (c != fc)
3867ee64
RM
523 {
524 ungetc (c, s);
525 conv_error ();
526 }
28f540f4
RM
527 break;
528
529 case 'n': /* Answer number of assignments done. */
01cdeca0
RM
530 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
531 with the 'n' conversion specifier. */
0793d348 532 if (!(flags & SUPPRESS))
5290baf0
UD
533 {
534 /* Don't count the read-ahead. */
96d0213e 535 if (need_longlong && (flags & LONGDBL))
5290baf0
UD
536 *ARG (long long int *) = read_in;
537 else if (flags & LONG)
538 *ARG (long int *) = read_in;
539 else if (flags & SHORT)
540 *ARG (short int *) = read_in;
541 else
542 *ARG (int *) = read_in;
8d57beea
UD
543
544#ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
545 /* We have a severe problem here. The ISO C standard
546 contradicts itself in explaining the effect of the %n
547 format in `scanf'. While in ISO C:1990 and the ISO C
548 Amendment 1:1995 the result is described as
549
550 Execution of a %n directive does not effect the
551 assignment count returned at the completion of
552 execution of the f(w)scanf function.
553
554 in ISO C Corrigendum 1:1994 the following was added:
555
556 Subclause 7.9.6.2
557 Add the following fourth example:
558 In:
559 #include <stdio.h>
560 int d1, d2, n1, n2, i;
561 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
562 the value 123 is assigned to d1 and the value3 to n1.
563 Because %n can never get an input failure the value
564 of 3 is also assigned to n2. The value of d2 is not
565 affected. The value 3 is assigned to i.
566
14ea22e9 567 We go for now with the historically correct code from ISO C,
8d57beea
UD
568 i.e., we don't count the %n assignments. When it ever
569 should prove to be wrong just remove the #ifdef above. */
5290baf0 570 ++done;
8d57beea 571#endif
5290baf0 572 }
28f540f4
RM
573 break;
574
575 case 'c': /* Match characters. */
2c6fe0bd 576 if ((flags & LONG) == 0)
28f540f4 577 {
2c6fe0bd
UD
578 if (!(flags & SUPPRESS))
579 {
580 str = ARG (char *);
581 if (str == NULL)
582 conv_error ();
583 }
28f540f4 584
14ea22e9 585 c = inchar ();
2c6fe0bd
UD
586 if (c == EOF)
587 input_error ();
28f540f4 588
2c6fe0bd
UD
589 if (width == -1)
590 width = 1;
28f540f4 591
2c6fe0bd
UD
592 if (!(flags & SUPPRESS))
593 {
594 do
595 *str++ = c;
596 while (--width > 0 && inchar () != EOF);
597 }
598 else
599 while (--width > 0 && inchar () != EOF);
600
2c6fe0bd
UD
601 if (!(flags & SUPPRESS))
602 ++done;
603
604 break;
28f540f4 605 }
2c6fe0bd
UD
606 /* FALLTHROUGH */
607 case 'C':
608 /* Get UTF-8 encoded wide character. Here we assume (as in
609 other parts of the libc) that we only have to handle
610 UTF-8. */
611 {
612 wint_t val;
613 size_t cnt = 0;
614 int first = 1;
28f540f4 615
2c6fe0bd
UD
616 if (!(flags & SUPPRESS))
617 {
618 wstr = ARG (wchar_t *);
619 if (str == NULL)
620 conv_error ();
621 }
edf5b2d7 622
2c6fe0bd
UD
623 do
624 {
625#define NEXT_WIDE_CHAR(First) \
626 c = inchar (); \
627 if (c == EOF) \
6e4c40ba
UD
628 { \
629 /* EOF is only an error for the first character. */ \
630 if (First) \
631 input_error (); \
632 else \
633 break; \
634 } \
2c6fe0bd
UD
635 val = c; \
636 if (val >= 0x80) \
637 { \
638 if ((c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
639 encode_error (); \
640 if ((c & 0xe0) == 0xc0) \
641 { \
642 /* We expect two bytes. */ \
643 cnt = 1; \
644 val &= 0x1f; \
645 } \
646 else if ((c & 0xf0) == 0xe0) \
647 { \
648 /* We expect three bytes. */ \
649 cnt = 2; \
650 val &= 0x0f; \
651 } \
652 else if ((c & 0xf8) == 0xf0) \
653 { \
654 /* We expect four bytes. */ \
655 cnt = 3; \
656 val &= 0x07; \
657 } \
658 else if ((c & 0xfc) == 0xf8) \
659 { \
660 /* We expect five bytes. */ \
661 cnt = 4; \
662 val &= 0x03; \
663 } \
664 else \
665 { \
666 /* We expect six bytes. */ \
667 cnt = 5; \
668 val &= 0x01; \
669 } \
670 \
671 do \
672 { \
673 c = inchar (); \
674 if (c == EOF \
675 || (c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
676 encode_error (); \
677 val <<= 6; \
678 val |= c & 0x3f; \
679 } \
680 while (--cnt > 0); \
681 } \
682 \
683 if (!(flags & SUPPRESS)) \
684 *wstr++ = val; \
dc30f461 685 First = 0
2c6fe0bd
UD
686
687 NEXT_WIDE_CHAR (first);
688 }
689 while (--width > 0);
28f540f4 690
2c6fe0bd
UD
691 if (!(flags & SUPPRESS))
692 ++done;
693 }
28f540f4
RM
694 break;
695
696 case 's': /* Read a string. */
2c6fe0bd
UD
697 if (flags & LONG)
698 /* We have to process a wide character string. */
699 goto wide_char_string;
700
701#define STRING_ARG(Str, Type) \
0793d348 702 if (!(flags & SUPPRESS)) \
28f540f4 703 { \
0793d348 704 if (flags & MALLOC) \
28f540f4
RM
705 { \
706 /* The string is to be stored in a malloc'd buffer. */ \
96aa2d94 707 strptr = ARG (char **); \
28f540f4
RM
708 if (strptr == NULL) \
709 conv_error (); \
710 /* Allocate an initial buffer. */ \
711 strsize = 100; \
2c6fe0bd
UD
712 *strptr = malloc (strsize * sizeof (Type)); \
713 Str = (Type *) *strptr; \
28f540f4
RM
714 } \
715 else \
2c6fe0bd
UD
716 Str = ARG (Type *); \
717 if (Str == NULL) \
28f540f4
RM
718 conv_error (); \
719 }
2c6fe0bd 720 STRING_ARG (str, char);
28f540f4 721
14ea22e9 722 c = inchar ();
28f540f4
RM
723 if (c == EOF)
724 input_error ();
725
726 do
727 {
728 if (isspace (c))
3867ee64
RM
729 {
730 ungetc (c, s);
731 break;
732 }
2c6fe0bd 733#define STRING_ADD_CHAR(Str, c, Type) \
0793d348 734 if (!(flags & SUPPRESS)) \
28f540f4 735 { \
2c6fe0bd
UD
736 *Str++ = c; \
737 if ((flags & MALLOC) && (char *) Str == *strptr + strsize) \
28f540f4
RM
738 { \
739 /* Enlarge the buffer. */ \
2c6fe0bd
UD
740 Str = realloc (*strptr, strsize * 2 * sizeof (Type)); \
741 if (Str == NULL) \
28f540f4
RM
742 { \
743 /* Can't allocate that much. Last-ditch effort. */\
2c6fe0bd
UD
744 Str = realloc (*strptr, \
745 (strsize + 1) * sizeof (Type)); \
746 if (Str == NULL) \
28f540f4
RM
747 { \
748 /* We lose. Oh well. \
749 Terminate the string and stop converting, \
01cdeca0 750 so at least we don't skip any input. */ \
2c6fe0bd 751 ((Type *) (*strptr))[strsize] = '\0'; \
28f540f4
RM
752 ++done; \
753 conv_error (); \
754 } \
755 else \
756 { \
2c6fe0bd
UD
757 *strptr = (char *) Str; \
758 Str = ((Type *) *strptr) + strsize; \
28f540f4
RM
759 ++strsize; \
760 } \
761 } \
762 else \
763 { \
2c6fe0bd
UD
764 *strptr = (char *) Str; \
765 Str = ((Type *) *strptr) + strsize; \
28f540f4
RM
766 strsize *= 2; \
767 } \
768 } \
769 }
2c6fe0bd 770 STRING_ADD_CHAR (str, c, char);
3867ee64 771 } while ((width <= 0 || --width > 0) && inchar () != EOF);
28f540f4 772
0793d348 773 if (!(flags & SUPPRESS))
28f540f4
RM
774 {
775 *str = '\0';
776 ++done;
777 }
778 break;
779
2c6fe0bd
UD
780 case 'S':
781 /* Wide character string. */
782 wide_char_string:
783 {
784 wint_t val;
785 int first = 1;
786 STRING_ARG (wstr, wchar_t);
787
788 do
789 {
790 size_t cnt = 0;
791 NEXT_WIDE_CHAR (first);
792
390955cb 793 if (__iswspace (val))
2c6fe0bd
UD
794 {
795 /* XXX We would have to push back the whole wide char
796 with possibly many bytes. But since scanf does
797 not make a difference for white space characters
798 we can simply push back a simple <SP> which is
799 guaranteed to be in the [:space:] class. */
800 ungetc (' ', s);
801 break;
802 }
803
804 STRING_ADD_CHAR (wstr, val, wchar_t);
805 first = 0;
806 }
807 while (width <= 0 || --width > 0);
808
809 if (!(flags & SUPPRESS))
810 {
811 *wstr = L'\0';
812 ++done;
813 }
814 }
815 break;
816
28f540f4 817 case 'x': /* Hexadecimal integer. */
96aa2d94 818 case 'X': /* Ditto. */
28f540f4
RM
819 base = 16;
820 number_signed = 0;
821 goto number;
822
823 case 'o': /* Octal integer. */
824 base = 8;
825 number_signed = 0;
826 goto number;
827
828 case 'u': /* Unsigned decimal integer. */
829 base = 10;
830 number_signed = 0;
831 goto number;
832
833 case 'd': /* Signed decimal integer. */
834 base = 10;
835 number_signed = 1;
836 goto number;
837
838 case 'i': /* Generic number. */
839 base = 0;
840 number_signed = 1;
841
842 number:
14ea22e9 843 c = inchar ();
28f540f4 844 if (c == EOF)
96aa2d94 845 input_error ();
28f540f4
RM
846
847 /* Check for a sign. */
848 if (c == '-' || c == '+')
849 {
77a58cad 850 ADDW (c);
28f540f4
RM
851 if (width > 0)
852 --width;
3867ee64 853 c = inchar ();
28f540f4
RM
854 }
855
856 /* Look for a leading indication of base. */
05be689b 857 if (width != 0 && c == '0')
28f540f4
RM
858 {
859 if (width > 0)
860 --width;
28f540f4 861
be10a868 862 ADDW (c);
3867ee64 863 c = inchar ();
28f540f4 864
4caef86c 865 if (width != 0 && _tolower (c) == 'x')
28f540f4
RM
866 {
867 if (base == 0)
868 base = 16;
869 if (base == 16)
870 {
871 if (width > 0)
872 --width;
3867ee64 873 c = inchar ();
28f540f4
RM
874 }
875 }
876 else if (base == 0)
877 base = 8;
878 }
879
880 if (base == 0)
881 base = 10;
882
77a58cad 883 /* Read the number into workspace. */
05be689b 884 while (c != EOF && width != 0)
28f540f4 885 {
96aa2d94 886 if (base == 16 ? !isxdigit (c) :
0793d348
RM
887 ((!isdigit (c) || c - '0' >= base) &&
888 !((flags & GROUP) && base == 10 && c == thousands)))
28f540f4 889 break;
77a58cad 890 ADDW (c);
28f540f4
RM
891 if (width > 0)
892 --width;
05be689b 893
3867ee64 894 c = inchar ();
7752137a 895 }
28f540f4 896
77a58cad
RM
897 if (wpsize == 0 ||
898 (wpsize == 1 && (wp[0] == '+' || wp[0] == '-')))
c53a89d4
UD
899 {
900 /* There was no number. If we are supposed to read a pointer
901 we must recognize "(nil)" as well. */
902 if (wpsize == 0 && read_pointer && (width < 0 || width >= 0)
903 && c == '('
4caef86c
UD
904 && _tolower (inchar ()) == 'n'
905 && _tolower (inchar ()) == 'i'
906 && _tolower (inchar ()) == 'l'
c53a89d4
UD
907 && inchar () == ')')
908 /* We must produce the value of a NULL pointer. A single
909 '0' digit is enough. */
910 ADDW ('0');
911 else
912 {
913 /* The last read character is not part of the number
914 anymore. */
915 ungetc (c, s);
916
917 conv_error ();
918 }
919 }
920 else
921 /* The just read character is not part of the number anymore. */
922 ungetc (c, s);
28f540f4
RM
923
924 /* Convert the number. */
77a58cad 925 ADDW ('\0');
96d0213e 926 if (need_longlong && (flags & LONGDBL))
f0bf9cb9 927 {
be69ea41 928 if (number_signed)
2f6d1f1b 929 num.q = __strtoll_internal (wp, &tw, base, flags & GROUP);
f0bf9cb9 930 else
2f6d1f1b 931 num.uq = __strtoull_internal (wp, &tw, base, flags & GROUP);
f0bf9cb9 932 }
28f540f4 933 else
f0bf9cb9 934 {
be69ea41 935 if (number_signed)
0793d348 936 num.l = __strtol_internal (wp, &tw, base, flags & GROUP);
f0bf9cb9 937 else
0793d348 938 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP);
f0bf9cb9 939 }
77a58cad 940 if (wp == tw)
28f540f4
RM
941 conv_error ();
942
0793d348 943 if (!(flags & SUPPRESS))
28f540f4
RM
944 {
945 if (! number_signed)
946 {
96d0213e 947 if (need_longlong && (flags & LONGDBL))
be69ea41 948 *ARG (unsigned LONGLONG int *) = num.uq;
0793d348 949 else if (flags & LONG)
be69ea41 950 *ARG (unsigned long int *) = num.ul;
0793d348 951 else if (flags & SHORT)
be69ea41 952 *ARG (unsigned short int *)
f0bf9cb9 953 = (unsigned short int) num.ul;
cc3fa755
UD
954 else if (flags & CHAR)
955 *ARG (unsigned char *) = (unsigned char) num.ul;
28f540f4 956 else
be69ea41 957 *ARG (unsigned int *) = (unsigned int) num.ul;
28f540f4
RM
958 }
959 else
960 {
96d0213e 961 if (need_longlong && (flags & LONGDBL))
be69ea41 962 *ARG (LONGLONG int *) = num.q;
0793d348 963 else if (flags & LONG)
be69ea41 964 *ARG (long int *) = num.l;
0793d348 965 else if (flags & SHORT)
be69ea41 966 *ARG (short int *) = (short int) num.l;
cc3fa755
UD
967 else if (flags & CHAR)
968 *ARG (signed char *) = (signed char) num.ul;
28f540f4 969 else
be69ea41 970 *ARG (int *) = (int) num.l;
28f540f4
RM
971 }
972 ++done;
973 }
974 break;
975
976 case 'e': /* Floating-point numbers. */
977 case 'E':
978 case 'f':
979 case 'g':
980 case 'G':
f43ce637 981 case 'a':
377a515b 982 case 'A':
14ea22e9 983 c = inchar ();
28f540f4 984 if (c == EOF)
96aa2d94 985 input_error ();
28f540f4
RM
986
987 /* Check for a sign. */
988 if (c == '-' || c == '+')
989 {
01cdeca0 990 negative = c == '-';
96aa2d94 991 if (inchar () == EOF)
28f540f4 992 /* EOF is only an input error before we read any chars. */
96aa2d94 993 conv_error ();
8d8c6efa
UD
994 if (! isdigit (c) && c != decimal)
995 {
996 /* This is no valid number. */
997 ungetc (c, s);
998 input_error ();
999 }
28f540f4
RM
1000 if (width > 0)
1001 --width;
1002 }
01cdeca0
RM
1003 else
1004 negative = 0;
28f540f4 1005
9b26f5c4 1006 /* Take care for the special arguments "nan" and "inf". */
4caef86c 1007 if (_tolower (c) == 'n')
9b26f5c4
UD
1008 {
1009 /* Maybe "nan". */
1010 ADDW (c);
4caef86c 1011 if (inchar () == EOF || _tolower (c) != 'a')
9b26f5c4
UD
1012 input_error ();
1013 ADDW (c);
4caef86c 1014 if (inchar () == EOF || _tolower (c) != 'n')
9b26f5c4
UD
1015 input_error ();
1016 ADDW (c);
1017 /* It is "nan". */
1018 goto scan_float;
1019 }
4caef86c 1020 else if (_tolower (c) == 'i')
9b26f5c4
UD
1021 {
1022 /* Maybe "inf" or "infinity". */
1023 ADDW (c);
4caef86c 1024 if (inchar () == EOF || _tolower (c) != 'n')
9b26f5c4
UD
1025 input_error ();
1026 ADDW (c);
4caef86c 1027 if (inchar () == EOF || _tolower (c) != 'f')
9b26f5c4
UD
1028 input_error ();
1029 ADDW (c);
1030 /* It is at least "inf". */
1031 if (inchar () != EOF)
1032 {
4caef86c 1033 if (_tolower (c) == 'i')
9b26f5c4 1034 {
8d8c6efa 1035 /* Now we have to read the rest as well. */
9b26f5c4 1036 ADDW (c);
4caef86c 1037 if (inchar () == EOF || _tolower (c) != 'n')
9b26f5c4
UD
1038 input_error ();
1039 ADDW (c);
4caef86c 1040 if (inchar () == EOF || _tolower (c) != 'i')
9b26f5c4
UD
1041 input_error ();
1042 ADDW (c);
4caef86c 1043 if (inchar () == EOF || _tolower (c) != 't')
9b26f5c4
UD
1044 input_error ();
1045 ADDW (c);
4caef86c 1046 if (inchar () == EOF || _tolower (c) != 'y')
9b26f5c4
UD
1047 input_error ();
1048 ADDW (c);
1049 }
1050 else
1051 /* Never mind. */
1052 ungetc (c, s);
1053 }
1054 goto scan_float;
1055 }
1056
63551311
UD
1057 is_hexa = 0;
1058 exp_char = 'e';
1059 if (c == '0')
377a515b 1060 {
63551311
UD
1061 ADDW (c);
1062 c = inchar ();
4caef86c 1063 if (_tolower (c) == 'x')
63551311
UD
1064 {
1065 /* It is a number in hexadecimal format. */
1066 ADDW (c);
377a515b 1067
63551311
UD
1068 is_hexa = 1;
1069 exp_char = 'p';
377a515b 1070
63551311
UD
1071 /* Grouping is not allowed. */
1072 flags &= ~GROUP;
1073 c = inchar ();
1074 }
377a515b
UD
1075 }
1076
28f540f4
RM
1077 got_dot = got_e = 0;
1078 do
1079 {
96aa2d94 1080 if (isdigit (c))
77a58cad 1081 ADDW (c);
377a515b
UD
1082 else if (!got_e && is_hexa && isxdigit (c))
1083 ADDW (c);
1084 else if (got_e && wp[wpsize - 1] == exp_char
77a58cad
RM
1085 && (c == '-' || c == '+'))
1086 ADDW (c);
4caef86c 1087 else if (wpsize > 0 && !got_e && _tolower (c) == exp_char)
28f540f4 1088 {
377a515b 1089 ADDW (exp_char);
28f540f4
RM
1090 got_e = got_dot = 1;
1091 }
1092 else if (c == decimal && !got_dot)
1093 {
77a58cad 1094 ADDW (c);
28f540f4
RM
1095 got_dot = 1;
1096 }
0793d348
RM
1097 else if ((flags & GROUP) && c == thousands && !got_dot)
1098 ADDW (c);
28f540f4 1099 else
377a515b
UD
1100 {
1101 /* The last read character is not part of the number
1102 anymore. */
1103 ungetc (c, s);
1104 break;
1105 }
28f540f4
RM
1106 if (width > 0)
1107 --width;
01cdeca0 1108 }
377a515b 1109 while (width != 0 && inchar () != EOF);
3867ee64 1110
377a515b
UD
1111 /* Have we read any character? If we try to read a number
1112 in hexadecimal notation and we have read only the `0x'
1113 prefix this is an error. */
1114 if (wpsize == 0 || (is_hexa && wpsize == 2))
96aa2d94 1115 conv_error ();
28f540f4 1116
9b26f5c4 1117 scan_float:
28f540f4 1118 /* Convert the number. */
77a58cad 1119 ADDW ('\0');
0793d348 1120 if (flags & LONGDBL)
28f540f4 1121 {
0793d348
RM
1122 long double d = __strtold_internal (wp, &tw, flags & GROUP);
1123 if (!(flags & SUPPRESS) && tw != wp)
01cdeca0 1124 *ARG (long double *) = negative ? -d : d;
28f540f4 1125 }
0793d348 1126 else if (flags & LONG)
28f540f4 1127 {
0793d348
RM
1128 double d = __strtod_internal (wp, &tw, flags & GROUP);
1129 if (!(flags & SUPPRESS) && tw != wp)
01cdeca0 1130 *ARG (double *) = negative ? -d : d;
28f540f4
RM
1131 }
1132 else
1133 {
0793d348
RM
1134 float d = __strtof_internal (wp, &tw, flags & GROUP);
1135 if (!(flags & SUPPRESS) && tw != wp)
01cdeca0 1136 *ARG (float *) = negative ? -d : d;
28f540f4
RM
1137 }
1138
77a58cad 1139 if (tw == wp)
28f540f4
RM
1140 conv_error ();
1141
0793d348 1142 if (!(flags & SUPPRESS))
28f540f4
RM
1143 ++done;
1144 break;
1145
1146 case '[': /* Character class. */
2c6fe0bd
UD
1147 if (flags & LONG)
1148 {
1149 STRING_ARG (wstr, wchar_t);
14ea22e9 1150 c = '\0'; /* This is to keep gcc quiet. */
2c6fe0bd
UD
1151 }
1152 else
1153 {
1154 STRING_ARG (str, char);
28f540f4 1155
14ea22e9 1156 c = inchar ();
2c6fe0bd
UD
1157 if (c == EOF)
1158 input_error ();
1159 }
28f540f4
RM
1160
1161 if (*f == '^')
1162 {
1163 ++f;
1164 not_in = 1;
1165 }
1166 else
1167 not_in = 0;
1168
b17178fc
RM
1169 /* Fill WP with byte flags indexed by character.
1170 We will use this flag map for matching input characters. */
1171 if (wpmax < UCHAR_MAX)
1172 {
1173 wpmax = UCHAR_MAX;
1174 wp = (char *) alloca (wpmax);
1175 }
1176 memset (wp, 0, UCHAR_MAX);
1177
1178 fc = *f;
1179 if (fc == ']' || fc == '-')
1180 {
1181 /* If ] or - appears before any char in the set, it is not
1182 the terminator or separator, but the first char in the
1183 set. */
1184 wp[fc] = 1;
1185 ++f;
1186 }
02ac66c5 1187
28f540f4
RM
1188 while ((fc = *f++) != '\0' && fc != ']')
1189 {
1d8dc429
RM
1190 if (fc == '-' && *f != '\0' && *f != ']' &&
1191 (unsigned char) f[-2] <= (unsigned char) *f)
b17178fc
RM
1192 {
1193 /* Add all characters from the one before the '-'
1194 up to (but not including) the next format char. */
1d8dc429
RM
1195 for (fc = f[-2]; fc < *f; ++fc)
1196 wp[fc] = 1;
b17178fc 1197 }
28f540f4 1198 else
b17178fc
RM
1199 /* Add the character to the flag map. */
1200 wp[fc] = 1;
28f540f4
RM
1201 }
1202 if (fc == '\0')
3867ee64 1203 {
2c6fe0bd
UD
1204 if (!(flags & LONG))
1205 ungetc (c, s);
3867ee64
RM
1206 conv_error();
1207 }
28f540f4 1208
2c6fe0bd 1209 if (flags & LONG)
28f540f4 1210 {
2c6fe0bd
UD
1211 wint_t val;
1212 int first = 1;
1213
1214 do
3867ee64 1215 {
2c6fe0bd
UD
1216 size_t cnt = 0;
1217 NEXT_WIDE_CHAR (first);
cd2bd591 1218 if (val <= 255 && wp[val] == not_in)
2c6fe0bd 1219 {
cd2bd591 1220 ungetc (val, s);
2c6fe0bd
UD
1221 break;
1222 }
1223 STRING_ADD_CHAR (wstr, val, wchar_t);
1224 if (width > 0)
1225 --width;
1226 first = 0;
3867ee64 1227 }
2c6fe0bd 1228 while (width != 0);
3867ee64 1229
2c6fe0bd
UD
1230 if (first)
1231 conv_error ();
28f540f4 1232
2c6fe0bd
UD
1233 if (!(flags & SUPPRESS))
1234 {
1235 *wstr = L'\0';
1236 ++done;
1237 }
1238 }
1239 else
28f540f4 1240 {
2c6fe0bd
UD
1241 num.ul = read_in - 1; /* -1 because we already read one char. */
1242 do
1243 {
1244 if (wp[c] == not_in)
1245 {
1246 ungetc (c, s);
1247 break;
1248 }
1249 STRING_ADD_CHAR (str, c, char);
1250 if (width > 0)
1251 --width;
1252 }
1253 while (width != 0 && inchar () != EOF);
1254
1255 if (read_in == num.ul)
1256 conv_error ();
1257
1258 if (!(flags & SUPPRESS))
1259 {
1260 *str = '\0';
1261 ++done;
1262 }
28f540f4
RM
1263 }
1264 break;
1265
1266 case 'p': /* Generic pointer. */
1267 base = 16;
1268 /* A PTR must be the same size as a `long int'. */
0793d348
RM
1269 flags &= ~(SHORT|LONGDBL);
1270 flags |= LONG;
96aa2d94 1271 number_signed = 0;
c53a89d4 1272 read_pointer = 1;
28f540f4 1273 goto number;
47b856a9
UD
1274
1275 default:
1276 /* If this is an unknown format character punt. */
27658f20 1277 conv_error ();
28f540f4
RM
1278 }
1279 }
1280
14ea22e9 1281 /* The last thing we saw in the format string was a white space.
05be689b
RM
1282 Consume the last white spaces. */
1283 if (skip_space)
3867ee64
RM
1284 {
1285 do
1286 c = inchar ();
1287 while (isspace (c));
1288 ungetc (c, s);
1289 }
05be689b 1290
7c713e28 1291 /* Unlock stream. */
c0fb8a56 1292 UNLOCK_STREAM (s);
aa1075ea 1293
3867ee64 1294 return done;
28f540f4
RM
1295}
1296
b2518f55
RM
1297#ifdef USE_IN_LIBIO
/* Wrapper compiled only inside the surrounding `#ifdef USE_IN_LIBIO'
   region: forwards S, FORMAT and ARGPTR unchanged to _IO_vfscanf,
   passing NULL as the fourth argument, and returns that call's result.
   NOTE(review): the meaning of the fourth _IO_vfscanf argument is not
   visible from here -- confirm against its definition.  */
1298int
1299__vfscanf (FILE *s, const char *format, va_list argptr)
1300{
1301 return _IO_vfscanf (s, format, argptr, NULL);
1302}
96aa2d94 1303#endif
b2518f55
RM
1304
/* Presumably exports `vfscanf' as a weak alias of `__vfscanf'; the
   weak_alias macro is defined outside this file -- TODO confirm.  */
1305weak_alias (__vfscanf, vfscanf)