]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/cpplex.c
* cppmacro.c (funlike_invocation_p): Don't step back over CPP_EOF.
[thirdparty/gcc.git] / gcc / cpplex.c
CommitLineData
0578f103 1/* CPP Library - lexical analysis.
e484a1cc 2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
0578f103 3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
6060326b 7 Single-pass line tokenization by Neil Booth, April 2000
0578f103 8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23#include "config.h"
24#include "system.h"
0578f103 25#include "cpplib.h"
26#include "cpphash.h"
27
8330799c 28/* MULTIBYTE_CHARS support only works for native compilers.
29 ??? Ideally what we want is to model widechar support after
30 the current floating point support. */
31#ifdef CROSS_COMPILE
32#undef MULTIBYTE_CHARS
33#endif
34
35#ifdef MULTIBYTE_CHARS
36#include "mbchar.h"
37#include <locale.h>
38#endif
39
79bd622b 40/* Tokens with SPELL_STRING store their spelling in the token list,
41 and it's length in the token->val.name.len. */
42enum spell_type
241e762e 43{
79bd622b 44 SPELL_OPERATOR = 0,
45 SPELL_CHAR,
46 SPELL_IDENT,
8d27e472 47 SPELL_NUMBER,
79bd622b 48 SPELL_STRING,
49 SPELL_NONE
241e762e 50};
51
79bd622b 52struct token_spelling
241e762e 53{
79bd622b 54 enum spell_type category;
55 const unsigned char *name;
241e762e 56};
57
0ca849f9 58static const unsigned char *const digraph_spellings[] =
59{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
79bd622b 60
61#define OP(e, s) { SPELL_OPERATOR, U s },
62#define TK(e, s) { s, U STRINGX (e) },
0ca849f9 63static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
79bd622b 64#undef OP
65#undef TK
66
67#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
68#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
1c124f85 69#define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
e2f9a79f 70
1e0ef2fd 71static void handle_newline PARAMS ((cpp_reader *));
72static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
c808d026 73static cppchar_t get_effective_char PARAMS ((cpp_reader *));
338fa5f7 74
f80e83a9 75static int skip_block_comment PARAMS ((cpp_reader *));
f669338a 76static int skip_line_comment PARAMS ((cpp_reader *));
338fa5f7 77static void adjust_column PARAMS ((cpp_reader *));
435fb09b 78static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
66a5287e 79static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
78a11351 80static U_CHAR *parse_slow PARAMS ((cpp_reader *, const U_CHAR *, int,
81 unsigned int *));
82static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
79bd622b 83static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
338fa5f7 84static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
1e0ef2fd 85static bool trigraph_p PARAMS ((cpp_reader *));
d3f7919d 86static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *,
87 cppchar_t));
79bd622b 88static int name_p PARAMS ((cpp_reader *, const cpp_string *));
c8342759 89static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
90 const unsigned char *, unsigned int *));
83dcbb5c 91static tokenrun *next_tokenrun PARAMS ((tokenrun *));
e916a356 92
8330799c 93static unsigned int hex_digit_value PARAMS ((unsigned int));
4b31a107 94static _cpp_buff *new_buff PARAMS ((size_t));
bce8e0c0 95
f80e83a9 96/* Utility routine:
2c63d6c8 97
76faa4c0 98 Compares, the token TOKEN to the NUL-terminated string STRING.
99 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
f80e83a9 100int
76faa4c0 101cpp_ideq (token, string)
102 const cpp_token *token;
f80e83a9 103 const char *string;
104{
76faa4c0 105 if (token->type != CPP_NAME)
f80e83a9 106 return 0;
76faa4c0 107
c86dbc5b 108 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
bce8e0c0 109}
50fd6b48 110
1e0ef2fd 111/* Call when meeting a newline, assumed to be in buffer->cur[-1].
112 Returns with buffer->cur pointing to the character immediately
113 following the newline (combination). */
114static void
115handle_newline (pfile)
36a0aa7c 116 cpp_reader *pfile;
338fa5f7 117{
1e0ef2fd 118 cpp_buffer *buffer = pfile->buffer;
338fa5f7 119
1e0ef2fd 120 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
435fb09b 121 only accept CR-LF; maybe we should fall back to that behaviour? */
122 if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
1e0ef2fd 123 buffer->cur++;
338fa5f7 124
1e0ef2fd 125 buffer->line_base = buffer->cur;
126 buffer->col_adjust = 0;
127 pfile->line++;
338fa5f7 128}
129
1e0ef2fd 130/* Subroutine of skip_escaped_newlines; called when a 3-character
131 sequence beginning with "??" is encountered. buffer->cur points to
132 the second '?'.
133
134 Warn if necessary, and returns true if the sequence forms a
135 trigraph and the trigraph should be honoured. */
136static bool
137trigraph_p (pfile)
0578f103 138 cpp_reader *pfile;
0578f103 139{
1e0ef2fd 140 cpp_buffer *buffer = pfile->buffer;
141 cppchar_t from_char = buffer->cur[1];
142 bool accept;
143
144 if (!_cpp_trigraph_map[from_char])
145 return false;
146
147 accept = CPP_OPTION (pfile, trigraphs);
148
f669338a 149 /* Don't warn about trigraphs in comments. */
150 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
0578f103 151 {
f80e83a9 152 if (accept)
73328dce 153 cpp_error_with_line (pfile, DL_WARNING,
154 pfile->line, CPP_BUF_COL (buffer) - 1,
155 "trigraph ??%c converted to %c",
156 (int) from_char,
157 (int) _cpp_trigraph_map[from_char]);
4b912310 158 else if (buffer->cur != buffer->last_Wtrigraphs)
159 {
160 buffer->last_Wtrigraphs = buffer->cur;
73328dce 161 cpp_error_with_line (pfile, DL_WARNING,
162 pfile->line, CPP_BUF_COL (buffer) - 1,
163 "trigraph ??%c ignored", (int) from_char);
4b912310 164 }
0578f103 165 }
338fa5f7 166
f80e83a9 167 return accept;
0578f103 168}
169
1e0ef2fd 170/* Skips any escaped newlines introduced by '?' or a '\\', assumed to
1c124f85 171 lie in buffer->cur[-1]. Returns the next byte, which will be in
172 buffer->cur[-1]. This routine performs preprocessing stages 1 and
173 2 of the ISO C standard. */
338fa5f7 174static cppchar_t
1e0ef2fd 175skip_escaped_newlines (pfile)
c808d026 176 cpp_reader *pfile;
0578f103 177{
c808d026 178 cpp_buffer *buffer = pfile->buffer;
1e0ef2fd 179 cppchar_t next = buffer->cur[-1];
c808d026 180
396ffa86 181 /* Only do this if we apply stages 1 and 2. */
182 if (!buffer->from_stage3)
f80e83a9 183 {
396ffa86 184 const unsigned char *saved_cur;
1e0ef2fd 185 cppchar_t next1;
396ffa86 186
187 do
338fa5f7 188 {
396ffa86 189 if (next == '?')
338fa5f7 190 {
435fb09b 191 if (buffer->cur[0] != '?' || !trigraph_p (pfile))
1e0ef2fd 192 break;
396ffa86 193
1e0ef2fd 194 /* Translate the trigraph. */
195 next = _cpp_trigraph_map[buffer->cur[1]];
196 buffer->cur += 2;
435fb09b 197 if (next != '\\')
396ffa86 198 break;
396ffa86 199 }
200
435fb09b 201 if (buffer->cur == buffer->rlimit)
202 break;
203
1e0ef2fd 204 /* We have a backslash, and room for at least one more
205 character. Skip horizontal whitespace. */
206 saved_cur = buffer->cur;
396ffa86 207 do
1e0ef2fd 208 next1 = *buffer->cur++;
209 while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
f80e83a9 210
396ffa86 211 if (!is_vspace (next1))
338fa5f7 212 {
1e0ef2fd 213 buffer->cur = saved_cur;
338fa5f7 214 break;
215 }
0578f103 216
1e0ef2fd 217 if (saved_cur != buffer->cur - 1
218 && !pfile->state.lexing_comment)
73328dce 219 cpp_error (pfile, DL_WARNING,
220 "backslash and newline separated by space");
338fa5f7 221
1e0ef2fd 222 handle_newline (pfile);
1c124f85 223 buffer->backup_to = buffer->cur;
1e0ef2fd 224 if (buffer->cur == buffer->rlimit)
225 {
73328dce 226 cpp_error (pfile, DL_PEDWARN,
227 "backslash-newline at end of file");
1e0ef2fd 228 next = EOF;
229 }
230 else
231 next = *buffer->cur++;
338fa5f7 232 }
396ffa86 233 while (next == '\\' || next == '?');
f80e83a9 234 }
0578f103 235
338fa5f7 236 return next;
0578f103 237}
238
338fa5f7 239/* Obtain the next character, after trigraph conversion and skipping
1e0ef2fd 240 an arbitrarily long string of escaped newlines. The common case of
241 no trigraphs or escaped newlines falls through quickly. On return,
1c124f85 242 buffer->backup_to points to where to return to if the character is
243 not to be processed. */
338fa5f7 244static cppchar_t
c808d026 245get_effective_char (pfile)
246 cpp_reader *pfile;
852d1b04 247{
435fb09b 248 cppchar_t next;
1c124f85 249 cpp_buffer *buffer = pfile->buffer;
338fa5f7 250
1c124f85 251 buffer->backup_to = buffer->cur;
435fb09b 252 next = *buffer->cur++;
253 if (__builtin_expect (next == '?' || next == '\\', 0))
254 next = skip_escaped_newlines (pfile);
338fa5f7 255
1c124f85 256 return next;
852d1b04 257}
258
338fa5f7 259/* Skip a C-style block comment. We find the end of the comment by
260 seeing if an asterisk is before every '/' we encounter. Returns
261 non-zero if comment terminated by EOF, zero otherwise. */
f80e83a9 262static int
263skip_block_comment (pfile)
0578f103 264 cpp_reader *pfile;
265{
f80e83a9 266 cpp_buffer *buffer = pfile->buffer;
63e1abce 267 cppchar_t c = EOF, prevc = EOF;
338fa5f7 268
f669338a 269 pfile->state.lexing_comment = 1;
338fa5f7 270 while (buffer->cur != buffer->rlimit)
0578f103 271 {
338fa5f7 272 prevc = c, c = *buffer->cur++;
273
338fa5f7 274 /* FIXME: For speed, create a new character class of characters
79bd622b 275 of interest inside block comments. */
338fa5f7 276 if (c == '?' || c == '\\')
1e0ef2fd 277 c = skip_escaped_newlines (pfile);
f80e83a9 278
338fa5f7 279 /* People like decorating comments with '*', so check for '/'
280 instead for efficiency. */
f80e83a9 281 if (c == '/')
0578f103 282 {
338fa5f7 283 if (prevc == '*')
284 break;
f80e83a9 285
338fa5f7 286 /* Warn about potential nested comments, but not if the '/'
3fb1e43b 287 comes immediately before the true comment delimiter.
f80e83a9 288 Don't bother to get it right across escaped newlines. */
338fa5f7 289 if (CPP_OPTION (pfile, warn_comments)
1e0ef2fd 290 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
73328dce 291 cpp_error_with_line (pfile, DL_WARNING,
292 pfile->line, CPP_BUF_COL (buffer),
293 "\"/*\" within comment");
0578f103 294 }
78719282 295 else if (is_vspace (c))
1e0ef2fd 296 handle_newline (pfile);
b86584f6 297 else if (c == '\t')
338fa5f7 298 adjust_column (pfile);
0578f103 299 }
f80e83a9 300
f669338a 301 pfile->state.lexing_comment = 0;
338fa5f7 302 return c != '/' || prevc != '*';
0578f103 303}
304
1c124f85 305/* Skip a C++ line comment, leaving buffer->cur pointing to the
306 terminating newline. Handles escaped newlines. Returns non-zero
307 if a multiline comment. */
f80e83a9 308static int
f669338a 309skip_line_comment (pfile)
310 cpp_reader *pfile;
0578f103 311{
f669338a 312 cpp_buffer *buffer = pfile->buffer;
1ea7ed21 313 unsigned int orig_line = pfile->line;
338fa5f7 314 cppchar_t c;
f80e83a9 315
f669338a 316 pfile->state.lexing_comment = 1;
338fa5f7 317 do
f80e83a9 318 {
338fa5f7 319 if (buffer->cur == buffer->rlimit)
1c124f85 320 goto at_eof;
f80e83a9 321
338fa5f7 322 c = *buffer->cur++;
323 if (c == '?' || c == '\\')
1e0ef2fd 324 c = skip_escaped_newlines (pfile);
f80e83a9 325 }
338fa5f7 326 while (!is_vspace (c));
0578f103 327
1c124f85 328 /* Step back over the newline, except at EOF. */
329 buffer->cur--;
330 at_eof:
331
f669338a 332 pfile->state.lexing_comment = 0;
1ea7ed21 333 return orig_line != pfile->line;
f80e83a9 334}
0578f103 335
338fa5f7 336/* pfile->buffer->cur is one beyond the \t character. Update
337 col_adjust so we track the column correctly. */
b86584f6 338static void
338fa5f7 339adjust_column (pfile)
b86584f6 340 cpp_reader *pfile;
b86584f6 341{
338fa5f7 342 cpp_buffer *buffer = pfile->buffer;
343 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
b86584f6 344
345 /* Round it up to multiple of the tabstop, but subtract 1 since the
346 tab itself occupies a character position. */
338fa5f7 347 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
348 - col % CPP_OPTION (pfile, tabstop)) - 1;
b86584f6 349}
350
338fa5f7 351/* Skips whitespace, saving the next non-whitespace character.
352 Adjusts pfile->col_adjust to account for tabs. Without this,
353 tokens might be assigned an incorrect column. */
435fb09b 354static int
338fa5f7 355skip_whitespace (pfile, c)
f80e83a9 356 cpp_reader *pfile;
338fa5f7 357 cppchar_t c;
f80e83a9 358{
359 cpp_buffer *buffer = pfile->buffer;
338fa5f7 360 unsigned int warned = 0;
0578f103 361
338fa5f7 362 do
f80e83a9 363 {
78719282 364 /* Horizontal space always OK. */
365 if (c == ' ')
338fa5f7 366 ;
78719282 367 else if (c == '\t')
338fa5f7 368 adjust_column (pfile);
369 /* Just \f \v or \0 left. */
78719282 370 else if (c == '\0')
f80e83a9 371 {
435fb09b 372 if (buffer->cur - 1 == buffer->rlimit)
373 return 0;
78719282 374 if (!warned)
338fa5f7 375 {
73328dce 376 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
338fa5f7 377 warned = 1;
378 }
0578f103 379 }
79bd622b 380 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
73328dce 381 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
382 CPP_BUF_COL (buffer),
383 "%s in preprocessing directive",
384 c == '\f' ? "form feed" : "vertical tab");
338fa5f7 385
338fa5f7 386 c = *buffer->cur++;
0578f103 387 }
2c0e001b 388 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
338fa5f7 389 while (is_nvspace (c));
390
1c124f85 391 buffer->cur--;
435fb09b 392 return 1;
f80e83a9 393}
0578f103 394
79bd622b 395/* See if the characters of a number token are valid in a name (no
396 '.', '+' or '-'). */
397static int
398name_p (pfile, string)
399 cpp_reader *pfile;
400 const cpp_string *string;
401{
402 unsigned int i;
403
404 for (i = 0; i < string->len; i++)
405 if (!is_idchar (string->text[i]))
406 return 0;
407
408 return 1;
409}
410
66a5287e 411/* Parse an identifier, skipping embedded backslash-newlines. This is
412 a critical inner loop. The common case is an identifier which has
413 not been split by backslash-newline, does not contain a dollar
414 sign, and has already been scanned (roughly 10:1 ratio of
415 seen:unseen identifiers in normal code; the distribution is
416 Poisson-like). Second most common case is a new identifier, not
417 split and no dollar sign. The other possibilities are rare and
78a11351 418 have been relegated to parse_slow. */
338fa5f7 419static cpp_hashnode *
66a5287e 420parse_identifier (pfile)
0578f103 421 cpp_reader *pfile;
0578f103 422{
79bd622b 423 cpp_hashnode *result;
78a11351 424 const U_CHAR *cur, *base;
66a5287e 425
426 /* Fast-path loop. Skim over a normal identifier.
427 N.B. ISIDNUM does not include $. */
435fb09b 428 cur = pfile->buffer->cur;
429 while (ISIDNUM (*cur))
66a5287e 430 cur++;
66a5287e 431
432 /* Check for slow-path cases. */
435fb09b 433 if (*cur == '?' || *cur == '\\' || *cur == '$')
78a11351 434 {
435 unsigned int len;
436
437 base = parse_slow (pfile, cur, 0, &len);
438 result = (cpp_hashnode *)
439 ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
440 }
66a5287e 441 else
442 {
78a11351 443 base = pfile->buffer->cur - 1;
444 pfile->buffer->cur = cur;
66a5287e 445 result = (cpp_hashnode *)
446 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
66a5287e 447 }
448
449 /* Rarely, identifiers require diagnostics when lexed.
450 XXX Has to be forced out of the fast path. */
451 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
452 && !pfile->state.skipping, 0))
453 {
454 /* It is allowed to poison the same identifier twice. */
455 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
73328dce 456 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
66a5287e 457 NODE_NAME (result));
458
459 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
460 replacement list of a variadic macro. */
461 if (result == pfile->spec_nodes.n__VA_ARGS__
462 && !pfile->state.va_args_ok)
73328dce 463 cpp_error (pfile, DL_PEDWARN,
66a5287e 464 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
465 }
466
467 return result;
468}
469
78a11351 470/* Slow path. This handles numbers and identifiers which have been
471 split, or contain dollar signs. The part of the token from
472 PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
473 1 if it's a number, and 2 if it has a leading period. Returns a
474 pointer to the token's NUL-terminated spelling in permanent
475 storage, and sets PLEN to its length. */
476static U_CHAR *
477parse_slow (pfile, cur, number_p, plen)
66a5287e 478 cpp_reader *pfile;
479 const U_CHAR *cur;
78a11351 480 int number_p;
481 unsigned int *plen;
66a5287e 482{
338fa5f7 483 cpp_buffer *buffer = pfile->buffer;
66a5287e 484 const U_CHAR *base = buffer->cur - 1;
0d086e18 485 struct obstack *stack = &pfile->hash_table->stack;
78a11351 486 unsigned int c, prevc, saw_dollar = 0;
487
488 /* Place any leading period. */
489 if (number_p == 2)
490 obstack_1grow (stack, '.');
66a5287e 491
492 /* Copy the part of the token which is known to be okay. */
493 obstack_grow (stack, base, cur - base);
f80e83a9 494
66a5287e 495 /* Now process the part which isn't. We are looking at one of
496 '$', '\\', or '?' on entry to this loop. */
78a11351 497 prevc = cur[-1];
66a5287e 498 c = *cur++;
499 buffer->cur = cur;
78a11351 500 for (;;)
f80e83a9 501 {
78a11351 502 /* Potential escaped newline? */
503 buffer->backup_to = buffer->cur - 1;
504 if (c == '?' || c == '\\')
505 c = skip_escaped_newlines (pfile);
506
507 if (!is_idchar (c))
508 {
509 if (!number_p)
510 break;
511 if (c != '.' && !VALID_SIGN (c, prevc))
512 break;
513 }
514
515 /* Handle normal identifier characters in this loop. */
516 do
66a5287e 517 {
78a11351 518 prevc = c;
66a5287e 519 obstack_1grow (stack, c);
0578f103 520
66a5287e 521 if (c == '$')
522 saw_dollar++;
71aa9da4 523
66a5287e 524 c = *buffer->cur++;
525 }
78a11351 526 while (is_idchar (c));
f80e83a9 527 }
338fa5f7 528
435fb09b 529 /* Step back over the unwanted char. */
1c124f85 530 BACKUP ();
79bd622b 531
20dd417a 532 /* $ is not an identifier character in the standard, but is commonly
338fa5f7 533 accepted as an extension. Don't warn about it in skipped
534 conditional blocks. */
5e878517 535 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
73328dce 536 cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
338fa5f7 537
78a11351 538 /* Identifiers and numbers are null-terminated. */
539 *plen = obstack_object_size (stack);
0d086e18 540 obstack_1grow (stack, '\0');
78a11351 541 return obstack_finish (stack);
0578f103 542}
543
e484a1cc 544/* Parse a number, beginning with character C, skipping embedded
545 backslash-newlines. LEADING_PERIOD is non-zero if there was a "."
546 before C. Place the result in NUMBER. */
0578f103 547static void
78a11351 548parse_number (pfile, number, leading_period)
0578f103 549 cpp_reader *pfile;
338fa5f7 550 cpp_string *number;
79bd622b 551 int leading_period;
0578f103 552{
78a11351 553 const U_CHAR *cur;
0578f103 554
78a11351 555 /* Fast-path loop. Skim over a normal number.
556 N.B. ISIDNUM does not include $. */
557 cur = pfile->buffer->cur;
558 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
559 cur++;
f669338a 560
78a11351 561 /* Check for slow-path cases. */
562 if (*cur == '?' || *cur == '\\' || *cur == '$')
563 number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
564 else
f80e83a9 565 {
78a11351 566 const U_CHAR *base = pfile->buffer->cur - 1;
567 U_CHAR *dest;
338fa5f7 568
78a11351 569 number->len = cur - base + leading_period;
570 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
571 dest[number->len] = '\0';
572 number->text = dest;
0578f103 573
78a11351 574 if (leading_period)
575 *dest++ = '.';
576 memcpy (dest, base, cur - base);
577 pfile->buffer->cur = cur;
0578f103 578 }
338fa5f7 579}
580
79bd622b 581/* Subroutine of parse_string. */
582static int
583unescaped_terminator_p (pfile, dest)
584 cpp_reader *pfile;
585 const unsigned char *dest;
586{
587 const unsigned char *start, *temp;
588
589 /* In #include-style directives, terminators are not escapeable. */
590 if (pfile->state.angled_headers)
591 return 1;
592
1fdf6039 593 start = BUFF_FRONT (pfile->u_buff);
79bd622b 594
595 /* An odd number of consecutive backslashes represents an escaped
596 terminator. */
597 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
598 ;
599
600 return ((dest - temp) & 1) == 0;
601}
602
338fa5f7 603/* Parses a string, character constant, or angle-bracketed header file
4b0c16ee 604 name. Handles embedded trigraphs and escaped newlines. The stored
605 string is guaranteed NUL-terminated, but it is not guaranteed that
606 this is the first NUL since embedded NULs are preserved.
0578f103 607
1e0ef2fd 608 When this function returns, buffer->cur points to the next
609 character to be processed. */
f80e83a9 610static void
338fa5f7 611parse_string (pfile, token, terminator)
0578f103 612 cpp_reader *pfile;
f80e83a9 613 cpp_token *token;
338fa5f7 614 cppchar_t terminator;
0578f103 615{
f80e83a9 616 cpp_buffer *buffer = pfile->buffer;
79bd622b 617 unsigned char *dest, *limit;
338fa5f7 618 cppchar_t c;
34cf330f 619 bool warned_nulls = false;
338fa5f7 620
1fdf6039 621 dest = BUFF_FRONT (pfile->u_buff);
622 limit = BUFF_LIMIT (pfile->u_buff);
79bd622b 623
338fa5f7 624 for (;;)
0578f103 625 {
1e0ef2fd 626 /* We need room for another char, possibly the terminating NUL. */
1fdf6039 627 if ((size_t) (limit - dest) < 1)
628 {
629 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
e6a5f963 630 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
1fdf6039 631 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
632 limit = BUFF_LIMIT (pfile->u_buff);
633 }
4b0c16ee 634
338fa5f7 635 /* Handle trigraphs, escaped newlines etc. */
1e0ef2fd 636 c = *buffer->cur++;
338fa5f7 637 if (c == '?' || c == '\\')
1e0ef2fd 638 c = skip_escaped_newlines (pfile);
0578f103 639
1e0ef2fd 640 if (c == terminator)
0578f103 641 {
1e0ef2fd 642 if (unescaped_terminator_p (pfile, dest))
643 break;
338fa5f7 644 }
645 else if (is_vspace (c))
646 {
34cf330f 647 /* No string literal may extend over multiple lines. In
648 assembly language, suppress the error except for <>
649 includes. This is a kludge around not knowing where
650 comments are. */
651 unterminated:
652 if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
73328dce 653 cpp_error (pfile, DL_ERROR, "missing terminating %c character",
654 terminator);
34cf330f 655 buffer->cur--;
656 break;
338fa5f7 657 }
435fb09b 658 else if (c == '\0')
338fa5f7 659 {
435fb09b 660 if (buffer->cur - 1 == buffer->rlimit)
34cf330f 661 goto unterminated;
435fb09b 662 if (!warned_nulls)
663 {
664 warned_nulls = true;
73328dce 665 cpp_error (pfile, DL_WARNING,
666 "null character(s) preserved in literal");
435fb09b 667 }
0578f103 668 }
0578f103 669
79bd622b 670 *dest++ = c;
0578f103 671 }
672
4b0c16ee 673 *dest = '\0';
0578f103 674
1fdf6039 675 token->val.str.text = BUFF_FRONT (pfile->u_buff);
676 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
677 BUFF_FRONT (pfile->u_buff) = dest + 1;
338fa5f7 678}
f80e83a9 679
79bd622b 680/* The stored comment includes the comment start and any terminator. */
2c63d6c8 681static void
d3f7919d 682save_comment (pfile, token, from, type)
338fa5f7 683 cpp_reader *pfile;
f80e83a9 684 cpp_token *token;
685 const unsigned char *from;
d3f7919d 686 cppchar_t type;
2c63d6c8 687{
f80e83a9 688 unsigned char *buffer;
d3f7919d 689 unsigned int len, clen;
338fa5f7 690
f0495c2c 691 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1c124f85 692
a543b315 693 /* C++ comments probably (not definitely) have moved past a new
694 line, which we don't want to save in the comment. */
1c124f85 695 if (is_vspace (pfile->buffer->cur[-1]))
a543b315 696 len--;
d3f7919d 697
698 /* If we are currently in a directive, then we need to store all
699 C++ comments as C comments internally, and so we need to
700 allocate a little extra space in that case.
701
702 Note that the only time we encounter a directive here is
703 when we are saving comments in a "#define". */
704 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
705
706 buffer = _cpp_unaligned_alloc (pfile, clen);
f80e83a9 707
f80e83a9 708 token->type = CPP_COMMENT;
d3f7919d 709 token->val.str.len = clen;
338fa5f7 710 token->val.str.text = buffer;
0578f103 711
f0495c2c 712 buffer[0] = '/';
713 memcpy (buffer + 1, from, len - 1);
d3f7919d 714
715 /* Finish conversion to a C comment, if necessary. */
716 if (pfile->state.in_directive && type == '/')
717 {
718 buffer[1] = '*';
719 buffer[clen - 2] = '*';
720 buffer[clen - 1] = '/';
721 }
338fa5f7 722}
0578f103 723
83dcbb5c 724/* Allocate COUNT tokens for RUN. */
725void
726_cpp_init_tokenrun (run, count)
727 tokenrun *run;
728 unsigned int count;
729{
730 run->base = xnewvec (cpp_token, count);
731 run->limit = run->base + count;
732 run->next = NULL;
733}
734
735/* Returns the next tokenrun, or creates one if there is none. */
736static tokenrun *
737next_tokenrun (run)
738 tokenrun *run;
739{
740 if (run->next == NULL)
741 {
742 run->next = xnew (tokenrun);
fb5ab82c 743 run->next->prev = run;
83dcbb5c 744 _cpp_init_tokenrun (run->next, 250);
745 }
746
747 return run->next;
748}
749
f9b5f742 750/* Allocate a single token that is invalidated at the same time as the
751 rest of the tokens on the line. Has its line and col set to the
752 same as the last lexed token, so that diagnostics appear in the
753 right place. */
754cpp_token *
755_cpp_temp_token (pfile)
756 cpp_reader *pfile;
757{
758 cpp_token *old, *result;
759
760 old = pfile->cur_token - 1;
761 if (pfile->cur_token == pfile->cur_run->limit)
762 {
763 pfile->cur_run = next_tokenrun (pfile->cur_run);
764 pfile->cur_token = pfile->cur_run->base;
765 }
766
767 result = pfile->cur_token++;
768 result->line = old->line;
769 result->col = old->col;
770 return result;
771}
772
10b4496a 773/* Lex a token into RESULT (external interface). Takes care of issues
774 like directive handling, token lookahead, multiple include
3fb1e43b 775 optimization and skipping. */
c00e481c 776const cpp_token *
777_cpp_lex_token (pfile)
0578f103 778 cpp_reader *pfile;
83dcbb5c 779{
fb5ab82c 780 cpp_token *result;
83dcbb5c 781
fb5ab82c 782 for (;;)
83dcbb5c 783 {
fb5ab82c 784 if (pfile->cur_token == pfile->cur_run->limit)
83dcbb5c 785 {
fb5ab82c 786 pfile->cur_run = next_tokenrun (pfile->cur_run);
787 pfile->cur_token = pfile->cur_run->base;
83dcbb5c 788 }
789
fb5ab82c 790 if (pfile->lookaheads)
10b4496a 791 {
792 pfile->lookaheads--;
793 result = pfile->cur_token++;
794 }
fb5ab82c 795 else
10b4496a 796 result = _cpp_lex_direct (pfile);
fb5ab82c 797
798 if (result->flags & BOL)
83dcbb5c 799 {
fb5ab82c 800 /* Is this a directive. If _cpp_handle_directive returns
801 false, it is an assembler #. */
802 if (result->type == CPP_HASH
d6af0368 803 /* 6.10.3 p 11: Directives in a list of macro arguments
804 gives undefined behavior. This implementation
805 handles the directive as normal. */
806 && pfile->state.parsing_args != 1
fb5ab82c 807 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
808 continue;
5621a364 809 if (pfile->cb.line_change && !pfile->state.skipping)
810 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
83dcbb5c 811 }
83dcbb5c 812
fb5ab82c 813 /* We don't skip tokens in directives. */
814 if (pfile->state.in_directive)
815 break;
83dcbb5c 816
fb5ab82c 817 /* Outside a directive, invalidate controlling macros. At file
10b4496a 818 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
fb5ab82c 819 get here and MI optimisation works. */
83dcbb5c 820 pfile->mi_valid = false;
fb5ab82c 821
822 if (!pfile->state.skipping || result->type == CPP_EOF)
823 break;
83dcbb5c 824 }
825
c00e481c 826 return result;
83dcbb5c 827}
828
/* Set result->type to THEN_TYPE if the next effective character is
   CHAR.  Otherwise set it to ELSE_TYPE and back up so that the
   character is not consumed.  Expands in terms of local variables
   named "pfile", "buffer" and "result".  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)	\
  do {						\
    if (get_effective_char (pfile) == CHAR)	\
      result->type = THEN_TYPE;			\
    else					\
      {						\
	result->type = ELSE_TYPE;		\
	BACKUP ();				\
      }						\
  } while (0)
839
10b4496a 840/* Lex a token into pfile->cur_token, which is also incremented, to
841 get diagnostics pointing to the correct location.
842
843 Does not handle issues such as token lookahead, multiple-include
844 optimisation, directives, skipping etc. This function is only
845 suitable for use by _cpp_lex_token, and in special cases like
846 lex_expansion_token which doesn't care for any of these issues.
847
848 When meeting a newline, returns CPP_EOF if parsing a directive,
849 otherwise returns to the start of the token buffer if permissible.
850 Returns the location of the lexed token. */
851cpp_token *
852_cpp_lex_direct (pfile)
83dcbb5c 853 cpp_reader *pfile;
0578f103 854{
338fa5f7 855 cppchar_t c;
230f0943 856 cpp_buffer *buffer;
338fa5f7 857 const unsigned char *comment_start;
10b4496a 858 cpp_token *result = pfile->cur_token++;
0653b94e 859
83dcbb5c 860 fresh_line:
230f0943 861 buffer = pfile->buffer;
8c2e2fc5 862 result->flags = buffer->saved_flags;
863 buffer->saved_flags = 0;
83dcbb5c 864 update_tokens_line:
36a0aa7c 865 result->line = pfile->line;
f80e83a9 866
83dcbb5c 867 skipped_white:
1c124f85 868 c = *buffer->cur++;
83dcbb5c 869 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
83dcbb5c 870
871 trigraph:
338fa5f7 872 switch (c)
0578f103 873 {
435fb09b 874 case ' ': case '\t': case '\f': case '\v': case '\0':
875 result->flags |= PREV_WHITE;
876 if (skip_whitespace (pfile, c))
877 goto skipped_white;
878
879 /* EOF. */
880 buffer->cur--;
fb5ab82c 881 buffer->saved_flags = BOL;
83dcbb5c 882 if (!pfile->state.parsing_args && !pfile->state.in_directive)
4dfe8b74 883 {
fb5ab82c 884 if (buffer->cur != buffer->line_base)
83dcbb5c 885 {
886 /* Non-empty files should end in a newline. Don't warn
887 for command line and _Pragma buffers. */
888 if (!buffer->from_stage3)
73328dce 889 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
1e0ef2fd 890 handle_newline (pfile);
5475a165 891 }
fb5ab82c 892
893 /* Don't pop the last buffer. */
894 if (buffer->prev)
895 {
896 unsigned char stop = buffer->return_at_eof;
897
898 _cpp_pop_buffer (pfile);
899 if (!stop)
900 goto fresh_line;
901 }
4dfe8b74 902 }
338fa5f7 903 result->type = CPP_EOF;
83dcbb5c 904 break;
0578f103 905
338fa5f7 906 case '\n': case '\r':
1e0ef2fd 907 handle_newline (pfile);
fb5ab82c 908 buffer->saved_flags = BOL;
909 if (! pfile->state.in_directive)
0578f103 910 {
f9b5f742 911 if (pfile->state.parsing_args == 2)
912 buffer->saved_flags |= PREV_WHITE;
fb5ab82c 913 if (!pfile->keep_tokens)
914 {
915 pfile->cur_run = &pfile->base_run;
916 result = pfile->base_run.base;
917 pfile->cur_token = result + 1;
918 }
919 goto fresh_line;
0578f103 920 }
83dcbb5c 921 result->type = CPP_EOF;
922 break;
732cb4c9 923
338fa5f7 924 case '?':
925 case '\\':
926 /* These could start an escaped newline, or '?' a trigraph. Let
927 skip_escaped_newlines do all the work. */
928 {
1ea7ed21 929 unsigned int line = pfile->line;
338fa5f7 930
1e0ef2fd 931 c = skip_escaped_newlines (pfile);
1ea7ed21 932 if (line != pfile->line)
1e0ef2fd 933 {
1c124f85 934 buffer->cur--;
1e0ef2fd 935 /* We had at least one escaped newline of some sort.
936 Update the token's line and column. */
83dcbb5c 937 goto update_tokens_line;
1e0ef2fd 938 }
1c124f85 939 }
338fa5f7 940
1c124f85 941 /* We are either the original '?' or '\\', or a trigraph. */
942 if (c == '?')
338fa5f7 943 result->type = CPP_QUERY;
1c124f85 944 else if (c == '\\')
945 goto random_char;
946 else
947 goto trigraph;
338fa5f7 948 break;
732cb4c9 949
338fa5f7 950 case '0': case '1': case '2': case '3': case '4':
951 case '5': case '6': case '7': case '8': case '9':
952 result->type = CPP_NUMBER;
78a11351 953 parse_number (pfile, &result->val.str, 0);
338fa5f7 954 break;
732cb4c9 955
78c551ad 956 case 'L':
957 /* 'L' may introduce wide characters or strings. */
958 {
959 const unsigned char *pos = buffer->cur;
338fa5f7 960
78c551ad 961 c = get_effective_char (pfile);
962 if (c == '\'' || c == '"')
963 {
964 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
965 parse_string (pfile, result, c);
966 break;
967 }
968 buffer->cur = pos;
969 }
970 /* Fall through. */
971
972 start_ident:
338fa5f7 973 case '_':
974 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
975 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
976 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
977 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
978 case 'y': case 'z':
979 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
78c551ad 980 case 'G': case 'H': case 'I': case 'J': case 'K':
338fa5f7 981 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
982 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
983 case 'Y': case 'Z':
984 result->type = CPP_NAME;
66a5287e 985 result->val.node = parse_identifier (pfile);
338fa5f7 986
338fa5f7 987 /* Convert named operators to their proper types. */
78c551ad 988 if (result->val.node->flags & NODE_OPERATOR)
338fa5f7 989 {
990 result->flags |= NAMED_OP;
79bd622b 991 result->type = result->val.node->value.operator;
338fa5f7 992 }
993 break;
994
995 case '\'':
996 case '"':
997 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
338fa5f7 998 parse_string (pfile, result, c);
999 break;
f80e83a9 1000
338fa5f7 1001 case '/':
f0495c2c 1002 /* A potential block or line comment. */
1003 comment_start = buffer->cur;
c808d026 1004 c = get_effective_char (pfile);
1c124f85 1005
f0495c2c 1006 if (c == '*')
1007 {
338fa5f7 1008 if (skip_block_comment (pfile))
73328dce 1009 cpp_error (pfile, DL_ERROR, "unterminated comment");
338fa5f7 1010 }
1c124f85 1011 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1012 || CPP_IN_SYSTEM_HEADER (pfile)))
338fa5f7 1013 {
5db5d057 1014 /* Warn about comments only if pedantically GNUC89, and not
1015 in system headers. */
1016 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
66914e49 1017 && ! buffer->warned_cplusplus_comments)
f80e83a9 1018 {
73328dce 1019 cpp_error (pfile, DL_PEDWARN,
f0495c2c 1020 "C++ style comments are not allowed in ISO C89");
73328dce 1021 cpp_error (pfile, DL_PEDWARN,
1022 "(this will be reported only once per input file)");
f0495c2c 1023 buffer->warned_cplusplus_comments = 1;
1024 }
338fa5f7 1025
e1caf668 1026 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
73328dce 1027 cpp_error (pfile, DL_WARNING, "multi-line comment");
f0495c2c 1028 }
1c124f85 1029 else if (c == '=')
1030 {
1031 result->type = CPP_DIV_EQ;
1032 break;
1033 }
1034 else
1035 {
1036 BACKUP ();
1037 result->type = CPP_DIV;
1038 break;
1039 }
338fa5f7 1040
f0495c2c 1041 if (!pfile->state.save_comments)
1042 {
1043 result->flags |= PREV_WHITE;
83dcbb5c 1044 goto update_tokens_line;
338fa5f7 1045 }
f0495c2c 1046
1047 /* Save the comment as a token in its own right. */
d3f7919d 1048 save_comment (pfile, result, comment_start, c);
fb5ab82c 1049 break;
338fa5f7 1050
1051 case '<':
1052 if (pfile->state.angled_headers)
1053 {
1054 result->type = CPP_HEADER_NAME;
1c124f85 1055 parse_string (pfile, result, '>');
1056 break;
338fa5f7 1057 }
0578f103 1058
c808d026 1059 c = get_effective_char (pfile);
338fa5f7 1060 if (c == '=')
1c124f85 1061 result->type = CPP_LESS_EQ;
338fa5f7 1062 else if (c == '<')
1c124f85 1063 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
338fa5f7 1064 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1c124f85 1065 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
338fa5f7 1066 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1067 {
1c124f85 1068 result->type = CPP_OPEN_SQUARE;
338fa5f7 1069 result->flags |= DIGRAPH;
1070 }
1071 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1072 {
1c124f85 1073 result->type = CPP_OPEN_BRACE;
338fa5f7 1074 result->flags |= DIGRAPH;
1075 }
1c124f85 1076 else
1077 {
1078 BACKUP ();
1079 result->type = CPP_LESS;
1080 }
338fa5f7 1081 break;
1082
1083 case '>':
c808d026 1084 c = get_effective_char (pfile);
338fa5f7 1085 if (c == '=')
1c124f85 1086 result->type = CPP_GREATER_EQ;
338fa5f7 1087 else if (c == '>')
1c124f85 1088 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
338fa5f7 1089 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1c124f85 1090 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1091 else
338fa5f7 1092 {
1c124f85 1093 BACKUP ();
1094 result->type = CPP_GREATER;
338fa5f7 1095 }
1096 break;
1097
f669338a 1098 case '%':
1c124f85 1099 c = get_effective_char (pfile);
1100 if (c == '=')
1101 result->type = CPP_MOD_EQ;
1102 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1103 {
1104 result->flags |= DIGRAPH;
1105 result->type = CPP_HASH;
1106 if (get_effective_char (pfile) == '%')
1107 {
1108 const unsigned char *pos = buffer->cur;
1109
1110 if (get_effective_char (pfile) == ':')
1111 result->type = CPP_PASTE;
1112 else
1113 buffer->cur = pos - 1;
1114 }
1115 else
1116 BACKUP ();
1117 }
1118 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1119 {
1120 result->flags |= DIGRAPH;
1121 result->type = CPP_CLOSE_BRACE;
1122 }
1123 else
1124 {
1125 BACKUP ();
1126 result->type = CPP_MOD;
1127 }
338fa5f7 1128 break;
1129
f669338a 1130 case '.':
1c124f85 1131 result->type = CPP_DOT;
1132 c = get_effective_char (pfile);
1133 if (c == '.')
1134 {
1135 const unsigned char *pos = buffer->cur;
1136
1137 if (get_effective_char (pfile) == '.')
1138 result->type = CPP_ELLIPSIS;
1139 else
1140 buffer->cur = pos - 1;
1141 }
1142 /* All known character sets have 0...9 contiguous. */
66a33570 1143 else if (ISDIGIT (c))
1c124f85 1144 {
1145 result->type = CPP_NUMBER;
78a11351 1146 parse_number (pfile, &result->val.str, 1);
1c124f85 1147 }
1148 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1149 result->type = CPP_DOT_STAR;
1150 else
1151 BACKUP ();
338fa5f7 1152 break;
0578f103 1153
338fa5f7 1154 case '+':
c808d026 1155 c = get_effective_char (pfile);
1c124f85 1156 if (c == '+')
1157 result->type = CPP_PLUS_PLUS;
1158 else if (c == '=')
1159 result->type = CPP_PLUS_EQ;
1160 else
1161 {
1162 BACKUP ();
1163 result->type = CPP_PLUS;
1164 }
338fa5f7 1165 break;
ac0749c7 1166
338fa5f7 1167 case '-':
c808d026 1168 c = get_effective_char (pfile);
338fa5f7 1169 if (c == '>')
1170 {
1c124f85 1171 result->type = CPP_DEREF;
1172 if (CPP_OPTION (pfile, cplusplus))
1173 {
1174 if (get_effective_char (pfile) == '*')
1175 result->type = CPP_DEREF_STAR;
1176 else
1177 BACKUP ();
1178 }
338fa5f7 1179 }
338fa5f7 1180 else if (c == '-')
1c124f85 1181 result->type = CPP_MINUS_MINUS;
1182 else if (c == '=')
1183 result->type = CPP_MINUS_EQ;
1184 else
1185 {
1186 BACKUP ();
1187 result->type = CPP_MINUS;
1188 }
338fa5f7 1189 break;
0578f103 1190
338fa5f7 1191 case '&':
c808d026 1192 c = get_effective_char (pfile);
1c124f85 1193 if (c == '&')
1194 result->type = CPP_AND_AND;
1195 else if (c == '=')
1196 result->type = CPP_AND_EQ;
1197 else
1198 {
1199 BACKUP ();
1200 result->type = CPP_AND;
1201 }
338fa5f7 1202 break;
1203
338fa5f7 1204 case '|':
c808d026 1205 c = get_effective_char (pfile);
1c124f85 1206 if (c == '|')
1207 result->type = CPP_OR_OR;
1208 else if (c == '=')
1209 result->type = CPP_OR_EQ;
1210 else
1211 {
1212 BACKUP ();
1213 result->type = CPP_OR;
1214 }
338fa5f7 1215 break;
0578f103 1216
338fa5f7 1217 case ':':
c808d026 1218 c = get_effective_char (pfile);
338fa5f7 1219 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1c124f85 1220 result->type = CPP_SCOPE;
338fa5f7 1221 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1222 {
1223 result->flags |= DIGRAPH;
1c124f85 1224 result->type = CPP_CLOSE_SQUARE;
1225 }
1226 else
1227 {
1228 BACKUP ();
1229 result->type = CPP_COLON;
338fa5f7 1230 }
1231 break;
0578f103 1232
1c124f85 1233 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1234 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1235 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1236 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1237 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1238
338fa5f7 1239 case '~': result->type = CPP_COMPL; break;
1240 case ',': result->type = CPP_COMMA; break;
1241 case '(': result->type = CPP_OPEN_PAREN; break;
1242 case ')': result->type = CPP_CLOSE_PAREN; break;
1243 case '[': result->type = CPP_OPEN_SQUARE; break;
1244 case ']': result->type = CPP_CLOSE_SQUARE; break;
1245 case '{': result->type = CPP_OPEN_BRACE; break;
1246 case '}': result->type = CPP_CLOSE_BRACE; break;
1247 case ';': result->type = CPP_SEMICOLON; break;
1248
9ee99ac6 1249 /* @ is a punctuator in Objective C. */
1250 case '@': result->type = CPP_ATSIGN; break;
338fa5f7 1251
78c551ad 1252 case '$':
1253 if (CPP_OPTION (pfile, dollars_in_ident))
1254 goto start_ident;
1255 /* Fall through... */
1256
338fa5f7 1257 random_char:
1258 default:
1259 result->type = CPP_OTHER;
33344a1c 1260 result->val.c = c;
338fa5f7 1261 break;
1262 }
fb5ab82c 1263
1264 return result;
338fa5f7 1265}
1266
e484a1cc 1267/* An upper bound on the number of bytes needed to spell TOKEN,
79bd622b 1268 including preceding whitespace. */
1269unsigned int
1270cpp_token_len (token)
1271 const cpp_token *token;
338fa5f7 1272{
79bd622b 1273 unsigned int len;
cfad5579 1274
79bd622b 1275 switch (TOKEN_SPELL (token))
f80e83a9 1276 {
c86dbc5b 1277 default: len = 0; break;
8d27e472 1278 case SPELL_NUMBER:
c86dbc5b 1279 case SPELL_STRING: len = token->val.str.len; break;
1280 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
f80e83a9 1281 }
8d27e472 1282 /* 1 for whitespace, 4 for comment delimiters. */
79bd622b 1283 return len + 5;
cfad5579 1284}
1285
f80e83a9 1286/* Write the spelling of a token TOKEN to BUFFER. The buffer must
c5ea33a8 1287 already contain the enough space to hold the token's spelling.
1288 Returns a pointer to the character after the last character
1289 written. */
79bd622b 1290unsigned char *
1291cpp_spell_token (pfile, token, buffer)
f80e83a9 1292 cpp_reader *pfile; /* Would be nice to be rid of this... */
1293 const cpp_token *token;
1294 unsigned char *buffer;
1295{
7e842f95 1296 switch (TOKEN_SPELL (token))
f80e83a9 1297 {
1298 case SPELL_OPERATOR:
1299 {
1300 const unsigned char *spelling;
1301 unsigned char c;
ab12a39c 1302
f80e83a9 1303 if (token->flags & DIGRAPH)
ee6c4e4b 1304 spelling
1305 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
31674461 1306 else if (token->flags & NAMED_OP)
1307 goto spell_ident;
f80e83a9 1308 else
7e842f95 1309 spelling = TOKEN_NAME (token);
f80e83a9 1310
1311 while ((c = *spelling++) != '\0')
1312 *buffer++ = c;
1313 }
1314 break;
ab12a39c 1315
8d27e472 1316 case SPELL_CHAR:
1317 *buffer++ = token->val.c;
1318 break;
1319
1320 spell_ident:
f80e83a9 1321 case SPELL_IDENT:
c86dbc5b 1322 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1323 buffer += NODE_LEN (token->val.node);
f80e83a9 1324 break;
ab12a39c 1325
8d27e472 1326 case SPELL_NUMBER:
1327 memcpy (buffer, token->val.str.text, token->val.str.len);
1328 buffer += token->val.str.len;
1329 break;
1330
f80e83a9 1331 case SPELL_STRING:
1332 {
71aa9da4 1333 int left, right, tag;
1334 switch (token->type)
1335 {
1336 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1337 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
71aa9da4 1338 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1339 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1340 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
8d27e472 1341 default:
73328dce 1342 cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1343 TOKEN_NAME (token));
8d27e472 1344 return buffer;
71aa9da4 1345 }
1346 if (tag) *buffer++ = tag;
8d27e472 1347 *buffer++ = left;
76faa4c0 1348 memcpy (buffer, token->val.str.text, token->val.str.len);
1349 buffer += token->val.str.len;
8d27e472 1350 *buffer++ = right;
f80e83a9 1351 }
1352 break;
ab12a39c 1353
f80e83a9 1354 case SPELL_NONE:
73328dce 1355 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
f80e83a9 1356 break;
1357 }
ab12a39c 1358
f80e83a9 1359 return buffer;
1360}
ab12a39c 1361
e484a1cc 1362/* Returns TOKEN spelt as a null-terminated string. The string is
1363 freed when the reader is destroyed. Useful for diagnostics. */
79bd622b 1364unsigned char *
1365cpp_token_as_text (pfile, token)
6060326b 1366 cpp_reader *pfile;
f80e83a9 1367 const cpp_token *token;
6060326b 1368{
79bd622b 1369 unsigned int len = cpp_token_len (token);
1fdf6039 1370 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
6060326b 1371
79bd622b 1372 end = cpp_spell_token (pfile, token, start);
1373 end[0] = '\0';
6060326b 1374
79bd622b 1375 return start;
1376}
6060326b 1377
e484a1cc 1378/* Used by C front ends, which really should move to using
1379 cpp_token_as_text. */
79bd622b 1380const char *
1381cpp_type2name (type)
1382 enum cpp_ttype type;
1383{
1384 return (const char *) token_spellings[type].name;
1385}
6060326b 1386
f9b5f742 1387/* Writes the spelling of token to FP, without any preceding space.
1388 Separated from cpp_spell_token for efficiency - to avoid stdio
1389 double-buffering. */
79bd622b 1390void
1391cpp_output_token (token, fp)
1392 const cpp_token *token;
1393 FILE *fp;
1394{
79bd622b 1395 switch (TOKEN_SPELL (token))
6060326b 1396 {
79bd622b 1397 case SPELL_OPERATOR:
1398 {
1399 const unsigned char *spelling;
28874558 1400 int c;
6060326b 1401
79bd622b 1402 if (token->flags & DIGRAPH)
ee6c4e4b 1403 spelling
1404 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
79bd622b 1405 else if (token->flags & NAMED_OP)
1406 goto spell_ident;
1407 else
1408 spelling = TOKEN_NAME (token);
f80e83a9 1409
28874558 1410 c = *spelling;
1411 do
1412 putc (c, fp);
1413 while ((c = *++spelling) != '\0');
79bd622b 1414 }
1415 break;
f80e83a9 1416
8d27e472 1417 case SPELL_CHAR:
1418 putc (token->val.c, fp);
1419 break;
1420
79bd622b 1421 spell_ident:
1422 case SPELL_IDENT:
28874558 1423 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
79bd622b 1424 break;
f80e83a9 1425
8d27e472 1426 case SPELL_NUMBER:
1427 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1428 break;
1429
79bd622b 1430 case SPELL_STRING:
1431 {
1432 int left, right, tag;
1433 switch (token->type)
1434 {
1435 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1436 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
79bd622b 1437 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1438 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1439 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
8d27e472 1440 default:
1441 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1442 return;
79bd622b 1443 }
1444 if (tag) putc (tag, fp);
8d27e472 1445 putc (left, fp);
79bd622b 1446 fwrite (token->val.str.text, 1, token->val.str.len, fp);
8d27e472 1447 putc (right, fp);
79bd622b 1448 }
1449 break;
6060326b 1450
79bd622b 1451 case SPELL_NONE:
1452 /* An error, most probably. */
1453 break;
f80e83a9 1454 }
6060326b 1455}
1456
79bd622b 1457/* Compare two tokens. */
1458int
1459_cpp_equiv_tokens (a, b)
1460 const cpp_token *a, *b;
6060326b 1461{
79bd622b 1462 if (a->type == b->type && a->flags == b->flags)
1463 switch (TOKEN_SPELL (a))
1464 {
1465 default: /* Keep compiler happy. */
1466 case SPELL_OPERATOR:
1467 return 1;
1468 case SPELL_CHAR:
33344a1c 1469 return a->val.c == b->val.c; /* Character. */
79bd622b 1470 case SPELL_NONE:
588d632b 1471 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
79bd622b 1472 case SPELL_IDENT:
1473 return a->val.node == b->val.node;
8d27e472 1474 case SPELL_NUMBER:
79bd622b 1475 case SPELL_STRING:
1476 return (a->val.str.len == b->val.str.len
1477 && !memcmp (a->val.str.text, b->val.str.text,
1478 a->val.str.len));
1479 }
6060326b 1480
f80e83a9 1481 return 0;
1482}
1483
79bd622b 1484/* Returns nonzero if a space should be inserted to avoid an
1485 accidental token paste for output. For simplicity, it is
1486 conservative, and occasionally advises a space where one is not
1487 needed, e.g. "." and ".2". */
79bd622b 1488int
1489cpp_avoid_paste (pfile, token1, token2)
6060326b 1490 cpp_reader *pfile;
79bd622b 1491 const cpp_token *token1, *token2;
6060326b 1492{
79bd622b 1493 enum cpp_ttype a = token1->type, b = token2->type;
1494 cppchar_t c;
6060326b 1495
79bd622b 1496 if (token1->flags & NAMED_OP)
1497 a = CPP_NAME;
1498 if (token2->flags & NAMED_OP)
1499 b = CPP_NAME;
6060326b 1500
79bd622b 1501 c = EOF;
1502 if (token2->flags & DIGRAPH)
ee6c4e4b 1503 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
79bd622b 1504 else if (token_spellings[b].category == SPELL_OPERATOR)
1505 c = token_spellings[b].name[0];
6060326b 1506
79bd622b 1507 /* Quickly get everything that can paste with an '='. */
ee6c4e4b 1508 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
79bd622b 1509 return 1;
6060326b 1510
79bd622b 1511 switch (a)
6060326b 1512 {
79bd622b 1513 case CPP_GREATER: return c == '>' || c == '?';
1514 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1515 case CPP_PLUS: return c == '+';
1516 case CPP_MINUS: return c == '-' || c == '>';
1517 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1518 case CPP_MOD: return c == ':' || c == '>';
1519 case CPP_AND: return c == '&';
1520 case CPP_OR: return c == '|';
1521 case CPP_COLON: return c == ':' || c == '>';
1522 case CPP_DEREF: return c == '*';
efdcc728 1523 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
79bd622b 1524 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1525 case CPP_NAME: return ((b == CPP_NUMBER
1526 && name_p (pfile, &token2->val.str))
1527 || b == CPP_NAME
1528 || b == CPP_CHAR || b == CPP_STRING); /* L */
1529 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1530 || c == '.' || c == '+' || c == '-');
1531 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
33344a1c 1532 && token1->val.c == '@'
79bd622b 1533 && (b == CPP_NAME || b == CPP_STRING));
1534 default: break;
6060326b 1535 }
6060326b 1536
deb356cf 1537 return 0;
6060326b 1538}
1539
79bd622b 1540/* Output all the remaining tokens on the current line, and a newline
f9b5f742 1541 character, to FP. Leading whitespace is removed. If there are
1542 macros, special token padding is not performed. */
6060326b 1543void
79bd622b 1544cpp_output_line (pfile, fp)
6060326b 1545 cpp_reader *pfile;
79bd622b 1546 FILE *fp;
6060326b 1547{
f9b5f742 1548 const cpp_token *token;
7e842f95 1549
f9b5f742 1550 token = cpp_get_token (pfile);
1551 while (token->type != CPP_EOF)
7e842f95 1552 {
f9b5f742 1553 cpp_output_token (token, fp);
1554 token = cpp_get_token (pfile);
1555 if (token->flags & PREV_WHITE)
1556 putc (' ', fp);
7e842f95 1557 }
1558
79bd622b 1559 putc ('\n', fp);
f80e83a9 1560}
6060326b 1561
/* Returns the value of the hexadecimal digit C.  Aborts if C is not
   a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1572
c8342759 1573/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1574 failure if cpplib is not parsing C++ or C99. Such failure is
1575 silent, and no variables are updated. Otherwise returns 0, and
1576 warns if -Wtraditional.
8330799c 1577
1578 [lex.charset]: The character designated by the universal character
1579 name \UNNNNNNNN is that character whose character short name in
1580 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1581 universal character name \uNNNN is that character whose character
1582 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1583 for a universal character name is less than 0x20 or in the range
1584 0x7F-0x9F (inclusive), or if the universal character name
1585 designates a character in the basic source character set, then the
1586 program is ill-formed.
1587
1588 We assume that wchar_t is Unicode, so we don't need to do any
c8342759 1589 mapping. Is this ever wrong?
8330799c 1590
c8342759 1591 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1592 LIMIT is the end of the string or charconst. PSTR is updated to
1593 point after the UCS on return, and the UCS is written into PC. */
1594
1595static int
1596maybe_read_ucs (pfile, pstr, limit, pc)
8330799c 1597 cpp_reader *pfile;
1598 const unsigned char **pstr;
1599 const unsigned char *limit;
c8342759 1600 unsigned int *pc;
8330799c 1601{
1602 const unsigned char *p = *pstr;
c8342759 1603 unsigned int code = 0;
1604 unsigned int c = *pc, length;
1605
1606 /* Only attempt to interpret a UCS for C++ and C99. */
1607 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1608 return 1;
8330799c 1609
c8342759 1610 if (CPP_WTRADITIONAL (pfile))
73328dce 1611 cpp_error (pfile, DL_WARNING,
1612 "the meaning of '\\%c' is different in traditional C", c);
8330799c 1613
f73bab03 1614 length = (c == 'u' ? 4: 8);
1615
1616 if ((size_t) (limit - p) < length)
1617 {
73328dce 1618 cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
f73bab03 1619 /* Skip to the end to avoid more diagnostics. */
1620 p = limit;
1621 }
1622 else
1623 {
1624 for (; length; length--, p++)
8330799c 1625 {
f73bab03 1626 c = *p;
1627 if (ISXDIGIT (c))
1628 code = (code << 4) + hex_digit_value (c);
1629 else
1630 {
73328dce 1631 cpp_error (pfile, DL_ERROR,
f73bab03 1632 "non-hex digit '%c' in universal-character-name", c);
1633 /* We shouldn't skip in case there are multibyte chars. */
1634 break;
1635 }
8330799c 1636 }
8330799c 1637 }
1638
1639#ifdef TARGET_EBCDIC
73328dce 1640 cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
8330799c 1641 code = 0x3f; /* EBCDIC invalid character */
1642#else
f73bab03 1643 /* True extended characters are OK. */
1644 if (code >= 0xa0
1645 && !(code & 0x80000000)
1646 && !(code >= 0xD800 && code <= 0xDFFF))
1647 ;
1648 /* The standard permits $, @ and ` to be specified as UCNs. We use
1649 hex escapes so that this also works with EBCDIC hosts. */
1650 else if (code == 0x24 || code == 0x40 || code == 0x60)
1651 ;
1652 /* Don't give another error if one occurred above. */
1653 else if (length == 0)
73328dce 1654 cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
8330799c 1655#endif
1656
1657 *pstr = p;
c8342759 1658 *pc = code;
1659 return 0;
8330799c 1660}
1661
1662/* Interpret an escape sequence, and return its value. PSTR points to
1663 the input pointer, which is just after the backslash. LIMIT is how
c8342759 1664 much text we have. MASK is a bitmask for the precision for the
455730ef 1665 destination type (char or wchar_t).
8330799c 1666
c8342759 1667 Handles all relevant diagnostics. */
c8342759 1668unsigned int
455730ef 1669cpp_parse_escape (pfile, pstr, limit, mask)
8330799c 1670 cpp_reader *pfile;
1671 const unsigned char **pstr;
1672 const unsigned char *limit;
c8342759 1673 unsigned HOST_WIDE_INT mask;
8330799c 1674{
1675 int unknown = 0;
1676 const unsigned char *str = *pstr;
1677 unsigned int c = *str++;
1678
1679 switch (c)
1680 {
1681 case '\\': case '\'': case '"': case '?': break;
1682 case 'b': c = TARGET_BS; break;
1683 case 'f': c = TARGET_FF; break;
1684 case 'n': c = TARGET_NEWLINE; break;
1685 case 'r': c = TARGET_CR; break;
1686 case 't': c = TARGET_TAB; break;
1687 case 'v': c = TARGET_VT; break;
1688
1689 case '(': case '{': case '[': case '%':
1690 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1691 '\%' is used to prevent SCCS from getting confused. */
1692 unknown = CPP_PEDANTIC (pfile);
1693 break;
1694
1695 case 'a':
1696 if (CPP_WTRADITIONAL (pfile))
73328dce 1697 cpp_error (pfile, DL_WARNING,
1698 "the meaning of '\\a' is different in traditional C");
0b3481a4 1699 c = TARGET_BELL;
8330799c 1700 break;
1701
1702 case 'e': case 'E':
1703 if (CPP_PEDANTIC (pfile))
73328dce 1704 cpp_error (pfile, DL_PEDWARN,
1705 "non-ISO-standard escape sequence, '\\%c'", c);
8330799c 1706 c = TARGET_ESC;
1707 break;
1708
8330799c 1709 case 'u': case 'U':
c8342759 1710 unknown = maybe_read_ucs (pfile, &str, limit, &c);
8330799c 1711 break;
1712
1713 case 'x':
1714 if (CPP_WTRADITIONAL (pfile))
73328dce 1715 cpp_error (pfile, DL_WARNING,
1716 "the meaning of '\\x' is different in traditional C");
8330799c 1717
8330799c 1718 {
1719 unsigned int i = 0, overflow = 0;
1720 int digits_found = 0;
1721
1722 while (str < limit)
1723 {
1724 c = *str;
1725 if (! ISXDIGIT (c))
1726 break;
1727 str++;
1728 overflow |= i ^ (i << 4 >> 4);
1729 i = (i << 4) + hex_digit_value (c);
1730 digits_found = 1;
1731 }
1732
1733 if (!digits_found)
73328dce 1734 cpp_error (pfile, DL_ERROR,
1735 "\\x used with no following hex digits");
8330799c 1736
1737 if (overflow | (i != (i & mask)))
1738 {
73328dce 1739 cpp_error (pfile, DL_PEDWARN,
1740 "hex escape sequence out of range");
8330799c 1741 i &= mask;
1742 }
1743 c = i;
1744 }
1745 break;
1746
1747 case '0': case '1': case '2': case '3':
1748 case '4': case '5': case '6': case '7':
1749 {
1750 unsigned int i = c - '0';
1751 int count = 0;
1752
1753 while (str < limit && ++count < 3)
1754 {
1755 c = *str;
1756 if (c < '0' || c > '7')
1757 break;
1758 str++;
1759 i = (i << 3) + c - '0';
1760 }
1761
1762 if (i != (i & mask))
1763 {
73328dce 1764 cpp_error (pfile, DL_PEDWARN,
1765 "octal escape sequence out of range");
8330799c 1766 i &= mask;
1767 }
1768 c = i;
1769 }
1770 break;
1771
1772 default:
1773 unknown = 1;
1774 break;
1775 }
1776
1777 if (unknown)
1778 {
1779 if (ISGRAPH (c))
73328dce 1780 cpp_error (pfile, DL_PEDWARN, "unknown escape sequence '\\%c'", c);
8330799c 1781 else
73328dce 1782 cpp_error (pfile, DL_PEDWARN, "unknown escape sequence: '\\%03o'", c);
8330799c 1783 }
1784
c8342759 1785 if (c > mask)
73328dce 1786 cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type");
c8342759 1787
8330799c 1788 *pstr = str;
1789 return c;
1790}
1791
/* Fall back to the plain character type sizes when no separate
   maximum sizes have been defined.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1799
1800/* Interpret a (possibly wide) character constant in TOKEN.
455730ef 1801 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN points
1802 to a variable that is filled in with the number of characters seen. */
8330799c 1803HOST_WIDE_INT
455730ef 1804cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
8330799c 1805 cpp_reader *pfile;
1806 const cpp_token *token;
1807 int warn_multi;
8330799c 1808 unsigned int *pchars_seen;
1809{
1810 const unsigned char *str = token->val.str.text;
1811 const unsigned char *limit = str + token->val.str.len;
1812 unsigned int chars_seen = 0;
1813 unsigned int width, max_chars, c;
0d086e18 1814 unsigned HOST_WIDE_INT mask;
1815 HOST_WIDE_INT result = 0;
b3a9482f 1816 bool unsigned_p;
8330799c 1817
1818#ifdef MULTIBYTE_CHARS
1819 (void) local_mbtowc (NULL, NULL, 0);
1820#endif
1821
1822 /* Width in bits. */
1823 if (token->type == CPP_CHAR)
b3a9482f 1824 {
1825 width = MAX_CHAR_TYPE_SIZE;
1826 unsigned_p = CPP_OPTION (pfile, signed_char) == 0;
1827 }
8330799c 1828 else
b3a9482f 1829 {
1830 width = MAX_WCHAR_TYPE_SIZE;
1831 unsigned_p = WCHAR_UNSIGNED;
1832 }
8330799c 1833
1834 if (width < HOST_BITS_PER_WIDE_INT)
1835 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1836 else
1837 mask = ~0;
1838 max_chars = HOST_BITS_PER_WIDE_INT / width;
1839
1840 while (str < limit)
1841 {
1842#ifdef MULTIBYTE_CHARS
1843 wchar_t wc;
1844 int char_len;
1845
1846 char_len = local_mbtowc (&wc, str, limit - str);
1847 if (char_len == -1)
1848 {
73328dce 1849 cpp_error (pfile, DL_WARNING,
1850 "ignoring invalid multibyte character");
8330799c 1851 c = *str++;
1852 }
1853 else
1854 {
1855 str += char_len;
1856 c = wc;
1857 }
1858#else
1859 c = *str++;
1860#endif
1861
1862 if (c == '\\')
455730ef 1863 c = cpp_parse_escape (pfile, &str, limit, mask);
8330799c 1864
1865#ifdef MAP_CHARACTER
1866 if (ISPRINT (c))
1867 c = MAP_CHARACTER (c);
1868#endif
1869
1870 /* Merge character into result; ignore excess chars. */
1871 if (++chars_seen <= max_chars)
1872 {
1873 if (width < HOST_BITS_PER_WIDE_INT)
1874 result = (result << width) | (c & mask);
1875 else
1876 result = c;
1877 }
1878 }
1879
1880 if (chars_seen == 0)
73328dce 1881 cpp_error (pfile, DL_ERROR, "empty character constant");
8330799c 1882 else if (chars_seen > max_chars)
1883 {
1884 chars_seen = max_chars;
73328dce 1885 cpp_error (pfile, DL_WARNING, "character constant too long");
8330799c 1886 }
455730ef 1887 else if (chars_seen > 1 && warn_multi)
73328dce 1888 cpp_error (pfile, DL_WARNING, "multi-character character constant");
8330799c 1889
b3a9482f 1890 /* If relevant type is signed, sign-extend the constant. */
1891 if (chars_seen)
8330799c 1892 {
1893 unsigned int nbits = chars_seen * width;
8330799c 1894
43653c0a 1895 mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits);
b3a9482f 1896 if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0)
8330799c 1897 result &= mask;
1898 else
1899 result |= ~mask;
1900 }
1901
1902 *pchars_seen = chars_seen;
1903 return result;
1904}
1905
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that an expression argument
   cannot regroup with the surrounding arithmetic.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1920
/* The offset of member U within this structure is used as the default
   alignment for allocation buffers: a single char followed by a union
   of a double and a pointer.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to the next multiple of ALIGN.  The masking assumes
   ALIGN is a power of two.  */
#define CPP_ALIGN(size, align) \
        (((size) + ((align) - 1)) & ~((align) - 1))
1933
1785b647 1934/* Create a new allocation buffer. Place the control block at the end
1935 of the buffer, so that buffer overflows will cause immediate chaos. */
06c92cbc 1936static _cpp_buff *
1937new_buff (len)
4b31a107 1938 size_t len;
06c92cbc 1939{
1940 _cpp_buff *result;
1fdf6039 1941 unsigned char *base;
06c92cbc 1942
084163dc 1943 if (len < MIN_BUFF_SIZE)
1944 len = MIN_BUFF_SIZE;
06c92cbc 1945 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
1946
1947 base = xmalloc (len + sizeof (_cpp_buff));
1948 result = (_cpp_buff *) (base + len);
1949 result->base = base;
1950 result->cur = base;
1951 result->limit = base + len;
1952 result->next = NULL;
1953 return result;
1954}
1955
1956/* Place a chain of unwanted allocation buffers on the free list. */
1957void
1958_cpp_release_buff (pfile, buff)
1959 cpp_reader *pfile;
1960 _cpp_buff *buff;
1961{
1962 _cpp_buff *end = buff;
1963
1964 while (end->next)
1965 end = end->next;
1966 end->next = pfile->free_buffs;
1967 pfile->free_buffs = buff;
1968}
1969
1970/* Return a free buffer of size at least MIN_SIZE. */
1971_cpp_buff *
1972_cpp_get_buff (pfile, min_size)
1973 cpp_reader *pfile;
4b31a107 1974 size_t min_size;
06c92cbc 1975{
1976 _cpp_buff *result, **p;
1977
1978 for (p = &pfile->free_buffs;; p = &(*p)->next)
1979 {
4b31a107 1980 size_t size;
084163dc 1981
1982 if (*p == NULL)
06c92cbc 1983 return new_buff (min_size);
084163dc 1984 result = *p;
1985 size = result->limit - result->base;
1986 /* Return a buffer that's big enough, but don't waste one that's
1987 way too big. */
4085c149 1988 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
06c92cbc 1989 break;
1990 }
1991
1992 *p = result->next;
1993 result->next = NULL;
1994 result->cur = result->base;
1995 return result;
1996}
1997
20dd417a 1998/* Creates a new buffer with enough space to hold the uncommitted
e6a5f963 1999 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2000 the excess bytes to the new buffer. Chains the new buffer after
2001 BUFF, and returns the new buffer. */
06c92cbc 2002_cpp_buff *
e6a5f963 2003_cpp_append_extend_buff (pfile, buff, min_extra)
06c92cbc 2004 cpp_reader *pfile;
2005 _cpp_buff *buff;
4b31a107 2006 size_t min_extra;
06c92cbc 2007{
4b31a107 2008 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
e6a5f963 2009 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
06c92cbc 2010
e6a5f963 2011 buff->next = new_buff;
2012 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2013 return new_buff;
2014}
2015
20dd417a 2016/* Creates a new buffer with enough space to hold the uncommitted
e6a5f963 2017 remaining bytes of the buffer pointed to by BUFF, and at least
2018 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2019 Chains the new buffer before the buffer pointed to by BUFF, and
2020 updates the pointer to point to the new buffer. */
2021void
2022_cpp_extend_buff (pfile, pbuff, min_extra)
2023 cpp_reader *pfile;
2024 _cpp_buff **pbuff;
2025 size_t min_extra;
2026{
2027 _cpp_buff *new_buff, *old_buff = *pbuff;
2028 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2029
2030 new_buff = _cpp_get_buff (pfile, size);
2031 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2032 new_buff->next = old_buff;
2033 *pbuff = new_buff;
06c92cbc 2034}
2035
2036/* Free a chain of buffers starting at BUFF. */
2037void
2038_cpp_free_buff (buff)
2039 _cpp_buff *buff;
2040{
2041 _cpp_buff *next;
2042
2043 for (; buff; buff = next)
2044 {
2045 next = buff->next;
2046 free (buff->base);
2047 }
2048}
deb356cf 2049
1fdf6039 2050/* Allocate permanent, unaligned storage of length LEN. */
2051unsigned char *
2052_cpp_unaligned_alloc (pfile, len)
2053 cpp_reader *pfile;
2054 size_t len;
2055{
2056 _cpp_buff *buff = pfile->u_buff;
2057 unsigned char *result = buff->cur;
2058
2059 if (len > (size_t) (buff->limit - result))
2060 {
2061 buff = _cpp_get_buff (pfile, len);
2062 buff->next = pfile->u_buff;
2063 pfile->u_buff = buff;
2064 result = buff->cur;
2065 }
2066
2067 buff->cur = result + len;
2068 return result;
2069}
2070
1e0ef2fd 2071/* Allocate permanent, unaligned storage of length LEN from a_buff.
2072 That buffer is used for growing allocations when saving macro
2073 replacement lists in a #define, and when parsing an answer to an
2074 assertion in #assert, #unassert or #if (and therefore possibly
2075 whilst expanding macros). It therefore must not be used by any
2076 code that they might call: specifically the lexer and the guts of
2077 the macro expander.
2078
2079 All existing other uses clearly fit this restriction: storing
2080 registered pragmas during initialization. */
79bd622b 2081unsigned char *
e6a5f963 2082_cpp_aligned_alloc (pfile, len)
2083 cpp_reader *pfile;
2084 size_t len;
89b05ef6 2085{
e6a5f963 2086 _cpp_buff *buff = pfile->a_buff;
2087 unsigned char *result = buff->cur;
89b05ef6 2088
e6a5f963 2089 if (len > (size_t) (buff->limit - result))
89b05ef6 2090 {
e6a5f963 2091 buff = _cpp_get_buff (pfile, len);
2092 buff->next = pfile->a_buff;
2093 pfile->a_buff = buff;
2094 result = buff->cur;
89b05ef6 2095 }
f80e83a9 2096
e6a5f963 2097 buff->cur = result + len;
79bd622b 2098 return result;
f80e83a9 2099}