]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/cpplex.c
Daily bump.
[thirdparty/gcc.git] / gcc / cpplex.c
CommitLineData
0578f103 1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
6060326b 7 Single-pass line tokenization by Neil Booth, April 2000
0578f103 8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
79bd622b 23/* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
27
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
f80e83a9 36
0578f103 37#include "config.h"
38#include "system.h"
0578f103 39#include "cpplib.h"
40#include "cpphash.h"
41
8330799c 42/* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
45#ifdef CROSS_COMPILE
46#undef MULTIBYTE_CHARS
47#endif
48
49#ifdef MULTIBYTE_CHARS
50#include "mbchar.h"
51#include <locale.h>
52#endif
53
/* Spelling categories for token types.  Tokens with SPELL_STRING
   store their spelling in the token list, and its length in
   token->val.name.len.
   NOTE(review): the lexing routines in this file fill in val.str.len,
   not val.name.len; confirm which member name is current.  */
enum spell_type
{
  SPELL_OPERATOR = 0,   /* Category of every OP entry in TTYPE_TABLE.  */
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_STRING,         /* Spelling is kept with the token itself.  */
  SPELL_NONE
};
64
79bd622b 65struct token_spelling
241e762e 66{
79bd622b 67 enum spell_type category;
68 const unsigned char *name;
241e762e 69};
70
79bd622b 71const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
72 U":>", U"<%", U"%>"};
73
74#define OP(e, s) { SPELL_OPERATOR, U s },
75#define TK(e, s) { s, U STRINGX (e) },
76const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
77#undef OP
78#undef TK
79
80#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
e2f9a79f 82
36a0aa7c 83static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
c808d026 84static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
85static cppchar_t get_effective_char PARAMS ((cpp_reader *));
338fa5f7 86
f80e83a9 87static int skip_block_comment PARAMS ((cpp_reader *));
f669338a 88static int skip_line_comment PARAMS ((cpp_reader *));
338fa5f7 89static void adjust_column PARAMS ((cpp_reader *));
90static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
66a5287e 91static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
92static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
93 const U_CHAR *));
79bd622b 94static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
95static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
338fa5f7 96static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
79bd622b 97static void unterminated PARAMS ((cpp_reader *, int));
338fa5f7 98static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
99static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
c808d026 100static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
f669338a 101static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
79bd622b 102static int name_p PARAMS ((cpp_reader *, const cpp_string *));
c8342759 103static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
104 const unsigned char *, unsigned int *));
83dcbb5c 105static tokenrun *next_tokenrun PARAMS ((tokenrun *));
e916a356 106
79bd622b 107static cpp_chunk *new_chunk PARAMS ((unsigned int));
084163dc 108static int chunk_suitable PARAMS ((cpp_chunk *, unsigned int));
8330799c 109static unsigned int hex_digit_value PARAMS ((unsigned int));
06c92cbc 110static _cpp_buff *new_buff PARAMS ((unsigned int));
bce8e0c0 111
f80e83a9 112/* Utility routine:
2c63d6c8 113
76faa4c0 114 Compares, the token TOKEN to the NUL-terminated string STRING.
115 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
bce8e0c0 116
f80e83a9 117int
76faa4c0 118cpp_ideq (token, string)
119 const cpp_token *token;
f80e83a9 120 const char *string;
121{
76faa4c0 122 if (token->type != CPP_NAME)
f80e83a9 123 return 0;
76faa4c0 124
c86dbc5b 125 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
bce8e0c0 126}
50fd6b48 127
338fa5f7 128/* Call when meeting a newline. Returns the character after the newline
129 (or carriage-return newline combination), or EOF. */
130static cppchar_t
36a0aa7c 131handle_newline (pfile, newline_char)
132 cpp_reader *pfile;
338fa5f7 133 cppchar_t newline_char;
134{
36a0aa7c 135 cpp_buffer *buffer;
338fa5f7 136 cppchar_t next = EOF;
137
36a0aa7c 138 pfile->line++;
36a0aa7c 139 buffer = pfile->buffer;
338fa5f7 140 buffer->col_adjust = 0;
338fa5f7 141 buffer->line_base = buffer->cur;
142
143 /* Handle CR-LF and LF-CR combinations, get the next character. */
144 if (buffer->cur < buffer->rlimit)
145 {
146 next = *buffer->cur++;
147 if (next + newline_char == '\r' + '\n')
148 {
149 buffer->line_base = buffer->cur;
150 if (buffer->cur < buffer->rlimit)
151 next = *buffer->cur++;
152 else
153 next = EOF;
154 }
155 }
156
157 buffer->read_ahead = next;
158 return next;
159}
160
161/* Subroutine of skip_escaped_newlines; called when a trigraph is
162 encountered. It warns if necessary, and returns true if the
163 trigraph should be honoured. FROM_CHAR is the third character of a
164 trigraph, and presumed to be the previous character for position
165 reporting. */
0578f103 166static int
338fa5f7 167trigraph_ok (pfile, from_char)
0578f103 168 cpp_reader *pfile;
338fa5f7 169 cppchar_t from_char;
0578f103 170{
f80e83a9 171 int accept = CPP_OPTION (pfile, trigraphs);
172
f669338a 173 /* Don't warn about trigraphs in comments. */
174 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
0578f103 175 {
338fa5f7 176 cpp_buffer *buffer = pfile->buffer;
1ea7ed21 177
f80e83a9 178 if (accept)
1ea7ed21 179 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
f80e83a9 180 "trigraph ??%c converted to %c",
338fa5f7 181 (int) from_char,
182 (int) _cpp_trigraph_map[from_char]);
4b912310 183 else if (buffer->cur != buffer->last_Wtrigraphs)
184 {
185 buffer->last_Wtrigraphs = buffer->cur;
1ea7ed21 186 cpp_warning_with_line (pfile, pfile->line,
4b912310 187 CPP_BUF_COL (buffer) - 2,
188 "trigraph ??%c ignored", (int) from_char);
189 }
0578f103 190 }
338fa5f7 191
f80e83a9 192 return accept;
0578f103 193}
194
/* Set the type of the token being built to T and consume the
   look-ahead character.  Relies on locals named `buffer' and `result'
   in the function that uses it.  */
#define ACCEPT_CHAR(t) \
  do { result->type = t; buffer->read_ahead = EOF; } while (0)

/* Remember, and later rewind to, a position in the input.  Both rely
   on locals named `buffer' and `saved_cur'.  When we move to multibyte
   character sets, these should also save and restore the state of the
   multibyte conversion library, which probably means saving and
   restoring a "cookie".  In the case of glibc that is an 8-byte
   structure, so it is not a high overhead operation, and in any case
   it is out of the fast path.  */
#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
206
207/* Skips any escaped newlines introduced by NEXT, which is either a
208 '?' or a '\\'. Returns the next character, which will also have
396ffa86 209 been placed in buffer->read_ahead. This routine performs
210 preprocessing stages 1 and 2 of the ISO C standard. */
338fa5f7 211static cppchar_t
c808d026 212skip_escaped_newlines (pfile, next)
213 cpp_reader *pfile;
338fa5f7 214 cppchar_t next;
0578f103 215{
c808d026 216 cpp_buffer *buffer = pfile->buffer;
217
396ffa86 218 /* Only do this if we apply stages 1 and 2. */
219 if (!buffer->from_stage3)
f80e83a9 220 {
396ffa86 221 cppchar_t next1;
222 const unsigned char *saved_cur;
223 int space;
224
225 do
338fa5f7 226 {
396ffa86 227 if (buffer->cur == buffer->rlimit)
228 break;
229
230 SAVE_STATE ();
231 if (next == '?')
338fa5f7 232 {
396ffa86 233 next1 = *buffer->cur++;
234 if (next1 != '?' || buffer->cur == buffer->rlimit)
235 {
236 RESTORE_STATE ();
237 break;
238 }
239
240 next1 = *buffer->cur++;
241 if (!_cpp_trigraph_map[next1]
c808d026 242 || !trigraph_ok (pfile, next1))
396ffa86 243 {
244 RESTORE_STATE ();
245 break;
246 }
247
248 /* We have a full trigraph here. */
249 next = _cpp_trigraph_map[next1];
250 if (next != '\\' || buffer->cur == buffer->rlimit)
251 break;
252 SAVE_STATE ();
253 }
254
255 /* We have a backslash, and room for at least one more character. */
256 space = 0;
257 do
258 {
259 next1 = *buffer->cur++;
260 if (!is_nvspace (next1))
261 break;
262 space = 1;
338fa5f7 263 }
396ffa86 264 while (buffer->cur < buffer->rlimit);
f80e83a9 265
396ffa86 266 if (!is_vspace (next1))
338fa5f7 267 {
268 RESTORE_STATE ();
269 break;
270 }
0578f103 271
c808d026 272 if (space && !pfile->state.lexing_comment)
273 cpp_warning (pfile, "backslash and newline separated by space");
338fa5f7 274
c808d026 275 next = handle_newline (pfile, next1);
396ffa86 276 if (next == EOF)
c808d026 277 cpp_pedwarn (pfile, "backslash-newline at end of file");
338fa5f7 278 }
396ffa86 279 while (next == '\\' || next == '?');
f80e83a9 280 }
0578f103 281
338fa5f7 282 buffer->read_ahead = next;
283 return next;
0578f103 284}
285
338fa5f7 286/* Obtain the next character, after trigraph conversion and skipping
287 an arbitrary string of escaped newlines. The common case of no
288 trigraphs or escaped newlines falls through quickly. */
289static cppchar_t
c808d026 290get_effective_char (pfile)
291 cpp_reader *pfile;
852d1b04 292{
c808d026 293 cpp_buffer *buffer = pfile->buffer;
338fa5f7 294 cppchar_t next = EOF;
295
296 if (buffer->cur < buffer->rlimit)
297 {
298 next = *buffer->cur++;
299
300 /* '?' can introduce trigraphs (and therefore backslash); '\\'
301 can introduce escaped newlines, which we want to skip, or
302 UCNs, which, depending upon lexer state, we will handle in
303 the future. */
304 if (next == '?' || next == '\\')
c808d026 305 next = skip_escaped_newlines (pfile, next);
338fa5f7 306 }
307
308 buffer->read_ahead = next;
309 return next;
852d1b04 310}
311
338fa5f7 312/* Skip a C-style block comment. We find the end of the comment by
313 seeing if an asterisk is before every '/' we encounter. Returns
314 non-zero if comment terminated by EOF, zero otherwise. */
f80e83a9 315static int
316skip_block_comment (pfile)
0578f103 317 cpp_reader *pfile;
318{
f80e83a9 319 cpp_buffer *buffer = pfile->buffer;
63e1abce 320 cppchar_t c = EOF, prevc = EOF;
338fa5f7 321
f669338a 322 pfile->state.lexing_comment = 1;
338fa5f7 323 while (buffer->cur != buffer->rlimit)
0578f103 324 {
338fa5f7 325 prevc = c, c = *buffer->cur++;
326
327 next_char:
328 /* FIXME: For speed, create a new character class of characters
79bd622b 329 of interest inside block comments. */
338fa5f7 330 if (c == '?' || c == '\\')
c808d026 331 c = skip_escaped_newlines (pfile, c);
f80e83a9 332
338fa5f7 333 /* People like decorating comments with '*', so check for '/'
334 instead for efficiency. */
f80e83a9 335 if (c == '/')
0578f103 336 {
338fa5f7 337 if (prevc == '*')
338 break;
f80e83a9 339
338fa5f7 340 /* Warn about potential nested comments, but not if the '/'
341 comes immediately before the true comment delimeter.
f80e83a9 342 Don't bother to get it right across escaped newlines. */
338fa5f7 343 if (CPP_OPTION (pfile, warn_comments)
344 && buffer->cur != buffer->rlimit)
0578f103 345 {
338fa5f7 346 prevc = c, c = *buffer->cur++;
347 if (c == '*' && buffer->cur != buffer->rlimit)
348 {
349 prevc = c, c = *buffer->cur++;
350 if (c != '/')
1ea7ed21 351 cpp_warning_with_line (pfile, pfile->line,
352 CPP_BUF_COL (buffer) - 2,
338fa5f7 353 "\"/*\" within comment");
354 }
355 goto next_char;
0578f103 356 }
0578f103 357 }
78719282 358 else if (is_vspace (c))
0578f103 359 {
36a0aa7c 360 prevc = c, c = handle_newline (pfile, c);
338fa5f7 361 goto next_char;
0578f103 362 }
b86584f6 363 else if (c == '\t')
338fa5f7 364 adjust_column (pfile);
0578f103 365 }
f80e83a9 366
f669338a 367 pfile->state.lexing_comment = 0;
338fa5f7 368 buffer->read_ahead = EOF;
369 return c != '/' || prevc != '*';
0578f103 370}
371
241e762e 372/* Skip a C++ line comment. Handles escaped newlines. Returns
338fa5f7 373 non-zero if a multiline comment. The following new line, if any,
374 is left in buffer->read_ahead. */
f80e83a9 375static int
f669338a 376skip_line_comment (pfile)
377 cpp_reader *pfile;
0578f103 378{
f669338a 379 cpp_buffer *buffer = pfile->buffer;
1ea7ed21 380 unsigned int orig_line = pfile->line;
338fa5f7 381 cppchar_t c;
f80e83a9 382
f669338a 383 pfile->state.lexing_comment = 1;
338fa5f7 384 do
f80e83a9 385 {
338fa5f7 386 c = EOF;
387 if (buffer->cur == buffer->rlimit)
388 break;
f80e83a9 389
338fa5f7 390 c = *buffer->cur++;
391 if (c == '?' || c == '\\')
c808d026 392 c = skip_escaped_newlines (pfile, c);
f80e83a9 393 }
338fa5f7 394 while (!is_vspace (c));
0578f103 395
f669338a 396 pfile->state.lexing_comment = 0;
338fa5f7 397 buffer->read_ahead = c; /* Leave any newline for caller. */
1ea7ed21 398 return orig_line != pfile->line;
f80e83a9 399}
0578f103 400
338fa5f7 401/* pfile->buffer->cur is one beyond the \t character. Update
402 col_adjust so we track the column correctly. */
b86584f6 403static void
338fa5f7 404adjust_column (pfile)
b86584f6 405 cpp_reader *pfile;
b86584f6 406{
338fa5f7 407 cpp_buffer *buffer = pfile->buffer;
408 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
b86584f6 409
410 /* Round it up to multiple of the tabstop, but subtract 1 since the
411 tab itself occupies a character position. */
338fa5f7 412 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
413 - col % CPP_OPTION (pfile, tabstop)) - 1;
b86584f6 414}
415
338fa5f7 416/* Skips whitespace, saving the next non-whitespace character.
417 Adjusts pfile->col_adjust to account for tabs. Without this,
418 tokens might be assigned an incorrect column. */
f80e83a9 419static void
338fa5f7 420skip_whitespace (pfile, c)
f80e83a9 421 cpp_reader *pfile;
338fa5f7 422 cppchar_t c;
f80e83a9 423{
424 cpp_buffer *buffer = pfile->buffer;
338fa5f7 425 unsigned int warned = 0;
0578f103 426
338fa5f7 427 do
f80e83a9 428 {
78719282 429 /* Horizontal space always OK. */
430 if (c == ' ')
338fa5f7 431 ;
78719282 432 else if (c == '\t')
338fa5f7 433 adjust_column (pfile);
434 /* Just \f \v or \0 left. */
78719282 435 else if (c == '\0')
f80e83a9 436 {
78719282 437 if (!warned)
338fa5f7 438 {
439 cpp_warning (pfile, "null character(s) ignored");
440 warned = 1;
441 }
0578f103 442 }
79bd622b 443 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
1ea7ed21 444 cpp_pedwarn_with_line (pfile, pfile->line,
78719282 445 CPP_BUF_COL (buffer),
446 "%s in preprocessing directive",
447 c == '\f' ? "form feed" : "vertical tab");
338fa5f7 448
449 c = EOF;
450 if (buffer->cur == buffer->rlimit)
451 break;
452 c = *buffer->cur++;
0578f103 453 }
2c0e001b 454 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
338fa5f7 455 while (is_nvspace (c));
456
457 /* Remember the next character. */
458 buffer->read_ahead = c;
f80e83a9 459}
0578f103 460
79bd622b 461/* See if the characters of a number token are valid in a name (no
462 '.', '+' or '-'). */
463static int
464name_p (pfile, string)
465 cpp_reader *pfile;
466 const cpp_string *string;
467{
468 unsigned int i;
469
470 for (i = 0; i < string->len; i++)
471 if (!is_idchar (string->text[i]))
472 return 0;
473
474 return 1;
475}
476
66a5287e 477/* Parse an identifier, skipping embedded backslash-newlines. This is
478 a critical inner loop. The common case is an identifier which has
479 not been split by backslash-newline, does not contain a dollar
480 sign, and has already been scanned (roughly 10:1 ratio of
481 seen:unseen identifiers in normal code; the distribution is
482 Poisson-like). Second most common case is a new identifier, not
483 split and no dollar sign. The other possibilities are rare and
484 have been relegated to parse_identifier_slow. */
338fa5f7 485
486static cpp_hashnode *
66a5287e 487parse_identifier (pfile)
0578f103 488 cpp_reader *pfile;
0578f103 489{
79bd622b 490 cpp_hashnode *result;
66a5287e 491 const U_CHAR *cur, *rlimit;
492
493 /* Fast-path loop. Skim over a normal identifier.
494 N.B. ISIDNUM does not include $. */
495 cur = pfile->buffer->cur - 1;
496 rlimit = pfile->buffer->rlimit;
497 do
498 cur++;
499 while (cur < rlimit && ISIDNUM (*cur));
500
501 /* Check for slow-path cases. */
502 if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
503 result = parse_identifier_slow (pfile, cur);
504 else
505 {
506 const U_CHAR *base = pfile->buffer->cur - 1;
507 result = (cpp_hashnode *)
508 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
509 pfile->buffer->cur = cur;
510 }
511
512 /* Rarely, identifiers require diagnostics when lexed.
513 XXX Has to be forced out of the fast path. */
514 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
515 && !pfile->state.skipping, 0))
516 {
517 /* It is allowed to poison the same identifier twice. */
518 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
519 cpp_error (pfile, "attempt to use poisoned \"%s\"",
520 NODE_NAME (result));
521
522 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
523 replacement list of a variadic macro. */
524 if (result == pfile->spec_nodes.n__VA_ARGS__
525 && !pfile->state.va_args_ok)
526 cpp_pedwarn (pfile,
527 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
528 }
529
530 return result;
531}
532
533/* Slow path. This handles identifiers which have been split, and
534 identifiers which contain dollar signs. The part of the identifier
535 from PFILE->buffer->cur-1 to CUR has already been scanned. */
536static cpp_hashnode *
537parse_identifier_slow (pfile, cur)
538 cpp_reader *pfile;
539 const U_CHAR *cur;
540{
338fa5f7 541 cpp_buffer *buffer = pfile->buffer;
66a5287e 542 const U_CHAR *base = buffer->cur - 1;
0d086e18 543 struct obstack *stack = &pfile->hash_table->stack;
66a5287e 544 unsigned int c, saw_dollar = 0, len;
545
546 /* Copy the part of the token which is known to be okay. */
547 obstack_grow (stack, base, cur - base);
f80e83a9 548
66a5287e 549 /* Now process the part which isn't. We are looking at one of
550 '$', '\\', or '?' on entry to this loop. */
551 c = *cur++;
552 buffer->cur = cur;
338fa5f7 553 do
f80e83a9 554 {
66a5287e 555 while (is_idchar (c))
556 {
557 obstack_1grow (stack, c);
0578f103 558
66a5287e 559 if (c == '$')
560 saw_dollar++;
71aa9da4 561
66a5287e 562 c = EOF;
563 if (buffer->cur == buffer->rlimit)
564 break;
71aa9da4 565
66a5287e 566 c = *buffer->cur++;
567 }
71aa9da4 568
338fa5f7 569 /* Potential escaped newline? */
570 if (c != '?' && c != '\\')
66a5287e 571 break;
c808d026 572 c = skip_escaped_newlines (pfile, c);
f80e83a9 573 }
338fa5f7 574 while (is_idchar (c));
575
79bd622b 576 /* Remember the next character. */
577 buffer->read_ahead = c;
578
338fa5f7 579 /* $ is not a identifier character in the standard, but is commonly
580 accepted as an extension. Don't warn about it in skipped
581 conditional blocks. */
5e878517 582 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
338fa5f7 583 cpp_pedwarn (pfile, "'$' character(s) in identifier");
584
79bd622b 585 /* Identifiers are null-terminated. */
0d086e18 586 len = obstack_object_size (stack);
587 obstack_1grow (stack, '\0');
79bd622b 588
66a5287e 589 return (cpp_hashnode *)
0d086e18 590 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
0578f103 591}
592
338fa5f7 593/* Parse a number, skipping embedded backslash-newlines. */
0578f103 594static void
79bd622b 595parse_number (pfile, number, c, leading_period)
0578f103 596 cpp_reader *pfile;
338fa5f7 597 cpp_string *number;
598 cppchar_t c;
79bd622b 599 int leading_period;
0578f103 600{
f80e83a9 601 cpp_buffer *buffer = pfile->buffer;
5f3f0010 602 cpp_pool *pool = &pfile->ident_pool;
79bd622b 603 unsigned char *dest, *limit;
0578f103 604
79bd622b 605 dest = POOL_FRONT (pool);
606 limit = POOL_LIMIT (pool);
f669338a 607
79bd622b 608 /* Place a leading period. */
609 if (leading_period)
610 {
611 if (dest >= limit)
612 limit = _cpp_next_chunk (pool, 0, &dest);
613 *dest++ = '.';
614 }
615
338fa5f7 616 do
f80e83a9 617 {
338fa5f7 618 do
619 {
79bd622b 620 /* Need room for terminating null. */
621 if (dest + 1 >= limit)
622 limit = _cpp_next_chunk (pool, 0, &dest);
623 *dest++ = c;
338fa5f7 624
338fa5f7 625 c = EOF;
626 if (buffer->cur == buffer->rlimit)
627 break;
0578f103 628
338fa5f7 629 c = *buffer->cur++;
630 }
79bd622b 631 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
0578f103 632
338fa5f7 633 /* Potential escaped newline? */
634 if (c != '?' && c != '\\')
635 break;
c808d026 636 c = skip_escaped_newlines (pfile, c);
0578f103 637 }
79bd622b 638 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
f669338a 639
338fa5f7 640 /* Remember the next character. */
641 buffer->read_ahead = c;
852d1b04 642
79bd622b 643 /* Null-terminate the number. */
644 *dest = '\0';
645
646 number->text = POOL_FRONT (pool);
647 number->len = dest - number->text;
648 POOL_COMMIT (pool, number->len + 1);
338fa5f7 649}
650
651/* Subroutine of parse_string. Emits error for unterminated strings. */
652static void
79bd622b 653unterminated (pfile, term)
338fa5f7 654 cpp_reader *pfile;
338fa5f7 655 int term;
656{
657 cpp_error (pfile, "missing terminating %c character", term);
658
729d2022 659 if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
f80e83a9 660 {
729d2022 661 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
338fa5f7 662 "possible start of unterminated string literal");
729d2022 663 pfile->mls_line = 0;
f80e83a9 664 }
0578f103 665}
666
79bd622b 667/* Subroutine of parse_string. */
668static int
669unescaped_terminator_p (pfile, dest)
670 cpp_reader *pfile;
671 const unsigned char *dest;
672{
673 const unsigned char *start, *temp;
674
675 /* In #include-style directives, terminators are not escapeable. */
676 if (pfile->state.angled_headers)
677 return 1;
678
5f3f0010 679 start = POOL_FRONT (&pfile->ident_pool);
79bd622b 680
681 /* An odd number of consecutive backslashes represents an escaped
682 terminator. */
683 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
684 ;
685
686 return ((dest - temp) & 1) == 0;
687}
688
338fa5f7 689/* Parses a string, character constant, or angle-bracketed header file
4b0c16ee 690 name. Handles embedded trigraphs and escaped newlines. The stored
691 string is guaranteed NUL-terminated, but it is not guaranteed that
692 this is the first NUL since embedded NULs are preserved.
0578f103 693
4b0c16ee 694 Multi-line strings are allowed, but they are deprecated. */
f80e83a9 695static void
338fa5f7 696parse_string (pfile, token, terminator)
0578f103 697 cpp_reader *pfile;
f80e83a9 698 cpp_token *token;
338fa5f7 699 cppchar_t terminator;
0578f103 700{
f80e83a9 701 cpp_buffer *buffer = pfile->buffer;
5f3f0010 702 cpp_pool *pool = &pfile->ident_pool;
79bd622b 703 unsigned char *dest, *limit;
338fa5f7 704 cppchar_t c;
38692459 705 bool warned_nulls = false, warned_multi = false;
338fa5f7 706
79bd622b 707 dest = POOL_FRONT (pool);
708 limit = POOL_LIMIT (pool);
709
338fa5f7 710 for (;;)
0578f103 711 {
338fa5f7 712 if (buffer->cur == buffer->rlimit)
4b0c16ee 713 c = EOF;
714 else
715 c = *buffer->cur++;
716
717 have_char:
718 /* We need space for the terminating NUL. */
719 if (dest >= limit)
720 limit = _cpp_next_chunk (pool, 0, &dest);
721
722 if (c == EOF)
338fa5f7 723 {
79bd622b 724 unterminated (pfile, terminator);
338fa5f7 725 break;
726 }
338fa5f7 727
338fa5f7 728 /* Handle trigraphs, escaped newlines etc. */
729 if (c == '?' || c == '\\')
c808d026 730 c = skip_escaped_newlines (pfile, c);
0578f103 731
79bd622b 732 if (c == terminator && unescaped_terminator_p (pfile, dest))
0578f103 733 {
79bd622b 734 c = EOF;
735 break;
338fa5f7 736 }
737 else if (is_vspace (c))
738 {
739 /* In assembly language, silently terminate string and
740 character literals at end of line. This is a kludge
741 around not knowing where comments are. */
5db5d057 742 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
338fa5f7 743 break;
0578f103 744
338fa5f7 745 /* Character constants and header names may not extend over
746 multiple lines. In Standard C, neither may strings.
747 Unfortunately, we accept multiline strings as an
cc8770bf 748 extension, except in #include family directives. */
749 if (terminator != '"' || pfile->state.angled_headers)
0578f103 750 {
79bd622b 751 unterminated (pfile, terminator);
338fa5f7 752 break;
0578f103 753 }
0578f103 754
38692459 755 if (!warned_multi)
756 {
757 warned_multi = true;
758 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
759 }
760
729d2022 761 if (pfile->mls_line == 0)
762 {
763 pfile->mls_line = token->line;
764 pfile->mls_col = token->col;
765 }
338fa5f7 766
36a0aa7c 767 c = handle_newline (pfile, c);
4b0c16ee 768 *dest++ = '\n';
769 goto have_char;
338fa5f7 770 }
38692459 771 else if (c == '\0' && !warned_nulls)
338fa5f7 772 {
38692459 773 warned_nulls = true;
774 cpp_warning (pfile, "null character(s) preserved in literal");
0578f103 775 }
0578f103 776
79bd622b 777 *dest++ = c;
0578f103 778 }
779
79bd622b 780 /* Remember the next character. */
338fa5f7 781 buffer->read_ahead = c;
4b0c16ee 782 *dest = '\0';
0578f103 783
79bd622b 784 token->val.str.text = POOL_FRONT (pool);
785 token->val.str.len = dest - token->val.str.text;
4b0c16ee 786 POOL_COMMIT (pool, token->val.str.len + 1);
338fa5f7 787}
f80e83a9 788
79bd622b 789/* The stored comment includes the comment start and any terminator. */
2c63d6c8 790static void
338fa5f7 791save_comment (pfile, token, from)
792 cpp_reader *pfile;
f80e83a9 793 cpp_token *token;
794 const unsigned char *from;
2c63d6c8 795{
f80e83a9 796 unsigned char *buffer;
338fa5f7 797 unsigned int len;
338fa5f7 798
f0495c2c 799 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
a543b315 800 /* C++ comments probably (not definitely) have moved past a new
801 line, which we don't want to save in the comment. */
802 if (pfile->buffer->read_ahead != EOF)
803 len--;
5f3f0010 804 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
f80e83a9 805
f80e83a9 806 token->type = CPP_COMMENT;
76faa4c0 807 token->val.str.len = len;
338fa5f7 808 token->val.str.text = buffer;
0578f103 809
f0495c2c 810 buffer[0] = '/';
811 memcpy (buffer + 1, from, len - 1);
338fa5f7 812}
0578f103 813
10b4496a 814/* Subroutine of _cpp_lex_direct to handle '%'. A little tricky, since we
f669338a 815 want to avoid stepping back when lexing %:%X. */
338fa5f7 816static void
c808d026 817lex_percent (pfile, result)
818 cpp_reader *pfile;
338fa5f7 819 cpp_token *result;
338fa5f7 820{
c808d026 821 cpp_buffer *buffer= pfile->buffer;
f669338a 822 cppchar_t c;
823
824 result->type = CPP_MOD;
825 /* Parsing %:%X could leave an extra character. */
826 if (buffer->extra_char == EOF)
c808d026 827 c = get_effective_char (pfile);
f669338a 828 else
829 {
830 c = buffer->read_ahead = buffer->extra_char;
831 buffer->extra_char = EOF;
832 }
833
834 if (c == '=')
835 ACCEPT_CHAR (CPP_MOD_EQ);
c808d026 836 else if (CPP_OPTION (pfile, digraphs))
f669338a 837 {
838 if (c == ':')
839 {
840 result->flags |= DIGRAPH;
841 ACCEPT_CHAR (CPP_HASH);
c808d026 842 if (get_effective_char (pfile) == '%')
f669338a 843 {
c808d026 844 buffer->extra_char = get_effective_char (pfile);
f669338a 845 if (buffer->extra_char == ':')
846 {
847 buffer->extra_char = EOF;
848 ACCEPT_CHAR (CPP_PASTE);
849 }
850 else
851 /* We'll catch the extra_char when we're called back. */
852 buffer->read_ahead = '%';
853 }
854 }
855 else if (c == '>')
856 {
857 result->flags |= DIGRAPH;
858 ACCEPT_CHAR (CPP_CLOSE_BRACE);
859 }
860 }
861}
862
10b4496a 863/* Subroutine of _cpp_lex_direct to handle '.'. This is tricky, since we
f669338a 864 want to avoid stepping back when lexing '...' or '.123'. In the
865 latter case we should also set a flag for parse_number. */
866static void
867lex_dot (pfile, result)
868 cpp_reader *pfile;
869 cpp_token *result;
870{
871 cpp_buffer *buffer = pfile->buffer;
872 cppchar_t c;
873
874 /* Parsing ..X could leave an extra character. */
875 if (buffer->extra_char == EOF)
c808d026 876 c = get_effective_char (pfile);
f669338a 877 else
878 {
879 c = buffer->read_ahead = buffer->extra_char;
880 buffer->extra_char = EOF;
881 }
338fa5f7 882
f669338a 883 /* All known character sets have 0...9 contiguous. */
884 if (c >= '0' && c <= '9')
885 {
886 result->type = CPP_NUMBER;
79bd622b 887 parse_number (pfile, &result->val.str, c, 1);
f669338a 888 }
f80e83a9 889 else
c4357c92 890 {
f669338a 891 result->type = CPP_DOT;
892 if (c == '.')
893 {
c808d026 894 buffer->extra_char = get_effective_char (pfile);
f669338a 895 if (buffer->extra_char == '.')
896 {
897 buffer->extra_char = EOF;
898 ACCEPT_CHAR (CPP_ELLIPSIS);
899 }
900 else
901 /* We'll catch the extra_char when we're called back. */
902 buffer->read_ahead = '.';
903 }
904 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
905 ACCEPT_CHAR (CPP_DOT_STAR);
c4357c92 906 }
0578f103 907}
908
83dcbb5c 909/* Allocate COUNT tokens for RUN. */
910void
911_cpp_init_tokenrun (run, count)
912 tokenrun *run;
913 unsigned int count;
914{
915 run->base = xnewvec (cpp_token, count);
916 run->limit = run->base + count;
917 run->next = NULL;
918}
919
920/* Returns the next tokenrun, or creates one if there is none. */
921static tokenrun *
922next_tokenrun (run)
923 tokenrun *run;
924{
925 if (run->next == NULL)
926 {
927 run->next = xnew (tokenrun);
fb5ab82c 928 run->next->prev = run;
83dcbb5c 929 _cpp_init_tokenrun (run->next, 250);
930 }
931
932 return run->next;
933}
934
f9b5f742 935/* Allocate a single token that is invalidated at the same time as the
936 rest of the tokens on the line. Has its line and col set to the
937 same as the last lexed token, so that diagnostics appear in the
938 right place. */
939cpp_token *
940_cpp_temp_token (pfile)
941 cpp_reader *pfile;
942{
943 cpp_token *old, *result;
944
945 old = pfile->cur_token - 1;
946 if (pfile->cur_token == pfile->cur_run->limit)
947 {
948 pfile->cur_run = next_tokenrun (pfile->cur_run);
949 pfile->cur_token = pfile->cur_run->base;
950 }
951
952 result = pfile->cur_token++;
953 result->line = old->line;
954 result->col = old->col;
955 return result;
956}
957
10b4496a 958/* Lex a token into RESULT (external interface). Takes care of issues
959 like directive handling, token lookahead, multiple include
960 opimisation and skipping. */
c00e481c 961const cpp_token *
962_cpp_lex_token (pfile)
0578f103 963 cpp_reader *pfile;
83dcbb5c 964{
fb5ab82c 965 cpp_token *result;
83dcbb5c 966
fb5ab82c 967 for (;;)
83dcbb5c 968 {
fb5ab82c 969 if (pfile->cur_token == pfile->cur_run->limit)
83dcbb5c 970 {
fb5ab82c 971 pfile->cur_run = next_tokenrun (pfile->cur_run);
972 pfile->cur_token = pfile->cur_run->base;
83dcbb5c 973 }
974
fb5ab82c 975 if (pfile->lookaheads)
10b4496a 976 {
977 pfile->lookaheads--;
978 result = pfile->cur_token++;
979 }
fb5ab82c 980 else
10b4496a 981 result = _cpp_lex_direct (pfile);
fb5ab82c 982
983 if (result->flags & BOL)
83dcbb5c 984 {
fb5ab82c 985 /* Is this a directive. If _cpp_handle_directive returns
986 false, it is an assembler #. */
987 if (result->type == CPP_HASH
988 && !pfile->state.parsing_args
989 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
990 continue;
5621a364 991 if (pfile->cb.line_change && !pfile->state.skipping)
992 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
83dcbb5c 993 }
83dcbb5c 994
fb5ab82c 995 /* We don't skip tokens in directives. */
996 if (pfile->state.in_directive)
997 break;
83dcbb5c 998
fb5ab82c 999 /* Outside a directive, invalidate controlling macros. At file
10b4496a 1000 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
fb5ab82c 1001 get here and MI optimisation works. */
83dcbb5c 1002 pfile->mi_valid = false;
fb5ab82c 1003
1004 if (!pfile->state.skipping || result->type == CPP_EOF)
1005 break;
83dcbb5c 1006 }
1007
c00e481c 1008 return result;
83dcbb5c 1009}
1010
10b4496a 1011/* Lex a token into pfile->cur_token, which is also incremented, to
 1012 get diagnostics pointing to the correct location.
 1013
 1014 Does not handle issues such as token lookahead, multiple-include
 1015 optimisation, directives, skipping etc. This function is only
 1016 suitable for use by _cpp_lex_token, and in special cases like
 1017 lex_expansion_token which doesn't care for any of these issues.
 1018
 1019 When meeting a newline, returns CPP_EOF if parsing a directive,
 1020 otherwise returns to the start of the token buffer if permissible.
 1021 Returns the location of the lexed token. */
/* Overview of the labels used in the body:
   fresh_line         - reload the current buffer and move its saved
                        flags into the token being built;
   update_tokens_line - record the current line number in the token;
   skipped_white      - fetch the next character (a pending read-ahead
                        character takes priority over the buffer) and
                        record the token's column;
   trigraph           - dispatch on the character held in C.  */
 1022cpp_token *
 1023_cpp_lex_direct (pfile)
83dcbb5c 1024 cpp_reader *pfile;
0578f103 1025{
338fa5f7 1026 cppchar_t c;
230f0943 1027 cpp_buffer *buffer;
338fa5f7 1028 const unsigned char *comment_start;
/* The token is taken from the current run and cur_token is advanced
   past it straight away.  */
10b4496a 1029 cpp_token *result = pfile->cur_token++;
0653b94e 1030
83dcbb5c 1031 fresh_line:
230f0943 1032 buffer = pfile->buffer;
8c2e2fc5 1033 result->flags = buffer->saved_flags;
 1034 buffer->saved_flags = 0;
83dcbb5c 1035 update_tokens_line:
36a0aa7c 1036 result->line = pfile->line;
f80e83a9 1037
83dcbb5c 1038 skipped_white:
338fa5f7 1039 c = buffer->read_ahead;
 1040 if (c == EOF && buffer->cur < buffer->rlimit)
83dcbb5c 1041 c = *buffer->cur++;
 1042 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
338fa5f7 1043 buffer->read_ahead = EOF;
83dcbb5c 1044
 1045 trigraph:
338fa5f7 1046 switch (c)
0578f103 1047 {
338fa5f7 1048 case EOF:
/* The token that follows the end of a buffer is flagged as starting
   a line.  */
fb5ab82c 1049 buffer->saved_flags = BOL;
83dcbb5c 1050 if (!pfile->state.parsing_args && !pfile->state.in_directive)
4dfe8b74 1051 {
fb5ab82c 1052 if (buffer->cur != buffer->line_base)
83dcbb5c 1053 {
 1054 /* Non-empty files should end in a newline. Don't warn
 1055 for command line and _Pragma buffers. */
 1056 if (!buffer->from_stage3)
 1057 cpp_pedwarn (pfile, "no newline at end of file");
 1058 handle_newline (pfile, '\n');
5475a165 1059 }
fb5ab82c 1060
 1061 /* Don't pop the last buffer. */
 1062 if (buffer->prev)
 1063 {
/* return_at_eof is read before the pop; when it is clear, lexing
   simply continues at fresh_line with whatever pfile->buffer is
   after the pop.  */
 1064 unsigned char stop = buffer->return_at_eof;
 1065
 1066 _cpp_pop_buffer (pfile);
 1067 if (!stop)
 1068 goto fresh_line;
 1069 }
4dfe8b74 1070 }
338fa5f7 1071 result->type = CPP_EOF;
83dcbb5c 1072 break;
0578f103 1073
338fa5f7 1074 case ' ': case '\t': case '\f': case '\v': case '\0':
 1075 skip_whitespace (pfile, c);
 1076 result->flags |= PREV_WHITE;
83dcbb5c 1077 goto skipped_white;
338fa5f7 1078
 1079 case '\n': case '\r':
fb5ab82c 1080 handle_newline (pfile, c);
 1081 buffer->saved_flags = BOL;
 1082 if (! pfile->state.in_directive)
0578f103 1083 {
f9b5f742 1084 if (pfile->state.parsing_args == 2)
 1085 buffer->saved_flags |= PREV_WHITE;
/* Unless tokens are being kept, restart at the first token of the
   base run, so the new line overwrites the previous line's tokens.  */
fb5ab82c 1086 if (!pfile->keep_tokens)
 1087 {
 1088 pfile->cur_run = &pfile->base_run;
 1089 result = pfile->base_run.base;
 1090 pfile->cur_token = result + 1;
 1091 }
 1092 goto fresh_line;
0578f103 1093 }
/* Inside a directive the newline ends it: report CPP_EOF.  */
83dcbb5c 1094 result->type = CPP_EOF;
 1095 break;
732cb4c9 1096
338fa5f7 1097 case '?':
 1098 case '\\':
 1099 /* These could start an escaped newline, or '?' a trigraph. Let
 1100 skip_escaped_newlines do all the work. */
 1101 {
1ea7ed21 1102 unsigned int line = pfile->line;
338fa5f7 1103
c808d026 1104 c = skip_escaped_newlines (pfile, c);
1ea7ed21 1105 if (line != pfile->line)
338fa5f7 1106 /* We had at least one escaped newline of some sort, and the
 1107 next character is in buffer->read_ahead. Update the
 1108 token's line and column. */
83dcbb5c 1109 goto update_tokens_line;
338fa5f7 1110
 1111 /* We are either the original '?' or '\\', or a trigraph. */
 1112 result->type = CPP_QUERY;
 1113 buffer->read_ahead = EOF;
 1114 if (c == '\\')
3f90a920 1115 goto random_char;
338fa5f7 1116 else if (c != '?')
83dcbb5c 1117 goto trigraph;
338fa5f7 1118 }
 1119 break;
732cb4c9 1120
338fa5f7 1121 case '0': case '1': case '2': case '3': case '4':
 1122 case '5': case '6': case '7': case '8': case '9':
 1123 result->type = CPP_NUMBER;
79bd622b 1124 parse_number (pfile, &result->val.str, c, 0);
338fa5f7 1125 break;
732cb4c9 1126
338fa5f7 1127 case '$':
 1128 if (!CPP_OPTION (pfile, dollars_in_ident))
 1129 goto random_char;
2c0e001b 1130 /* Fall through... */
338fa5f7 1131
 1132 case '_':
 1133 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 1134 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 1135 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 1136 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 1137 case 'y': case 'z':
 1138 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 1139 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 1140 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 1141 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 1142 case 'Y': case 'Z':
 1143 result->type = CPP_NAME;
66a5287e 1144 result->val.node = parse_identifier (pfile);
338fa5f7 1145
 1146 /* 'L' may introduce wide characters or strings. */
79bd622b 1147 if (result->val.node == pfile->spec_nodes.n_L)
338fa5f7 1148 {
/* Peek at the following character without consuming it; it is only
   consumed below when it turns out to be a quote.  */
66a5287e 1149 c = buffer->read_ahead;
 1150 if (c == EOF && buffer->cur < buffer->rlimit)
 1151 c = *buffer->cur;
338fa5f7 1152 if (c == '\'' || c == '"')
71aa9da4 1153 {
66a5287e 1154 buffer->cur++;
338fa5f7 1155 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 1156 goto make_string;
71aa9da4 1157 }
338fa5f7 1158 }
 1159 /* Convert named operators to their proper types. */
79bd622b 1160 else if (result->val.node->flags & NODE_OPERATOR)
338fa5f7 1161 {
/* val.node is left pointing at the identifier, so the NAMED_OP token
   still carries its spelling.  */
 1162 result->flags |= NAMED_OP;
79bd622b 1163 result->type = result->val.node->value.operator;
338fa5f7 1164 }
 1165 break;
 1166
 1167 case '\'':
 1168 case '"':
 1169 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
 1170 make_string:
/* C is passed to parse_string; the header-name case below sets it to
   '>' and labels it the terminator.  */
 1171 parse_string (pfile, result, c);
 1172 break;
f80e83a9 1173
338fa5f7 1174 case '/':
f0495c2c 1175 /* A potential block or line comment. */
 1176 comment_start = buffer->cur;
338fa5f7 1177 result->type = CPP_DIV;
c808d026 1178 c = get_effective_char (pfile);
338fa5f7 1179 if (c == '=')
 1180 ACCEPT_CHAR (CPP_DIV_EQ);
f0495c2c 1181 if (c != '/' && c != '*')
 1182 break;
20b8f8ff 1183
f0495c2c 1184 if (c == '*')
 1185 {
338fa5f7 1186 if (skip_block_comment (pfile))
1ea7ed21 1187 cpp_error (pfile, "unterminated comment");
338fa5f7 1188 }
f0495c2c 1189 else
338fa5f7 1190 {
/* With line comments disabled and outside system headers, the first
   '/' is returned as a plain CPP_DIV token.  */
f0495c2c 1191 if (!CPP_OPTION (pfile, cplusplus_comments)
 1192 && !CPP_IN_SYSTEM_HEADER (pfile))
 1193 break;
 1194
5db5d057 1195 /* Warn about comments only if pedantically GNUC89, and not
 1196 in system headers. */
 1197 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
66914e49 1198 && ! buffer->warned_cplusplus_comments)
f80e83a9 1199 {
f0495c2c 1200 cpp_pedwarn (pfile,
 1201 "C++ style comments are not allowed in ISO C89");
 1202 cpp_pedwarn (pfile,
 1203 "(this will be reported only once per input file)");
 1204 buffer->warned_cplusplus_comments = 1;
 1205 }
338fa5f7 1206
66914e49 1207 /* Skip_line_comment updates buffer->read_ahead. */
e1caf668 1208 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
729d2022 1209 cpp_warning (pfile, "multi-line comment");
f0495c2c 1210 }
338fa5f7 1211
f0495c2c 1212 /* Skipping the comment has updated buffer->read_ahead. */
 1213 if (!pfile->state.save_comments)
 1214 {
/* A discarded comment is treated like whitespace before the next
   token.  */
 1215 result->flags |= PREV_WHITE;
83dcbb5c 1216 goto update_tokens_line;
338fa5f7 1217 }
f0495c2c 1218
 1219 /* Save the comment as a token in its own right. */
 1220 save_comment (pfile, result, comment_start);
fb5ab82c 1221 break;
338fa5f7 1222
 1223 case '<':
/* When state.angled_headers is set, '<' opens a header name that is
   lexed like a string terminated by '>'.  */
 1224 if (pfile->state.angled_headers)
 1225 {
 1226 result->type = CPP_HEADER_NAME;
 1227 c = '>'; /* terminator. */
 1228 goto make_string;
 1229 }
0578f103 1230
338fa5f7 1231 result->type = CPP_LESS;
c808d026 1232 c = get_effective_char (pfile);
338fa5f7 1233 if (c == '=')
 1234 ACCEPT_CHAR (CPP_LESS_EQ);
 1235 else if (c == '<')
 1236 {
 1237 ACCEPT_CHAR (CPP_LSHIFT);
c808d026 1238 if (get_effective_char (pfile) == '=')
338fa5f7 1239 ACCEPT_CHAR (CPP_LSHIFT_EQ);
 1240 }
 1241 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
 1242 {
 1243 ACCEPT_CHAR (CPP_MIN);
c808d026 1244 if (get_effective_char (pfile) == '=')
338fa5f7 1245 ACCEPT_CHAR (CPP_MIN_EQ);
 1246 }
 1247 else if (c == ':' && CPP_OPTION (pfile, digraphs))
 1248 {
 1249 ACCEPT_CHAR (CPP_OPEN_SQUARE);
 1250 result->flags |= DIGRAPH;
 1251 }
 1252 else if (c == '%' && CPP_OPTION (pfile, digraphs))
 1253 {
 1254 ACCEPT_CHAR (CPP_OPEN_BRACE);
 1255 result->flags |= DIGRAPH;
 1256 }
 1257 break;
 1258
 1259 case '>':
 1260 result->type = CPP_GREATER;
c808d026 1261 c = get_effective_char (pfile);
338fa5f7 1262 if (c == '=')
 1263 ACCEPT_CHAR (CPP_GREATER_EQ);
 1264 else if (c == '>')
 1265 {
 1266 ACCEPT_CHAR (CPP_RSHIFT);
c808d026 1267 if (get_effective_char (pfile) == '=')
338fa5f7 1268 ACCEPT_CHAR (CPP_RSHIFT_EQ);
 1269 }
 1270 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
 1271 {
 1272 ACCEPT_CHAR (CPP_MAX);
c808d026 1273 if (get_effective_char (pfile) == '=')
338fa5f7 1274 ACCEPT_CHAR (CPP_MAX_EQ);
 1275 }
 1276 break;
 1277
f669338a 1278 case '%':
c808d026 1279 lex_percent (pfile, result);
338fa5f7 1280 break;
 1281
f669338a 1282 case '.':
 1283 lex_dot (pfile, result);
338fa5f7 1284 break;
0578f103 1285
338fa5f7 1286 case '+':
 1287 result->type = CPP_PLUS;
c808d026 1288 c = get_effective_char (pfile);
338fa5f7 1289 if (c == '=')
 1290 ACCEPT_CHAR (CPP_PLUS_EQ);
 1291 else if (c == '+')
 1292 ACCEPT_CHAR (CPP_PLUS_PLUS);
 1293 break;
ac0749c7 1294
338fa5f7 1295 case '-':
 1296 result->type = CPP_MINUS;
c808d026 1297 c = get_effective_char (pfile);
338fa5f7 1298 if (c == '>')
 1299 {
 1300 ACCEPT_CHAR (CPP_DEREF);
 1301 if (CPP_OPTION (pfile, cplusplus)
c808d026 1302 && get_effective_char (pfile) == '*')
338fa5f7 1303 ACCEPT_CHAR (CPP_DEREF_STAR);
 1304 }
 1305 else if (c == '=')
 1306 ACCEPT_CHAR (CPP_MINUS_EQ);
 1307 else if (c == '-')
 1308 ACCEPT_CHAR (CPP_MINUS_MINUS);
 1309 break;
0578f103 1310
338fa5f7 1311 case '*':
 1312 result->type = CPP_MULT;
c808d026 1313 if (get_effective_char (pfile) == '=')
338fa5f7 1314 ACCEPT_CHAR (CPP_MULT_EQ);
 1315 break;
ac0749c7 1316
338fa5f7 1317 case '=':
 1318 result->type = CPP_EQ;
c808d026 1319 if (get_effective_char (pfile) == '=')
338fa5f7 1320 ACCEPT_CHAR (CPP_EQ_EQ);
 1321 break;
c4abf88d 1322
338fa5f7 1323 case '!':
 1324 result->type = CPP_NOT;
c808d026 1325 if (get_effective_char (pfile) == '=')
338fa5f7 1326 ACCEPT_CHAR (CPP_NOT_EQ);
 1327 break;
0578f103 1328
338fa5f7 1329 case '&':
 1330 result->type = CPP_AND;
c808d026 1331 c = get_effective_char (pfile);
338fa5f7 1332 if (c == '=')
 1333 ACCEPT_CHAR (CPP_AND_EQ);
 1334 else if (c == '&')
 1335 ACCEPT_CHAR (CPP_AND_AND);
 1336 break;
 1337
 1338 case '#':
e14c5993 1339 result->type = CPP_HASH;
83dcbb5c 1340 if (get_effective_char (pfile) == '#')
 1341 ACCEPT_CHAR (CPP_PASTE);
338fa5f7 1342 break;
0578f103 1343
338fa5f7 1344 case '|':
 1345 result->type = CPP_OR;
c808d026 1346 c = get_effective_char (pfile);
338fa5f7 1347 if (c == '=')
 1348 ACCEPT_CHAR (CPP_OR_EQ);
 1349 else if (c == '|')
 1350 ACCEPT_CHAR (CPP_OR_OR);
 1351 break;
0578f103 1352
338fa5f7 1353 case '^':
 1354 result->type = CPP_XOR;
c808d026 1355 if (get_effective_char (pfile) == '=')
338fa5f7 1356 ACCEPT_CHAR (CPP_XOR_EQ);
 1357 break;
0578f103 1358
338fa5f7 1359 case ':':
 1360 result->type = CPP_COLON;
c808d026 1361 c = get_effective_char (pfile);
338fa5f7 1362 if (c == ':' && CPP_OPTION (pfile, cplusplus))
 1363 ACCEPT_CHAR (CPP_SCOPE);
 1364 else if (c == '>' && CPP_OPTION (pfile, digraphs))
 1365 {
 1366 result->flags |= DIGRAPH;
 1367 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
 1368 }
 1369 break;
0578f103 1370
338fa5f7 1371 case '~': result->type = CPP_COMPL; break;
 1372 case ',': result->type = CPP_COMMA; break;
 1373 case '(': result->type = CPP_OPEN_PAREN; break;
 1374 case ')': result->type = CPP_CLOSE_PAREN; break;
 1375 case '[': result->type = CPP_OPEN_SQUARE; break;
 1376 case ']': result->type = CPP_CLOSE_SQUARE; break;
 1377 case '{': result->type = CPP_OPEN_BRACE; break;
 1378 case '}': result->type = CPP_CLOSE_BRACE; break;
 1379 case ';': result->type = CPP_SEMICOLON; break;
 1380
9ee99ac6 1381 /* @ is a punctuator in Objective C. */
 1382 case '@': result->type = CPP_ATSIGN; break;
338fa5f7 1383
/* Any other character, and the '\\' / '$' cases that jump here,
   becomes a CPP_OTHER token carrying the character itself.  */
 1384 random_char:
 1385 default:
 1386 result->type = CPP_OTHER;
33344a1c 1387 result->val.c = c;
338fa5f7 1388 break;
 1389 }
fb5ab82c 1390
 1391 return result;
338fa5f7 1392}
1393
79bd622b 1394/* An upper bound on the number of bytes needed to spell a token,
1395 including preceding whitespace. */
1396unsigned int
1397cpp_token_len (token)
1398 const cpp_token *token;
338fa5f7 1399{
79bd622b 1400 unsigned int len;
cfad5579 1401
79bd622b 1402 switch (TOKEN_SPELL (token))
f80e83a9 1403 {
c86dbc5b 1404 default: len = 0; break;
1405 case SPELL_STRING: len = token->val.str.len; break;
1406 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
f80e83a9 1407 }
79bd622b 1408 /* 1 for whitespace, 4 for comment delimeters. */
1409 return len + 5;
cfad5579 1410}
1411
f80e83a9 1412/* Write the spelling of a token TOKEN to BUFFER. The buffer must
c5ea33a8 1413 already contain the enough space to hold the token's spelling.
1414 Returns a pointer to the character after the last character
1415 written. */
79bd622b 1416unsigned char *
1417cpp_spell_token (pfile, token, buffer)
f80e83a9 1418 cpp_reader *pfile; /* Would be nice to be rid of this... */
1419 const cpp_token *token;
1420 unsigned char *buffer;
1421{
7e842f95 1422 switch (TOKEN_SPELL (token))
f80e83a9 1423 {
1424 case SPELL_OPERATOR:
1425 {
1426 const unsigned char *spelling;
1427 unsigned char c;
ab12a39c 1428
f80e83a9 1429 if (token->flags & DIGRAPH)
ee6c4e4b 1430 spelling
1431 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
31674461 1432 else if (token->flags & NAMED_OP)
1433 goto spell_ident;
f80e83a9 1434 else
7e842f95 1435 spelling = TOKEN_NAME (token);
f80e83a9 1436
1437 while ((c = *spelling++) != '\0')
1438 *buffer++ = c;
1439 }
1440 break;
ab12a39c 1441
f80e83a9 1442 case SPELL_IDENT:
31674461 1443 spell_ident:
c86dbc5b 1444 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1445 buffer += NODE_LEN (token->val.node);
f80e83a9 1446 break;
ab12a39c 1447
f80e83a9 1448 case SPELL_STRING:
1449 {
71aa9da4 1450 int left, right, tag;
1451 switch (token->type)
1452 {
1453 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1454 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
71aa9da4 1455 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1456 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1457 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1458 default: left = '\0'; right = '\0'; tag = '\0'; break;
1459 }
1460 if (tag) *buffer++ = tag;
1461 if (left) *buffer++ = left;
76faa4c0 1462 memcpy (buffer, token->val.str.text, token->val.str.len);
1463 buffer += token->val.str.len;
71aa9da4 1464 if (right) *buffer++ = right;
f80e83a9 1465 }
1466 break;
ab12a39c 1467
f80e83a9 1468 case SPELL_CHAR:
33344a1c 1469 *buffer++ = token->val.c;
f80e83a9 1470 break;
ab12a39c 1471
f80e83a9 1472 case SPELL_NONE:
7e842f95 1473 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
f80e83a9 1474 break;
1475 }
ab12a39c 1476
f80e83a9 1477 return buffer;
1478}
ab12a39c 1479
79bd622b 1480/* Returns a token as a null-terminated string. The string is
1481 temporary, and automatically freed later. Useful for diagnostics. */
1482unsigned char *
1483cpp_token_as_text (pfile, token)
6060326b 1484 cpp_reader *pfile;
f80e83a9 1485 const cpp_token *token;
6060326b 1486{
79bd622b 1487 unsigned int len = cpp_token_len (token);
5f3f0010 1488 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
6060326b 1489
79bd622b 1490 end = cpp_spell_token (pfile, token, start);
1491 end[0] = '\0';
6060326b 1492
79bd622b 1493 return start;
1494}
6060326b 1495
79bd622b 1496/* Used by C front ends. Should really move to using cpp_token_as_text. */
1497const char *
1498cpp_type2name (type)
1499 enum cpp_ttype type;
1500{
1501 return (const char *) token_spellings[type].name;
1502}
6060326b 1503
f9b5f742 1504/* Writes the spelling of token to FP, without any preceding space.
1505 Separated from cpp_spell_token for efficiency - to avoid stdio
1506 double-buffering. */
79bd622b 1507void
1508cpp_output_token (token, fp)
1509 const cpp_token *token;
1510 FILE *fp;
1511{
79bd622b 1512 switch (TOKEN_SPELL (token))
6060326b 1513 {
79bd622b 1514 case SPELL_OPERATOR:
1515 {
1516 const unsigned char *spelling;
28874558 1517 int c;
6060326b 1518
79bd622b 1519 if (token->flags & DIGRAPH)
ee6c4e4b 1520 spelling
1521 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
79bd622b 1522 else if (token->flags & NAMED_OP)
1523 goto spell_ident;
1524 else
1525 spelling = TOKEN_NAME (token);
f80e83a9 1526
28874558 1527 c = *spelling;
1528 do
1529 putc (c, fp);
1530 while ((c = *++spelling) != '\0');
79bd622b 1531 }
1532 break;
f80e83a9 1533
79bd622b 1534 spell_ident:
1535 case SPELL_IDENT:
28874558 1536 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
79bd622b 1537 break;
f80e83a9 1538
79bd622b 1539 case SPELL_STRING:
1540 {
1541 int left, right, tag;
1542 switch (token->type)
1543 {
1544 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1545 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
79bd622b 1546 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1547 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1548 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1549 default: left = '\0'; right = '\0'; tag = '\0'; break;
1550 }
1551 if (tag) putc (tag, fp);
1552 if (left) putc (left, fp);
1553 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1554 if (right) putc (right, fp);
1555 }
1556 break;
6060326b 1557
79bd622b 1558 case SPELL_CHAR:
33344a1c 1559 putc (token->val.c, fp);
79bd622b 1560 break;
6060326b 1561
79bd622b 1562 case SPELL_NONE:
1563 /* An error, most probably. */
1564 break;
f80e83a9 1565 }
6060326b 1566}
1567
79bd622b 1568/* Compare two tokens. */
1569int
1570_cpp_equiv_tokens (a, b)
1571 const cpp_token *a, *b;
6060326b 1572{
79bd622b 1573 if (a->type == b->type && a->flags == b->flags)
1574 switch (TOKEN_SPELL (a))
1575 {
1576 default: /* Keep compiler happy. */
1577 case SPELL_OPERATOR:
1578 return 1;
1579 case SPELL_CHAR:
33344a1c 1580 return a->val.c == b->val.c; /* Character. */
79bd622b 1581 case SPELL_NONE:
588d632b 1582 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
79bd622b 1583 case SPELL_IDENT:
1584 return a->val.node == b->val.node;
1585 case SPELL_STRING:
1586 return (a->val.str.len == b->val.str.len
1587 && !memcmp (a->val.str.text, b->val.str.text,
1588 a->val.str.len));
1589 }
6060326b 1590
f80e83a9 1591 return 0;
1592}
1593
79bd622b 1594/* Returns nonzero if a space should be inserted to avoid an
1595 accidental token paste for output. For simplicity, it is
1596 conservative, and occasionally advises a space where one is not
1597 needed, e.g. "." and ".2". */
f80e83a9 1598
79bd622b 1599int
1600cpp_avoid_paste (pfile, token1, token2)
6060326b 1601 cpp_reader *pfile;
79bd622b 1602 const cpp_token *token1, *token2;
6060326b 1603{
79bd622b 1604 enum cpp_ttype a = token1->type, b = token2->type;
1605 cppchar_t c;
6060326b 1606
79bd622b 1607 if (token1->flags & NAMED_OP)
1608 a = CPP_NAME;
1609 if (token2->flags & NAMED_OP)
1610 b = CPP_NAME;
6060326b 1611
79bd622b 1612 c = EOF;
1613 if (token2->flags & DIGRAPH)
ee6c4e4b 1614 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
79bd622b 1615 else if (token_spellings[b].category == SPELL_OPERATOR)
1616 c = token_spellings[b].name[0];
6060326b 1617
79bd622b 1618 /* Quickly get everything that can paste with an '='. */
ee6c4e4b 1619 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
79bd622b 1620 return 1;
6060326b 1621
79bd622b 1622 switch (a)
6060326b 1623 {
79bd622b 1624 case CPP_GREATER: return c == '>' || c == '?';
1625 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1626 case CPP_PLUS: return c == '+';
1627 case CPP_MINUS: return c == '-' || c == '>';
1628 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1629 case CPP_MOD: return c == ':' || c == '>';
1630 case CPP_AND: return c == '&';
1631 case CPP_OR: return c == '|';
1632 case CPP_COLON: return c == ':' || c == '>';
1633 case CPP_DEREF: return c == '*';
efdcc728 1634 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
79bd622b 1635 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1636 case CPP_NAME: return ((b == CPP_NUMBER
1637 && name_p (pfile, &token2->val.str))
1638 || b == CPP_NAME
1639 || b == CPP_CHAR || b == CPP_STRING); /* L */
1640 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1641 || c == '.' || c == '+' || c == '-');
1642 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
33344a1c 1643 && token1->val.c == '@'
79bd622b 1644 && (b == CPP_NAME || b == CPP_STRING));
1645 default: break;
6060326b 1646 }
6060326b 1647
deb356cf 1648 return 0;
6060326b 1649}
1650
79bd622b 1651/* Output all the remaining tokens on the current line, and a newline
f9b5f742 1652 character, to FP. Leading whitespace is removed. If there are
1653 macros, special token padding is not performed. */
6060326b 1654void
79bd622b 1655cpp_output_line (pfile, fp)
6060326b 1656 cpp_reader *pfile;
79bd622b 1657 FILE *fp;
6060326b 1658{
f9b5f742 1659 const cpp_token *token;
7e842f95 1660
f9b5f742 1661 token = cpp_get_token (pfile);
1662 while (token->type != CPP_EOF)
7e842f95 1663 {
f9b5f742 1664 cpp_output_token (token, fp);
1665 token = cpp_get_token (pfile);
1666 if (token->flags & PREV_WHITE)
1667 putc (' ', fp);
7e842f95 1668 }
1669
79bd622b 1670 putc ('\n', fp);
f80e83a9 1671}
6060326b 1672
/* Return the numeric value (0-15) of the hexadecimal digit C, which
   may be a decimal digit or a letter a-f in either case.  Aborts if
   C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if ('0' <= c && c <= '9')
    return c - '0';
  if ('a' <= c && c <= 'f')
    return 10 + (c - 'a');
  if ('A' <= c && c <= 'F')
    return 10 + (c - 'A');

  abort ();
}
1686
c8342759 1687/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1688 failure if cpplib is not parsing C++ or C99. Such failure is
1689 silent, and no variables are updated. Otherwise returns 0, and
1690 warns if -Wtraditional.
8330799c 1691
1692 [lex.charset]: The character designated by the universal character
1693 name \UNNNNNNNN is that character whose character short name in
1694 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1695 universal character name \uNNNN is that character whose character
1696 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1697 for a universal character name is less than 0x20 or in the range
1698 0x7F-0x9F (inclusive), or if the universal character name
1699 designates a character in the basic source character set, then the
1700 program is ill-formed.
1701
1702 We assume that wchar_t is Unicode, so we don't need to do any
c8342759 1703 mapping. Is this ever wrong?
8330799c 1704
c8342759 1705 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1706 LIMIT is the end of the string or charconst. PSTR is updated to
1707 point after the UCS on return, and the UCS is written into PC. */
1708
1709static int
1710maybe_read_ucs (pfile, pstr, limit, pc)
8330799c 1711 cpp_reader *pfile;
1712 const unsigned char **pstr;
1713 const unsigned char *limit;
c8342759 1714 unsigned int *pc;
8330799c 1715{
1716 const unsigned char *p = *pstr;
c8342759 1717 unsigned int code = 0;
1718 unsigned int c = *pc, length;
1719
1720 /* Only attempt to interpret a UCS for C++ and C99. */
1721 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1722 return 1;
8330799c 1723
c8342759 1724 if (CPP_WTRADITIONAL (pfile))
1725 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
8330799c 1726
f73bab03 1727 length = (c == 'u' ? 4: 8);
1728
1729 if ((size_t) (limit - p) < length)
1730 {
1731 cpp_error (pfile, "incomplete universal-character-name");
1732 /* Skip to the end to avoid more diagnostics. */
1733 p = limit;
1734 }
1735 else
1736 {
1737 for (; length; length--, p++)
8330799c 1738 {
f73bab03 1739 c = *p;
1740 if (ISXDIGIT (c))
1741 code = (code << 4) + hex_digit_value (c);
1742 else
1743 {
1744 cpp_error (pfile,
1745 "non-hex digit '%c' in universal-character-name", c);
1746 /* We shouldn't skip in case there are multibyte chars. */
1747 break;
1748 }
8330799c 1749 }
8330799c 1750 }
1751
1752#ifdef TARGET_EBCDIC
1753 cpp_error (pfile, "universal-character-name on EBCDIC target");
1754 code = 0x3f; /* EBCDIC invalid character */
1755#else
f73bab03 1756 /* True extended characters are OK. */
1757 if (code >= 0xa0
1758 && !(code & 0x80000000)
1759 && !(code >= 0xD800 && code <= 0xDFFF))
1760 ;
1761 /* The standard permits $, @ and ` to be specified as UCNs. We use
1762 hex escapes so that this also works with EBCDIC hosts. */
1763 else if (code == 0x24 || code == 0x40 || code == 0x60)
1764 ;
1765 /* Don't give another error if one occurred above. */
1766 else if (length == 0)
1767 cpp_error (pfile, "universal-character-name out of range");
8330799c 1768#endif
1769
1770 *pstr = p;
c8342759 1771 *pc = code;
1772 return 0;
8330799c 1773}
1774
1775/* Interpret an escape sequence, and return its value. PSTR points to
1776 the input pointer, which is just after the backslash. LIMIT is how
c8342759 1777 much text we have. MASK is a bitmask for the precision for the
1778 destination type (char or wchar_t). TRADITIONAL, if true, does not
1779 interpret escapes that did not exist in traditional C.
8330799c 1780
c8342759 1781 Handles all relevant diagnostics. */
1782
1783unsigned int
1784cpp_parse_escape (pfile, pstr, limit, mask, traditional)
8330799c 1785 cpp_reader *pfile;
1786 const unsigned char **pstr;
1787 const unsigned char *limit;
c8342759 1788 unsigned HOST_WIDE_INT mask;
8330799c 1789 int traditional;
1790{
1791 int unknown = 0;
1792 const unsigned char *str = *pstr;
1793 unsigned int c = *str++;
1794
1795 switch (c)
1796 {
1797 case '\\': case '\'': case '"': case '?': break;
1798 case 'b': c = TARGET_BS; break;
1799 case 'f': c = TARGET_FF; break;
1800 case 'n': c = TARGET_NEWLINE; break;
1801 case 'r': c = TARGET_CR; break;
1802 case 't': c = TARGET_TAB; break;
1803 case 'v': c = TARGET_VT; break;
1804
1805 case '(': case '{': case '[': case '%':
1806 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1807 '\%' is used to prevent SCCS from getting confused. */
1808 unknown = CPP_PEDANTIC (pfile);
1809 break;
1810
1811 case 'a':
1812 if (CPP_WTRADITIONAL (pfile))
1813 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1814 if (!traditional)
1815 c = TARGET_BELL;
1816 break;
1817
1818 case 'e': case 'E':
1819 if (CPP_PEDANTIC (pfile))
1820 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1821 c = TARGET_ESC;
1822 break;
1823
8330799c 1824 case 'u': case 'U':
c8342759 1825 unknown = maybe_read_ucs (pfile, &str, limit, &c);
8330799c 1826 break;
1827
1828 case 'x':
1829 if (CPP_WTRADITIONAL (pfile))
1830 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1831
1832 if (!traditional)
1833 {
1834 unsigned int i = 0, overflow = 0;
1835 int digits_found = 0;
1836
1837 while (str < limit)
1838 {
1839 c = *str;
1840 if (! ISXDIGIT (c))
1841 break;
1842 str++;
1843 overflow |= i ^ (i << 4 >> 4);
1844 i = (i << 4) + hex_digit_value (c);
1845 digits_found = 1;
1846 }
1847
1848 if (!digits_found)
1849 cpp_error (pfile, "\\x used with no following hex digits");
1850
1851 if (overflow | (i != (i & mask)))
1852 {
1853 cpp_pedwarn (pfile, "hex escape sequence out of range");
1854 i &= mask;
1855 }
1856 c = i;
1857 }
1858 break;
1859
1860 case '0': case '1': case '2': case '3':
1861 case '4': case '5': case '6': case '7':
1862 {
1863 unsigned int i = c - '0';
1864 int count = 0;
1865
1866 while (str < limit && ++count < 3)
1867 {
1868 c = *str;
1869 if (c < '0' || c > '7')
1870 break;
1871 str++;
1872 i = (i << 3) + c - '0';
1873 }
1874
1875 if (i != (i & mask))
1876 {
1877 cpp_pedwarn (pfile, "octal escape sequence out of range");
1878 i &= mask;
1879 }
1880 c = i;
1881 }
1882 break;
1883
1884 default:
1885 unknown = 1;
1886 break;
1887 }
1888
1889 if (unknown)
1890 {
1891 if (ISGRAPH (c))
1892 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1893 else
1894 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1895 }
1896
c8342759 1897 if (c > mask)
1898 cpp_pedwarn (pfile, "escape sequence out of range for character");
1899
8330799c 1900 *pstr = str;
1901 return c;
1902}
1903
1904#ifndef MAX_CHAR_TYPE_SIZE
1905#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1906#endif
1907
1908#ifndef MAX_WCHAR_TYPE_SIZE
1909#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1910#endif
1911
1912/* Interpret a (possibly wide) character constant in TOKEN.
1913 WARN_MULTI warns about multi-character charconsts, if not
1914 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1915 that did not exist in traditional C. PCHARS_SEEN points to a
1916 variable that is filled in with the number of characters seen. */
1917HOST_WIDE_INT
1918cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1919 cpp_reader *pfile;
1920 const cpp_token *token;
1921 int warn_multi;
1922 int traditional;
1923 unsigned int *pchars_seen;
1924{
1925 const unsigned char *str = token->val.str.text;
1926 const unsigned char *limit = str + token->val.str.len;
1927 unsigned int chars_seen = 0;
1928 unsigned int width, max_chars, c;
0d086e18 1929 unsigned HOST_WIDE_INT mask;
1930 HOST_WIDE_INT result = 0;
8330799c 1931
1932#ifdef MULTIBYTE_CHARS
1933 (void) local_mbtowc (NULL, NULL, 0);
1934#endif
1935
1936 /* Width in bits. */
1937 if (token->type == CPP_CHAR)
1938 width = MAX_CHAR_TYPE_SIZE;
1939 else
1940 width = MAX_WCHAR_TYPE_SIZE;
1941
1942 if (width < HOST_BITS_PER_WIDE_INT)
1943 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1944 else
1945 mask = ~0;
1946 max_chars = HOST_BITS_PER_WIDE_INT / width;
1947
1948 while (str < limit)
1949 {
1950#ifdef MULTIBYTE_CHARS
1951 wchar_t wc;
1952 int char_len;
1953
1954 char_len = local_mbtowc (&wc, str, limit - str);
1955 if (char_len == -1)
1956 {
1957 cpp_warning (pfile, "ignoring invalid multibyte character");
1958 c = *str++;
1959 }
1960 else
1961 {
1962 str += char_len;
1963 c = wc;
1964 }
1965#else
1966 c = *str++;
1967#endif
1968
1969 if (c == '\\')
c8342759 1970 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
8330799c 1971
1972#ifdef MAP_CHARACTER
1973 if (ISPRINT (c))
1974 c = MAP_CHARACTER (c);
1975#endif
1976
1977 /* Merge character into result; ignore excess chars. */
1978 if (++chars_seen <= max_chars)
1979 {
1980 if (width < HOST_BITS_PER_WIDE_INT)
1981 result = (result << width) | (c & mask);
1982 else
1983 result = c;
1984 }
1985 }
1986
1987 if (chars_seen == 0)
1988 cpp_error (pfile, "empty character constant");
1989 else if (chars_seen > max_chars)
1990 {
1991 chars_seen = max_chars;
f73bab03 1992 cpp_warning (pfile, "character constant too long");
8330799c 1993 }
1994 else if (chars_seen > 1 && !traditional && warn_multi)
1995 cpp_warning (pfile, "multi-character character constant");
1996
1997 /* If char type is signed, sign-extend the constant. The
1998 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
1999 if (token->type == CPP_CHAR && chars_seen)
2000 {
2001 unsigned int nbits = chars_seen * width;
2002 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2003
2004 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2005 || ((result >> (nbits - 1)) & 1) == 0)
2006 result &= mask;
2007 else
2008 result |= ~mask;
2009 }
2010
2011 *pchars_seen = chars_seen;
2012 return result;
2013}
2014
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest data area a new buffer is given.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the exclusive upper limit on
   the size of a free buffer that may be reused for a request of
   MIN_SIZE bytes.  EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size
   requested when extending BUFF: MIN_EXTRA plus twice the space
   between BUFF's cur and limit pointers.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (8000 + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

/* Never instantiated: this structure exists only so that the offset
   of U can be measured.  The leading char forces the compiler to
   insert whatever padding the union of a double and a pointer
   requires, and that offset is used as the default alignment.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to the next multiple of ALIGN.  ALIGN must be a power
   of two.  */
#define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
2038
1785b647 2039/* Create a new allocation buffer. Place the control block at the end
2040 of the buffer, so that buffer overflows will cause immediate chaos. */
06c92cbc 2041static _cpp_buff *
2042new_buff (len)
2043 unsigned int len;
2044{
2045 _cpp_buff *result;
2046 char *base;
2047
084163dc 2048 if (len < MIN_BUFF_SIZE)
2049 len = MIN_BUFF_SIZE;
06c92cbc 2050 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
2051
2052 base = xmalloc (len + sizeof (_cpp_buff));
2053 result = (_cpp_buff *) (base + len);
2054 result->base = base;
2055 result->cur = base;
2056 result->limit = base + len;
2057 result->next = NULL;
2058 return result;
2059}
2060
2061/* Place a chain of unwanted allocation buffers on the free list. */
2062void
2063_cpp_release_buff (pfile, buff)
2064 cpp_reader *pfile;
2065 _cpp_buff *buff;
2066{
2067 _cpp_buff *end = buff;
2068
2069 while (end->next)
2070 end = end->next;
2071 end->next = pfile->free_buffs;
2072 pfile->free_buffs = buff;
2073}
2074
2075/* Return a free buffer of size at least MIN_SIZE. */
2076_cpp_buff *
2077_cpp_get_buff (pfile, min_size)
2078 cpp_reader *pfile;
2079 unsigned int min_size;
2080{
2081 _cpp_buff *result, **p;
2082
2083 for (p = &pfile->free_buffs;; p = &(*p)->next)
2084 {
084163dc 2085 unsigned int size;
2086
2087 if (*p == NULL)
06c92cbc 2088 return new_buff (min_size);
084163dc 2089 result = *p;
2090 size = result->limit - result->base;
2091 /* Return a buffer that's big enough, but don't waste one that's
2092 way too big. */
2093 if (size >= min_size && size < BUFF_SIZE_UPPER_BOUND (min_size))
06c92cbc 2094 break;
2095 }
2096
2097 *p = result->next;
2098 result->next = NULL;
2099 result->cur = result->base;
2100 return result;
2101}
2102
2103/* Return a buffer chained on the end of BUFF. Copy to it the
2104 uncommitted remaining bytes of BUFF, with at least MIN_EXTRA more
2105 bytes. */
2106_cpp_buff *
2107_cpp_extend_buff (pfile, buff, min_extra)
2108 cpp_reader *pfile;
2109 _cpp_buff *buff;
2110 unsigned int min_extra;
2111{
084163dc 2112 unsigned int size = EXTENDED_BUFF_SIZE (buff, min_extra);
06c92cbc 2113
2114 buff->next = _cpp_get_buff (pfile, size);
2115 memcpy (buff->next->base, buff->cur, buff->limit - buff->cur);
2116 return buff->next;
2117}
2118
2119/* Free a chain of buffers starting at BUFF. */
2120void
2121_cpp_free_buff (buff)
2122 _cpp_buff *buff;
2123{
2124 _cpp_buff *next;
2125
2126 for (; buff; buff = next)
2127 {
2128 next = buff->next;
2129 free (buff->base);
2130 }
2131}
deb356cf 2132
79bd622b 2133static int
084163dc 2134chunk_suitable (chunk, size)
79bd622b 2135 cpp_chunk *chunk;
2136 unsigned int size;
2137{
2138 /* Being at least twice SIZE means we can use memcpy in
2139 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2140 anyway. */
084163dc 2141 return (chunk && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
f80e83a9 2142}
6060326b 2143
79bd622b 2144/* Returns the end of the new pool. PTR points to a char in the old
2145 pool, and is updated to point to the same char in the new pool. */
2146unsigned char *
2147_cpp_next_chunk (pool, len, ptr)
2148 cpp_pool *pool;
2149 unsigned int len;
2150 unsigned char **ptr;
f80e83a9 2151{
79bd622b 2152 cpp_chunk *chunk = pool->cur->next;
6060326b 2153
79bd622b 2154 /* LEN is the minimum size we want in the new pool. */
2155 len += POOL_ROOM (pool);
084163dc 2156 if (! chunk_suitable (chunk, len))
f80e83a9 2157 {
79bd622b 2158 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
6060326b 2159
79bd622b 2160 chunk->next = pool->cur->next;
2161 pool->cur->next = chunk;
6060326b 2162 }
2163
79bd622b 2164 /* Update the pointer before changing chunk's front. */
2165 if (ptr)
2166 *ptr += chunk->base - POOL_FRONT (pool);
f80e83a9 2167
79bd622b 2168 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2169 chunk->front = chunk->base;
f80e83a9 2170
79bd622b 2171 pool->cur = chunk;
2172 return POOL_LIMIT (pool);
6060326b 2173}
2174
79bd622b 2175static cpp_chunk *
2176new_chunk (size)
2177 unsigned int size;
f80e83a9 2178{
79bd622b 2179 unsigned char *base;
2180 cpp_chunk *result;
89b05ef6 2181
a28b091b 2182 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
79bd622b 2183 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2184 /* Put the chunk descriptor at the end. Then chunk overruns will
2185 cause obvious chaos. */
2186 result = (cpp_chunk *) (base + size);
2187 result->base = base;
2188 result->front = base;
2189 result->limit = base + size;
2190 result->next = 0;
deb356cf 2191
79bd622b 2192 return result;
f80e83a9 2193}
2194
79bd622b 2195void
2196_cpp_init_pool (pool, size, align, temp)
2197 cpp_pool *pool;
2198 unsigned int size, align, temp;
2199{
2200 if (align == 0)
2201 align = DEFAULT_ALIGNMENT;
2202 if (align & (align - 1))
2203 abort ();
2204 pool->align = align;
162cee98 2205 pool->first = new_chunk (size);
2206 pool->cur = pool->first;
79bd622b 2207 if (temp)
2208 pool->cur->next = pool->cur;
f80e83a9 2209}
2210
79bd622b 2211void
2212_cpp_free_pool (pool)
2213 cpp_pool *pool;
89b05ef6 2214{
162cee98 2215 cpp_chunk *chunk = pool->first, *next;
89b05ef6 2216
79bd622b 2217 do
89b05ef6 2218 {
79bd622b 2219 next = chunk->next;
2220 free (chunk->base);
2221 chunk = next;
89b05ef6 2222 }
162cee98 2223 while (chunk && chunk != pool->first);
f80e83a9 2224}
f80e83a9 2225
79bd622b 2226/* Reserve LEN bytes from a memory pool. */
2227unsigned char *
2228_cpp_pool_reserve (pool, len)
2229 cpp_pool *pool;
2230 unsigned int len;
f80e83a9 2231{
a28b091b 2232 len = POOL_ALIGN (len, pool->align);
79bd622b 2233 if (len > (unsigned int) POOL_ROOM (pool))
2234 _cpp_next_chunk (pool, len, 0);
f80e83a9 2235
79bd622b 2236 return POOL_FRONT (pool);
6060326b 2237}
2238
79bd622b 2239/* Allocate LEN bytes from a memory pool. */
2240unsigned char *
2241_cpp_pool_alloc (pool, len)
2242 cpp_pool *pool;
2243 unsigned int len;
f80e83a9 2244{
79bd622b 2245 unsigned char *result = _cpp_pool_reserve (pool, len);
deb356cf 2246
79bd622b 2247 POOL_COMMIT (pool, len);
2248 return result;
f80e83a9 2249}