]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/cpplex.c
* solaris_threads.c (MAX_ORIG_STACK_SIZE) [I386]: Provide special
[thirdparty/gcc.git] / gcc / cpplex.c
CommitLineData
0578f103 1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
6060326b 7 Single-pass line tokenization by Neil Booth, April 2000
0578f103 8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
79bd622b 23/* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
27
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
f80e83a9 36
0578f103 37#include "config.h"
38#include "system.h"
0578f103 39#include "cpplib.h"
40#include "cpphash.h"
41
8330799c 42/* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
45#ifdef CROSS_COMPILE
46#undef MULTIBYTE_CHARS
47#endif
48
49#ifdef MULTIBYTE_CHARS
50#include "mbchar.h"
51#include <locale.h>
52#endif
53
79bd622b 54/* Spelling categories recorded in the token_spellings table.  Tokens
   with SPELL_STRING store their spelling in the token list, and its
   length in the token->val.name.len.  */
56enum spell_type
241e762e 57{
79bd622b 58 SPELL_OPERATOR = 0,
59 SPELL_CHAR,
60 SPELL_IDENT,
61 SPELL_STRING,
62 SPELL_NONE
241e762e 63};
64
/* One entry of the token_spellings table: the spelling category of a
   token type, paired with a name string for it.  */
79bd622b 65struct token_spelling
241e762e 66{
79bd622b 67 enum spell_type category;
68 const unsigned char *name;
241e762e 69};
70
/* Spellings of the six digraph forms.  */
79bd622b 71const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
72 U":>", U"<%", U"%>"};
73
/* Build token_spellings by expanding TTYPE_TABLE.  An OP (e, s) entry
   gets category SPELL_OPERATOR and the string S as its name; a
   TK (e, s) entry uses S as its category and the stringified
   enumerator E as its name.  OP and TK are only defined for the
   duration of the table.  */
74#define OP(e, s) { SPELL_OPERATOR, U s },
75#define TK(e, s) { s, U STRINGX (e) },
76const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
77#undef OP
78#undef TK
79
/* Look up the spelling category and the name string of TOKEN, indexing
   token_spellings by the token's type.  */
80#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
e2f9a79f 82
36a0aa7c 83static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
c808d026 84static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
85static cppchar_t get_effective_char PARAMS ((cpp_reader *));
338fa5f7 86
f80e83a9 87static int skip_block_comment PARAMS ((cpp_reader *));
f669338a 88static int skip_line_comment PARAMS ((cpp_reader *));
338fa5f7 89static void adjust_column PARAMS ((cpp_reader *));
90static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
66a5287e 91static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
92static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
93 const U_CHAR *));
79bd622b 94static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
95static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
338fa5f7 96static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
79bd622b 97static void unterminated PARAMS ((cpp_reader *, int));
338fa5f7 98static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
99static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
c808d026 100static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
f669338a 101static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
79bd622b 102static int name_p PARAMS ((cpp_reader *, const cpp_string *));
c8342759 103static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
104 const unsigned char *, unsigned int *));
83dcbb5c 105static tokenrun *next_tokenrun PARAMS ((tokenrun *));
e916a356 106
79bd622b 107static cpp_chunk *new_chunk PARAMS ((unsigned int));
108static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
8330799c 109static unsigned int hex_digit_value PARAMS ((unsigned int));
bce8e0c0 110
f80e83a9 111/* Utility routine:
2c63d6c8 112
76faa4c0 113 Compares, the token TOKEN to the NUL-terminated string STRING.
114 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
bce8e0c0 115
f80e83a9 116int
76faa4c0 117cpp_ideq (token, string)
118 const cpp_token *token;
f80e83a9 119 const char *string;
120{
76faa4c0 121 if (token->type != CPP_NAME)
f80e83a9 122 return 0;
76faa4c0 123
c86dbc5b 124 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
bce8e0c0 125}
50fd6b48 126
338fa5f7 127/* Call when meeting a newline. Returns the character after the newline
128 (or carriage-return newline combination), or EOF. */
129static cppchar_t
36a0aa7c 130handle_newline (pfile, newline_char)
131 cpp_reader *pfile;
338fa5f7 132 cppchar_t newline_char;
133{
36a0aa7c 134 cpp_buffer *buffer;
338fa5f7 135 cppchar_t next = EOF;
136
36a0aa7c 137 pfile->line++;
36a0aa7c 138 buffer = pfile->buffer;
338fa5f7 139 buffer->col_adjust = 0;
338fa5f7 140 buffer->line_base = buffer->cur;
141
142 /* Handle CR-LF and LF-CR combinations, get the next character. */
143 if (buffer->cur < buffer->rlimit)
144 {
145 next = *buffer->cur++;
146 if (next + newline_char == '\r' + '\n')
147 {
148 buffer->line_base = buffer->cur;
149 if (buffer->cur < buffer->rlimit)
150 next = *buffer->cur++;
151 else
152 next = EOF;
153 }
154 }
155
156 buffer->read_ahead = next;
157 return next;
158}
159
160/* Subroutine of skip_escaped_newlines; called when a trigraph is
161 encountered. It warns if necessary, and returns true if the
162 trigraph should be honoured. FROM_CHAR is the third character of a
163 trigraph, and presumed to be the previous character for position
164 reporting. */
0578f103 165static int
338fa5f7 166trigraph_ok (pfile, from_char)
0578f103 167 cpp_reader *pfile;
338fa5f7 168 cppchar_t from_char;
0578f103 169{
f80e83a9 170 int accept = CPP_OPTION (pfile, trigraphs);
171
f669338a 172 /* Don't warn about trigraphs in comments. */
173 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
0578f103 174 {
338fa5f7 175 cpp_buffer *buffer = pfile->buffer;
1ea7ed21 176
f80e83a9 177 if (accept)
1ea7ed21 178 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
f80e83a9 179 "trigraph ??%c converted to %c",
338fa5f7 180 (int) from_char,
181 (int) _cpp_trigraph_map[from_char]);
4b912310 182 else if (buffer->cur != buffer->last_Wtrigraphs)
183 {
184 buffer->last_Wtrigraphs = buffer->cur;
1ea7ed21 185 cpp_warning_with_line (pfile, pfile->line,
4b912310 186 CPP_BUF_COL (buffer) - 2,
187 "trigraph ??%c ignored", (int) from_char);
188 }
0578f103 189 }
338fa5f7 190
f80e83a9 191 return accept;
0578f103 192}
193
338fa5f7 194/* Give the token being built type T and discard the read-ahead
   character by setting buffer->read_ahead to EOF.  Assumes local
   variables buffer and result.  */
195#define ACCEPT_CHAR(t) \
196 do { result->type = t; buffer->read_ahead = EOF; } while (0)
197
198/* Save and restore the read position of the buffer.  Both macros
   assume local variables buffer and saved_cur.

   When we move to multibyte character sets, add to these something
   that saves and restores the state of the multibyte conversion
   library.  This probably involves saving and restoring a "cookie".
   In the case of glibc it is an 8-byte structure, so is not a high
   overhead operation.  In any case, it's out of the fast path.  */
203#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
204#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
205
206/* Skips any escaped newlines introduced by NEXT, which is either a
207 '?' or a '\\'. Returns the next character, which will also have
396ffa86 208 been placed in buffer->read_ahead. This routine performs
209 preprocessing stages 1 and 2 of the ISO C standard. */
338fa5f7 210static cppchar_t
c808d026 211skip_escaped_newlines (pfile, next)
212 cpp_reader *pfile;
338fa5f7 213 cppchar_t next;
0578f103 214{
c808d026 215 cpp_buffer *buffer = pfile->buffer;
216
396ffa86 217 /* Only do this if we apply stages 1 and 2. */
218 if (!buffer->from_stage3)
f80e83a9 219 {
396ffa86 220 cppchar_t next1;
221 const unsigned char *saved_cur;
222 int space;
223
224 do
338fa5f7 225 {
396ffa86 226 if (buffer->cur == buffer->rlimit)
227 break;
228
229 SAVE_STATE ();
230 if (next == '?')
338fa5f7 231 {
396ffa86 232 next1 = *buffer->cur++;
233 if (next1 != '?' || buffer->cur == buffer->rlimit)
234 {
235 RESTORE_STATE ();
236 break;
237 }
238
239 next1 = *buffer->cur++;
240 if (!_cpp_trigraph_map[next1]
c808d026 241 || !trigraph_ok (pfile, next1))
396ffa86 242 {
243 RESTORE_STATE ();
244 break;
245 }
246
247 /* We have a full trigraph here. */
248 next = _cpp_trigraph_map[next1];
249 if (next != '\\' || buffer->cur == buffer->rlimit)
250 break;
251 SAVE_STATE ();
252 }
253
254 /* We have a backslash, and room for at least one more character. */
255 space = 0;
256 do
257 {
258 next1 = *buffer->cur++;
259 if (!is_nvspace (next1))
260 break;
261 space = 1;
338fa5f7 262 }
396ffa86 263 while (buffer->cur < buffer->rlimit);
f80e83a9 264
396ffa86 265 if (!is_vspace (next1))
338fa5f7 266 {
267 RESTORE_STATE ();
268 break;
269 }
0578f103 270
c808d026 271 if (space && !pfile->state.lexing_comment)
272 cpp_warning (pfile, "backslash and newline separated by space");
338fa5f7 273
c808d026 274 next = handle_newline (pfile, next1);
396ffa86 275 if (next == EOF)
c808d026 276 cpp_pedwarn (pfile, "backslash-newline at end of file");
338fa5f7 277 }
396ffa86 278 while (next == '\\' || next == '?');
f80e83a9 279 }
0578f103 280
338fa5f7 281 buffer->read_ahead = next;
282 return next;
0578f103 283}
284
338fa5f7 285/* Obtain the next character, after trigraph conversion and skipping
286 an arbitrary string of escaped newlines. The common case of no
287 trigraphs or escaped newlines falls through quickly. */
288static cppchar_t
c808d026 289get_effective_char (pfile)
290 cpp_reader *pfile;
852d1b04 291{
c808d026 292 cpp_buffer *buffer = pfile->buffer;
338fa5f7 293 cppchar_t next = EOF;
294
295 if (buffer->cur < buffer->rlimit)
296 {
297 next = *buffer->cur++;
298
299 /* '?' can introduce trigraphs (and therefore backslash); '\\'
300 can introduce escaped newlines, which we want to skip, or
301 UCNs, which, depending upon lexer state, we will handle in
302 the future. */
303 if (next == '?' || next == '\\')
c808d026 304 next = skip_escaped_newlines (pfile, next);
338fa5f7 305 }
306
307 buffer->read_ahead = next;
308 return next;
852d1b04 309}
310
338fa5f7 311/* Skip a C-style block comment.  We find the end of the comment by
   seeing if an asterisk is before every '/' we encounter.  Returns
   non-zero if comment terminated by EOF, zero otherwise.

   state.lexing_comment is set for the duration of the scan, and
   buffer->read_ahead is cleared (set to EOF) on exit.  */
f80e83a9 314static int
315skip_block_comment (pfile)
0578f103 316 cpp_reader *pfile;
317{
f80e83a9 318 cpp_buffer *buffer = pfile->buffer;
/* C is the current character and PREVC the one before it.  */
63e1abce 319 cppchar_t c = EOF, prevc = EOF;
338fa5f7 320
f669338a 321 pfile->state.lexing_comment = 1;
338fa5f7 322 while (buffer->cur != buffer->rlimit)
0578f103 323 {
338fa5f7 324 prevc = c, c = *buffer->cur++;
325
/* Entered by goto when C has already been fetched below.  */
326 next_char:
327 /* FIXME: For speed, create a new character class of characters
   of interest inside block comments.  */
338fa5f7 329 if (c == '?' || c == '\\')
c808d026 330 c = skip_escaped_newlines (pfile, c);
f80e83a9 331
338fa5f7 332 /* People like decorating comments with '*', so check for '/'
   instead for efficiency.  */
f80e83a9 334 if (c == '/')
0578f103 335 {
/* A '/' directly after a '*' closes the comment.  */
338fa5f7 336 if (prevc == '*')
337 break;
f80e83a9 338
338fa5f7 339 /* Warn about potential nested comments, but not if the '/'
   comes immediately before the true comment delimiter.
   Don't bother to get it right across escaped newlines.  */
338fa5f7 342 if (CPP_OPTION (pfile, warn_comments)
343 && buffer->cur != buffer->rlimit)
0578f103 344 {
338fa5f7 345 prevc = c, c = *buffer->cur++;
346 if (c == '*' && buffer->cur != buffer->rlimit)
347 {
348 prevc = c, c = *buffer->cur++;
349 if (c != '/')
1ea7ed21 350 cpp_warning_with_line (pfile, pfile->line,
351 CPP_BUF_COL (buffer) - 2,
338fa5f7 352 "\"/*\" within comment");
353 }
/* C was read ahead above; classify it without fetching again.  */
354 goto next_char;
0578f103 355 }
0578f103 356 }
78719282 357 else if (is_vspace (c))
0578f103 358 {
/* handle_newline returns the character after the newline.  */
36a0aa7c 359 prevc = c, c = handle_newline (pfile, c);
338fa5f7 360 goto next_char;
0578f103 361 }
b86584f6 362 else if (c == '\t')
338fa5f7 363 adjust_column (pfile);
0578f103 364 }
f80e83a9 365
f669338a 366 pfile->state.lexing_comment = 0;
338fa5f7 367 buffer->read_ahead = EOF;
/* Zero only if the last two characters seen were the closing pair.  */
338fa5f7 368 return c != '/' || prevc != '*';
0578f103 369}
370
241e762e 371/* Skip a C++ line comment. Handles escaped newlines. Returns
338fa5f7 372 non-zero if a multiline comment. The following new line, if any,
373 is left in buffer->read_ahead. */
f80e83a9 374static int
f669338a 375skip_line_comment (pfile)
376 cpp_reader *pfile;
0578f103 377{
f669338a 378 cpp_buffer *buffer = pfile->buffer;
1ea7ed21 379 unsigned int orig_line = pfile->line;
338fa5f7 380 cppchar_t c;
f80e83a9 381
f669338a 382 pfile->state.lexing_comment = 1;
338fa5f7 383 do
f80e83a9 384 {
338fa5f7 385 c = EOF;
386 if (buffer->cur == buffer->rlimit)
387 break;
f80e83a9 388
338fa5f7 389 c = *buffer->cur++;
390 if (c == '?' || c == '\\')
c808d026 391 c = skip_escaped_newlines (pfile, c);
f80e83a9 392 }
338fa5f7 393 while (!is_vspace (c));
0578f103 394
f669338a 395 pfile->state.lexing_comment = 0;
338fa5f7 396 buffer->read_ahead = c; /* Leave any newline for caller. */
1ea7ed21 397 return orig_line != pfile->line;
f80e83a9 398}
0578f103 399
338fa5f7 400/* pfile->buffer->cur is one beyond the \t character. Update
401 col_adjust so we track the column correctly. */
b86584f6 402static void
338fa5f7 403adjust_column (pfile)
b86584f6 404 cpp_reader *pfile;
b86584f6 405{
338fa5f7 406 cpp_buffer *buffer = pfile->buffer;
407 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
b86584f6 408
409 /* Round it up to multiple of the tabstop, but subtract 1 since the
410 tab itself occupies a character position. */
338fa5f7 411 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
412 - col % CPP_OPTION (pfile, tabstop)) - 1;
b86584f6 413}
414
338fa5f7 415/* Skips whitespace, saving the next non-whitespace character.
416 Adjusts pfile->col_adjust to account for tabs. Without this,
417 tokens might be assigned an incorrect column. */
f80e83a9 418static void
338fa5f7 419skip_whitespace (pfile, c)
f80e83a9 420 cpp_reader *pfile;
338fa5f7 421 cppchar_t c;
f80e83a9 422{
423 cpp_buffer *buffer = pfile->buffer;
338fa5f7 424 unsigned int warned = 0;
0578f103 425
338fa5f7 426 do
f80e83a9 427 {
78719282 428 /* Horizontal space always OK. */
429 if (c == ' ')
338fa5f7 430 ;
78719282 431 else if (c == '\t')
338fa5f7 432 adjust_column (pfile);
433 /* Just \f \v or \0 left. */
78719282 434 else if (c == '\0')
f80e83a9 435 {
78719282 436 if (!warned)
338fa5f7 437 {
438 cpp_warning (pfile, "null character(s) ignored");
439 warned = 1;
440 }
0578f103 441 }
79bd622b 442 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
1ea7ed21 443 cpp_pedwarn_with_line (pfile, pfile->line,
78719282 444 CPP_BUF_COL (buffer),
445 "%s in preprocessing directive",
446 c == '\f' ? "form feed" : "vertical tab");
338fa5f7 447
448 c = EOF;
449 if (buffer->cur == buffer->rlimit)
450 break;
451 c = *buffer->cur++;
0578f103 452 }
2c0e001b 453 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
338fa5f7 454 while (is_nvspace (c));
455
456 /* Remember the next character. */
457 buffer->read_ahead = c;
f80e83a9 458}
0578f103 459
79bd622b 460/* See if the characters of a number token are valid in a name (no
461 '.', '+' or '-'). */
462static int
463name_p (pfile, string)
464 cpp_reader *pfile;
465 const cpp_string *string;
466{
467 unsigned int i;
468
469 for (i = 0; i < string->len; i++)
470 if (!is_idchar (string->text[i]))
471 return 0;
472
473 return 1;
474}
475
66a5287e 476/* Parse an identifier, skipping embedded backslash-newlines. This is
477 a critical inner loop. The common case is an identifier which has
478 not been split by backslash-newline, does not contain a dollar
479 sign, and has already been scanned (roughly 10:1 ratio of
480 seen:unseen identifiers in normal code; the distribution is
481 Poisson-like). Second most common case is a new identifier, not
482 split and no dollar sign. The other possibilities are rare and
483 have been relegated to parse_identifier_slow. */
338fa5f7 484
485static cpp_hashnode *
66a5287e 486parse_identifier (pfile)
0578f103 487 cpp_reader *pfile;
0578f103 488{
79bd622b 489 cpp_hashnode *result;
66a5287e 490 const U_CHAR *cur, *rlimit;
491
492 /* Fast-path loop. Skim over a normal identifier.
493 N.B. ISIDNUM does not include $. */
494 cur = pfile->buffer->cur - 1;
495 rlimit = pfile->buffer->rlimit;
496 do
497 cur++;
498 while (cur < rlimit && ISIDNUM (*cur));
499
500 /* Check for slow-path cases. */
501 if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
502 result = parse_identifier_slow (pfile, cur);
503 else
504 {
505 const U_CHAR *base = pfile->buffer->cur - 1;
506 result = (cpp_hashnode *)
507 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
508 pfile->buffer->cur = cur;
509 }
510
511 /* Rarely, identifiers require diagnostics when lexed.
512 XXX Has to be forced out of the fast path. */
513 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
514 && !pfile->state.skipping, 0))
515 {
516 /* It is allowed to poison the same identifier twice. */
517 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
518 cpp_error (pfile, "attempt to use poisoned \"%s\"",
519 NODE_NAME (result));
520
521 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
522 replacement list of a variadic macro. */
523 if (result == pfile->spec_nodes.n__VA_ARGS__
524 && !pfile->state.va_args_ok)
525 cpp_pedwarn (pfile,
526 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
527 }
528
529 return result;
530}
531
532/* Slow path.  This handles identifiers which have been split, and
   identifiers which contain dollar signs.  The part of the identifier
   from PFILE->buffer->cur-1 to CUR has already been scanned.

   The spelling is assembled on the hash table's obstack, since it may
   not be contiguous in the input buffer.  Returns the hash node for
   the identifier, and leaves the character that follows it in
   buffer->read_ahead.  */
535static cpp_hashnode *
536parse_identifier_slow (pfile, cur)
537 cpp_reader *pfile;
538 const U_CHAR *cur;
539{
338fa5f7 540 cpp_buffer *buffer = pfile->buffer;
66a5287e 541 const U_CHAR *base = buffer->cur - 1;
0d086e18 542 struct obstack *stack = &pfile->hash_table->stack;
66a5287e 543 unsigned int c, saw_dollar = 0, len;
544
545 /* Copy the part of the token which is known to be okay. */
546 obstack_grow (stack, base, cur - base);
f80e83a9 547
66a5287e 548 /* Now process the part which isn't.  We are looking at one of
   '$', '\\', or '?' on entry to this loop.  */
550 c = *cur++;
551 buffer->cur = cur;
338fa5f7 552 do
f80e83a9 553 {
/* Append a run of identifier characters, counting dollar signs.  */
66a5287e 554 while (is_idchar (c))
555 {
556 obstack_1grow (stack, c);
0578f103 557
66a5287e 558 if (c == '$')
559 saw_dollar++;
71aa9da4 560
66a5287e 561 c = EOF;
562 if (buffer->cur == buffer->rlimit)
563 break;
71aa9da4 564
66a5287e 565 c = *buffer->cur++;
566 }
71aa9da4 567
338fa5f7 568 /* Potential escaped newline? */
569 if (c != '?' && c != '\\')
66a5287e 570 break;
c808d026 571 c = skip_escaped_newlines (pfile, c);
f80e83a9 572 }
/* Carry on if the identifier resumes after what was skipped.  */
338fa5f7 573 while (is_idchar (c));
574
79bd622b 575 /* Remember the next character. */
576 buffer->read_ahead = c;
577
338fa5f7 578 /* $ is not an identifier character in the standard, but is commonly
   accepted as an extension.  Don't warn about it in skipped
   conditional blocks.  */
5e878517 581 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
338fa5f7 582 cpp_pedwarn (pfile, "'$' character(s) in identifier");
583
79bd622b 584 /* Identifiers are null-terminated.  The length is taken before the
   terminator is appended, so it does not count the NUL.  */
0d086e18 585 len = obstack_object_size (stack);
586 obstack_1grow (stack, '\0');
79bd622b 587
66a5287e 588 return (cpp_hashnode *)
0d086e18 589 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
0578f103 590}
591
338fa5f7 592/* Parse a number, skipping embedded backslash-newlines.

   C is the first character of the number.  If LEADING_PERIOD is
   non-zero a '.' is stored in front of it.  The spelling is built in
   pfile->ident_pool and NUL-terminated; NUMBER->text and NUMBER->len
   are set to describe it, with the length not counting the NUL.  The
   first character that is not part of the number is left in
   buffer->read_ahead.  */
0578f103 593static void
79bd622b 594parse_number (pfile, number, c, leading_period)
0578f103 595 cpp_reader *pfile;
338fa5f7 596 cpp_string *number;
597 cppchar_t c;
79bd622b 598 int leading_period;
0578f103 599{
f80e83a9 600 cpp_buffer *buffer = pfile->buffer;
5f3f0010 601 cpp_pool *pool = &pfile->ident_pool;
79bd622b 602 unsigned char *dest, *limit;
0578f103 603
79bd622b 604 dest = POOL_FRONT (pool);
605 limit = POOL_LIMIT (pool);
f669338a 606
79bd622b 607 /* Place a leading period. */
608 if (leading_period)
609 {
610 if (dest >= limit)
611 limit = _cpp_next_chunk (pool, 0, &dest);
612 *dest++ = '.';
613 }
614
338fa5f7 615 do
f80e83a9 616 {
/* Store C and keep reading while the number continues.  */
338fa5f7 617 do
618 {
79bd622b 619 /* Need room for terminating null. */
620 if (dest + 1 >= limit)
621 limit = _cpp_next_chunk (pool, 0, &dest);
622 *dest++ = c;
338fa5f7 623
338fa5f7 624 c = EOF;
625 if (buffer->cur == buffer->rlimit)
626 break;
0578f103 627
338fa5f7 628 c = *buffer->cur++;
629 }
/* VALID_SIGN is given the previously stored character, dest[-1].  */
79bd622b 630 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
0578f103 631
338fa5f7 632 /* Potential escaped newline? */
633 if (c != '?' && c != '\\')
634 break;
c808d026 635 c = skip_escaped_newlines (pfile, c);
0578f103 636 }
/* Carry on if the number resumes after what was skipped.  */
79bd622b 637 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
f669338a 638
338fa5f7 639 /* Remember the next character. */
640 buffer->read_ahead = c;
852d1b04 641
79bd622b 642 /* Null-terminate the number. */
643 *dest = '\0';
644
645 number->text = POOL_FRONT (pool);
646 number->len = dest - number->text;
/* Commit the spelling together with its terminating NUL.  */
647 POOL_COMMIT (pool, number->len + 1);
338fa5f7 648}
649
650/* Subroutine of parse_string. Emits error for unterminated strings. */
651static void
79bd622b 652unterminated (pfile, term)
338fa5f7 653 cpp_reader *pfile;
338fa5f7 654 int term;
655{
656 cpp_error (pfile, "missing terminating %c character", term);
657
729d2022 658 if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
f80e83a9 659 {
729d2022 660 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
338fa5f7 661 "possible start of unterminated string literal");
729d2022 662 pfile->mls_line = 0;
f80e83a9 663 }
0578f103 664}
665
79bd622b 666/* Subroutine of parse_string. */
667static int
668unescaped_terminator_p (pfile, dest)
669 cpp_reader *pfile;
670 const unsigned char *dest;
671{
672 const unsigned char *start, *temp;
673
674 /* In #include-style directives, terminators are not escapeable. */
675 if (pfile->state.angled_headers)
676 return 1;
677
5f3f0010 678 start = POOL_FRONT (&pfile->ident_pool);
79bd622b 679
680 /* An odd number of consecutive backslashes represents an escaped
681 terminator. */
682 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
683 ;
684
685 return ((dest - temp) & 1) == 0;
686}
687
338fa5f7 688/* Parses a string, character constant, or angle-bracketed header file
   name.  Handles embedded trigraphs and escaped newlines.  The stored
   string is guaranteed NUL-terminated, but it is not guaranteed that
   this is the first NUL since embedded NULs are preserved.

   TERMINATOR is the character that closes the literal.  The spelling,
   without the terminator, is built in pfile->ident_pool and recorded
   in TOKEN->val.str; the length does not count the final NUL.  On
   exit buffer->read_ahead is EOF if the terminator or the end of the
   buffer was reached, and otherwise holds the newline that cut the
   literal short.

   Multi-line strings are allowed, but they are deprecated.  */
f80e83a9 694static void
338fa5f7 695parse_string (pfile, token, terminator)
0578f103 696 cpp_reader *pfile;
f80e83a9 697 cpp_token *token;
338fa5f7 698 cppchar_t terminator;
0578f103 699{
f80e83a9 700 cpp_buffer *buffer = pfile->buffer;
5f3f0010 701 cpp_pool *pool = &pfile->ident_pool;
79bd622b 702 unsigned char *dest, *limit;
338fa5f7 703 cppchar_t c;
/* Each of these warnings is given at most once per literal.  */
38692459 704 bool warned_nulls = false, warned_multi = false;
338fa5f7 705
79bd622b 706 dest = POOL_FRONT (pool);
707 limit = POOL_LIMIT (pool);
708
338fa5f7 709 for (;;)
0578f103 710 {
338fa5f7 711 if (buffer->cur == buffer->rlimit)
4b0c16ee 712 c = EOF;
713 else
714 c = *buffer->cur++;
715
/* Entered by goto with C already fetched by handle_newline.  */
716 have_char:
717 /* We need space for the terminating NUL. */
718 if (dest >= limit)
719 limit = _cpp_next_chunk (pool, 0, &dest);
720
721 if (c == EOF)
338fa5f7 722 {
79bd622b 723 unterminated (pfile, terminator);
338fa5f7 724 break;
725 }
338fa5f7 726
338fa5f7 727 /* Handle trigraphs, escaped newlines etc.
   NOTE(review): if skip_escaped_newlines returns EOF here, the EOF
   check above is not repeated before C is classified and stored;
   confirm that this is intended.  */
728 if (c == '?' || c == '\\')
c808d026 729 c = skip_escaped_newlines (pfile, c);
0578f103 730
79bd622b 731 if (c == terminator && unescaped_terminator_p (pfile, dest))
0578f103 732 {
/* The terminator is consumed, so nothing is left as read-ahead.  */
79bd622b 733 c = EOF;
734 break;
338fa5f7 735 }
736 else if (is_vspace (c))
737 {
738 /* In assembly language, silently terminate string and
   character literals at end of line.  This is a kludge
   around not knowing where comments are.  */
5db5d057 741 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
338fa5f7 742 break;
0578f103 743
338fa5f7 744 /* Character constants and header names may not extend over
   multiple lines.  In Standard C, neither may strings.
   Unfortunately, we accept multiline strings as an
   extension, except in #include family directives.  */
748 if (terminator != '"' || pfile->state.angled_headers)
0578f103 749 {
79bd622b 750 unterminated (pfile, terminator);
338fa5f7 751 break;
0578f103 752 }
0578f103 753
38692459 754 if (!warned_multi)
755 {
756 warned_multi = true;
757 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
758 }
759
/* Record where the first multi-line string began, for the
   diagnostic given by unterminated.  */
729d2022 760 if (pfile->mls_line == 0)
761 {
762 pfile->mls_line = token->line;
763 pfile->mls_col = token->col;
764 }
338fa5f7 765
/* Whatever form the line ending took, store a single '\n'.  */
36a0aa7c 766 c = handle_newline (pfile, c);
4b0c16ee 767 *dest++ = '\n';
768 goto have_char;
338fa5f7 769 }
38692459 770 else if (c == '\0' && !warned_nulls)
338fa5f7 771 {
38692459 772 warned_nulls = true;
773 cpp_warning (pfile, "null character(s) preserved in literal");
0578f103 774 }
0578f103 775
79bd622b 776 *dest++ = c;
0578f103 777 }
778
79bd622b 779 /* Remember the next character. */
338fa5f7 780 buffer->read_ahead = c;
4b0c16ee 781 *dest = '\0';
0578f103 782
79bd622b 783 token->val.str.text = POOL_FRONT (pool);
784 token->val.str.len = dest - token->val.str.text;
/* Commit the spelling together with its terminating NUL.  */
4b0c16ee 785 POOL_COMMIT (pool, token->val.str.len + 1);
338fa5f7 786}
f80e83a9 787
79bd622b 788/* The stored comment includes the comment start and any terminator. */
2c63d6c8 789static void
338fa5f7 790save_comment (pfile, token, from)
791 cpp_reader *pfile;
f80e83a9 792 cpp_token *token;
793 const unsigned char *from;
2c63d6c8 794{
f80e83a9 795 unsigned char *buffer;
338fa5f7 796 unsigned int len;
338fa5f7 797
f0495c2c 798 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
a543b315 799 /* C++ comments probably (not definitely) have moved past a new
800 line, which we don't want to save in the comment. */
801 if (pfile->buffer->read_ahead != EOF)
802 len--;
5f3f0010 803 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
f80e83a9 804
f80e83a9 805 token->type = CPP_COMMENT;
76faa4c0 806 token->val.str.len = len;
338fa5f7 807 token->val.str.text = buffer;
0578f103 808
f0495c2c 809 buffer[0] = '/';
810 memcpy (buffer + 1, from, len - 1);
338fa5f7 811}
0578f103 812
10b4496a 813/* Subroutine of _cpp_lex_direct to handle '%'.  A little tricky, since we
   want to avoid stepping back when lexing %:%X.

   RESULT starts out as CPP_MOD and is upgraded according to what
   follows the '%': CPP_MOD_EQ for '=', and, when digraphs are
   enabled, CPP_HASH for ':', CPP_PASTE for ":%:" and CPP_CLOSE_BRACE
   for '>', the last three being flagged DIGRAPH.  A character that
   was read but does not belong to the token is left in
   buffer->read_ahead.  */
338fa5f7 815static void
c808d026 816lex_percent (pfile, result)
817 cpp_reader *pfile;
338fa5f7 818 cpp_token *result;
338fa5f7 819{
c808d026 820 cpp_buffer *buffer= pfile->buffer;
f669338a 821 cppchar_t c;
822
823 result->type = CPP_MOD;
824 /* Parsing %:%X could leave an extra character. */
825 if (buffer->extra_char == EOF)
c808d026 826 c = get_effective_char (pfile);
f669338a 827 else
828 {
/* Use the character stashed by the previous call instead of
   reading a new one.  */
829 c = buffer->read_ahead = buffer->extra_char;
830 buffer->extra_char = EOF;
831 }
832
833 if (c == '=')
834 ACCEPT_CHAR (CPP_MOD_EQ);
c808d026 835 else if (CPP_OPTION (pfile, digraphs))
f669338a 836 {
837 if (c == ':')
838 {
839 result->flags |= DIGRAPH;
840 ACCEPT_CHAR (CPP_HASH);
/* See whether another "%:" follows.  */
c808d026 841 if (get_effective_char (pfile) == '%')
f669338a 842 {
c808d026 843 buffer->extra_char = get_effective_char (pfile);
f669338a 844 if (buffer->extra_char == ':')
845 {
846 buffer->extra_char = EOF;
847 ACCEPT_CHAR (CPP_PASTE);
848 }
849 else
850 /* We'll catch the extra_char when we're called back.  The
   second '%' becomes the read-ahead character.  */
851 buffer->read_ahead = '%';
852 }
853 }
854 else if (c == '>')
855 {
856 result->flags |= DIGRAPH;
857 ACCEPT_CHAR (CPP_CLOSE_BRACE);
858 }
859 }
860}
861
10b4496a 862/* Subroutine of _cpp_lex_direct to handle '.'.  This is tricky, since we
   want to avoid stepping back when lexing '...' or '.123'.  In the
   latter case we should also set a flag for parse_number.

   RESULT becomes CPP_NUMBER if a digit follows the '.', CPP_ELLIPSIS
   for "...", CPP_DOT_STAR for ".*" when the cplusplus option is set,
   and CPP_DOT otherwise.  A character that was read but does not
   belong to the token is left in buffer->read_ahead.  */
865static void
866lex_dot (pfile, result)
867 cpp_reader *pfile;
868 cpp_token *result;
869{
870 cpp_buffer *buffer = pfile->buffer;
871 cppchar_t c;
872
873 /* Parsing ..X could leave an extra character. */
874 if (buffer->extra_char == EOF)
c808d026 875 c = get_effective_char (pfile);
f669338a 876 else
877 {
/* Use the character stashed by the previous call instead of
   reading a new one.  */
878 c = buffer->read_ahead = buffer->extra_char;
879 buffer->extra_char = EOF;
880 }
338fa5f7 881
f669338a 882 /* All known character sets have 0...9 contiguous. */
883 if (c >= '0' && c <= '9')
884 {
885 result->type = CPP_NUMBER;
/* The final argument asks parse_number to store the leading '.'.  */
79bd622b 886 parse_number (pfile, &result->val.str, c, 1);
f669338a 887 }
f80e83a9 888 else
c4357c92 889 {
f669338a 890 result->type = CPP_DOT;
891 if (c == '.')
892 {
c808d026 893 buffer->extra_char = get_effective_char (pfile);
f669338a 894 if (buffer->extra_char == '.')
895 {
896 buffer->extra_char = EOF;
897 ACCEPT_CHAR (CPP_ELLIPSIS);
898 }
899 else
900 /* We'll catch the extra_char when we're called back.  The
   second '.' becomes the read-ahead character.  */
901 buffer->read_ahead = '.';
902 }
903 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
904 ACCEPT_CHAR (CPP_DOT_STAR);
c4357c92 905 }
0578f103 906}
907
83dcbb5c 908/* Allocate COUNT tokens for RUN. */
909void
910_cpp_init_tokenrun (run, count)
911 tokenrun *run;
912 unsigned int count;
913{
914 run->base = xnewvec (cpp_token, count);
915 run->limit = run->base + count;
916 run->next = NULL;
917}
918
919/* Returns the next tokenrun, or creates one if there is none. */
920static tokenrun *
921next_tokenrun (run)
922 tokenrun *run;
923{
924 if (run->next == NULL)
925 {
926 run->next = xnew (tokenrun);
fb5ab82c 927 run->next->prev = run;
83dcbb5c 928 _cpp_init_tokenrun (run->next, 250);
929 }
930
931 return run->next;
932}
933
f9b5f742 934/* Allocate a single token that is invalidated at the same time as the
935 rest of the tokens on the line. Has its line and col set to the
936 same as the last lexed token, so that diagnostics appear in the
937 right place. */
938cpp_token *
939_cpp_temp_token (pfile)
940 cpp_reader *pfile;
941{
942 cpp_token *old, *result;
943
944 old = pfile->cur_token - 1;
945 if (pfile->cur_token == pfile->cur_run->limit)
946 {
947 pfile->cur_run = next_tokenrun (pfile->cur_run);
948 pfile->cur_token = pfile->cur_run->base;
949 }
950
951 result = pfile->cur_token++;
952 result->line = old->line;
953 result->col = old->col;
954 return result;
955}
956
10b4496a 957/* Lex a token into RESULT (external interface). Takes care of issues
958 like directive handling, token lookahead, multiple include
959 opimisation and skipping. */
c00e481c 960const cpp_token *
961_cpp_lex_token (pfile)
0578f103 962 cpp_reader *pfile;
83dcbb5c 963{
fb5ab82c 964 cpp_token *result;
83dcbb5c 965
fb5ab82c 966 for (;;)
83dcbb5c 967 {
fb5ab82c 968 if (pfile->cur_token == pfile->cur_run->limit)
83dcbb5c 969 {
fb5ab82c 970 pfile->cur_run = next_tokenrun (pfile->cur_run);
971 pfile->cur_token = pfile->cur_run->base;
83dcbb5c 972 }
973
fb5ab82c 974 if (pfile->lookaheads)
10b4496a 975 {
976 pfile->lookaheads--;
977 result = pfile->cur_token++;
978 }
fb5ab82c 979 else
10b4496a 980 result = _cpp_lex_direct (pfile);
fb5ab82c 981
982 if (result->flags & BOL)
83dcbb5c 983 {
fb5ab82c 984 /* Is this a directive. If _cpp_handle_directive returns
985 false, it is an assembler #. */
986 if (result->type == CPP_HASH
987 && !pfile->state.parsing_args
988 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
989 continue;
5621a364 990 if (pfile->cb.line_change && !pfile->state.skipping)
991 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
83dcbb5c 992 }
83dcbb5c 993
fb5ab82c 994 /* We don't skip tokens in directives. */
995 if (pfile->state.in_directive)
996 break;
83dcbb5c 997
fb5ab82c 998 /* Outside a directive, invalidate controlling macros. At file
10b4496a 999 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
fb5ab82c 1000 get here and MI optimisation works. */
83dcbb5c 1001 pfile->mi_valid = false;
fb5ab82c 1002
1003 if (!pfile->state.skipping || result->type == CPP_EOF)
1004 break;
83dcbb5c 1005 }
1006
c00e481c 1007 return result;
83dcbb5c 1008}
1009
10b4496a 1010/* Lex a token into pfile->cur_token, which is also incremented, to
1011 get diagnostics pointing to the correct location.
1012
1013 Does not handle issues such as token lookahead, multiple-include
1014 optimisation, directives, skipping etc. This function is only
1015 suitable for use by _cpp_lex_token, and in special cases like
1016 lex_expansion_token which doesn't care for any of these issues.
1017
1018 When meeting a newline, returns CPP_EOF if parsing a directive,
1019 otherwise returns to the start of the token buffer if permissible.
1020 Returns the location of the lexed token. */
1021cpp_token *
1022_cpp_lex_direct (pfile)
83dcbb5c 1023 cpp_reader *pfile;
0578f103 1024{
338fa5f7 1025 cppchar_t c;
230f0943 1026 cpp_buffer *buffer;
338fa5f7 1027 const unsigned char *comment_start;
10b4496a 1028 cpp_token *result = pfile->cur_token++;
0653b94e 1029
/* fresh_line: entered for the first token, after each newline outside a
   directive, and after popping a buffer.  Reloads the current buffer and
   moves its pending flags (BOL, possibly PREV_WHITE) onto the token.  */
83dcbb5c 1030 fresh_line:
230f0943 1031 buffer = pfile->buffer;
8c2e2fc5 1032 result->flags = buffer->saved_flags;
1033 buffer->saved_flags = 0;
/* update_tokens_line: re-entered after escaped newlines and after skipped
   comments, so that the token records the current line number.  */
83dcbb5c 1034 update_tokens_line:
36a0aa7c 1035 result->line = pfile->line;
f80e83a9 1036
/* skipped_white: fetch the next character.  A pending read_ahead
   character takes precedence over the buffer contents; C remains EOF
   when there is no pending character and the buffer is exhausted.  */
83dcbb5c 1037 skipped_white:
338fa5f7 1038 c = buffer->read_ahead;
1039 if (c == EOF && buffer->cur < buffer->rlimit)
83dcbb5c 1040 c = *buffer->cur++;
1041 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
338fa5f7 1042 buffer->read_ahead = EOF;
83dcbb5c 1043
/* trigraph: the '?' / '\\' case jumps back here to dispatch on the
   character returned by skip_escaped_newlines.  */
1044 trigraph:
338fa5f7 1045 switch (c)
0578f103 1046 {
338fa5f7 1047 case EOF:
/* Whatever is lexed next from this buffer starts a line.  */
fb5ab82c 1048 buffer->saved_flags = BOL;
83dcbb5c 1049 if (!pfile->state.parsing_args && !pfile->state.in_directive)
4dfe8b74 1050 {
fb5ab82c 1051 if (buffer->cur != buffer->line_base)
83dcbb5c 1052 {
1053 /* Non-empty files should end in a newline. Don't warn
1054 for command line and _Pragma buffers. */
1055 if (!buffer->from_stage3)
1056 cpp_pedwarn (pfile, "no newline at end of file");
1057 handle_newline (pfile, '\n');
5475a165 1058 }
fb5ab82c 1059
1060 /* Don't pop the last buffer. */
1061 if (buffer->prev)
1062 {
/* return_at_eof is read before the pop; BUFFER is not consulted again
   on this path (fresh_line reloads it from pfile->buffer).  */
1063 unsigned char stop = buffer->return_at_eof;
1064
1065 _cpp_pop_buffer (pfile);
1066 if (!stop)
1067 goto fresh_line;
1068 }
4dfe8b74 1069 }
338fa5f7 1070 result->type = CPP_EOF;
83dcbb5c 1071 break;
0578f103 1072
338fa5f7 1073 case ' ': case '\t': case '\f': case '\v': case '\0':
1074 skip_whitespace (pfile, c);
1075 result->flags |= PREV_WHITE;
83dcbb5c 1076 goto skipped_white;
338fa5f7 1077
1078 case '\n': case '\r':
fb5ab82c 1079 handle_newline (pfile, c);
1080 buffer->saved_flags = BOL;
1081 if (! pfile->state.in_directive)
0578f103 1082 {
f9b5f742 1083 if (pfile->state.parsing_args == 2)
1084 buffer->saved_flags |= PREV_WHITE;
/* Unless tokens must be kept, restart at the first slot of the base
   run so that the new line reuses the token storage.  */
fb5ab82c 1085 if (!pfile->keep_tokens)
1086 {
1087 pfile->cur_run = &pfile->base_run;
1088 result = pfile->base_run.base;
1089 pfile->cur_token = result + 1;
1090 }
1091 goto fresh_line;
0578f103 1092 }
/* Inside a directive the newline terminates it: report CPP_EOF.  */
83dcbb5c 1093 result->type = CPP_EOF;
1094 break;
732cb4c9 1095
338fa5f7 1096 case '?':
1097 case '\\':
1098 /* These could start an escaped newline, or '?' a trigraph. Let
1099 skip_escaped_newlines do all the work. */
1100 {
1ea7ed21 1101 unsigned int line = pfile->line;
338fa5f7 1102
c808d026 1103 c = skip_escaped_newlines (pfile, c);
1ea7ed21 1104 if (line != pfile->line)
338fa5f7 1105 /* We had at least one escaped newline of some sort, and the
1106 next character is in buffer->read_ahead. Update the
1107 token's line and column. */
83dcbb5c 1108 goto update_tokens_line;
338fa5f7 1109
1110 /* We are either the original '?' or '\\', or a trigraph. */
1111 result->type = CPP_QUERY;
1112 buffer->read_ahead = EOF;
/* A lone backslash becomes CPP_OTHER; any character other than '?' is
   re-dispatched through the switch; a plain '?' keeps CPP_QUERY.  */
1113 if (c == '\\')
3f90a920 1114 goto random_char;
338fa5f7 1115 else if (c != '?')
83dcbb5c 1116 goto trigraph;
338fa5f7 1117 }
1118 break;
732cb4c9 1119
338fa5f7 1120 case '0': case '1': case '2': case '3': case '4':
1121 case '5': case '6': case '7': case '8': case '9':
1122 result->type = CPP_NUMBER;
79bd622b 1123 parse_number (pfile, &result->val.str, c, 0);
338fa5f7 1124 break;
732cb4c9 1125
338fa5f7 1126 case '$':
1127 if (!CPP_OPTION (pfile, dollars_in_ident))
1128 goto random_char;
2c0e001b 1129 /* Fall through... */
338fa5f7 1130
1131 case '_':
1132 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1133 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1134 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1135 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1136 case 'y': case 'z':
1137 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1138 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1139 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1140 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1141 case 'Y': case 'Z':
1142 result->type = CPP_NAME;
66a5287e 1143 result->val.node = parse_identifier (pfile);
338fa5f7 1144
1145 /* 'L' may introduce wide characters or strings. */
79bd622b 1146 if (result->val.node == pfile->spec_nodes.n_L)
338fa5f7 1147 {
/* Peek at the next character without consuming it; it is consumed
   below only when it turns out to be a quote.  */
66a5287e 1148 c = buffer->read_ahead;
1149 if (c == EOF && buffer->cur < buffer->rlimit)
1150 c = *buffer->cur;
338fa5f7 1151 if (c == '\'' || c == '"')
71aa9da4 1152 {
66a5287e 1153 buffer->cur++;
338fa5f7 1154 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1155 goto make_string;
71aa9da4 1156 }
338fa5f7 1157 }
1158 /* Convert named operators to their proper types. */
79bd622b 1159 else if (result->val.node->flags & NODE_OPERATOR)
338fa5f7 1160 {
/* val.node is left in place, so the identifier node stays available
   alongside the NAMED_OP flag.  */
1161 result->flags |= NAMED_OP;
79bd622b 1162 result->type = result->val.node->value.operator;
338fa5f7 1163 }
1164 break;
1165
1166 case '\'':
1167 case '"':
1168 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
/* make_string: shared by plain, wide and header-name literals; C holds
   the terminating character.  */
1169 make_string:
1170 parse_string (pfile, result, c);
1171 break;
f80e83a9 1172
338fa5f7 1173 case '/':
f0495c2c 1174 /* A potential block or line comment. */
1175 comment_start = buffer->cur;
338fa5f7 1176 result->type = CPP_DIV;
c808d026 1177 c = get_effective_char (pfile);
338fa5f7 1178 if (c == '=')
1179 ACCEPT_CHAR (CPP_DIV_EQ);
/* After '/=' C is still '=', so this test also finishes that case.  */
f0495c2c 1180 if (c != '/' && c != '*')
1181 break;
20b8f8ff 1182
f0495c2c 1183 if (c == '*')
1184 {
338fa5f7 1185 if (skip_block_comment (pfile))
1ea7ed21 1186 cpp_error (pfile, "unterminated comment");
338fa5f7 1187 }
f0495c2c 1188 else
338fa5f7 1189 {
/* With line comments disabled outside system headers, the first '/'
   is returned as CPP_DIV.  */
f0495c2c 1190 if (!CPP_OPTION (pfile, cplusplus_comments)
1191 && !CPP_IN_SYSTEM_HEADER (pfile))
1192 break;
1193
5db5d057 1194 /* Warn about comments only if pedantically GNUC89, and not
1195 in system headers. */
1196 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
66914e49 1197 && ! buffer->warned_cplusplus_comments)
f80e83a9 1198 {
f0495c2c 1199 cpp_pedwarn (pfile,
1200 "C++ style comments are not allowed in ISO C89");
1201 cpp_pedwarn (pfile,
1202 "(this will be reported only once per input file)");
1203 buffer->warned_cplusplus_comments = 1;
1204 }
338fa5f7 1205
66914e49 1206 /* Skip_line_comment updates buffer->read_ahead. */
e1caf668 1207 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
729d2022 1208 cpp_warning (pfile, "multi-line comment");
f0495c2c 1209 }
338fa5f7 1210
f0495c2c 1211 /* Skipping the comment has updated buffer->read_ahead. */
1212 if (!pfile->state.save_comments)
1213 {
/* A discarded comment counts as whitespace before the next token.  */
1214 result->flags |= PREV_WHITE;
83dcbb5c 1215 goto update_tokens_line;
338fa5f7 1216 }
f0495c2c 1217
1218 /* Save the comment as a token in its own right. */
1219 save_comment (pfile, result, comment_start);
def71b06 1220 /* Don't do MI optimisation. */
fb5ab82c 1221 break;
338fa5f7 1222
1223 case '<':
/* When state.angled_headers is set, lex <...> as a header name by
   reusing the string lexer with '>' as the terminator.  */
1224 if (pfile->state.angled_headers)
1225 {
1226 result->type = CPP_HEADER_NAME;
1227 c = '>'; /* terminator. */
1228 goto make_string;
1229 }
0578f103 1230
338fa5f7 1231 result->type = CPP_LESS;
c808d026 1232 c = get_effective_char (pfile);
338fa5f7 1233 if (c == '=')
1234 ACCEPT_CHAR (CPP_LESS_EQ);
1235 else if (c == '<')
1236 {
1237 ACCEPT_CHAR (CPP_LSHIFT);
c808d026 1238 if (get_effective_char (pfile) == '=')
338fa5f7 1239 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1240 }
1241 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1242 {
1243 ACCEPT_CHAR (CPP_MIN);
c808d026 1244 if (get_effective_char (pfile) == '=')
338fa5f7 1245 ACCEPT_CHAR (CPP_MIN_EQ);
1246 }
1247 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1248 {
1249 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1250 result->flags |= DIGRAPH;
1251 }
1252 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1253 {
1254 ACCEPT_CHAR (CPP_OPEN_BRACE);
1255 result->flags |= DIGRAPH;
1256 }
1257 break;
1258
1259 case '>':
1260 result->type = CPP_GREATER;
c808d026 1261 c = get_effective_char (pfile);
338fa5f7 1262 if (c == '=')
1263 ACCEPT_CHAR (CPP_GREATER_EQ);
1264 else if (c == '>')
1265 {
1266 ACCEPT_CHAR (CPP_RSHIFT);
c808d026 1267 if (get_effective_char (pfile) == '=')
338fa5f7 1268 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1269 }
1270 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1271 {
1272 ACCEPT_CHAR (CPP_MAX);
c808d026 1273 if (get_effective_char (pfile) == '=')
338fa5f7 1274 ACCEPT_CHAR (CPP_MAX_EQ);
1275 }
1276 break;
1277
f669338a 1278 case '%':
c808d026 1279 lex_percent (pfile, result);
338fa5f7 1280 break;
1281
f669338a 1282 case '.':
1283 lex_dot (pfile, result);
338fa5f7 1284 break;
0578f103 1285
338fa5f7 1286 case '+':
1287 result->type = CPP_PLUS;
c808d026 1288 c = get_effective_char (pfile);
338fa5f7 1289 if (c == '=')
1290 ACCEPT_CHAR (CPP_PLUS_EQ);
1291 else if (c == '+')
1292 ACCEPT_CHAR (CPP_PLUS_PLUS);
1293 break;
ac0749c7 1294
338fa5f7 1295 case '-':
1296 result->type = CPP_MINUS;
c808d026 1297 c = get_effective_char (pfile);
338fa5f7 1298 if (c == '>')
1299 {
1300 ACCEPT_CHAR (CPP_DEREF);
1301 if (CPP_OPTION (pfile, cplusplus)
c808d026 1302 && get_effective_char (pfile) == '*')
338fa5f7 1303 ACCEPT_CHAR (CPP_DEREF_STAR);
1304 }
1305 else if (c == '=')
1306 ACCEPT_CHAR (CPP_MINUS_EQ);
1307 else if (c == '-')
1308 ACCEPT_CHAR (CPP_MINUS_MINUS);
1309 break;
0578f103 1310
338fa5f7 1311 case '*':
1312 result->type = CPP_MULT;
c808d026 1313 if (get_effective_char (pfile) == '=')
338fa5f7 1314 ACCEPT_CHAR (CPP_MULT_EQ);
1315 break;
ac0749c7 1316
338fa5f7 1317 case '=':
1318 result->type = CPP_EQ;
c808d026 1319 if (get_effective_char (pfile) == '=')
338fa5f7 1320 ACCEPT_CHAR (CPP_EQ_EQ);
1321 break;
c4abf88d 1322
338fa5f7 1323 case '!':
1324 result->type = CPP_NOT;
c808d026 1325 if (get_effective_char (pfile) == '=')
338fa5f7 1326 ACCEPT_CHAR (CPP_NOT_EQ);
1327 break;
0578f103 1328
338fa5f7 1329 case '&':
1330 result->type = CPP_AND;
c808d026 1331 c = get_effective_char (pfile);
338fa5f7 1332 if (c == '=')
1333 ACCEPT_CHAR (CPP_AND_EQ);
1334 else if (c == '&')
1335 ACCEPT_CHAR (CPP_AND_AND);
1336 break;
1337
1338 case '#':
e14c5993 1339 result->type = CPP_HASH;
83dcbb5c 1340 if (get_effective_char (pfile) == '#')
1341 ACCEPT_CHAR (CPP_PASTE);
338fa5f7 1342 break;
0578f103 1343
338fa5f7 1344 case '|':
1345 result->type = CPP_OR;
c808d026 1346 c = get_effective_char (pfile);
338fa5f7 1347 if (c == '=')
1348 ACCEPT_CHAR (CPP_OR_EQ);
1349 else if (c == '|')
1350 ACCEPT_CHAR (CPP_OR_OR);
1351 break;
0578f103 1352
338fa5f7 1353 case '^':
1354 result->type = CPP_XOR;
c808d026 1355 if (get_effective_char (pfile) == '=')
338fa5f7 1356 ACCEPT_CHAR (CPP_XOR_EQ);
1357 break;
0578f103 1358
338fa5f7 1359 case ':':
1360 result->type = CPP_COLON;
c808d026 1361 c = get_effective_char (pfile);
338fa5f7 1362 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1363 ACCEPT_CHAR (CPP_SCOPE);
1364 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1365 {
1366 result->flags |= DIGRAPH;
1367 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1368 }
1369 break;
0578f103 1370
338fa5f7 1371 case '~': result->type = CPP_COMPL; break;
1372 case ',': result->type = CPP_COMMA; break;
1373 case '(': result->type = CPP_OPEN_PAREN; break;
1374 case ')': result->type = CPP_CLOSE_PAREN; break;
1375 case '[': result->type = CPP_OPEN_SQUARE; break;
1376 case ']': result->type = CPP_CLOSE_SQUARE; break;
1377 case '{': result->type = CPP_OPEN_BRACE; break;
1378 case '}': result->type = CPP_CLOSE_BRACE; break;
1379 case ';': result->type = CPP_SEMICOLON; break;
1380
9ee99ac6 1381 /* @ is a punctuator in Objective C. */
1382 case '@': result->type = CPP_ATSIGN; break;
338fa5f7 1383
/* random_char: also reached for a lone backslash and for '$' when
   dollars_in_ident is off.  The character itself is kept in val.c.  */
1384 random_char:
1385 default:
1386 result->type = CPP_OTHER;
33344a1c 1387 result->val.c = c;
338fa5f7 1388 break;
1389 }
fb5ab82c 1390
1391 return result;
338fa5f7 1392}
1393
79bd622b 1394/* An upper bound on the number of bytes needed to spell a token,
1395 including preceding whitespace. */
1396unsigned int
1397cpp_token_len (token)
1398 const cpp_token *token;
338fa5f7 1399{
79bd622b 1400 unsigned int len;
cfad5579 1401
79bd622b 1402 switch (TOKEN_SPELL (token))
f80e83a9 1403 {
c86dbc5b 1404 default: len = 0; break;
1405 case SPELL_STRING: len = token->val.str.len; break;
1406 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
f80e83a9 1407 }
79bd622b 1408 /* 1 for whitespace, 4 for comment delimeters. */
1409 return len + 5;
cfad5579 1410}
1411
f80e83a9 1412/* Write the spelling of a token TOKEN to BUFFER. The buffer must
c5ea33a8 1413 already contain the enough space to hold the token's spelling.
1414 Returns a pointer to the character after the last character
1415 written. */
79bd622b 1416unsigned char *
1417cpp_spell_token (pfile, token, buffer)
f80e83a9 1418 cpp_reader *pfile; /* Would be nice to be rid of this... */
1419 const cpp_token *token;
1420 unsigned char *buffer;
1421{
7e842f95 1422 switch (TOKEN_SPELL (token))
f80e83a9 1423 {
1424 case SPELL_OPERATOR:
1425 {
1426 const unsigned char *spelling;
1427 unsigned char c;
ab12a39c 1428
f80e83a9 1429 if (token->flags & DIGRAPH)
ee6c4e4b 1430 spelling
1431 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
31674461 1432 else if (token->flags & NAMED_OP)
1433 goto spell_ident;
f80e83a9 1434 else
7e842f95 1435 spelling = TOKEN_NAME (token);
f80e83a9 1436
1437 while ((c = *spelling++) != '\0')
1438 *buffer++ = c;
1439 }
1440 break;
ab12a39c 1441
f80e83a9 1442 case SPELL_IDENT:
31674461 1443 spell_ident:
c86dbc5b 1444 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1445 buffer += NODE_LEN (token->val.node);
f80e83a9 1446 break;
ab12a39c 1447
f80e83a9 1448 case SPELL_STRING:
1449 {
71aa9da4 1450 int left, right, tag;
1451 switch (token->type)
1452 {
1453 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1454 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
71aa9da4 1455 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1456 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1457 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1458 default: left = '\0'; right = '\0'; tag = '\0'; break;
1459 }
1460 if (tag) *buffer++ = tag;
1461 if (left) *buffer++ = left;
76faa4c0 1462 memcpy (buffer, token->val.str.text, token->val.str.len);
1463 buffer += token->val.str.len;
71aa9da4 1464 if (right) *buffer++ = right;
f80e83a9 1465 }
1466 break;
ab12a39c 1467
f80e83a9 1468 case SPELL_CHAR:
33344a1c 1469 *buffer++ = token->val.c;
f80e83a9 1470 break;
ab12a39c 1471
f80e83a9 1472 case SPELL_NONE:
7e842f95 1473 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
f80e83a9 1474 break;
1475 }
ab12a39c 1476
f80e83a9 1477 return buffer;
1478}
ab12a39c 1479
79bd622b 1480/* Returns a token as a null-terminated string. The string is
1481 temporary, and automatically freed later. Useful for diagnostics. */
1482unsigned char *
1483cpp_token_as_text (pfile, token)
6060326b 1484 cpp_reader *pfile;
f80e83a9 1485 const cpp_token *token;
6060326b 1486{
79bd622b 1487 unsigned int len = cpp_token_len (token);
5f3f0010 1488 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
6060326b 1489
79bd622b 1490 end = cpp_spell_token (pfile, token, start);
1491 end[0] = '\0';
6060326b 1492
79bd622b 1493 return start;
1494}
6060326b 1495
79bd622b 1496/* Used by C front ends. Should really move to using cpp_token_as_text. */
1497const char *
1498cpp_type2name (type)
1499 enum cpp_ttype type;
1500{
1501 return (const char *) token_spellings[type].name;
1502}
6060326b 1503
f9b5f742 1504/* Writes the spelling of token to FP, without any preceding space.
1505 Separated from cpp_spell_token for efficiency - to avoid stdio
1506 double-buffering. */
79bd622b 1507void
1508cpp_output_token (token, fp)
1509 const cpp_token *token;
1510 FILE *fp;
1511{
79bd622b 1512 switch (TOKEN_SPELL (token))
6060326b 1513 {
79bd622b 1514 case SPELL_OPERATOR:
1515 {
1516 const unsigned char *spelling;
6060326b 1517
79bd622b 1518 if (token->flags & DIGRAPH)
ee6c4e4b 1519 spelling
1520 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
79bd622b 1521 else if (token->flags & NAMED_OP)
1522 goto spell_ident;
1523 else
1524 spelling = TOKEN_NAME (token);
f80e83a9 1525
79bd622b 1526 ufputs (spelling, fp);
1527 }
1528 break;
f80e83a9 1529
79bd622b 1530 spell_ident:
1531 case SPELL_IDENT:
c86dbc5b 1532 ufputs (NODE_NAME (token->val.node), fp);
79bd622b 1533 break;
f80e83a9 1534
79bd622b 1535 case SPELL_STRING:
1536 {
1537 int left, right, tag;
1538 switch (token->type)
1539 {
1540 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1541 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
79bd622b 1542 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1543 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1544 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1545 default: left = '\0'; right = '\0'; tag = '\0'; break;
1546 }
1547 if (tag) putc (tag, fp);
1548 if (left) putc (left, fp);
1549 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1550 if (right) putc (right, fp);
1551 }
1552 break;
6060326b 1553
79bd622b 1554 case SPELL_CHAR:
33344a1c 1555 putc (token->val.c, fp);
79bd622b 1556 break;
6060326b 1557
79bd622b 1558 case SPELL_NONE:
1559 /* An error, most probably. */
1560 break;
f80e83a9 1561 }
6060326b 1562}
1563
79bd622b 1564/* Compare two tokens. */
1565int
1566_cpp_equiv_tokens (a, b)
1567 const cpp_token *a, *b;
6060326b 1568{
79bd622b 1569 if (a->type == b->type && a->flags == b->flags)
1570 switch (TOKEN_SPELL (a))
1571 {
1572 default: /* Keep compiler happy. */
1573 case SPELL_OPERATOR:
1574 return 1;
1575 case SPELL_CHAR:
33344a1c 1576 return a->val.c == b->val.c; /* Character. */
79bd622b 1577 case SPELL_NONE:
588d632b 1578 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
79bd622b 1579 case SPELL_IDENT:
1580 return a->val.node == b->val.node;
1581 case SPELL_STRING:
1582 return (a->val.str.len == b->val.str.len
1583 && !memcmp (a->val.str.text, b->val.str.text,
1584 a->val.str.len));
1585 }
6060326b 1586
f80e83a9 1587 return 0;
1588}
1589
f80e83a9 1590/* Determine whether two tokens can be pasted together, and if so,
1591 what the resulting token is. Returns CPP_EOF if the tokens cannot
1592 be pasted, or the appropriate type for the merged token if they
1593 can. */
524f0c40 1594enum cpp_ttype
79bd622b 1595cpp_can_paste (pfile, token1, token2, digraph)
f80e83a9 1596 cpp_reader * pfile;
1597 const cpp_token *token1, *token2;
1598 int* digraph;
6060326b 1599{
f80e83a9 1600 enum cpp_ttype a = token1->type, b = token2->type;
1601 int cxx = CPP_OPTION (pfile, cplusplus);
6060326b 1602
31674461 1603 /* Treat named operators as if they were ordinary NAMEs. */
1604 if (token1->flags & NAMED_OP)
1605 a = CPP_NAME;
1606 if (token2->flags & NAMED_OP)
1607 b = CPP_NAME;
1608
ee6c4e4b 1609 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1610 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
6060326b 1611
f80e83a9 1612 switch (a)
6060326b 1613 {
f80e83a9 1614 case CPP_GREATER:
1615 if (b == a) return CPP_RSHIFT;
1616 if (b == CPP_QUERY && cxx) return CPP_MAX;
1617 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1618 break;
1619 case CPP_LESS:
1620 if (b == a) return CPP_LSHIFT;
1621 if (b == CPP_QUERY && cxx) return CPP_MIN;
1622 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
27fdc0b6 1623 if (CPP_OPTION (pfile, digraphs))
1624 {
1625 if (b == CPP_COLON)
1626 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1627 if (b == CPP_MOD)
1628 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1629 }
f80e83a9 1630 break;
6060326b 1631
f80e83a9 1632 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1633 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1634 case CPP_OR: if (b == a) return CPP_OR_OR; break;
6060326b 1635
f80e83a9 1636 case CPP_MINUS:
1637 if (b == a) return CPP_MINUS_MINUS;
1638 if (b == CPP_GREATER) return CPP_DEREF;
1639 break;
1640 case CPP_COLON:
1641 if (b == a && cxx) return CPP_SCOPE;
27fdc0b6 1642 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
f80e83a9 1643 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1644 break;
1645
1646 case CPP_MOD:
27fdc0b6 1647 if (CPP_OPTION (pfile, digraphs))
1648 {
1649 if (b == CPP_GREATER)
1650 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1651 if (b == CPP_COLON)
1652 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1653 }
f80e83a9 1654 break;
1655 case CPP_DEREF:
1656 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1657 break;
1658 case CPP_DOT:
1659 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1660 if (b == CPP_NUMBER) return CPP_NUMBER;
1661 break;
1662
1663 case CPP_HASH:
1664 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1665 /* %:%: digraph */
1666 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1667 break;
1668
1669 case CPP_NAME:
1670 if (b == CPP_NAME) return CPP_NAME;
1671 if (b == CPP_NUMBER
79bd622b 1672 && name_p (pfile, &token2->val.str)) return CPP_NAME;
f80e83a9 1673 if (b == CPP_CHAR
79bd622b 1674 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
f80e83a9 1675 if (b == CPP_STRING
79bd622b 1676 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
f80e83a9 1677 break;
1678
1679 case CPP_NUMBER:
1680 if (b == CPP_NUMBER) return CPP_NUMBER;
1681 if (b == CPP_NAME) return CPP_NUMBER;
1682 if (b == CPP_DOT) return CPP_NUMBER;
1683 /* Numbers cannot have length zero, so this is safe. */
1684 if ((b == CPP_PLUS || b == CPP_MINUS)
76faa4c0 1685 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
f80e83a9 1686 return CPP_NUMBER;
1687 break;
1688
1689 default:
1690 break;
6060326b 1691 }
1692
f80e83a9 1693 return CPP_EOF;
1694}
1695
79bd622b 1696/* Returns nonzero if a space should be inserted to avoid an
1697 accidental token paste for output. For simplicity, it is
1698 conservative, and occasionally advises a space where one is not
1699 needed, e.g. "." and ".2". */
f80e83a9 1700
79bd622b 1701int
1702cpp_avoid_paste (pfile, token1, token2)
6060326b 1703 cpp_reader *pfile;
79bd622b 1704 const cpp_token *token1, *token2;
6060326b 1705{
79bd622b 1706 enum cpp_ttype a = token1->type, b = token2->type;
1707 cppchar_t c;
6060326b 1708
79bd622b 1709 if (token1->flags & NAMED_OP)
1710 a = CPP_NAME;
1711 if (token2->flags & NAMED_OP)
1712 b = CPP_NAME;
6060326b 1713
79bd622b 1714 c = EOF;
1715 if (token2->flags & DIGRAPH)
ee6c4e4b 1716 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
79bd622b 1717 else if (token_spellings[b].category == SPELL_OPERATOR)
1718 c = token_spellings[b].name[0];
6060326b 1719
79bd622b 1720 /* Quickly get everything that can paste with an '='. */
ee6c4e4b 1721 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
79bd622b 1722 return 1;
6060326b 1723
79bd622b 1724 switch (a)
6060326b 1725 {
79bd622b 1726 case CPP_GREATER: return c == '>' || c == '?';
1727 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1728 case CPP_PLUS: return c == '+';
1729 case CPP_MINUS: return c == '-' || c == '>';
1730 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1731 case CPP_MOD: return c == ':' || c == '>';
1732 case CPP_AND: return c == '&';
1733 case CPP_OR: return c == '|';
1734 case CPP_COLON: return c == ':' || c == '>';
1735 case CPP_DEREF: return c == '*';
efdcc728 1736 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
79bd622b 1737 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1738 case CPP_NAME: return ((b == CPP_NUMBER
1739 && name_p (pfile, &token2->val.str))
1740 || b == CPP_NAME
1741 || b == CPP_CHAR || b == CPP_STRING); /* L */
1742 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1743 || c == '.' || c == '+' || c == '-');
1744 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
33344a1c 1745 && token1->val.c == '@'
79bd622b 1746 && (b == CPP_NAME || b == CPP_STRING));
1747 default: break;
6060326b 1748 }
6060326b 1749
deb356cf 1750 return 0;
6060326b 1751}
1752
79bd622b 1753/* Output all the remaining tokens on the current line, and a newline
f9b5f742 1754 character, to FP. Leading whitespace is removed. If there are
1755 macros, special token padding is not performed. */
6060326b 1756void
79bd622b 1757cpp_output_line (pfile, fp)
6060326b 1758 cpp_reader *pfile;
79bd622b 1759 FILE *fp;
6060326b 1760{
f9b5f742 1761 const cpp_token *token;
7e842f95 1762
f9b5f742 1763 token = cpp_get_token (pfile);
1764 while (token->type != CPP_EOF)
7e842f95 1765 {
f9b5f742 1766 cpp_output_token (token, fp);
1767 token = cpp_get_token (pfile);
1768 if (token->flags & PREV_WHITE)
1769 putc (' ', fp);
7e842f95 1770 }
1771
79bd622b 1772 putc ('\n', fp);
f80e83a9 1773}
6060326b 1774
/* Map the hexadecimal digit C ('0'-'9', 'a'-'f' or 'A'-'F') to its
   numeric value 0-15.  Aborts if C is not a hexadecimal digit, so
   callers must check first.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if ('0' <= c && c <= '9')
    return c - '0';

  if ('A' <= c && c <= 'F')
    return 10 + (c - 'A');

  if ('a' <= c && c <= 'f')
    return 10 + (c - 'a');

  abort ();
}
1788
c8342759 1789/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1790 failure if cpplib is not parsing C++ or C99. Such failure is
1791 silent, and no variables are updated. Otherwise returns 0, and
1792 warns if -Wtraditional.
8330799c 1793
1794 [lex.charset]: The character designated by the universal character
1795 name \UNNNNNNNN is that character whose character short name in
1796 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1797 universal character name \uNNNN is that character whose character
1798 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1799 for a universal character name is less than 0x20 or in the range
1800 0x7F-0x9F (inclusive), or if the universal character name
1801 designates a character in the basic source character set, then the
1802 program is ill-formed.
1803
1804 We assume that wchar_t is Unicode, so we don't need to do any
c8342759 1805 mapping. Is this ever wrong?
8330799c 1806
c8342759 1807 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1808 LIMIT is the end of the string or charconst. PSTR is updated to
1809 point after the UCS on return, and the UCS is written into PC. */
1810
1811static int
1812maybe_read_ucs (pfile, pstr, limit, pc)
8330799c 1813 cpp_reader *pfile;
1814 const unsigned char **pstr;
1815 const unsigned char *limit;
c8342759 1816 unsigned int *pc;
8330799c 1817{
1818 const unsigned char *p = *pstr;
c8342759 1819 unsigned int code = 0;
1820 unsigned int c = *pc, length;
1821
1822 /* Only attempt to interpret a UCS for C++ and C99. */
1823 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1824 return 1;
8330799c 1825
c8342759 1826 if (CPP_WTRADITIONAL (pfile))
1827 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
8330799c 1828
f73bab03 1829 length = (c == 'u' ? 4: 8);
1830
1831 if ((size_t) (limit - p) < length)
1832 {
1833 cpp_error (pfile, "incomplete universal-character-name");
1834 /* Skip to the end to avoid more diagnostics. */
1835 p = limit;
1836 }
1837 else
1838 {
1839 for (; length; length--, p++)
8330799c 1840 {
f73bab03 1841 c = *p;
1842 if (ISXDIGIT (c))
1843 code = (code << 4) + hex_digit_value (c);
1844 else
1845 {
1846 cpp_error (pfile,
1847 "non-hex digit '%c' in universal-character-name", c);
1848 /* We shouldn't skip in case there are multibyte chars. */
1849 break;
1850 }
8330799c 1851 }
8330799c 1852 }
1853
1854#ifdef TARGET_EBCDIC
1855 cpp_error (pfile, "universal-character-name on EBCDIC target");
1856 code = 0x3f; /* EBCDIC invalid character */
1857#else
f73bab03 1858 /* True extended characters are OK. */
1859 if (code >= 0xa0
1860 && !(code & 0x80000000)
1861 && !(code >= 0xD800 && code <= 0xDFFF))
1862 ;
1863 /* The standard permits $, @ and ` to be specified as UCNs. We use
1864 hex escapes so that this also works with EBCDIC hosts. */
1865 else if (code == 0x24 || code == 0x40 || code == 0x60)
1866 ;
1867 /* Don't give another error if one occurred above. */
1868 else if (length == 0)
1869 cpp_error (pfile, "universal-character-name out of range");
8330799c 1870#endif
1871
1872 *pstr = p;
c8342759 1873 *pc = code;
1874 return 0;
8330799c 1875}
1876
1877/* Interpret an escape sequence, and return its value. PSTR points to
1878 the input pointer, which is just after the backslash. LIMIT is how
c8342759 1879 much text we have. MASK is a bitmask for the precision for the
1880 destination type (char or wchar_t). TRADITIONAL, if true, does not
1881 interpret escapes that did not exist in traditional C.
8330799c 1882
c8342759 1883 Handles all relevant diagnostics. */
1884
1885unsigned int
1886cpp_parse_escape (pfile, pstr, limit, mask, traditional)
8330799c 1887 cpp_reader *pfile;
1888 const unsigned char **pstr;
1889 const unsigned char *limit;
c8342759 1890 unsigned HOST_WIDE_INT mask;
8330799c 1891 int traditional;
1892{
1893 int unknown = 0;
1894 const unsigned char *str = *pstr;
1895 unsigned int c = *str++;
1896
1897 switch (c)
1898 {
1899 case '\\': case '\'': case '"': case '?': break;
1900 case 'b': c = TARGET_BS; break;
1901 case 'f': c = TARGET_FF; break;
1902 case 'n': c = TARGET_NEWLINE; break;
1903 case 'r': c = TARGET_CR; break;
1904 case 't': c = TARGET_TAB; break;
1905 case 'v': c = TARGET_VT; break;
1906
1907 case '(': case '{': case '[': case '%':
1908 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1909 '\%' is used to prevent SCCS from getting confused. */
1910 unknown = CPP_PEDANTIC (pfile);
1911 break;
1912
1913 case 'a':
1914 if (CPP_WTRADITIONAL (pfile))
1915 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1916 if (!traditional)
1917 c = TARGET_BELL;
1918 break;
1919
1920 case 'e': case 'E':
1921 if (CPP_PEDANTIC (pfile))
1922 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1923 c = TARGET_ESC;
1924 break;
1925
8330799c 1926 case 'u': case 'U':
c8342759 1927 unknown = maybe_read_ucs (pfile, &str, limit, &c);
8330799c 1928 break;
1929
1930 case 'x':
1931 if (CPP_WTRADITIONAL (pfile))
1932 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1933
1934 if (!traditional)
1935 {
1936 unsigned int i = 0, overflow = 0;
1937 int digits_found = 0;
1938
1939 while (str < limit)
1940 {
1941 c = *str;
1942 if (! ISXDIGIT (c))
1943 break;
1944 str++;
1945 overflow |= i ^ (i << 4 >> 4);
1946 i = (i << 4) + hex_digit_value (c);
1947 digits_found = 1;
1948 }
1949
1950 if (!digits_found)
1951 cpp_error (pfile, "\\x used with no following hex digits");
1952
1953 if (overflow | (i != (i & mask)))
1954 {
1955 cpp_pedwarn (pfile, "hex escape sequence out of range");
1956 i &= mask;
1957 }
1958 c = i;
1959 }
1960 break;
1961
1962 case '0': case '1': case '2': case '3':
1963 case '4': case '5': case '6': case '7':
1964 {
1965 unsigned int i = c - '0';
1966 int count = 0;
1967
1968 while (str < limit && ++count < 3)
1969 {
1970 c = *str;
1971 if (c < '0' || c > '7')
1972 break;
1973 str++;
1974 i = (i << 3) + c - '0';
1975 }
1976
1977 if (i != (i & mask))
1978 {
1979 cpp_pedwarn (pfile, "octal escape sequence out of range");
1980 i &= mask;
1981 }
1982 c = i;
1983 }
1984 break;
1985
1986 default:
1987 unknown = 1;
1988 break;
1989 }
1990
1991 if (unknown)
1992 {
1993 if (ISGRAPH (c))
1994 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1995 else
1996 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1997 }
1998
c8342759 1999 if (c > mask)
2000 cpp_pedwarn (pfile, "escape sequence out of range for character");
2001
8330799c 2002 *pstr = str;
2003 return c;
2004}
2005
/* Default the MAX_*_TYPE_SIZE macros to CHAR_TYPE_SIZE and
   WCHAR_TYPE_SIZE respectively, unless they are already defined.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
2013
2014/* Interpret a (possibly wide) character constant in TOKEN.
2015 WARN_MULTI warns about multi-character charconsts, if not
2016 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
2017 that did not exist in traditional C. PCHARS_SEEN points to a
2018 variable that is filled in with the number of characters seen. */
2019HOST_WIDE_INT
2020cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
2021 cpp_reader *pfile;
2022 const cpp_token *token;
2023 int warn_multi;
2024 int traditional;
2025 unsigned int *pchars_seen;
2026{
2027 const unsigned char *str = token->val.str.text;
2028 const unsigned char *limit = str + token->val.str.len;
2029 unsigned int chars_seen = 0;
2030 unsigned int width, max_chars, c;
0d086e18 2031 unsigned HOST_WIDE_INT mask;
2032 HOST_WIDE_INT result = 0;
8330799c 2033
2034#ifdef MULTIBYTE_CHARS
2035 (void) local_mbtowc (NULL, NULL, 0);
2036#endif
2037
2038 /* Width in bits. */
2039 if (token->type == CPP_CHAR)
2040 width = MAX_CHAR_TYPE_SIZE;
2041 else
2042 width = MAX_WCHAR_TYPE_SIZE;
2043
2044 if (width < HOST_BITS_PER_WIDE_INT)
2045 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
2046 else
2047 mask = ~0;
2048 max_chars = HOST_BITS_PER_WIDE_INT / width;
2049
2050 while (str < limit)
2051 {
2052#ifdef MULTIBYTE_CHARS
2053 wchar_t wc;
2054 int char_len;
2055
2056 char_len = local_mbtowc (&wc, str, limit - str);
2057 if (char_len == -1)
2058 {
2059 cpp_warning (pfile, "ignoring invalid multibyte character");
2060 c = *str++;
2061 }
2062 else
2063 {
2064 str += char_len;
2065 c = wc;
2066 }
2067#else
2068 c = *str++;
2069#endif
2070
2071 if (c == '\\')
c8342759 2072 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
8330799c 2073
2074#ifdef MAP_CHARACTER
2075 if (ISPRINT (c))
2076 c = MAP_CHARACTER (c);
2077#endif
2078
2079 /* Merge character into result; ignore excess chars. */
2080 if (++chars_seen <= max_chars)
2081 {
2082 if (width < HOST_BITS_PER_WIDE_INT)
2083 result = (result << width) | (c & mask);
2084 else
2085 result = c;
2086 }
2087 }
2088
2089 if (chars_seen == 0)
2090 cpp_error (pfile, "empty character constant");
2091 else if (chars_seen > max_chars)
2092 {
2093 chars_seen = max_chars;
f73bab03 2094 cpp_warning (pfile, "character constant too long");
8330799c 2095 }
2096 else if (chars_seen > 1 && !traditional && warn_multi)
2097 cpp_warning (pfile, "multi-character character constant");
2098
2099 /* If char type is signed, sign-extend the constant. The
2100 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2101 if (token->type == CPP_CHAR && chars_seen)
2102 {
2103 unsigned int nbits = chars_seen * width;
2104 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2105
2106 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2107 || ((result >> (nbits - 1)) & 1) == 0)
2108 result &= mask;
2109 else
2110 result |= ~mask;
2111 }
2112
2113 *pchars_seen = chars_seen;
2114 return result;
2115}
2116
/* Memory pools.  */

/* Layout probe.  The offset of U within this structure is the padding
   the compiler inserts after a single char to align the union, and is
   used as the default alignment of pool allocations.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
deb356cf 2130
79bd622b 2131static int
2132chunk_suitable (pool, chunk, size)
2133 cpp_pool *pool;
2134 cpp_chunk *chunk;
2135 unsigned int size;
2136{
2137 /* Being at least twice SIZE means we can use memcpy in
2138 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2139 anyway. */
2140 return (chunk && pool->locked != chunk
2141 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
f80e83a9 2142}
6060326b 2143
79bd622b 2144/* Returns the end of the new pool. PTR points to a char in the old
2145 pool, and is updated to point to the same char in the new pool. */
2146unsigned char *
2147_cpp_next_chunk (pool, len, ptr)
2148 cpp_pool *pool;
2149 unsigned int len;
2150 unsigned char **ptr;
f80e83a9 2151{
79bd622b 2152 cpp_chunk *chunk = pool->cur->next;
6060326b 2153
79bd622b 2154 /* LEN is the minimum size we want in the new pool. */
2155 len += POOL_ROOM (pool);
2156 if (! chunk_suitable (pool, chunk, len))
f80e83a9 2157 {
79bd622b 2158 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
6060326b 2159
79bd622b 2160 chunk->next = pool->cur->next;
2161 pool->cur->next = chunk;
6060326b 2162 }
2163
79bd622b 2164 /* Update the pointer before changing chunk's front. */
2165 if (ptr)
2166 *ptr += chunk->base - POOL_FRONT (pool);
f80e83a9 2167
79bd622b 2168 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2169 chunk->front = chunk->base;
f80e83a9 2170
79bd622b 2171 pool->cur = chunk;
2172 return POOL_LIMIT (pool);
6060326b 2173}
2174
79bd622b 2175static cpp_chunk *
2176new_chunk (size)
2177 unsigned int size;
f80e83a9 2178{
79bd622b 2179 unsigned char *base;
2180 cpp_chunk *result;
89b05ef6 2181
a28b091b 2182 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
79bd622b 2183 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2184 /* Put the chunk descriptor at the end. Then chunk overruns will
2185 cause obvious chaos. */
2186 result = (cpp_chunk *) (base + size);
2187 result->base = base;
2188 result->front = base;
2189 result->limit = base + size;
2190 result->next = 0;
deb356cf 2191
79bd622b 2192 return result;
f80e83a9 2193}
2194
79bd622b 2195void
2196_cpp_init_pool (pool, size, align, temp)
2197 cpp_pool *pool;
2198 unsigned int size, align, temp;
2199{
2200 if (align == 0)
2201 align = DEFAULT_ALIGNMENT;
2202 if (align & (align - 1))
2203 abort ();
2204 pool->align = align;
162cee98 2205 pool->first = new_chunk (size);
2206 pool->cur = pool->first;
79bd622b 2207 pool->locked = 0;
2208 pool->locks = 0;
2209 if (temp)
2210 pool->cur->next = pool->cur;
f80e83a9 2211}
2212
79bd622b 2213void
2214_cpp_lock_pool (pool)
2215 cpp_pool *pool;
f80e83a9 2216{
79bd622b 2217 if (pool->locks++ == 0)
2218 pool->locked = pool->cur;
f80e83a9 2219}
2220
79bd622b 2221void
2222_cpp_unlock_pool (pool)
2223 cpp_pool *pool;
f80e83a9 2224{
79bd622b 2225 if (--pool->locks == 0)
2226 pool->locked = 0;
f80e83a9 2227}
2228
79bd622b 2229void
2230_cpp_free_pool (pool)
2231 cpp_pool *pool;
89b05ef6 2232{
162cee98 2233 cpp_chunk *chunk = pool->first, *next;
89b05ef6 2234
79bd622b 2235 do
89b05ef6 2236 {
79bd622b 2237 next = chunk->next;
2238 free (chunk->base);
2239 chunk = next;
89b05ef6 2240 }
162cee98 2241 while (chunk && chunk != pool->first);
f80e83a9 2242}
f80e83a9 2243
79bd622b 2244/* Reserve LEN bytes from a memory pool. */
2245unsigned char *
2246_cpp_pool_reserve (pool, len)
2247 cpp_pool *pool;
2248 unsigned int len;
f80e83a9 2249{
a28b091b 2250 len = POOL_ALIGN (len, pool->align);
79bd622b 2251 if (len > (unsigned int) POOL_ROOM (pool))
2252 _cpp_next_chunk (pool, len, 0);
f80e83a9 2253
79bd622b 2254 return POOL_FRONT (pool);
6060326b 2255}
2256
79bd622b 2257/* Allocate LEN bytes from a memory pool. */
2258unsigned char *
2259_cpp_pool_alloc (pool, len)
2260 cpp_pool *pool;
2261 unsigned int len;
f80e83a9 2262{
79bd622b 2263 unsigned char *result = _cpp_pool_reserve (pool, len);
deb356cf 2264
79bd622b 2265 POOL_COMMIT (pool, len);
2266 return result;
f80e83a9 2267}