]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/cpplex.c
* gcc.c (default_compilers): Remove .ada.
[thirdparty/gcc.git] / gcc / cpplex.c
CommitLineData
0578f103 1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
6060326b 7 Single-pass line tokenization by Neil Booth, April 2000
0578f103 8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
79bd622b 23/* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
27
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
f80e83a9 36
0578f103 37#include "config.h"
38#include "system.h"
0578f103 39#include "cpplib.h"
40#include "cpphash.h"
41
8330799c 42/* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
45#ifdef CROSS_COMPILE
46#undef MULTIBYTE_CHARS
47#endif
48
49#ifdef MULTIBYTE_CHARS
50#include "mbchar.h"
51#include <locale.h>
52#endif
53
/* How the spelling of a token is obtained.  Tokens classed as
   SPELL_STRING keep their spelling in the token list, with its
   length recorded alongside it in the token's value.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR = 1,
  SPELL_IDENT = 2,
  SPELL_STRING = 3,
  SPELL_NONE = 4
};

/* One row of the token_spellings table: the spelling category of a
   token type, paired with a pointer to its name.  */
struct token_spelling
{
  enum spell_type category;
  const unsigned char *name;
};
70
79bd622b 71const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
72 U":>", U"<%", U"%>"};
73
74#define OP(e, s) { SPELL_OPERATOR, U s },
75#define TK(e, s) { s, U STRINGX (e) },
76const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
77#undef OP
78#undef TK
79
80#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
e2f9a79f 82
36a0aa7c 83static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
c808d026 84static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
85static cppchar_t get_effective_char PARAMS ((cpp_reader *));
338fa5f7 86
f80e83a9 87static int skip_block_comment PARAMS ((cpp_reader *));
f669338a 88static int skip_line_comment PARAMS ((cpp_reader *));
338fa5f7 89static void adjust_column PARAMS ((cpp_reader *));
90static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
66a5287e 91static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
92static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
93 const U_CHAR *));
79bd622b 94static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
95static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
338fa5f7 96static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
79bd622b 97static void unterminated PARAMS ((cpp_reader *, int));
338fa5f7 98static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
99static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
c808d026 100static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
f669338a 101static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
79bd622b 102static int name_p PARAMS ((cpp_reader *, const cpp_string *));
c8342759 103static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
104 const unsigned char *, unsigned int *));
83dcbb5c 105static tokenrun *next_tokenrun PARAMS ((tokenrun *));
e916a356 106
8330799c 107static unsigned int hex_digit_value PARAMS ((unsigned int));
4b31a107 108static _cpp_buff *new_buff PARAMS ((size_t));
bce8e0c0 109
f80e83a9 110/* Utility routine:
2c63d6c8 111
76faa4c0 112 Compares, the token TOKEN to the NUL-terminated string STRING.
113 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
bce8e0c0 114
f80e83a9 115int
76faa4c0 116cpp_ideq (token, string)
117 const cpp_token *token;
f80e83a9 118 const char *string;
119{
76faa4c0 120 if (token->type != CPP_NAME)
f80e83a9 121 return 0;
76faa4c0 122
c86dbc5b 123 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
bce8e0c0 124}
50fd6b48 125
338fa5f7 126/* Call when meeting a newline. Returns the character after the newline
127 (or carriage-return newline combination), or EOF. */
128static cppchar_t
36a0aa7c 129handle_newline (pfile, newline_char)
130 cpp_reader *pfile;
338fa5f7 131 cppchar_t newline_char;
132{
36a0aa7c 133 cpp_buffer *buffer;
338fa5f7 134 cppchar_t next = EOF;
135
36a0aa7c 136 pfile->line++;
36a0aa7c 137 buffer = pfile->buffer;
338fa5f7 138 buffer->col_adjust = 0;
338fa5f7 139 buffer->line_base = buffer->cur;
140
141 /* Handle CR-LF and LF-CR combinations, get the next character. */
142 if (buffer->cur < buffer->rlimit)
143 {
144 next = *buffer->cur++;
145 if (next + newline_char == '\r' + '\n')
146 {
147 buffer->line_base = buffer->cur;
148 if (buffer->cur < buffer->rlimit)
149 next = *buffer->cur++;
150 else
151 next = EOF;
152 }
153 }
154
155 buffer->read_ahead = next;
156 return next;
157}
158
159/* Subroutine of skip_escaped_newlines; called when a trigraph is
160 encountered. It warns if necessary, and returns true if the
161 trigraph should be honoured. FROM_CHAR is the third character of a
162 trigraph, and presumed to be the previous character for position
163 reporting. */
0578f103 164static int
338fa5f7 165trigraph_ok (pfile, from_char)
0578f103 166 cpp_reader *pfile;
338fa5f7 167 cppchar_t from_char;
0578f103 168{
f80e83a9 169 int accept = CPP_OPTION (pfile, trigraphs);
170
f669338a 171 /* Don't warn about trigraphs in comments. */
172 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
0578f103 173 {
338fa5f7 174 cpp_buffer *buffer = pfile->buffer;
1ea7ed21 175
f80e83a9 176 if (accept)
1ea7ed21 177 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
f80e83a9 178 "trigraph ??%c converted to %c",
338fa5f7 179 (int) from_char,
180 (int) _cpp_trigraph_map[from_char]);
4b912310 181 else if (buffer->cur != buffer->last_Wtrigraphs)
182 {
183 buffer->last_Wtrigraphs = buffer->cur;
1ea7ed21 184 cpp_warning_with_line (pfile, pfile->line,
4b912310 185 CPP_BUF_COL (buffer) - 2,
186 "trigraph ??%c ignored", (int) from_char);
187 }
0578f103 188 }
338fa5f7 189
f80e83a9 190 return accept;
0578f103 191}
192
/* Set the type of the token being lexed to T and mark the read-ahead
   character as consumed.  Relies on local variables named `buffer'
   and `result' in the enclosing function.  */
#define ACCEPT_CHAR(t)                  \
  do                                    \
    {                                   \
      result->type = t;                 \
      buffer->read_ahead = EOF;         \
    }                                   \
  while (0)

/* Save and restore the read position, using local variables named
   `buffer' and `saved_cur'.  When we move to multibyte character
   sets, these should additionally save and restore the state of the
   multibyte conversion library, which probably means saving and
   restoring a "cookie".  In the case of glibc that is an 8-byte
   structure, so it is not a high overhead operation.  In any case,
   it's out of the fast path.  */
#define SAVE_STATE()                    \
  do                                    \
    {                                   \
      saved_cur = buffer->cur;          \
    }                                   \
  while (0)
#define RESTORE_STATE()                 \
  do                                    \
    {                                   \
      buffer->cur = saved_cur;          \
    }                                   \
  while (0)
204
205/* Skips any escaped newlines introduced by NEXT, which is either a
206 '?' or a '\\'. Returns the next character, which will also have
396ffa86 207 been placed in buffer->read_ahead. This routine performs
208 preprocessing stages 1 and 2 of the ISO C standard. */
338fa5f7 209static cppchar_t
c808d026 210skip_escaped_newlines (pfile, next)
211 cpp_reader *pfile;
338fa5f7 212 cppchar_t next;
0578f103 213{
c808d026 214 cpp_buffer *buffer = pfile->buffer;
215
396ffa86 216 /* Only do this if we apply stages 1 and 2. */
217 if (!buffer->from_stage3)
f80e83a9 218 {
396ffa86 219 cppchar_t next1;
220 const unsigned char *saved_cur;
221 int space;
222
223 do
338fa5f7 224 {
396ffa86 225 if (buffer->cur == buffer->rlimit)
226 break;
227
228 SAVE_STATE ();
229 if (next == '?')
338fa5f7 230 {
396ffa86 231 next1 = *buffer->cur++;
232 if (next1 != '?' || buffer->cur == buffer->rlimit)
233 {
234 RESTORE_STATE ();
235 break;
236 }
237
238 next1 = *buffer->cur++;
239 if (!_cpp_trigraph_map[next1]
c808d026 240 || !trigraph_ok (pfile, next1))
396ffa86 241 {
242 RESTORE_STATE ();
243 break;
244 }
245
246 /* We have a full trigraph here. */
247 next = _cpp_trigraph_map[next1];
248 if (next != '\\' || buffer->cur == buffer->rlimit)
249 break;
250 SAVE_STATE ();
251 }
252
253 /* We have a backslash, and room for at least one more character. */
254 space = 0;
255 do
256 {
257 next1 = *buffer->cur++;
258 if (!is_nvspace (next1))
259 break;
260 space = 1;
338fa5f7 261 }
396ffa86 262 while (buffer->cur < buffer->rlimit);
f80e83a9 263
396ffa86 264 if (!is_vspace (next1))
338fa5f7 265 {
266 RESTORE_STATE ();
267 break;
268 }
0578f103 269
c808d026 270 if (space && !pfile->state.lexing_comment)
271 cpp_warning (pfile, "backslash and newline separated by space");
338fa5f7 272
c808d026 273 next = handle_newline (pfile, next1);
396ffa86 274 if (next == EOF)
c808d026 275 cpp_pedwarn (pfile, "backslash-newline at end of file");
338fa5f7 276 }
396ffa86 277 while (next == '\\' || next == '?');
f80e83a9 278 }
0578f103 279
338fa5f7 280 buffer->read_ahead = next;
281 return next;
0578f103 282}
283
338fa5f7 284/* Obtain the next character, after trigraph conversion and skipping
285 an arbitrary string of escaped newlines. The common case of no
286 trigraphs or escaped newlines falls through quickly. */
287static cppchar_t
c808d026 288get_effective_char (pfile)
289 cpp_reader *pfile;
852d1b04 290{
c808d026 291 cpp_buffer *buffer = pfile->buffer;
338fa5f7 292 cppchar_t next = EOF;
293
294 if (buffer->cur < buffer->rlimit)
295 {
296 next = *buffer->cur++;
297
298 /* '?' can introduce trigraphs (and therefore backslash); '\\'
299 can introduce escaped newlines, which we want to skip, or
300 UCNs, which, depending upon lexer state, we will handle in
301 the future. */
302 if (next == '?' || next == '\\')
c808d026 303 next = skip_escaped_newlines (pfile, next);
338fa5f7 304 }
305
306 buffer->read_ahead = next;
307 return next;
852d1b04 308}
309
338fa5f7 310/* Skip a C-style block comment. We find the end of the comment by
311 seeing if an asterisk is before every '/' we encounter. Returns
312 non-zero if comment terminated by EOF, zero otherwise. */
f80e83a9 313static int
314skip_block_comment (pfile)
0578f103 315 cpp_reader *pfile;
316{
f80e83a9 317 cpp_buffer *buffer = pfile->buffer;
63e1abce 318 cppchar_t c = EOF, prevc = EOF;
338fa5f7 319
f669338a 320 pfile->state.lexing_comment = 1;
338fa5f7 321 while (buffer->cur != buffer->rlimit)
0578f103 322 {
338fa5f7 323 prevc = c, c = *buffer->cur++;
324
325 next_char:
326 /* FIXME: For speed, create a new character class of characters
79bd622b 327 of interest inside block comments. */
338fa5f7 328 if (c == '?' || c == '\\')
c808d026 329 c = skip_escaped_newlines (pfile, c);
f80e83a9 330
338fa5f7 331 /* People like decorating comments with '*', so check for '/'
332 instead for efficiency. */
f80e83a9 333 if (c == '/')
0578f103 334 {
338fa5f7 335 if (prevc == '*')
336 break;
f80e83a9 337
338fa5f7 338 /* Warn about potential nested comments, but not if the '/'
339 comes immediately before the true comment delimeter.
f80e83a9 340 Don't bother to get it right across escaped newlines. */
338fa5f7 341 if (CPP_OPTION (pfile, warn_comments)
342 && buffer->cur != buffer->rlimit)
0578f103 343 {
338fa5f7 344 prevc = c, c = *buffer->cur++;
345 if (c == '*' && buffer->cur != buffer->rlimit)
346 {
347 prevc = c, c = *buffer->cur++;
348 if (c != '/')
1ea7ed21 349 cpp_warning_with_line (pfile, pfile->line,
350 CPP_BUF_COL (buffer) - 2,
338fa5f7 351 "\"/*\" within comment");
352 }
353 goto next_char;
0578f103 354 }
0578f103 355 }
78719282 356 else if (is_vspace (c))
0578f103 357 {
36a0aa7c 358 prevc = c, c = handle_newline (pfile, c);
338fa5f7 359 goto next_char;
0578f103 360 }
b86584f6 361 else if (c == '\t')
338fa5f7 362 adjust_column (pfile);
0578f103 363 }
f80e83a9 364
f669338a 365 pfile->state.lexing_comment = 0;
338fa5f7 366 buffer->read_ahead = EOF;
367 return c != '/' || prevc != '*';
0578f103 368}
369
241e762e 370/* Skip a C++ line comment. Handles escaped newlines. Returns
338fa5f7 371 non-zero if a multiline comment. The following new line, if any,
372 is left in buffer->read_ahead. */
f80e83a9 373static int
f669338a 374skip_line_comment (pfile)
375 cpp_reader *pfile;
0578f103 376{
f669338a 377 cpp_buffer *buffer = pfile->buffer;
1ea7ed21 378 unsigned int orig_line = pfile->line;
338fa5f7 379 cppchar_t c;
f80e83a9 380
f669338a 381 pfile->state.lexing_comment = 1;
338fa5f7 382 do
f80e83a9 383 {
338fa5f7 384 c = EOF;
385 if (buffer->cur == buffer->rlimit)
386 break;
f80e83a9 387
338fa5f7 388 c = *buffer->cur++;
389 if (c == '?' || c == '\\')
c808d026 390 c = skip_escaped_newlines (pfile, c);
f80e83a9 391 }
338fa5f7 392 while (!is_vspace (c));
0578f103 393
f669338a 394 pfile->state.lexing_comment = 0;
338fa5f7 395 buffer->read_ahead = c; /* Leave any newline for caller. */
1ea7ed21 396 return orig_line != pfile->line;
f80e83a9 397}
0578f103 398
338fa5f7 399/* pfile->buffer->cur is one beyond the \t character. Update
400 col_adjust so we track the column correctly. */
b86584f6 401static void
338fa5f7 402adjust_column (pfile)
b86584f6 403 cpp_reader *pfile;
b86584f6 404{
338fa5f7 405 cpp_buffer *buffer = pfile->buffer;
406 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
b86584f6 407
408 /* Round it up to multiple of the tabstop, but subtract 1 since the
409 tab itself occupies a character position. */
338fa5f7 410 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
411 - col % CPP_OPTION (pfile, tabstop)) - 1;
b86584f6 412}
413
338fa5f7 414/* Skips whitespace, saving the next non-whitespace character.
415 Adjusts pfile->col_adjust to account for tabs. Without this,
416 tokens might be assigned an incorrect column. */
f80e83a9 417static void
338fa5f7 418skip_whitespace (pfile, c)
f80e83a9 419 cpp_reader *pfile;
338fa5f7 420 cppchar_t c;
f80e83a9 421{
422 cpp_buffer *buffer = pfile->buffer;
338fa5f7 423 unsigned int warned = 0;
0578f103 424
338fa5f7 425 do
f80e83a9 426 {
78719282 427 /* Horizontal space always OK. */
428 if (c == ' ')
338fa5f7 429 ;
78719282 430 else if (c == '\t')
338fa5f7 431 adjust_column (pfile);
432 /* Just \f \v or \0 left. */
78719282 433 else if (c == '\0')
f80e83a9 434 {
78719282 435 if (!warned)
338fa5f7 436 {
437 cpp_warning (pfile, "null character(s) ignored");
438 warned = 1;
439 }
0578f103 440 }
79bd622b 441 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
1ea7ed21 442 cpp_pedwarn_with_line (pfile, pfile->line,
78719282 443 CPP_BUF_COL (buffer),
444 "%s in preprocessing directive",
445 c == '\f' ? "form feed" : "vertical tab");
338fa5f7 446
447 c = EOF;
448 if (buffer->cur == buffer->rlimit)
449 break;
450 c = *buffer->cur++;
0578f103 451 }
2c0e001b 452 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
338fa5f7 453 while (is_nvspace (c));
454
455 /* Remember the next character. */
456 buffer->read_ahead = c;
f80e83a9 457}
0578f103 458
79bd622b 459/* See if the characters of a number token are valid in a name (no
460 '.', '+' or '-'). */
461static int
462name_p (pfile, string)
463 cpp_reader *pfile;
464 const cpp_string *string;
465{
466 unsigned int i;
467
468 for (i = 0; i < string->len; i++)
469 if (!is_idchar (string->text[i]))
470 return 0;
471
472 return 1;
473}
474
66a5287e 475/* Parse an identifier, skipping embedded backslash-newlines. This is
476 a critical inner loop. The common case is an identifier which has
477 not been split by backslash-newline, does not contain a dollar
478 sign, and has already been scanned (roughly 10:1 ratio of
479 seen:unseen identifiers in normal code; the distribution is
480 Poisson-like). Second most common case is a new identifier, not
481 split and no dollar sign. The other possibilities are rare and
482 have been relegated to parse_identifier_slow. */
338fa5f7 483
484static cpp_hashnode *
66a5287e 485parse_identifier (pfile)
0578f103 486 cpp_reader *pfile;
0578f103 487{
79bd622b 488 cpp_hashnode *result;
66a5287e 489 const U_CHAR *cur, *rlimit;
490
491 /* Fast-path loop. Skim over a normal identifier.
492 N.B. ISIDNUM does not include $. */
493 cur = pfile->buffer->cur - 1;
494 rlimit = pfile->buffer->rlimit;
495 do
496 cur++;
497 while (cur < rlimit && ISIDNUM (*cur));
498
499 /* Check for slow-path cases. */
500 if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
501 result = parse_identifier_slow (pfile, cur);
502 else
503 {
504 const U_CHAR *base = pfile->buffer->cur - 1;
505 result = (cpp_hashnode *)
506 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
507 pfile->buffer->cur = cur;
508 }
509
510 /* Rarely, identifiers require diagnostics when lexed.
511 XXX Has to be forced out of the fast path. */
512 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
513 && !pfile->state.skipping, 0))
514 {
515 /* It is allowed to poison the same identifier twice. */
516 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
517 cpp_error (pfile, "attempt to use poisoned \"%s\"",
518 NODE_NAME (result));
519
520 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
521 replacement list of a variadic macro. */
522 if (result == pfile->spec_nodes.n__VA_ARGS__
523 && !pfile->state.va_args_ok)
524 cpp_pedwarn (pfile,
525 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
526 }
527
528 return result;
529}
530
531/* Slow path. This handles identifiers which have been split, and
532 identifiers which contain dollar signs. The part of the identifier
533 from PFILE->buffer->cur-1 to CUR has already been scanned. */
534static cpp_hashnode *
535parse_identifier_slow (pfile, cur)
536 cpp_reader *pfile;
537 const U_CHAR *cur;
538{
338fa5f7 539 cpp_buffer *buffer = pfile->buffer;
66a5287e 540 const U_CHAR *base = buffer->cur - 1;
0d086e18 541 struct obstack *stack = &pfile->hash_table->stack;
66a5287e 542 unsigned int c, saw_dollar = 0, len;
543
544 /* Copy the part of the token which is known to be okay. */
545 obstack_grow (stack, base, cur - base);
f80e83a9 546
66a5287e 547 /* Now process the part which isn't. We are looking at one of
548 '$', '\\', or '?' on entry to this loop. */
549 c = *cur++;
550 buffer->cur = cur;
338fa5f7 551 do
f80e83a9 552 {
66a5287e 553 while (is_idchar (c))
554 {
555 obstack_1grow (stack, c);
0578f103 556
66a5287e 557 if (c == '$')
558 saw_dollar++;
71aa9da4 559
66a5287e 560 c = EOF;
561 if (buffer->cur == buffer->rlimit)
562 break;
71aa9da4 563
66a5287e 564 c = *buffer->cur++;
565 }
71aa9da4 566
338fa5f7 567 /* Potential escaped newline? */
568 if (c != '?' && c != '\\')
66a5287e 569 break;
c808d026 570 c = skip_escaped_newlines (pfile, c);
f80e83a9 571 }
338fa5f7 572 while (is_idchar (c));
573
79bd622b 574 /* Remember the next character. */
575 buffer->read_ahead = c;
576
338fa5f7 577 /* $ is not a identifier character in the standard, but is commonly
578 accepted as an extension. Don't warn about it in skipped
579 conditional blocks. */
5e878517 580 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
338fa5f7 581 cpp_pedwarn (pfile, "'$' character(s) in identifier");
582
79bd622b 583 /* Identifiers are null-terminated. */
0d086e18 584 len = obstack_object_size (stack);
585 obstack_1grow (stack, '\0');
79bd622b 586
66a5287e 587 return (cpp_hashnode *)
0d086e18 588 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
0578f103 589}
590
338fa5f7 591/* Parse a number, skipping embedded backslash-newlines. */
0578f103 592static void
79bd622b 593parse_number (pfile, number, c, leading_period)
0578f103 594 cpp_reader *pfile;
338fa5f7 595 cpp_string *number;
596 cppchar_t c;
79bd622b 597 int leading_period;
0578f103 598{
f80e83a9 599 cpp_buffer *buffer = pfile->buffer;
79bd622b 600 unsigned char *dest, *limit;
0578f103 601
1fdf6039 602 dest = BUFF_FRONT (pfile->u_buff);
603 limit = BUFF_LIMIT (pfile->u_buff);
f669338a 604
79bd622b 605 /* Place a leading period. */
606 if (leading_period)
607 {
1fdf6039 608 if (dest == limit)
609 {
e6a5f963 610 _cpp_extend_buff (pfile, &pfile->u_buff, 1);
1fdf6039 611 dest = BUFF_FRONT (pfile->u_buff);
612 limit = BUFF_LIMIT (pfile->u_buff);
613 }
79bd622b 614 *dest++ = '.';
615 }
616
338fa5f7 617 do
f80e83a9 618 {
338fa5f7 619 do
620 {
79bd622b 621 /* Need room for terminating null. */
1fdf6039 622 if ((size_t) (limit - dest) < 2)
623 {
624 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
e6a5f963 625 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
1fdf6039 626 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
627 limit = BUFF_LIMIT (pfile->u_buff);
628 }
79bd622b 629 *dest++ = c;
338fa5f7 630
338fa5f7 631 c = EOF;
632 if (buffer->cur == buffer->rlimit)
633 break;
0578f103 634
338fa5f7 635 c = *buffer->cur++;
636 }
79bd622b 637 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
0578f103 638
338fa5f7 639 /* Potential escaped newline? */
640 if (c != '?' && c != '\\')
641 break;
c808d026 642 c = skip_escaped_newlines (pfile, c);
0578f103 643 }
79bd622b 644 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
f669338a 645
338fa5f7 646 /* Remember the next character. */
647 buffer->read_ahead = c;
852d1b04 648
79bd622b 649 /* Null-terminate the number. */
650 *dest = '\0';
651
1fdf6039 652 number->text = BUFF_FRONT (pfile->u_buff);
79bd622b 653 number->len = dest - number->text;
1fdf6039 654 BUFF_FRONT (pfile->u_buff) = dest + 1;
338fa5f7 655}
656
657/* Subroutine of parse_string. Emits error for unterminated strings. */
658static void
79bd622b 659unterminated (pfile, term)
338fa5f7 660 cpp_reader *pfile;
338fa5f7 661 int term;
662{
663 cpp_error (pfile, "missing terminating %c character", term);
664
729d2022 665 if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
f80e83a9 666 {
729d2022 667 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
338fa5f7 668 "possible start of unterminated string literal");
729d2022 669 pfile->mls_line = 0;
f80e83a9 670 }
0578f103 671}
672
79bd622b 673/* Subroutine of parse_string. */
674static int
675unescaped_terminator_p (pfile, dest)
676 cpp_reader *pfile;
677 const unsigned char *dest;
678{
679 const unsigned char *start, *temp;
680
681 /* In #include-style directives, terminators are not escapeable. */
682 if (pfile->state.angled_headers)
683 return 1;
684
1fdf6039 685 start = BUFF_FRONT (pfile->u_buff);
79bd622b 686
687 /* An odd number of consecutive backslashes represents an escaped
688 terminator. */
689 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
690 ;
691
692 return ((dest - temp) & 1) == 0;
693}
694
338fa5f7 695/* Parses a string, character constant, or angle-bracketed header file
4b0c16ee 696 name. Handles embedded trigraphs and escaped newlines. The stored
697 string is guaranteed NUL-terminated, but it is not guaranteed that
698 this is the first NUL since embedded NULs are preserved.
0578f103 699
4b0c16ee 700 Multi-line strings are allowed, but they are deprecated. */
f80e83a9 701static void
338fa5f7 702parse_string (pfile, token, terminator)
0578f103 703 cpp_reader *pfile;
f80e83a9 704 cpp_token *token;
338fa5f7 705 cppchar_t terminator;
0578f103 706{
f80e83a9 707 cpp_buffer *buffer = pfile->buffer;
79bd622b 708 unsigned char *dest, *limit;
338fa5f7 709 cppchar_t c;
38692459 710 bool warned_nulls = false, warned_multi = false;
338fa5f7 711
1fdf6039 712 dest = BUFF_FRONT (pfile->u_buff);
713 limit = BUFF_LIMIT (pfile->u_buff);
79bd622b 714
338fa5f7 715 for (;;)
0578f103 716 {
338fa5f7 717 if (buffer->cur == buffer->rlimit)
4b0c16ee 718 c = EOF;
719 else
720 c = *buffer->cur++;
721
722 have_char:
723 /* We need space for the terminating NUL. */
1fdf6039 724 if ((size_t) (limit - dest) < 1)
725 {
726 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
e6a5f963 727 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
1fdf6039 728 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
729 limit = BUFF_LIMIT (pfile->u_buff);
730 }
4b0c16ee 731
732 if (c == EOF)
338fa5f7 733 {
79bd622b 734 unterminated (pfile, terminator);
338fa5f7 735 break;
736 }
338fa5f7 737
338fa5f7 738 /* Handle trigraphs, escaped newlines etc. */
739 if (c == '?' || c == '\\')
c808d026 740 c = skip_escaped_newlines (pfile, c);
0578f103 741
79bd622b 742 if (c == terminator && unescaped_terminator_p (pfile, dest))
0578f103 743 {
79bd622b 744 c = EOF;
745 break;
338fa5f7 746 }
747 else if (is_vspace (c))
748 {
749 /* In assembly language, silently terminate string and
750 character literals at end of line. This is a kludge
751 around not knowing where comments are. */
5db5d057 752 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
338fa5f7 753 break;
0578f103 754
338fa5f7 755 /* Character constants and header names may not extend over
756 multiple lines. In Standard C, neither may strings.
757 Unfortunately, we accept multiline strings as an
cc8770bf 758 extension, except in #include family directives. */
759 if (terminator != '"' || pfile->state.angled_headers)
0578f103 760 {
79bd622b 761 unterminated (pfile, terminator);
338fa5f7 762 break;
0578f103 763 }
0578f103 764
38692459 765 if (!warned_multi)
766 {
767 warned_multi = true;
768 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
769 }
770
729d2022 771 if (pfile->mls_line == 0)
772 {
773 pfile->mls_line = token->line;
774 pfile->mls_col = token->col;
775 }
338fa5f7 776
36a0aa7c 777 c = handle_newline (pfile, c);
4b0c16ee 778 *dest++ = '\n';
779 goto have_char;
338fa5f7 780 }
38692459 781 else if (c == '\0' && !warned_nulls)
338fa5f7 782 {
38692459 783 warned_nulls = true;
784 cpp_warning (pfile, "null character(s) preserved in literal");
0578f103 785 }
0578f103 786
79bd622b 787 *dest++ = c;
0578f103 788 }
789
79bd622b 790 /* Remember the next character. */
338fa5f7 791 buffer->read_ahead = c;
4b0c16ee 792 *dest = '\0';
0578f103 793
1fdf6039 794 token->val.str.text = BUFF_FRONT (pfile->u_buff);
795 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
796 BUFF_FRONT (pfile->u_buff) = dest + 1;
338fa5f7 797}
f80e83a9 798
79bd622b 799/* The stored comment includes the comment start and any terminator. */
2c63d6c8 800static void
338fa5f7 801save_comment (pfile, token, from)
802 cpp_reader *pfile;
f80e83a9 803 cpp_token *token;
804 const unsigned char *from;
2c63d6c8 805{
f80e83a9 806 unsigned char *buffer;
338fa5f7 807 unsigned int len;
338fa5f7 808
f0495c2c 809 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
a543b315 810 /* C++ comments probably (not definitely) have moved past a new
811 line, which we don't want to save in the comment. */
812 if (pfile->buffer->read_ahead != EOF)
813 len--;
1fdf6039 814 buffer = _cpp_unaligned_alloc (pfile, len);
f80e83a9 815
f80e83a9 816 token->type = CPP_COMMENT;
76faa4c0 817 token->val.str.len = len;
338fa5f7 818 token->val.str.text = buffer;
0578f103 819
f0495c2c 820 buffer[0] = '/';
821 memcpy (buffer + 1, from, len - 1);
338fa5f7 822}
0578f103 823
10b4496a 824/* Subroutine of _cpp_lex_direct to handle '%'. A little tricky, since we
f669338a 825 want to avoid stepping back when lexing %:%X. */
338fa5f7 826static void
c808d026 827lex_percent (pfile, result)
828 cpp_reader *pfile;
338fa5f7 829 cpp_token *result;
338fa5f7 830{
c808d026 831 cpp_buffer *buffer= pfile->buffer;
f669338a 832 cppchar_t c;
833
834 result->type = CPP_MOD;
835 /* Parsing %:%X could leave an extra character. */
836 if (buffer->extra_char == EOF)
c808d026 837 c = get_effective_char (pfile);
f669338a 838 else
839 {
840 c = buffer->read_ahead = buffer->extra_char;
841 buffer->extra_char = EOF;
842 }
843
844 if (c == '=')
845 ACCEPT_CHAR (CPP_MOD_EQ);
c808d026 846 else if (CPP_OPTION (pfile, digraphs))
f669338a 847 {
848 if (c == ':')
849 {
850 result->flags |= DIGRAPH;
851 ACCEPT_CHAR (CPP_HASH);
c808d026 852 if (get_effective_char (pfile) == '%')
f669338a 853 {
c808d026 854 buffer->extra_char = get_effective_char (pfile);
f669338a 855 if (buffer->extra_char == ':')
856 {
857 buffer->extra_char = EOF;
858 ACCEPT_CHAR (CPP_PASTE);
859 }
860 else
861 /* We'll catch the extra_char when we're called back. */
862 buffer->read_ahead = '%';
863 }
864 }
865 else if (c == '>')
866 {
867 result->flags |= DIGRAPH;
868 ACCEPT_CHAR (CPP_CLOSE_BRACE);
869 }
870 }
871}
872
10b4496a 873/* Subroutine of _cpp_lex_direct to handle '.'. This is tricky, since we
f669338a 874 want to avoid stepping back when lexing '...' or '.123'. In the
875 latter case we should also set a flag for parse_number. */
876static void
877lex_dot (pfile, result)
878 cpp_reader *pfile;
879 cpp_token *result;
880{
881 cpp_buffer *buffer = pfile->buffer;
882 cppchar_t c;
883
884 /* Parsing ..X could leave an extra character. */
885 if (buffer->extra_char == EOF)
c808d026 886 c = get_effective_char (pfile);
f669338a 887 else
888 {
889 c = buffer->read_ahead = buffer->extra_char;
890 buffer->extra_char = EOF;
891 }
338fa5f7 892
f669338a 893 /* All known character sets have 0...9 contiguous. */
894 if (c >= '0' && c <= '9')
895 {
896 result->type = CPP_NUMBER;
79bd622b 897 parse_number (pfile, &result->val.str, c, 1);
f669338a 898 }
f80e83a9 899 else
c4357c92 900 {
f669338a 901 result->type = CPP_DOT;
902 if (c == '.')
903 {
c808d026 904 buffer->extra_char = get_effective_char (pfile);
f669338a 905 if (buffer->extra_char == '.')
906 {
907 buffer->extra_char = EOF;
908 ACCEPT_CHAR (CPP_ELLIPSIS);
909 }
910 else
911 /* We'll catch the extra_char when we're called back. */
912 buffer->read_ahead = '.';
913 }
914 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
915 ACCEPT_CHAR (CPP_DOT_STAR);
c4357c92 916 }
0578f103 917}
918
83dcbb5c 919/* Allocate COUNT tokens for RUN. */
920void
921_cpp_init_tokenrun (run, count)
922 tokenrun *run;
923 unsigned int count;
924{
925 run->base = xnewvec (cpp_token, count);
926 run->limit = run->base + count;
927 run->next = NULL;
928}
929
930/* Returns the next tokenrun, or creates one if there is none. */
931static tokenrun *
932next_tokenrun (run)
933 tokenrun *run;
934{
935 if (run->next == NULL)
936 {
937 run->next = xnew (tokenrun);
fb5ab82c 938 run->next->prev = run;
83dcbb5c 939 _cpp_init_tokenrun (run->next, 250);
940 }
941
942 return run->next;
943}
944
f9b5f742 945/* Allocate a single token that is invalidated at the same time as the
946 rest of the tokens on the line. Has its line and col set to the
947 same as the last lexed token, so that diagnostics appear in the
948 right place. */
949cpp_token *
950_cpp_temp_token (pfile)
951 cpp_reader *pfile;
952{
953 cpp_token *old, *result;
954
955 old = pfile->cur_token - 1;
956 if (pfile->cur_token == pfile->cur_run->limit)
957 {
958 pfile->cur_run = next_tokenrun (pfile->cur_run);
959 pfile->cur_token = pfile->cur_run->base;
960 }
961
962 result = pfile->cur_token++;
963 result->line = old->line;
964 result->col = old->col;
965 return result;
966}
967
10b4496a 968/* Lex a token into RESULT (external interface). Takes care of issues
969 like directive handling, token lookahead, multiple include
970 opimisation and skipping. */
c00e481c 971const cpp_token *
972_cpp_lex_token (pfile)
0578f103 973 cpp_reader *pfile;
83dcbb5c 974{
fb5ab82c 975 cpp_token *result;
83dcbb5c 976
fb5ab82c 977 for (;;)
83dcbb5c 978 {
fb5ab82c 979 if (pfile->cur_token == pfile->cur_run->limit)
83dcbb5c 980 {
fb5ab82c 981 pfile->cur_run = next_tokenrun (pfile->cur_run);
982 pfile->cur_token = pfile->cur_run->base;
83dcbb5c 983 }
984
fb5ab82c 985 if (pfile->lookaheads)
10b4496a 986 {
987 pfile->lookaheads--;
988 result = pfile->cur_token++;
989 }
fb5ab82c 990 else
10b4496a 991 result = _cpp_lex_direct (pfile);
fb5ab82c 992
993 if (result->flags & BOL)
83dcbb5c 994 {
fb5ab82c 995 /* Is this a directive. If _cpp_handle_directive returns
996 false, it is an assembler #. */
997 if (result->type == CPP_HASH
998 && !pfile->state.parsing_args
999 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1000 continue;
5621a364 1001 if (pfile->cb.line_change && !pfile->state.skipping)
1002 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
83dcbb5c 1003 }
83dcbb5c 1004
fb5ab82c 1005 /* We don't skip tokens in directives. */
1006 if (pfile->state.in_directive)
1007 break;
83dcbb5c 1008
fb5ab82c 1009 /* Outside a directive, invalidate controlling macros. At file
10b4496a 1010 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
fb5ab82c 1011 get here and MI optimisation works. */
83dcbb5c 1012 pfile->mi_valid = false;
fb5ab82c 1013
1014 if (!pfile->state.skipping || result->type == CPP_EOF)
1015 break;
83dcbb5c 1016 }
1017
c00e481c 1018 return result;
83dcbb5c 1019}
1020
10b4496a 1021/* Lex a token into pfile->cur_token, which is also incremented, to
1022 get diagnostics pointing to the correct location.
1023
1024 Does not handle issues such as token lookahead, multiple-include
1025 optimisation, directives, skipping etc. This function is only
1026 suitable for use by _cpp_lex_token, and in special cases like
1027 lex_expansion_token which doesn't care for any of these issues.
1028
1029 When meeting a newline, returns CPP_EOF if parsing a directive,
1030 otherwise returns to the start of the token buffer if permissible.
1031 Returns the location of the lexed token. */
1032cpp_token *
1033_cpp_lex_direct (pfile)
83dcbb5c 1034 cpp_reader *pfile;
0578f103 1035{
338fa5f7 1036 cppchar_t c;
230f0943 1037 cpp_buffer *buffer;
338fa5f7 1038 const unsigned char *comment_start;
10b4496a 1039 cpp_token *result = pfile->cur_token++;
0653b94e 1040
83dcbb5c 1041 fresh_line:
230f0943 1042 buffer = pfile->buffer;
8c2e2fc5 1043 result->flags = buffer->saved_flags;
1044 buffer->saved_flags = 0;
83dcbb5c 1045 update_tokens_line:
36a0aa7c 1046 result->line = pfile->line;
f80e83a9 1047
83dcbb5c 1048 skipped_white:
338fa5f7 1049 c = buffer->read_ahead;
1050 if (c == EOF && buffer->cur < buffer->rlimit)
83dcbb5c 1051 c = *buffer->cur++;
1052 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
338fa5f7 1053 buffer->read_ahead = EOF;
83dcbb5c 1054
1055 trigraph:
338fa5f7 1056 switch (c)
0578f103 1057 {
338fa5f7 1058 case EOF:
fb5ab82c 1059 buffer->saved_flags = BOL;
83dcbb5c 1060 if (!pfile->state.parsing_args && !pfile->state.in_directive)
4dfe8b74 1061 {
fb5ab82c 1062 if (buffer->cur != buffer->line_base)
83dcbb5c 1063 {
1064 /* Non-empty files should end in a newline. Don't warn
1065 for command line and _Pragma buffers. */
1066 if (!buffer->from_stage3)
1067 cpp_pedwarn (pfile, "no newline at end of file");
1068 handle_newline (pfile, '\n');
5475a165 1069 }
fb5ab82c 1070
1071 /* Don't pop the last buffer. */
1072 if (buffer->prev)
1073 {
1074 unsigned char stop = buffer->return_at_eof;
1075
1076 _cpp_pop_buffer (pfile);
1077 if (!stop)
1078 goto fresh_line;
1079 }
4dfe8b74 1080 }
338fa5f7 1081 result->type = CPP_EOF;
83dcbb5c 1082 break;
0578f103 1083
338fa5f7 1084 case ' ': case '\t': case '\f': case '\v': case '\0':
1085 skip_whitespace (pfile, c);
1086 result->flags |= PREV_WHITE;
83dcbb5c 1087 goto skipped_white;
338fa5f7 1088
1089 case '\n': case '\r':
fb5ab82c 1090 handle_newline (pfile, c);
1091 buffer->saved_flags = BOL;
1092 if (! pfile->state.in_directive)
0578f103 1093 {
f9b5f742 1094 if (pfile->state.parsing_args == 2)
1095 buffer->saved_flags |= PREV_WHITE;
fb5ab82c 1096 if (!pfile->keep_tokens)
1097 {
1098 pfile->cur_run = &pfile->base_run;
1099 result = pfile->base_run.base;
1100 pfile->cur_token = result + 1;
1101 }
1102 goto fresh_line;
0578f103 1103 }
83dcbb5c 1104 result->type = CPP_EOF;
1105 break;
732cb4c9 1106
338fa5f7 1107 case '?':
1108 case '\\':
1109 /* These could start an escaped newline, or '?' a trigraph. Let
1110 skip_escaped_newlines do all the work. */
1111 {
1ea7ed21 1112 unsigned int line = pfile->line;
338fa5f7 1113
c808d026 1114 c = skip_escaped_newlines (pfile, c);
1ea7ed21 1115 if (line != pfile->line)
338fa5f7 1116 /* We had at least one escaped newline of some sort, and the
1117 next character is in buffer->read_ahead. Update the
1118 token's line and column. */
83dcbb5c 1119 goto update_tokens_line;
338fa5f7 1120
1121 /* We are either the original '?' or '\\', or a trigraph. */
1122 result->type = CPP_QUERY;
1123 buffer->read_ahead = EOF;
1124 if (c == '\\')
3f90a920 1125 goto random_char;
338fa5f7 1126 else if (c != '?')
83dcbb5c 1127 goto trigraph;
338fa5f7 1128 }
1129 break;
732cb4c9 1130
338fa5f7 1131 case '0': case '1': case '2': case '3': case '4':
1132 case '5': case '6': case '7': case '8': case '9':
1133 result->type = CPP_NUMBER;
79bd622b 1134 parse_number (pfile, &result->val.str, c, 0);
338fa5f7 1135 break;
732cb4c9 1136
338fa5f7 1137 case '$':
1138 if (!CPP_OPTION (pfile, dollars_in_ident))
1139 goto random_char;
2c0e001b 1140 /* Fall through... */
338fa5f7 1141
1142 case '_':
1143 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1144 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1145 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1146 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1147 case 'y': case 'z':
1148 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1149 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1150 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1151 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1152 case 'Y': case 'Z':
1153 result->type = CPP_NAME;
66a5287e 1154 result->val.node = parse_identifier (pfile);
338fa5f7 1155
1156 /* 'L' may introduce wide characters or strings. */
79bd622b 1157 if (result->val.node == pfile->spec_nodes.n_L)
338fa5f7 1158 {
66a5287e 1159 c = buffer->read_ahead;
1160 if (c == EOF && buffer->cur < buffer->rlimit)
1161 c = *buffer->cur;
338fa5f7 1162 if (c == '\'' || c == '"')
71aa9da4 1163 {
66a5287e 1164 buffer->cur++;
338fa5f7 1165 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1166 goto make_string;
71aa9da4 1167 }
338fa5f7 1168 }
1169 /* Convert named operators to their proper types. */
79bd622b 1170 else if (result->val.node->flags & NODE_OPERATOR)
338fa5f7 1171 {
1172 result->flags |= NAMED_OP;
79bd622b 1173 result->type = result->val.node->value.operator;
338fa5f7 1174 }
1175 break;
1176
1177 case '\'':
1178 case '"':
1179 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1180 make_string:
1181 parse_string (pfile, result, c);
1182 break;
f80e83a9 1183
338fa5f7 1184 case '/':
f0495c2c 1185 /* A potential block or line comment. */
1186 comment_start = buffer->cur;
338fa5f7 1187 result->type = CPP_DIV;
c808d026 1188 c = get_effective_char (pfile);
338fa5f7 1189 if (c == '=')
1190 ACCEPT_CHAR (CPP_DIV_EQ);
f0495c2c 1191 if (c != '/' && c != '*')
1192 break;
20b8f8ff 1193
f0495c2c 1194 if (c == '*')
1195 {
338fa5f7 1196 if (skip_block_comment (pfile))
1ea7ed21 1197 cpp_error (pfile, "unterminated comment");
338fa5f7 1198 }
f0495c2c 1199 else
338fa5f7 1200 {
f0495c2c 1201 if (!CPP_OPTION (pfile, cplusplus_comments)
1202 && !CPP_IN_SYSTEM_HEADER (pfile))
1203 break;
1204
5db5d057 1205 /* Warn about comments only if pedantically GNUC89, and not
1206 in system headers. */
1207 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
66914e49 1208 && ! buffer->warned_cplusplus_comments)
f80e83a9 1209 {
f0495c2c 1210 cpp_pedwarn (pfile,
1211 "C++ style comments are not allowed in ISO C89");
1212 cpp_pedwarn (pfile,
1213 "(this will be reported only once per input file)");
1214 buffer->warned_cplusplus_comments = 1;
1215 }
338fa5f7 1216
66914e49 1217 /* Skip_line_comment updates buffer->read_ahead. */
e1caf668 1218 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
729d2022 1219 cpp_warning (pfile, "multi-line comment");
f0495c2c 1220 }
338fa5f7 1221
f0495c2c 1222 /* Skipping the comment has updated buffer->read_ahead. */
1223 if (!pfile->state.save_comments)
1224 {
1225 result->flags |= PREV_WHITE;
83dcbb5c 1226 goto update_tokens_line;
338fa5f7 1227 }
f0495c2c 1228
1229 /* Save the comment as a token in its own right. */
1230 save_comment (pfile, result, comment_start);
fb5ab82c 1231 break;
338fa5f7 1232
1233 case '<':
1234 if (pfile->state.angled_headers)
1235 {
1236 result->type = CPP_HEADER_NAME;
1237 c = '>'; /* terminator. */
1238 goto make_string;
1239 }
0578f103 1240
338fa5f7 1241 result->type = CPP_LESS;
c808d026 1242 c = get_effective_char (pfile);
338fa5f7 1243 if (c == '=')
1244 ACCEPT_CHAR (CPP_LESS_EQ);
1245 else if (c == '<')
1246 {
1247 ACCEPT_CHAR (CPP_LSHIFT);
c808d026 1248 if (get_effective_char (pfile) == '=')
338fa5f7 1249 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1250 }
1251 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1252 {
1253 ACCEPT_CHAR (CPP_MIN);
c808d026 1254 if (get_effective_char (pfile) == '=')
338fa5f7 1255 ACCEPT_CHAR (CPP_MIN_EQ);
1256 }
1257 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1258 {
1259 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1260 result->flags |= DIGRAPH;
1261 }
1262 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1263 {
1264 ACCEPT_CHAR (CPP_OPEN_BRACE);
1265 result->flags |= DIGRAPH;
1266 }
1267 break;
1268
1269 case '>':
1270 result->type = CPP_GREATER;
c808d026 1271 c = get_effective_char (pfile);
338fa5f7 1272 if (c == '=')
1273 ACCEPT_CHAR (CPP_GREATER_EQ);
1274 else if (c == '>')
1275 {
1276 ACCEPT_CHAR (CPP_RSHIFT);
c808d026 1277 if (get_effective_char (pfile) == '=')
338fa5f7 1278 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1279 }
1280 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1281 {
1282 ACCEPT_CHAR (CPP_MAX);
c808d026 1283 if (get_effective_char (pfile) == '=')
338fa5f7 1284 ACCEPT_CHAR (CPP_MAX_EQ);
1285 }
1286 break;
1287
f669338a 1288 case '%':
c808d026 1289 lex_percent (pfile, result);
338fa5f7 1290 break;
1291
f669338a 1292 case '.':
1293 lex_dot (pfile, result);
338fa5f7 1294 break;
0578f103 1295
338fa5f7 1296 case '+':
1297 result->type = CPP_PLUS;
c808d026 1298 c = get_effective_char (pfile);
338fa5f7 1299 if (c == '=')
1300 ACCEPT_CHAR (CPP_PLUS_EQ);
1301 else if (c == '+')
1302 ACCEPT_CHAR (CPP_PLUS_PLUS);
1303 break;
ac0749c7 1304
338fa5f7 1305 case '-':
1306 result->type = CPP_MINUS;
c808d026 1307 c = get_effective_char (pfile);
338fa5f7 1308 if (c == '>')
1309 {
1310 ACCEPT_CHAR (CPP_DEREF);
1311 if (CPP_OPTION (pfile, cplusplus)
c808d026 1312 && get_effective_char (pfile) == '*')
338fa5f7 1313 ACCEPT_CHAR (CPP_DEREF_STAR);
1314 }
1315 else if (c == '=')
1316 ACCEPT_CHAR (CPP_MINUS_EQ);
1317 else if (c == '-')
1318 ACCEPT_CHAR (CPP_MINUS_MINUS);
1319 break;
0578f103 1320
338fa5f7 1321 case '*':
1322 result->type = CPP_MULT;
c808d026 1323 if (get_effective_char (pfile) == '=')
338fa5f7 1324 ACCEPT_CHAR (CPP_MULT_EQ);
1325 break;
ac0749c7 1326
338fa5f7 1327 case '=':
1328 result->type = CPP_EQ;
c808d026 1329 if (get_effective_char (pfile) == '=')
338fa5f7 1330 ACCEPT_CHAR (CPP_EQ_EQ);
1331 break;
c4abf88d 1332
338fa5f7 1333 case '!':
1334 result->type = CPP_NOT;
c808d026 1335 if (get_effective_char (pfile) == '=')
338fa5f7 1336 ACCEPT_CHAR (CPP_NOT_EQ);
1337 break;
0578f103 1338
338fa5f7 1339 case '&':
1340 result->type = CPP_AND;
c808d026 1341 c = get_effective_char (pfile);
338fa5f7 1342 if (c == '=')
1343 ACCEPT_CHAR (CPP_AND_EQ);
1344 else if (c == '&')
1345 ACCEPT_CHAR (CPP_AND_AND);
1346 break;
1347
1348 case '#':
e14c5993 1349 result->type = CPP_HASH;
83dcbb5c 1350 if (get_effective_char (pfile) == '#')
1351 ACCEPT_CHAR (CPP_PASTE);
338fa5f7 1352 break;
0578f103 1353
338fa5f7 1354 case '|':
1355 result->type = CPP_OR;
c808d026 1356 c = get_effective_char (pfile);
338fa5f7 1357 if (c == '=')
1358 ACCEPT_CHAR (CPP_OR_EQ);
1359 else if (c == '|')
1360 ACCEPT_CHAR (CPP_OR_OR);
1361 break;
0578f103 1362
338fa5f7 1363 case '^':
1364 result->type = CPP_XOR;
c808d026 1365 if (get_effective_char (pfile) == '=')
338fa5f7 1366 ACCEPT_CHAR (CPP_XOR_EQ);
1367 break;
0578f103 1368
338fa5f7 1369 case ':':
1370 result->type = CPP_COLON;
c808d026 1371 c = get_effective_char (pfile);
338fa5f7 1372 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1373 ACCEPT_CHAR (CPP_SCOPE);
1374 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1375 {
1376 result->flags |= DIGRAPH;
1377 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1378 }
1379 break;
0578f103 1380
338fa5f7 1381 case '~': result->type = CPP_COMPL; break;
1382 case ',': result->type = CPP_COMMA; break;
1383 case '(': result->type = CPP_OPEN_PAREN; break;
1384 case ')': result->type = CPP_CLOSE_PAREN; break;
1385 case '[': result->type = CPP_OPEN_SQUARE; break;
1386 case ']': result->type = CPP_CLOSE_SQUARE; break;
1387 case '{': result->type = CPP_OPEN_BRACE; break;
1388 case '}': result->type = CPP_CLOSE_BRACE; break;
1389 case ';': result->type = CPP_SEMICOLON; break;
1390
9ee99ac6 1391 /* @ is a punctuator in Objective C. */
1392 case '@': result->type = CPP_ATSIGN; break;
338fa5f7 1393
1394 random_char:
1395 default:
1396 result->type = CPP_OTHER;
33344a1c 1397 result->val.c = c;
338fa5f7 1398 break;
1399 }
fb5ab82c 1400
1401 return result;
338fa5f7 1402}
1403
79bd622b 1404/* An upper bound on the number of bytes needed to spell a token,
1405 including preceding whitespace. */
1406unsigned int
1407cpp_token_len (token)
1408 const cpp_token *token;
338fa5f7 1409{
79bd622b 1410 unsigned int len;
cfad5579 1411
79bd622b 1412 switch (TOKEN_SPELL (token))
f80e83a9 1413 {
c86dbc5b 1414 default: len = 0; break;
1415 case SPELL_STRING: len = token->val.str.len; break;
1416 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
f80e83a9 1417 }
79bd622b 1418 /* 1 for whitespace, 4 for comment delimeters. */
1419 return len + 5;
cfad5579 1420}
1421
f80e83a9 1422/* Write the spelling of a token TOKEN to BUFFER. The buffer must
c5ea33a8 1423 already contain the enough space to hold the token's spelling.
1424 Returns a pointer to the character after the last character
1425 written. */
79bd622b 1426unsigned char *
1427cpp_spell_token (pfile, token, buffer)
f80e83a9 1428 cpp_reader *pfile; /* Would be nice to be rid of this... */
1429 const cpp_token *token;
1430 unsigned char *buffer;
1431{
7e842f95 1432 switch (TOKEN_SPELL (token))
f80e83a9 1433 {
1434 case SPELL_OPERATOR:
1435 {
1436 const unsigned char *spelling;
1437 unsigned char c;
ab12a39c 1438
f80e83a9 1439 if (token->flags & DIGRAPH)
ee6c4e4b 1440 spelling
1441 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
31674461 1442 else if (token->flags & NAMED_OP)
1443 goto spell_ident;
f80e83a9 1444 else
7e842f95 1445 spelling = TOKEN_NAME (token);
f80e83a9 1446
1447 while ((c = *spelling++) != '\0')
1448 *buffer++ = c;
1449 }
1450 break;
ab12a39c 1451
f80e83a9 1452 case SPELL_IDENT:
31674461 1453 spell_ident:
c86dbc5b 1454 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1455 buffer += NODE_LEN (token->val.node);
f80e83a9 1456 break;
ab12a39c 1457
f80e83a9 1458 case SPELL_STRING:
1459 {
71aa9da4 1460 int left, right, tag;
1461 switch (token->type)
1462 {
1463 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1464 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
71aa9da4 1465 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1466 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1467 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1468 default: left = '\0'; right = '\0'; tag = '\0'; break;
1469 }
1470 if (tag) *buffer++ = tag;
1471 if (left) *buffer++ = left;
76faa4c0 1472 memcpy (buffer, token->val.str.text, token->val.str.len);
1473 buffer += token->val.str.len;
71aa9da4 1474 if (right) *buffer++ = right;
f80e83a9 1475 }
1476 break;
ab12a39c 1477
f80e83a9 1478 case SPELL_CHAR:
33344a1c 1479 *buffer++ = token->val.c;
f80e83a9 1480 break;
ab12a39c 1481
f80e83a9 1482 case SPELL_NONE:
7e842f95 1483 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
f80e83a9 1484 break;
1485 }
ab12a39c 1486
f80e83a9 1487 return buffer;
1488}
ab12a39c 1489
79bd622b 1490/* Returns a token as a null-terminated string. The string is
1491 temporary, and automatically freed later. Useful for diagnostics. */
1492unsigned char *
1493cpp_token_as_text (pfile, token)
6060326b 1494 cpp_reader *pfile;
f80e83a9 1495 const cpp_token *token;
6060326b 1496{
79bd622b 1497 unsigned int len = cpp_token_len (token);
1fdf6039 1498 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
6060326b 1499
79bd622b 1500 end = cpp_spell_token (pfile, token, start);
1501 end[0] = '\0';
6060326b 1502
79bd622b 1503 return start;
1504}
6060326b 1505
79bd622b 1506/* Used by C front ends. Should really move to using cpp_token_as_text. */
1507const char *
1508cpp_type2name (type)
1509 enum cpp_ttype type;
1510{
1511 return (const char *) token_spellings[type].name;
1512}
6060326b 1513
f9b5f742 1514/* Writes the spelling of token to FP, without any preceding space.
1515 Separated from cpp_spell_token for efficiency - to avoid stdio
1516 double-buffering. */
79bd622b 1517void
1518cpp_output_token (token, fp)
1519 const cpp_token *token;
1520 FILE *fp;
1521{
79bd622b 1522 switch (TOKEN_SPELL (token))
6060326b 1523 {
79bd622b 1524 case SPELL_OPERATOR:
1525 {
1526 const unsigned char *spelling;
28874558 1527 int c;
6060326b 1528
79bd622b 1529 if (token->flags & DIGRAPH)
ee6c4e4b 1530 spelling
1531 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
79bd622b 1532 else if (token->flags & NAMED_OP)
1533 goto spell_ident;
1534 else
1535 spelling = TOKEN_NAME (token);
f80e83a9 1536
28874558 1537 c = *spelling;
1538 do
1539 putc (c, fp);
1540 while ((c = *++spelling) != '\0');
79bd622b 1541 }
1542 break;
f80e83a9 1543
79bd622b 1544 spell_ident:
1545 case SPELL_IDENT:
28874558 1546 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
79bd622b 1547 break;
f80e83a9 1548
79bd622b 1549 case SPELL_STRING:
1550 {
1551 int left, right, tag;
1552 switch (token->type)
1553 {
1554 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1555 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
79bd622b 1556 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1557 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1558 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1559 default: left = '\0'; right = '\0'; tag = '\0'; break;
1560 }
1561 if (tag) putc (tag, fp);
1562 if (left) putc (left, fp);
1563 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1564 if (right) putc (right, fp);
1565 }
1566 break;
6060326b 1567
79bd622b 1568 case SPELL_CHAR:
33344a1c 1569 putc (token->val.c, fp);
79bd622b 1570 break;
6060326b 1571
79bd622b 1572 case SPELL_NONE:
1573 /* An error, most probably. */
1574 break;
f80e83a9 1575 }
6060326b 1576}
1577
79bd622b 1578/* Compare two tokens. */
1579int
1580_cpp_equiv_tokens (a, b)
1581 const cpp_token *a, *b;
6060326b 1582{
79bd622b 1583 if (a->type == b->type && a->flags == b->flags)
1584 switch (TOKEN_SPELL (a))
1585 {
1586 default: /* Keep compiler happy. */
1587 case SPELL_OPERATOR:
1588 return 1;
1589 case SPELL_CHAR:
33344a1c 1590 return a->val.c == b->val.c; /* Character. */
79bd622b 1591 case SPELL_NONE:
588d632b 1592 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
79bd622b 1593 case SPELL_IDENT:
1594 return a->val.node == b->val.node;
1595 case SPELL_STRING:
1596 return (a->val.str.len == b->val.str.len
1597 && !memcmp (a->val.str.text, b->val.str.text,
1598 a->val.str.len));
1599 }
6060326b 1600
f80e83a9 1601 return 0;
1602}
1603
79bd622b 1604/* Returns nonzero if a space should be inserted to avoid an
1605 accidental token paste for output. For simplicity, it is
1606 conservative, and occasionally advises a space where one is not
1607 needed, e.g. "." and ".2". */
f80e83a9 1608
79bd622b 1609int
1610cpp_avoid_paste (pfile, token1, token2)
6060326b 1611 cpp_reader *pfile;
79bd622b 1612 const cpp_token *token1, *token2;
6060326b 1613{
79bd622b 1614 enum cpp_ttype a = token1->type, b = token2->type;
1615 cppchar_t c;
6060326b 1616
79bd622b 1617 if (token1->flags & NAMED_OP)
1618 a = CPP_NAME;
1619 if (token2->flags & NAMED_OP)
1620 b = CPP_NAME;
6060326b 1621
79bd622b 1622 c = EOF;
1623 if (token2->flags & DIGRAPH)
ee6c4e4b 1624 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
79bd622b 1625 else if (token_spellings[b].category == SPELL_OPERATOR)
1626 c = token_spellings[b].name[0];
6060326b 1627
79bd622b 1628 /* Quickly get everything that can paste with an '='. */
ee6c4e4b 1629 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
79bd622b 1630 return 1;
6060326b 1631
79bd622b 1632 switch (a)
6060326b 1633 {
79bd622b 1634 case CPP_GREATER: return c == '>' || c == '?';
1635 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1636 case CPP_PLUS: return c == '+';
1637 case CPP_MINUS: return c == '-' || c == '>';
1638 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1639 case CPP_MOD: return c == ':' || c == '>';
1640 case CPP_AND: return c == '&';
1641 case CPP_OR: return c == '|';
1642 case CPP_COLON: return c == ':' || c == '>';
1643 case CPP_DEREF: return c == '*';
efdcc728 1644 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
79bd622b 1645 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1646 case CPP_NAME: return ((b == CPP_NUMBER
1647 && name_p (pfile, &token2->val.str))
1648 || b == CPP_NAME
1649 || b == CPP_CHAR || b == CPP_STRING); /* L */
1650 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1651 || c == '.' || c == '+' || c == '-');
1652 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
33344a1c 1653 && token1->val.c == '@'
79bd622b 1654 && (b == CPP_NAME || b == CPP_STRING));
1655 default: break;
6060326b 1656 }
6060326b 1657
deb356cf 1658 return 0;
6060326b 1659}
1660
79bd622b 1661/* Output all the remaining tokens on the current line, and a newline
f9b5f742 1662 character, to FP. Leading whitespace is removed. If there are
1663 macros, special token padding is not performed. */
6060326b 1664void
79bd622b 1665cpp_output_line (pfile, fp)
6060326b 1666 cpp_reader *pfile;
79bd622b 1667 FILE *fp;
6060326b 1668{
f9b5f742 1669 const cpp_token *token;
7e842f95 1670
f9b5f742 1671 token = cpp_get_token (pfile);
1672 while (token->type != CPP_EOF)
7e842f95 1673 {
f9b5f742 1674 cpp_output_token (token, fp);
1675 token = cpp_get_token (pfile);
1676 if (token->flags & PREV_WHITE)
1677 putc (' ', fp);
7e842f95 1678 }
1679
79bd622b 1680 putc ('\n', fp);
f80e83a9 1681}
6060326b 1682
/* Return the numeric value (0-15) of the hexadecimal digit C, which
   may be in either case.  Aborts if C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'A' && c <= 'F')
    return 10 + (c - 'A');
  if (c >= 'a' && c <= 'f')
    return 10 + (c - 'a');

  abort ();
}
1696
c8342759 1697/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1698 failure if cpplib is not parsing C++ or C99. Such failure is
1699 silent, and no variables are updated. Otherwise returns 0, and
1700 warns if -Wtraditional.
8330799c 1701
1702 [lex.charset]: The character designated by the universal character
1703 name \UNNNNNNNN is that character whose character short name in
1704 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1705 universal character name \uNNNN is that character whose character
1706 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1707 for a universal character name is less than 0x20 or in the range
1708 0x7F-0x9F (inclusive), or if the universal character name
1709 designates a character in the basic source character set, then the
1710 program is ill-formed.
1711
1712 We assume that wchar_t is Unicode, so we don't need to do any
c8342759 1713 mapping. Is this ever wrong?
8330799c 1714
c8342759 1715 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1716 LIMIT is the end of the string or charconst. PSTR is updated to
1717 point after the UCS on return, and the UCS is written into PC. */
1718
1719static int
1720maybe_read_ucs (pfile, pstr, limit, pc)
8330799c 1721 cpp_reader *pfile;
1722 const unsigned char **pstr;
1723 const unsigned char *limit;
c8342759 1724 unsigned int *pc;
8330799c 1725{
1726 const unsigned char *p = *pstr;
c8342759 1727 unsigned int code = 0;
1728 unsigned int c = *pc, length;
1729
1730 /* Only attempt to interpret a UCS for C++ and C99. */
1731 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1732 return 1;
8330799c 1733
c8342759 1734 if (CPP_WTRADITIONAL (pfile))
1735 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
8330799c 1736
f73bab03 1737 length = (c == 'u' ? 4: 8);
1738
1739 if ((size_t) (limit - p) < length)
1740 {
1741 cpp_error (pfile, "incomplete universal-character-name");
1742 /* Skip to the end to avoid more diagnostics. */
1743 p = limit;
1744 }
1745 else
1746 {
1747 for (; length; length--, p++)
8330799c 1748 {
f73bab03 1749 c = *p;
1750 if (ISXDIGIT (c))
1751 code = (code << 4) + hex_digit_value (c);
1752 else
1753 {
1754 cpp_error (pfile,
1755 "non-hex digit '%c' in universal-character-name", c);
1756 /* We shouldn't skip in case there are multibyte chars. */
1757 break;
1758 }
8330799c 1759 }
8330799c 1760 }
1761
1762#ifdef TARGET_EBCDIC
1763 cpp_error (pfile, "universal-character-name on EBCDIC target");
1764 code = 0x3f; /* EBCDIC invalid character */
1765#else
f73bab03 1766 /* True extended characters are OK. */
1767 if (code >= 0xa0
1768 && !(code & 0x80000000)
1769 && !(code >= 0xD800 && code <= 0xDFFF))
1770 ;
1771 /* The standard permits $, @ and ` to be specified as UCNs. We use
1772 hex escapes so that this also works with EBCDIC hosts. */
1773 else if (code == 0x24 || code == 0x40 || code == 0x60)
1774 ;
1775 /* Don't give another error if one occurred above. */
1776 else if (length == 0)
1777 cpp_error (pfile, "universal-character-name out of range");
8330799c 1778#endif
1779
1780 *pstr = p;
c8342759 1781 *pc = code;
1782 return 0;
8330799c 1783}
1784
1785/* Interpret an escape sequence, and return its value. PSTR points to
1786 the input pointer, which is just after the backslash. LIMIT is how
c8342759 1787 much text we have. MASK is a bitmask for the precision for the
1788 destination type (char or wchar_t). TRADITIONAL, if true, does not
1789 interpret escapes that did not exist in traditional C.
8330799c 1790
c8342759 1791 Handles all relevant diagnostics. */
1792
1793unsigned int
1794cpp_parse_escape (pfile, pstr, limit, mask, traditional)
8330799c 1795 cpp_reader *pfile;
1796 const unsigned char **pstr;
1797 const unsigned char *limit;
c8342759 1798 unsigned HOST_WIDE_INT mask;
8330799c 1799 int traditional;
1800{
1801 int unknown = 0;
1802 const unsigned char *str = *pstr;
1803 unsigned int c = *str++;
1804
1805 switch (c)
1806 {
1807 case '\\': case '\'': case '"': case '?': break;
1808 case 'b': c = TARGET_BS; break;
1809 case 'f': c = TARGET_FF; break;
1810 case 'n': c = TARGET_NEWLINE; break;
1811 case 'r': c = TARGET_CR; break;
1812 case 't': c = TARGET_TAB; break;
1813 case 'v': c = TARGET_VT; break;
1814
1815 case '(': case '{': case '[': case '%':
1816 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1817 '\%' is used to prevent SCCS from getting confused. */
1818 unknown = CPP_PEDANTIC (pfile);
1819 break;
1820
1821 case 'a':
1822 if (CPP_WTRADITIONAL (pfile))
1823 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1824 if (!traditional)
1825 c = TARGET_BELL;
1826 break;
1827
1828 case 'e': case 'E':
1829 if (CPP_PEDANTIC (pfile))
1830 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1831 c = TARGET_ESC;
1832 break;
1833
8330799c 1834 case 'u': case 'U':
c8342759 1835 unknown = maybe_read_ucs (pfile, &str, limit, &c);
8330799c 1836 break;
1837
1838 case 'x':
1839 if (CPP_WTRADITIONAL (pfile))
1840 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1841
1842 if (!traditional)
1843 {
1844 unsigned int i = 0, overflow = 0;
1845 int digits_found = 0;
1846
1847 while (str < limit)
1848 {
1849 c = *str;
1850 if (! ISXDIGIT (c))
1851 break;
1852 str++;
1853 overflow |= i ^ (i << 4 >> 4);
1854 i = (i << 4) + hex_digit_value (c);
1855 digits_found = 1;
1856 }
1857
1858 if (!digits_found)
1859 cpp_error (pfile, "\\x used with no following hex digits");
1860
1861 if (overflow | (i != (i & mask)))
1862 {
1863 cpp_pedwarn (pfile, "hex escape sequence out of range");
1864 i &= mask;
1865 }
1866 c = i;
1867 }
1868 break;
1869
1870 case '0': case '1': case '2': case '3':
1871 case '4': case '5': case '6': case '7':
1872 {
1873 unsigned int i = c - '0';
1874 int count = 0;
1875
1876 while (str < limit && ++count < 3)
1877 {
1878 c = *str;
1879 if (c < '0' || c > '7')
1880 break;
1881 str++;
1882 i = (i << 3) + c - '0';
1883 }
1884
1885 if (i != (i & mask))
1886 {
1887 cpp_pedwarn (pfile, "octal escape sequence out of range");
1888 i &= mask;
1889 }
1890 c = i;
1891 }
1892 break;
1893
1894 default:
1895 unknown = 1;
1896 break;
1897 }
1898
1899 if (unknown)
1900 {
1901 if (ISGRAPH (c))
1902 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1903 else
1904 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1905 }
1906
c8342759 1907 if (c > mask)
1908 cpp_pedwarn (pfile, "escape sequence out of range for character");
1909
8330799c 1910 *pstr = str;
1911 return c;
1912}
1913
1914#ifndef MAX_CHAR_TYPE_SIZE
1915#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1916#endif
1917
1918#ifndef MAX_WCHAR_TYPE_SIZE
1919#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1920#endif
1921
1922/* Interpret a (possibly wide) character constant in TOKEN.
1923 WARN_MULTI warns about multi-character charconsts, if not
1924 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1925 that did not exist in traditional C. PCHARS_SEEN points to a
1926 variable that is filled in with the number of characters seen. */
1927HOST_WIDE_INT
1928cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1929 cpp_reader *pfile;
1930 const cpp_token *token;
1931 int warn_multi;
1932 int traditional;
1933 unsigned int *pchars_seen;
1934{
1935 const unsigned char *str = token->val.str.text;
1936 const unsigned char *limit = str + token->val.str.len;
1937 unsigned int chars_seen = 0;
1938 unsigned int width, max_chars, c;
0d086e18 1939 unsigned HOST_WIDE_INT mask;
1940 HOST_WIDE_INT result = 0;
8330799c 1941
1942#ifdef MULTIBYTE_CHARS
1943 (void) local_mbtowc (NULL, NULL, 0);
1944#endif
1945
1946 /* Width in bits. */
1947 if (token->type == CPP_CHAR)
1948 width = MAX_CHAR_TYPE_SIZE;
1949 else
1950 width = MAX_WCHAR_TYPE_SIZE;
1951
1952 if (width < HOST_BITS_PER_WIDE_INT)
1953 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1954 else
1955 mask = ~0;
1956 max_chars = HOST_BITS_PER_WIDE_INT / width;
1957
1958 while (str < limit)
1959 {
1960#ifdef MULTIBYTE_CHARS
1961 wchar_t wc;
1962 int char_len;
1963
1964 char_len = local_mbtowc (&wc, str, limit - str);
1965 if (char_len == -1)
1966 {
1967 cpp_warning (pfile, "ignoring invalid multibyte character");
1968 c = *str++;
1969 }
1970 else
1971 {
1972 str += char_len;
1973 c = wc;
1974 }
1975#else
1976 c = *str++;
1977#endif
1978
1979 if (c == '\\')
c8342759 1980 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
8330799c 1981
1982#ifdef MAP_CHARACTER
1983 if (ISPRINT (c))
1984 c = MAP_CHARACTER (c);
1985#endif
1986
1987 /* Merge character into result; ignore excess chars. */
1988 if (++chars_seen <= max_chars)
1989 {
1990 if (width < HOST_BITS_PER_WIDE_INT)
1991 result = (result << width) | (c & mask);
1992 else
1993 result = c;
1994 }
1995 }
1996
1997 if (chars_seen == 0)
1998 cpp_error (pfile, "empty character constant");
1999 else if (chars_seen > max_chars)
2000 {
2001 chars_seen = max_chars;
f73bab03 2002 cpp_warning (pfile, "character constant too long");
8330799c 2003 }
2004 else if (chars_seen > 1 && !traditional && warn_multi)
2005 cpp_warning (pfile, "multi-character character constant");
2006
2007 /* If char type is signed, sign-extend the constant. The
2008 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2009 if (token->type == CPP_CHAR && chars_seen)
2010 {
2011 unsigned int nbits = chars_seen * width;
2012 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2013
2014 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2015 || ((result >> (nbits - 1)) & 1) == 0)
2016 result &= mask;
2017 else
2018 result |= ~mask;
2019 }
2020
2021 *pchars_seen = chars_seen;
2022 return result;
2023}
2024
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the exclusive upper limit on the
   size of a free buffer that _cpp_get_buff will hand out for a
   request of MIN_SIZE bytes.  EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is
   the size requested when extending BUFF: MIN_EXTRA plus twice the
   distance from BUFF's cur to its limit.  Every macro argument is
   parenthesized so that arbitrary expressions may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (8000 + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
  ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

/* Only used to measure the offset the compiler gives to a union of a
   double and a pointer that follows a single char.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* DEFAULT_ALIGNMENT is that offset.  CPP_ALIGN rounds SIZE up to a
   multiple of ALIGN; the bit trick requires ALIGN to be a power of
   two.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
#define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
2048
1785b647 2049/* Create a new allocation buffer. Place the control block at the end
2050 of the buffer, so that buffer overflows will cause immediate chaos. */
06c92cbc 2051static _cpp_buff *
2052new_buff (len)
4b31a107 2053 size_t len;
06c92cbc 2054{
2055 _cpp_buff *result;
1fdf6039 2056 unsigned char *base;
06c92cbc 2057
084163dc 2058 if (len < MIN_BUFF_SIZE)
2059 len = MIN_BUFF_SIZE;
06c92cbc 2060 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
2061
2062 base = xmalloc (len + sizeof (_cpp_buff));
2063 result = (_cpp_buff *) (base + len);
2064 result->base = base;
2065 result->cur = base;
2066 result->limit = base + len;
2067 result->next = NULL;
2068 return result;
2069}
2070
2071/* Place a chain of unwanted allocation buffers on the free list. */
2072void
2073_cpp_release_buff (pfile, buff)
2074 cpp_reader *pfile;
2075 _cpp_buff *buff;
2076{
2077 _cpp_buff *end = buff;
2078
2079 while (end->next)
2080 end = end->next;
2081 end->next = pfile->free_buffs;
2082 pfile->free_buffs = buff;
2083}
2084
2085/* Return a free buffer of size at least MIN_SIZE. */
2086_cpp_buff *
2087_cpp_get_buff (pfile, min_size)
2088 cpp_reader *pfile;
4b31a107 2089 size_t min_size;
06c92cbc 2090{
2091 _cpp_buff *result, **p;
2092
2093 for (p = &pfile->free_buffs;; p = &(*p)->next)
2094 {
4b31a107 2095 size_t size;
084163dc 2096
2097 if (*p == NULL)
06c92cbc 2098 return new_buff (min_size);
084163dc 2099 result = *p;
2100 size = result->limit - result->base;
2101 /* Return a buffer that's big enough, but don't waste one that's
2102 way too big. */
2103 if (size >= min_size && size < BUFF_SIZE_UPPER_BOUND (min_size))
06c92cbc 2104 break;
2105 }
2106
2107 *p = result->next;
2108 result->next = NULL;
2109 result->cur = result->base;
2110 return result;
2111}
2112
e6a5f963 2113/* Creates a new buffer with enough space to hold the the uncommitted
2114 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2115 the excess bytes to the new buffer. Chains the new buffer after
2116 BUFF, and returns the new buffer. */
06c92cbc 2117_cpp_buff *
e6a5f963 2118_cpp_append_extend_buff (pfile, buff, min_extra)
06c92cbc 2119 cpp_reader *pfile;
2120 _cpp_buff *buff;
4b31a107 2121 size_t min_extra;
06c92cbc 2122{
4b31a107 2123 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
e6a5f963 2124 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
06c92cbc 2125
e6a5f963 2126 buff->next = new_buff;
2127 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2128 return new_buff;
2129}
2130
2131/* Creates a new buffer with enough space to hold the the uncommitted
2132 remaining bytes of the buffer pointed to by BUFF, and at least
2133 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2134 Chains the new buffer before the buffer pointed to by BUFF, and
2135 updates the pointer to point to the new buffer. */
2136void
2137_cpp_extend_buff (pfile, pbuff, min_extra)
2138 cpp_reader *pfile;
2139 _cpp_buff **pbuff;
2140 size_t min_extra;
2141{
2142 _cpp_buff *new_buff, *old_buff = *pbuff;
2143 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2144
2145 new_buff = _cpp_get_buff (pfile, size);
2146 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2147 new_buff->next = old_buff;
2148 *pbuff = new_buff;
06c92cbc 2149}
2150
2151/* Free a chain of buffers starting at BUFF. */
2152void
2153_cpp_free_buff (buff)
2154 _cpp_buff *buff;
2155{
2156 _cpp_buff *next;
2157
2158 for (; buff; buff = next)
2159 {
2160 next = buff->next;
2161 free (buff->base);
2162 }
2163}
deb356cf 2164
1fdf6039 2165/* Allocate permanent, unaligned storage of length LEN. */
2166unsigned char *
2167_cpp_unaligned_alloc (pfile, len)
2168 cpp_reader *pfile;
2169 size_t len;
2170{
2171 _cpp_buff *buff = pfile->u_buff;
2172 unsigned char *result = buff->cur;
2173
2174 if (len > (size_t) (buff->limit - result))
2175 {
2176 buff = _cpp_get_buff (pfile, len);
2177 buff->next = pfile->u_buff;
2178 pfile->u_buff = buff;
2179 result = buff->cur;
2180 }
2181
2182 buff->cur = result + len;
2183 return result;
2184}
2185
e6a5f963 2186/* Allocate permanent, unaligned storage of length LEN. */
79bd622b 2187unsigned char *
e6a5f963 2188_cpp_aligned_alloc (pfile, len)
2189 cpp_reader *pfile;
2190 size_t len;
89b05ef6 2191{
e6a5f963 2192 _cpp_buff *buff = pfile->a_buff;
2193 unsigned char *result = buff->cur;
89b05ef6 2194
e6a5f963 2195 if (len > (size_t) (buff->limit - result))
89b05ef6 2196 {
e6a5f963 2197 buff = _cpp_get_buff (pfile, len);
2198 buff->next = pfile->a_buff;
2199 pfile->a_buff = buff;
2200 result = buff->cur;
89b05ef6 2201 }
f80e83a9 2202
e6a5f963 2203 buff->cur = result + len;
79bd622b 2204 return result;
f80e83a9 2205}