]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/cpplex.c
* g++.old-deja/g++.other/overload12.C
[thirdparty/gcc.git] / gcc / cpplex.c
CommitLineData
0578f103 1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
6060326b 7 Single-pass line tokenization by Neil Booth, April 2000
0578f103 8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
79bd622b 23/* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
27
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
f80e83a9 36
0578f103 37#include "config.h"
38#include "system.h"
0578f103 39#include "cpplib.h"
40#include "cpphash.h"
f80e83a9 41#include "symcat.h"
0578f103 42
/* Spelling categories recorded for each token type.  Tokens with
   SPELL_STRING store their spelling in the token list, and its
   length in token->val.name.len.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_STRING,
  SPELL_NONE
};
53
79bd622b 54struct token_spelling
241e762e 55{
79bd622b 56 enum spell_type category;
57 const unsigned char *name;
241e762e 58};
59
79bd622b 60const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
61 U":>", U"<%", U"%>"};
62
63#define OP(e, s) { SPELL_OPERATOR, U s },
64#define TK(e, s) { s, U STRINGX (e) },
65const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
66#undef OP
67#undef TK
68
69#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
70#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
e2f9a79f 71
338fa5f7 72static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
73static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
74static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
75
f80e83a9 76static int skip_block_comment PARAMS ((cpp_reader *));
f669338a 77static int skip_line_comment PARAMS ((cpp_reader *));
338fa5f7 78static void adjust_column PARAMS ((cpp_reader *));
79static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
80static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
79bd622b 81static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
82static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
338fa5f7 83static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
79bd622b 84static void unterminated PARAMS ((cpp_reader *, int));
338fa5f7 85static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
86static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
f669338a 87static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
88static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
79bd622b 89static int name_p PARAMS ((cpp_reader *, const cpp_string *));
e916a356 90
79bd622b 91static cpp_chunk *new_chunk PARAMS ((unsigned int));
92static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
bce8e0c0 93
f80e83a9 94/* Utility routine:
2c63d6c8 95
76faa4c0 96 Compares, the token TOKEN to the NUL-terminated string STRING.
97 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
bce8e0c0 98
f80e83a9 99int
76faa4c0 100cpp_ideq (token, string)
101 const cpp_token *token;
f80e83a9 102 const char *string;
103{
76faa4c0 104 if (token->type != CPP_NAME)
f80e83a9 105 return 0;
76faa4c0 106
79bd622b 107 return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
bce8e0c0 108}
50fd6b48 109
338fa5f7 110/* Call when meeting a newline. Returns the character after the newline
111 (or carriage-return newline combination), or EOF. */
112static cppchar_t
113handle_newline (buffer, newline_char)
114 cpp_buffer *buffer;
115 cppchar_t newline_char;
116{
117 cppchar_t next = EOF;
118
119 buffer->col_adjust = 0;
120 buffer->lineno++;
121 buffer->line_base = buffer->cur;
122
123 /* Handle CR-LF and LF-CR combinations, get the next character. */
124 if (buffer->cur < buffer->rlimit)
125 {
126 next = *buffer->cur++;
127 if (next + newline_char == '\r' + '\n')
128 {
129 buffer->line_base = buffer->cur;
130 if (buffer->cur < buffer->rlimit)
131 next = *buffer->cur++;
132 else
133 next = EOF;
134 }
135 }
136
137 buffer->read_ahead = next;
138 return next;
139}
140
141/* Subroutine of skip_escaped_newlines; called when a trigraph is
142 encountered. It warns if necessary, and returns true if the
143 trigraph should be honoured. FROM_CHAR is the third character of a
144 trigraph, and presumed to be the previous character for position
145 reporting. */
0578f103 146static int
338fa5f7 147trigraph_ok (pfile, from_char)
0578f103 148 cpp_reader *pfile;
338fa5f7 149 cppchar_t from_char;
0578f103 150{
f80e83a9 151 int accept = CPP_OPTION (pfile, trigraphs);
152
f669338a 153 /* Don't warn about trigraphs in comments. */
154 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
0578f103 155 {
338fa5f7 156 cpp_buffer *buffer = pfile->buffer;
f80e83a9 157 if (accept)
338fa5f7 158 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
f80e83a9 159 "trigraph ??%c converted to %c",
338fa5f7 160 (int) from_char,
161 (int) _cpp_trigraph_map[from_char]);
4b912310 162 else if (buffer->cur != buffer->last_Wtrigraphs)
163 {
164 buffer->last_Wtrigraphs = buffer->cur;
165 cpp_warning_with_line (pfile, buffer->lineno,
166 CPP_BUF_COL (buffer) - 2,
167 "trigraph ??%c ignored", (int) from_char);
168 }
0578f103 169 }
338fa5f7 170
f80e83a9 171 return accept;
0578f103 172}
173
/* Give the token being built type T and consume the look-ahead
   character.  Assumes local variables buffer and result.  */
#define ACCEPT_CHAR(t)				\
  do						\
    {						\
      result->type = t;				\
      buffer->read_ahead = EOF;			\
    }						\
  while (0)

/* Save and restore the read position of the buffer; these assume
   local variables buffer and saved_cur.

   When we move to multibyte character sets, add to these something
   that saves and restores the state of the multibyte conversion
   library.  This probably involves saving and restoring a "cookie".
   In the case of glibc it is an 8-byte structure, so is not a high
   overhead operation.  In any case, it's out of the fast path.  */
#define SAVE_STATE()				\
  do						\
    {						\
      saved_cur = buffer->cur;			\
    }						\
  while (0)
#define RESTORE_STATE()				\
  do						\
    {						\
      buffer->cur = saved_cur;			\
    }						\
  while (0)
185
186/* Skips any escaped newlines introduced by NEXT, which is either a
187 '?' or a '\\'. Returns the next character, which will also have
396ffa86 188 been placed in buffer->read_ahead. This routine performs
189 preprocessing stages 1 and 2 of the ISO C standard. */
338fa5f7 190static cppchar_t
191skip_escaped_newlines (buffer, next)
192 cpp_buffer *buffer;
193 cppchar_t next;
0578f103 194{
396ffa86 195 /* Only do this if we apply stages 1 and 2. */
196 if (!buffer->from_stage3)
f80e83a9 197 {
396ffa86 198 cppchar_t next1;
199 const unsigned char *saved_cur;
200 int space;
201
202 do
338fa5f7 203 {
396ffa86 204 if (buffer->cur == buffer->rlimit)
205 break;
206
207 SAVE_STATE ();
208 if (next == '?')
338fa5f7 209 {
396ffa86 210 next1 = *buffer->cur++;
211 if (next1 != '?' || buffer->cur == buffer->rlimit)
212 {
213 RESTORE_STATE ();
214 break;
215 }
216
217 next1 = *buffer->cur++;
218 if (!_cpp_trigraph_map[next1]
219 || !trigraph_ok (buffer->pfile, next1))
220 {
221 RESTORE_STATE ();
222 break;
223 }
224
225 /* We have a full trigraph here. */
226 next = _cpp_trigraph_map[next1];
227 if (next != '\\' || buffer->cur == buffer->rlimit)
228 break;
229 SAVE_STATE ();
230 }
231
232 /* We have a backslash, and room for at least one more character. */
233 space = 0;
234 do
235 {
236 next1 = *buffer->cur++;
237 if (!is_nvspace (next1))
238 break;
239 space = 1;
338fa5f7 240 }
396ffa86 241 while (buffer->cur < buffer->rlimit);
f80e83a9 242
396ffa86 243 if (!is_vspace (next1))
338fa5f7 244 {
245 RESTORE_STATE ();
246 break;
247 }
0578f103 248
396ffa86 249 if (space)
250 cpp_warning (buffer->pfile,
251 "backslash and newline separated by space");
338fa5f7 252
396ffa86 253 next = handle_newline (buffer, next1);
254 if (next == EOF)
255 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
338fa5f7 256 }
396ffa86 257 while (next == '\\' || next == '?');
f80e83a9 258 }
0578f103 259
338fa5f7 260 buffer->read_ahead = next;
261 return next;
0578f103 262}
263
338fa5f7 264/* Obtain the next character, after trigraph conversion and skipping
265 an arbitrary string of escaped newlines. The common case of no
266 trigraphs or escaped newlines falls through quickly. */
267static cppchar_t
268get_effective_char (buffer)
269 cpp_buffer *buffer;
852d1b04 270{
338fa5f7 271 cppchar_t next = EOF;
272
273 if (buffer->cur < buffer->rlimit)
274 {
275 next = *buffer->cur++;
276
277 /* '?' can introduce trigraphs (and therefore backslash); '\\'
278 can introduce escaped newlines, which we want to skip, or
279 UCNs, which, depending upon lexer state, we will handle in
280 the future. */
281 if (next == '?' || next == '\\')
282 next = skip_escaped_newlines (buffer, next);
283 }
284
285 buffer->read_ahead = next;
286 return next;
852d1b04 287}
288
338fa5f7 289/* Skip a C-style block comment. We find the end of the comment by
290 seeing if an asterisk is before every '/' we encounter. Returns
291 non-zero if comment terminated by EOF, zero otherwise. */
f80e83a9 292static int
293skip_block_comment (pfile)
0578f103 294 cpp_reader *pfile;
295{
f80e83a9 296 cpp_buffer *buffer = pfile->buffer;
63e1abce 297 cppchar_t c = EOF, prevc = EOF;
338fa5f7 298
f669338a 299 pfile->state.lexing_comment = 1;
338fa5f7 300 while (buffer->cur != buffer->rlimit)
0578f103 301 {
338fa5f7 302 prevc = c, c = *buffer->cur++;
303
304 next_char:
305 /* FIXME: For speed, create a new character class of characters
79bd622b 306 of interest inside block comments. */
338fa5f7 307 if (c == '?' || c == '\\')
308 c = skip_escaped_newlines (buffer, c);
f80e83a9 309
338fa5f7 310 /* People like decorating comments with '*', so check for '/'
311 instead for efficiency. */
f80e83a9 312 if (c == '/')
0578f103 313 {
338fa5f7 314 if (prevc == '*')
315 break;
f80e83a9 316
338fa5f7 317 /* Warn about potential nested comments, but not if the '/'
318 comes immediately before the true comment delimeter.
f80e83a9 319 Don't bother to get it right across escaped newlines. */
338fa5f7 320 if (CPP_OPTION (pfile, warn_comments)
321 && buffer->cur != buffer->rlimit)
0578f103 322 {
338fa5f7 323 prevc = c, c = *buffer->cur++;
324 if (c == '*' && buffer->cur != buffer->rlimit)
325 {
326 prevc = c, c = *buffer->cur++;
327 if (c != '/')
328 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
329 CPP_BUF_COL (buffer),
330 "\"/*\" within comment");
331 }
332 goto next_char;
0578f103 333 }
0578f103 334 }
78719282 335 else if (is_vspace (c))
0578f103 336 {
338fa5f7 337 prevc = c, c = handle_newline (buffer, c);
338 goto next_char;
0578f103 339 }
b86584f6 340 else if (c == '\t')
338fa5f7 341 adjust_column (pfile);
0578f103 342 }
f80e83a9 343
f669338a 344 pfile->state.lexing_comment = 0;
338fa5f7 345 buffer->read_ahead = EOF;
346 return c != '/' || prevc != '*';
0578f103 347}
348
241e762e 349/* Skip a C++ line comment. Handles escaped newlines. Returns
338fa5f7 350 non-zero if a multiline comment. The following new line, if any,
351 is left in buffer->read_ahead. */
f80e83a9 352static int
f669338a 353skip_line_comment (pfile)
354 cpp_reader *pfile;
0578f103 355{
f669338a 356 cpp_buffer *buffer = pfile->buffer;
338fa5f7 357 unsigned int orig_lineno = buffer->lineno;
358 cppchar_t c;
f80e83a9 359
f669338a 360 pfile->state.lexing_comment = 1;
338fa5f7 361 do
f80e83a9 362 {
338fa5f7 363 c = EOF;
364 if (buffer->cur == buffer->rlimit)
365 break;
f80e83a9 366
338fa5f7 367 c = *buffer->cur++;
368 if (c == '?' || c == '\\')
369 c = skip_escaped_newlines (buffer, c);
f80e83a9 370 }
338fa5f7 371 while (!is_vspace (c));
0578f103 372
f669338a 373 pfile->state.lexing_comment = 0;
338fa5f7 374 buffer->read_ahead = c; /* Leave any newline for caller. */
375 return orig_lineno != buffer->lineno;
f80e83a9 376}
0578f103 377
338fa5f7 378/* pfile->buffer->cur is one beyond the \t character. Update
379 col_adjust so we track the column correctly. */
b86584f6 380static void
338fa5f7 381adjust_column (pfile)
b86584f6 382 cpp_reader *pfile;
b86584f6 383{
338fa5f7 384 cpp_buffer *buffer = pfile->buffer;
385 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
b86584f6 386
387 /* Round it up to multiple of the tabstop, but subtract 1 since the
388 tab itself occupies a character position. */
338fa5f7 389 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
390 - col % CPP_OPTION (pfile, tabstop)) - 1;
b86584f6 391}
392
338fa5f7 393/* Skips whitespace, saving the next non-whitespace character.
394 Adjusts pfile->col_adjust to account for tabs. Without this,
395 tokens might be assigned an incorrect column. */
f80e83a9 396static void
338fa5f7 397skip_whitespace (pfile, c)
f80e83a9 398 cpp_reader *pfile;
338fa5f7 399 cppchar_t c;
f80e83a9 400{
401 cpp_buffer *buffer = pfile->buffer;
338fa5f7 402 unsigned int warned = 0;
0578f103 403
338fa5f7 404 do
f80e83a9 405 {
78719282 406 /* Horizontal space always OK. */
407 if (c == ' ')
338fa5f7 408 ;
78719282 409 else if (c == '\t')
338fa5f7 410 adjust_column (pfile);
411 /* Just \f \v or \0 left. */
78719282 412 else if (c == '\0')
f80e83a9 413 {
78719282 414 if (!warned)
338fa5f7 415 {
416 cpp_warning (pfile, "null character(s) ignored");
417 warned = 1;
418 }
0578f103 419 }
79bd622b 420 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
78719282 421 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
422 CPP_BUF_COL (buffer),
423 "%s in preprocessing directive",
424 c == '\f' ? "form feed" : "vertical tab");
338fa5f7 425
426 c = EOF;
427 if (buffer->cur == buffer->rlimit)
428 break;
429 c = *buffer->cur++;
0578f103 430 }
338fa5f7 431 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
432 while (is_nvspace (c));
433
434 /* Remember the next character. */
435 buffer->read_ahead = c;
f80e83a9 436}
0578f103 437
79bd622b 438/* See if the characters of a number token are valid in a name (no
439 '.', '+' or '-'). */
440static int
441name_p (pfile, string)
442 cpp_reader *pfile;
443 const cpp_string *string;
444{
445 unsigned int i;
446
447 for (i = 0; i < string->len; i++)
448 if (!is_idchar (string->text[i]))
449 return 0;
450
451 return 1;
452}
453
338fa5f7 454/* Parse an identifier, skipping embedded backslash-newlines.
455 Calculate the hash value of the token while parsing, for improved
456 performance. The hashing algorithm *must* match cpp_lookup(). */
457
458static cpp_hashnode *
459parse_identifier (pfile, c)
0578f103 460 cpp_reader *pfile;
338fa5f7 461 cppchar_t c;
0578f103 462{
79bd622b 463 cpp_hashnode *result;
338fa5f7 464 cpp_buffer *buffer = pfile->buffer;
79bd622b 465 unsigned char *dest, *limit;
338fa5f7 466 unsigned int r = 0, saw_dollar = 0;
79bd622b 467
468 dest = POOL_FRONT (&pfile->ident_pool);
469 limit = POOL_LIMIT (&pfile->ident_pool);
f80e83a9 470
338fa5f7 471 do
f80e83a9 472 {
338fa5f7 473 do
f80e83a9 474 {
79bd622b 475 /* Need room for terminating null. */
476 if (dest + 1 >= limit)
477 limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
478
479 *dest++ = c;
338fa5f7 480 r = HASHSTEP (r, c);
0578f103 481
338fa5f7 482 if (c == '$')
483 saw_dollar++;
71aa9da4 484
338fa5f7 485 c = EOF;
486 if (buffer->cur == buffer->rlimit)
487 break;
71aa9da4 488
338fa5f7 489 c = *buffer->cur++;
490 }
491 while (is_idchar (c));
71aa9da4 492
338fa5f7 493 /* Potential escaped newline? */
494 if (c != '?' && c != '\\')
495 break;
496 c = skip_escaped_newlines (buffer, c);
f80e83a9 497 }
338fa5f7 498 while (is_idchar (c));
499
79bd622b 500 /* Remember the next character. */
501 buffer->read_ahead = c;
502
338fa5f7 503 /* $ is not a identifier character in the standard, but is commonly
504 accepted as an extension. Don't warn about it in skipped
505 conditional blocks. */
506 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
507 cpp_pedwarn (pfile, "'$' character(s) in identifier");
508
79bd622b 509 /* Identifiers are null-terminated. */
510 *dest = '\0';
511
512 /* This routine commits the memory if necessary. */
513 result = _cpp_lookup_with_hash (pfile,
514 dest - POOL_FRONT (&pfile->ident_pool), r);
515
516 /* Some identifiers require diagnostics when lexed. */
517 if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
518 {
519 /* It is allowed to poison the same identifier twice. */
520 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
521 cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
522
523 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
524 replacement list of a variable-arguments macro. */
525 if (result == pfile->spec_nodes.n__VA_ARGS__
526 && !pfile->state.va_args_ok)
527 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variable-argument macro");
528 }
529
530 return result;
0578f103 531}
532
338fa5f7 533/* Parse a number, skipping embedded backslash-newlines. */
0578f103 534static void
79bd622b 535parse_number (pfile, number, c, leading_period)
0578f103 536 cpp_reader *pfile;
338fa5f7 537 cpp_string *number;
538 cppchar_t c;
79bd622b 539 int leading_period;
0578f103 540{
f80e83a9 541 cpp_buffer *buffer = pfile->buffer;
79bd622b 542 cpp_pool *pool = pfile->string_pool;
543 unsigned char *dest, *limit;
0578f103 544
79bd622b 545 dest = POOL_FRONT (pool);
546 limit = POOL_LIMIT (pool);
f669338a 547
79bd622b 548 /* Place a leading period. */
549 if (leading_period)
550 {
551 if (dest >= limit)
552 limit = _cpp_next_chunk (pool, 0, &dest);
553 *dest++ = '.';
554 }
555
338fa5f7 556 do
f80e83a9 557 {
338fa5f7 558 do
559 {
79bd622b 560 /* Need room for terminating null. */
561 if (dest + 1 >= limit)
562 limit = _cpp_next_chunk (pool, 0, &dest);
563 *dest++ = c;
338fa5f7 564
338fa5f7 565 c = EOF;
566 if (buffer->cur == buffer->rlimit)
567 break;
0578f103 568
338fa5f7 569 c = *buffer->cur++;
570 }
79bd622b 571 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
0578f103 572
338fa5f7 573 /* Potential escaped newline? */
574 if (c != '?' && c != '\\')
575 break;
576 c = skip_escaped_newlines (buffer, c);
0578f103 577 }
79bd622b 578 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
f669338a 579
338fa5f7 580 /* Remember the next character. */
581 buffer->read_ahead = c;
852d1b04 582
79bd622b 583 /* Null-terminate the number. */
584 *dest = '\0';
585
586 number->text = POOL_FRONT (pool);
587 number->len = dest - number->text;
588 POOL_COMMIT (pool, number->len + 1);
338fa5f7 589}
590
591/* Subroutine of parse_string. Emits error for unterminated strings. */
592static void
79bd622b 593unterminated (pfile, term)
338fa5f7 594 cpp_reader *pfile;
338fa5f7 595 int term;
596{
597 cpp_error (pfile, "missing terminating %c character", term);
598
79bd622b 599 if (term == '\"' && pfile->mlstring_pos.line
600 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
f80e83a9 601 {
79bd622b 602 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
603 pfile->mlstring_pos.col,
338fa5f7 604 "possible start of unterminated string literal");
79bd622b 605 pfile->mlstring_pos.line = 0;
f80e83a9 606 }
0578f103 607}
608
79bd622b 609/* Subroutine of parse_string. */
610static int
611unescaped_terminator_p (pfile, dest)
612 cpp_reader *pfile;
613 const unsigned char *dest;
614{
615 const unsigned char *start, *temp;
616
617 /* In #include-style directives, terminators are not escapeable. */
618 if (pfile->state.angled_headers)
619 return 1;
620
621 start = POOL_FRONT (pfile->string_pool);
622
623 /* An odd number of consecutive backslashes represents an escaped
624 terminator. */
625 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
626 ;
627
628 return ((dest - temp) & 1) == 0;
629}
630
338fa5f7 631/* Parses a string, character constant, or angle-bracketed header file
632 name. Handles embedded trigraphs and escaped newlines.
0578f103 633
338fa5f7 634 Multi-line strings are allowed, but they are deprecated within
635 directives. */
f80e83a9 636static void
338fa5f7 637parse_string (pfile, token, terminator)
0578f103 638 cpp_reader *pfile;
f80e83a9 639 cpp_token *token;
338fa5f7 640 cppchar_t terminator;
0578f103 641{
f80e83a9 642 cpp_buffer *buffer = pfile->buffer;
79bd622b 643 cpp_pool *pool = pfile->string_pool;
644 unsigned char *dest, *limit;
338fa5f7 645 cppchar_t c;
646 unsigned int nulls = 0;
647
79bd622b 648 dest = POOL_FRONT (pool);
649 limit = POOL_LIMIT (pool);
650
338fa5f7 651 for (;;)
0578f103 652 {
338fa5f7 653 if (buffer->cur == buffer->rlimit)
654 {
655 c = EOF;
79bd622b 656 unterminated (pfile, terminator);
338fa5f7 657 break;
658 }
659 c = *buffer->cur++;
660
661 have_char:
662 /* Handle trigraphs, escaped newlines etc. */
663 if (c == '?' || c == '\\')
664 c = skip_escaped_newlines (buffer, c);
0578f103 665
79bd622b 666 if (c == terminator && unescaped_terminator_p (pfile, dest))
0578f103 667 {
79bd622b 668 c = EOF;
669 break;
338fa5f7 670 }
671 else if (is_vspace (c))
672 {
673 /* In assembly language, silently terminate string and
674 character literals at end of line. This is a kludge
675 around not knowing where comments are. */
676 if (CPP_OPTION (pfile, lang_asm) && terminator != '>')
677 break;
0578f103 678
338fa5f7 679 /* Character constants and header names may not extend over
680 multiple lines. In Standard C, neither may strings.
681 Unfortunately, we accept multiline strings as an
cc8770bf 682 extension, except in #include family directives. */
683 if (terminator != '"' || pfile->state.angled_headers)
0578f103 684 {
79bd622b 685 unterminated (pfile, terminator);
338fa5f7 686 break;
0578f103 687 }
0578f103 688
79bd622b 689 if (pfile->mlstring_pos.line == 0)
338fa5f7 690 {
79bd622b 691 pfile->mlstring_pos = pfile->lexer_pos;
338fa5f7 692 if (CPP_PEDANTIC (pfile))
693 cpp_pedwarn (pfile, "multi-line string constant");
f80e83a9 694 }
338fa5f7 695
696 handle_newline (buffer, c); /* Stores to read_ahead. */
697 c = '\n';
698 }
699 else if (c == '\0')
700 {
701 if (nulls++ == 0)
702 cpp_warning (pfile, "null character(s) preserved in literal");
0578f103 703 }
0578f103 704
79bd622b 705 /* No terminating null for strings - they could contain nulls. */
706 if (dest >= limit)
707 limit = _cpp_next_chunk (pool, 0, &dest);
708 *dest++ = c;
9fb5b53d 709
338fa5f7 710 /* If we had a new line, the next character is in read_ahead. */
711 if (c != '\n')
712 continue;
713 c = buffer->read_ahead;
714 if (c != EOF)
715 goto have_char;
0578f103 716 }
717
79bd622b 718 /* Remember the next character. */
338fa5f7 719 buffer->read_ahead = c;
0578f103 720
79bd622b 721 token->val.str.text = POOL_FRONT (pool);
722 token->val.str.len = dest - token->val.str.text;
723 POOL_COMMIT (pool, token->val.str.len);
338fa5f7 724}
f80e83a9 725
79bd622b 726/* The stored comment includes the comment start and any terminator. */
2c63d6c8 727static void
338fa5f7 728save_comment (pfile, token, from)
729 cpp_reader *pfile;
f80e83a9 730 cpp_token *token;
731 const unsigned char *from;
2c63d6c8 732{
f80e83a9 733 unsigned char *buffer;
338fa5f7 734 unsigned int len;
338fa5f7 735
f0495c2c 736 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
a543b315 737 /* C++ comments probably (not definitely) have moved past a new
738 line, which we don't want to save in the comment. */
739 if (pfile->buffer->read_ahead != EOF)
740 len--;
79bd622b 741 buffer = _cpp_pool_alloc (pfile->string_pool, len);
f80e83a9 742
f80e83a9 743 token->type = CPP_COMMENT;
76faa4c0 744 token->val.str.len = len;
338fa5f7 745 token->val.str.text = buffer;
0578f103 746
f0495c2c 747 buffer[0] = '/';
748 memcpy (buffer + 1, from, len - 1);
338fa5f7 749}
0578f103 750
f669338a 751/* Subroutine of lex_token to handle '%'. A little tricky, since we
752 want to avoid stepping back when lexing %:%X. */
338fa5f7 753static void
f669338a 754lex_percent (buffer, result)
338fa5f7 755 cpp_buffer *buffer;
756 cpp_token *result;
338fa5f7 757{
f669338a 758 cppchar_t c;
759
760 result->type = CPP_MOD;
761 /* Parsing %:%X could leave an extra character. */
762 if (buffer->extra_char == EOF)
763 c = get_effective_char (buffer);
764 else
765 {
766 c = buffer->read_ahead = buffer->extra_char;
767 buffer->extra_char = EOF;
768 }
769
770 if (c == '=')
771 ACCEPT_CHAR (CPP_MOD_EQ);
772 else if (CPP_OPTION (buffer->pfile, digraphs))
773 {
774 if (c == ':')
775 {
776 result->flags |= DIGRAPH;
777 ACCEPT_CHAR (CPP_HASH);
778 if (get_effective_char (buffer) == '%')
779 {
780 buffer->extra_char = get_effective_char (buffer);
781 if (buffer->extra_char == ':')
782 {
783 buffer->extra_char = EOF;
784 ACCEPT_CHAR (CPP_PASTE);
785 }
786 else
787 /* We'll catch the extra_char when we're called back. */
788 buffer->read_ahead = '%';
789 }
790 }
791 else if (c == '>')
792 {
793 result->flags |= DIGRAPH;
794 ACCEPT_CHAR (CPP_CLOSE_BRACE);
795 }
796 }
797}
798
799/* Subroutine of lex_token to handle '.'. This is tricky, since we
800 want to avoid stepping back when lexing '...' or '.123'. In the
801 latter case we should also set a flag for parse_number. */
802static void
803lex_dot (pfile, result)
804 cpp_reader *pfile;
805 cpp_token *result;
806{
807 cpp_buffer *buffer = pfile->buffer;
808 cppchar_t c;
809
810 /* Parsing ..X could leave an extra character. */
811 if (buffer->extra_char == EOF)
812 c = get_effective_char (buffer);
813 else
814 {
815 c = buffer->read_ahead = buffer->extra_char;
816 buffer->extra_char = EOF;
817 }
338fa5f7 818
f669338a 819 /* All known character sets have 0...9 contiguous. */
820 if (c >= '0' && c <= '9')
821 {
822 result->type = CPP_NUMBER;
79bd622b 823 parse_number (pfile, &result->val.str, c, 1);
f669338a 824 }
f80e83a9 825 else
c4357c92 826 {
f669338a 827 result->type = CPP_DOT;
828 if (c == '.')
829 {
830 buffer->extra_char = get_effective_char (buffer);
831 if (buffer->extra_char == '.')
832 {
833 buffer->extra_char = EOF;
834 ACCEPT_CHAR (CPP_ELLIPSIS);
835 }
836 else
837 /* We'll catch the extra_char when we're called back. */
838 buffer->read_ahead = '.';
839 }
840 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
841 ACCEPT_CHAR (CPP_DOT_STAR);
c4357c92 842 }
0578f103 843}
844
79bd622b 845void
846_cpp_lex_token (pfile, result)
0578f103 847 cpp_reader *pfile;
338fa5f7 848 cpp_token *result;
0578f103 849{
338fa5f7 850 cppchar_t c;
230f0943 851 cpp_buffer *buffer;
338fa5f7 852 const unsigned char *comment_start;
3c7df4d3 853 unsigned char bol = pfile->state.next_bol;
0653b94e 854
230f0943 855 done_directive:
856 buffer = pfile->buffer;
3c7df4d3 857 pfile->state.next_bol = 0;
338fa5f7 858 result->flags = 0;
859 next_char:
79bd622b 860 pfile->lexer_pos.line = buffer->lineno;
338fa5f7 861 next_char2:
79bd622b 862 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
f80e83a9 863
338fa5f7 864 c = buffer->read_ahead;
865 if (c == EOF && buffer->cur < buffer->rlimit)
866 {
867 c = *buffer->cur++;
79bd622b 868 pfile->lexer_pos.col++;
338fa5f7 869 }
0578f103 870
338fa5f7 871 do_switch:
872 buffer->read_ahead = EOF;
873 switch (c)
0578f103 874 {
338fa5f7 875 case EOF:
79bd622b 876 /* Non-empty files should end in a newline. Ignore for command
396ffa86 877 line and _Pragma buffers. */
878 if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
79bd622b 879 cpp_pedwarn (pfile, "no newline at end of file");
3c7df4d3 880 pfile->state.next_bol = 1;
920b5d41 881 pfile->skipping = 0; /* In case missing #endif. */
338fa5f7 882 result->type = CPP_EOF;
def71b06 883 /* Don't do MI optimisation. */
884 return;
0578f103 885
338fa5f7 886 case ' ': case '\t': case '\f': case '\v': case '\0':
887 skip_whitespace (pfile, c);
888 result->flags |= PREV_WHITE;
889 goto next_char2;
890
891 case '\n': case '\r':
e14c5993 892 if (!pfile->state.in_directive)
0578f103 893 {
79bd622b 894 handle_newline (buffer, c);
e14c5993 895 bol = 1;
79bd622b 896 pfile->lexer_pos.output_line = buffer->lineno;
897
e14c5993 898 /* Newlines in arguments are white space (6.10.3.10).
899 Otherwise, clear any white space flag. */
79bd622b 900 if (pfile->state.parsing_args)
e14c5993 901 result->flags |= PREV_WHITE;
902 else
903 result->flags &= ~PREV_WHITE;
904 goto next_char;
0578f103 905 }
79bd622b 906
e14c5993 907 /* Don't let directives spill over to the next line. */
908 buffer->read_ahead = c;
3c7df4d3 909 pfile->state.next_bol = 1;
79bd622b 910 result->type = CPP_EOF;
338fa5f7 911 break;
732cb4c9 912
338fa5f7 913 case '?':
914 case '\\':
915 /* These could start an escaped newline, or '?' a trigraph. Let
916 skip_escaped_newlines do all the work. */
917 {
918 unsigned int lineno = buffer->lineno;
919
920 c = skip_escaped_newlines (buffer, c);
921 if (lineno != buffer->lineno)
922 /* We had at least one escaped newline of some sort, and the
923 next character is in buffer->read_ahead. Update the
924 token's line and column. */
925 goto next_char;
926
927 /* We are either the original '?' or '\\', or a trigraph. */
928 result->type = CPP_QUERY;
929 buffer->read_ahead = EOF;
930 if (c == '\\')
3f90a920 931 goto random_char;
338fa5f7 932 else if (c != '?')
933 goto do_switch;
934 }
935 break;
732cb4c9 936
338fa5f7 937 case '0': case '1': case '2': case '3': case '4':
938 case '5': case '6': case '7': case '8': case '9':
939 result->type = CPP_NUMBER;
79bd622b 940 parse_number (pfile, &result->val.str, c, 0);
338fa5f7 941 break;
732cb4c9 942
338fa5f7 943 case '$':
944 if (!CPP_OPTION (pfile, dollars_in_ident))
945 goto random_char;
946 /* Fall through... */
947
948 case '_':
949 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
950 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
951 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
952 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
953 case 'y': case 'z':
954 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
955 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
956 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
957 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
958 case 'Y': case 'Z':
959 result->type = CPP_NAME;
960 result->val.node = parse_identifier (pfile, c);
961
962 /* 'L' may introduce wide characters or strings. */
79bd622b 963 if (result->val.node == pfile->spec_nodes.n_L)
338fa5f7 964 {
965 c = buffer->read_ahead; /* For make_string. */
966 if (c == '\'' || c == '"')
71aa9da4 967 {
338fa5f7 968 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
969 goto make_string;
71aa9da4 970 }
338fa5f7 971 }
972 /* Convert named operators to their proper types. */
79bd622b 973 else if (result->val.node->flags & NODE_OPERATOR)
338fa5f7 974 {
975 result->flags |= NAMED_OP;
79bd622b 976 result->type = result->val.node->value.operator;
338fa5f7 977 }
978 break;
979
980 case '\'':
981 case '"':
982 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
983 make_string:
984 parse_string (pfile, result, c);
985 break;
f80e83a9 986
338fa5f7 987 case '/':
f0495c2c 988 /* A potential block or line comment. */
989 comment_start = buffer->cur;
338fa5f7 990 result->type = CPP_DIV;
991 c = get_effective_char (buffer);
992 if (c == '=')
993 ACCEPT_CHAR (CPP_DIV_EQ);
f0495c2c 994 if (c != '/' && c != '*')
995 break;
0578f103 996
f0495c2c 997 if (c == '*')
998 {
338fa5f7 999 if (skip_block_comment (pfile))
79bd622b 1000 cpp_error_with_line (pfile, pfile->lexer_pos.line,
1001 pfile->lexer_pos.col,
338fa5f7 1002 "unterminated comment");
338fa5f7 1003 }
f0495c2c 1004 else
338fa5f7 1005 {
f0495c2c 1006 if (!CPP_OPTION (pfile, cplusplus_comments)
1007 && !CPP_IN_SYSTEM_HEADER (pfile))
1008 break;
1009
338fa5f7 1010 /* We silently allow C++ comments in system headers,
1011 irrespective of conformance mode, because lots of
1012 broken systems do that and trying to clean it up in
1013 fixincludes is a nightmare. */
66914e49 1014 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1015 && ! buffer->warned_cplusplus_comments)
f80e83a9 1016 {
f0495c2c 1017 cpp_pedwarn (pfile,
1018 "C++ style comments are not allowed in ISO C89");
1019 cpp_pedwarn (pfile,
1020 "(this will be reported only once per input file)");
1021 buffer->warned_cplusplus_comments = 1;
1022 }
338fa5f7 1023
66914e49 1024 /* Skip_line_comment updates buffer->read_ahead. */
f0495c2c 1025 if (skip_line_comment (pfile))
79bd622b 1026 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1027 pfile->lexer_pos.col,
f0495c2c 1028 "multi-line comment");
1029 }
338fa5f7 1030
f0495c2c 1031 /* Skipping the comment has updated buffer->read_ahead. */
1032 if (!pfile->state.save_comments)
1033 {
1034 result->flags |= PREV_WHITE;
1035 goto next_char;
338fa5f7 1036 }
f0495c2c 1037
1038 /* Save the comment as a token in its own right. */
1039 save_comment (pfile, result, comment_start);
def71b06 1040 /* Don't do MI optimisation. */
1041 return;
338fa5f7 1042
1043 case '<':
1044 if (pfile->state.angled_headers)
1045 {
1046 result->type = CPP_HEADER_NAME;
1047 c = '>'; /* terminator. */
1048 goto make_string;
1049 }
0578f103 1050
338fa5f7 1051 result->type = CPP_LESS;
1052 c = get_effective_char (buffer);
1053 if (c == '=')
1054 ACCEPT_CHAR (CPP_LESS_EQ);
1055 else if (c == '<')
1056 {
1057 ACCEPT_CHAR (CPP_LSHIFT);
1058 if (get_effective_char (buffer) == '=')
1059 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1060 }
1061 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1062 {
1063 ACCEPT_CHAR (CPP_MIN);
1064 if (get_effective_char (buffer) == '=')
1065 ACCEPT_CHAR (CPP_MIN_EQ);
1066 }
1067 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1068 {
1069 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1070 result->flags |= DIGRAPH;
1071 }
1072 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1073 {
1074 ACCEPT_CHAR (CPP_OPEN_BRACE);
1075 result->flags |= DIGRAPH;
1076 }
1077 break;
1078
1079 case '>':
1080 result->type = CPP_GREATER;
1081 c = get_effective_char (buffer);
1082 if (c == '=')
1083 ACCEPT_CHAR (CPP_GREATER_EQ);
1084 else if (c == '>')
1085 {
1086 ACCEPT_CHAR (CPP_RSHIFT);
1087 if (get_effective_char (buffer) == '=')
1088 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1089 }
1090 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1091 {
1092 ACCEPT_CHAR (CPP_MAX);
1093 if (get_effective_char (buffer) == '=')
1094 ACCEPT_CHAR (CPP_MAX_EQ);
1095 }
1096 break;
1097
f669338a 1098 case '%':
1099 lex_percent (buffer, result);
79bd622b 1100 if (result->type == CPP_HASH)
1101 goto do_hash;
338fa5f7 1102 break;
1103
f669338a 1104 case '.':
1105 lex_dot (pfile, result);
338fa5f7 1106 break;
0578f103 1107
338fa5f7 1108 case '+':
1109 result->type = CPP_PLUS;
1110 c = get_effective_char (buffer);
1111 if (c == '=')
1112 ACCEPT_CHAR (CPP_PLUS_EQ);
1113 else if (c == '+')
1114 ACCEPT_CHAR (CPP_PLUS_PLUS);
1115 break;
ac0749c7 1116
338fa5f7 1117 case '-':
1118 result->type = CPP_MINUS;
1119 c = get_effective_char (buffer);
1120 if (c == '>')
1121 {
1122 ACCEPT_CHAR (CPP_DEREF);
1123 if (CPP_OPTION (pfile, cplusplus)
1124 && get_effective_char (buffer) == '*')
1125 ACCEPT_CHAR (CPP_DEREF_STAR);
1126 }
1127 else if (c == '=')
1128 ACCEPT_CHAR (CPP_MINUS_EQ);
1129 else if (c == '-')
1130 ACCEPT_CHAR (CPP_MINUS_MINUS);
1131 break;
0578f103 1132
338fa5f7 1133 case '*':
1134 result->type = CPP_MULT;
1135 if (get_effective_char (buffer) == '=')
1136 ACCEPT_CHAR (CPP_MULT_EQ);
1137 break;
ac0749c7 1138
338fa5f7 1139 case '=':
1140 result->type = CPP_EQ;
1141 if (get_effective_char (buffer) == '=')
1142 ACCEPT_CHAR (CPP_EQ_EQ);
1143 break;
c4abf88d 1144
338fa5f7 1145 case '!':
1146 result->type = CPP_NOT;
1147 if (get_effective_char (buffer) == '=')
1148 ACCEPT_CHAR (CPP_NOT_EQ);
1149 break;
0578f103 1150
338fa5f7 1151 case '&':
1152 result->type = CPP_AND;
1153 c = get_effective_char (buffer);
1154 if (c == '=')
1155 ACCEPT_CHAR (CPP_AND_EQ);
1156 else if (c == '&')
1157 ACCEPT_CHAR (CPP_AND_AND);
1158 break;
1159
1160 case '#':
230f0943 1161 c = buffer->extra_char; /* Can be set by error condition below. */
1162 if (c != EOF)
1163 {
1164 buffer->read_ahead = c;
1165 buffer->extra_char = EOF;
1166 }
1167 else
1168 c = get_effective_char (buffer);
1169
1170 if (c == '#')
79bd622b 1171 {
e14c5993 1172 ACCEPT_CHAR (CPP_PASTE);
1173 break;
1174 }
1175
1176 result->type = CPP_HASH;
1177 do_hash:
1178 if (bol)
1179 {
1180 if (pfile->state.parsing_args)
79bd622b 1181 {
230f0943 1182 /* 6.10.3 paragraph 11: If there are sequences of
1183 preprocessing tokens within the list of arguments that
1184 would otherwise act as preprocessing directives, the
1185 behavior is undefined.
1186
1187 This implementation will report a hard error, terminate
1188 the macro invocation, and proceed to process the
1189 directive. */
1190 cpp_error (pfile,
1191 "directives may not be used inside a macro argument");
1192
1193 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1194 buffer->extra_char = buffer->read_ahead;
1195 buffer->read_ahead = '#';
3c7df4d3 1196 pfile->state.next_bol = 1;
230f0943 1197 result->type = CPP_EOF;
1198
79bd622b 1199 /* Get whitespace right - newline_in_args sets it. */
1200 if (pfile->lexer_pos.col == 1)
1201 result->flags &= ~PREV_WHITE;
1202 }
e14c5993 1203 else
230f0943 1204 {
1205 /* This is the hash introducing a directive. */
1206 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
e14c5993 1207 goto done_directive; /* bol still 1. */
230f0943 1208 /* This is in fact an assembler #. */
1209 }
79bd622b 1210 }
338fa5f7 1211 break;
0578f103 1212
338fa5f7 1213 case '|':
1214 result->type = CPP_OR;
1215 c = get_effective_char (buffer);
1216 if (c == '=')
1217 ACCEPT_CHAR (CPP_OR_EQ);
1218 else if (c == '|')
1219 ACCEPT_CHAR (CPP_OR_OR);
1220 break;
0578f103 1221
338fa5f7 1222 case '^':
1223 result->type = CPP_XOR;
1224 if (get_effective_char (buffer) == '=')
1225 ACCEPT_CHAR (CPP_XOR_EQ);
1226 break;
0578f103 1227
338fa5f7 1228 case ':':
1229 result->type = CPP_COLON;
1230 c = get_effective_char (buffer);
1231 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1232 ACCEPT_CHAR (CPP_SCOPE);
1233 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1234 {
1235 result->flags |= DIGRAPH;
1236 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1237 }
1238 break;
0578f103 1239
338fa5f7 1240 case '~': result->type = CPP_COMPL; break;
1241 case ',': result->type = CPP_COMMA; break;
1242 case '(': result->type = CPP_OPEN_PAREN; break;
1243 case ')': result->type = CPP_CLOSE_PAREN; break;
1244 case '[': result->type = CPP_OPEN_SQUARE; break;
1245 case ']': result->type = CPP_CLOSE_SQUARE; break;
1246 case '{': result->type = CPP_OPEN_BRACE; break;
1247 case '}': result->type = CPP_CLOSE_BRACE; break;
1248 case ';': result->type = CPP_SEMICOLON; break;
1249
1250 case '@':
1251 if (CPP_OPTION (pfile, objc))
1252 {
1253 /* In Objective C, '@' may begin keywords or strings, like
1254 @keyword or @"string". It would be nice to call
1255 get_effective_char here and test the result. However, we
1256 would then need to pass 2 characters to parse_identifier,
1257 making it ugly and slowing down its main loop. Instead,
1258 we assume we have an identifier, and recover if not. */
1259 result->type = CPP_NAME;
1260 result->val.node = parse_identifier (pfile, c);
1261 if (result->val.node->length != 1)
1262 break;
ac0749c7 1263
338fa5f7 1264 /* OK, so it wasn't an identifier. Maybe a string? */
1265 if (buffer->read_ahead == '"')
f80e83a9 1266 {
338fa5f7 1267 c = '"';
1268 ACCEPT_CHAR (CPP_OSTRING);
1269 goto make_string;
f80e83a9 1270 }
338fa5f7 1271 }
1272 goto random_char;
1273
1274 random_char:
1275 default:
1276 result->type = CPP_OTHER;
33344a1c 1277 result->val.c = c;
338fa5f7 1278 break;
1279 }
920b5d41 1280
def71b06 1281 /* If not in a directive, this token invalidates controlling macros. */
1282 if (!pfile->state.in_directive)
920b5d41 1283 pfile->mi_state = MI_FAILED;
338fa5f7 1284}
1285
79bd622b 1286/* An upper bound on the number of bytes needed to spell a token,
1287 including preceding whitespace. */
1288unsigned int
1289cpp_token_len (token)
1290 const cpp_token *token;
338fa5f7 1291{
79bd622b 1292 unsigned int len;
cfad5579 1293
79bd622b 1294 switch (TOKEN_SPELL (token))
f80e83a9 1295 {
79bd622b 1296 default: len = 0; break;
1297 case SPELL_STRING: len = token->val.str.len; break;
1298 case SPELL_IDENT: len = token->val.node->length; break;
f80e83a9 1299 }
79bd622b 1300 /* 1 for whitespace, 4 for comment delimeters. */
1301 return len + 5;
cfad5579 1302}
1303
f80e83a9 1304/* Write the spelling of a token TOKEN to BUFFER. The buffer must
c5ea33a8 1305 already contain the enough space to hold the token's spelling.
1306 Returns a pointer to the character after the last character
1307 written. */
79bd622b 1308unsigned char *
1309cpp_spell_token (pfile, token, buffer)
f80e83a9 1310 cpp_reader *pfile; /* Would be nice to be rid of this... */
1311 const cpp_token *token;
1312 unsigned char *buffer;
1313{
7e842f95 1314 switch (TOKEN_SPELL (token))
f80e83a9 1315 {
1316 case SPELL_OPERATOR:
1317 {
1318 const unsigned char *spelling;
1319 unsigned char c;
ab12a39c 1320
f80e83a9 1321 if (token->flags & DIGRAPH)
79bd622b 1322 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
31674461 1323 else if (token->flags & NAMED_OP)
1324 goto spell_ident;
f80e83a9 1325 else
7e842f95 1326 spelling = TOKEN_NAME (token);
f80e83a9 1327
1328 while ((c = *spelling++) != '\0')
1329 *buffer++ = c;
1330 }
1331 break;
ab12a39c 1332
f80e83a9 1333 case SPELL_IDENT:
31674461 1334 spell_ident:
76faa4c0 1335 memcpy (buffer, token->val.node->name, token->val.node->length);
1336 buffer += token->val.node->length;
f80e83a9 1337 break;
ab12a39c 1338
f80e83a9 1339 case SPELL_STRING:
1340 {
71aa9da4 1341 int left, right, tag;
1342 switch (token->type)
1343 {
1344 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1345 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1346 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1347 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1348 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1349 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1350 default: left = '\0'; right = '\0'; tag = '\0'; break;
1351 }
1352 if (tag) *buffer++ = tag;
1353 if (left) *buffer++ = left;
76faa4c0 1354 memcpy (buffer, token->val.str.text, token->val.str.len);
1355 buffer += token->val.str.len;
71aa9da4 1356 if (right) *buffer++ = right;
f80e83a9 1357 }
1358 break;
ab12a39c 1359
f80e83a9 1360 case SPELL_CHAR:
33344a1c 1361 *buffer++ = token->val.c;
f80e83a9 1362 break;
ab12a39c 1363
f80e83a9 1364 case SPELL_NONE:
7e842f95 1365 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
f80e83a9 1366 break;
1367 }
ab12a39c 1368
f80e83a9 1369 return buffer;
1370}
ab12a39c 1371
79bd622b 1372/* Returns a token as a null-terminated string. The string is
1373 temporary, and automatically freed later. Useful for diagnostics. */
1374unsigned char *
1375cpp_token_as_text (pfile, token)
6060326b 1376 cpp_reader *pfile;
f80e83a9 1377 const cpp_token *token;
6060326b 1378{
79bd622b 1379 unsigned int len = cpp_token_len (token);
1380 unsigned char *start = _cpp_pool_alloc (&pfile->temp_string_pool, len), *end;
6060326b 1381
79bd622b 1382 end = cpp_spell_token (pfile, token, start);
1383 end[0] = '\0';
6060326b 1384
79bd622b 1385 return start;
1386}
6060326b 1387
79bd622b 1388/* Used by C front ends. Should really move to using cpp_token_as_text. */
1389const char *
1390cpp_type2name (type)
1391 enum cpp_ttype type;
1392{
1393 return (const char *) token_spellings[type].name;
1394}
6060326b 1395
79bd622b 1396/* Writes the spelling of token to FP. Separate from cpp_spell_token
1397 for efficiency - to avoid double-buffering. Also, outputs a space
1398 if PREV_WHITE is flagged. */
1399void
1400cpp_output_token (token, fp)
1401 const cpp_token *token;
1402 FILE *fp;
1403{
1404 if (token->flags & PREV_WHITE)
1405 putc (' ', fp);
63e1abce 1406
79bd622b 1407 switch (TOKEN_SPELL (token))
6060326b 1408 {
79bd622b 1409 case SPELL_OPERATOR:
1410 {
1411 const unsigned char *spelling;
6060326b 1412
79bd622b 1413 if (token->flags & DIGRAPH)
1414 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1415 else if (token->flags & NAMED_OP)
1416 goto spell_ident;
1417 else
1418 spelling = TOKEN_NAME (token);
f80e83a9 1419
79bd622b 1420 ufputs (spelling, fp);
1421 }
1422 break;
f80e83a9 1423
79bd622b 1424 spell_ident:
1425 case SPELL_IDENT:
1426 ufputs (token->val.node->name, fp);
1427 break;
f80e83a9 1428
79bd622b 1429 case SPELL_STRING:
1430 {
1431 int left, right, tag;
1432 switch (token->type)
1433 {
1434 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1435 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1436 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1437 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1438 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1439 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1440 default: left = '\0'; right = '\0'; tag = '\0'; break;
1441 }
1442 if (tag) putc (tag, fp);
1443 if (left) putc (left, fp);
1444 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1445 if (right) putc (right, fp);
1446 }
1447 break;
6060326b 1448
79bd622b 1449 case SPELL_CHAR:
33344a1c 1450 putc (token->val.c, fp);
79bd622b 1451 break;
6060326b 1452
79bd622b 1453 case SPELL_NONE:
1454 /* An error, most probably. */
1455 break;
f80e83a9 1456 }
6060326b 1457}
1458
79bd622b 1459/* Compare two tokens. */
1460int
1461_cpp_equiv_tokens (a, b)
1462 const cpp_token *a, *b;
6060326b 1463{
79bd622b 1464 if (a->type == b->type && a->flags == b->flags)
1465 switch (TOKEN_SPELL (a))
1466 {
1467 default: /* Keep compiler happy. */
1468 case SPELL_OPERATOR:
1469 return 1;
1470 case SPELL_CHAR:
33344a1c 1471 return a->val.c == b->val.c; /* Character. */
79bd622b 1472 case SPELL_NONE:
588d632b 1473 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
79bd622b 1474 case SPELL_IDENT:
1475 return a->val.node == b->val.node;
1476 case SPELL_STRING:
1477 return (a->val.str.len == b->val.str.len
1478 && !memcmp (a->val.str.text, b->val.str.text,
1479 a->val.str.len));
1480 }
6060326b 1481
f80e83a9 1482 return 0;
1483}
1484
#if 0
/* Compare two token lists.  Returns 1 if A and B hold the same number
   of tokens and each pair compares equal under _cpp_equiv_tokens, 0
   otherwise.  Currently compiled out.  */
int
_cpp_equiv_toklists (a, b)
     const struct toklist *a, *b;
{
  unsigned int n = a->limit - a->first;
  unsigned int idx = 0;

  if (n != (b->limit - b->first))
    return 0;

  while (idx < n)
    {
      if (! _cpp_equiv_tokens (&a->first[idx], &b->first[idx]))
        return 0;
      idx++;
    }

  return 1;
}
#endif
6060326b 1504
f80e83a9 1505/* Determine whether two tokens can be pasted together, and if so,
1506 what the resulting token is. Returns CPP_EOF if the tokens cannot
1507 be pasted, or the appropriate type for the merged token if they
1508 can. */
524f0c40 1509enum cpp_ttype
79bd622b 1510cpp_can_paste (pfile, token1, token2, digraph)
f80e83a9 1511 cpp_reader * pfile;
1512 const cpp_token *token1, *token2;
1513 int* digraph;
6060326b 1514{
f80e83a9 1515 enum cpp_ttype a = token1->type, b = token2->type;
1516 int cxx = CPP_OPTION (pfile, cplusplus);
6060326b 1517
31674461 1518 /* Treat named operators as if they were ordinary NAMEs. */
1519 if (token1->flags & NAMED_OP)
1520 a = CPP_NAME;
1521 if (token2->flags & NAMED_OP)
1522 b = CPP_NAME;
1523
f80e83a9 1524 if (a <= CPP_LAST_EQ && b == CPP_EQ)
1525 return a + (CPP_EQ_EQ - CPP_EQ);
6060326b 1526
f80e83a9 1527 switch (a)
6060326b 1528 {
f80e83a9 1529 case CPP_GREATER:
1530 if (b == a) return CPP_RSHIFT;
1531 if (b == CPP_QUERY && cxx) return CPP_MAX;
1532 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1533 break;
1534 case CPP_LESS:
1535 if (b == a) return CPP_LSHIFT;
1536 if (b == CPP_QUERY && cxx) return CPP_MIN;
1537 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
27fdc0b6 1538 if (CPP_OPTION (pfile, digraphs))
1539 {
1540 if (b == CPP_COLON)
1541 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1542 if (b == CPP_MOD)
1543 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1544 }
f80e83a9 1545 break;
6060326b 1546
f80e83a9 1547 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1548 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1549 case CPP_OR: if (b == a) return CPP_OR_OR; break;
6060326b 1550
f80e83a9 1551 case CPP_MINUS:
1552 if (b == a) return CPP_MINUS_MINUS;
1553 if (b == CPP_GREATER) return CPP_DEREF;
1554 break;
1555 case CPP_COLON:
1556 if (b == a && cxx) return CPP_SCOPE;
27fdc0b6 1557 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
f80e83a9 1558 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1559 break;
1560
1561 case CPP_MOD:
27fdc0b6 1562 if (CPP_OPTION (pfile, digraphs))
1563 {
1564 if (b == CPP_GREATER)
1565 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1566 if (b == CPP_COLON)
1567 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1568 }
f80e83a9 1569 break;
1570 case CPP_DEREF:
1571 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1572 break;
1573 case CPP_DOT:
1574 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1575 if (b == CPP_NUMBER) return CPP_NUMBER;
1576 break;
1577
1578 case CPP_HASH:
1579 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1580 /* %:%: digraph */
1581 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1582 break;
1583
1584 case CPP_NAME:
1585 if (b == CPP_NAME) return CPP_NAME;
1586 if (b == CPP_NUMBER
79bd622b 1587 && name_p (pfile, &token2->val.str)) return CPP_NAME;
f80e83a9 1588 if (b == CPP_CHAR
79bd622b 1589 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
f80e83a9 1590 if (b == CPP_STRING
79bd622b 1591 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
f80e83a9 1592 break;
1593
1594 case CPP_NUMBER:
1595 if (b == CPP_NUMBER) return CPP_NUMBER;
1596 if (b == CPP_NAME) return CPP_NUMBER;
1597 if (b == CPP_DOT) return CPP_NUMBER;
1598 /* Numbers cannot have length zero, so this is safe. */
1599 if ((b == CPP_PLUS || b == CPP_MINUS)
76faa4c0 1600 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
f80e83a9 1601 return CPP_NUMBER;
1602 break;
1603
71aa9da4 1604 case CPP_OTHER:
33344a1c 1605 if (CPP_OPTION (pfile, objc) && token1->val.c == '@')
71aa9da4 1606 {
1607 if (b == CPP_NAME) return CPP_NAME;
1608 if (b == CPP_STRING) return CPP_OSTRING;
1609 }
1610
f80e83a9 1611 default:
1612 break;
6060326b 1613 }
1614
f80e83a9 1615 return CPP_EOF;
1616}
1617
79bd622b 1618/* Returns nonzero if a space should be inserted to avoid an
1619 accidental token paste for output. For simplicity, it is
1620 conservative, and occasionally advises a space where one is not
1621 needed, e.g. "." and ".2". */
f80e83a9 1622
79bd622b 1623int
1624cpp_avoid_paste (pfile, token1, token2)
6060326b 1625 cpp_reader *pfile;
79bd622b 1626 const cpp_token *token1, *token2;
6060326b 1627{
79bd622b 1628 enum cpp_ttype a = token1->type, b = token2->type;
1629 cppchar_t c;
6060326b 1630
79bd622b 1631 if (token1->flags & NAMED_OP)
1632 a = CPP_NAME;
1633 if (token2->flags & NAMED_OP)
1634 b = CPP_NAME;
6060326b 1635
79bd622b 1636 c = EOF;
1637 if (token2->flags & DIGRAPH)
1638 c = digraph_spellings[b - CPP_FIRST_DIGRAPH][0];
1639 else if (token_spellings[b].category == SPELL_OPERATOR)
1640 c = token_spellings[b].name[0];
6060326b 1641
79bd622b 1642 /* Quickly get everything that can paste with an '='. */
1643 if (a <= CPP_LAST_EQ && c == '=')
1644 return 1;
6060326b 1645
79bd622b 1646 switch (a)
6060326b 1647 {
79bd622b 1648 case CPP_GREATER: return c == '>' || c == '?';
1649 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1650 case CPP_PLUS: return c == '+';
1651 case CPP_MINUS: return c == '-' || c == '>';
1652 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1653 case CPP_MOD: return c == ':' || c == '>';
1654 case CPP_AND: return c == '&';
1655 case CPP_OR: return c == '|';
1656 case CPP_COLON: return c == ':' || c == '>';
1657 case CPP_DEREF: return c == '*';
1658 case CPP_DOT: return c == '.' || c == '%';
1659 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1660 case CPP_NAME: return ((b == CPP_NUMBER
1661 && name_p (pfile, &token2->val.str))
1662 || b == CPP_NAME
1663 || b == CPP_CHAR || b == CPP_STRING); /* L */
1664 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1665 || c == '.' || c == '+' || c == '-');
1666 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
33344a1c 1667 && token1->val.c == '@'
79bd622b 1668 && (b == CPP_NAME || b == CPP_STRING));
1669 default: break;
6060326b 1670 }
6060326b 1671
deb356cf 1672 return 0;
6060326b 1673}
1674
79bd622b 1675/* Output all the remaining tokens on the current line, and a newline
1676 character, to FP. Leading whitespace is removed. */
6060326b 1677void
79bd622b 1678cpp_output_line (pfile, fp)
6060326b 1679 cpp_reader *pfile;
79bd622b 1680 FILE *fp;
6060326b 1681{
79bd622b 1682 cpp_token token;
7e842f95 1683
343fd982 1684 cpp_get_token (pfile, &token);
79bd622b 1685 token.flags &= ~PREV_WHITE;
1686 while (token.type != CPP_EOF)
7e842f95 1687 {
79bd622b 1688 cpp_output_token (&token, fp);
343fd982 1689 cpp_get_token (pfile, &token);
7e842f95 1690 }
1691
79bd622b 1692 putc ('\n', fp);
f80e83a9 1693}
6060326b 1694
79bd622b 1695/* Memory pools. */
deb356cf 1696
/* Probe type used only to measure alignment: the offset of U is the
   position the compiler gives a double-or-pointer union when it
   follows a single char.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* Alignment used for chunk sizes, and for pools initialized with an
   alignment of zero.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
deb356cf 1708
79bd622b 1709static int
1710chunk_suitable (pool, chunk, size)
1711 cpp_pool *pool;
1712 cpp_chunk *chunk;
1713 unsigned int size;
1714{
1715 /* Being at least twice SIZE means we can use memcpy in
1716 _cpp_next_chunk rather than memmove. Besides, it's a good idea
1717 anyway. */
1718 return (chunk && pool->locked != chunk
1719 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
f80e83a9 1720}
6060326b 1721
79bd622b 1722/* Returns the end of the new pool. PTR points to a char in the old
1723 pool, and is updated to point to the same char in the new pool. */
1724unsigned char *
1725_cpp_next_chunk (pool, len, ptr)
1726 cpp_pool *pool;
1727 unsigned int len;
1728 unsigned char **ptr;
f80e83a9 1729{
79bd622b 1730 cpp_chunk *chunk = pool->cur->next;
6060326b 1731
79bd622b 1732 /* LEN is the minimum size we want in the new pool. */
1733 len += POOL_ROOM (pool);
1734 if (! chunk_suitable (pool, chunk, len))
f80e83a9 1735 {
79bd622b 1736 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
6060326b 1737
79bd622b 1738 chunk->next = pool->cur->next;
1739 pool->cur->next = chunk;
6060326b 1740 }
1741
79bd622b 1742 /* Update the pointer before changing chunk's front. */
1743 if (ptr)
1744 *ptr += chunk->base - POOL_FRONT (pool);
f80e83a9 1745
79bd622b 1746 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
1747 chunk->front = chunk->base;
f80e83a9 1748
79bd622b 1749 pool->cur = chunk;
1750 return POOL_LIMIT (pool);
6060326b 1751}
1752
79bd622b 1753static cpp_chunk *
1754new_chunk (size)
1755 unsigned int size;
f80e83a9 1756{
79bd622b 1757 unsigned char *base;
1758 cpp_chunk *result;
89b05ef6 1759
79bd622b 1760 size = ALIGN (size, DEFAULT_ALIGNMENT);
1761 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1762 /* Put the chunk descriptor at the end. Then chunk overruns will
1763 cause obvious chaos. */
1764 result = (cpp_chunk *) (base + size);
1765 result->base = base;
1766 result->front = base;
1767 result->limit = base + size;
1768 result->next = 0;
deb356cf 1769
79bd622b 1770 return result;
f80e83a9 1771}
1772
79bd622b 1773void
1774_cpp_init_pool (pool, size, align, temp)
1775 cpp_pool *pool;
1776 unsigned int size, align, temp;
1777{
1778 if (align == 0)
1779 align = DEFAULT_ALIGNMENT;
1780 if (align & (align - 1))
1781 abort ();
1782 pool->align = align;
1783 pool->cur = new_chunk (size);
1784 pool->locked = 0;
1785 pool->locks = 0;
1786 if (temp)
1787 pool->cur->next = pool->cur;
f80e83a9 1788}
1789
79bd622b 1790void
1791_cpp_lock_pool (pool)
1792 cpp_pool *pool;
f80e83a9 1793{
79bd622b 1794 if (pool->locks++ == 0)
1795 pool->locked = pool->cur;
f80e83a9 1796}
1797
79bd622b 1798void
1799_cpp_unlock_pool (pool)
1800 cpp_pool *pool;
f80e83a9 1801{
79bd622b 1802 if (--pool->locks == 0)
1803 pool->locked = 0;
f80e83a9 1804}
1805
79bd622b 1806void
1807_cpp_free_pool (pool)
1808 cpp_pool *pool;
89b05ef6 1809{
79bd622b 1810 cpp_chunk *chunk = pool->cur, *next;
89b05ef6 1811
79bd622b 1812 do
89b05ef6 1813 {
79bd622b 1814 next = chunk->next;
1815 free (chunk->base);
1816 chunk = next;
89b05ef6 1817 }
79bd622b 1818 while (chunk && chunk != pool->cur);
f80e83a9 1819}
f80e83a9 1820
79bd622b 1821/* Reserve LEN bytes from a memory pool. */
1822unsigned char *
1823_cpp_pool_reserve (pool, len)
1824 cpp_pool *pool;
1825 unsigned int len;
f80e83a9 1826{
79bd622b 1827 len = ALIGN (len, pool->align);
1828 if (len > (unsigned int) POOL_ROOM (pool))
1829 _cpp_next_chunk (pool, len, 0);
f80e83a9 1830
79bd622b 1831 return POOL_FRONT (pool);
6060326b 1832}
1833
79bd622b 1834/* Allocate LEN bytes from a memory pool. */
1835unsigned char *
1836_cpp_pool_alloc (pool, len)
1837 cpp_pool *pool;
1838 unsigned int len;
f80e83a9 1839{
79bd622b 1840 unsigned char *result = _cpp_pool_reserve (pool, len);
deb356cf 1841
79bd622b 1842 POOL_COMMIT (pool, len);
1843 return result;
f80e83a9 1844}