]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/cpplex.c
c-lex.c (c_lex): Replace tok.val.aux with tok.val.c or tok.val.arg_no as appropriate.
[thirdparty/gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
93c80368
NB
23/* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
27
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
041c3194 36
45b966db
ZW
37#include "config.h"
38#include "system.h"
45b966db
ZW
39#include "cpplib.h"
40#include "cpphash.h"
041c3194 41#include "symcat.h"
45b966db 42
93c80368
NB
43/* Tokens with SPELL_STRING store their spelling in the token list,
44 and it's length in the token->val.name.len. */
45enum spell_type
f9a0e96c 46{
93c80368
NB
47 SPELL_OPERATOR = 0,
48 SPELL_CHAR,
49 SPELL_IDENT,
50 SPELL_STRING,
51 SPELL_NONE
f9a0e96c
ZW
52};
53
93c80368 54struct token_spelling
f9a0e96c 55{
93c80368
NB
56 enum spell_type category;
57 const unsigned char *name;
f9a0e96c
ZW
58};
59
93c80368
NB
60const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
61 U":>", U"<%", U"%>"};
62
63#define OP(e, s) { SPELL_OPERATOR, U s },
64#define TK(e, s) { s, U STRINGX (e) },
65const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
66#undef OP
67#undef TK
68
69#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
70#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
f2d5f0cc 71
0d9f234d
NB
72static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
73static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
74static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
75
041c3194 76static int skip_block_comment PARAMS ((cpp_reader *));
cbcff6df 77static int skip_line_comment PARAMS ((cpp_reader *));
0d9f234d
NB
78static void adjust_column PARAMS ((cpp_reader *));
79static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
80static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
93c80368
NB
81static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
82static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
0d9f234d 83static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
93c80368 84static void unterminated PARAMS ((cpp_reader *, int));
0d9f234d
NB
85static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
86static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
cbcff6df
NB
87static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
88static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
93c80368 89static int name_p PARAMS ((cpp_reader *, const cpp_string *));
f617b8e2 90
93c80368
NB
91static cpp_chunk *new_chunk PARAMS ((unsigned int));
92static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
15dad1d9 93
041c3194 94/* Utility routine:
9e62c811 95
bfb9dc7f
ZW
96 Compares, the token TOKEN to the NUL-terminated string STRING.
97 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
15dad1d9 98
041c3194 99int
bfb9dc7f
ZW
100cpp_ideq (token, string)
101 const cpp_token *token;
041c3194
ZW
102 const char *string;
103{
bfb9dc7f 104 if (token->type != CPP_NAME)
041c3194 105 return 0;
bfb9dc7f 106
93c80368 107 return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
15dad1d9 108}
1368ee70 109
0d9f234d
NB
110/* Call when meeting a newline. Returns the character after the newline
111 (or carriage-return newline combination), or EOF. */
112static cppchar_t
113handle_newline (buffer, newline_char)
114 cpp_buffer *buffer;
115 cppchar_t newline_char;
116{
117 cppchar_t next = EOF;
118
119 buffer->col_adjust = 0;
120 buffer->lineno++;
121 buffer->line_base = buffer->cur;
122
123 /* Handle CR-LF and LF-CR combinations, get the next character. */
124 if (buffer->cur < buffer->rlimit)
125 {
126 next = *buffer->cur++;
127 if (next + newline_char == '\r' + '\n')
128 {
129 buffer->line_base = buffer->cur;
130 if (buffer->cur < buffer->rlimit)
131 next = *buffer->cur++;
132 else
133 next = EOF;
134 }
135 }
136
137 buffer->read_ahead = next;
138 return next;
139}
140
141/* Subroutine of skip_escaped_newlines; called when a trigraph is
142 encountered. It warns if necessary, and returns true if the
143 trigraph should be honoured. FROM_CHAR is the third character of a
144 trigraph, and presumed to be the previous character for position
145 reporting. */
45b966db 146static int
0d9f234d 147trigraph_ok (pfile, from_char)
45b966db 148 cpp_reader *pfile;
0d9f234d 149 cppchar_t from_char;
45b966db 150{
041c3194
ZW
151 int accept = CPP_OPTION (pfile, trigraphs);
152
cbcff6df
NB
153 /* Don't warn about trigraphs in comments. */
154 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
45b966db 155 {
0d9f234d 156 cpp_buffer *buffer = pfile->buffer;
041c3194 157 if (accept)
0d9f234d 158 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
041c3194 159 "trigraph ??%c converted to %c",
0d9f234d
NB
160 (int) from_char,
161 (int) _cpp_trigraph_map[from_char]);
45b966db 162 else
0d9f234d
NB
163 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
164 "trigraph ??%c ignored", (int) from_char);
45b966db 165 }
0d9f234d 166
041c3194 167 return accept;
45b966db
ZW
168}
169
0d9f234d
NB
/* Set the type of the token being built to T and consume the
   look-ahead character.  Assumes local variables buffer and result.
   T is parenthesized so that any expression may be passed safely.  */
#define ACCEPT_CHAR(t) \
  do { result->type = (t); buffer->read_ahead = EOF; } while (0)

/* Save and restore the read position of the buffer.  Both assume
   local variables buffer and saved_cur.

   When we move to multibyte character sets, add to these something
   that saves and restores the state of the multibyte conversion
   library.  This probably involves saving and restoring a "cookie".
   In the case of glibc it is an 8-byte structure, so is not a high
   overhead operation.  In any case, it's out of the fast path.  */
#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
181
182/* Skips any escaped newlines introduced by NEXT, which is either a
183 '?' or a '\\'. Returns the next character, which will also have
a5c3cccd
NB
184 been placed in buffer->read_ahead. This routine performs
185 preprocessing stages 1 and 2 of the ISO C standard. */
0d9f234d
NB
186static cppchar_t
187skip_escaped_newlines (buffer, next)
188 cpp_buffer *buffer;
189 cppchar_t next;
45b966db 190{
a5c3cccd
NB
191 /* Only do this if we apply stages 1 and 2. */
192 if (!buffer->from_stage3)
041c3194 193 {
a5c3cccd
NB
194 cppchar_t next1;
195 const unsigned char *saved_cur;
196 int space;
197
198 do
0d9f234d 199 {
a5c3cccd
NB
200 if (buffer->cur == buffer->rlimit)
201 break;
202
203 SAVE_STATE ();
204 if (next == '?')
0d9f234d 205 {
a5c3cccd
NB
206 next1 = *buffer->cur++;
207 if (next1 != '?' || buffer->cur == buffer->rlimit)
208 {
209 RESTORE_STATE ();
210 break;
211 }
212
213 next1 = *buffer->cur++;
214 if (!_cpp_trigraph_map[next1]
215 || !trigraph_ok (buffer->pfile, next1))
216 {
217 RESTORE_STATE ();
218 break;
219 }
220
221 /* We have a full trigraph here. */
222 next = _cpp_trigraph_map[next1];
223 if (next != '\\' || buffer->cur == buffer->rlimit)
224 break;
225 SAVE_STATE ();
226 }
227
228 /* We have a backslash, and room for at least one more character. */
229 space = 0;
230 do
231 {
232 next1 = *buffer->cur++;
233 if (!is_nvspace (next1))
234 break;
235 space = 1;
0d9f234d 236 }
a5c3cccd 237 while (buffer->cur < buffer->rlimit);
041c3194 238
a5c3cccd 239 if (!is_vspace (next1))
0d9f234d
NB
240 {
241 RESTORE_STATE ();
242 break;
243 }
45b966db 244
a5c3cccd
NB
245 if (space)
246 cpp_warning (buffer->pfile,
247 "backslash and newline separated by space");
0d9f234d 248
a5c3cccd
NB
249 next = handle_newline (buffer, next1);
250 if (next == EOF)
251 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
0d9f234d 252 }
a5c3cccd 253 while (next == '\\' || next == '?');
041c3194 254 }
45b966db 255
0d9f234d
NB
256 buffer->read_ahead = next;
257 return next;
45b966db
ZW
258}
259
0d9f234d
NB
260/* Obtain the next character, after trigraph conversion and skipping
261 an arbitrary string of escaped newlines. The common case of no
262 trigraphs or escaped newlines falls through quickly. */
263static cppchar_t
264get_effective_char (buffer)
265 cpp_buffer *buffer;
64aaf407 266{
0d9f234d
NB
267 cppchar_t next = EOF;
268
269 if (buffer->cur < buffer->rlimit)
270 {
271 next = *buffer->cur++;
272
273 /* '?' can introduce trigraphs (and therefore backslash); '\\'
274 can introduce escaped newlines, which we want to skip, or
275 UCNs, which, depending upon lexer state, we will handle in
276 the future. */
277 if (next == '?' || next == '\\')
278 next = skip_escaped_newlines (buffer, next);
279 }
280
281 buffer->read_ahead = next;
282 return next;
64aaf407
NB
283}
284
0d9f234d
NB
285/* Skip a C-style block comment. We find the end of the comment by
286 seeing if an asterisk is before every '/' we encounter. Returns
287 non-zero if comment terminated by EOF, zero otherwise. */
041c3194
ZW
288static int
289skip_block_comment (pfile)
45b966db
ZW
290 cpp_reader *pfile;
291{
041c3194 292 cpp_buffer *buffer = pfile->buffer;
d8090680 293 cppchar_t c = EOF, prevc = EOF;
0d9f234d 294
cbcff6df 295 pfile->state.lexing_comment = 1;
0d9f234d 296 while (buffer->cur != buffer->rlimit)
45b966db 297 {
0d9f234d
NB
298 prevc = c, c = *buffer->cur++;
299
300 next_char:
301 /* FIXME: For speed, create a new character class of characters
93c80368 302 of interest inside block comments. */
0d9f234d
NB
303 if (c == '?' || c == '\\')
304 c = skip_escaped_newlines (buffer, c);
041c3194 305
0d9f234d
NB
306 /* People like decorating comments with '*', so check for '/'
307 instead for efficiency. */
041c3194 308 if (c == '/')
45b966db 309 {
0d9f234d
NB
310 if (prevc == '*')
311 break;
041c3194 312
0d9f234d
NB
313 /* Warn about potential nested comments, but not if the '/'
314 comes immediately before the true comment delimeter.
041c3194 315 Don't bother to get it right across escaped newlines. */
0d9f234d
NB
316 if (CPP_OPTION (pfile, warn_comments)
317 && buffer->cur != buffer->rlimit)
45b966db 318 {
0d9f234d
NB
319 prevc = c, c = *buffer->cur++;
320 if (c == '*' && buffer->cur != buffer->rlimit)
321 {
322 prevc = c, c = *buffer->cur++;
323 if (c != '/')
324 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
325 CPP_BUF_COL (buffer),
326 "\"/*\" within comment");
327 }
328 goto next_char;
45b966db 329 }
45b966db 330 }
91fcd158 331 else if (is_vspace (c))
45b966db 332 {
0d9f234d
NB
333 prevc = c, c = handle_newline (buffer, c);
334 goto next_char;
45b966db 335 }
52fadca8 336 else if (c == '\t')
0d9f234d 337 adjust_column (pfile);
45b966db 338 }
041c3194 339
cbcff6df 340 pfile->state.lexing_comment = 0;
0d9f234d
NB
341 buffer->read_ahead = EOF;
342 return c != '/' || prevc != '*';
45b966db
ZW
343}
344
f9a0e96c 345/* Skip a C++ line comment. Handles escaped newlines. Returns
0d9f234d
NB
346 non-zero if a multiline comment. The following new line, if any,
347 is left in buffer->read_ahead. */
041c3194 348static int
cbcff6df
NB
349skip_line_comment (pfile)
350 cpp_reader *pfile;
45b966db 351{
cbcff6df 352 cpp_buffer *buffer = pfile->buffer;
0d9f234d
NB
353 unsigned int orig_lineno = buffer->lineno;
354 cppchar_t c;
041c3194 355
cbcff6df 356 pfile->state.lexing_comment = 1;
0d9f234d 357 do
041c3194 358 {
0d9f234d
NB
359 c = EOF;
360 if (buffer->cur == buffer->rlimit)
361 break;
041c3194 362
0d9f234d
NB
363 c = *buffer->cur++;
364 if (c == '?' || c == '\\')
365 c = skip_escaped_newlines (buffer, c);
041c3194 366 }
0d9f234d 367 while (!is_vspace (c));
45b966db 368
cbcff6df 369 pfile->state.lexing_comment = 0;
0d9f234d
NB
370 buffer->read_ahead = c; /* Leave any newline for caller. */
371 return orig_lineno != buffer->lineno;
041c3194 372}
45b966db 373
0d9f234d
NB
374/* pfile->buffer->cur is one beyond the \t character. Update
375 col_adjust so we track the column correctly. */
52fadca8 376static void
0d9f234d 377adjust_column (pfile)
52fadca8 378 cpp_reader *pfile;
52fadca8 379{
0d9f234d
NB
380 cpp_buffer *buffer = pfile->buffer;
381 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
52fadca8
NB
382
383 /* Round it up to multiple of the tabstop, but subtract 1 since the
384 tab itself occupies a character position. */
0d9f234d
NB
385 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
386 - col % CPP_OPTION (pfile, tabstop)) - 1;
52fadca8
NB
387}
388
0d9f234d
NB
389/* Skips whitespace, saving the next non-whitespace character.
390 Adjusts pfile->col_adjust to account for tabs. Without this,
391 tokens might be assigned an incorrect column. */
041c3194 392static void
0d9f234d 393skip_whitespace (pfile, c)
041c3194 394 cpp_reader *pfile;
0d9f234d 395 cppchar_t c;
041c3194
ZW
396{
397 cpp_buffer *buffer = pfile->buffer;
0d9f234d 398 unsigned int warned = 0;
45b966db 399
0d9f234d 400 do
041c3194 401 {
91fcd158
NB
402 /* Horizontal space always OK. */
403 if (c == ' ')
0d9f234d 404 ;
91fcd158 405 else if (c == '\t')
0d9f234d
NB
406 adjust_column (pfile);
407 /* Just \f \v or \0 left. */
91fcd158 408 else if (c == '\0')
041c3194 409 {
91fcd158 410 if (!warned)
0d9f234d
NB
411 {
412 cpp_warning (pfile, "null character(s) ignored");
413 warned = 1;
414 }
45b966db 415 }
93c80368 416 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
91fcd158
NB
417 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
418 CPP_BUF_COL (buffer),
419 "%s in preprocessing directive",
420 c == '\f' ? "form feed" : "vertical tab");
0d9f234d
NB
421
422 c = EOF;
423 if (buffer->cur == buffer->rlimit)
424 break;
425 c = *buffer->cur++;
45b966db 426 }
0d9f234d
NB
427 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
428 while (is_nvspace (c));
429
430 /* Remember the next character. */
431 buffer->read_ahead = c;
041c3194 432}
45b966db 433
93c80368
NB
434/* See if the characters of a number token are valid in a name (no
435 '.', '+' or '-'). */
436static int
437name_p (pfile, string)
438 cpp_reader *pfile;
439 const cpp_string *string;
440{
441 unsigned int i;
442
443 for (i = 0; i < string->len; i++)
444 if (!is_idchar (string->text[i]))
445 return 0;
446
447 return 1;
448}
449
0d9f234d
NB
450/* Parse an identifier, skipping embedded backslash-newlines.
451 Calculate the hash value of the token while parsing, for improved
452 performance. The hashing algorithm *must* match cpp_lookup(). */
453
454static cpp_hashnode *
455parse_identifier (pfile, c)
45b966db 456 cpp_reader *pfile;
0d9f234d 457 cppchar_t c;
45b966db 458{
93c80368 459 cpp_hashnode *result;
0d9f234d 460 cpp_buffer *buffer = pfile->buffer;
93c80368 461 unsigned char *dest, *limit;
0d9f234d 462 unsigned int r = 0, saw_dollar = 0;
93c80368
NB
463
464 dest = POOL_FRONT (&pfile->ident_pool);
465 limit = POOL_LIMIT (&pfile->ident_pool);
041c3194 466
0d9f234d 467 do
041c3194 468 {
0d9f234d 469 do
041c3194 470 {
93c80368
NB
471 /* Need room for terminating null. */
472 if (dest + 1 >= limit)
473 limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
474
475 *dest++ = c;
0d9f234d 476 r = HASHSTEP (r, c);
45b966db 477
0d9f234d
NB
478 if (c == '$')
479 saw_dollar++;
ba89d661 480
0d9f234d
NB
481 c = EOF;
482 if (buffer->cur == buffer->rlimit)
483 break;
ba89d661 484
0d9f234d
NB
485 c = *buffer->cur++;
486 }
487 while (is_idchar (c));
ba89d661 488
0d9f234d
NB
489 /* Potential escaped newline? */
490 if (c != '?' && c != '\\')
491 break;
492 c = skip_escaped_newlines (buffer, c);
041c3194 493 }
0d9f234d
NB
494 while (is_idchar (c));
495
93c80368
NB
496 /* Remember the next character. */
497 buffer->read_ahead = c;
498
0d9f234d
NB
499 /* $ is not a identifier character in the standard, but is commonly
500 accepted as an extension. Don't warn about it in skipped
501 conditional blocks. */
502 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
503 cpp_pedwarn (pfile, "'$' character(s) in identifier");
504
93c80368
NB
505 /* Identifiers are null-terminated. */
506 *dest = '\0';
507
508 /* This routine commits the memory if necessary. */
509 result = _cpp_lookup_with_hash (pfile,
510 dest - POOL_FRONT (&pfile->ident_pool), r);
511
512 /* Some identifiers require diagnostics when lexed. */
513 if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
514 {
515 /* It is allowed to poison the same identifier twice. */
516 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
517 cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
518
519 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
520 replacement list of a variable-arguments macro. */
521 if (result == pfile->spec_nodes.n__VA_ARGS__
522 && !pfile->state.va_args_ok)
523 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variable-argument macro");
524 }
525
526 return result;
45b966db
ZW
527}
528
0d9f234d 529/* Parse a number, skipping embedded backslash-newlines. */
45b966db 530static void
93c80368 531parse_number (pfile, number, c, leading_period)
45b966db 532 cpp_reader *pfile;
0d9f234d
NB
533 cpp_string *number;
534 cppchar_t c;
93c80368 535 int leading_period;
45b966db 536{
041c3194 537 cpp_buffer *buffer = pfile->buffer;
93c80368
NB
538 cpp_pool *pool = pfile->string_pool;
539 unsigned char *dest, *limit;
45b966db 540
93c80368
NB
541 dest = POOL_FRONT (pool);
542 limit = POOL_LIMIT (pool);
cbcff6df 543
93c80368
NB
544 /* Place a leading period. */
545 if (leading_period)
546 {
547 if (dest >= limit)
548 limit = _cpp_next_chunk (pool, 0, &dest);
549 *dest++ = '.';
550 }
551
0d9f234d 552 do
041c3194 553 {
0d9f234d
NB
554 do
555 {
93c80368
NB
556 /* Need room for terminating null. */
557 if (dest + 1 >= limit)
558 limit = _cpp_next_chunk (pool, 0, &dest);
559 *dest++ = c;
0d9f234d 560
0d9f234d
NB
561 c = EOF;
562 if (buffer->cur == buffer->rlimit)
563 break;
45b966db 564
0d9f234d
NB
565 c = *buffer->cur++;
566 }
93c80368 567 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
45b966db 568
0d9f234d
NB
569 /* Potential escaped newline? */
570 if (c != '?' && c != '\\')
571 break;
572 c = skip_escaped_newlines (buffer, c);
45b966db 573 }
93c80368 574 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
cbcff6df 575
0d9f234d
NB
576 /* Remember the next character. */
577 buffer->read_ahead = c;
64aaf407 578
93c80368
NB
579 /* Null-terminate the number. */
580 *dest = '\0';
581
582 number->text = POOL_FRONT (pool);
583 number->len = dest - number->text;
584 POOL_COMMIT (pool, number->len + 1);
0d9f234d
NB
585}
586
587/* Subroutine of parse_string. Emits error for unterminated strings. */
588static void
93c80368 589unterminated (pfile, term)
0d9f234d 590 cpp_reader *pfile;
0d9f234d
NB
591 int term;
592{
593 cpp_error (pfile, "missing terminating %c character", term);
594
93c80368
NB
595 if (term == '\"' && pfile->mlstring_pos.line
596 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
041c3194 597 {
93c80368
NB
598 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
599 pfile->mlstring_pos.col,
0d9f234d 600 "possible start of unterminated string literal");
93c80368 601 pfile->mlstring_pos.line = 0;
041c3194 602 }
45b966db
ZW
603}
604
93c80368
NB
605/* Subroutine of parse_string. */
606static int
607unescaped_terminator_p (pfile, dest)
608 cpp_reader *pfile;
609 const unsigned char *dest;
610{
611 const unsigned char *start, *temp;
612
613 /* In #include-style directives, terminators are not escapeable. */
614 if (pfile->state.angled_headers)
615 return 1;
616
617 start = POOL_FRONT (pfile->string_pool);
618
619 /* An odd number of consecutive backslashes represents an escaped
620 terminator. */
621 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
622 ;
623
624 return ((dest - temp) & 1) == 0;
625}
626
0d9f234d
NB
627/* Parses a string, character constant, or angle-bracketed header file
628 name. Handles embedded trigraphs and escaped newlines.
45b966db 629
0d9f234d
NB
630 Multi-line strings are allowed, but they are deprecated within
631 directives. */
041c3194 632static void
0d9f234d 633parse_string (pfile, token, terminator)
45b966db 634 cpp_reader *pfile;
041c3194 635 cpp_token *token;
0d9f234d 636 cppchar_t terminator;
45b966db 637{
041c3194 638 cpp_buffer *buffer = pfile->buffer;
93c80368
NB
639 cpp_pool *pool = pfile->string_pool;
640 unsigned char *dest, *limit;
0d9f234d
NB
641 cppchar_t c;
642 unsigned int nulls = 0;
643
93c80368
NB
644 dest = POOL_FRONT (pool);
645 limit = POOL_LIMIT (pool);
646
0d9f234d 647 for (;;)
45b966db 648 {
0d9f234d
NB
649 if (buffer->cur == buffer->rlimit)
650 {
651 c = EOF;
93c80368 652 unterminated (pfile, terminator);
0d9f234d
NB
653 break;
654 }
655 c = *buffer->cur++;
656
657 have_char:
658 /* Handle trigraphs, escaped newlines etc. */
659 if (c == '?' || c == '\\')
660 c = skip_escaped_newlines (buffer, c);
45b966db 661
93c80368 662 if (c == terminator && unescaped_terminator_p (pfile, dest))
45b966db 663 {
93c80368
NB
664 c = EOF;
665 break;
0d9f234d
NB
666 }
667 else if (is_vspace (c))
668 {
669 /* In assembly language, silently terminate string and
670 character literals at end of line. This is a kludge
671 around not knowing where comments are. */
672 if (CPP_OPTION (pfile, lang_asm) && terminator != '>')
673 break;
45b966db 674
0d9f234d
NB
675 /* Character constants and header names may not extend over
676 multiple lines. In Standard C, neither may strings.
677 Unfortunately, we accept multiline strings as an
16eb2788
NB
678 extension, except in #include family directives. */
679 if (terminator != '"' || pfile->state.angled_headers)
45b966db 680 {
93c80368 681 unterminated (pfile, terminator);
0d9f234d 682 break;
45b966db 683 }
45b966db 684
93c80368 685 if (pfile->mlstring_pos.line == 0)
0d9f234d 686 {
93c80368 687 pfile->mlstring_pos = pfile->lexer_pos;
0d9f234d
NB
688 if (CPP_PEDANTIC (pfile))
689 cpp_pedwarn (pfile, "multi-line string constant");
041c3194 690 }
0d9f234d
NB
691
692 handle_newline (buffer, c); /* Stores to read_ahead. */
693 c = '\n';
694 }
695 else if (c == '\0')
696 {
697 if (nulls++ == 0)
698 cpp_warning (pfile, "null character(s) preserved in literal");
45b966db 699 }
45b966db 700
93c80368
NB
701 /* No terminating null for strings - they could contain nulls. */
702 if (dest >= limit)
703 limit = _cpp_next_chunk (pool, 0, &dest);
704 *dest++ = c;
476f2869 705
0d9f234d
NB
706 /* If we had a new line, the next character is in read_ahead. */
707 if (c != '\n')
708 continue;
709 c = buffer->read_ahead;
710 if (c != EOF)
711 goto have_char;
45b966db
ZW
712 }
713
93c80368 714 /* Remember the next character. */
0d9f234d 715 buffer->read_ahead = c;
45b966db 716
93c80368
NB
717 token->val.str.text = POOL_FRONT (pool);
718 token->val.str.len = dest - token->val.str.text;
719 POOL_COMMIT (pool, token->val.str.len);
0d9f234d 720}
041c3194 721
93c80368 722/* The stored comment includes the comment start and any terminator. */
9e62c811 723static void
0d9f234d
NB
724save_comment (pfile, token, from)
725 cpp_reader *pfile;
041c3194
ZW
726 cpp_token *token;
727 const unsigned char *from;
9e62c811 728{
041c3194 729 unsigned char *buffer;
0d9f234d 730 unsigned int len;
0d9f234d 731
1c6d33ef 732 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
3542203b
NB
733 /* C++ comments probably (not definitely) have moved past a new
734 line, which we don't want to save in the comment. */
735 if (pfile->buffer->read_ahead != EOF)
736 len--;
93c80368 737 buffer = _cpp_pool_alloc (pfile->string_pool, len);
041c3194 738
041c3194 739 token->type = CPP_COMMENT;
bfb9dc7f 740 token->val.str.len = len;
0d9f234d 741 token->val.str.text = buffer;
45b966db 742
1c6d33ef
NB
743 buffer[0] = '/';
744 memcpy (buffer + 1, from, len - 1);
0d9f234d 745}
45b966db 746
cbcff6df
NB
747/* Subroutine of lex_token to handle '%'. A little tricky, since we
748 want to avoid stepping back when lexing %:%X. */
0d9f234d 749static void
cbcff6df 750lex_percent (buffer, result)
0d9f234d
NB
751 cpp_buffer *buffer;
752 cpp_token *result;
0d9f234d 753{
cbcff6df
NB
754 cppchar_t c;
755
756 result->type = CPP_MOD;
757 /* Parsing %:%X could leave an extra character. */
758 if (buffer->extra_char == EOF)
759 c = get_effective_char (buffer);
760 else
761 {
762 c = buffer->read_ahead = buffer->extra_char;
763 buffer->extra_char = EOF;
764 }
765
766 if (c == '=')
767 ACCEPT_CHAR (CPP_MOD_EQ);
768 else if (CPP_OPTION (buffer->pfile, digraphs))
769 {
770 if (c == ':')
771 {
772 result->flags |= DIGRAPH;
773 ACCEPT_CHAR (CPP_HASH);
774 if (get_effective_char (buffer) == '%')
775 {
776 buffer->extra_char = get_effective_char (buffer);
777 if (buffer->extra_char == ':')
778 {
779 buffer->extra_char = EOF;
780 ACCEPT_CHAR (CPP_PASTE);
781 }
782 else
783 /* We'll catch the extra_char when we're called back. */
784 buffer->read_ahead = '%';
785 }
786 }
787 else if (c == '>')
788 {
789 result->flags |= DIGRAPH;
790 ACCEPT_CHAR (CPP_CLOSE_BRACE);
791 }
792 }
793}
794
795/* Subroutine of lex_token to handle '.'. This is tricky, since we
796 want to avoid stepping back when lexing '...' or '.123'. In the
797 latter case we should also set a flag for parse_number. */
798static void
799lex_dot (pfile, result)
800 cpp_reader *pfile;
801 cpp_token *result;
802{
803 cpp_buffer *buffer = pfile->buffer;
804 cppchar_t c;
805
806 /* Parsing ..X could leave an extra character. */
807 if (buffer->extra_char == EOF)
808 c = get_effective_char (buffer);
809 else
810 {
811 c = buffer->read_ahead = buffer->extra_char;
812 buffer->extra_char = EOF;
813 }
0d9f234d 814
cbcff6df
NB
815 /* All known character sets have 0...9 contiguous. */
816 if (c >= '0' && c <= '9')
817 {
818 result->type = CPP_NUMBER;
93c80368 819 parse_number (pfile, &result->val.str, c, 1);
cbcff6df 820 }
041c3194 821 else
ea4a453b 822 {
cbcff6df
NB
823 result->type = CPP_DOT;
824 if (c == '.')
825 {
826 buffer->extra_char = get_effective_char (buffer);
827 if (buffer->extra_char == '.')
828 {
829 buffer->extra_char = EOF;
830 ACCEPT_CHAR (CPP_ELLIPSIS);
831 }
832 else
833 /* We'll catch the extra_char when we're called back. */
834 buffer->read_ahead = '.';
835 }
836 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
837 ACCEPT_CHAR (CPP_DOT_STAR);
ea4a453b 838 }
45b966db
ZW
839}
840
93c80368
NB
841void
842_cpp_lex_token (pfile, result)
45b966db 843 cpp_reader *pfile;
0d9f234d 844 cpp_token *result;
45b966db 845{
0d9f234d 846 cppchar_t c;
041c3194 847 cpp_buffer *buffer = pfile->buffer;
0d9f234d 848 const unsigned char *comment_start;
93c80368
NB
849 unsigned char was_skip_newlines = pfile->state.skip_newlines;
850 unsigned char newline_in_args = 0;
9ec7291f 851
93c80368 852 pfile->state.skip_newlines = 0;
0d9f234d
NB
853 result->flags = 0;
854 next_char:
93c80368 855 pfile->lexer_pos.line = buffer->lineno;
0d9f234d 856 next_char2:
93c80368 857 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
041c3194 858
0d9f234d
NB
859 c = buffer->read_ahead;
860 if (c == EOF && buffer->cur < buffer->rlimit)
861 {
862 c = *buffer->cur++;
93c80368 863 pfile->lexer_pos.col++;
0d9f234d 864 }
45b966db 865
0d9f234d
NB
866 do_switch:
867 buffer->read_ahead = EOF;
868 switch (c)
45b966db 869 {
0d9f234d 870 case EOF:
93c80368 871 /* Non-empty files should end in a newline. Ignore for command
a5c3cccd
NB
872 line and _Pragma buffers. */
873 if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
93c80368
NB
874 cpp_pedwarn (pfile, "no newline at end of file");
875 pfile->state.skip_newlines = 1;
0d9f234d
NB
876 result->type = CPP_EOF;
877 break;
45b966db 878
0d9f234d
NB
879 case ' ': case '\t': case '\f': case '\v': case '\0':
880 skip_whitespace (pfile, c);
881 result->flags |= PREV_WHITE;
882 goto next_char2;
883
884 case '\n': case '\r':
93c80368
NB
885 /* Don't let directives spill over to the next line. */
886 if (pfile->state.in_directive)
887 buffer->read_ahead = c;
888 else
45b966db 889 {
93c80368
NB
890 handle_newline (buffer, c);
891
892 pfile->lexer_pos.output_line = buffer->lineno;
893
894 /* Skip newlines in macro arguments (except in directives). */
895 if (pfile->state.parsing_args)
896 {
897 /* Set the whitespace flag. */
898 newline_in_args = 1;
899 result->flags |= PREV_WHITE;
900 goto next_char;
901 }
902
903 if (was_skip_newlines)
904 {
905 /* Clear any whitespace flag. */
906 result->flags &= ~PREV_WHITE;
907 goto next_char;
908 }
45b966db 909 }
93c80368
NB
910
911 /* Next we're at BOL, so skip new lines. */
912 pfile->state.skip_newlines = 1;
913 result->type = CPP_EOF;
0d9f234d 914 break;
46d07497 915
0d9f234d
NB
916 case '?':
917 case '\\':
918 /* These could start an escaped newline, or '?' a trigraph. Let
919 skip_escaped_newlines do all the work. */
920 {
921 unsigned int lineno = buffer->lineno;
922
923 c = skip_escaped_newlines (buffer, c);
924 if (lineno != buffer->lineno)
925 /* We had at least one escaped newline of some sort, and the
926 next character is in buffer->read_ahead. Update the
927 token's line and column. */
928 goto next_char;
929
930 /* We are either the original '?' or '\\', or a trigraph. */
931 result->type = CPP_QUERY;
932 buffer->read_ahead = EOF;
933 if (c == '\\')
12c4f523 934 goto random_char;
0d9f234d
NB
935 else if (c != '?')
936 goto do_switch;
937 }
938 break;
46d07497 939
0d9f234d
NB
940 case '0': case '1': case '2': case '3': case '4':
941 case '5': case '6': case '7': case '8': case '9':
942 result->type = CPP_NUMBER;
93c80368 943 parse_number (pfile, &result->val.str, c, 0);
0d9f234d 944 break;
46d07497 945
0d9f234d
NB
946 case '$':
947 if (!CPP_OPTION (pfile, dollars_in_ident))
948 goto random_char;
949 /* Fall through... */
950
951 case '_':
952 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
953 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
954 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
955 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
956 case 'y': case 'z':
957 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
958 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
959 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
960 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
961 case 'Y': case 'Z':
962 result->type = CPP_NAME;
963 result->val.node = parse_identifier (pfile, c);
964
965 /* 'L' may introduce wide characters or strings. */
93c80368 966 if (result->val.node == pfile->spec_nodes.n_L)
0d9f234d
NB
967 {
968 c = buffer->read_ahead; /* For make_string. */
969 if (c == '\'' || c == '"')
ba89d661 970 {
0d9f234d
NB
971 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
972 goto make_string;
ba89d661 973 }
0d9f234d
NB
974 }
975 /* Convert named operators to their proper types. */
93c80368 976 else if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
977 {
978 result->flags |= NAMED_OP;
93c80368 979 result->type = result->val.node->value.operator;
0d9f234d
NB
980 }
981 break;
982
983 case '\'':
984 case '"':
985 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
986 make_string:
987 parse_string (pfile, result, c);
988 break;
041c3194 989
0d9f234d 990 case '/':
1c6d33ef
NB
991 /* A potential block or line comment. */
992 comment_start = buffer->cur;
0d9f234d
NB
993 result->type = CPP_DIV;
994 c = get_effective_char (buffer);
995 if (c == '=')
996 ACCEPT_CHAR (CPP_DIV_EQ);
1c6d33ef
NB
997 if (c != '/' && c != '*')
998 break;
45b966db 999
1c6d33ef
NB
1000 if (c == '*')
1001 {
0d9f234d 1002 if (skip_block_comment (pfile))
93c80368
NB
1003 cpp_error_with_line (pfile, pfile->lexer_pos.line,
1004 pfile->lexer_pos.col,
0d9f234d 1005 "unterminated comment");
0d9f234d 1006 }
1c6d33ef 1007 else
0d9f234d 1008 {
1c6d33ef
NB
1009 if (!CPP_OPTION (pfile, cplusplus_comments)
1010 && !CPP_IN_SYSTEM_HEADER (pfile))
1011 break;
1012
0d9f234d
NB
1013 /* We silently allow C++ comments in system headers,
1014 irrespective of conformance mode, because lots of
1015 broken systems do that and trying to clean it up in
1016 fixincludes is a nightmare. */
a94c1199
NB
1017 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1018 && ! buffer->warned_cplusplus_comments)
041c3194 1019 {
1c6d33ef
NB
1020 cpp_pedwarn (pfile,
1021 "C++ style comments are not allowed in ISO C89");
1022 cpp_pedwarn (pfile,
1023 "(this will be reported only once per input file)");
1024 buffer->warned_cplusplus_comments = 1;
1025 }
0d9f234d 1026
a94c1199 1027 /* Skip_line_comment updates buffer->read_ahead. */
1c6d33ef 1028 if (skip_line_comment (pfile))
93c80368
NB
1029 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1030 pfile->lexer_pos.col,
1c6d33ef
NB
1031 "multi-line comment");
1032 }
0d9f234d 1033
1c6d33ef
NB
1034 /* Skipping the comment has updated buffer->read_ahead. */
1035 if (!pfile->state.save_comments)
1036 {
1037 result->flags |= PREV_WHITE;
1038 goto next_char;
0d9f234d 1039 }
1c6d33ef
NB
1040
1041 /* Save the comment as a token in its own right. */
1042 save_comment (pfile, result, comment_start);
0d9f234d
NB
1043 break;
1044
1045 case '<':
1046 if (pfile->state.angled_headers)
1047 {
1048 result->type = CPP_HEADER_NAME;
1049 c = '>'; /* terminator. */
1050 goto make_string;
1051 }
45b966db 1052
0d9f234d
NB
1053 result->type = CPP_LESS;
1054 c = get_effective_char (buffer);
1055 if (c == '=')
1056 ACCEPT_CHAR (CPP_LESS_EQ);
1057 else if (c == '<')
1058 {
1059 ACCEPT_CHAR (CPP_LSHIFT);
1060 if (get_effective_char (buffer) == '=')
1061 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1062 }
1063 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1064 {
1065 ACCEPT_CHAR (CPP_MIN);
1066 if (get_effective_char (buffer) == '=')
1067 ACCEPT_CHAR (CPP_MIN_EQ);
1068 }
1069 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1070 {
1071 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1072 result->flags |= DIGRAPH;
1073 }
1074 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1075 {
1076 ACCEPT_CHAR (CPP_OPEN_BRACE);
1077 result->flags |= DIGRAPH;
1078 }
1079 break;
1080
1081 case '>':
1082 result->type = CPP_GREATER;
1083 c = get_effective_char (buffer);
1084 if (c == '=')
1085 ACCEPT_CHAR (CPP_GREATER_EQ);
1086 else if (c == '>')
1087 {
1088 ACCEPT_CHAR (CPP_RSHIFT);
1089 if (get_effective_char (buffer) == '=')
1090 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1091 }
1092 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1093 {
1094 ACCEPT_CHAR (CPP_MAX);
1095 if (get_effective_char (buffer) == '=')
1096 ACCEPT_CHAR (CPP_MAX_EQ);
1097 }
1098 break;
1099
cbcff6df
NB
1100 case '%':
1101 lex_percent (buffer, result);
93c80368
NB
1102 if (result->type == CPP_HASH)
1103 goto do_hash;
0d9f234d
NB
1104 break;
1105
cbcff6df
NB
1106 case '.':
1107 lex_dot (pfile, result);
0d9f234d 1108 break;
45b966db 1109
0d9f234d
NB
1110 case '+':
1111 result->type = CPP_PLUS;
1112 c = get_effective_char (buffer);
1113 if (c == '=')
1114 ACCEPT_CHAR (CPP_PLUS_EQ);
1115 else if (c == '+')
1116 ACCEPT_CHAR (CPP_PLUS_PLUS);
1117 break;
04e3ec78 1118
0d9f234d
NB
1119 case '-':
1120 result->type = CPP_MINUS;
1121 c = get_effective_char (buffer);
1122 if (c == '>')
1123 {
1124 ACCEPT_CHAR (CPP_DEREF);
1125 if (CPP_OPTION (pfile, cplusplus)
1126 && get_effective_char (buffer) == '*')
1127 ACCEPT_CHAR (CPP_DEREF_STAR);
1128 }
1129 else if (c == '=')
1130 ACCEPT_CHAR (CPP_MINUS_EQ);
1131 else if (c == '-')
1132 ACCEPT_CHAR (CPP_MINUS_MINUS);
1133 break;
45b966db 1134
0d9f234d
NB
1135 case '*':
1136 result->type = CPP_MULT;
1137 if (get_effective_char (buffer) == '=')
1138 ACCEPT_CHAR (CPP_MULT_EQ);
1139 break;
04e3ec78 1140
0d9f234d
NB
1141 case '=':
1142 result->type = CPP_EQ;
1143 if (get_effective_char (buffer) == '=')
1144 ACCEPT_CHAR (CPP_EQ_EQ);
1145 break;
f8f769ea 1146
0d9f234d
NB
1147 case '!':
1148 result->type = CPP_NOT;
1149 if (get_effective_char (buffer) == '=')
1150 ACCEPT_CHAR (CPP_NOT_EQ);
1151 break;
45b966db 1152
0d9f234d
NB
1153 case '&':
1154 result->type = CPP_AND;
1155 c = get_effective_char (buffer);
1156 if (c == '=')
1157 ACCEPT_CHAR (CPP_AND_EQ);
1158 else if (c == '&')
1159 ACCEPT_CHAR (CPP_AND_AND);
1160 break;
1161
1162 case '#':
0d9f234d
NB
1163 if (get_effective_char (buffer) == '#')
1164 ACCEPT_CHAR (CPP_PASTE);
93c80368
NB
1165 else
1166 {
1167 result->type = CPP_HASH;
1168 do_hash:
1169 /* CPP_DHASH is the hash introducing a directive. */
1170 if (was_skip_newlines || newline_in_args)
1171 {
1172 result->type = CPP_DHASH;
1173 /* Get whitespace right - newline_in_args sets it. */
1174 if (pfile->lexer_pos.col == 1)
1175 result->flags &= ~PREV_WHITE;
1176 }
1177 }
0d9f234d 1178 break;
45b966db 1179
0d9f234d
NB
1180 case '|':
1181 result->type = CPP_OR;
1182 c = get_effective_char (buffer);
1183 if (c == '=')
1184 ACCEPT_CHAR (CPP_OR_EQ);
1185 else if (c == '|')
1186 ACCEPT_CHAR (CPP_OR_OR);
1187 break;
45b966db 1188
0d9f234d
NB
1189 case '^':
1190 result->type = CPP_XOR;
1191 if (get_effective_char (buffer) == '=')
1192 ACCEPT_CHAR (CPP_XOR_EQ);
1193 break;
45b966db 1194
0d9f234d
NB
1195 case ':':
1196 result->type = CPP_COLON;
1197 c = get_effective_char (buffer);
1198 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1199 ACCEPT_CHAR (CPP_SCOPE);
1200 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1201 {
1202 result->flags |= DIGRAPH;
1203 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1204 }
1205 break;
45b966db 1206
0d9f234d
NB
1207 case '~': result->type = CPP_COMPL; break;
1208 case ',': result->type = CPP_COMMA; break;
1209 case '(': result->type = CPP_OPEN_PAREN; break;
1210 case ')': result->type = CPP_CLOSE_PAREN; break;
1211 case '[': result->type = CPP_OPEN_SQUARE; break;
1212 case ']': result->type = CPP_CLOSE_SQUARE; break;
1213 case '{': result->type = CPP_OPEN_BRACE; break;
1214 case '}': result->type = CPP_CLOSE_BRACE; break;
1215 case ';': result->type = CPP_SEMICOLON; break;
1216
1217 case '@':
1218 if (CPP_OPTION (pfile, objc))
1219 {
1220 /* In Objective C, '@' may begin keywords or strings, like
1221 @keyword or @"string". It would be nice to call
1222 get_effective_char here and test the result. However, we
1223 would then need to pass 2 characters to parse_identifier,
1224 making it ugly and slowing down its main loop. Instead,
1225 we assume we have an identifier, and recover if not. */
1226 result->type = CPP_NAME;
1227 result->val.node = parse_identifier (pfile, c);
1228 if (result->val.node->length != 1)
1229 break;
04e3ec78 1230
0d9f234d
NB
1231 /* OK, so it wasn't an identifier. Maybe a string? */
1232 if (buffer->read_ahead == '"')
041c3194 1233 {
0d9f234d
NB
1234 c = '"';
1235 ACCEPT_CHAR (CPP_OSTRING);
1236 goto make_string;
041c3194 1237 }
0d9f234d
NB
1238 }
1239 goto random_char;
1240
1241 random_char:
1242 default:
1243 result->type = CPP_OTHER;
6c53ebff 1244 result->val.c = c;
0d9f234d
NB
1245 break;
1246 }
1247}
1248
93c80368
NB
1249/* An upper bound on the number of bytes needed to spell a token,
1250 including preceding whitespace. */
1251unsigned int
1252cpp_token_len (token)
1253 const cpp_token *token;
0d9f234d 1254{
93c80368 1255 unsigned int len;
6d2c2047 1256
93c80368 1257 switch (TOKEN_SPELL (token))
041c3194 1258 {
93c80368
NB
1259 default: len = 0; break;
1260 case SPELL_STRING: len = token->val.str.len; break;
1261 case SPELL_IDENT: len = token->val.node->length; break;
041c3194 1262 }
93c80368
NB
1263 /* 1 for whitespace, 4 for comment delimeters. */
1264 return len + 5;
6d2c2047
ZW
1265}
1266
041c3194 1267/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1268 already contain the enough space to hold the token's spelling.
1269 Returns a pointer to the character after the last character
1270 written. */
93c80368
NB
1271unsigned char *
1272cpp_spell_token (pfile, token, buffer)
041c3194
ZW
1273 cpp_reader *pfile; /* Would be nice to be rid of this... */
1274 const cpp_token *token;
1275 unsigned char *buffer;
1276{
96be6998 1277 switch (TOKEN_SPELL (token))
041c3194
ZW
1278 {
1279 case SPELL_OPERATOR:
1280 {
1281 const unsigned char *spelling;
1282 unsigned char c;
d6d5f795 1283
041c3194 1284 if (token->flags & DIGRAPH)
93c80368 1285 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
92936ecf
ZW
1286 else if (token->flags & NAMED_OP)
1287 goto spell_ident;
041c3194 1288 else
96be6998 1289 spelling = TOKEN_NAME (token);
041c3194
ZW
1290
1291 while ((c = *spelling++) != '\0')
1292 *buffer++ = c;
1293 }
1294 break;
d6d5f795 1295
041c3194 1296 case SPELL_IDENT:
92936ecf 1297 spell_ident:
bfb9dc7f
ZW
1298 memcpy (buffer, token->val.node->name, token->val.node->length);
1299 buffer += token->val.node->length;
041c3194 1300 break;
d6d5f795 1301
041c3194
ZW
1302 case SPELL_STRING:
1303 {
ba89d661
ZW
1304 int left, right, tag;
1305 switch (token->type)
1306 {
1307 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1308 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1309 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1310 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1311 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1312 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1313 default: left = '\0'; right = '\0'; tag = '\0'; break;
1314 }
1315 if (tag) *buffer++ = tag;
1316 if (left) *buffer++ = left;
bfb9dc7f
ZW
1317 memcpy (buffer, token->val.str.text, token->val.str.len);
1318 buffer += token->val.str.len;
ba89d661 1319 if (right) *buffer++ = right;
041c3194
ZW
1320 }
1321 break;
d6d5f795 1322
041c3194 1323 case SPELL_CHAR:
6c53ebff 1324 *buffer++ = token->val.c;
041c3194 1325 break;
d6d5f795 1326
041c3194 1327 case SPELL_NONE:
96be6998 1328 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1329 break;
1330 }
d6d5f795 1331
041c3194
ZW
1332 return buffer;
1333}
d6d5f795 1334
93c80368
NB
1335/* Returns a token as a null-terminated string. The string is
1336 temporary, and automatically freed later. Useful for diagnostics. */
1337unsigned char *
1338cpp_token_as_text (pfile, token)
c5a04734 1339 cpp_reader *pfile;
041c3194 1340 const cpp_token *token;
c5a04734 1341{
93c80368
NB
1342 unsigned int len = cpp_token_len (token);
1343 unsigned char *start = _cpp_pool_alloc (&pfile->temp_string_pool, len), *end;
c5a04734 1344
93c80368
NB
1345 end = cpp_spell_token (pfile, token, start);
1346 end[0] = '\0';
c5a04734 1347
93c80368
NB
1348 return start;
1349}
c5a04734 1350
93c80368
NB
1351/* Used by C front ends. Should really move to using cpp_token_as_text. */
1352const char *
1353cpp_type2name (type)
1354 enum cpp_ttype type;
1355{
1356 return (const char *) token_spellings[type].name;
1357}
c5a04734 1358
93c80368
NB
1359/* Writes the spelling of token to FP. Separate from cpp_spell_token
1360 for efficiency - to avoid double-buffering. Also, outputs a space
1361 if PREV_WHITE is flagged. */
1362void
1363cpp_output_token (token, fp)
1364 const cpp_token *token;
1365 FILE *fp;
1366{
1367 if (token->flags & PREV_WHITE)
1368 putc (' ', fp);
d8090680 1369
93c80368 1370 switch (TOKEN_SPELL (token))
c5a04734 1371 {
93c80368
NB
1372 case SPELL_OPERATOR:
1373 {
1374 const unsigned char *spelling;
c5a04734 1375
93c80368
NB
1376 if (token->flags & DIGRAPH)
1377 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1378 else if (token->flags & NAMED_OP)
1379 goto spell_ident;
1380 else
1381 spelling = TOKEN_NAME (token);
041c3194 1382
93c80368
NB
1383 ufputs (spelling, fp);
1384 }
1385 break;
041c3194 1386
93c80368
NB
1387 spell_ident:
1388 case SPELL_IDENT:
1389 ufputs (token->val.node->name, fp);
1390 break;
041c3194 1391
93c80368
NB
1392 case SPELL_STRING:
1393 {
1394 int left, right, tag;
1395 switch (token->type)
1396 {
1397 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1398 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1399 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1400 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1401 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1402 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1403 default: left = '\0'; right = '\0'; tag = '\0'; break;
1404 }
1405 if (tag) putc (tag, fp);
1406 if (left) putc (left, fp);
1407 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1408 if (right) putc (right, fp);
1409 }
1410 break;
c5a04734 1411
93c80368 1412 case SPELL_CHAR:
6c53ebff 1413 putc (token->val.c, fp);
93c80368 1414 break;
c5a04734 1415
93c80368
NB
1416 case SPELL_NONE:
1417 /* An error, most probably. */
1418 break;
041c3194 1419 }
c5a04734
ZW
1420}
1421
93c80368
NB
1422/* Compare two tokens. */
1423int
1424_cpp_equiv_tokens (a, b)
1425 const cpp_token *a, *b;
c5a04734 1426{
93c80368
NB
1427 if (a->type == b->type && a->flags == b->flags)
1428 switch (TOKEN_SPELL (a))
1429 {
1430 default: /* Keep compiler happy. */
1431 case SPELL_OPERATOR:
1432 return 1;
1433 case SPELL_CHAR:
6c53ebff 1434 return a->val.c == b->val.c; /* Character. */
93c80368 1435 case SPELL_NONE:
6c53ebff 1436 return (a->type != CPP_MACRO_ARG || a->val.c == b->val.c);
93c80368
NB
1437 case SPELL_IDENT:
1438 return a->val.node == b->val.node;
1439 case SPELL_STRING:
1440 return (a->val.str.len == b->val.str.len
1441 && !memcmp (a->val.str.text, b->val.str.text,
1442 a->val.str.len));
1443 }
c5a04734 1444
041c3194
ZW
1445 return 0;
1446}
1447
93c80368
NB
#if 0
/* Compare two token lists.  Returns 1 if A and B hold the same number
   of tokens and each pair of corresponding tokens satisfies
   _cpp_equiv_tokens, otherwise 0.  Currently compiled out.  */
int
_cpp_equiv_toklists (a, b)
     const struct toklist *a, *b;
{
  unsigned int idx;
  unsigned int ntokens = a->limit - a->first;

  if (ntokens != (b->limit - b->first))
    return 0;

  for (idx = 0; idx < ntokens; idx++)
    {
      if (! _cpp_equiv_tokens (a->first + idx, b->first + idx))
        return 0;
    }

  return 1;
}
#endif
c5a04734 1467
041c3194
ZW
1468/* Determine whether two tokens can be pasted together, and if so,
1469 what the resulting token is. Returns CPP_EOF if the tokens cannot
1470 be pasted, or the appropriate type for the merged token if they
1471 can. */
7de4d004 1472enum cpp_ttype
93c80368 1473cpp_can_paste (pfile, token1, token2, digraph)
041c3194
ZW
1474 cpp_reader * pfile;
1475 const cpp_token *token1, *token2;
1476 int* digraph;
c5a04734 1477{
041c3194
ZW
1478 enum cpp_ttype a = token1->type, b = token2->type;
1479 int cxx = CPP_OPTION (pfile, cplusplus);
c5a04734 1480
92936ecf
ZW
1481 /* Treat named operators as if they were ordinary NAMEs. */
1482 if (token1->flags & NAMED_OP)
1483 a = CPP_NAME;
1484 if (token2->flags & NAMED_OP)
1485 b = CPP_NAME;
1486
041c3194
ZW
1487 if (a <= CPP_LAST_EQ && b == CPP_EQ)
1488 return a + (CPP_EQ_EQ - CPP_EQ);
c5a04734 1489
041c3194 1490 switch (a)
c5a04734 1491 {
041c3194
ZW
1492 case CPP_GREATER:
1493 if (b == a) return CPP_RSHIFT;
1494 if (b == CPP_QUERY && cxx) return CPP_MAX;
1495 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1496 break;
1497 case CPP_LESS:
1498 if (b == a) return CPP_LSHIFT;
1499 if (b == CPP_QUERY && cxx) return CPP_MIN;
1500 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
9b55f29a
NB
1501 if (CPP_OPTION (pfile, digraphs))
1502 {
1503 if (b == CPP_COLON)
1504 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1505 if (b == CPP_MOD)
1506 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1507 }
041c3194 1508 break;
c5a04734 1509
041c3194
ZW
1510 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1511 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1512 case CPP_OR: if (b == a) return CPP_OR_OR; break;
c5a04734 1513
041c3194
ZW
1514 case CPP_MINUS:
1515 if (b == a) return CPP_MINUS_MINUS;
1516 if (b == CPP_GREATER) return CPP_DEREF;
1517 break;
1518 case CPP_COLON:
1519 if (b == a && cxx) return CPP_SCOPE;
9b55f29a 1520 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
041c3194
ZW
1521 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1522 break;
1523
1524 case CPP_MOD:
9b55f29a
NB
1525 if (CPP_OPTION (pfile, digraphs))
1526 {
1527 if (b == CPP_GREATER)
1528 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1529 if (b == CPP_COLON)
1530 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1531 }
041c3194
ZW
1532 break;
1533 case CPP_DEREF:
1534 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1535 break;
1536 case CPP_DOT:
1537 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1538 if (b == CPP_NUMBER) return CPP_NUMBER;
1539 break;
1540
1541 case CPP_HASH:
1542 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1543 /* %:%: digraph */
1544 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1545 break;
1546
1547 case CPP_NAME:
1548 if (b == CPP_NAME) return CPP_NAME;
1549 if (b == CPP_NUMBER
93c80368 1550 && name_p (pfile, &token2->val.str)) return CPP_NAME;
041c3194 1551 if (b == CPP_CHAR
93c80368 1552 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
041c3194 1553 if (b == CPP_STRING
93c80368 1554 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
041c3194
ZW
1555 break;
1556
1557 case CPP_NUMBER:
1558 if (b == CPP_NUMBER) return CPP_NUMBER;
1559 if (b == CPP_NAME) return CPP_NUMBER;
1560 if (b == CPP_DOT) return CPP_NUMBER;
1561 /* Numbers cannot have length zero, so this is safe. */
1562 if ((b == CPP_PLUS || b == CPP_MINUS)
bfb9dc7f 1563 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
041c3194
ZW
1564 return CPP_NUMBER;
1565 break;
1566
ba89d661 1567 case CPP_OTHER:
6c53ebff 1568 if (CPP_OPTION (pfile, objc) && token1->val.c == '@')
ba89d661
ZW
1569 {
1570 if (b == CPP_NAME) return CPP_NAME;
1571 if (b == CPP_STRING) return CPP_OSTRING;
1572 }
1573
041c3194
ZW
1574 default:
1575 break;
c5a04734
ZW
1576 }
1577
041c3194
ZW
1578 return CPP_EOF;
1579}
1580
93c80368
NB
1581/* Returns nonzero if a space should be inserted to avoid an
1582 accidental token paste for output. For simplicity, it is
1583 conservative, and occasionally advises a space where one is not
1584 needed, e.g. "." and ".2". */
041c3194 1585
93c80368
NB
1586int
1587cpp_avoid_paste (pfile, token1, token2)
c5a04734 1588 cpp_reader *pfile;
93c80368 1589 const cpp_token *token1, *token2;
c5a04734 1590{
93c80368
NB
1591 enum cpp_ttype a = token1->type, b = token2->type;
1592 cppchar_t c;
c5a04734 1593
93c80368
NB
1594 if (token1->flags & NAMED_OP)
1595 a = CPP_NAME;
1596 if (token2->flags & NAMED_OP)
1597 b = CPP_NAME;
c5a04734 1598
93c80368
NB
1599 c = EOF;
1600 if (token2->flags & DIGRAPH)
1601 c = digraph_spellings[b - CPP_FIRST_DIGRAPH][0];
1602 else if (token_spellings[b].category == SPELL_OPERATOR)
1603 c = token_spellings[b].name[0];
c5a04734 1604
93c80368
NB
1605 /* Quickly get everything that can paste with an '='. */
1606 if (a <= CPP_LAST_EQ && c == '=')
1607 return 1;
c5a04734 1608
93c80368 1609 switch (a)
c5a04734 1610 {
93c80368
NB
1611 case CPP_GREATER: return c == '>' || c == '?';
1612 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1613 case CPP_PLUS: return c == '+';
1614 case CPP_MINUS: return c == '-' || c == '>';
1615 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1616 case CPP_MOD: return c == ':' || c == '>';
1617 case CPP_AND: return c == '&';
1618 case CPP_OR: return c == '|';
1619 case CPP_COLON: return c == ':' || c == '>';
1620 case CPP_DEREF: return c == '*';
1621 case CPP_DOT: return c == '.' || c == '%';
1622 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1623 case CPP_NAME: return ((b == CPP_NUMBER
1624 && name_p (pfile, &token2->val.str))
1625 || b == CPP_NAME
1626 || b == CPP_CHAR || b == CPP_STRING); /* L */
1627 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1628 || c == '.' || c == '+' || c == '-');
1629 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
6c53ebff 1630 && token1->val.c == '@'
93c80368
NB
1631 && (b == CPP_NAME || b == CPP_STRING));
1632 default: break;
c5a04734 1633 }
c5a04734 1634
417f3e3a 1635 return 0;
c5a04734
ZW
1636}
1637
93c80368
NB
1638/* Output all the remaining tokens on the current line, and a newline
1639 character, to FP. Leading whitespace is removed. */
c5a04734 1640void
93c80368 1641cpp_output_line (pfile, fp)
c5a04734 1642 cpp_reader *pfile;
93c80368 1643 FILE *fp;
c5a04734 1644{
93c80368 1645 cpp_token token;
96be6998 1646
93c80368
NB
1647 _cpp_get_token (pfile, &token);
1648 token.flags &= ~PREV_WHITE;
1649 while (token.type != CPP_EOF)
96be6998 1650 {
93c80368
NB
1651 cpp_output_token (&token, fp);
1652 _cpp_get_token (pfile, &token);
96be6998
ZW
1653 }
1654
93c80368 1655 putc ('\n', fp);
041c3194 1656}
c5a04734 1657
93c80368 1658/* Memory pools. */
417f3e3a 1659
/* Alignment probe.  The leading char forces the compiler to pad up to
   whatever boundary the union needs, so the offset of U is the
   alignment sufficient for both a double and a pointer.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment used for pool chunks and for pools that request none.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
417f3e3a 1671
93c80368
NB
1672static int
1673chunk_suitable (pool, chunk, size)
1674 cpp_pool *pool;
1675 cpp_chunk *chunk;
1676 unsigned int size;
1677{
1678 /* Being at least twice SIZE means we can use memcpy in
1679 _cpp_next_chunk rather than memmove. Besides, it's a good idea
1680 anyway. */
1681 return (chunk && pool->locked != chunk
1682 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
041c3194 1683}
c5a04734 1684
93c80368
NB
1685/* Returns the end of the new pool. PTR points to a char in the old
1686 pool, and is updated to point to the same char in the new pool. */
1687unsigned char *
1688_cpp_next_chunk (pool, len, ptr)
1689 cpp_pool *pool;
1690 unsigned int len;
1691 unsigned char **ptr;
041c3194 1692{
93c80368 1693 cpp_chunk *chunk = pool->cur->next;
c5a04734 1694
93c80368
NB
1695 /* LEN is the minimum size we want in the new pool. */
1696 len += POOL_ROOM (pool);
1697 if (! chunk_suitable (pool, chunk, len))
041c3194 1698 {
93c80368 1699 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
c5a04734 1700
93c80368
NB
1701 chunk->next = pool->cur->next;
1702 pool->cur->next = chunk;
c5a04734
ZW
1703 }
1704
93c80368
NB
1705 /* Update the pointer before changing chunk's front. */
1706 if (ptr)
1707 *ptr += chunk->base - POOL_FRONT (pool);
041c3194 1708
93c80368
NB
1709 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
1710 chunk->front = chunk->base;
041c3194 1711
93c80368
NB
1712 pool->cur = chunk;
1713 return POOL_LIMIT (pool);
c5a04734
ZW
1714}
1715
93c80368
NB
1716static cpp_chunk *
1717new_chunk (size)
1718 unsigned int size;
041c3194 1719{
93c80368
NB
1720 unsigned char *base;
1721 cpp_chunk *result;
3fef5b2b 1722
93c80368
NB
1723 size = ALIGN (size, DEFAULT_ALIGNMENT);
1724 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1725 /* Put the chunk descriptor at the end. Then chunk overruns will
1726 cause obvious chaos. */
1727 result = (cpp_chunk *) (base + size);
1728 result->base = base;
1729 result->front = base;
1730 result->limit = base + size;
1731 result->next = 0;
417f3e3a 1732
93c80368 1733 return result;
041c3194
ZW
1734}
1735
93c80368
NB
1736void
1737_cpp_init_pool (pool, size, align, temp)
1738 cpp_pool *pool;
1739 unsigned int size, align, temp;
1740{
1741 if (align == 0)
1742 align = DEFAULT_ALIGNMENT;
1743 if (align & (align - 1))
1744 abort ();
1745 pool->align = align;
1746 pool->cur = new_chunk (size);
1747 pool->locked = 0;
1748 pool->locks = 0;
1749 if (temp)
1750 pool->cur->next = pool->cur;
041c3194
ZW
1751}
1752
93c80368
NB
1753void
1754_cpp_lock_pool (pool)
1755 cpp_pool *pool;
041c3194 1756{
93c80368
NB
1757 if (pool->locks++ == 0)
1758 pool->locked = pool->cur;
041c3194
ZW
1759}
1760
93c80368
NB
1761void
1762_cpp_unlock_pool (pool)
1763 cpp_pool *pool;
041c3194 1764{
93c80368
NB
1765 if (--pool->locks == 0)
1766 pool->locked = 0;
041c3194
ZW
1767}
1768
93c80368
NB
1769void
1770_cpp_free_pool (pool)
1771 cpp_pool *pool;
3fef5b2b 1772{
93c80368 1773 cpp_chunk *chunk = pool->cur, *next;
3fef5b2b 1774
93c80368 1775 do
3fef5b2b 1776 {
93c80368
NB
1777 next = chunk->next;
1778 free (chunk->base);
1779 chunk = next;
3fef5b2b 1780 }
93c80368 1781 while (chunk && chunk != pool->cur);
041c3194 1782}
041c3194 1783
93c80368
NB
1784/* Reserve LEN bytes from a memory pool. */
1785unsigned char *
1786_cpp_pool_reserve (pool, len)
1787 cpp_pool *pool;
1788 unsigned int len;
041c3194 1789{
93c80368
NB
1790 len = ALIGN (len, pool->align);
1791 if (len > (unsigned int) POOL_ROOM (pool))
1792 _cpp_next_chunk (pool, len, 0);
041c3194 1793
93c80368 1794 return POOL_FRONT (pool);
c5a04734
ZW
1795}
1796
93c80368
NB
1797/* Allocate LEN bytes from a memory pool. */
1798unsigned char *
1799_cpp_pool_alloc (pool, len)
1800 cpp_pool *pool;
1801 unsigned int len;
041c3194 1802{
93c80368 1803 unsigned char *result = _cpp_pool_reserve (pool, len);
417f3e3a 1804
93c80368
NB
1805 POOL_COMMIT (pool, len);
1806 return result;
041c3194 1807}