]> git.ipfire.org Git - thirdparty/gcc.git/blame - libcpp/lex.c
* tree-outof-ssa.c (coalesce_abnormal_edges): Fix typo in
[thirdparty/gcc.git] / libcpp / lex.c
CommitLineData
0578f103 1/* CPP Library - lexical analysis.
3eccc2db 2 Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
0578f103 3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7
8This program is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 2, or (at your option) any
11later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with this program; if not, write to the Free Software
20Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
21
22#include "config.h"
23#include "system.h"
0578f103 24#include "cpplib.h"
d856c8a6 25#include "internal.h"
0578f103 26
/* Spelling category of a token type.  One of these values is stored
   in the `category' field of each token_spellings entry.  */
enum spell_type
{
  SPELL_OPERATOR = 0,   /* Category given to every OP () table entry.  */
  SPELL_IDENT = 1,
  SPELL_LITERAL = 2,
  SPELL_NONE = 3
};
34
79bd622b 35struct token_spelling
241e762e 36{
79bd622b 37 enum spell_type category;
38 const unsigned char *name;
241e762e 39};
40
0ca849f9 41static const unsigned char *const digraph_spellings[] =
42{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
79bd622b 43
44#define OP(e, s) { SPELL_OPERATOR, U s },
18e43155 45#define TK(e, s) { s, U #e },
0ca849f9 46static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
79bd622b 47#undef OP
48#undef TK
49
50#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
e2f9a79f 52
f7fdd7a1 53static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54static int skip_line_comment (cpp_reader *);
55static void skip_whitespace (cpp_reader *, cppchar_t);
56static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
57static void lex_number (cpp_reader *, cpp_string *);
58static bool forms_identifier_p (cpp_reader *, int);
59static void lex_string (cpp_reader *, cpp_token *, const uchar *);
60static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
61static void create_literal (cpp_reader *, cpp_token *, const uchar *,
62 unsigned int, enum cpp_ttype);
63static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
64static int name_p (cpp_reader *, const cpp_string *);
f7fdd7a1 65static tokenrun *next_tokenrun (tokenrun *);
66
f7fdd7a1 67static _cpp_buff *new_buff (size_t);
bce8e0c0 68
e920deaf 69
f80e83a9 70/* Utility routine:
2c63d6c8 71
76faa4c0 72 Compares, the token TOKEN to the NUL-terminated string STRING.
73 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
f80e83a9 74int
f7fdd7a1 75cpp_ideq (const cpp_token *token, const char *string)
f80e83a9 76{
76faa4c0 77 if (token->type != CPP_NAME)
f80e83a9 78 return 0;
76faa4c0 79
b6d18b0a 80 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
bce8e0c0 81}
50fd6b48 82
a54e0bf8 83/* Record a note TYPE at byte POS into the current cleaned logical
84 line. */
1e0ef2fd 85static void
f7fdd7a1 86add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
338fa5f7 87{
a54e0bf8 88 if (buffer->notes_used == buffer->notes_cap)
89 {
90 buffer->notes_cap = buffer->notes_cap * 2 + 200;
f0af5a88 91 buffer->notes = xrealloc (buffer->notes,
92 buffer->notes_cap * sizeof (_cpp_line_note));
a54e0bf8 93 }
338fa5f7 94
a54e0bf8 95 buffer->notes[buffer->notes_used].pos = pos;
96 buffer->notes[buffer->notes_used].type = type;
97 buffer->notes_used++;
338fa5f7 98}
99
a54e0bf8 100/* Returns with a logical line that contains no escaped newlines or
101 trigraphs. This is a time-critical inner loop. */
102void
f7fdd7a1 103_cpp_clean_line (cpp_reader *pfile)
0578f103 104{
a54e0bf8 105 cpp_buffer *buffer;
106 const uchar *s;
107 uchar c, *d, *p;
1e0ef2fd 108
a54e0bf8 109 buffer = pfile->buffer;
110 buffer->cur_note = buffer->notes_used = 0;
111 buffer->cur = buffer->line_base = buffer->next_line;
112 buffer->need_line = false;
113 s = buffer->next_line - 1;
1e0ef2fd 114
a54e0bf8 115 if (!buffer->from_stage3)
0578f103 116 {
54d3be91 117 /* Short circuit for the common case of an un-escaped line with
118 no trigraphs. The primary win here is by not writing any
119 data back to memory until we have to. */
120 for (;;)
121 {
122 c = *++s;
123 if (c == '\n' || c == '\r')
124 {
125 d = (uchar *) s;
126
127 if (s == buffer->rlimit)
128 goto done;
129
130 /* DOS line ending? */
131 if (c == '\r' && s[1] == '\n')
132 s++;
133
134 if (s == buffer->rlimit)
135 goto done;
136
137 /* check for escaped newline */
138 p = d;
139 while (p != buffer->next_line && is_nvspace (p[-1]))
140 p--;
141 if (p == buffer->next_line || p[-1] != '\\')
142 goto done;
143
144 /* Have an escaped newline; process it and proceed to
145 the slow path. */
146 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
147 d = p - 2;
148 buffer->next_line = p - 1;
149 break;
150 }
151 if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
152 {
153 /* Have a trigraph. We may or may not have to convert
154 it. Add a line note regardless, for -Wtrigraphs. */
155 add_line_note (buffer, s, s[2]);
156 if (CPP_OPTION (pfile, trigraphs))
157 {
158 /* We do, and that means we have to switch to the
159 slow path. */
160 d = (uchar *) s;
161 *d = _cpp_trigraph_map[s[2]];
162 s += 2;
163 break;
164 }
165 }
166 }
167
a54e0bf8 168
169 for (;;)
4b912310 170 {
a54e0bf8 171 c = *++s;
172 *++d = c;
173
174 if (c == '\n' || c == '\r')
175 {
176 /* Handle DOS line endings. */
177 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
178 s++;
179 if (s == buffer->rlimit)
180 break;
181
182 /* Escaped? */
183 p = d;
184 while (p != buffer->next_line && is_nvspace (p[-1]))
185 p--;
186 if (p == buffer->next_line || p[-1] != '\\')
187 break;
188
aad4a87f 189 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
a54e0bf8 190 d = p - 2;
191 buffer->next_line = p - 1;
192 }
193 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
194 {
195 /* Add a note regardless, for the benefit of -Wtrigraphs. */
aad4a87f 196 add_line_note (buffer, d, s[2]);
a54e0bf8 197 if (CPP_OPTION (pfile, trigraphs))
198 {
199 *d = _cpp_trigraph_map[s[2]];
200 s += 2;
201 }
202 }
4b912310 203 }
0578f103 204 }
a54e0bf8 205 else
206 {
207 do
208 s++;
209 while (*s != '\n' && *s != '\r');
210 d = (uchar *) s;
211
212 /* Handle DOS line endings. */
213 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
214 s++;
215 }
338fa5f7 216
54d3be91 217 done:
a54e0bf8 218 *d = '\n';
aad4a87f 219 /* A sentinel note that should never be processed. */
220 add_line_note (buffer, d + 1, '\n');
a54e0bf8 221 buffer->next_line = s + 1;
0578f103 222}
223
3078f2b2 224/* Return true if the trigraph indicated by NOTE should be warned
225 about in a comment. */
226static bool
f7fdd7a1 227warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
3078f2b2 228{
229 const uchar *p;
230
231 /* Within comments we don't warn about trigraphs, unless the
232 trigraph forms an escaped newline, as that may change
7ef5b942 233 behavior. */
3078f2b2 234 if (note->type != '/')
235 return false;
236
237 /* If -trigraphs, then this was an escaped newline iff the next note
238 is coincident. */
239 if (CPP_OPTION (pfile, trigraphs))
240 return note[1].pos == note->pos;
241
242 /* Otherwise, see if this forms an escaped newline. */
243 p = note->pos + 3;
244 while (is_nvspace (*p))
245 p++;
246
247 /* There might have been escaped newlines between the trigraph and the
248 newline we found. Hence the position test. */
249 return (*p == '\n' && p < note[1].pos);
250}
251
a54e0bf8 252/* Process the notes created by add_line_note as far as the current
253 location. */
254void
f7fdd7a1 255_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
0578f103 256{
c808d026 257 cpp_buffer *buffer = pfile->buffer;
258
a54e0bf8 259 for (;;)
f80e83a9 260 {
a54e0bf8 261 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
262 unsigned int col;
396ffa86 263
a54e0bf8 264 if (note->pos > buffer->cur)
265 break;
396ffa86 266
a54e0bf8 267 buffer->cur_note++;
268 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
435fb09b 269
aad4a87f 270 if (note->type == '\\' || note->type == ' ')
a54e0bf8 271 {
aad4a87f 272 if (note->type == ' ' && !in_comment)
dbddc569 273 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
a54e0bf8 274 "backslash and newline separated by space");
aad4a87f 275
a54e0bf8 276 if (buffer->next_line > buffer->rlimit)
1e0ef2fd 277 {
dbddc569 278 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
a54e0bf8 279 "backslash-newline at end of file");
280 /* Prevent "no newline at end of file" warning. */
281 buffer->next_line = buffer->rlimit;
1e0ef2fd 282 }
a54e0bf8 283
284 buffer->line_base = note->pos;
610625e3 285 CPP_INCREMENT_LINE (pfile, 0);
338fa5f7 286 }
aad4a87f 287 else if (_cpp_trigraph_map[note->type])
288 {
3078f2b2 289 if (CPP_OPTION (pfile, warn_trigraphs)
290 && (!in_comment || warn_in_comment (pfile, note)))
aad4a87f 291 {
292 if (CPP_OPTION (pfile, trigraphs))
dbddc569 293 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
aad4a87f 294 "trigraph ??%c converted to %c",
295 note->type,
296 (int) _cpp_trigraph_map[note->type]);
297 else
1542b1ef 298 {
299 cpp_error_with_line
dbddc569 300 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1542b1ef 301 "trigraph ??%c ignored, use -trigraphs to enable",
302 note->type);
303 }
aad4a87f 304 }
305 }
306 else
307 abort ();
f80e83a9 308 }
0578f103 309}
310
338fa5f7 311/* Skip a C-style block comment. We find the end of the comment by
312 seeing if an asterisk is before every '/' we encounter. Returns
edaf8cb5 313 nonzero if comment terminated by EOF, zero otherwise.
314
315 Buffer->cur points to the initial asterisk of the comment. */
a54e0bf8 316bool
f7fdd7a1 317_cpp_skip_block_comment (cpp_reader *pfile)
0578f103 318{
f80e83a9 319 cpp_buffer *buffer = pfile->buffer;
54d3be91 320 const uchar *cur = buffer->cur;
321 uchar c;
338fa5f7 322
54d3be91 323 cur++;
324 if (*cur == '/')
325 cur++;
338fa5f7 326
a54e0bf8 327 for (;;)
328 {
338fa5f7 329 /* People like decorating comments with '*', so check for '/'
330 instead for efficiency. */
54d3be91 331 c = *cur++;
332
f80e83a9 333 if (c == '/')
0578f103 334 {
54d3be91 335 if (cur[-2] == '*')
338fa5f7 336 break;
f80e83a9 337
338fa5f7 338 /* Warn about potential nested comments, but not if the '/'
3fb1e43b 339 comes immediately before the true comment delimiter.
f80e83a9 340 Don't bother to get it right across escaped newlines. */
338fa5f7 341 if (CPP_OPTION (pfile, warn_comments)
54d3be91 342 && cur[0] == '*' && cur[1] != '/')
343 {
344 buffer->cur = cur;
d80d2074 345 cpp_error_with_line (pfile, CPP_DL_WARNING,
dbddc569 346 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
54d3be91 347 "\"/*\" within comment");
348 }
0578f103 349 }
a54e0bf8 350 else if (c == '\n')
351 {
610625e3 352 unsigned int cols;
54d3be91 353 buffer->cur = cur - 1;
a54e0bf8 354 _cpp_process_line_notes (pfile, true);
355 if (buffer->next_line >= buffer->rlimit)
356 return true;
357 _cpp_clean_line (pfile);
610625e3 358
359 cols = buffer->next_line - buffer->line_base;
360 CPP_INCREMENT_LINE (pfile, cols);
361
54d3be91 362 cur = buffer->cur;
a54e0bf8 363 }
0578f103 364 }
f80e83a9 365
54d3be91 366 buffer->cur = cur;
3078f2b2 367 _cpp_process_line_notes (pfile, true);
a54e0bf8 368 return false;
0578f103 369}
370
1c124f85 371/* Skip a C++ line comment, leaving buffer->cur pointing to the
d10cfa8d 372 terminating newline. Handles escaped newlines. Returns nonzero
1c124f85 373 if a multiline comment. */
f80e83a9 374static int
f7fdd7a1 375skip_line_comment (cpp_reader *pfile)
0578f103 376{
f669338a 377 cpp_buffer *buffer = pfile->buffer;
dbddc569 378 unsigned int orig_line = pfile->line_table->highest_line;
f80e83a9 379
a54e0bf8 380 while (*buffer->cur != '\n')
381 buffer->cur++;
1c124f85 382
a54e0bf8 383 _cpp_process_line_notes (pfile, true);
dbddc569 384 return orig_line != pfile->line_table->highest_line;
f80e83a9 385}
0578f103 386
a54e0bf8 387/* Skips whitespace, saving the next non-whitespace character. */
b86584f6 388static void
f7fdd7a1 389skip_whitespace (cpp_reader *pfile, cppchar_t c)
f80e83a9 390{
391 cpp_buffer *buffer = pfile->buffer;
fe9eb18b 392 bool saw_NUL = false;
0578f103 393
338fa5f7 394 do
f80e83a9 395 {
78719282 396 /* Horizontal space always OK. */
a54e0bf8 397 if (c == ' ' || c == '\t')
338fa5f7 398 ;
338fa5f7 399 /* Just \f \v or \0 left. */
78719282 400 else if (c == '\0')
fe9eb18b 401 saw_NUL = true;
79bd622b 402 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
dbddc569 403 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
73328dce 404 CPP_BUF_COL (buffer),
405 "%s in preprocessing directive",
406 c == '\f' ? "form feed" : "vertical tab");
338fa5f7 407
338fa5f7 408 c = *buffer->cur++;
0578f103 409 }
2c0e001b 410 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
338fa5f7 411 while (is_nvspace (c));
412
fe9eb18b 413 if (saw_NUL)
d80d2074 414 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
fe9eb18b 415
1c124f85 416 buffer->cur--;
f80e83a9 417}
0578f103 418
79bd622b 419/* See if the characters of a number token are valid in a name (no
420 '.', '+' or '-'). */
421static int
f7fdd7a1 422name_p (cpp_reader *pfile, const cpp_string *string)
79bd622b 423{
424 unsigned int i;
425
426 for (i = 0; i < string->len; i++)
427 if (!is_idchar (string->text[i]))
428 return 0;
429
b1a9ff83 430 return 1;
79bd622b 431}
432
5bb46c08 433/* Returns TRUE if the sequence starting at buffer->cur is invalid in
2cbf1359 434 an identifier. FIRST is TRUE if this starts an identifier. */
5bb46c08 435static bool
f7fdd7a1 436forms_identifier_p (cpp_reader *pfile, int first)
5bb46c08 437{
2cbf1359 438 cpp_buffer *buffer = pfile->buffer;
439
440 if (*buffer->cur == '$')
441 {
442 if (!CPP_OPTION (pfile, dollars_in_ident))
443 return false;
444
445 buffer->cur++;
f0c2775b 446 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
2cbf1359 447 {
f0c2775b 448 CPP_OPTION (pfile, warn_dollars) = 0;
d80d2074 449 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
2cbf1359 450 }
451
452 return true;
453 }
5bb46c08 454
2cbf1359 455 /* Is this a syntactically valid UCN? */
456 if (0 && *buffer->cur == '\\'
457 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
5bb46c08 458 {
2cbf1359 459 buffer->cur += 2;
ebc03810 460 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
2cbf1359 461 return true;
462 buffer->cur -= 2;
5bb46c08 463 }
5bb46c08 464
2cbf1359 465 return false;
5bb46c08 466}
467
468/* Lex an identifier starting at BUFFER->CUR - 1. */
338fa5f7 469static cpp_hashnode *
f7fdd7a1 470lex_identifier (cpp_reader *pfile, const uchar *base)
0578f103 471{
79bd622b 472 cpp_hashnode *result;
3eb3f293 473 const uchar *cur, *limit;
474 unsigned int len;
475 unsigned int hash = HT_HASHSTEP (0, *base);
66a5287e 476
3eb3f293 477 cur = pfile->buffer->cur;
478 for (;;)
78a11351 479 {
5bb46c08 480 /* N.B. ISIDNUM does not include $. */
481 while (ISIDNUM (*cur))
3eb3f293 482 {
483 hash = HT_HASHSTEP (hash, *cur);
484 cur++;
485 }
78a11351 486
78a11351 487 pfile->buffer->cur = cur;
3eb3f293 488 if (!forms_identifier_p (pfile, false))
489 break;
490
491 limit = pfile->buffer->cur;
492 while (cur < limit)
493 {
494 hash = HT_HASHSTEP (hash, *cur);
495 cur++;
496 }
66a5287e 497 }
3eb3f293 498 len = cur - base;
499 hash = HT_HASHFINISH (hash, len);
5bb46c08 500
501 result = (cpp_hashnode *)
3eb3f293 502 ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
66a5287e 503
5bb46c08 504 /* Rarely, identifiers require diagnostics when lexed. */
66a5287e 505 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
506 && !pfile->state.skipping, 0))
507 {
508 /* It is allowed to poison the same identifier twice. */
509 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
d80d2074 510 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
66a5287e 511 NODE_NAME (result));
512
513 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
514 replacement list of a variadic macro. */
515 if (result == pfile->spec_nodes.n__VA_ARGS__
516 && !pfile->state.va_args_ok)
d80d2074 517 cpp_error (pfile, CPP_DL_PEDWARN,
f7fdd7a1 518 "__VA_ARGS__ can only appear in the expansion"
519 " of a C99 variadic macro");
66a5287e 520 }
521
522 return result;
523}
524
5bb46c08 525/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
0578f103 526static void
f7fdd7a1 527lex_number (cpp_reader *pfile, cpp_string *number)
0578f103 528{
b6d18b0a 529 const uchar *cur;
5bb46c08 530 const uchar *base;
531 uchar *dest;
0578f103 532
5bb46c08 533 base = pfile->buffer->cur - 1;
534 do
f80e83a9 535 {
5bb46c08 536 cur = pfile->buffer->cur;
338fa5f7 537
5bb46c08 538 /* N.B. ISIDNUM does not include $. */
539 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
540 cur++;
0578f103 541
78a11351 542 pfile->buffer->cur = cur;
0578f103 543 }
2cbf1359 544 while (forms_identifier_p (pfile, false));
79bd622b 545
5bb46c08 546 number->len = cur - base;
547 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
548 memcpy (dest, base, number->len);
549 dest[number->len] = '\0';
550 number->text = dest;
79bd622b 551}
552
4970d4c2 553/* Create a token of type TYPE with a literal spelling. */
554static void
f7fdd7a1 555create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
556 unsigned int len, enum cpp_ttype type)
4970d4c2 557{
558 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
559
560 memcpy (dest, base, len);
561 dest[len] = '\0';
562 token->type = type;
563 token->val.str.len = len;
564 token->val.str.text = dest;
565}
566
5bb46c08 567/* Lexes a string, character constant, or angle-bracketed header file
4970d4c2 568 name. The stored string contains the spelling, including opening
569 quote and leading any leading 'L'. It returns the type of the
570 literal, or CPP_OTHER if it was not properly terminated.
571
572 The spelling is NUL-terminated, but it is not guaranteed that this
573 is the first NUL since embedded NULs are preserved. */
f80e83a9 574static void
f7fdd7a1 575lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
0578f103 576{
4970d4c2 577 bool saw_NUL = false;
578 const uchar *cur;
5bb46c08 579 cppchar_t terminator;
4970d4c2 580 enum cpp_ttype type;
581
582 cur = base;
583 terminator = *cur++;
584 if (terminator == 'L')
585 terminator = *cur++;
586 if (terminator == '\"')
587 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
588 else if (terminator == '\'')
589 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
590 else
591 terminator = '>', type = CPP_HEADER_NAME;
79bd622b 592
338fa5f7 593 for (;;)
0578f103 594 {
4970d4c2 595 cppchar_t c = *cur++;
4b0c16ee 596
edaf8cb5 597 /* In #include-style directives, terminators are not escapable. */
4970d4c2 598 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
599 cur++;
600 else if (c == terminator)
5bb46c08 601 break;
4970d4c2 602 else if (c == '\n')
338fa5f7 603 {
4970d4c2 604 cur--;
605 type = CPP_OTHER;
606 break;
0578f103 607 }
4970d4c2 608 else if (c == '\0')
609 saw_NUL = true;
0578f103 610 }
611
4970d4c2 612 if (saw_NUL && !pfile->state.skipping)
d80d2074 613 cpp_error (pfile, CPP_DL_WARNING,
614 "null character(s) preserved in literal");
0578f103 615
4970d4c2 616 pfile->buffer->cur = cur;
617 create_literal (pfile, token, base, cur - base, type);
338fa5f7 618}
f80e83a9 619
79bd622b 620/* The stored comment includes the comment start and any terminator. */
2c63d6c8 621static void
f7fdd7a1 622save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
623 cppchar_t type)
2c63d6c8 624{
f80e83a9 625 unsigned char *buffer;
d3f7919d 626 unsigned int len, clen;
b1a9ff83 627
f0495c2c 628 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1c124f85 629
a543b315 630 /* C++ comments probably (not definitely) have moved past a new
631 line, which we don't want to save in the comment. */
1c124f85 632 if (is_vspace (pfile->buffer->cur[-1]))
a543b315 633 len--;
d3f7919d 634
635 /* If we are currently in a directive, then we need to store all
636 C++ comments as C comments internally, and so we need to
637 allocate a little extra space in that case.
638
639 Note that the only time we encounter a directive here is
640 when we are saving comments in a "#define". */
641 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
642
643 buffer = _cpp_unaligned_alloc (pfile, clen);
b1a9ff83 644
f80e83a9 645 token->type = CPP_COMMENT;
d3f7919d 646 token->val.str.len = clen;
338fa5f7 647 token->val.str.text = buffer;
0578f103 648
f0495c2c 649 buffer[0] = '/';
650 memcpy (buffer + 1, from, len - 1);
d3f7919d 651
a113df96 652 /* Finish conversion to a C comment, if necessary. */
d3f7919d 653 if (pfile->state.in_directive && type == '/')
654 {
655 buffer[1] = '*';
656 buffer[clen - 2] = '*';
657 buffer[clen - 1] = '/';
658 }
338fa5f7 659}
0578f103 660
83dcbb5c 661/* Allocate COUNT tokens for RUN. */
662void
f7fdd7a1 663_cpp_init_tokenrun (tokenrun *run, unsigned int count)
83dcbb5c 664{
3b298764 665 run->base = XNEWVEC (cpp_token, count);
83dcbb5c 666 run->limit = run->base + count;
667 run->next = NULL;
668}
669
670/* Returns the next tokenrun, or creates one if there is none. */
671static tokenrun *
f7fdd7a1 672next_tokenrun (tokenrun *run)
83dcbb5c 673{
674 if (run->next == NULL)
675 {
3b298764 676 run->next = XNEW (tokenrun);
fb5ab82c 677 run->next->prev = run;
83dcbb5c 678 _cpp_init_tokenrun (run->next, 250);
679 }
680
681 return run->next;
682}
683
f9b5f742 684/* Allocate a single token that is invalidated at the same time as the
685 rest of the tokens on the line. Has its line and col set to the
686 same as the last lexed token, so that diagnostics appear in the
687 right place. */
688cpp_token *
f7fdd7a1 689_cpp_temp_token (cpp_reader *pfile)
f9b5f742 690{
691 cpp_token *old, *result;
692
693 old = pfile->cur_token - 1;
694 if (pfile->cur_token == pfile->cur_run->limit)
695 {
696 pfile->cur_run = next_tokenrun (pfile->cur_run);
697 pfile->cur_token = pfile->cur_run->base;
698 }
699
700 result = pfile->cur_token++;
610625e3 701 result->src_loc = old->src_loc;
f9b5f742 702 return result;
703}
704
10b4496a 705/* Lex a token into RESULT (external interface). Takes care of issues
706 like directive handling, token lookahead, multiple include
3fb1e43b 707 optimization and skipping. */
c00e481c 708const cpp_token *
f7fdd7a1 709_cpp_lex_token (cpp_reader *pfile)
83dcbb5c 710{
fb5ab82c 711 cpp_token *result;
83dcbb5c 712
fb5ab82c 713 for (;;)
83dcbb5c 714 {
fb5ab82c 715 if (pfile->cur_token == pfile->cur_run->limit)
83dcbb5c 716 {
fb5ab82c 717 pfile->cur_run = next_tokenrun (pfile->cur_run);
718 pfile->cur_token = pfile->cur_run->base;
83dcbb5c 719 }
720
fb5ab82c 721 if (pfile->lookaheads)
10b4496a 722 {
723 pfile->lookaheads--;
724 result = pfile->cur_token++;
725 }
fb5ab82c 726 else
10b4496a 727 result = _cpp_lex_direct (pfile);
fb5ab82c 728
729 if (result->flags & BOL)
83dcbb5c 730 {
fb5ab82c 731 /* Is this a directive. If _cpp_handle_directive returns
732 false, it is an assembler #. */
733 if (result->type == CPP_HASH
d6af0368 734 /* 6.10.3 p 11: Directives in a list of macro arguments
735 gives undefined behavior. This implementation
736 handles the directive as normal. */
737 && pfile->state.parsing_args != 1
fb5ab82c 738 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
739 continue;
5621a364 740 if (pfile->cb.line_change && !pfile->state.skipping)
f7fdd7a1 741 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
83dcbb5c 742 }
83dcbb5c 743
fb5ab82c 744 /* We don't skip tokens in directives. */
745 if (pfile->state.in_directive)
746 break;
83dcbb5c 747
fb5ab82c 748 /* Outside a directive, invalidate controlling macros. At file
10b4496a 749 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
7ef5b942 750 get here and MI optimization works. */
83dcbb5c 751 pfile->mi_valid = false;
fb5ab82c 752
753 if (!pfile->state.skipping || result->type == CPP_EOF)
754 break;
83dcbb5c 755 }
756
c00e481c 757 return result;
83dcbb5c 758}
759
a54e0bf8 760/* Returns true if a fresh line has been loaded. */
761bool
f7fdd7a1 762_cpp_get_fresh_line (cpp_reader *pfile)
0bb65704 763{
6e04daf1 764 int return_at_eof;
765
a54e0bf8 766 /* We can't get a new line until we leave the current directive. */
767 if (pfile->state.in_directive)
768 return false;
b1a9ff83 769
a54e0bf8 770 for (;;)
fb83e0d6 771 {
a54e0bf8 772 cpp_buffer *buffer = pfile->buffer;
fb83e0d6 773
a54e0bf8 774 if (!buffer->need_line)
775 return true;
776
777 if (buffer->next_line < buffer->rlimit)
0bb65704 778 {
a54e0bf8 779 _cpp_clean_line (pfile);
780 return true;
781 }
0bb65704 782
a54e0bf8 783 /* First, get out of parsing arguments state. */
784 if (pfile->state.parsing_args)
785 return false;
786
787 /* End of buffer. Non-empty files should end in a newline. */
788 if (buffer->buf != buffer->rlimit
789 && buffer->next_line > buffer->rlimit
790 && !buffer->from_stage3)
791 {
792 /* Only warn once. */
793 buffer->next_line = buffer->rlimit;
dbddc569 794 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
a54e0bf8 795 CPP_BUF_COLUMN (buffer, buffer->cur),
796 "no newline at end of file");
797 }
6e04daf1 798
799 return_at_eof = buffer->return_at_eof;
a54e0bf8 800 _cpp_pop_buffer (pfile);
6e04daf1 801 if (pfile->buffer == NULL || return_at_eof)
11b5269c 802 return false;
a54e0bf8 803 }
0bb65704 804}
805
/* Give `result' the type THEN_TYPE and consume one character if the
   next character in `buffer' is CHAR; give it ELSE_TYPE otherwise.
   Relies on `buffer' and `result' being in scope where it is used.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      if (*buffer->cur == CHAR)                 \
        {                                       \
          buffer->cur++;                        \
          result->type = THEN_TYPE;             \
        }                                       \
      else                                      \
        result->type = ELSE_TYPE;               \
    }                                           \
  while (0)
1c124f85 814
10b4496a 815/* Lex a token into pfile->cur_token, which is also incremented, to
816 get diagnostics pointing to the correct location.
817
818 Does not handle issues such as token lookahead, multiple-include
4172d65e 819 optimization, directives, skipping etc. This function is only
10b4496a 820 suitable for use by _cpp_lex_token, and in special cases like
821 lex_expansion_token which doesn't care for any of these issues.
822
823 When meeting a newline, returns CPP_EOF if parsing a directive,
824 otherwise returns to the start of the token buffer if permissible.
825 Returns the location of the lexed token. */
826cpp_token *
f7fdd7a1 827_cpp_lex_direct (cpp_reader *pfile)
0578f103 828{
338fa5f7 829 cppchar_t c;
230f0943 830 cpp_buffer *buffer;
338fa5f7 831 const unsigned char *comment_start;
10b4496a 832 cpp_token *result = pfile->cur_token++;
0653b94e 833
83dcbb5c 834 fresh_line:
a54e0bf8 835 result->flags = 0;
82166c5c 836 buffer = pfile->buffer;
11b5269c 837 if (buffer->need_line)
a54e0bf8 838 {
839 if (!_cpp_get_fresh_line (pfile))
840 {
841 result->type = CPP_EOF;
2908f819 842 if (!pfile->state.in_directive)
843 {
844 /* Tell the compiler the line number of the EOF token. */
dbddc569 845 result->src_loc = pfile->line_table->highest_line;
2908f819 846 result->flags = BOL;
847 }
a54e0bf8 848 return result;
849 }
850 if (!pfile->keep_tokens)
851 {
852 pfile->cur_run = &pfile->base_run;
853 result = pfile->base_run.base;
854 pfile->cur_token = result + 1;
855 }
856 result->flags = BOL;
857 if (pfile->state.parsing_args == 2)
858 result->flags |= PREV_WHITE;
859 }
11b5269c 860 buffer = pfile->buffer;
83dcbb5c 861 update_tokens_line:
dbddc569 862 result->src_loc = pfile->line_table->highest_line;
f80e83a9 863
83dcbb5c 864 skipped_white:
a54e0bf8 865 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
866 && !pfile->overlaid_buffer)
867 {
868 _cpp_process_line_notes (pfile, false);
dbddc569 869 result->src_loc = pfile->line_table->highest_line;
a54e0bf8 870 }
1c124f85 871 c = *buffer->cur++;
610625e3 872
dbddc569 873 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
874 CPP_BUF_COLUMN (buffer, buffer->cur));
83dcbb5c 875
338fa5f7 876 switch (c)
0578f103 877 {
435fb09b 878 case ' ': case '\t': case '\f': case '\v': case '\0':
879 result->flags |= PREV_WHITE;
a54e0bf8 880 skip_whitespace (pfile, c);
881 goto skipped_white;
338fa5f7 882
a54e0bf8 883 case '\n':
610625e3 884 if (buffer->cur < buffer->rlimit)
885 CPP_INCREMENT_LINE (pfile, 0);
a54e0bf8 886 buffer->need_line = true;
887 goto fresh_line;
732cb4c9 888
338fa5f7 889 case '0': case '1': case '2': case '3': case '4':
890 case '5': case '6': case '7': case '8': case '9':
891 result->type = CPP_NUMBER;
5bb46c08 892 lex_number (pfile, &result->val.str);
338fa5f7 893 break;
732cb4c9 894
78c551ad 895 case 'L':
896 /* 'L' may introduce wide characters or strings. */
5bb46c08 897 if (*buffer->cur == '\'' || *buffer->cur == '"')
898 {
4970d4c2 899 lex_string (pfile, result, buffer->cur - 1);
5bb46c08 900 break;
901 }
b1a9ff83 902 /* Fall through. */
78c551ad 903
338fa5f7 904 case '_':
905 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
906 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
907 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
908 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
909 case 'y': case 'z':
910 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
78c551ad 911 case 'G': case 'H': case 'I': case 'J': case 'K':
338fa5f7 912 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
913 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
914 case 'Y': case 'Z':
915 result->type = CPP_NAME;
2cbf1359 916 result->val.node = lex_identifier (pfile, buffer->cur - 1);
338fa5f7 917
338fa5f7 918 /* Convert named operators to their proper types. */
78c551ad 919 if (result->val.node->flags & NODE_OPERATOR)
338fa5f7 920 {
921 result->flags |= NAMED_OP;
805e22b2 922 result->type = result->val.node->directive_index;
338fa5f7 923 }
924 break;
925
926 case '\'':
927 case '"':
4970d4c2 928 lex_string (pfile, result, buffer->cur - 1);
338fa5f7 929 break;
f80e83a9 930
338fa5f7 931 case '/':
f0495c2c 932 /* A potential block or line comment. */
933 comment_start = buffer->cur;
edaf8cb5 934 c = *buffer->cur;
935
f0495c2c 936 if (c == '*')
937 {
a54e0bf8 938 if (_cpp_skip_block_comment (pfile))
d80d2074 939 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
338fa5f7 940 }
1c124f85 941 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
610625e3 942 || cpp_in_system_header (pfile)))
338fa5f7 943 {
5db5d057 944 /* Warn about comments only if pedantically GNUC89, and not
945 in system headers. */
946 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
66914e49 947 && ! buffer->warned_cplusplus_comments)
f80e83a9 948 {
d80d2074 949 cpp_error (pfile, CPP_DL_PEDWARN,
ba059ac0 950 "C++ style comments are not allowed in ISO C90");
d80d2074 951 cpp_error (pfile, CPP_DL_PEDWARN,
73328dce 952 "(this will be reported only once per input file)");
f0495c2c 953 buffer->warned_cplusplus_comments = 1;
954 }
338fa5f7 955
e1caf668 956 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
d80d2074 957 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
f0495c2c 958 }
1c124f85 959 else if (c == '=')
960 {
edaf8cb5 961 buffer->cur++;
1c124f85 962 result->type = CPP_DIV_EQ;
963 break;
964 }
965 else
966 {
1c124f85 967 result->type = CPP_DIV;
968 break;
969 }
338fa5f7 970
f0495c2c 971 if (!pfile->state.save_comments)
972 {
973 result->flags |= PREV_WHITE;
83dcbb5c 974 goto update_tokens_line;
338fa5f7 975 }
f0495c2c 976
977 /* Save the comment as a token in its own right. */
d3f7919d 978 save_comment (pfile, result, comment_start, c);
fb5ab82c 979 break;
338fa5f7 980
981 case '<':
982 if (pfile->state.angled_headers)
983 {
4970d4c2 984 lex_string (pfile, result, buffer->cur - 1);
1c124f85 985 break;
338fa5f7 986 }
0578f103 987
edaf8cb5 988 result->type = CPP_LESS;
989 if (*buffer->cur == '=')
990 buffer->cur++, result->type = CPP_LESS_EQ;
991 else if (*buffer->cur == '<')
338fa5f7 992 {
edaf8cb5 993 buffer->cur++;
994 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
338fa5f7 995 }
edaf8cb5 996 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
338fa5f7 997 {
edaf8cb5 998 buffer->cur++;
999 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
338fa5f7 1000 }
edaf8cb5 1001 else if (CPP_OPTION (pfile, digraphs))
1c124f85 1002 {
edaf8cb5 1003 if (*buffer->cur == ':')
1004 {
1005 buffer->cur++;
1006 result->flags |= DIGRAPH;
1007 result->type = CPP_OPEN_SQUARE;
1008 }
1009 else if (*buffer->cur == '%')
1010 {
1011 buffer->cur++;
1012 result->flags |= DIGRAPH;
1013 result->type = CPP_OPEN_BRACE;
1014 }
1c124f85 1015 }
338fa5f7 1016 break;
1017
1018 case '>':
edaf8cb5 1019 result->type = CPP_GREATER;
1020 if (*buffer->cur == '=')
1021 buffer->cur++, result->type = CPP_GREATER_EQ;
1022 else if (*buffer->cur == '>')
338fa5f7 1023 {
edaf8cb5 1024 buffer->cur++;
1025 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1026 }
1027 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1028 {
1029 buffer->cur++;
1030 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
338fa5f7 1031 }
1032 break;
1033
f669338a 1034 case '%':
edaf8cb5 1035 result->type = CPP_MOD;
1036 if (*buffer->cur == '=')
1037 buffer->cur++, result->type = CPP_MOD_EQ;
1038 else if (CPP_OPTION (pfile, digraphs))
1c124f85 1039 {
edaf8cb5 1040 if (*buffer->cur == ':')
1c124f85 1041 {
edaf8cb5 1042 buffer->cur++;
1043 result->flags |= DIGRAPH;
1044 result->type = CPP_HASH;
1045 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1046 buffer->cur += 2, result->type = CPP_PASTE;
1047 }
1048 else if (*buffer->cur == '>')
1049 {
1050 buffer->cur++;
1051 result->flags |= DIGRAPH;
1052 result->type = CPP_CLOSE_BRACE;
1c124f85 1053 }
1c124f85 1054 }
338fa5f7 1055 break;
1056
f669338a 1057 case '.':
1c124f85 1058 result->type = CPP_DOT;
edaf8cb5 1059 if (ISDIGIT (*buffer->cur))
1c124f85 1060 {
1061 result->type = CPP_NUMBER;
5bb46c08 1062 lex_number (pfile, &result->val.str);
1c124f85 1063 }
edaf8cb5 1064 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1065 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1066 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1067 buffer->cur++, result->type = CPP_DOT_STAR;
338fa5f7 1068 break;
0578f103 1069
338fa5f7 1070 case '+':
edaf8cb5 1071 result->type = CPP_PLUS;
1072 if (*buffer->cur == '+')
1073 buffer->cur++, result->type = CPP_PLUS_PLUS;
1074 else if (*buffer->cur == '=')
1075 buffer->cur++, result->type = CPP_PLUS_EQ;
338fa5f7 1076 break;
ac0749c7 1077
338fa5f7 1078 case '-':
edaf8cb5 1079 result->type = CPP_MINUS;
1080 if (*buffer->cur == '>')
338fa5f7 1081 {
edaf8cb5 1082 buffer->cur++;
1c124f85 1083 result->type = CPP_DEREF;
edaf8cb5 1084 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1085 buffer->cur++, result->type = CPP_DEREF_STAR;
1c124f85 1086 }
edaf8cb5 1087 else if (*buffer->cur == '-')
1088 buffer->cur++, result->type = CPP_MINUS_MINUS;
1089 else if (*buffer->cur == '=')
1090 buffer->cur++, result->type = CPP_MINUS_EQ;
338fa5f7 1091 break;
0578f103 1092
338fa5f7 1093 case '&':
edaf8cb5 1094 result->type = CPP_AND;
1095 if (*buffer->cur == '&')
1096 buffer->cur++, result->type = CPP_AND_AND;
1097 else if (*buffer->cur == '=')
1098 buffer->cur++, result->type = CPP_AND_EQ;
338fa5f7 1099 break;
b1a9ff83 1100
338fa5f7 1101 case '|':
edaf8cb5 1102 result->type = CPP_OR;
1103 if (*buffer->cur == '|')
1104 buffer->cur++, result->type = CPP_OR_OR;
1105 else if (*buffer->cur == '=')
1106 buffer->cur++, result->type = CPP_OR_EQ;
338fa5f7 1107 break;
0578f103 1108
338fa5f7 1109 case ':':
edaf8cb5 1110 result->type = CPP_COLON;
1111 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1112 buffer->cur++, result->type = CPP_SCOPE;
1113 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
338fa5f7 1114 {
edaf8cb5 1115 buffer->cur++;
338fa5f7 1116 result->flags |= DIGRAPH;
1c124f85 1117 result->type = CPP_CLOSE_SQUARE;
1118 }
338fa5f7 1119 break;
0578f103 1120
1c124f85 1121 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1122 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1123 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1124 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1125 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1126
a54e0bf8 1127 case '?': result->type = CPP_QUERY; break;
338fa5f7 1128 case '~': result->type = CPP_COMPL; break;
1129 case ',': result->type = CPP_COMMA; break;
1130 case '(': result->type = CPP_OPEN_PAREN; break;
1131 case ')': result->type = CPP_CLOSE_PAREN; break;
1132 case '[': result->type = CPP_OPEN_SQUARE; break;
1133 case ']': result->type = CPP_CLOSE_SQUARE; break;
1134 case '{': result->type = CPP_OPEN_BRACE; break;
1135 case '}': result->type = CPP_CLOSE_BRACE; break;
1136 case ';': result->type = CPP_SEMICOLON; break;
1137
7fd957fe 1138 /* @ is a punctuator in Objective-C. */
9ee99ac6 1139 case '@': result->type = CPP_ATSIGN; break;
338fa5f7 1140
78c551ad 1141 case '$':
2cbf1359 1142 case '\\':
1143 {
1144 const uchar *base = --buffer->cur;
78c551ad 1145
2cbf1359 1146 if (forms_identifier_p (pfile, true))
1147 {
1148 result->type = CPP_NAME;
1149 result->val.node = lex_identifier (pfile, base);
1150 break;
1151 }
1152 buffer->cur++;
bc205914 1153 }
2cbf1359 1154
bc205914 1155 default:
4970d4c2 1156 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1157 break;
338fa5f7 1158 }
fb5ab82c 1159
1160 return result;
338fa5f7 1161}
1162
b1280514 1163/* An upper bound on the number of bytes needed to spell TOKEN.
1164 Does not include preceding whitespace. */
79bd622b 1165unsigned int
f7fdd7a1 1166cpp_token_len (const cpp_token *token)
338fa5f7 1167{
79bd622b 1168 unsigned int len;
cfad5579 1169
79bd622b 1170 switch (TOKEN_SPELL (token))
f80e83a9 1171 {
b1280514 1172 default: len = 4; break;
4970d4c2 1173 case SPELL_LITERAL: len = token->val.str.len; break;
c86dbc5b 1174 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
f80e83a9 1175 }
b1280514 1176
1177 return len;
cfad5579 1178}
1179
f80e83a9 1180/* Write the spelling of a token TOKEN to BUFFER. The buffer must
c5ea33a8 1181 already contain the enough space to hold the token's spelling.
f7fdd7a1 1182 Returns a pointer to the character after the last character written.
1183 FIXME: Would be nice if we didn't need the PFILE argument. */
79bd622b 1184unsigned char *
f7fdd7a1 1185cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1186 unsigned char *buffer)
f80e83a9 1187{
7e842f95 1188 switch (TOKEN_SPELL (token))
f80e83a9 1189 {
1190 case SPELL_OPERATOR:
1191 {
1192 const unsigned char *spelling;
1193 unsigned char c;
ab12a39c 1194
f80e83a9 1195 if (token->flags & DIGRAPH)
ee6c4e4b 1196 spelling
1197 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
31674461 1198 else if (token->flags & NAMED_OP)
1199 goto spell_ident;
f80e83a9 1200 else
7e842f95 1201 spelling = TOKEN_NAME (token);
b1a9ff83 1202
f80e83a9 1203 while ((c = *spelling++) != '\0')
1204 *buffer++ = c;
1205 }
1206 break;
ab12a39c 1207
8d27e472 1208 spell_ident:
f80e83a9 1209 case SPELL_IDENT:
c86dbc5b 1210 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1211 buffer += NODE_LEN (token->val.node);
f80e83a9 1212 break;
ab12a39c 1213
4970d4c2 1214 case SPELL_LITERAL:
8d27e472 1215 memcpy (buffer, token->val.str.text, token->val.str.len);
1216 buffer += token->val.str.len;
1217 break;
1218
f80e83a9 1219 case SPELL_NONE:
d80d2074 1220 cpp_error (pfile, CPP_DL_ICE,
1221 "unspellable token %s", TOKEN_NAME (token));
f80e83a9 1222 break;
1223 }
ab12a39c 1224
f80e83a9 1225 return buffer;
1226}
ab12a39c 1227
e484a1cc 1228/* Returns TOKEN spelt as a null-terminated string. The string is
1229 freed when the reader is destroyed. Useful for diagnostics. */
79bd622b 1230unsigned char *
f7fdd7a1 1231cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
b1280514 1232{
1233 unsigned int len = cpp_token_len (token) + 1;
1fdf6039 1234 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
6060326b 1235
79bd622b 1236 end = cpp_spell_token (pfile, token, start);
1237 end[0] = '\0';
6060326b 1238
79bd622b 1239 return start;
1240}
6060326b 1241
e484a1cc 1242/* Used by C front ends, which really should move to using
1243 cpp_token_as_text. */
79bd622b 1244const char *
f7fdd7a1 1245cpp_type2name (enum cpp_ttype type)
79bd622b 1246{
1247 return (const char *) token_spellings[type].name;
1248}
6060326b 1249
f9b5f742 1250/* Writes the spelling of token to FP, without any preceding space.
1251 Separated from cpp_spell_token for efficiency - to avoid stdio
1252 double-buffering. */
79bd622b 1253void
f7fdd7a1 1254cpp_output_token (const cpp_token *token, FILE *fp)
79bd622b 1255{
79bd622b 1256 switch (TOKEN_SPELL (token))
6060326b 1257 {
79bd622b 1258 case SPELL_OPERATOR:
1259 {
1260 const unsigned char *spelling;
28874558 1261 int c;
6060326b 1262
79bd622b 1263 if (token->flags & DIGRAPH)
ee6c4e4b 1264 spelling
1265 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
79bd622b 1266 else if (token->flags & NAMED_OP)
1267 goto spell_ident;
1268 else
1269 spelling = TOKEN_NAME (token);
f80e83a9 1270
28874558 1271 c = *spelling;
1272 do
1273 putc (c, fp);
1274 while ((c = *++spelling) != '\0');
79bd622b 1275 }
1276 break;
f80e83a9 1277
79bd622b 1278 spell_ident:
1279 case SPELL_IDENT:
28874558 1280 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
79bd622b 1281 break;
f80e83a9 1282
4970d4c2 1283 case SPELL_LITERAL:
8d27e472 1284 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1285 break;
1286
79bd622b 1287 case SPELL_NONE:
1288 /* An error, most probably. */
1289 break;
f80e83a9 1290 }
6060326b 1291}
1292
79bd622b 1293/* Compare two tokens. */
1294int
f7fdd7a1 1295_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
6060326b 1296{
79bd622b 1297 if (a->type == b->type && a->flags == b->flags)
1298 switch (TOKEN_SPELL (a))
1299 {
1300 default: /* Keep compiler happy. */
1301 case SPELL_OPERATOR:
1302 return 1;
79bd622b 1303 case SPELL_NONE:
588d632b 1304 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
79bd622b 1305 case SPELL_IDENT:
1306 return a->val.node == b->val.node;
4970d4c2 1307 case SPELL_LITERAL:
79bd622b 1308 return (a->val.str.len == b->val.str.len
1309 && !memcmp (a->val.str.text, b->val.str.text,
1310 a->val.str.len));
1311 }
6060326b 1312
f80e83a9 1313 return 0;
1314}
1315
79bd622b 1316/* Returns nonzero if a space should be inserted to avoid an
1317 accidental token paste for output. For simplicity, it is
1318 conservative, and occasionally advises a space where one is not
1319 needed, e.g. "." and ".2". */
79bd622b 1320int
f7fdd7a1 1321cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1322 const cpp_token *token2)
6060326b 1323{
79bd622b 1324 enum cpp_ttype a = token1->type, b = token2->type;
1325 cppchar_t c;
6060326b 1326
79bd622b 1327 if (token1->flags & NAMED_OP)
1328 a = CPP_NAME;
1329 if (token2->flags & NAMED_OP)
1330 b = CPP_NAME;
6060326b 1331
79bd622b 1332 c = EOF;
1333 if (token2->flags & DIGRAPH)
ee6c4e4b 1334 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
79bd622b 1335 else if (token_spellings[b].category == SPELL_OPERATOR)
1336 c = token_spellings[b].name[0];
6060326b 1337
79bd622b 1338 /* Quickly get everything that can paste with an '='. */
ee6c4e4b 1339 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
79bd622b 1340 return 1;
6060326b 1341
79bd622b 1342 switch (a)
6060326b 1343 {
79bd622b 1344 case CPP_GREATER: return c == '>' || c == '?';
1345 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1346 case CPP_PLUS: return c == '+';
1347 case CPP_MINUS: return c == '-' || c == '>';
1348 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1349 case CPP_MOD: return c == ':' || c == '>';
1350 case CPP_AND: return c == '&';
1351 case CPP_OR: return c == '|';
1352 case CPP_COLON: return c == ':' || c == '>';
1353 case CPP_DEREF: return c == '*';
efdcc728 1354 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
79bd622b 1355 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1356 case CPP_NAME: return ((b == CPP_NUMBER
1357 && name_p (pfile, &token2->val.str))
1358 || b == CPP_NAME
1359 || b == CPP_CHAR || b == CPP_STRING); /* L */
1360 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1361 || c == '.' || c == '+' || c == '-');
2cbf1359 1362 /* UCNs */
bc205914 1363 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1364 && b == CPP_NAME)
2cbf1359 1365 || (CPP_OPTION (pfile, objc)
bc205914 1366 && token1->val.str.text[0] == '@'
2cbf1359 1367 && (b == CPP_NAME || b == CPP_STRING)));
79bd622b 1368 default: break;
6060326b 1369 }
6060326b 1370
deb356cf 1371 return 0;
6060326b 1372}
1373
79bd622b 1374/* Output all the remaining tokens on the current line, and a newline
f9b5f742 1375 character, to FP. Leading whitespace is removed. If there are
1376 macros, special token padding is not performed. */
6060326b 1377void
f7fdd7a1 1378cpp_output_line (cpp_reader *pfile, FILE *fp)
6060326b 1379{
f9b5f742 1380 const cpp_token *token;
7e842f95 1381
f9b5f742 1382 token = cpp_get_token (pfile);
1383 while (token->type != CPP_EOF)
7e842f95 1384 {
f9b5f742 1385 cpp_output_token (token, fp);
1386 token = cpp_get_token (pfile);
1387 if (token->flags & PREV_WHITE)
1388 putc (' ', fp);
7e842f95 1389 }
1390
79bd622b 1391 putc ('\n', fp);
f80e83a9 1392}
6060326b 1393
/* Memory buffers.  Changing these three definitions can have a
   dramatic effect on performance.  The values here are reasonable
   defaults, but might be tuned.  If you adjust them, be sure to test
   across a range of uses of cpplib, including heavy nested
   function-like macro expansion.  Also check the change in peak
   memory usage (NJAMD is a good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when BUFF
   is extended: MIN_EXTRA plus twice the bytes between BUFF's cur and
   limit.  Every macro argument is parenthesized so that an argument
   containing an operator is evaluated as a unit.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1408
1785b647 1409/* Create a new allocation buffer. Place the control block at the end
1410 of the buffer, so that buffer overflows will cause immediate chaos. */
06c92cbc 1411static _cpp_buff *
f7fdd7a1 1412new_buff (size_t len)
06c92cbc 1413{
1414 _cpp_buff *result;
1fdf6039 1415 unsigned char *base;
06c92cbc 1416
084163dc 1417 if (len < MIN_BUFF_SIZE)
1418 len = MIN_BUFF_SIZE;
198b48a0 1419 len = CPP_ALIGN (len);
06c92cbc 1420
1421 base = xmalloc (len + sizeof (_cpp_buff));
1422 result = (_cpp_buff *) (base + len);
1423 result->base = base;
1424 result->cur = base;
1425 result->limit = base + len;
1426 result->next = NULL;
1427 return result;
1428}
1429
1430/* Place a chain of unwanted allocation buffers on the free list. */
1431void
f7fdd7a1 1432_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
06c92cbc 1433{
1434 _cpp_buff *end = buff;
1435
1436 while (end->next)
1437 end = end->next;
1438 end->next = pfile->free_buffs;
1439 pfile->free_buffs = buff;
1440}
1441
1442/* Return a free buffer of size at least MIN_SIZE. */
1443_cpp_buff *
f7fdd7a1 1444_cpp_get_buff (cpp_reader *pfile, size_t min_size)
06c92cbc 1445{
1446 _cpp_buff *result, **p;
1447
1448 for (p = &pfile->free_buffs;; p = &(*p)->next)
1449 {
4b31a107 1450 size_t size;
084163dc 1451
1452 if (*p == NULL)
06c92cbc 1453 return new_buff (min_size);
084163dc 1454 result = *p;
1455 size = result->limit - result->base;
1456 /* Return a buffer that's big enough, but don't waste one that's
1457 way too big. */
4085c149 1458 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
06c92cbc 1459 break;
1460 }
1461
1462 *p = result->next;
1463 result->next = NULL;
1464 result->cur = result->base;
1465 return result;
1466}
1467
20dd417a 1468/* Creates a new buffer with enough space to hold the uncommitted
e6a5f963 1469 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1470 the excess bytes to the new buffer. Chains the new buffer after
1471 BUFF, and returns the new buffer. */
06c92cbc 1472_cpp_buff *
f7fdd7a1 1473_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
06c92cbc 1474{
4b31a107 1475 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
e6a5f963 1476 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
06c92cbc 1477
e6a5f963 1478 buff->next = new_buff;
1479 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1480 return new_buff;
1481}
1482
20dd417a 1483/* Creates a new buffer with enough space to hold the uncommitted
e6a5f963 1484 remaining bytes of the buffer pointed to by BUFF, and at least
1485 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1486 Chains the new buffer before the buffer pointed to by BUFF, and
1487 updates the pointer to point to the new buffer. */
1488void
f7fdd7a1 1489_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
e6a5f963 1490{
1491 _cpp_buff *new_buff, *old_buff = *pbuff;
1492 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1493
1494 new_buff = _cpp_get_buff (pfile, size);
1495 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1496 new_buff->next = old_buff;
1497 *pbuff = new_buff;
06c92cbc 1498}
1499
1500/* Free a chain of buffers starting at BUFF. */
1501void
f82b06e0 1502_cpp_free_buff (_cpp_buff *buff)
06c92cbc 1503{
1504 _cpp_buff *next;
1505
1506 for (; buff; buff = next)
1507 {
1508 next = buff->next;
1509 free (buff->base);
1510 }
1511}
deb356cf 1512
1fdf6039 1513/* Allocate permanent, unaligned storage of length LEN. */
1514unsigned char *
f7fdd7a1 1515_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1fdf6039 1516{
1517 _cpp_buff *buff = pfile->u_buff;
1518 unsigned char *result = buff->cur;
1519
1520 if (len > (size_t) (buff->limit - result))
1521 {
1522 buff = _cpp_get_buff (pfile, len);
1523 buff->next = pfile->u_buff;
1524 pfile->u_buff = buff;
1525 result = buff->cur;
1526 }
1527
1528 buff->cur = result + len;
1529 return result;
1530}
1531
1e0ef2fd 1532/* Allocate permanent, unaligned storage of length LEN from a_buff.
1533 That buffer is used for growing allocations when saving macro
1534 replacement lists in a #define, and when parsing an answer to an
1535 assertion in #assert, #unassert or #if (and therefore possibly
1536 whilst expanding macros). It therefore must not be used by any
1537 code that they might call: specifically the lexer and the guts of
1538 the macro expander.
1539
1540 All existing other uses clearly fit this restriction: storing
1541 registered pragmas during initialization. */
79bd622b 1542unsigned char *
f7fdd7a1 1543_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
89b05ef6 1544{
e6a5f963 1545 _cpp_buff *buff = pfile->a_buff;
1546 unsigned char *result = buff->cur;
89b05ef6 1547
e6a5f963 1548 if (len > (size_t) (buff->limit - result))
89b05ef6 1549 {
e6a5f963 1550 buff = _cpp_get_buff (pfile, len);
1551 buff->next = pfile->a_buff;
1552 pfile->a_buff = buff;
1553 result = buff->cur;
89b05ef6 1554 }
f80e83a9 1555
e6a5f963 1556 buff->cur = result + len;
79bd622b 1557 return result;
f80e83a9 1558}
c39ed964 1559
1560/* Say which field of TOK is in use. */
1561
1562enum cpp_token_fld_kind
1563cpp_token_val_index (cpp_token *tok)
1564{
1565 switch (TOKEN_SPELL (tok))
1566 {
1567 case SPELL_IDENT:
1568 return CPP_TOKEN_FLD_NODE;
1569 case SPELL_LITERAL:
1570 return CPP_TOKEN_FLD_STR;
1571 case SPELL_NONE:
1572 if (tok->type == CPP_MACRO_ARG)
1573 return CPP_TOKEN_FLD_ARG_NO;
1574 else if (tok->type == CPP_PADDING)
1575 return CPP_TOKEN_FLD_SOURCE;
1576 /* else fall through */
1577 default:
1578 return CPP_TOKEN_FLD_NONE;
1579 }
1580}