]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/cpplex.c
PR optimization/11059
[thirdparty/gcc.git] / gcc / cpplex.c
CommitLineData
0578f103 1/* CPP Library - lexical analysis.
f0c2775b 2 Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
0578f103 3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7
8This program is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 2, or (at your option) any
11later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with this program; if not, write to the Free Software
20Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
21
22#include "config.h"
23#include "system.h"
805e22b2 24#include "coretypes.h"
25#include "tm.h"
0578f103 26#include "cpplib.h"
27#include "cpphash.h"
28
/* Categories describing how the spelling of a token type is recorded
   in the token spelling table.  */
enum spell_type
{
  SPELL_OPERATOR = 0,	/* Category given to entries built by OP ().  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
36
79bd622b 37struct token_spelling
241e762e 38{
79bd622b 39 enum spell_type category;
40 const unsigned char *name;
241e762e 41};
42
0ca849f9 43static const unsigned char *const digraph_spellings[] =
44{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
79bd622b 45
46#define OP(e, s) { SPELL_OPERATOR, U s },
18e43155 47#define TK(e, s) { s, U #e },
0ca849f9 48static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
79bd622b 49#undef OP
50#undef TK
51
52#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
53#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
e2f9a79f 54
f7fdd7a1 55static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
56static int skip_line_comment (cpp_reader *);
57static void skip_whitespace (cpp_reader *, cppchar_t);
58static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
59static void lex_number (cpp_reader *, cpp_string *);
60static bool forms_identifier_p (cpp_reader *, int);
61static void lex_string (cpp_reader *, cpp_token *, const uchar *);
62static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
63static void create_literal (cpp_reader *, cpp_token *, const uchar *,
64 unsigned int, enum cpp_ttype);
65static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
66static int name_p (cpp_reader *, const cpp_string *);
f7fdd7a1 67static tokenrun *next_tokenrun (tokenrun *);
68
f7fdd7a1 69static _cpp_buff *new_buff (size_t);
bce8e0c0 70
e920deaf 71
f80e83a9 72/* Utility routine:
2c63d6c8 73
76faa4c0 74 Compares, the token TOKEN to the NUL-terminated string STRING.
75 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
f80e83a9 76int
f7fdd7a1 77cpp_ideq (const cpp_token *token, const char *string)
f80e83a9 78{
76faa4c0 79 if (token->type != CPP_NAME)
f80e83a9 80 return 0;
76faa4c0 81
b6d18b0a 82 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
bce8e0c0 83}
50fd6b48 84
a54e0bf8 85/* Record a note TYPE at byte POS into the current cleaned logical
86 line. */
1e0ef2fd 87static void
f7fdd7a1 88add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
338fa5f7 89{
a54e0bf8 90 if (buffer->notes_used == buffer->notes_cap)
91 {
92 buffer->notes_cap = buffer->notes_cap * 2 + 200;
93 buffer->notes = (_cpp_line_note *)
94 xrealloc (buffer->notes, buffer->notes_cap * sizeof (_cpp_line_note));
95 }
338fa5f7 96
a54e0bf8 97 buffer->notes[buffer->notes_used].pos = pos;
98 buffer->notes[buffer->notes_used].type = type;
99 buffer->notes_used++;
338fa5f7 100}
101
a54e0bf8 102/* Returns with a logical line that contains no escaped newlines or
103 trigraphs. This is a time-critical inner loop. */
104void
f7fdd7a1 105_cpp_clean_line (cpp_reader *pfile)
0578f103 106{
a54e0bf8 107 cpp_buffer *buffer;
108 const uchar *s;
109 uchar c, *d, *p;
1e0ef2fd 110
a54e0bf8 111 buffer = pfile->buffer;
112 buffer->cur_note = buffer->notes_used = 0;
113 buffer->cur = buffer->line_base = buffer->next_line;
114 buffer->need_line = false;
115 s = buffer->next_line - 1;
1e0ef2fd 116
a54e0bf8 117 if (!buffer->from_stage3)
0578f103 118 {
a54e0bf8 119 d = (uchar *) s;
120
121 for (;;)
4b912310 122 {
a54e0bf8 123 c = *++s;
124 *++d = c;
125
126 if (c == '\n' || c == '\r')
127 {
128 /* Handle DOS line endings. */
129 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
130 s++;
131 if (s == buffer->rlimit)
132 break;
133
134 /* Escaped? */
135 p = d;
136 while (p != buffer->next_line && is_nvspace (p[-1]))
137 p--;
138 if (p == buffer->next_line || p[-1] != '\\')
139 break;
140
aad4a87f 141 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
a54e0bf8 142 d = p - 2;
143 buffer->next_line = p - 1;
144 }
145 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
146 {
147 /* Add a note regardless, for the benefit of -Wtrigraphs. */
aad4a87f 148 add_line_note (buffer, d, s[2]);
a54e0bf8 149 if (CPP_OPTION (pfile, trigraphs))
150 {
151 *d = _cpp_trigraph_map[s[2]];
152 s += 2;
153 }
154 }
4b912310 155 }
0578f103 156 }
a54e0bf8 157 else
158 {
159 do
160 s++;
161 while (*s != '\n' && *s != '\r');
162 d = (uchar *) s;
163
164 /* Handle DOS line endings. */
165 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
166 s++;
167 }
338fa5f7 168
a54e0bf8 169 *d = '\n';
aad4a87f 170 /* A sentinel note that should never be processed. */
171 add_line_note (buffer, d + 1, '\n');
a54e0bf8 172 buffer->next_line = s + 1;
0578f103 173}
174
3078f2b2 175/* Return true if the trigraph indicated by NOTE should be warned
176 about in a comment. */
177static bool
f7fdd7a1 178warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
3078f2b2 179{
180 const uchar *p;
181
182 /* Within comments we don't warn about trigraphs, unless the
183 trigraph forms an escaped newline, as that may change
7ef5b942 184 behavior. */
3078f2b2 185 if (note->type != '/')
186 return false;
187
188 /* If -trigraphs, then this was an escaped newline iff the next note
189 is coincident. */
190 if (CPP_OPTION (pfile, trigraphs))
191 return note[1].pos == note->pos;
192
193 /* Otherwise, see if this forms an escaped newline. */
194 p = note->pos + 3;
195 while (is_nvspace (*p))
196 p++;
197
198 /* There might have been escaped newlines between the trigraph and the
199 newline we found. Hence the position test. */
200 return (*p == '\n' && p < note[1].pos);
201}
202
a54e0bf8 203/* Process the notes created by add_line_note as far as the current
204 location. */
205void
f7fdd7a1 206_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
0578f103 207{
c808d026 208 cpp_buffer *buffer = pfile->buffer;
209
a54e0bf8 210 for (;;)
f80e83a9 211 {
a54e0bf8 212 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
213 unsigned int col;
396ffa86 214
a54e0bf8 215 if (note->pos > buffer->cur)
216 break;
396ffa86 217
a54e0bf8 218 buffer->cur_note++;
219 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
435fb09b 220
aad4a87f 221 if (note->type == '\\' || note->type == ' ')
a54e0bf8 222 {
aad4a87f 223 if (note->type == ' ' && !in_comment)
a54e0bf8 224 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
225 "backslash and newline separated by space");
aad4a87f 226
a54e0bf8 227 if (buffer->next_line > buffer->rlimit)
1e0ef2fd 228 {
a54e0bf8 229 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
230 "backslash-newline at end of file");
231 /* Prevent "no newline at end of file" warning. */
232 buffer->next_line = buffer->rlimit;
1e0ef2fd 233 }
a54e0bf8 234
235 buffer->line_base = note->pos;
236 pfile->line++;
338fa5f7 237 }
aad4a87f 238 else if (_cpp_trigraph_map[note->type])
239 {
3078f2b2 240 if (CPP_OPTION (pfile, warn_trigraphs)
241 && (!in_comment || warn_in_comment (pfile, note)))
aad4a87f 242 {
243 if (CPP_OPTION (pfile, trigraphs))
244 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
245 "trigraph ??%c converted to %c",
246 note->type,
247 (int) _cpp_trigraph_map[note->type]);
248 else
249 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
250 "trigraph ??%c ignored",
251 note->type);
252 }
253 }
254 else
255 abort ();
f80e83a9 256 }
0578f103 257}
258
338fa5f7 259/* Skip a C-style block comment. We find the end of the comment by
260 seeing if an asterisk is before every '/' we encounter. Returns
edaf8cb5 261 nonzero if comment terminated by EOF, zero otherwise.
262
263 Buffer->cur points to the initial asterisk of the comment. */
a54e0bf8 264bool
f7fdd7a1 265_cpp_skip_block_comment (cpp_reader *pfile)
0578f103 266{
f80e83a9 267 cpp_buffer *buffer = pfile->buffer;
a54e0bf8 268 cppchar_t c;
338fa5f7 269
edaf8cb5 270 buffer->cur++;
a54e0bf8 271 if (*buffer->cur == '/')
272 buffer->cur++;
338fa5f7 273
a54e0bf8 274 for (;;)
275 {
276 c = *buffer->cur++;
f80e83a9 277
338fa5f7 278 /* People like decorating comments with '*', so check for '/'
279 instead for efficiency. */
f80e83a9 280 if (c == '/')
0578f103 281 {
a54e0bf8 282 if (buffer->cur[-2] == '*')
338fa5f7 283 break;
f80e83a9 284
338fa5f7 285 /* Warn about potential nested comments, but not if the '/'
3fb1e43b 286 comes immediately before the true comment delimiter.
f80e83a9 287 Don't bother to get it right across escaped newlines. */
338fa5f7 288 if (CPP_OPTION (pfile, warn_comments)
1e0ef2fd 289 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
73328dce 290 cpp_error_with_line (pfile, DL_WARNING,
291 pfile->line, CPP_BUF_COL (buffer),
292 "\"/*\" within comment");
0578f103 293 }
a54e0bf8 294 else if (c == '\n')
295 {
296 buffer->cur--;
297 _cpp_process_line_notes (pfile, true);
298 if (buffer->next_line >= buffer->rlimit)
299 return true;
300 _cpp_clean_line (pfile);
301 pfile->line++;
302 }
0578f103 303 }
f80e83a9 304
3078f2b2 305 _cpp_process_line_notes (pfile, true);
a54e0bf8 306 return false;
0578f103 307}
308
1c124f85 309/* Skip a C++ line comment, leaving buffer->cur pointing to the
d10cfa8d 310 terminating newline. Handles escaped newlines. Returns nonzero
1c124f85 311 if a multiline comment. */
f80e83a9 312static int
f7fdd7a1 313skip_line_comment (cpp_reader *pfile)
0578f103 314{
f669338a 315 cpp_buffer *buffer = pfile->buffer;
1ea7ed21 316 unsigned int orig_line = pfile->line;
f80e83a9 317
a54e0bf8 318 while (*buffer->cur != '\n')
319 buffer->cur++;
1c124f85 320
a54e0bf8 321 _cpp_process_line_notes (pfile, true);
1ea7ed21 322 return orig_line != pfile->line;
f80e83a9 323}
0578f103 324
a54e0bf8 325/* Skips whitespace, saving the next non-whitespace character. */
b86584f6 326static void
f7fdd7a1 327skip_whitespace (cpp_reader *pfile, cppchar_t c)
f80e83a9 328{
329 cpp_buffer *buffer = pfile->buffer;
fe9eb18b 330 bool saw_NUL = false;
0578f103 331
338fa5f7 332 do
f80e83a9 333 {
78719282 334 /* Horizontal space always OK. */
a54e0bf8 335 if (c == ' ' || c == '\t')
338fa5f7 336 ;
338fa5f7 337 /* Just \f \v or \0 left. */
78719282 338 else if (c == '\0')
fe9eb18b 339 saw_NUL = true;
79bd622b 340 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
73328dce 341 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
342 CPP_BUF_COL (buffer),
343 "%s in preprocessing directive",
344 c == '\f' ? "form feed" : "vertical tab");
338fa5f7 345
338fa5f7 346 c = *buffer->cur++;
0578f103 347 }
2c0e001b 348 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
338fa5f7 349 while (is_nvspace (c));
350
fe9eb18b 351 if (saw_NUL)
352 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
353
1c124f85 354 buffer->cur--;
f80e83a9 355}
0578f103 356
79bd622b 357/* See if the characters of a number token are valid in a name (no
358 '.', '+' or '-'). */
359static int
f7fdd7a1 360name_p (cpp_reader *pfile, const cpp_string *string)
79bd622b 361{
362 unsigned int i;
363
364 for (i = 0; i < string->len; i++)
365 if (!is_idchar (string->text[i]))
366 return 0;
367
b1a9ff83 368 return 1;
79bd622b 369}
370
5bb46c08 371/* Returns TRUE if the sequence starting at buffer->cur is invalid in
2cbf1359 372 an identifier. FIRST is TRUE if this starts an identifier. */
5bb46c08 373static bool
f7fdd7a1 374forms_identifier_p (cpp_reader *pfile, int first)
5bb46c08 375{
2cbf1359 376 cpp_buffer *buffer = pfile->buffer;
377
378 if (*buffer->cur == '$')
379 {
380 if (!CPP_OPTION (pfile, dollars_in_ident))
381 return false;
382
383 buffer->cur++;
f0c2775b 384 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
2cbf1359 385 {
f0c2775b 386 CPP_OPTION (pfile, warn_dollars) = 0;
2cbf1359 387 cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
388 }
389
390 return true;
391 }
5bb46c08 392
2cbf1359 393 /* Is this a syntactically valid UCN? */
394 if (0 && *buffer->cur == '\\'
395 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
5bb46c08 396 {
2cbf1359 397 buffer->cur += 2;
ebc03810 398 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
2cbf1359 399 return true;
400 buffer->cur -= 2;
5bb46c08 401 }
5bb46c08 402
2cbf1359 403 return false;
5bb46c08 404}
405
406/* Lex an identifier starting at BUFFER->CUR - 1. */
338fa5f7 407static cpp_hashnode *
f7fdd7a1 408lex_identifier (cpp_reader *pfile, const uchar *base)
0578f103 409{
79bd622b 410 cpp_hashnode *result;
2cbf1359 411 const uchar *cur;
66a5287e 412
5bb46c08 413 do
78a11351 414 {
5bb46c08 415 cur = pfile->buffer->cur;
416
417 /* N.B. ISIDNUM does not include $. */
418 while (ISIDNUM (*cur))
419 cur++;
78a11351 420
78a11351 421 pfile->buffer->cur = cur;
66a5287e 422 }
2cbf1359 423 while (forms_identifier_p (pfile, false));
5bb46c08 424
425 result = (cpp_hashnode *)
426 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
66a5287e 427
5bb46c08 428 /* Rarely, identifiers require diagnostics when lexed. */
66a5287e 429 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
430 && !pfile->state.skipping, 0))
431 {
432 /* It is allowed to poison the same identifier twice. */
433 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
73328dce 434 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
66a5287e 435 NODE_NAME (result));
436
437 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
438 replacement list of a variadic macro. */
439 if (result == pfile->spec_nodes.n__VA_ARGS__
440 && !pfile->state.va_args_ok)
73328dce 441 cpp_error (pfile, DL_PEDWARN,
f7fdd7a1 442 "__VA_ARGS__ can only appear in the expansion"
443 " of a C99 variadic macro");
66a5287e 444 }
445
446 return result;
447}
448
5bb46c08 449/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
0578f103 450static void
f7fdd7a1 451lex_number (cpp_reader *pfile, cpp_string *number)
0578f103 452{
b6d18b0a 453 const uchar *cur;
5bb46c08 454 const uchar *base;
455 uchar *dest;
0578f103 456
5bb46c08 457 base = pfile->buffer->cur - 1;
458 do
f80e83a9 459 {
5bb46c08 460 cur = pfile->buffer->cur;
338fa5f7 461
5bb46c08 462 /* N.B. ISIDNUM does not include $. */
463 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
464 cur++;
0578f103 465
78a11351 466 pfile->buffer->cur = cur;
0578f103 467 }
2cbf1359 468 while (forms_identifier_p (pfile, false));
79bd622b 469
5bb46c08 470 number->len = cur - base;
471 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
472 memcpy (dest, base, number->len);
473 dest[number->len] = '\0';
474 number->text = dest;
79bd622b 475}
476
4970d4c2 477/* Create a token of type TYPE with a literal spelling. */
478static void
f7fdd7a1 479create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
480 unsigned int len, enum cpp_ttype type)
4970d4c2 481{
482 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
483
484 memcpy (dest, base, len);
485 dest[len] = '\0';
486 token->type = type;
487 token->val.str.len = len;
488 token->val.str.text = dest;
489}
490
5bb46c08 491/* Lexes a string, character constant, or angle-bracketed header file
4970d4c2 492 name. The stored string contains the spelling, including opening
493 quote and leading any leading 'L'. It returns the type of the
494 literal, or CPP_OTHER if it was not properly terminated.
495
496 The spelling is NUL-terminated, but it is not guaranteed that this
497 is the first NUL since embedded NULs are preserved. */
f80e83a9 498static void
f7fdd7a1 499lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
0578f103 500{
4970d4c2 501 bool saw_NUL = false;
502 const uchar *cur;
5bb46c08 503 cppchar_t terminator;
4970d4c2 504 enum cpp_ttype type;
505
506 cur = base;
507 terminator = *cur++;
508 if (terminator == 'L')
509 terminator = *cur++;
510 if (terminator == '\"')
511 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
512 else if (terminator == '\'')
513 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
514 else
515 terminator = '>', type = CPP_HEADER_NAME;
79bd622b 516
338fa5f7 517 for (;;)
0578f103 518 {
4970d4c2 519 cppchar_t c = *cur++;
4b0c16ee 520
edaf8cb5 521 /* In #include-style directives, terminators are not escapable. */
4970d4c2 522 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
523 cur++;
524 else if (c == terminator)
5bb46c08 525 break;
4970d4c2 526 else if (c == '\n')
338fa5f7 527 {
4970d4c2 528 cur--;
529 type = CPP_OTHER;
530 break;
0578f103 531 }
4970d4c2 532 else if (c == '\0')
533 saw_NUL = true;
0578f103 534 }
535
4970d4c2 536 if (saw_NUL && !pfile->state.skipping)
537 cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
0578f103 538
4970d4c2 539 pfile->buffer->cur = cur;
540 create_literal (pfile, token, base, cur - base, type);
338fa5f7 541}
f80e83a9 542
79bd622b 543/* The stored comment includes the comment start and any terminator. */
2c63d6c8 544static void
f7fdd7a1 545save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
546 cppchar_t type)
2c63d6c8 547{
f80e83a9 548 unsigned char *buffer;
d3f7919d 549 unsigned int len, clen;
b1a9ff83 550
f0495c2c 551 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1c124f85 552
a543b315 553 /* C++ comments probably (not definitely) have moved past a new
554 line, which we don't want to save in the comment. */
1c124f85 555 if (is_vspace (pfile->buffer->cur[-1]))
a543b315 556 len--;
d3f7919d 557
558 /* If we are currently in a directive, then we need to store all
559 C++ comments as C comments internally, and so we need to
560 allocate a little extra space in that case.
561
562 Note that the only time we encounter a directive here is
563 when we are saving comments in a "#define". */
564 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
565
566 buffer = _cpp_unaligned_alloc (pfile, clen);
b1a9ff83 567
f80e83a9 568 token->type = CPP_COMMENT;
d3f7919d 569 token->val.str.len = clen;
338fa5f7 570 token->val.str.text = buffer;
0578f103 571
f0495c2c 572 buffer[0] = '/';
573 memcpy (buffer + 1, from, len - 1);
d3f7919d 574
a113df96 575 /* Finish conversion to a C comment, if necessary. */
d3f7919d 576 if (pfile->state.in_directive && type == '/')
577 {
578 buffer[1] = '*';
579 buffer[clen - 2] = '*';
580 buffer[clen - 1] = '/';
581 }
338fa5f7 582}
0578f103 583
83dcbb5c 584/* Allocate COUNT tokens for RUN. */
585void
f7fdd7a1 586_cpp_init_tokenrun (tokenrun *run, unsigned int count)
83dcbb5c 587{
588 run->base = xnewvec (cpp_token, count);
589 run->limit = run->base + count;
590 run->next = NULL;
591}
592
593/* Returns the next tokenrun, or creates one if there is none. */
594static tokenrun *
f7fdd7a1 595next_tokenrun (tokenrun *run)
83dcbb5c 596{
597 if (run->next == NULL)
598 {
599 run->next = xnew (tokenrun);
fb5ab82c 600 run->next->prev = run;
83dcbb5c 601 _cpp_init_tokenrun (run->next, 250);
602 }
603
604 return run->next;
605}
606
f9b5f742 607/* Allocate a single token that is invalidated at the same time as the
608 rest of the tokens on the line. Has its line and col set to the
609 same as the last lexed token, so that diagnostics appear in the
610 right place. */
611cpp_token *
f7fdd7a1 612_cpp_temp_token (cpp_reader *pfile)
f9b5f742 613{
614 cpp_token *old, *result;
615
616 old = pfile->cur_token - 1;
617 if (pfile->cur_token == pfile->cur_run->limit)
618 {
619 pfile->cur_run = next_tokenrun (pfile->cur_run);
620 pfile->cur_token = pfile->cur_run->base;
621 }
622
623 result = pfile->cur_token++;
624 result->line = old->line;
625 result->col = old->col;
626 return result;
627}
628
10b4496a 629/* Lex a token into RESULT (external interface). Takes care of issues
630 like directive handling, token lookahead, multiple include
3fb1e43b 631 optimization and skipping. */
c00e481c 632const cpp_token *
f7fdd7a1 633_cpp_lex_token (cpp_reader *pfile)
83dcbb5c 634{
fb5ab82c 635 cpp_token *result;
83dcbb5c 636
fb5ab82c 637 for (;;)
83dcbb5c 638 {
fb5ab82c 639 if (pfile->cur_token == pfile->cur_run->limit)
83dcbb5c 640 {
fb5ab82c 641 pfile->cur_run = next_tokenrun (pfile->cur_run);
642 pfile->cur_token = pfile->cur_run->base;
83dcbb5c 643 }
644
fb5ab82c 645 if (pfile->lookaheads)
10b4496a 646 {
647 pfile->lookaheads--;
648 result = pfile->cur_token++;
649 }
fb5ab82c 650 else
10b4496a 651 result = _cpp_lex_direct (pfile);
fb5ab82c 652
653 if (result->flags & BOL)
83dcbb5c 654 {
fb5ab82c 655 /* Is this a directive. If _cpp_handle_directive returns
656 false, it is an assembler #. */
657 if (result->type == CPP_HASH
d6af0368 658 /* 6.10.3 p 11: Directives in a list of macro arguments
659 gives undefined behavior. This implementation
660 handles the directive as normal. */
661 && pfile->state.parsing_args != 1
fb5ab82c 662 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
663 continue;
5621a364 664 if (pfile->cb.line_change && !pfile->state.skipping)
f7fdd7a1 665 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
83dcbb5c 666 }
83dcbb5c 667
fb5ab82c 668 /* We don't skip tokens in directives. */
669 if (pfile->state.in_directive)
670 break;
83dcbb5c 671
fb5ab82c 672 /* Outside a directive, invalidate controlling macros. At file
10b4496a 673 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
7ef5b942 674 get here and MI optimization works. */
83dcbb5c 675 pfile->mi_valid = false;
fb5ab82c 676
677 if (!pfile->state.skipping || result->type == CPP_EOF)
678 break;
83dcbb5c 679 }
680
c00e481c 681 return result;
83dcbb5c 682}
683
a54e0bf8 684/* Returns true if a fresh line has been loaded. */
685bool
f7fdd7a1 686_cpp_get_fresh_line (cpp_reader *pfile)
0bb65704 687{
a54e0bf8 688 /* We can't get a new line until we leave the current directive. */
689 if (pfile->state.in_directive)
690 return false;
b1a9ff83 691
a54e0bf8 692 for (;;)
fb83e0d6 693 {
a54e0bf8 694 cpp_buffer *buffer = pfile->buffer;
fb83e0d6 695
a54e0bf8 696 if (!buffer->need_line)
697 return true;
698
699 if (buffer->next_line < buffer->rlimit)
0bb65704 700 {
a54e0bf8 701 _cpp_clean_line (pfile);
702 return true;
703 }
0bb65704 704
a54e0bf8 705 /* First, get out of parsing arguments state. */
706 if (pfile->state.parsing_args)
707 return false;
708
709 /* End of buffer. Non-empty files should end in a newline. */
710 if (buffer->buf != buffer->rlimit
711 && buffer->next_line > buffer->rlimit
712 && !buffer->from_stage3)
713 {
714 /* Only warn once. */
715 buffer->next_line = buffer->rlimit;
716 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
717 CPP_BUF_COLUMN (buffer, buffer->cur),
718 "no newline at end of file");
719 }
720
cc6448c7 721 if (!buffer->prev)
722 return false;
723
a54e0bf8 724 if (buffer->return_at_eof)
725 {
cc6448c7 726 _cpp_pop_buffer (pfile);
a54e0bf8 727 return false;
0bb65704 728 }
0bb65704 729
a54e0bf8 730 _cpp_pop_buffer (pfile);
731 }
0bb65704 732}
733
/* Classify a one- or two-character operator.  Sets result->type to
   ELSE_TYPE; if the next input character (*buffer->cur) equals CHAR,
   that character is consumed and result->type becomes THEN_TYPE
   instead.  Relies on variables named `result' and `buffer' being in
   scope where the macro is used.  The arguments are parenthesized so
   that any expression may be passed without being re-associated with
   the surrounding operators.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      result->type = (ELSE_TYPE);			\
      if (*buffer->cur == (CHAR))			\
	buffer->cur++, result->type = (THEN_TYPE);	\
    }							\
  while (0)
1c124f85 742
10b4496a 743/* Lex a token into pfile->cur_token, which is also incremented, to
744 get diagnostics pointing to the correct location.
745
746 Does not handle issues such as token lookahead, multiple-include
4172d65e 747 optimization, directives, skipping etc. This function is only
10b4496a 748 suitable for use by _cpp_lex_token, and in special cases like
749 lex_expansion_token which doesn't care for any of these issues.
750
751 When meeting a newline, returns CPP_EOF if parsing a directive,
752 otherwise returns to the start of the token buffer if permissible.
753 Returns the location of the lexed token. */
754cpp_token *
f7fdd7a1 755_cpp_lex_direct (cpp_reader *pfile)
0578f103 756{
338fa5f7 757 cppchar_t c;
230f0943 758 cpp_buffer *buffer;
338fa5f7 759 const unsigned char *comment_start;
10b4496a 760 cpp_token *result = pfile->cur_token++;
0653b94e 761
83dcbb5c 762 fresh_line:
a54e0bf8 763 result->flags = 0;
764 if (pfile->buffer->need_line)
765 {
766 if (!_cpp_get_fresh_line (pfile))
767 {
768 result->type = CPP_EOF;
2908f819 769 if (!pfile->state.in_directive)
770 {
771 /* Tell the compiler the line number of the EOF token. */
772 result->line = pfile->line;
773 result->flags = BOL;
774 }
a54e0bf8 775 return result;
776 }
777 if (!pfile->keep_tokens)
778 {
779 pfile->cur_run = &pfile->base_run;
780 result = pfile->base_run.base;
781 pfile->cur_token = result + 1;
782 }
783 result->flags = BOL;
784 if (pfile->state.parsing_args == 2)
785 result->flags |= PREV_WHITE;
786 }
230f0943 787 buffer = pfile->buffer;
83dcbb5c 788 update_tokens_line:
36a0aa7c 789 result->line = pfile->line;
f80e83a9 790
83dcbb5c 791 skipped_white:
a54e0bf8 792 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
793 && !pfile->overlaid_buffer)
794 {
795 _cpp_process_line_notes (pfile, false);
796 result->line = pfile->line;
797 }
1c124f85 798 c = *buffer->cur++;
83dcbb5c 799 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
83dcbb5c 800
338fa5f7 801 switch (c)
0578f103 802 {
435fb09b 803 case ' ': case '\t': case '\f': case '\v': case '\0':
804 result->flags |= PREV_WHITE;
a54e0bf8 805 skip_whitespace (pfile, c);
806 goto skipped_white;
338fa5f7 807
a54e0bf8 808 case '\n':
809 pfile->line++;
810 buffer->need_line = true;
811 goto fresh_line;
732cb4c9 812
338fa5f7 813 case '0': case '1': case '2': case '3': case '4':
814 case '5': case '6': case '7': case '8': case '9':
815 result->type = CPP_NUMBER;
5bb46c08 816 lex_number (pfile, &result->val.str);
338fa5f7 817 break;
732cb4c9 818
78c551ad 819 case 'L':
820 /* 'L' may introduce wide characters or strings. */
5bb46c08 821 if (*buffer->cur == '\'' || *buffer->cur == '"')
822 {
4970d4c2 823 lex_string (pfile, result, buffer->cur - 1);
5bb46c08 824 break;
825 }
b1a9ff83 826 /* Fall through. */
78c551ad 827
338fa5f7 828 case '_':
829 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
830 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
831 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
832 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
833 case 'y': case 'z':
834 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
78c551ad 835 case 'G': case 'H': case 'I': case 'J': case 'K':
338fa5f7 836 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
837 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
838 case 'Y': case 'Z':
839 result->type = CPP_NAME;
2cbf1359 840 result->val.node = lex_identifier (pfile, buffer->cur - 1);
338fa5f7 841
338fa5f7 842 /* Convert named operators to their proper types. */
78c551ad 843 if (result->val.node->flags & NODE_OPERATOR)
338fa5f7 844 {
845 result->flags |= NAMED_OP;
805e22b2 846 result->type = result->val.node->directive_index;
338fa5f7 847 }
848 break;
849
850 case '\'':
851 case '"':
4970d4c2 852 lex_string (pfile, result, buffer->cur - 1);
338fa5f7 853 break;
f80e83a9 854
338fa5f7 855 case '/':
f0495c2c 856 /* A potential block or line comment. */
857 comment_start = buffer->cur;
edaf8cb5 858 c = *buffer->cur;
859
f0495c2c 860 if (c == '*')
861 {
a54e0bf8 862 if (_cpp_skip_block_comment (pfile))
73328dce 863 cpp_error (pfile, DL_ERROR, "unterminated comment");
338fa5f7 864 }
1c124f85 865 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
866 || CPP_IN_SYSTEM_HEADER (pfile)))
338fa5f7 867 {
5db5d057 868 /* Warn about comments only if pedantically GNUC89, and not
869 in system headers. */
870 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
66914e49 871 && ! buffer->warned_cplusplus_comments)
f80e83a9 872 {
73328dce 873 cpp_error (pfile, DL_PEDWARN,
ba059ac0 874 "C++ style comments are not allowed in ISO C90");
73328dce 875 cpp_error (pfile, DL_PEDWARN,
876 "(this will be reported only once per input file)");
f0495c2c 877 buffer->warned_cplusplus_comments = 1;
878 }
338fa5f7 879
e1caf668 880 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
73328dce 881 cpp_error (pfile, DL_WARNING, "multi-line comment");
f0495c2c 882 }
1c124f85 883 else if (c == '=')
884 {
edaf8cb5 885 buffer->cur++;
1c124f85 886 result->type = CPP_DIV_EQ;
887 break;
888 }
889 else
890 {
1c124f85 891 result->type = CPP_DIV;
892 break;
893 }
338fa5f7 894
f0495c2c 895 if (!pfile->state.save_comments)
896 {
897 result->flags |= PREV_WHITE;
83dcbb5c 898 goto update_tokens_line;
338fa5f7 899 }
f0495c2c 900
901 /* Save the comment as a token in its own right. */
d3f7919d 902 save_comment (pfile, result, comment_start, c);
fb5ab82c 903 break;
338fa5f7 904
905 case '<':
906 if (pfile->state.angled_headers)
907 {
4970d4c2 908 lex_string (pfile, result, buffer->cur - 1);
1c124f85 909 break;
338fa5f7 910 }
0578f103 911
edaf8cb5 912 result->type = CPP_LESS;
913 if (*buffer->cur == '=')
914 buffer->cur++, result->type = CPP_LESS_EQ;
915 else if (*buffer->cur == '<')
338fa5f7 916 {
edaf8cb5 917 buffer->cur++;
918 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
338fa5f7 919 }
edaf8cb5 920 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
338fa5f7 921 {
edaf8cb5 922 buffer->cur++;
923 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
338fa5f7 924 }
edaf8cb5 925 else if (CPP_OPTION (pfile, digraphs))
1c124f85 926 {
edaf8cb5 927 if (*buffer->cur == ':')
928 {
929 buffer->cur++;
930 result->flags |= DIGRAPH;
931 result->type = CPP_OPEN_SQUARE;
932 }
933 else if (*buffer->cur == '%')
934 {
935 buffer->cur++;
936 result->flags |= DIGRAPH;
937 result->type = CPP_OPEN_BRACE;
938 }
1c124f85 939 }
338fa5f7 940 break;
941
942 case '>':
edaf8cb5 943 result->type = CPP_GREATER;
944 if (*buffer->cur == '=')
945 buffer->cur++, result->type = CPP_GREATER_EQ;
946 else if (*buffer->cur == '>')
338fa5f7 947 {
edaf8cb5 948 buffer->cur++;
949 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
950 }
951 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
952 {
953 buffer->cur++;
954 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
338fa5f7 955 }
956 break;
957
f669338a 958 case '%':
edaf8cb5 959 result->type = CPP_MOD;
960 if (*buffer->cur == '=')
961 buffer->cur++, result->type = CPP_MOD_EQ;
962 else if (CPP_OPTION (pfile, digraphs))
1c124f85 963 {
edaf8cb5 964 if (*buffer->cur == ':')
1c124f85 965 {
edaf8cb5 966 buffer->cur++;
967 result->flags |= DIGRAPH;
968 result->type = CPP_HASH;
969 if (*buffer->cur == '%' && buffer->cur[1] == ':')
970 buffer->cur += 2, result->type = CPP_PASTE;
971 }
972 else if (*buffer->cur == '>')
973 {
974 buffer->cur++;
975 result->flags |= DIGRAPH;
976 result->type = CPP_CLOSE_BRACE;
1c124f85 977 }
1c124f85 978 }
338fa5f7 979 break;
980
f669338a 981 case '.':
1c124f85 982 result->type = CPP_DOT;
edaf8cb5 983 if (ISDIGIT (*buffer->cur))
1c124f85 984 {
985 result->type = CPP_NUMBER;
5bb46c08 986 lex_number (pfile, &result->val.str);
1c124f85 987 }
edaf8cb5 988 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
989 buffer->cur += 2, result->type = CPP_ELLIPSIS;
990 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
991 buffer->cur++, result->type = CPP_DOT_STAR;
338fa5f7 992 break;
0578f103 993
338fa5f7 994 case '+':
edaf8cb5 995 result->type = CPP_PLUS;
996 if (*buffer->cur == '+')
997 buffer->cur++, result->type = CPP_PLUS_PLUS;
998 else if (*buffer->cur == '=')
999 buffer->cur++, result->type = CPP_PLUS_EQ;
338fa5f7 1000 break;
ac0749c7 1001
338fa5f7 1002 case '-':
edaf8cb5 1003 result->type = CPP_MINUS;
1004 if (*buffer->cur == '>')
338fa5f7 1005 {
edaf8cb5 1006 buffer->cur++;
1c124f85 1007 result->type = CPP_DEREF;
edaf8cb5 1008 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1009 buffer->cur++, result->type = CPP_DEREF_STAR;
1c124f85 1010 }
edaf8cb5 1011 else if (*buffer->cur == '-')
1012 buffer->cur++, result->type = CPP_MINUS_MINUS;
1013 else if (*buffer->cur == '=')
1014 buffer->cur++, result->type = CPP_MINUS_EQ;
338fa5f7 1015 break;
0578f103 1016
338fa5f7 1017 case '&':
edaf8cb5 1018 result->type = CPP_AND;
1019 if (*buffer->cur == '&')
1020 buffer->cur++, result->type = CPP_AND_AND;
1021 else if (*buffer->cur == '=')
1022 buffer->cur++, result->type = CPP_AND_EQ;
338fa5f7 1023 break;
b1a9ff83 1024
338fa5f7 1025 case '|':
edaf8cb5 1026 result->type = CPP_OR;
1027 if (*buffer->cur == '|')
1028 buffer->cur++, result->type = CPP_OR_OR;
1029 else if (*buffer->cur == '=')
1030 buffer->cur++, result->type = CPP_OR_EQ;
338fa5f7 1031 break;
0578f103 1032
338fa5f7 1033 case ':':
edaf8cb5 1034 result->type = CPP_COLON;
1035 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1036 buffer->cur++, result->type = CPP_SCOPE;
1037 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
338fa5f7 1038 {
edaf8cb5 1039 buffer->cur++;
338fa5f7 1040 result->flags |= DIGRAPH;
1c124f85 1041 result->type = CPP_CLOSE_SQUARE;
1042 }
338fa5f7 1043 break;
0578f103 1044
1c124f85 1045 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1046 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1047 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1048 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1049 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1050
a54e0bf8 1051 case '?': result->type = CPP_QUERY; break;
338fa5f7 1052 case '~': result->type = CPP_COMPL; break;
1053 case ',': result->type = CPP_COMMA; break;
1054 case '(': result->type = CPP_OPEN_PAREN; break;
1055 case ')': result->type = CPP_CLOSE_PAREN; break;
1056 case '[': result->type = CPP_OPEN_SQUARE; break;
1057 case ']': result->type = CPP_CLOSE_SQUARE; break;
1058 case '{': result->type = CPP_OPEN_BRACE; break;
1059 case '}': result->type = CPP_CLOSE_BRACE; break;
1060 case ';': result->type = CPP_SEMICOLON; break;
1061
7fd957fe 1062 /* @ is a punctuator in Objective-C. */
9ee99ac6 1063 case '@': result->type = CPP_ATSIGN; break;
338fa5f7 1064
78c551ad 1065 case '$':
2cbf1359 1066 case '\\':
1067 {
1068 const uchar *base = --buffer->cur;
78c551ad 1069
2cbf1359 1070 if (forms_identifier_p (pfile, true))
1071 {
1072 result->type = CPP_NAME;
1073 result->val.node = lex_identifier (pfile, base);
1074 break;
1075 }
1076 buffer->cur++;
bc205914 1077 }
2cbf1359 1078
bc205914 1079 default:
4970d4c2 1080 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1081 break;
338fa5f7 1082 }
fb5ab82c 1083
1084 return result;
338fa5f7 1085}
1086
b1280514 1087/* An upper bound on the number of bytes needed to spell TOKEN.
1088 Does not include preceding whitespace. */
79bd622b 1089unsigned int
f7fdd7a1 1090cpp_token_len (const cpp_token *token)
338fa5f7 1091{
79bd622b 1092 unsigned int len;
cfad5579 1093
79bd622b 1094 switch (TOKEN_SPELL (token))
f80e83a9 1095 {
b1280514 1096 default: len = 4; break;
4970d4c2 1097 case SPELL_LITERAL: len = token->val.str.len; break;
c86dbc5b 1098 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
f80e83a9 1099 }
b1280514 1100
1101 return len;
cfad5579 1102}
1103
f80e83a9 1104/* Write the spelling of a token TOKEN to BUFFER. The buffer must
c5ea33a8 1105 already contain the enough space to hold the token's spelling.
f7fdd7a1 1106 Returns a pointer to the character after the last character written.
1107 FIXME: Would be nice if we didn't need the PFILE argument. */
79bd622b 1108unsigned char *
f7fdd7a1 1109cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1110 unsigned char *buffer)
f80e83a9 1111{
7e842f95 1112 switch (TOKEN_SPELL (token))
f80e83a9 1113 {
1114 case SPELL_OPERATOR:
1115 {
1116 const unsigned char *spelling;
1117 unsigned char c;
ab12a39c 1118
f80e83a9 1119 if (token->flags & DIGRAPH)
ee6c4e4b 1120 spelling
1121 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
31674461 1122 else if (token->flags & NAMED_OP)
1123 goto spell_ident;
f80e83a9 1124 else
7e842f95 1125 spelling = TOKEN_NAME (token);
b1a9ff83 1126
f80e83a9 1127 while ((c = *spelling++) != '\0')
1128 *buffer++ = c;
1129 }
1130 break;
ab12a39c 1131
8d27e472 1132 spell_ident:
f80e83a9 1133 case SPELL_IDENT:
c86dbc5b 1134 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1135 buffer += NODE_LEN (token->val.node);
f80e83a9 1136 break;
ab12a39c 1137
4970d4c2 1138 case SPELL_LITERAL:
8d27e472 1139 memcpy (buffer, token->val.str.text, token->val.str.len);
1140 buffer += token->val.str.len;
1141 break;
1142
f80e83a9 1143 case SPELL_NONE:
73328dce 1144 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
f80e83a9 1145 break;
1146 }
ab12a39c 1147
f80e83a9 1148 return buffer;
1149}
ab12a39c 1150
e484a1cc 1151/* Returns TOKEN spelt as a null-terminated string. The string is
1152 freed when the reader is destroyed. Useful for diagnostics. */
79bd622b 1153unsigned char *
f7fdd7a1 1154cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
b1280514 1155{
1156 unsigned int len = cpp_token_len (token) + 1;
1fdf6039 1157 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
6060326b 1158
79bd622b 1159 end = cpp_spell_token (pfile, token, start);
1160 end[0] = '\0';
6060326b 1161
79bd622b 1162 return start;
1163}
6060326b 1164
e484a1cc 1165/* Used by C front ends, which really should move to using
1166 cpp_token_as_text. */
79bd622b 1167const char *
f7fdd7a1 1168cpp_type2name (enum cpp_ttype type)
79bd622b 1169{
1170 return (const char *) token_spellings[type].name;
1171}
6060326b 1172
f9b5f742 1173/* Writes the spelling of token to FP, without any preceding space.
1174 Separated from cpp_spell_token for efficiency - to avoid stdio
1175 double-buffering. */
79bd622b 1176void
f7fdd7a1 1177cpp_output_token (const cpp_token *token, FILE *fp)
79bd622b 1178{
79bd622b 1179 switch (TOKEN_SPELL (token))
6060326b 1180 {
79bd622b 1181 case SPELL_OPERATOR:
1182 {
1183 const unsigned char *spelling;
28874558 1184 int c;
6060326b 1185
79bd622b 1186 if (token->flags & DIGRAPH)
ee6c4e4b 1187 spelling
1188 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
79bd622b 1189 else if (token->flags & NAMED_OP)
1190 goto spell_ident;
1191 else
1192 spelling = TOKEN_NAME (token);
f80e83a9 1193
28874558 1194 c = *spelling;
1195 do
1196 putc (c, fp);
1197 while ((c = *++spelling) != '\0');
79bd622b 1198 }
1199 break;
f80e83a9 1200
79bd622b 1201 spell_ident:
1202 case SPELL_IDENT:
28874558 1203 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
79bd622b 1204 break;
f80e83a9 1205
4970d4c2 1206 case SPELL_LITERAL:
8d27e472 1207 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1208 break;
1209
79bd622b 1210 case SPELL_NONE:
1211 /* An error, most probably. */
1212 break;
f80e83a9 1213 }
6060326b 1214}
1215
79bd622b 1216/* Compare two tokens. */
1217int
f7fdd7a1 1218_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
6060326b 1219{
79bd622b 1220 if (a->type == b->type && a->flags == b->flags)
1221 switch (TOKEN_SPELL (a))
1222 {
1223 default: /* Keep compiler happy. */
1224 case SPELL_OPERATOR:
1225 return 1;
79bd622b 1226 case SPELL_NONE:
588d632b 1227 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
79bd622b 1228 case SPELL_IDENT:
1229 return a->val.node == b->val.node;
4970d4c2 1230 case SPELL_LITERAL:
79bd622b 1231 return (a->val.str.len == b->val.str.len
1232 && !memcmp (a->val.str.text, b->val.str.text,
1233 a->val.str.len));
1234 }
6060326b 1235
f80e83a9 1236 return 0;
1237}
1238
79bd622b 1239/* Returns nonzero if a space should be inserted to avoid an
1240 accidental token paste for output. For simplicity, it is
1241 conservative, and occasionally advises a space where one is not
1242 needed, e.g. "." and ".2". */
79bd622b 1243int
f7fdd7a1 1244cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1245 const cpp_token *token2)
6060326b 1246{
79bd622b 1247 enum cpp_ttype a = token1->type, b = token2->type;
1248 cppchar_t c;
6060326b 1249
79bd622b 1250 if (token1->flags & NAMED_OP)
1251 a = CPP_NAME;
1252 if (token2->flags & NAMED_OP)
1253 b = CPP_NAME;
6060326b 1254
79bd622b 1255 c = EOF;
1256 if (token2->flags & DIGRAPH)
ee6c4e4b 1257 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
79bd622b 1258 else if (token_spellings[b].category == SPELL_OPERATOR)
1259 c = token_spellings[b].name[0];
6060326b 1260
79bd622b 1261 /* Quickly get everything that can paste with an '='. */
ee6c4e4b 1262 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
79bd622b 1263 return 1;
6060326b 1264
79bd622b 1265 switch (a)
6060326b 1266 {
79bd622b 1267 case CPP_GREATER: return c == '>' || c == '?';
1268 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1269 case CPP_PLUS: return c == '+';
1270 case CPP_MINUS: return c == '-' || c == '>';
1271 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1272 case CPP_MOD: return c == ':' || c == '>';
1273 case CPP_AND: return c == '&';
1274 case CPP_OR: return c == '|';
1275 case CPP_COLON: return c == ':' || c == '>';
1276 case CPP_DEREF: return c == '*';
efdcc728 1277 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
79bd622b 1278 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1279 case CPP_NAME: return ((b == CPP_NUMBER
1280 && name_p (pfile, &token2->val.str))
1281 || b == CPP_NAME
1282 || b == CPP_CHAR || b == CPP_STRING); /* L */
1283 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1284 || c == '.' || c == '+' || c == '-');
2cbf1359 1285 /* UCNs */
bc205914 1286 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1287 && b == CPP_NAME)
2cbf1359 1288 || (CPP_OPTION (pfile, objc)
bc205914 1289 && token1->val.str.text[0] == '@'
2cbf1359 1290 && (b == CPP_NAME || b == CPP_STRING)));
79bd622b 1291 default: break;
6060326b 1292 }
6060326b 1293
deb356cf 1294 return 0;
6060326b 1295}
1296
79bd622b 1297/* Output all the remaining tokens on the current line, and a newline
f9b5f742 1298 character, to FP. Leading whitespace is removed. If there are
1299 macros, special token padding is not performed. */
6060326b 1300void
f7fdd7a1 1301cpp_output_line (cpp_reader *pfile, FILE *fp)
6060326b 1302{
f9b5f742 1303 const cpp_token *token;
7e842f95 1304
f9b5f742 1305 token = cpp_get_token (pfile);
1306 while (token->type != CPP_EOF)
7e842f95 1307 {
f9b5f742 1308 cpp_output_token (token, fp);
1309 token = cpp_get_token (pfile);
1310 if (token->flags & PREV_WHITE)
1311 putc (' ', fp);
7e842f95 1312 }
1313
79bd622b 1314 putc ('\n', fp);
f80e83a9 1315}
6060326b 1316
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that is
   still considered acceptable for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   growing BUFF: MIN_EXTRA plus twice the bytes between BUFF's cur and
   limit.  Every macro argument is parenthesized so that expression
   arguments expand correctly.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1331
1785b647 1332/* Create a new allocation buffer. Place the control block at the end
1333 of the buffer, so that buffer overflows will cause immediate chaos. */
06c92cbc 1334static _cpp_buff *
f7fdd7a1 1335new_buff (size_t len)
06c92cbc 1336{
1337 _cpp_buff *result;
1fdf6039 1338 unsigned char *base;
06c92cbc 1339
084163dc 1340 if (len < MIN_BUFF_SIZE)
1341 len = MIN_BUFF_SIZE;
198b48a0 1342 len = CPP_ALIGN (len);
06c92cbc 1343
1344 base = xmalloc (len + sizeof (_cpp_buff));
1345 result = (_cpp_buff *) (base + len);
1346 result->base = base;
1347 result->cur = base;
1348 result->limit = base + len;
1349 result->next = NULL;
1350 return result;
1351}
1352
1353/* Place a chain of unwanted allocation buffers on the free list. */
1354void
f7fdd7a1 1355_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
06c92cbc 1356{
1357 _cpp_buff *end = buff;
1358
1359 while (end->next)
1360 end = end->next;
1361 end->next = pfile->free_buffs;
1362 pfile->free_buffs = buff;
1363}
1364
1365/* Return a free buffer of size at least MIN_SIZE. */
1366_cpp_buff *
f7fdd7a1 1367_cpp_get_buff (cpp_reader *pfile, size_t min_size)
06c92cbc 1368{
1369 _cpp_buff *result, **p;
1370
1371 for (p = &pfile->free_buffs;; p = &(*p)->next)
1372 {
4b31a107 1373 size_t size;
084163dc 1374
1375 if (*p == NULL)
06c92cbc 1376 return new_buff (min_size);
084163dc 1377 result = *p;
1378 size = result->limit - result->base;
1379 /* Return a buffer that's big enough, but don't waste one that's
1380 way too big. */
4085c149 1381 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
06c92cbc 1382 break;
1383 }
1384
1385 *p = result->next;
1386 result->next = NULL;
1387 result->cur = result->base;
1388 return result;
1389}
1390
20dd417a 1391/* Creates a new buffer with enough space to hold the uncommitted
e6a5f963 1392 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1393 the excess bytes to the new buffer. Chains the new buffer after
1394 BUFF, and returns the new buffer. */
06c92cbc 1395_cpp_buff *
f7fdd7a1 1396_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
06c92cbc 1397{
4b31a107 1398 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
e6a5f963 1399 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
06c92cbc 1400
e6a5f963 1401 buff->next = new_buff;
1402 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1403 return new_buff;
1404}
1405
20dd417a 1406/* Creates a new buffer with enough space to hold the uncommitted
e6a5f963 1407 remaining bytes of the buffer pointed to by BUFF, and at least
1408 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1409 Chains the new buffer before the buffer pointed to by BUFF, and
1410 updates the pointer to point to the new buffer. */
1411void
f7fdd7a1 1412_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
e6a5f963 1413{
1414 _cpp_buff *new_buff, *old_buff = *pbuff;
1415 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1416
1417 new_buff = _cpp_get_buff (pfile, size);
1418 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1419 new_buff->next = old_buff;
1420 *pbuff = new_buff;
06c92cbc 1421}
1422
1423/* Free a chain of buffers starting at BUFF. */
1424void
1425_cpp_free_buff (buff)
1426 _cpp_buff *buff;
1427{
1428 _cpp_buff *next;
1429
1430 for (; buff; buff = next)
1431 {
1432 next = buff->next;
1433 free (buff->base);
1434 }
1435}
deb356cf 1436
1fdf6039 1437/* Allocate permanent, unaligned storage of length LEN. */
1438unsigned char *
f7fdd7a1 1439_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1fdf6039 1440{
1441 _cpp_buff *buff = pfile->u_buff;
1442 unsigned char *result = buff->cur;
1443
1444 if (len > (size_t) (buff->limit - result))
1445 {
1446 buff = _cpp_get_buff (pfile, len);
1447 buff->next = pfile->u_buff;
1448 pfile->u_buff = buff;
1449 result = buff->cur;
1450 }
1451
1452 buff->cur = result + len;
1453 return result;
1454}
1455
1e0ef2fd 1456/* Allocate permanent, unaligned storage of length LEN from a_buff.
1457 That buffer is used for growing allocations when saving macro
1458 replacement lists in a #define, and when parsing an answer to an
1459 assertion in #assert, #unassert or #if (and therefore possibly
1460 whilst expanding macros). It therefore must not be used by any
1461 code that they might call: specifically the lexer and the guts of
1462 the macro expander.
1463
1464 All existing other uses clearly fit this restriction: storing
1465 registered pragmas during initialization. */
79bd622b 1466unsigned char *
f7fdd7a1 1467_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
89b05ef6 1468{
e6a5f963 1469 _cpp_buff *buff = pfile->a_buff;
1470 unsigned char *result = buff->cur;
89b05ef6 1471
e6a5f963 1472 if (len > (size_t) (buff->limit - result))
89b05ef6 1473 {
e6a5f963 1474 buff = _cpp_get_buff (pfile, len);
1475 buff->next = pfile->a_buff;
1476 pfile->a_buff = buff;
1477 result = buff->cur;
89b05ef6 1478 }
f80e83a9 1479
e6a5f963 1480 buff->cur = result + len;
79bd622b 1481 return result;
f80e83a9 1482}