]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/cpplex.c
* configure.in: Remove unnecessary test.
[thirdparty/gcc.git] / gcc / cpplex.c
CommitLineData
0578f103 1/* CPP Library - lexical analysis.
f0c2775b 2 Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
0578f103 3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7
8This program is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 2, or (at your option) any
11later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with this program; if not, write to the Free Software
20Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
21
22#include "config.h"
23#include "system.h"
0578f103 24#include "cpplib.h"
25#include "cpphash.h"
26
79bd622b 27enum spell_type
241e762e 28{
79bd622b 29 SPELL_OPERATOR = 0,
79bd622b 30 SPELL_IDENT,
4970d4c2 31 SPELL_LITERAL,
79bd622b 32 SPELL_NONE
241e762e 33};
34
79bd622b 35struct token_spelling
241e762e 36{
79bd622b 37 enum spell_type category;
38 const unsigned char *name;
241e762e 39};
40
0ca849f9 41static const unsigned char *const digraph_spellings[] =
42{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
79bd622b 43
44#define OP(e, s) { SPELL_OPERATOR, U s },
18e43155 45#define TK(e, s) { s, U #e },
0ca849f9 46static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
79bd622b 47#undef OP
48#undef TK
49
50#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
e2f9a79f 52
f7fdd7a1 53static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54static int skip_line_comment (cpp_reader *);
55static void skip_whitespace (cpp_reader *, cppchar_t);
56static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
57static void lex_number (cpp_reader *, cpp_string *);
58static bool forms_identifier_p (cpp_reader *, int);
59static void lex_string (cpp_reader *, cpp_token *, const uchar *);
60static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
61static void create_literal (cpp_reader *, cpp_token *, const uchar *,
62 unsigned int, enum cpp_ttype);
63static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
64static int name_p (cpp_reader *, const cpp_string *);
f7fdd7a1 65static tokenrun *next_tokenrun (tokenrun *);
66
f7fdd7a1 67static _cpp_buff *new_buff (size_t);
bce8e0c0 68
e920deaf 69
f80e83a9 70/* Utility routine:
2c63d6c8 71
76faa4c0 72 Compares, the token TOKEN to the NUL-terminated string STRING.
73 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
f80e83a9 74int
f7fdd7a1 75cpp_ideq (const cpp_token *token, const char *string)
f80e83a9 76{
76faa4c0 77 if (token->type != CPP_NAME)
f80e83a9 78 return 0;
76faa4c0 79
b6d18b0a 80 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
bce8e0c0 81}
50fd6b48 82
a54e0bf8 83/* Record a note TYPE at byte POS into the current cleaned logical
84 line. */
1e0ef2fd 85static void
f7fdd7a1 86add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
338fa5f7 87{
a54e0bf8 88 if (buffer->notes_used == buffer->notes_cap)
89 {
90 buffer->notes_cap = buffer->notes_cap * 2 + 200;
f0af5a88 91 buffer->notes = xrealloc (buffer->notes,
92 buffer->notes_cap * sizeof (_cpp_line_note));
a54e0bf8 93 }
338fa5f7 94
a54e0bf8 95 buffer->notes[buffer->notes_used].pos = pos;
96 buffer->notes[buffer->notes_used].type = type;
97 buffer->notes_used++;
338fa5f7 98}
99
a54e0bf8 100/* Returns with a logical line that contains no escaped newlines or
101 trigraphs. This is a time-critical inner loop. */
102void
f7fdd7a1 103_cpp_clean_line (cpp_reader *pfile)
0578f103 104{
a54e0bf8 105 cpp_buffer *buffer;
106 const uchar *s;
107 uchar c, *d, *p;
1e0ef2fd 108
a54e0bf8 109 buffer = pfile->buffer;
110 buffer->cur_note = buffer->notes_used = 0;
111 buffer->cur = buffer->line_base = buffer->next_line;
112 buffer->need_line = false;
113 s = buffer->next_line - 1;
1e0ef2fd 114
a54e0bf8 115 if (!buffer->from_stage3)
0578f103 116 {
a54e0bf8 117 d = (uchar *) s;
118
119 for (;;)
4b912310 120 {
a54e0bf8 121 c = *++s;
122 *++d = c;
123
124 if (c == '\n' || c == '\r')
125 {
126 /* Handle DOS line endings. */
127 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
128 s++;
129 if (s == buffer->rlimit)
130 break;
131
132 /* Escaped? */
133 p = d;
134 while (p != buffer->next_line && is_nvspace (p[-1]))
135 p--;
136 if (p == buffer->next_line || p[-1] != '\\')
137 break;
138
aad4a87f 139 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
a54e0bf8 140 d = p - 2;
141 buffer->next_line = p - 1;
142 }
143 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
144 {
145 /* Add a note regardless, for the benefit of -Wtrigraphs. */
aad4a87f 146 add_line_note (buffer, d, s[2]);
a54e0bf8 147 if (CPP_OPTION (pfile, trigraphs))
148 {
149 *d = _cpp_trigraph_map[s[2]];
150 s += 2;
151 }
152 }
4b912310 153 }
0578f103 154 }
a54e0bf8 155 else
156 {
157 do
158 s++;
159 while (*s != '\n' && *s != '\r');
160 d = (uchar *) s;
161
162 /* Handle DOS line endings. */
163 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
164 s++;
165 }
338fa5f7 166
a54e0bf8 167 *d = '\n';
aad4a87f 168 /* A sentinel note that should never be processed. */
169 add_line_note (buffer, d + 1, '\n');
a54e0bf8 170 buffer->next_line = s + 1;
0578f103 171}
172
3078f2b2 173/* Return true if the trigraph indicated by NOTE should be warned
174 about in a comment. */
175static bool
f7fdd7a1 176warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
3078f2b2 177{
178 const uchar *p;
179
180 /* Within comments we don't warn about trigraphs, unless the
181 trigraph forms an escaped newline, as that may change
7ef5b942 182 behavior. */
3078f2b2 183 if (note->type != '/')
184 return false;
185
186 /* If -trigraphs, then this was an escaped newline iff the next note
187 is coincident. */
188 if (CPP_OPTION (pfile, trigraphs))
189 return note[1].pos == note->pos;
190
191 /* Otherwise, see if this forms an escaped newline. */
192 p = note->pos + 3;
193 while (is_nvspace (*p))
194 p++;
195
196 /* There might have been escaped newlines between the trigraph and the
197 newline we found. Hence the position test. */
198 return (*p == '\n' && p < note[1].pos);
199}
200
a54e0bf8 201/* Process the notes created by add_line_note as far as the current
202 location. */
203void
f7fdd7a1 204_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
0578f103 205{
c808d026 206 cpp_buffer *buffer = pfile->buffer;
207
a54e0bf8 208 for (;;)
f80e83a9 209 {
a54e0bf8 210 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
211 unsigned int col;
396ffa86 212
a54e0bf8 213 if (note->pos > buffer->cur)
214 break;
396ffa86 215
a54e0bf8 216 buffer->cur_note++;
217 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
435fb09b 218
aad4a87f 219 if (note->type == '\\' || note->type == ' ')
a54e0bf8 220 {
aad4a87f 221 if (note->type == ' ' && !in_comment)
a54e0bf8 222 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
223 "backslash and newline separated by space");
aad4a87f 224
a54e0bf8 225 if (buffer->next_line > buffer->rlimit)
1e0ef2fd 226 {
a54e0bf8 227 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
228 "backslash-newline at end of file");
229 /* Prevent "no newline at end of file" warning. */
230 buffer->next_line = buffer->rlimit;
1e0ef2fd 231 }
a54e0bf8 232
233 buffer->line_base = note->pos;
234 pfile->line++;
338fa5f7 235 }
aad4a87f 236 else if (_cpp_trigraph_map[note->type])
237 {
3078f2b2 238 if (CPP_OPTION (pfile, warn_trigraphs)
239 && (!in_comment || warn_in_comment (pfile, note)))
aad4a87f 240 {
241 if (CPP_OPTION (pfile, trigraphs))
242 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
243 "trigraph ??%c converted to %c",
244 note->type,
245 (int) _cpp_trigraph_map[note->type]);
246 else
1542b1ef 247 {
248 cpp_error_with_line
249 (pfile, DL_WARNING, pfile->line, col,
250 "trigraph ??%c ignored, use -trigraphs to enable",
251 note->type);
252 }
aad4a87f 253 }
254 }
255 else
256 abort ();
f80e83a9 257 }
0578f103 258}
259
338fa5f7 260/* Skip a C-style block comment. We find the end of the comment by
261 seeing if an asterisk is before every '/' we encounter. Returns
edaf8cb5 262 nonzero if comment terminated by EOF, zero otherwise.
263
264 Buffer->cur points to the initial asterisk of the comment. */
a54e0bf8 265bool
f7fdd7a1 266_cpp_skip_block_comment (cpp_reader *pfile)
0578f103 267{
f80e83a9 268 cpp_buffer *buffer = pfile->buffer;
a54e0bf8 269 cppchar_t c;
338fa5f7 270
edaf8cb5 271 buffer->cur++;
a54e0bf8 272 if (*buffer->cur == '/')
273 buffer->cur++;
338fa5f7 274
a54e0bf8 275 for (;;)
276 {
277 c = *buffer->cur++;
f80e83a9 278
338fa5f7 279 /* People like decorating comments with '*', so check for '/'
280 instead for efficiency. */
f80e83a9 281 if (c == '/')
0578f103 282 {
a54e0bf8 283 if (buffer->cur[-2] == '*')
338fa5f7 284 break;
f80e83a9 285
338fa5f7 286 /* Warn about potential nested comments, but not if the '/'
3fb1e43b 287 comes immediately before the true comment delimiter.
f80e83a9 288 Don't bother to get it right across escaped newlines. */
338fa5f7 289 if (CPP_OPTION (pfile, warn_comments)
1e0ef2fd 290 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
73328dce 291 cpp_error_with_line (pfile, DL_WARNING,
292 pfile->line, CPP_BUF_COL (buffer),
293 "\"/*\" within comment");
0578f103 294 }
a54e0bf8 295 else if (c == '\n')
296 {
297 buffer->cur--;
298 _cpp_process_line_notes (pfile, true);
299 if (buffer->next_line >= buffer->rlimit)
300 return true;
301 _cpp_clean_line (pfile);
302 pfile->line++;
303 }
0578f103 304 }
f80e83a9 305
3078f2b2 306 _cpp_process_line_notes (pfile, true);
a54e0bf8 307 return false;
0578f103 308}
309
1c124f85 310/* Skip a C++ line comment, leaving buffer->cur pointing to the
d10cfa8d 311 terminating newline. Handles escaped newlines. Returns nonzero
1c124f85 312 if a multiline comment. */
f80e83a9 313static int
f7fdd7a1 314skip_line_comment (cpp_reader *pfile)
0578f103 315{
f669338a 316 cpp_buffer *buffer = pfile->buffer;
1ea7ed21 317 unsigned int orig_line = pfile->line;
f80e83a9 318
a54e0bf8 319 while (*buffer->cur != '\n')
320 buffer->cur++;
1c124f85 321
a54e0bf8 322 _cpp_process_line_notes (pfile, true);
1ea7ed21 323 return orig_line != pfile->line;
f80e83a9 324}
0578f103 325
a54e0bf8 326/* Skips whitespace, saving the next non-whitespace character. */
b86584f6 327static void
f7fdd7a1 328skip_whitespace (cpp_reader *pfile, cppchar_t c)
f80e83a9 329{
330 cpp_buffer *buffer = pfile->buffer;
fe9eb18b 331 bool saw_NUL = false;
0578f103 332
338fa5f7 333 do
f80e83a9 334 {
78719282 335 /* Horizontal space always OK. */
a54e0bf8 336 if (c == ' ' || c == '\t')
338fa5f7 337 ;
338fa5f7 338 /* Just \f \v or \0 left. */
78719282 339 else if (c == '\0')
fe9eb18b 340 saw_NUL = true;
79bd622b 341 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
73328dce 342 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
343 CPP_BUF_COL (buffer),
344 "%s in preprocessing directive",
345 c == '\f' ? "form feed" : "vertical tab");
338fa5f7 346
338fa5f7 347 c = *buffer->cur++;
0578f103 348 }
2c0e001b 349 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
338fa5f7 350 while (is_nvspace (c));
351
fe9eb18b 352 if (saw_NUL)
353 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
354
1c124f85 355 buffer->cur--;
f80e83a9 356}
0578f103 357
79bd622b 358/* See if the characters of a number token are valid in a name (no
359 '.', '+' or '-'). */
360static int
f7fdd7a1 361name_p (cpp_reader *pfile, const cpp_string *string)
79bd622b 362{
363 unsigned int i;
364
365 for (i = 0; i < string->len; i++)
366 if (!is_idchar (string->text[i]))
367 return 0;
368
b1a9ff83 369 return 1;
79bd622b 370}
371
5bb46c08 372/* Returns TRUE if the sequence starting at buffer->cur is invalid in
2cbf1359 373 an identifier. FIRST is TRUE if this starts an identifier. */
5bb46c08 374static bool
f7fdd7a1 375forms_identifier_p (cpp_reader *pfile, int first)
5bb46c08 376{
2cbf1359 377 cpp_buffer *buffer = pfile->buffer;
378
379 if (*buffer->cur == '$')
380 {
381 if (!CPP_OPTION (pfile, dollars_in_ident))
382 return false;
383
384 buffer->cur++;
f0c2775b 385 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
2cbf1359 386 {
f0c2775b 387 CPP_OPTION (pfile, warn_dollars) = 0;
2cbf1359 388 cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
389 }
390
391 return true;
392 }
5bb46c08 393
2cbf1359 394 /* Is this a syntactically valid UCN? */
395 if (0 && *buffer->cur == '\\'
396 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
5bb46c08 397 {
2cbf1359 398 buffer->cur += 2;
ebc03810 399 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
2cbf1359 400 return true;
401 buffer->cur -= 2;
5bb46c08 402 }
5bb46c08 403
2cbf1359 404 return false;
5bb46c08 405}
406
407/* Lex an identifier starting at BUFFER->CUR - 1. */
338fa5f7 408static cpp_hashnode *
f7fdd7a1 409lex_identifier (cpp_reader *pfile, const uchar *base)
0578f103 410{
79bd622b 411 cpp_hashnode *result;
2cbf1359 412 const uchar *cur;
66a5287e 413
5bb46c08 414 do
78a11351 415 {
5bb46c08 416 cur = pfile->buffer->cur;
417
418 /* N.B. ISIDNUM does not include $. */
419 while (ISIDNUM (*cur))
420 cur++;
78a11351 421
78a11351 422 pfile->buffer->cur = cur;
66a5287e 423 }
2cbf1359 424 while (forms_identifier_p (pfile, false));
5bb46c08 425
426 result = (cpp_hashnode *)
427 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
66a5287e 428
5bb46c08 429 /* Rarely, identifiers require diagnostics when lexed. */
66a5287e 430 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
431 && !pfile->state.skipping, 0))
432 {
433 /* It is allowed to poison the same identifier twice. */
434 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
73328dce 435 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
66a5287e 436 NODE_NAME (result));
437
438 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
439 replacement list of a variadic macro. */
440 if (result == pfile->spec_nodes.n__VA_ARGS__
441 && !pfile->state.va_args_ok)
73328dce 442 cpp_error (pfile, DL_PEDWARN,
f7fdd7a1 443 "__VA_ARGS__ can only appear in the expansion"
444 " of a C99 variadic macro");
66a5287e 445 }
446
447 return result;
448}
449
5bb46c08 450/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
0578f103 451static void
f7fdd7a1 452lex_number (cpp_reader *pfile, cpp_string *number)
0578f103 453{
b6d18b0a 454 const uchar *cur;
5bb46c08 455 const uchar *base;
456 uchar *dest;
0578f103 457
5bb46c08 458 base = pfile->buffer->cur - 1;
459 do
f80e83a9 460 {
5bb46c08 461 cur = pfile->buffer->cur;
338fa5f7 462
5bb46c08 463 /* N.B. ISIDNUM does not include $. */
464 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
465 cur++;
0578f103 466
78a11351 467 pfile->buffer->cur = cur;
0578f103 468 }
2cbf1359 469 while (forms_identifier_p (pfile, false));
79bd622b 470
5bb46c08 471 number->len = cur - base;
472 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
473 memcpy (dest, base, number->len);
474 dest[number->len] = '\0';
475 number->text = dest;
79bd622b 476}
477
4970d4c2 478/* Create a token of type TYPE with a literal spelling. */
479static void
f7fdd7a1 480create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
481 unsigned int len, enum cpp_ttype type)
4970d4c2 482{
483 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
484
485 memcpy (dest, base, len);
486 dest[len] = '\0';
487 token->type = type;
488 token->val.str.len = len;
489 token->val.str.text = dest;
490}
491
5bb46c08 492/* Lexes a string, character constant, or angle-bracketed header file
4970d4c2 493 name. The stored string contains the spelling, including opening
494 quote and leading any leading 'L'. It returns the type of the
495 literal, or CPP_OTHER if it was not properly terminated.
496
497 The spelling is NUL-terminated, but it is not guaranteed that this
498 is the first NUL since embedded NULs are preserved. */
f80e83a9 499static void
f7fdd7a1 500lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
0578f103 501{
4970d4c2 502 bool saw_NUL = false;
503 const uchar *cur;
5bb46c08 504 cppchar_t terminator;
4970d4c2 505 enum cpp_ttype type;
506
507 cur = base;
508 terminator = *cur++;
509 if (terminator == 'L')
510 terminator = *cur++;
511 if (terminator == '\"')
512 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
513 else if (terminator == '\'')
514 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
515 else
516 terminator = '>', type = CPP_HEADER_NAME;
79bd622b 517
338fa5f7 518 for (;;)
0578f103 519 {
4970d4c2 520 cppchar_t c = *cur++;
4b0c16ee 521
edaf8cb5 522 /* In #include-style directives, terminators are not escapable. */
4970d4c2 523 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
524 cur++;
525 else if (c == terminator)
5bb46c08 526 break;
4970d4c2 527 else if (c == '\n')
338fa5f7 528 {
4970d4c2 529 cur--;
530 type = CPP_OTHER;
531 break;
0578f103 532 }
4970d4c2 533 else if (c == '\0')
534 saw_NUL = true;
0578f103 535 }
536
4970d4c2 537 if (saw_NUL && !pfile->state.skipping)
538 cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
0578f103 539
4970d4c2 540 pfile->buffer->cur = cur;
541 create_literal (pfile, token, base, cur - base, type);
338fa5f7 542}
f80e83a9 543
79bd622b 544/* The stored comment includes the comment start and any terminator. */
2c63d6c8 545static void
f7fdd7a1 546save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
547 cppchar_t type)
2c63d6c8 548{
f80e83a9 549 unsigned char *buffer;
d3f7919d 550 unsigned int len, clen;
b1a9ff83 551
f0495c2c 552 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1c124f85 553
a543b315 554 /* C++ comments probably (not definitely) have moved past a new
555 line, which we don't want to save in the comment. */
1c124f85 556 if (is_vspace (pfile->buffer->cur[-1]))
a543b315 557 len--;
d3f7919d 558
559 /* If we are currently in a directive, then we need to store all
560 C++ comments as C comments internally, and so we need to
561 allocate a little extra space in that case.
562
563 Note that the only time we encounter a directive here is
564 when we are saving comments in a "#define". */
565 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
566
567 buffer = _cpp_unaligned_alloc (pfile, clen);
b1a9ff83 568
f80e83a9 569 token->type = CPP_COMMENT;
d3f7919d 570 token->val.str.len = clen;
338fa5f7 571 token->val.str.text = buffer;
0578f103 572
f0495c2c 573 buffer[0] = '/';
574 memcpy (buffer + 1, from, len - 1);
d3f7919d 575
a113df96 576 /* Finish conversion to a C comment, if necessary. */
d3f7919d 577 if (pfile->state.in_directive && type == '/')
578 {
579 buffer[1] = '*';
580 buffer[clen - 2] = '*';
581 buffer[clen - 1] = '/';
582 }
338fa5f7 583}
0578f103 584
83dcbb5c 585/* Allocate COUNT tokens for RUN. */
586void
f7fdd7a1 587_cpp_init_tokenrun (tokenrun *run, unsigned int count)
83dcbb5c 588{
589 run->base = xnewvec (cpp_token, count);
590 run->limit = run->base + count;
591 run->next = NULL;
592}
593
594/* Returns the next tokenrun, or creates one if there is none. */
595static tokenrun *
f7fdd7a1 596next_tokenrun (tokenrun *run)
83dcbb5c 597{
598 if (run->next == NULL)
599 {
600 run->next = xnew (tokenrun);
fb5ab82c 601 run->next->prev = run;
83dcbb5c 602 _cpp_init_tokenrun (run->next, 250);
603 }
604
605 return run->next;
606}
607
f9b5f742 608/* Allocate a single token that is invalidated at the same time as the
609 rest of the tokens on the line. Has its line and col set to the
610 same as the last lexed token, so that diagnostics appear in the
611 right place. */
612cpp_token *
f7fdd7a1 613_cpp_temp_token (cpp_reader *pfile)
f9b5f742 614{
615 cpp_token *old, *result;
616
617 old = pfile->cur_token - 1;
618 if (pfile->cur_token == pfile->cur_run->limit)
619 {
620 pfile->cur_run = next_tokenrun (pfile->cur_run);
621 pfile->cur_token = pfile->cur_run->base;
622 }
623
624 result = pfile->cur_token++;
625 result->line = old->line;
626 result->col = old->col;
627 return result;
628}
629
10b4496a 630/* Lex a token into RESULT (external interface). Takes care of issues
631 like directive handling, token lookahead, multiple include
3fb1e43b 632 optimization and skipping. */
c00e481c 633const cpp_token *
f7fdd7a1 634_cpp_lex_token (cpp_reader *pfile)
83dcbb5c 635{
fb5ab82c 636 cpp_token *result;
83dcbb5c 637
fb5ab82c 638 for (;;)
83dcbb5c 639 {
fb5ab82c 640 if (pfile->cur_token == pfile->cur_run->limit)
83dcbb5c 641 {
fb5ab82c 642 pfile->cur_run = next_tokenrun (pfile->cur_run);
643 pfile->cur_token = pfile->cur_run->base;
83dcbb5c 644 }
645
fb5ab82c 646 if (pfile->lookaheads)
10b4496a 647 {
648 pfile->lookaheads--;
649 result = pfile->cur_token++;
650 }
fb5ab82c 651 else
10b4496a 652 result = _cpp_lex_direct (pfile);
fb5ab82c 653
654 if (result->flags & BOL)
83dcbb5c 655 {
fb5ab82c 656 /* Is this a directive. If _cpp_handle_directive returns
657 false, it is an assembler #. */
658 if (result->type == CPP_HASH
d6af0368 659 /* 6.10.3 p 11: Directives in a list of macro arguments
660 gives undefined behavior. This implementation
661 handles the directive as normal. */
662 && pfile->state.parsing_args != 1
fb5ab82c 663 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
664 continue;
5621a364 665 if (pfile->cb.line_change && !pfile->state.skipping)
f7fdd7a1 666 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
83dcbb5c 667 }
83dcbb5c 668
fb5ab82c 669 /* We don't skip tokens in directives. */
670 if (pfile->state.in_directive)
671 break;
83dcbb5c 672
fb5ab82c 673 /* Outside a directive, invalidate controlling macros. At file
10b4496a 674 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
7ef5b942 675 get here and MI optimization works. */
83dcbb5c 676 pfile->mi_valid = false;
fb5ab82c 677
678 if (!pfile->state.skipping || result->type == CPP_EOF)
679 break;
83dcbb5c 680 }
681
c00e481c 682 return result;
83dcbb5c 683}
684
a54e0bf8 685/* Returns true if a fresh line has been loaded. */
686bool
f7fdd7a1 687_cpp_get_fresh_line (cpp_reader *pfile)
0bb65704 688{
a54e0bf8 689 /* We can't get a new line until we leave the current directive. */
690 if (pfile->state.in_directive)
691 return false;
b1a9ff83 692
a54e0bf8 693 for (;;)
fb83e0d6 694 {
a54e0bf8 695 cpp_buffer *buffer = pfile->buffer;
fb83e0d6 696
a54e0bf8 697 if (!buffer->need_line)
698 return true;
699
700 if (buffer->next_line < buffer->rlimit)
0bb65704 701 {
a54e0bf8 702 _cpp_clean_line (pfile);
703 return true;
704 }
0bb65704 705
a54e0bf8 706 /* First, get out of parsing arguments state. */
707 if (pfile->state.parsing_args)
708 return false;
709
710 /* End of buffer. Non-empty files should end in a newline. */
711 if (buffer->buf != buffer->rlimit
712 && buffer->next_line > buffer->rlimit
713 && !buffer->from_stage3)
714 {
715 /* Only warn once. */
716 buffer->next_line = buffer->rlimit;
717 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
718 CPP_BUF_COLUMN (buffer, buffer->cur),
719 "no newline at end of file");
720 }
721
a54e0bf8 722 _cpp_pop_buffer (pfile);
11b5269c 723 if (pfile->buffer == NULL)
724 return false;
a54e0bf8 725 }
0bb65704 726}
727
/* Helper for lexing two-character operators.  Sets result->type to
   THEN_TYPE and consumes one character if the next character of the
   buffer is CHAR; otherwise sets result->type to ELSE_TYPE and
   consumes nothing.

   The expansion refers to variables named `buffer' and `result' that
   must be in scope where the macro is used.  Each argument is
   evaluated at most once and is parenthesized, so that an argument
   containing low-precedence operators (a conditional expression, for
   instance) is still treated as a single operand.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = (ELSE_TYPE);                       \
      if (*buffer->cur == (CHAR))                       \
        buffer->cur++, result->type = (THEN_TYPE);      \
    }                                                   \
  while (0)
1c124f85 736
10b4496a 737/* Lex a token into pfile->cur_token, which is also incremented, to
738 get diagnostics pointing to the correct location.
739
740 Does not handle issues such as token lookahead, multiple-include
4172d65e 741 optimization, directives, skipping etc. This function is only
10b4496a 742 suitable for use by _cpp_lex_token, and in special cases like
743 lex_expansion_token which doesn't care for any of these issues.
744
745 When meeting a newline, returns CPP_EOF if parsing a directive,
746 otherwise returns to the start of the token buffer if permissible.
747 Returns the location of the lexed token. */
748cpp_token *
f7fdd7a1 749_cpp_lex_direct (cpp_reader *pfile)
0578f103 750{
338fa5f7 751 cppchar_t c;
230f0943 752 cpp_buffer *buffer;
338fa5f7 753 const unsigned char *comment_start;
10b4496a 754 cpp_token *result = pfile->cur_token++;
0653b94e 755
83dcbb5c 756 fresh_line:
a54e0bf8 757 result->flags = 0;
82166c5c 758 buffer = pfile->buffer;
11b5269c 759 if (buffer->need_line)
a54e0bf8 760 {
761 if (!_cpp_get_fresh_line (pfile))
762 {
763 result->type = CPP_EOF;
2908f819 764 if (!pfile->state.in_directive)
765 {
766 /* Tell the compiler the line number of the EOF token. */
767 result->line = pfile->line;
768 result->flags = BOL;
769 }
a54e0bf8 770 return result;
771 }
772 if (!pfile->keep_tokens)
773 {
774 pfile->cur_run = &pfile->base_run;
775 result = pfile->base_run.base;
776 pfile->cur_token = result + 1;
777 }
778 result->flags = BOL;
779 if (pfile->state.parsing_args == 2)
780 result->flags |= PREV_WHITE;
781 }
11b5269c 782 buffer = pfile->buffer;
83dcbb5c 783 update_tokens_line:
36a0aa7c 784 result->line = pfile->line;
f80e83a9 785
83dcbb5c 786 skipped_white:
a54e0bf8 787 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
788 && !pfile->overlaid_buffer)
789 {
790 _cpp_process_line_notes (pfile, false);
791 result->line = pfile->line;
792 }
1c124f85 793 c = *buffer->cur++;
83dcbb5c 794 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
83dcbb5c 795
338fa5f7 796 switch (c)
0578f103 797 {
435fb09b 798 case ' ': case '\t': case '\f': case '\v': case '\0':
799 result->flags |= PREV_WHITE;
a54e0bf8 800 skip_whitespace (pfile, c);
801 goto skipped_white;
338fa5f7 802
a54e0bf8 803 case '\n':
804 pfile->line++;
805 buffer->need_line = true;
806 goto fresh_line;
732cb4c9 807
338fa5f7 808 case '0': case '1': case '2': case '3': case '4':
809 case '5': case '6': case '7': case '8': case '9':
810 result->type = CPP_NUMBER;
5bb46c08 811 lex_number (pfile, &result->val.str);
338fa5f7 812 break;
732cb4c9 813
78c551ad 814 case 'L':
815 /* 'L' may introduce wide characters or strings. */
5bb46c08 816 if (*buffer->cur == '\'' || *buffer->cur == '"')
817 {
4970d4c2 818 lex_string (pfile, result, buffer->cur - 1);
5bb46c08 819 break;
820 }
b1a9ff83 821 /* Fall through. */
78c551ad 822
338fa5f7 823 case '_':
824 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
825 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
826 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
827 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
828 case 'y': case 'z':
829 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
78c551ad 830 case 'G': case 'H': case 'I': case 'J': case 'K':
338fa5f7 831 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
832 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
833 case 'Y': case 'Z':
834 result->type = CPP_NAME;
2cbf1359 835 result->val.node = lex_identifier (pfile, buffer->cur - 1);
338fa5f7 836
338fa5f7 837 /* Convert named operators to their proper types. */
78c551ad 838 if (result->val.node->flags & NODE_OPERATOR)
338fa5f7 839 {
840 result->flags |= NAMED_OP;
805e22b2 841 result->type = result->val.node->directive_index;
338fa5f7 842 }
843 break;
844
845 case '\'':
846 case '"':
4970d4c2 847 lex_string (pfile, result, buffer->cur - 1);
338fa5f7 848 break;
f80e83a9 849
338fa5f7 850 case '/':
f0495c2c 851 /* A potential block or line comment. */
852 comment_start = buffer->cur;
edaf8cb5 853 c = *buffer->cur;
854
f0495c2c 855 if (c == '*')
856 {
a54e0bf8 857 if (_cpp_skip_block_comment (pfile))
73328dce 858 cpp_error (pfile, DL_ERROR, "unterminated comment");
338fa5f7 859 }
1c124f85 860 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
861 || CPP_IN_SYSTEM_HEADER (pfile)))
338fa5f7 862 {
5db5d057 863 /* Warn about comments only if pedantically GNUC89, and not
864 in system headers. */
865 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
66914e49 866 && ! buffer->warned_cplusplus_comments)
f80e83a9 867 {
73328dce 868 cpp_error (pfile, DL_PEDWARN,
ba059ac0 869 "C++ style comments are not allowed in ISO C90");
73328dce 870 cpp_error (pfile, DL_PEDWARN,
871 "(this will be reported only once per input file)");
f0495c2c 872 buffer->warned_cplusplus_comments = 1;
873 }
338fa5f7 874
e1caf668 875 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
73328dce 876 cpp_error (pfile, DL_WARNING, "multi-line comment");
f0495c2c 877 }
1c124f85 878 else if (c == '=')
879 {
edaf8cb5 880 buffer->cur++;
1c124f85 881 result->type = CPP_DIV_EQ;
882 break;
883 }
884 else
885 {
1c124f85 886 result->type = CPP_DIV;
887 break;
888 }
338fa5f7 889
f0495c2c 890 if (!pfile->state.save_comments)
891 {
892 result->flags |= PREV_WHITE;
83dcbb5c 893 goto update_tokens_line;
338fa5f7 894 }
f0495c2c 895
896 /* Save the comment as a token in its own right. */
d3f7919d 897 save_comment (pfile, result, comment_start, c);
fb5ab82c 898 break;
338fa5f7 899
900 case '<':
901 if (pfile->state.angled_headers)
902 {
4970d4c2 903 lex_string (pfile, result, buffer->cur - 1);
1c124f85 904 break;
338fa5f7 905 }
0578f103 906
edaf8cb5 907 result->type = CPP_LESS;
908 if (*buffer->cur == '=')
909 buffer->cur++, result->type = CPP_LESS_EQ;
910 else if (*buffer->cur == '<')
338fa5f7 911 {
edaf8cb5 912 buffer->cur++;
913 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
338fa5f7 914 }
edaf8cb5 915 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
338fa5f7 916 {
edaf8cb5 917 buffer->cur++;
918 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
338fa5f7 919 }
edaf8cb5 920 else if (CPP_OPTION (pfile, digraphs))
1c124f85 921 {
edaf8cb5 922 if (*buffer->cur == ':')
923 {
924 buffer->cur++;
925 result->flags |= DIGRAPH;
926 result->type = CPP_OPEN_SQUARE;
927 }
928 else if (*buffer->cur == '%')
929 {
930 buffer->cur++;
931 result->flags |= DIGRAPH;
932 result->type = CPP_OPEN_BRACE;
933 }
1c124f85 934 }
338fa5f7 935 break;
936
937 case '>':
edaf8cb5 938 result->type = CPP_GREATER;
939 if (*buffer->cur == '=')
940 buffer->cur++, result->type = CPP_GREATER_EQ;
941 else if (*buffer->cur == '>')
338fa5f7 942 {
edaf8cb5 943 buffer->cur++;
944 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
945 }
946 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
947 {
948 buffer->cur++;
949 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
338fa5f7 950 }
951 break;
952
f669338a 953 case '%':
edaf8cb5 954 result->type = CPP_MOD;
955 if (*buffer->cur == '=')
956 buffer->cur++, result->type = CPP_MOD_EQ;
957 else if (CPP_OPTION (pfile, digraphs))
1c124f85 958 {
edaf8cb5 959 if (*buffer->cur == ':')
1c124f85 960 {
edaf8cb5 961 buffer->cur++;
962 result->flags |= DIGRAPH;
963 result->type = CPP_HASH;
964 if (*buffer->cur == '%' && buffer->cur[1] == ':')
965 buffer->cur += 2, result->type = CPP_PASTE;
966 }
967 else if (*buffer->cur == '>')
968 {
969 buffer->cur++;
970 result->flags |= DIGRAPH;
971 result->type = CPP_CLOSE_BRACE;
1c124f85 972 }
1c124f85 973 }
338fa5f7 974 break;
975
f669338a 976 case '.':
1c124f85 977 result->type = CPP_DOT;
edaf8cb5 978 if (ISDIGIT (*buffer->cur))
1c124f85 979 {
980 result->type = CPP_NUMBER;
5bb46c08 981 lex_number (pfile, &result->val.str);
1c124f85 982 }
edaf8cb5 983 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
984 buffer->cur += 2, result->type = CPP_ELLIPSIS;
985 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
986 buffer->cur++, result->type = CPP_DOT_STAR;
338fa5f7 987 break;
0578f103 988
338fa5f7 989 case '+':
edaf8cb5 990 result->type = CPP_PLUS;
991 if (*buffer->cur == '+')
992 buffer->cur++, result->type = CPP_PLUS_PLUS;
993 else if (*buffer->cur == '=')
994 buffer->cur++, result->type = CPP_PLUS_EQ;
338fa5f7 995 break;
ac0749c7 996
338fa5f7 997 case '-':
edaf8cb5 998 result->type = CPP_MINUS;
999 if (*buffer->cur == '>')
338fa5f7 1000 {
edaf8cb5 1001 buffer->cur++;
1c124f85 1002 result->type = CPP_DEREF;
edaf8cb5 1003 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1004 buffer->cur++, result->type = CPP_DEREF_STAR;
1c124f85 1005 }
edaf8cb5 1006 else if (*buffer->cur == '-')
1007 buffer->cur++, result->type = CPP_MINUS_MINUS;
1008 else if (*buffer->cur == '=')
1009 buffer->cur++, result->type = CPP_MINUS_EQ;
338fa5f7 1010 break;
0578f103 1011
338fa5f7 1012 case '&':
edaf8cb5 1013 result->type = CPP_AND;
1014 if (*buffer->cur == '&')
1015 buffer->cur++, result->type = CPP_AND_AND;
1016 else if (*buffer->cur == '=')
1017 buffer->cur++, result->type = CPP_AND_EQ;
338fa5f7 1018 break;
b1a9ff83 1019
338fa5f7 1020 case '|':
edaf8cb5 1021 result->type = CPP_OR;
1022 if (*buffer->cur == '|')
1023 buffer->cur++, result->type = CPP_OR_OR;
1024 else if (*buffer->cur == '=')
1025 buffer->cur++, result->type = CPP_OR_EQ;
338fa5f7 1026 break;
0578f103 1027
338fa5f7 1028 case ':':
edaf8cb5 1029 result->type = CPP_COLON;
1030 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1031 buffer->cur++, result->type = CPP_SCOPE;
1032 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
338fa5f7 1033 {
edaf8cb5 1034 buffer->cur++;
338fa5f7 1035 result->flags |= DIGRAPH;
1c124f85 1036 result->type = CPP_CLOSE_SQUARE;
1037 }
338fa5f7 1038 break;
0578f103 1039
1c124f85 1040 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1041 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1042 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1043 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1044 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1045
a54e0bf8 1046 case '?': result->type = CPP_QUERY; break;
338fa5f7 1047 case '~': result->type = CPP_COMPL; break;
1048 case ',': result->type = CPP_COMMA; break;
1049 case '(': result->type = CPP_OPEN_PAREN; break;
1050 case ')': result->type = CPP_CLOSE_PAREN; break;
1051 case '[': result->type = CPP_OPEN_SQUARE; break;
1052 case ']': result->type = CPP_CLOSE_SQUARE; break;
1053 case '{': result->type = CPP_OPEN_BRACE; break;
1054 case '}': result->type = CPP_CLOSE_BRACE; break;
1055 case ';': result->type = CPP_SEMICOLON; break;
1056
7fd957fe 1057 /* @ is a punctuator in Objective-C. */
9ee99ac6 1058 case '@': result->type = CPP_ATSIGN; break;
338fa5f7 1059
78c551ad 1060 case '$':
2cbf1359 1061 case '\\':
1062 {
1063 const uchar *base = --buffer->cur;
78c551ad 1064
2cbf1359 1065 if (forms_identifier_p (pfile, true))
1066 {
1067 result->type = CPP_NAME;
1068 result->val.node = lex_identifier (pfile, base);
1069 break;
1070 }
1071 buffer->cur++;
bc205914 1072 }
2cbf1359 1073
bc205914 1074 default:
4970d4c2 1075 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1076 break;
338fa5f7 1077 }
fb5ab82c 1078
1079 return result;
338fa5f7 1080}
1081
b1280514 1082/* An upper bound on the number of bytes needed to spell TOKEN.
1083 Does not include preceding whitespace. */
79bd622b 1084unsigned int
f7fdd7a1 1085cpp_token_len (const cpp_token *token)
338fa5f7 1086{
79bd622b 1087 unsigned int len;
cfad5579 1088
79bd622b 1089 switch (TOKEN_SPELL (token))
f80e83a9 1090 {
b1280514 1091 default: len = 4; break;
4970d4c2 1092 case SPELL_LITERAL: len = token->val.str.len; break;
c86dbc5b 1093 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
f80e83a9 1094 }
b1280514 1095
1096 return len;
cfad5579 1097}
1098
f80e83a9 1099/* Write the spelling of a token TOKEN to BUFFER. The buffer must
c5ea33a8 1100 already contain the enough space to hold the token's spelling.
f7fdd7a1 1101 Returns a pointer to the character after the last character written.
1102 FIXME: Would be nice if we didn't need the PFILE argument. */
79bd622b 1103unsigned char *
f7fdd7a1 1104cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1105 unsigned char *buffer)
f80e83a9 1106{
7e842f95 1107 switch (TOKEN_SPELL (token))
f80e83a9 1108 {
1109 case SPELL_OPERATOR:
1110 {
1111 const unsigned char *spelling;
1112 unsigned char c;
ab12a39c 1113
f80e83a9 1114 if (token->flags & DIGRAPH)
ee6c4e4b 1115 spelling
1116 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
31674461 1117 else if (token->flags & NAMED_OP)
1118 goto spell_ident;
f80e83a9 1119 else
7e842f95 1120 spelling = TOKEN_NAME (token);
b1a9ff83 1121
f80e83a9 1122 while ((c = *spelling++) != '\0')
1123 *buffer++ = c;
1124 }
1125 break;
ab12a39c 1126
8d27e472 1127 spell_ident:
f80e83a9 1128 case SPELL_IDENT:
c86dbc5b 1129 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1130 buffer += NODE_LEN (token->val.node);
f80e83a9 1131 break;
ab12a39c 1132
4970d4c2 1133 case SPELL_LITERAL:
8d27e472 1134 memcpy (buffer, token->val.str.text, token->val.str.len);
1135 buffer += token->val.str.len;
1136 break;
1137
f80e83a9 1138 case SPELL_NONE:
73328dce 1139 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
f80e83a9 1140 break;
1141 }
ab12a39c 1142
f80e83a9 1143 return buffer;
1144}
ab12a39c 1145
e484a1cc 1146/* Returns TOKEN spelt as a null-terminated string. The string is
1147 freed when the reader is destroyed. Useful for diagnostics. */
79bd622b 1148unsigned char *
f7fdd7a1 1149cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
b1280514 1150{
1151 unsigned int len = cpp_token_len (token) + 1;
1fdf6039 1152 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
6060326b 1153
79bd622b 1154 end = cpp_spell_token (pfile, token, start);
1155 end[0] = '\0';
6060326b 1156
79bd622b 1157 return start;
1158}
6060326b 1159
e484a1cc 1160/* Used by C front ends, which really should move to using
1161 cpp_token_as_text. */
79bd622b 1162const char *
f7fdd7a1 1163cpp_type2name (enum cpp_ttype type)
79bd622b 1164{
1165 return (const char *) token_spellings[type].name;
1166}
6060326b 1167
f9b5f742 1168/* Writes the spelling of token to FP, without any preceding space.
1169 Separated from cpp_spell_token for efficiency - to avoid stdio
1170 double-buffering. */
79bd622b 1171void
f7fdd7a1 1172cpp_output_token (const cpp_token *token, FILE *fp)
79bd622b 1173{
79bd622b 1174 switch (TOKEN_SPELL (token))
6060326b 1175 {
79bd622b 1176 case SPELL_OPERATOR:
1177 {
1178 const unsigned char *spelling;
28874558 1179 int c;
6060326b 1180
79bd622b 1181 if (token->flags & DIGRAPH)
ee6c4e4b 1182 spelling
1183 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
79bd622b 1184 else if (token->flags & NAMED_OP)
1185 goto spell_ident;
1186 else
1187 spelling = TOKEN_NAME (token);
f80e83a9 1188
28874558 1189 c = *spelling;
1190 do
1191 putc (c, fp);
1192 while ((c = *++spelling) != '\0');
79bd622b 1193 }
1194 break;
f80e83a9 1195
79bd622b 1196 spell_ident:
1197 case SPELL_IDENT:
28874558 1198 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
79bd622b 1199 break;
f80e83a9 1200
4970d4c2 1201 case SPELL_LITERAL:
8d27e472 1202 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1203 break;
1204
79bd622b 1205 case SPELL_NONE:
1206 /* An error, most probably. */
1207 break;
f80e83a9 1208 }
6060326b 1209}
1210
79bd622b 1211/* Compare two tokens. */
1212int
f7fdd7a1 1213_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
6060326b 1214{
79bd622b 1215 if (a->type == b->type && a->flags == b->flags)
1216 switch (TOKEN_SPELL (a))
1217 {
1218 default: /* Keep compiler happy. */
1219 case SPELL_OPERATOR:
1220 return 1;
79bd622b 1221 case SPELL_NONE:
588d632b 1222 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
79bd622b 1223 case SPELL_IDENT:
1224 return a->val.node == b->val.node;
4970d4c2 1225 case SPELL_LITERAL:
79bd622b 1226 return (a->val.str.len == b->val.str.len
1227 && !memcmp (a->val.str.text, b->val.str.text,
1228 a->val.str.len));
1229 }
6060326b 1230
f80e83a9 1231 return 0;
1232}
1233
79bd622b 1234/* Returns nonzero if a space should be inserted to avoid an
1235 accidental token paste for output. For simplicity, it is
1236 conservative, and occasionally advises a space where one is not
1237 needed, e.g. "." and ".2". */
79bd622b 1238int
f7fdd7a1 1239cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1240 const cpp_token *token2)
6060326b 1241{
79bd622b 1242 enum cpp_ttype a = token1->type, b = token2->type;
1243 cppchar_t c;
6060326b 1244
79bd622b 1245 if (token1->flags & NAMED_OP)
1246 a = CPP_NAME;
1247 if (token2->flags & NAMED_OP)
1248 b = CPP_NAME;
6060326b 1249
79bd622b 1250 c = EOF;
1251 if (token2->flags & DIGRAPH)
ee6c4e4b 1252 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
79bd622b 1253 else if (token_spellings[b].category == SPELL_OPERATOR)
1254 c = token_spellings[b].name[0];
6060326b 1255
79bd622b 1256 /* Quickly get everything that can paste with an '='. */
ee6c4e4b 1257 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
79bd622b 1258 return 1;
6060326b 1259
79bd622b 1260 switch (a)
6060326b 1261 {
79bd622b 1262 case CPP_GREATER: return c == '>' || c == '?';
1263 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1264 case CPP_PLUS: return c == '+';
1265 case CPP_MINUS: return c == '-' || c == '>';
1266 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1267 case CPP_MOD: return c == ':' || c == '>';
1268 case CPP_AND: return c == '&';
1269 case CPP_OR: return c == '|';
1270 case CPP_COLON: return c == ':' || c == '>';
1271 case CPP_DEREF: return c == '*';
efdcc728 1272 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
79bd622b 1273 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1274 case CPP_NAME: return ((b == CPP_NUMBER
1275 && name_p (pfile, &token2->val.str))
1276 || b == CPP_NAME
1277 || b == CPP_CHAR || b == CPP_STRING); /* L */
1278 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1279 || c == '.' || c == '+' || c == '-');
2cbf1359 1280 /* UCNs */
bc205914 1281 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1282 && b == CPP_NAME)
2cbf1359 1283 || (CPP_OPTION (pfile, objc)
bc205914 1284 && token1->val.str.text[0] == '@'
2cbf1359 1285 && (b == CPP_NAME || b == CPP_STRING)));
79bd622b 1286 default: break;
6060326b 1287 }
6060326b 1288
deb356cf 1289 return 0;
6060326b 1290}
1291
79bd622b 1292/* Output all the remaining tokens on the current line, and a newline
f9b5f742 1293 character, to FP. Leading whitespace is removed. If there are
1294 macros, special token padding is not performed. */
6060326b 1295void
f7fdd7a1 1296cpp_output_line (cpp_reader *pfile, FILE *fp)
6060326b 1297{
f9b5f742 1298 const cpp_token *token;
7e842f95 1299
f9b5f742 1300 token = cpp_get_token (pfile);
1301 while (token->type != CPP_EOF)
7e842f95 1302 {
f9b5f742 1303 cpp_output_token (token, fp);
1304 token = cpp_get_token (pfile);
1305 if (token->flags & PREV_WHITE)
1306 putc (' ', fp);
7e842f95 1307 }
1308
79bd622b 1309 putc ('\n', fp);
f80e83a9 1310}
6060326b 1311
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that arbitrary expressions can
   be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1326
1785b647 1327/* Create a new allocation buffer. Place the control block at the end
1328 of the buffer, so that buffer overflows will cause immediate chaos. */
06c92cbc 1329static _cpp_buff *
f7fdd7a1 1330new_buff (size_t len)
06c92cbc 1331{
1332 _cpp_buff *result;
1fdf6039 1333 unsigned char *base;
06c92cbc 1334
084163dc 1335 if (len < MIN_BUFF_SIZE)
1336 len = MIN_BUFF_SIZE;
198b48a0 1337 len = CPP_ALIGN (len);
06c92cbc 1338
1339 base = xmalloc (len + sizeof (_cpp_buff));
1340 result = (_cpp_buff *) (base + len);
1341 result->base = base;
1342 result->cur = base;
1343 result->limit = base + len;
1344 result->next = NULL;
1345 return result;
1346}
1347
1348/* Place a chain of unwanted allocation buffers on the free list. */
1349void
f7fdd7a1 1350_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
06c92cbc 1351{
1352 _cpp_buff *end = buff;
1353
1354 while (end->next)
1355 end = end->next;
1356 end->next = pfile->free_buffs;
1357 pfile->free_buffs = buff;
1358}
1359
1360/* Return a free buffer of size at least MIN_SIZE. */
1361_cpp_buff *
f7fdd7a1 1362_cpp_get_buff (cpp_reader *pfile, size_t min_size)
06c92cbc 1363{
1364 _cpp_buff *result, **p;
1365
1366 for (p = &pfile->free_buffs;; p = &(*p)->next)
1367 {
4b31a107 1368 size_t size;
084163dc 1369
1370 if (*p == NULL)
06c92cbc 1371 return new_buff (min_size);
084163dc 1372 result = *p;
1373 size = result->limit - result->base;
1374 /* Return a buffer that's big enough, but don't waste one that's
1375 way too big. */
4085c149 1376 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
06c92cbc 1377 break;
1378 }
1379
1380 *p = result->next;
1381 result->next = NULL;
1382 result->cur = result->base;
1383 return result;
1384}
1385
20dd417a 1386/* Creates a new buffer with enough space to hold the uncommitted
e6a5f963 1387 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1388 the excess bytes to the new buffer. Chains the new buffer after
1389 BUFF, and returns the new buffer. */
06c92cbc 1390_cpp_buff *
f7fdd7a1 1391_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
06c92cbc 1392{
4b31a107 1393 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
e6a5f963 1394 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
06c92cbc 1395
e6a5f963 1396 buff->next = new_buff;
1397 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1398 return new_buff;
1399}
1400
20dd417a 1401/* Creates a new buffer with enough space to hold the uncommitted
e6a5f963 1402 remaining bytes of the buffer pointed to by BUFF, and at least
1403 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1404 Chains the new buffer before the buffer pointed to by BUFF, and
1405 updates the pointer to point to the new buffer. */
1406void
f7fdd7a1 1407_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
e6a5f963 1408{
1409 _cpp_buff *new_buff, *old_buff = *pbuff;
1410 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1411
1412 new_buff = _cpp_get_buff (pfile, size);
1413 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1414 new_buff->next = old_buff;
1415 *pbuff = new_buff;
06c92cbc 1416}
1417
1418/* Free a chain of buffers starting at BUFF. */
1419void
f82b06e0 1420_cpp_free_buff (_cpp_buff *buff)
06c92cbc 1421{
1422 _cpp_buff *next;
1423
1424 for (; buff; buff = next)
1425 {
1426 next = buff->next;
1427 free (buff->base);
1428 }
1429}
deb356cf 1430
1fdf6039 1431/* Allocate permanent, unaligned storage of length LEN. */
1432unsigned char *
f7fdd7a1 1433_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1fdf6039 1434{
1435 _cpp_buff *buff = pfile->u_buff;
1436 unsigned char *result = buff->cur;
1437
1438 if (len > (size_t) (buff->limit - result))
1439 {
1440 buff = _cpp_get_buff (pfile, len);
1441 buff->next = pfile->u_buff;
1442 pfile->u_buff = buff;
1443 result = buff->cur;
1444 }
1445
1446 buff->cur = result + len;
1447 return result;
1448}
1449
1e0ef2fd 1450/* Allocate permanent, unaligned storage of length LEN from a_buff.
1451 That buffer is used for growing allocations when saving macro
1452 replacement lists in a #define, and when parsing an answer to an
1453 assertion in #assert, #unassert or #if (and therefore possibly
1454 whilst expanding macros). It therefore must not be used by any
1455 code that they might call: specifically the lexer and the guts of
1456 the macro expander.
1457
1458 All existing other uses clearly fit this restriction: storing
1459 registered pragmas during initialization. */
79bd622b 1460unsigned char *
f7fdd7a1 1461_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
89b05ef6 1462{
e6a5f963 1463 _cpp_buff *buff = pfile->a_buff;
1464 unsigned char *result = buff->cur;
89b05ef6 1465
e6a5f963 1466 if (len > (size_t) (buff->limit - result))
89b05ef6 1467 {
e6a5f963 1468 buff = _cpp_get_buff (pfile, len);
1469 buff->next = pfile->a_buff;
1470 pfile->a_buff = buff;
1471 result = buff->cur;
89b05ef6 1472 }
f80e83a9 1473
e6a5f963 1474 buff->cur = result + len;
79bd622b 1475 return result;
f80e83a9 1476}