]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/cpplex.c
s390.md ("*mulsi3_sign"): New insn.
[thirdparty/gcc.git] / gcc / cpplex.c
CommitLineData
45b966db 1/* CPP Library - lexical analysis.
78b8811a 2 Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
45b966db
ZW
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7
8This program is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 2, or (at your option) any
11later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with this program; if not, write to the Free Software
20Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
21
22#include "config.h"
23#include "system.h"
45b966db
ZW
24#include "cpplib.h"
25#include "cpphash.h"
26
/* Classifies how the spelling of a token type is recorded in the
   token_spellings table.  */
enum spell_type
{
  SPELL_OPERATOR = 0,   /* Used by OP () table entries, which carry a
                           fixed spelling string.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
34
93c80368 35struct token_spelling
f9a0e96c 36{
93c80368
NB
37 enum spell_type category;
38 const unsigned char *name;
f9a0e96c
ZW
39};
40
8206c799
ZW
41static const unsigned char *const digraph_spellings[] =
42{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
93c80368
NB
43
44#define OP(e, s) { SPELL_OPERATOR, U s },
9a238586 45#define TK(e, s) { s, U #e },
8206c799 46static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
93c80368
NB
47#undef OP
48#undef TK
49
50#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
f2d5f0cc 52
6cf87ca4
ZW
53static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54static int skip_line_comment (cpp_reader *);
55static void skip_whitespace (cpp_reader *, cppchar_t);
56static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
57static void lex_number (cpp_reader *, cpp_string *);
58static bool forms_identifier_p (cpp_reader *, int);
59static void lex_string (cpp_reader *, cpp_token *, const uchar *);
60static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
61static void create_literal (cpp_reader *, cpp_token *, const uchar *,
62 unsigned int, enum cpp_ttype);
63static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
64static int name_p (cpp_reader *, const cpp_string *);
6cf87ca4
ZW
65static tokenrun *next_tokenrun (tokenrun *);
66
6cf87ca4 67static _cpp_buff *new_buff (size_t);
15dad1d9 68
9d10c9a9 69
041c3194 70/* Utility routine:
9e62c811 71
bfb9dc7f
ZW
72 Compares, the token TOKEN to the NUL-terminated string STRING.
73 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
041c3194 74int
6cf87ca4 75cpp_ideq (const cpp_token *token, const char *string)
041c3194 76{
bfb9dc7f 77 if (token->type != CPP_NAME)
041c3194 78 return 0;
bfb9dc7f 79
562a5c27 80 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
15dad1d9 81}
1368ee70 82
26aea073
NB
83/* Record a note TYPE at byte POS into the current cleaned logical
84 line. */
87062813 85static void
6cf87ca4 86add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
0d9f234d 87{
26aea073
NB
88 if (buffer->notes_used == buffer->notes_cap)
89 {
90 buffer->notes_cap = buffer->notes_cap * 2 + 200;
703ad42b
KG
91 buffer->notes = xrealloc (buffer->notes,
92 buffer->notes_cap * sizeof (_cpp_line_note));
26aea073 93 }
0d9f234d 94
26aea073
NB
95 buffer->notes[buffer->notes_used].pos = pos;
96 buffer->notes[buffer->notes_used].type = type;
97 buffer->notes_used++;
0d9f234d
NB
98}
99
26aea073
NB
100/* Returns with a logical line that contains no escaped newlines or
101 trigraphs. This is a time-critical inner loop. */
102void
6cf87ca4 103_cpp_clean_line (cpp_reader *pfile)
45b966db 104{
26aea073
NB
105 cpp_buffer *buffer;
106 const uchar *s;
107 uchar c, *d, *p;
87062813 108
26aea073
NB
109 buffer = pfile->buffer;
110 buffer->cur_note = buffer->notes_used = 0;
111 buffer->cur = buffer->line_base = buffer->next_line;
112 buffer->need_line = false;
113 s = buffer->next_line - 1;
87062813 114
26aea073 115 if (!buffer->from_stage3)
45b966db 116 {
26aea073
NB
117 d = (uchar *) s;
118
119 for (;;)
4a5b68a2 120 {
26aea073
NB
121 c = *++s;
122 *++d = c;
123
124 if (c == '\n' || c == '\r')
125 {
126 /* Handle DOS line endings. */
127 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
128 s++;
129 if (s == buffer->rlimit)
130 break;
131
132 /* Escaped? */
133 p = d;
134 while (p != buffer->next_line && is_nvspace (p[-1]))
135 p--;
136 if (p == buffer->next_line || p[-1] != '\\')
137 break;
138
41c32c98 139 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
26aea073
NB
140 d = p - 2;
141 buffer->next_line = p - 1;
142 }
143 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
144 {
145 /* Add a note regardless, for the benefit of -Wtrigraphs. */
41c32c98 146 add_line_note (buffer, d, s[2]);
26aea073
NB
147 if (CPP_OPTION (pfile, trigraphs))
148 {
149 *d = _cpp_trigraph_map[s[2]];
150 s += 2;
151 }
152 }
4a5b68a2 153 }
45b966db 154 }
26aea073
NB
155 else
156 {
157 do
158 s++;
159 while (*s != '\n' && *s != '\r');
160 d = (uchar *) s;
161
162 /* Handle DOS line endings. */
163 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
164 s++;
165 }
0d9f234d 166
26aea073 167 *d = '\n';
41c32c98
NB
168 /* A sentinel note that should never be processed. */
169 add_line_note (buffer, d + 1, '\n');
26aea073 170 buffer->next_line = s + 1;
45b966db
ZW
171}
172
a8eb6044
NB
173/* Return true if the trigraph indicated by NOTE should be warned
174 about in a comment. */
175static bool
6cf87ca4 176warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
a8eb6044
NB
177{
178 const uchar *p;
179
180 /* Within comments we don't warn about trigraphs, unless the
181 trigraph forms an escaped newline, as that may change
6356f892 182 behavior. */
a8eb6044
NB
183 if (note->type != '/')
184 return false;
185
186 /* If -trigraphs, then this was an escaped newline iff the next note
187 is coincident. */
188 if (CPP_OPTION (pfile, trigraphs))
189 return note[1].pos == note->pos;
190
191 /* Otherwise, see if this forms an escaped newline. */
192 p = note->pos + 3;
193 while (is_nvspace (*p))
194 p++;
195
196 /* There might have been escaped newlines between the trigraph and the
197 newline we found. Hence the position test. */
198 return (*p == '\n' && p < note[1].pos);
199}
200
26aea073
NB
201/* Process the notes created by add_line_note as far as the current
202 location. */
203void
6cf87ca4 204_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
45b966db 205{
29401c30
NB
206 cpp_buffer *buffer = pfile->buffer;
207
26aea073 208 for (;;)
041c3194 209 {
26aea073
NB
210 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
211 unsigned int col;
a5c3cccd 212
26aea073
NB
213 if (note->pos > buffer->cur)
214 break;
a5c3cccd 215
26aea073
NB
216 buffer->cur_note++;
217 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
4d6baafa 218
41c32c98 219 if (note->type == '\\' || note->type == ' ')
26aea073 220 {
41c32c98 221 if (note->type == ' ' && !in_comment)
26aea073
NB
222 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
223 "backslash and newline separated by space");
41c32c98 224
26aea073 225 if (buffer->next_line > buffer->rlimit)
87062813 226 {
26aea073
NB
227 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
228 "backslash-newline at end of file");
229 /* Prevent "no newline at end of file" warning. */
230 buffer->next_line = buffer->rlimit;
87062813 231 }
26aea073
NB
232
233 buffer->line_base = note->pos;
234 pfile->line++;
0d9f234d 235 }
41c32c98
NB
236 else if (_cpp_trigraph_map[note->type])
237 {
a8eb6044
NB
238 if (CPP_OPTION (pfile, warn_trigraphs)
239 && (!in_comment || warn_in_comment (pfile, note)))
41c32c98
NB
240 {
241 if (CPP_OPTION (pfile, trigraphs))
242 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
243 "trigraph ??%c converted to %c",
244 note->type,
245 (int) _cpp_trigraph_map[note->type]);
246 else
905bd7b5
GK
247 {
248 cpp_error_with_line
249 (pfile, DL_WARNING, pfile->line, col,
250 "trigraph ??%c ignored, use -trigraphs to enable",
251 note->type);
252 }
41c32c98
NB
253 }
254 }
255 else
256 abort ();
041c3194 257 }
45b966db
ZW
258}
259
0d9f234d
NB
260/* Skip a C-style block comment. We find the end of the comment by
261 seeing if an asterisk is before every '/' we encounter. Returns
6f572ac2
NB
262 nonzero if comment terminated by EOF, zero otherwise.
263
264 Buffer->cur points to the initial asterisk of the comment. */
26aea073 265bool
6cf87ca4 266_cpp_skip_block_comment (cpp_reader *pfile)
45b966db 267{
041c3194 268 cpp_buffer *buffer = pfile->buffer;
26aea073 269 cppchar_t c;
0d9f234d 270
6f572ac2 271 buffer->cur++;
26aea073
NB
272 if (*buffer->cur == '/')
273 buffer->cur++;
0d9f234d 274
26aea073
NB
275 for (;;)
276 {
277 c = *buffer->cur++;
041c3194 278
0d9f234d
NB
279 /* People like decorating comments with '*', so check for '/'
280 instead for efficiency. */
041c3194 281 if (c == '/')
45b966db 282 {
26aea073 283 if (buffer->cur[-2] == '*')
0d9f234d 284 break;
041c3194 285
0d9f234d 286 /* Warn about potential nested comments, but not if the '/'
a1f300c0 287 comes immediately before the true comment delimiter.
041c3194 288 Don't bother to get it right across escaped newlines. */
0d9f234d 289 if (CPP_OPTION (pfile, warn_comments)
87062813 290 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
ebef4e8c
NB
291 cpp_error_with_line (pfile, DL_WARNING,
292 pfile->line, CPP_BUF_COL (buffer),
293 "\"/*\" within comment");
45b966db 294 }
26aea073
NB
295 else if (c == '\n')
296 {
297 buffer->cur--;
298 _cpp_process_line_notes (pfile, true);
299 if (buffer->next_line >= buffer->rlimit)
300 return true;
301 _cpp_clean_line (pfile);
302 pfile->line++;
303 }
45b966db 304 }
041c3194 305
a8eb6044 306 _cpp_process_line_notes (pfile, true);
26aea073 307 return false;
45b966db
ZW
308}
309
480709cc 310/* Skip a C++ line comment, leaving buffer->cur pointing to the
da7d8304 311 terminating newline. Handles escaped newlines. Returns nonzero
480709cc 312 if a multiline comment. */
041c3194 313static int
6cf87ca4 314skip_line_comment (cpp_reader *pfile)
45b966db 315{
cbcff6df 316 cpp_buffer *buffer = pfile->buffer;
67821e3a 317 unsigned int orig_line = pfile->line;
041c3194 318
26aea073
NB
319 while (*buffer->cur != '\n')
320 buffer->cur++;
480709cc 321
26aea073 322 _cpp_process_line_notes (pfile, true);
67821e3a 323 return orig_line != pfile->line;
041c3194 324}
45b966db 325
26aea073 326/* Skips whitespace, saving the next non-whitespace character. */
52fadca8 327static void
6cf87ca4 328skip_whitespace (cpp_reader *pfile, cppchar_t c)
041c3194
ZW
329{
330 cpp_buffer *buffer = pfile->buffer;
f7d151fb 331 bool saw_NUL = false;
45b966db 332
0d9f234d 333 do
041c3194 334 {
91fcd158 335 /* Horizontal space always OK. */
26aea073 336 if (c == ' ' || c == '\t')
0d9f234d 337 ;
0d9f234d 338 /* Just \f \v or \0 left. */
91fcd158 339 else if (c == '\0')
f7d151fb 340 saw_NUL = true;
93c80368 341 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
ebef4e8c
NB
342 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
343 CPP_BUF_COL (buffer),
344 "%s in preprocessing directive",
345 c == '\f' ? "form feed" : "vertical tab");
0d9f234d 346
0d9f234d 347 c = *buffer->cur++;
45b966db 348 }
ec5c56db 349 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
350 while (is_nvspace (c));
351
f7d151fb
NB
352 if (saw_NUL)
353 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
354
480709cc 355 buffer->cur--;
041c3194 356}
45b966db 357
93c80368
NB
358/* See if the characters of a number token are valid in a name (no
359 '.', '+' or '-'). */
360static int
6cf87ca4 361name_p (cpp_reader *pfile, const cpp_string *string)
93c80368
NB
362{
363 unsigned int i;
364
365 for (i = 0; i < string->len; i++)
366 if (!is_idchar (string->text[i]))
367 return 0;
368
df383483 369 return 1;
93c80368
NB
370}
371
bced6edf 372/* Returns TRUE if the sequence starting at buffer->cur is invalid in
1613e52b 373 an identifier. FIRST is TRUE if this starts an identifier. */
bced6edf 374static bool
6cf87ca4 375forms_identifier_p (cpp_reader *pfile, int first)
bced6edf 376{
1613e52b
NB
377 cpp_buffer *buffer = pfile->buffer;
378
379 if (*buffer->cur == '$')
380 {
381 if (!CPP_OPTION (pfile, dollars_in_ident))
382 return false;
383
384 buffer->cur++;
78b8811a 385 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1613e52b 386 {
78b8811a 387 CPP_OPTION (pfile, warn_dollars) = 0;
1613e52b
NB
388 cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
389 }
390
391 return true;
392 }
bced6edf 393
1613e52b
NB
394 /* Is this a syntactically valid UCN? */
395 if (0 && *buffer->cur == '\\'
396 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
bced6edf 397 {
1613e52b 398 buffer->cur += 2;
e6cc3a24 399 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
1613e52b
NB
400 return true;
401 buffer->cur -= 2;
bced6edf 402 }
bced6edf 403
1613e52b 404 return false;
bced6edf
NB
405}
406
407/* Lex an identifier starting at BUFFER->CUR - 1. */
0d9f234d 408static cpp_hashnode *
6cf87ca4 409lex_identifier (cpp_reader *pfile, const uchar *base)
45b966db 410{
93c80368 411 cpp_hashnode *result;
1613e52b 412 const uchar *cur;
2c3fcba6 413
bced6edf 414 do
10cf9bde 415 {
bced6edf
NB
416 cur = pfile->buffer->cur;
417
418 /* N.B. ISIDNUM does not include $. */
419 while (ISIDNUM (*cur))
420 cur++;
10cf9bde 421
10cf9bde 422 pfile->buffer->cur = cur;
2c3fcba6 423 }
1613e52b 424 while (forms_identifier_p (pfile, false));
bced6edf
NB
425
426 result = (cpp_hashnode *)
427 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
2c3fcba6 428
bced6edf 429 /* Rarely, identifiers require diagnostics when lexed. */
2c3fcba6
ZW
430 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
431 && !pfile->state.skipping, 0))
432 {
433 /* It is allowed to poison the same identifier twice. */
434 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
ebef4e8c 435 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
2c3fcba6
ZW
436 NODE_NAME (result));
437
438 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
439 replacement list of a variadic macro. */
440 if (result == pfile->spec_nodes.n__VA_ARGS__
441 && !pfile->state.va_args_ok)
ebef4e8c 442 cpp_error (pfile, DL_PEDWARN,
6cf87ca4
ZW
443 "__VA_ARGS__ can only appear in the expansion"
444 " of a C99 variadic macro");
2c3fcba6
ZW
445 }
446
447 return result;
448}
449
bced6edf 450/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
45b966db 451static void
6cf87ca4 452lex_number (cpp_reader *pfile, cpp_string *number)
45b966db 453{
562a5c27 454 const uchar *cur;
bced6edf
NB
455 const uchar *base;
456 uchar *dest;
45b966db 457
bced6edf
NB
458 base = pfile->buffer->cur - 1;
459 do
041c3194 460 {
bced6edf 461 cur = pfile->buffer->cur;
0d9f234d 462
bced6edf
NB
463 /* N.B. ISIDNUM does not include $. */
464 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
465 cur++;
45b966db 466
10cf9bde 467 pfile->buffer->cur = cur;
45b966db 468 }
1613e52b 469 while (forms_identifier_p (pfile, false));
93c80368 470
bced6edf
NB
471 number->len = cur - base;
472 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
473 memcpy (dest, base, number->len);
474 dest[number->len] = '\0';
475 number->text = dest;
93c80368
NB
476}
477
6338b358
NB
478/* Create a token of type TYPE with a literal spelling. */
479static void
6cf87ca4
ZW
480create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
481 unsigned int len, enum cpp_ttype type)
6338b358
NB
482{
483 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
484
485 memcpy (dest, base, len);
486 dest[len] = '\0';
487 token->type = type;
488 token->val.str.len = len;
489 token->val.str.text = dest;
490}
491
bced6edf 492/* Lexes a string, character constant, or angle-bracketed header file
6338b358
NB
493 name. The stored string contains the spelling, including opening
494 quote and leading any leading 'L'. It returns the type of the
495 literal, or CPP_OTHER if it was not properly terminated.
496
497 The spelling is NUL-terminated, but it is not guaranteed that this
498 is the first NUL since embedded NULs are preserved. */
041c3194 499static void
6cf87ca4 500lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
45b966db 501{
6338b358
NB
502 bool saw_NUL = false;
503 const uchar *cur;
bced6edf 504 cppchar_t terminator;
6338b358
NB
505 enum cpp_ttype type;
506
507 cur = base;
508 terminator = *cur++;
509 if (terminator == 'L')
510 terminator = *cur++;
511 if (terminator == '\"')
512 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
513 else if (terminator == '\'')
514 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
515 else
516 terminator = '>', type = CPP_HEADER_NAME;
93c80368 517
0d9f234d 518 for (;;)
45b966db 519 {
6338b358 520 cppchar_t c = *cur++;
7868b4a2 521
6f572ac2 522 /* In #include-style directives, terminators are not escapable. */
6338b358
NB
523 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
524 cur++;
525 else if (c == terminator)
bced6edf 526 break;
6338b358 527 else if (c == '\n')
0d9f234d 528 {
6338b358
NB
529 cur--;
530 type = CPP_OTHER;
531 break;
45b966db 532 }
6338b358
NB
533 else if (c == '\0')
534 saw_NUL = true;
45b966db
ZW
535 }
536
6338b358
NB
537 if (saw_NUL && !pfile->state.skipping)
538 cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
45b966db 539
6338b358
NB
540 pfile->buffer->cur = cur;
541 create_literal (pfile, token, base, cur - base, type);
0d9f234d 542}
041c3194 543
93c80368 544/* The stored comment includes the comment start and any terminator. */
9e62c811 545static void
6cf87ca4
ZW
546save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
547 cppchar_t type)
9e62c811 548{
041c3194 549 unsigned char *buffer;
477cdac7 550 unsigned int len, clen;
df383483 551
1c6d33ef 552 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
480709cc 553
3542203b
NB
554 /* C++ comments probably (not definitely) have moved past a new
555 line, which we don't want to save in the comment. */
480709cc 556 if (is_vspace (pfile->buffer->cur[-1]))
3542203b 557 len--;
477cdac7
JT
558
559 /* If we are currently in a directive, then we need to store all
560 C++ comments as C comments internally, and so we need to
561 allocate a little extra space in that case.
562
563 Note that the only time we encounter a directive here is
564 when we are saving comments in a "#define". */
565 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
566
567 buffer = _cpp_unaligned_alloc (pfile, clen);
df383483 568
041c3194 569 token->type = CPP_COMMENT;
477cdac7 570 token->val.str.len = clen;
0d9f234d 571 token->val.str.text = buffer;
45b966db 572
1c6d33ef
NB
573 buffer[0] = '/';
574 memcpy (buffer + 1, from, len - 1);
477cdac7 575
1eeeb6a4 576 /* Finish conversion to a C comment, if necessary. */
477cdac7
JT
577 if (pfile->state.in_directive && type == '/')
578 {
579 buffer[1] = '*';
580 buffer[clen - 2] = '*';
581 buffer[clen - 1] = '/';
582 }
0d9f234d 583}
45b966db 584
5fddcffc
NB
585/* Allocate COUNT tokens for RUN. */
586void
6cf87ca4 587_cpp_init_tokenrun (tokenrun *run, unsigned int count)
5fddcffc
NB
588{
589 run->base = xnewvec (cpp_token, count);
590 run->limit = run->base + count;
591 run->next = NULL;
592}
593
594/* Returns the next tokenrun, or creates one if there is none. */
595static tokenrun *
6cf87ca4 596next_tokenrun (tokenrun *run)
5fddcffc
NB
597{
598 if (run->next == NULL)
599 {
600 run->next = xnew (tokenrun);
bdcbe496 601 run->next->prev = run;
5fddcffc
NB
602 _cpp_init_tokenrun (run->next, 250);
603 }
604
605 return run->next;
606}
607
4ed5bcfb
NB
608/* Allocate a single token that is invalidated at the same time as the
609 rest of the tokens on the line. Has its line and col set to the
610 same as the last lexed token, so that diagnostics appear in the
611 right place. */
612cpp_token *
6cf87ca4 613_cpp_temp_token (cpp_reader *pfile)
4ed5bcfb
NB
614{
615 cpp_token *old, *result;
616
617 old = pfile->cur_token - 1;
618 if (pfile->cur_token == pfile->cur_run->limit)
619 {
620 pfile->cur_run = next_tokenrun (pfile->cur_run);
621 pfile->cur_token = pfile->cur_run->base;
622 }
623
624 result = pfile->cur_token++;
625 result->line = old->line;
626 result->col = old->col;
627 return result;
628}
629
14baae01
NB
630/* Lex a token into RESULT (external interface). Takes care of issues
631 like directive handling, token lookahead, multiple include
a1f300c0 632 optimization and skipping. */
345894b4 633const cpp_token *
6cf87ca4 634_cpp_lex_token (cpp_reader *pfile)
5fddcffc 635{
bdcbe496 636 cpp_token *result;
5fddcffc 637
bdcbe496 638 for (;;)
5fddcffc 639 {
bdcbe496 640 if (pfile->cur_token == pfile->cur_run->limit)
5fddcffc 641 {
bdcbe496
NB
642 pfile->cur_run = next_tokenrun (pfile->cur_run);
643 pfile->cur_token = pfile->cur_run->base;
5fddcffc
NB
644 }
645
bdcbe496 646 if (pfile->lookaheads)
14baae01
NB
647 {
648 pfile->lookaheads--;
649 result = pfile->cur_token++;
650 }
bdcbe496 651 else
14baae01 652 result = _cpp_lex_direct (pfile);
bdcbe496
NB
653
654 if (result->flags & BOL)
5fddcffc 655 {
bdcbe496
NB
656 /* Is this a directive. If _cpp_handle_directive returns
657 false, it is an assembler #. */
658 if (result->type == CPP_HASH
e808ec9c
NB
659 /* 6.10.3 p 11: Directives in a list of macro arguments
660 gives undefined behavior. This implementation
661 handles the directive as normal. */
662 && pfile->state.parsing_args != 1
bdcbe496
NB
663 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
664 continue;
97293897 665 if (pfile->cb.line_change && !pfile->state.skipping)
6cf87ca4 666 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
5fddcffc 667 }
5fddcffc 668
bdcbe496
NB
669 /* We don't skip tokens in directives. */
670 if (pfile->state.in_directive)
671 break;
5fddcffc 672
bdcbe496 673 /* Outside a directive, invalidate controlling macros. At file
14baae01 674 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
6356f892 675 get here and MI optimization works. */
5fddcffc 676 pfile->mi_valid = false;
bdcbe496
NB
677
678 if (!pfile->state.skipping || result->type == CPP_EOF)
679 break;
5fddcffc
NB
680 }
681
345894b4 682 return result;
5fddcffc
NB
683}
684
26aea073
NB
685/* Returns true if a fresh line has been loaded. */
686bool
6cf87ca4 687_cpp_get_fresh_line (cpp_reader *pfile)
004cb263 688{
26aea073
NB
689 /* We can't get a new line until we leave the current directive. */
690 if (pfile->state.in_directive)
691 return false;
df383483 692
26aea073 693 for (;;)
1a76916c 694 {
26aea073 695 cpp_buffer *buffer = pfile->buffer;
1a76916c 696
26aea073
NB
697 if (!buffer->need_line)
698 return true;
699
700 if (buffer->next_line < buffer->rlimit)
004cb263 701 {
26aea073
NB
702 _cpp_clean_line (pfile);
703 return true;
704 }
004cb263 705
26aea073
NB
706 /* First, get out of parsing arguments state. */
707 if (pfile->state.parsing_args)
708 return false;
709
710 /* End of buffer. Non-empty files should end in a newline. */
711 if (buffer->buf != buffer->rlimit
712 && buffer->next_line > buffer->rlimit
713 && !buffer->from_stage3)
714 {
715 /* Only warn once. */
716 buffer->next_line = buffer->rlimit;
717 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
718 CPP_BUF_COLUMN (buffer, buffer->cur),
719 "no newline at end of file");
720 }
721
722 if (buffer->return_at_eof)
723 {
b78f9414 724 _cpp_pop_buffer (pfile);
26aea073 725 return false;
004cb263 726 }
004cb263 727
26aea073
NB
728 _cpp_pop_buffer (pfile);
729 }
004cb263
NB
730}
731
/* Set result->type to THEN_TYPE, stepping over the next character,
   if that character is CHAR; otherwise set it to ELSE_TYPE.  Uses
   the variables `buffer' and `result' of the enclosing function.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      if (*buffer->cur == CHAR)                 \
        {                                       \
          buffer->cur++;                        \
          result->type = THEN_TYPE;             \
        }                                       \
      else                                      \
        result->type = ELSE_TYPE;               \
    }                                           \
  while (0)
480709cc 740
14baae01
NB
741/* Lex a token into pfile->cur_token, which is also incremented, to
742 get diagnostics pointing to the correct location.
743
744 Does not handle issues such as token lookahead, multiple-include
f1ba665b 745 optimization, directives, skipping etc. This function is only
14baae01
NB
746 suitable for use by _cpp_lex_token, and in special cases like
747 lex_expansion_token which doesn't care for any of these issues.
748
749 When meeting a newline, returns CPP_EOF if parsing a directive,
750 otherwise returns to the start of the token buffer if permissible.
751 Returns the location of the lexed token. */
752cpp_token *
6cf87ca4 753_cpp_lex_direct (cpp_reader *pfile)
45b966db 754{
0d9f234d 755 cppchar_t c;
adb84b42 756 cpp_buffer *buffer;
0d9f234d 757 const unsigned char *comment_start;
14baae01 758 cpp_token *result = pfile->cur_token++;
9ec7291f 759
5fddcffc 760 fresh_line:
26aea073
NB
761 result->flags = 0;
762 if (pfile->buffer->need_line)
763 {
764 if (!_cpp_get_fresh_line (pfile))
765 {
766 result->type = CPP_EOF;
9ff7868d
NB
767 if (!pfile->state.in_directive)
768 {
769 /* Tell the compiler the line number of the EOF token. */
770 result->line = pfile->line;
771 result->flags = BOL;
772 }
26aea073
NB
773 return result;
774 }
775 if (!pfile->keep_tokens)
776 {
777 pfile->cur_run = &pfile->base_run;
778 result = pfile->base_run.base;
779 pfile->cur_token = result + 1;
780 }
781 result->flags = BOL;
782 if (pfile->state.parsing_args == 2)
783 result->flags |= PREV_WHITE;
784 }
adb84b42 785 buffer = pfile->buffer;
5fddcffc 786 update_tokens_line:
1444f2ed 787 result->line = pfile->line;
041c3194 788
5fddcffc 789 skipped_white:
26aea073
NB
790 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
791 && !pfile->overlaid_buffer)
792 {
793 _cpp_process_line_notes (pfile, false);
794 result->line = pfile->line;
795 }
480709cc 796 c = *buffer->cur++;
5fddcffc 797 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
5fddcffc 798
0d9f234d 799 switch (c)
45b966db 800 {
4d6baafa
NB
801 case ' ': case '\t': case '\f': case '\v': case '\0':
802 result->flags |= PREV_WHITE;
26aea073
NB
803 skip_whitespace (pfile, c);
804 goto skipped_white;
0d9f234d 805
26aea073
NB
806 case '\n':
807 pfile->line++;
808 buffer->need_line = true;
809 goto fresh_line;
46d07497 810
0d9f234d
NB
811 case '0': case '1': case '2': case '3': case '4':
812 case '5': case '6': case '7': case '8': case '9':
813 result->type = CPP_NUMBER;
bced6edf 814 lex_number (pfile, &result->val.str);
0d9f234d 815 break;
46d07497 816
0abc6a6a
NB
817 case 'L':
818 /* 'L' may introduce wide characters or strings. */
bced6edf
NB
819 if (*buffer->cur == '\'' || *buffer->cur == '"')
820 {
6338b358 821 lex_string (pfile, result, buffer->cur - 1);
bced6edf
NB
822 break;
823 }
df383483 824 /* Fall through. */
0abc6a6a 825
0d9f234d
NB
826 case '_':
827 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
828 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
829 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
830 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
831 case 'y': case 'z':
832 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
0abc6a6a 833 case 'G': case 'H': case 'I': case 'J': case 'K':
0d9f234d
NB
834 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
835 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
836 case 'Y': case 'Z':
837 result->type = CPP_NAME;
1613e52b 838 result->val.node = lex_identifier (pfile, buffer->cur - 1);
0d9f234d 839
0d9f234d 840 /* Convert named operators to their proper types. */
0abc6a6a 841 if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
842 {
843 result->flags |= NAMED_OP;
4977bab6 844 result->type = result->val.node->directive_index;
0d9f234d
NB
845 }
846 break;
847
848 case '\'':
849 case '"':
6338b358 850 lex_string (pfile, result, buffer->cur - 1);
0d9f234d 851 break;
041c3194 852
0d9f234d 853 case '/':
1c6d33ef
NB
854 /* A potential block or line comment. */
855 comment_start = buffer->cur;
6f572ac2
NB
856 c = *buffer->cur;
857
1c6d33ef
NB
858 if (c == '*')
859 {
26aea073 860 if (_cpp_skip_block_comment (pfile))
ebef4e8c 861 cpp_error (pfile, DL_ERROR, "unterminated comment");
0d9f234d 862 }
480709cc
NB
863 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
864 || CPP_IN_SYSTEM_HEADER (pfile)))
0d9f234d 865 {
bdb05a7b
NB
866 /* Warn about comments only if pedantically GNUC89, and not
867 in system headers. */
868 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 869 && ! buffer->warned_cplusplus_comments)
041c3194 870 {
ebef4e8c 871 cpp_error (pfile, DL_PEDWARN,
56508306 872 "C++ style comments are not allowed in ISO C90");
ebef4e8c
NB
873 cpp_error (pfile, DL_PEDWARN,
874 "(this will be reported only once per input file)");
1c6d33ef
NB
875 buffer->warned_cplusplus_comments = 1;
876 }
0d9f234d 877
01ef6563 878 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
ebef4e8c 879 cpp_error (pfile, DL_WARNING, "multi-line comment");
1c6d33ef 880 }
480709cc
NB
881 else if (c == '=')
882 {
6f572ac2 883 buffer->cur++;
480709cc
NB
884 result->type = CPP_DIV_EQ;
885 break;
886 }
887 else
888 {
480709cc
NB
889 result->type = CPP_DIV;
890 break;
891 }
0d9f234d 892
1c6d33ef
NB
893 if (!pfile->state.save_comments)
894 {
895 result->flags |= PREV_WHITE;
5fddcffc 896 goto update_tokens_line;
0d9f234d 897 }
1c6d33ef
NB
898
899 /* Save the comment as a token in its own right. */
477cdac7 900 save_comment (pfile, result, comment_start, c);
bdcbe496 901 break;
0d9f234d
NB
902
903 case '<':
904 if (pfile->state.angled_headers)
905 {
6338b358 906 lex_string (pfile, result, buffer->cur - 1);
480709cc 907 break;
0d9f234d 908 }
45b966db 909
6f572ac2
NB
910 result->type = CPP_LESS;
911 if (*buffer->cur == '=')
912 buffer->cur++, result->type = CPP_LESS_EQ;
913 else if (*buffer->cur == '<')
0d9f234d 914 {
6f572ac2
NB
915 buffer->cur++;
916 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
0d9f234d 917 }
6f572ac2 918 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
0d9f234d 919 {
6f572ac2
NB
920 buffer->cur++;
921 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
0d9f234d 922 }
6f572ac2 923 else if (CPP_OPTION (pfile, digraphs))
480709cc 924 {
6f572ac2
NB
925 if (*buffer->cur == ':')
926 {
927 buffer->cur++;
928 result->flags |= DIGRAPH;
929 result->type = CPP_OPEN_SQUARE;
930 }
931 else if (*buffer->cur == '%')
932 {
933 buffer->cur++;
934 result->flags |= DIGRAPH;
935 result->type = CPP_OPEN_BRACE;
936 }
480709cc 937 }
0d9f234d
NB
938 break;
939
940 case '>':
6f572ac2
NB
941 result->type = CPP_GREATER;
942 if (*buffer->cur == '=')
943 buffer->cur++, result->type = CPP_GREATER_EQ;
944 else if (*buffer->cur == '>')
0d9f234d 945 {
6f572ac2
NB
946 buffer->cur++;
947 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
948 }
949 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
950 {
951 buffer->cur++;
952 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
0d9f234d
NB
953 }
954 break;
955
cbcff6df 956 case '%':
6f572ac2
NB
957 result->type = CPP_MOD;
958 if (*buffer->cur == '=')
959 buffer->cur++, result->type = CPP_MOD_EQ;
960 else if (CPP_OPTION (pfile, digraphs))
480709cc 961 {
6f572ac2 962 if (*buffer->cur == ':')
480709cc 963 {
6f572ac2
NB
964 buffer->cur++;
965 result->flags |= DIGRAPH;
966 result->type = CPP_HASH;
967 if (*buffer->cur == '%' && buffer->cur[1] == ':')
968 buffer->cur += 2, result->type = CPP_PASTE;
969 }
970 else if (*buffer->cur == '>')
971 {
972 buffer->cur++;
973 result->flags |= DIGRAPH;
974 result->type = CPP_CLOSE_BRACE;
480709cc 975 }
480709cc 976 }
0d9f234d
NB
977 break;
978
cbcff6df 979 case '.':
480709cc 980 result->type = CPP_DOT;
6f572ac2 981 if (ISDIGIT (*buffer->cur))
480709cc
NB
982 {
983 result->type = CPP_NUMBER;
bced6edf 984 lex_number (pfile, &result->val.str);
480709cc 985 }
6f572ac2
NB
986 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
987 buffer->cur += 2, result->type = CPP_ELLIPSIS;
988 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
989 buffer->cur++, result->type = CPP_DOT_STAR;
0d9f234d 990 break;
45b966db 991
0d9f234d 992 case '+':
6f572ac2
NB
993 result->type = CPP_PLUS;
994 if (*buffer->cur == '+')
995 buffer->cur++, result->type = CPP_PLUS_PLUS;
996 else if (*buffer->cur == '=')
997 buffer->cur++, result->type = CPP_PLUS_EQ;
0d9f234d 998 break;
04e3ec78 999
0d9f234d 1000 case '-':
6f572ac2
NB
1001 result->type = CPP_MINUS;
1002 if (*buffer->cur == '>')
0d9f234d 1003 {
6f572ac2 1004 buffer->cur++;
480709cc 1005 result->type = CPP_DEREF;
6f572ac2
NB
1006 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1007 buffer->cur++, result->type = CPP_DEREF_STAR;
480709cc 1008 }
6f572ac2
NB
1009 else if (*buffer->cur == '-')
1010 buffer->cur++, result->type = CPP_MINUS_MINUS;
1011 else if (*buffer->cur == '=')
1012 buffer->cur++, result->type = CPP_MINUS_EQ;
0d9f234d 1013 break;
45b966db 1014
0d9f234d 1015 case '&':
6f572ac2
NB
1016 result->type = CPP_AND;
1017 if (*buffer->cur == '&')
1018 buffer->cur++, result->type = CPP_AND_AND;
1019 else if (*buffer->cur == '=')
1020 buffer->cur++, result->type = CPP_AND_EQ;
0d9f234d 1021 break;
df383483 1022
0d9f234d 1023 case '|':
6f572ac2
NB
1024 result->type = CPP_OR;
1025 if (*buffer->cur == '|')
1026 buffer->cur++, result->type = CPP_OR_OR;
1027 else if (*buffer->cur == '=')
1028 buffer->cur++, result->type = CPP_OR_EQ;
0d9f234d 1029 break;
45b966db 1030
0d9f234d 1031 case ':':
6f572ac2
NB
1032 result->type = CPP_COLON;
1033 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1034 buffer->cur++, result->type = CPP_SCOPE;
1035 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
0d9f234d 1036 {
6f572ac2 1037 buffer->cur++;
0d9f234d 1038 result->flags |= DIGRAPH;
480709cc
NB
1039 result->type = CPP_CLOSE_SQUARE;
1040 }
0d9f234d 1041 break;
45b966db 1042
480709cc
NB
1043 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1044 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1045 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1046 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1047 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1048
26aea073 1049 case '?': result->type = CPP_QUERY; break;
0d9f234d
NB
1050 case '~': result->type = CPP_COMPL; break;
1051 case ',': result->type = CPP_COMMA; break;
1052 case '(': result->type = CPP_OPEN_PAREN; break;
1053 case ')': result->type = CPP_CLOSE_PAREN; break;
1054 case '[': result->type = CPP_OPEN_SQUARE; break;
1055 case ']': result->type = CPP_CLOSE_SQUARE; break;
1056 case '{': result->type = CPP_OPEN_BRACE; break;
1057 case '}': result->type = CPP_CLOSE_BRACE; break;
1058 case ';': result->type = CPP_SEMICOLON; break;
1059
40f03658 1060 /* @ is a punctuator in Objective-C. */
cc937581 1061 case '@': result->type = CPP_ATSIGN; break;
0d9f234d 1062
0abc6a6a 1063 case '$':
1613e52b
NB
1064 case '\\':
1065 {
1066 const uchar *base = --buffer->cur;
0abc6a6a 1067
1613e52b
NB
1068 if (forms_identifier_p (pfile, true))
1069 {
1070 result->type = CPP_NAME;
1071 result->val.node = lex_identifier (pfile, base);
1072 break;
1073 }
1074 buffer->cur++;
1067694a 1075 }
1613e52b 1076
1067694a 1077 default:
6338b358
NB
1078 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1079 break;
0d9f234d 1080 }
bdcbe496
NB
1081
1082 return result;
0d9f234d
NB
1083}
1084
59325650
NB
1085/* An upper bound on the number of bytes needed to spell TOKEN.
1086 Does not include preceding whitespace. */
93c80368 1087unsigned int
6cf87ca4 1088cpp_token_len (const cpp_token *token)
0d9f234d 1089{
93c80368 1090 unsigned int len;
6d2c2047 1091
93c80368 1092 switch (TOKEN_SPELL (token))
041c3194 1093 {
59325650 1094 default: len = 4; break;
6338b358 1095 case SPELL_LITERAL: len = token->val.str.len; break;
a28c5035 1096 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
041c3194 1097 }
59325650
NB
1098
1099 return len;
6d2c2047
ZW
1100}
1101
041c3194 1102/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885 1103 already contain the enough space to hold the token's spelling.
6cf87ca4
ZW
1104 Returns a pointer to the character after the last character written.
1105 FIXME: Would be nice if we didn't need the PFILE argument. */
93c80368 1106unsigned char *
6cf87ca4
ZW
1107cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1108 unsigned char *buffer)
041c3194 1109{
96be6998 1110 switch (TOKEN_SPELL (token))
041c3194
ZW
1111 {
1112 case SPELL_OPERATOR:
1113 {
1114 const unsigned char *spelling;
1115 unsigned char c;
d6d5f795 1116
041c3194 1117 if (token->flags & DIGRAPH)
37b8524c
JDA
1118 spelling
1119 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1120 else if (token->flags & NAMED_OP)
1121 goto spell_ident;
041c3194 1122 else
96be6998 1123 spelling = TOKEN_NAME (token);
df383483 1124
041c3194
ZW
1125 while ((c = *spelling++) != '\0')
1126 *buffer++ = c;
1127 }
1128 break;
d6d5f795 1129
47ad4138 1130 spell_ident:
041c3194 1131 case SPELL_IDENT:
a28c5035
NB
1132 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1133 buffer += NODE_LEN (token->val.node);
041c3194 1134 break;
d6d5f795 1135
6338b358 1136 case SPELL_LITERAL:
47ad4138
ZW
1137 memcpy (buffer, token->val.str.text, token->val.str.len);
1138 buffer += token->val.str.len;
1139 break;
1140
041c3194 1141 case SPELL_NONE:
ebef4e8c 1142 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1143 break;
1144 }
d6d5f795 1145
041c3194
ZW
1146 return buffer;
1147}
d6d5f795 1148
5d8ebbd8
NB
1149/* Returns TOKEN spelt as a null-terminated string. The string is
1150 freed when the reader is destroyed. Useful for diagnostics. */
93c80368 1151unsigned char *
6cf87ca4 1152cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
59325650
NB
1153{
1154 unsigned int len = cpp_token_len (token) + 1;
ece54d54 1155 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
c5a04734 1156
93c80368
NB
1157 end = cpp_spell_token (pfile, token, start);
1158 end[0] = '\0';
c5a04734 1159
93c80368
NB
1160 return start;
1161}
c5a04734 1162
5d8ebbd8
NB
1163/* Used by C front ends, which really should move to using
1164 cpp_token_as_text. */
93c80368 1165const char *
6cf87ca4 1166cpp_type2name (enum cpp_ttype type)
93c80368
NB
1167{
1168 return (const char *) token_spellings[type].name;
1169}
c5a04734 1170
4ed5bcfb
NB
1171/* Writes the spelling of token to FP, without any preceding space.
1172 Separated from cpp_spell_token for efficiency - to avoid stdio
1173 double-buffering. */
93c80368 1174void
6cf87ca4 1175cpp_output_token (const cpp_token *token, FILE *fp)
93c80368 1176{
93c80368 1177 switch (TOKEN_SPELL (token))
c5a04734 1178 {
93c80368
NB
1179 case SPELL_OPERATOR:
1180 {
1181 const unsigned char *spelling;
3b681e9d 1182 int c;
c5a04734 1183
93c80368 1184 if (token->flags & DIGRAPH)
37b8524c
JDA
1185 spelling
1186 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1187 else if (token->flags & NAMED_OP)
1188 goto spell_ident;
1189 else
1190 spelling = TOKEN_NAME (token);
041c3194 1191
3b681e9d
ZW
1192 c = *spelling;
1193 do
1194 putc (c, fp);
1195 while ((c = *++spelling) != '\0');
93c80368
NB
1196 }
1197 break;
041c3194 1198
93c80368
NB
1199 spell_ident:
1200 case SPELL_IDENT:
3b681e9d 1201 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
93c80368 1202 break;
041c3194 1203
6338b358 1204 case SPELL_LITERAL:
47ad4138
ZW
1205 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1206 break;
1207
93c80368
NB
1208 case SPELL_NONE:
1209 /* An error, most probably. */
1210 break;
041c3194 1211 }
c5a04734
ZW
1212}
1213
93c80368
NB
1214/* Compare two tokens. */
1215int
6cf87ca4 1216_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
c5a04734 1217{
93c80368
NB
1218 if (a->type == b->type && a->flags == b->flags)
1219 switch (TOKEN_SPELL (a))
1220 {
1221 default: /* Keep compiler happy. */
1222 case SPELL_OPERATOR:
1223 return 1;
93c80368 1224 case SPELL_NONE:
56051c0a 1225 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1226 case SPELL_IDENT:
1227 return a->val.node == b->val.node;
6338b358 1228 case SPELL_LITERAL:
93c80368
NB
1229 return (a->val.str.len == b->val.str.len
1230 && !memcmp (a->val.str.text, b->val.str.text,
1231 a->val.str.len));
1232 }
c5a04734 1233
041c3194
ZW
1234 return 0;
1235}
1236
93c80368
NB
1237/* Returns nonzero if a space should be inserted to avoid an
1238 accidental token paste for output. For simplicity, it is
1239 conservative, and occasionally advises a space where one is not
1240 needed, e.g. "." and ".2". */
93c80368 1241int
6cf87ca4
ZW
1242cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1243 const cpp_token *token2)
c5a04734 1244{
93c80368
NB
1245 enum cpp_ttype a = token1->type, b = token2->type;
1246 cppchar_t c;
c5a04734 1247
93c80368
NB
1248 if (token1->flags & NAMED_OP)
1249 a = CPP_NAME;
1250 if (token2->flags & NAMED_OP)
1251 b = CPP_NAME;
c5a04734 1252
93c80368
NB
1253 c = EOF;
1254 if (token2->flags & DIGRAPH)
37b8524c 1255 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1256 else if (token_spellings[b].category == SPELL_OPERATOR)
1257 c = token_spellings[b].name[0];
c5a04734 1258
93c80368 1259 /* Quickly get everything that can paste with an '='. */
37b8524c 1260 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1261 return 1;
c5a04734 1262
93c80368 1263 switch (a)
c5a04734 1264 {
93c80368
NB
1265 case CPP_GREATER: return c == '>' || c == '?';
1266 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1267 case CPP_PLUS: return c == '+';
1268 case CPP_MINUS: return c == '-' || c == '>';
1269 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1270 case CPP_MOD: return c == ':' || c == '>';
1271 case CPP_AND: return c == '&';
1272 case CPP_OR: return c == '|';
1273 case CPP_COLON: return c == ':' || c == '>';
1274 case CPP_DEREF: return c == '*';
26ec42ee 1275 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1276 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1277 case CPP_NAME: return ((b == CPP_NUMBER
1278 && name_p (pfile, &token2->val.str))
1279 || b == CPP_NAME
1280 || b == CPP_CHAR || b == CPP_STRING); /* L */
1281 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1282 || c == '.' || c == '+' || c == '-');
1613e52b 1283 /* UCNs */
1067694a
NB
1284 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1285 && b == CPP_NAME)
1613e52b 1286 || (CPP_OPTION (pfile, objc)
1067694a 1287 && token1->val.str.text[0] == '@'
1613e52b 1288 && (b == CPP_NAME || b == CPP_STRING)));
93c80368 1289 default: break;
c5a04734 1290 }
c5a04734 1291
417f3e3a 1292 return 0;
c5a04734
ZW
1293}
1294
93c80368 1295/* Output all the remaining tokens on the current line, and a newline
4ed5bcfb
NB
1296 character, to FP. Leading whitespace is removed. If there are
1297 macros, special token padding is not performed. */
c5a04734 1298void
6cf87ca4 1299cpp_output_line (cpp_reader *pfile, FILE *fp)
c5a04734 1300{
4ed5bcfb 1301 const cpp_token *token;
96be6998 1302
4ed5bcfb
NB
1303 token = cpp_get_token (pfile);
1304 while (token->type != CPP_EOF)
96be6998 1305 {
4ed5bcfb
NB
1306 cpp_output_token (token, fp);
1307 token = cpp_get_token (pfile);
1308 if (token->flags & PREV_WHITE)
1309 putc (' ', fp);
96be6998
ZW
1310 }
1311
93c80368 1312 putc ('\n', fp);
041c3194 1313}
c5a04734 1314
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the bytes between BUFF's cur
   and limit.  Every macro argument is parenthesized in the expansion
   so that an argument containing a low-precedence operator is
   evaluated as a unit.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1329
c9e7a609
NB
1330/* Create a new allocation buffer. Place the control block at the end
1331 of the buffer, so that buffer overflows will cause immediate chaos. */
b8af0ca5 1332static _cpp_buff *
6cf87ca4 1333new_buff (size_t len)
b8af0ca5
NB
1334{
1335 _cpp_buff *result;
ece54d54 1336 unsigned char *base;
b8af0ca5 1337
1e013d2e
NB
1338 if (len < MIN_BUFF_SIZE)
1339 len = MIN_BUFF_SIZE;
c70f6ed3 1340 len = CPP_ALIGN (len);
b8af0ca5
NB
1341
1342 base = xmalloc (len + sizeof (_cpp_buff));
1343 result = (_cpp_buff *) (base + len);
1344 result->base = base;
1345 result->cur = base;
1346 result->limit = base + len;
1347 result->next = NULL;
1348 return result;
1349}
1350
1351/* Place a chain of unwanted allocation buffers on the free list. */
1352void
6cf87ca4 1353_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
b8af0ca5
NB
1354{
1355 _cpp_buff *end = buff;
1356
1357 while (end->next)
1358 end = end->next;
1359 end->next = pfile->free_buffs;
1360 pfile->free_buffs = buff;
1361}
1362
1363/* Return a free buffer of size at least MIN_SIZE. */
1364_cpp_buff *
6cf87ca4 1365_cpp_get_buff (cpp_reader *pfile, size_t min_size)
b8af0ca5
NB
1366{
1367 _cpp_buff *result, **p;
1368
1369 for (p = &pfile->free_buffs;; p = &(*p)->next)
1370 {
6142088c 1371 size_t size;
1e013d2e
NB
1372
1373 if (*p == NULL)
b8af0ca5 1374 return new_buff (min_size);
1e013d2e
NB
1375 result = *p;
1376 size = result->limit - result->base;
1377 /* Return a buffer that's big enough, but don't waste one that's
1378 way too big. */
34f5271d 1379 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
b8af0ca5
NB
1380 break;
1381 }
1382
1383 *p = result->next;
1384 result->next = NULL;
1385 result->cur = result->base;
1386 return result;
1387}
1388
4fe9b91c 1389/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
1390 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1391 the excess bytes to the new buffer. Chains the new buffer after
1392 BUFF, and returns the new buffer. */
b8af0ca5 1393_cpp_buff *
6cf87ca4 1394_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
b8af0ca5 1395{
6142088c 1396 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
8c3b2693 1397 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
b8af0ca5 1398
8c3b2693
NB
1399 buff->next = new_buff;
1400 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1401 return new_buff;
1402}
1403
4fe9b91c 1404/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
1405 remaining bytes of the buffer pointed to by BUFF, and at least
1406 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1407 Chains the new buffer before the buffer pointed to by BUFF, and
1408 updates the pointer to point to the new buffer. */
1409void
6cf87ca4 1410_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
8c3b2693
NB
1411{
1412 _cpp_buff *new_buff, *old_buff = *pbuff;
1413 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1414
1415 new_buff = _cpp_get_buff (pfile, size);
1416 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1417 new_buff->next = old_buff;
1418 *pbuff = new_buff;
b8af0ca5
NB
1419}
1420
1421/* Free a chain of buffers starting at BUFF. */
1422void
5671bf27 1423_cpp_free_buff (_cpp_buff *buff)
b8af0ca5
NB
1424{
1425 _cpp_buff *next;
1426
1427 for (; buff; buff = next)
1428 {
1429 next = buff->next;
1430 free (buff->base);
1431 }
1432}
417f3e3a 1433
ece54d54
NB
1434/* Allocate permanent, unaligned storage of length LEN. */
1435unsigned char *
6cf87ca4 1436_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
ece54d54
NB
1437{
1438 _cpp_buff *buff = pfile->u_buff;
1439 unsigned char *result = buff->cur;
1440
1441 if (len > (size_t) (buff->limit - result))
1442 {
1443 buff = _cpp_get_buff (pfile, len);
1444 buff->next = pfile->u_buff;
1445 pfile->u_buff = buff;
1446 result = buff->cur;
1447 }
1448
1449 buff->cur = result + len;
1450 return result;
1451}
1452
87062813
NB
1453/* Allocate permanent, unaligned storage of length LEN from a_buff.
1454 That buffer is used for growing allocations when saving macro
1455 replacement lists in a #define, and when parsing an answer to an
1456 assertion in #assert, #unassert or #if (and therefore possibly
1457 whilst expanding macros). It therefore must not be used by any
1458 code that they might call: specifically the lexer and the guts of
1459 the macro expander.
1460
1461 All existing other uses clearly fit this restriction: storing
1462 registered pragmas during initialization. */
93c80368 1463unsigned char *
6cf87ca4 1464_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3fef5b2b 1465{
8c3b2693
NB
1466 _cpp_buff *buff = pfile->a_buff;
1467 unsigned char *result = buff->cur;
3fef5b2b 1468
8c3b2693 1469 if (len > (size_t) (buff->limit - result))
3fef5b2b 1470 {
8c3b2693
NB
1471 buff = _cpp_get_buff (pfile, len);
1472 buff->next = pfile->a_buff;
1473 pfile->a_buff = buff;
1474 result = buff->cur;
3fef5b2b 1475 }
041c3194 1476
8c3b2693 1477 buff->cur = result + len;
93c80368 1478 return result;
041c3194 1479}