]> git.ipfire.org Git - thirdparty/gcc.git/blame - libcpp/lex.c
re PR tree-optimization/38865 (missing FRE with VIEW_CONVERT_EXPR)
[thirdparty/gcc.git] / libcpp / lex.c
CommitLineData
45b966db 1/* CPP Library - lexical analysis.
748086b7
JJ
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
3 Free Software Foundation, Inc.
45b966db
ZW
4 Contributed by Per Bothner, 1994-95.
5 Based on CCCP program by Paul Rubin, June 1986
6 Adapted to ANSI C, Richard Stallman, Jan 1987
7 Broken out to separate file, Zack Weinberg, Mar 2000
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
748086b7 11Free Software Foundation; either version 3, or (at your option) any
45b966db
ZW
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
748086b7
JJ
20along with this program; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
45b966db
ZW
22
23#include "config.h"
24#include "system.h"
45b966db 25#include "cpplib.h"
4f4e53dd 26#include "internal.h"
45b966db 27
93c80368 28enum spell_type
f9a0e96c 29{
93c80368 30 SPELL_OPERATOR = 0,
93c80368 31 SPELL_IDENT,
6338b358 32 SPELL_LITERAL,
93c80368 33 SPELL_NONE
f9a0e96c
ZW
34};
35
93c80368 36struct token_spelling
f9a0e96c 37{
93c80368
NB
38 enum spell_type category;
39 const unsigned char *name;
f9a0e96c
ZW
40};
41
8206c799 42static const unsigned char *const digraph_spellings[] =
b6baa67d 43{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
93c80368 44
b6baa67d
KVH
45#define OP(e, s) { SPELL_OPERATOR, UC s },
46#define TK(e, s) { SPELL_ ## s, UC #e },
8206c799 47static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
93c80368
NB
48#undef OP
49#undef TK
50
51#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
52#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
f2d5f0cc 53
6cf87ca4
ZW
54static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
55static int skip_line_comment (cpp_reader *);
56static void skip_whitespace (cpp_reader *, cppchar_t);
6cf87ca4
ZW
57static void lex_string (cpp_reader *, cpp_token *, const uchar *);
58static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
631d0d36 59static void store_comment (cpp_reader *, cpp_token *);
6cf87ca4
ZW
60static void create_literal (cpp_reader *, cpp_token *, const uchar *,
61 unsigned int, enum cpp_ttype);
62static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
63static int name_p (cpp_reader *, const cpp_string *);
6cf87ca4
ZW
64static tokenrun *next_tokenrun (tokenrun *);
65
6cf87ca4 66static _cpp_buff *new_buff (size_t);
15dad1d9 67
9d10c9a9 68
041c3194 69/* Utility routine:
9e62c811 70
bfb9dc7f
ZW
71 Compares, the token TOKEN to the NUL-terminated string STRING.
72 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
041c3194 73int
6cf87ca4 74cpp_ideq (const cpp_token *token, const char *string)
041c3194 75{
bfb9dc7f 76 if (token->type != CPP_NAME)
041c3194 77 return 0;
bfb9dc7f 78
9a0c6187 79 return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
15dad1d9 80}
1368ee70 81
26aea073
NB
82/* Record a note TYPE at byte POS into the current cleaned logical
83 line. */
87062813 84static void
6cf87ca4 85add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
0d9f234d 86{
26aea073
NB
87 if (buffer->notes_used == buffer->notes_cap)
88 {
89 buffer->notes_cap = buffer->notes_cap * 2 + 200;
c3f829c1
GDR
90 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
91 buffer->notes_cap);
26aea073 92 }
0d9f234d 93
26aea073
NB
94 buffer->notes[buffer->notes_used].pos = pos;
95 buffer->notes[buffer->notes_used].type = type;
96 buffer->notes_used++;
0d9f234d
NB
97}
98
26aea073
NB
99/* Returns with a logical line that contains no escaped newlines or
100 trigraphs. This is a time-critical inner loop. */
101void
6cf87ca4 102_cpp_clean_line (cpp_reader *pfile)
45b966db 103{
26aea073
NB
104 cpp_buffer *buffer;
105 const uchar *s;
106 uchar c, *d, *p;
87062813 107
26aea073
NB
108 buffer = pfile->buffer;
109 buffer->cur_note = buffer->notes_used = 0;
110 buffer->cur = buffer->line_base = buffer->next_line;
111 buffer->need_line = false;
112 s = buffer->next_line - 1;
87062813 113
26aea073 114 if (!buffer->from_stage3)
45b966db 115 {
7af45bd4
ILT
116 const uchar *pbackslash = NULL;
117
d08dcf87
ZW
118 /* Short circuit for the common case of an un-escaped line with
119 no trigraphs. The primary win here is by not writing any
120 data back to memory until we have to. */
121 for (;;)
122 {
123 c = *++s;
7af45bd4
ILT
124 if (__builtin_expect (c == '\n', false)
125 || __builtin_expect (c == '\r', false))
d08dcf87
ZW
126 {
127 d = (uchar *) s;
128
7af45bd4 129 if (__builtin_expect (s == buffer->rlimit, false))
d08dcf87
ZW
130 goto done;
131
132 /* DOS line ending? */
7af45bd4
ILT
133 if (__builtin_expect (c == '\r', false)
134 && s[1] == '\n')
135 {
136 s++;
137 if (s == buffer->rlimit)
138 goto done;
139 }
d08dcf87 140
7af45bd4 141 if (__builtin_expect (pbackslash == NULL, true))
d08dcf87
ZW
142 goto done;
143
7af45bd4 144 /* Check for escaped newline. */
d08dcf87 145 p = d;
7af45bd4 146 while (is_nvspace (p[-1]))
d08dcf87 147 p--;
7af45bd4 148 if (p - 1 != pbackslash)
d08dcf87
ZW
149 goto done;
150
151 /* Have an escaped newline; process it and proceed to
152 the slow path. */
153 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
154 d = p - 2;
155 buffer->next_line = p - 1;
156 break;
157 }
7af45bd4
ILT
158 if (__builtin_expect (c == '\\', false))
159 pbackslash = s;
160 else if (__builtin_expect (c == '?', false)
161 && __builtin_expect (s[1] == '?', false)
162 && _cpp_trigraph_map[s[2]])
d08dcf87
ZW
163 {
164 /* Have a trigraph. We may or may not have to convert
165 it. Add a line note regardless, for -Wtrigraphs. */
166 add_line_note (buffer, s, s[2]);
167 if (CPP_OPTION (pfile, trigraphs))
168 {
169 /* We do, and that means we have to switch to the
170 slow path. */
171 d = (uchar *) s;
172 *d = _cpp_trigraph_map[s[2]];
173 s += 2;
174 break;
175 }
176 }
177 }
178
26aea073
NB
179
180 for (;;)
4a5b68a2 181 {
26aea073
NB
182 c = *++s;
183 *++d = c;
184
185 if (c == '\n' || c == '\r')
186 {
187 /* Handle DOS line endings. */
188 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
189 s++;
190 if (s == buffer->rlimit)
191 break;
192
193 /* Escaped? */
194 p = d;
195 while (p != buffer->next_line && is_nvspace (p[-1]))
196 p--;
197 if (p == buffer->next_line || p[-1] != '\\')
198 break;
199
41c32c98 200 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
26aea073
NB
201 d = p - 2;
202 buffer->next_line = p - 1;
203 }
204 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
205 {
206 /* Add a note regardless, for the benefit of -Wtrigraphs. */
41c32c98 207 add_line_note (buffer, d, s[2]);
26aea073
NB
208 if (CPP_OPTION (pfile, trigraphs))
209 {
210 *d = _cpp_trigraph_map[s[2]];
211 s += 2;
212 }
213 }
4a5b68a2 214 }
45b966db 215 }
26aea073
NB
216 else
217 {
218 do
219 s++;
220 while (*s != '\n' && *s != '\r');
221 d = (uchar *) s;
222
223 /* Handle DOS line endings. */
224 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
225 s++;
226 }
0d9f234d 227
d08dcf87 228 done:
26aea073 229 *d = '\n';
41c32c98
NB
230 /* A sentinel note that should never be processed. */
231 add_line_note (buffer, d + 1, '\n');
26aea073 232 buffer->next_line = s + 1;
45b966db
ZW
233}
234
a8eb6044
NB
235/* Return true if the trigraph indicated by NOTE should be warned
236 about in a comment. */
237static bool
6cf87ca4 238warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
a8eb6044
NB
239{
240 const uchar *p;
241
242 /* Within comments we don't warn about trigraphs, unless the
243 trigraph forms an escaped newline, as that may change
6356f892 244 behavior. */
a8eb6044
NB
245 if (note->type != '/')
246 return false;
247
248 /* If -trigraphs, then this was an escaped newline iff the next note
249 is coincident. */
250 if (CPP_OPTION (pfile, trigraphs))
251 return note[1].pos == note->pos;
252
253 /* Otherwise, see if this forms an escaped newline. */
254 p = note->pos + 3;
255 while (is_nvspace (*p))
256 p++;
257
258 /* There might have been escaped newlines between the trigraph and the
259 newline we found. Hence the position test. */
260 return (*p == '\n' && p < note[1].pos);
261}
262
26aea073
NB
263/* Process the notes created by add_line_note as far as the current
264 location. */
265void
6cf87ca4 266_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
45b966db 267{
29401c30
NB
268 cpp_buffer *buffer = pfile->buffer;
269
26aea073 270 for (;;)
041c3194 271 {
26aea073
NB
272 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
273 unsigned int col;
a5c3cccd 274
26aea073
NB
275 if (note->pos > buffer->cur)
276 break;
a5c3cccd 277
26aea073
NB
278 buffer->cur_note++;
279 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
4d6baafa 280
41c32c98 281 if (note->type == '\\' || note->type == ' ')
26aea073 282 {
41c32c98 283 if (note->type == ' ' && !in_comment)
500bee0a 284 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
26aea073 285 "backslash and newline separated by space");
41c32c98 286
26aea073 287 if (buffer->next_line > buffer->rlimit)
87062813 288 {
500bee0a 289 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
26aea073
NB
290 "backslash-newline at end of file");
291 /* Prevent "no newline at end of file" warning. */
292 buffer->next_line = buffer->rlimit;
87062813 293 }
26aea073
NB
294
295 buffer->line_base = note->pos;
12f9df4e 296 CPP_INCREMENT_LINE (pfile, 0);
0d9f234d 297 }
41c32c98
NB
298 else if (_cpp_trigraph_map[note->type])
299 {
a8eb6044
NB
300 if (CPP_OPTION (pfile, warn_trigraphs)
301 && (!in_comment || warn_in_comment (pfile, note)))
41c32c98
NB
302 {
303 if (CPP_OPTION (pfile, trigraphs))
500bee0a 304 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
41c32c98
NB
305 "trigraph ??%c converted to %c",
306 note->type,
307 (int) _cpp_trigraph_map[note->type]);
308 else
905bd7b5
GK
309 {
310 cpp_error_with_line
500bee0a 311 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
905bd7b5
GK
312 "trigraph ??%c ignored, use -trigraphs to enable",
313 note->type);
314 }
41c32c98
NB
315 }
316 }
317 else
318 abort ();
041c3194 319 }
45b966db
ZW
320}
321
0d9f234d
NB
322/* Skip a C-style block comment. We find the end of the comment by
323 seeing if an asterisk is before every '/' we encounter. Returns
6f572ac2
NB
324 nonzero if comment terminated by EOF, zero otherwise.
325
326 Buffer->cur points to the initial asterisk of the comment. */
26aea073 327bool
6cf87ca4 328_cpp_skip_block_comment (cpp_reader *pfile)
45b966db 329{
041c3194 330 cpp_buffer *buffer = pfile->buffer;
d08dcf87
ZW
331 const uchar *cur = buffer->cur;
332 uchar c;
0d9f234d 333
d08dcf87
ZW
334 cur++;
335 if (*cur == '/')
336 cur++;
0d9f234d 337
26aea073
NB
338 for (;;)
339 {
0d9f234d
NB
340 /* People like decorating comments with '*', so check for '/'
341 instead for efficiency. */
d08dcf87
ZW
342 c = *cur++;
343
041c3194 344 if (c == '/')
45b966db 345 {
d08dcf87 346 if (cur[-2] == '*')
0d9f234d 347 break;
041c3194 348
0d9f234d 349 /* Warn about potential nested comments, but not if the '/'
a1f300c0 350 comes immediately before the true comment delimiter.
041c3194 351 Don't bother to get it right across escaped newlines. */
0d9f234d 352 if (CPP_OPTION (pfile, warn_comments)
d08dcf87
ZW
353 && cur[0] == '*' && cur[1] != '/')
354 {
355 buffer->cur = cur;
0527bc4e 356 cpp_error_with_line (pfile, CPP_DL_WARNING,
500bee0a 357 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
d08dcf87
ZW
358 "\"/*\" within comment");
359 }
45b966db 360 }
26aea073
NB
361 else if (c == '\n')
362 {
12f9df4e 363 unsigned int cols;
d08dcf87 364 buffer->cur = cur - 1;
26aea073
NB
365 _cpp_process_line_notes (pfile, true);
366 if (buffer->next_line >= buffer->rlimit)
367 return true;
368 _cpp_clean_line (pfile);
12f9df4e
PB
369
370 cols = buffer->next_line - buffer->line_base;
371 CPP_INCREMENT_LINE (pfile, cols);
372
d08dcf87 373 cur = buffer->cur;
26aea073 374 }
45b966db 375 }
041c3194 376
d08dcf87 377 buffer->cur = cur;
a8eb6044 378 _cpp_process_line_notes (pfile, true);
26aea073 379 return false;
45b966db
ZW
380}
381
480709cc 382/* Skip a C++ line comment, leaving buffer->cur pointing to the
da7d8304 383 terminating newline. Handles escaped newlines. Returns nonzero
480709cc 384 if a multiline comment. */
041c3194 385static int
6cf87ca4 386skip_line_comment (cpp_reader *pfile)
45b966db 387{
cbcff6df 388 cpp_buffer *buffer = pfile->buffer;
1bb64668 389 source_location orig_line = pfile->line_table->highest_line;
041c3194 390
26aea073
NB
391 while (*buffer->cur != '\n')
392 buffer->cur++;
480709cc 393
26aea073 394 _cpp_process_line_notes (pfile, true);
500bee0a 395 return orig_line != pfile->line_table->highest_line;
041c3194 396}
45b966db 397
26aea073 398/* Skips whitespace, saving the next non-whitespace character. */
52fadca8 399static void
6cf87ca4 400skip_whitespace (cpp_reader *pfile, cppchar_t c)
041c3194
ZW
401{
402 cpp_buffer *buffer = pfile->buffer;
f7d151fb 403 bool saw_NUL = false;
45b966db 404
0d9f234d 405 do
041c3194 406 {
91fcd158 407 /* Horizontal space always OK. */
26aea073 408 if (c == ' ' || c == '\t')
0d9f234d 409 ;
0d9f234d 410 /* Just \f \v or \0 left. */
91fcd158 411 else if (c == '\0')
f7d151fb 412 saw_NUL = true;
93c80368 413 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
500bee0a 414 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
ebef4e8c
NB
415 CPP_BUF_COL (buffer),
416 "%s in preprocessing directive",
417 c == '\f' ? "form feed" : "vertical tab");
0d9f234d 418
0d9f234d 419 c = *buffer->cur++;
45b966db 420 }
ec5c56db 421 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
422 while (is_nvspace (c));
423
f7d151fb 424 if (saw_NUL)
0527bc4e 425 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
f7d151fb 426
480709cc 427 buffer->cur--;
041c3194 428}
45b966db 429
93c80368
NB
430/* See if the characters of a number token are valid in a name (no
431 '.', '+' or '-'). */
432static int
6cf87ca4 433name_p (cpp_reader *pfile, const cpp_string *string)
93c80368
NB
434{
435 unsigned int i;
436
437 for (i = 0; i < string->len; i++)
438 if (!is_idchar (string->text[i]))
439 return 0;
440
df383483 441 return 1;
93c80368
NB
442}
443
50668cf6
GK
444/* After parsing an identifier or other sequence, produce a warning about
445 sequences not in NFC/NFKC. */
446static void
447warn_about_normalization (cpp_reader *pfile,
448 const cpp_token *token,
449 const struct normalize_state *s)
450{
451 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
452 && !pfile->state.skipping)
453 {
454 /* Make sure that the token is printed using UCNs, even
455 if we'd otherwise happily print UTF-8. */
c3f829c1 456 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
50668cf6
GK
457 size_t sz;
458
459 sz = cpp_spell_token (pfile, token, buf, false) - buf;
460 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
461 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
cbada204 462 "`%.*s' is not in NFKC", (int) sz, buf);
50668cf6
GK
463 else
464 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
cbada204 465 "`%.*s' is not in NFC", (int) sz, buf);
50668cf6
GK
466 }
467}
468
bced6edf 469/* Returns TRUE if the sequence starting at buffer->cur is invalid in
1613e52b 470 an identifier. FIRST is TRUE if this starts an identifier. */
bced6edf 471static bool
50668cf6
GK
472forms_identifier_p (cpp_reader *pfile, int first,
473 struct normalize_state *state)
bced6edf 474{
1613e52b
NB
475 cpp_buffer *buffer = pfile->buffer;
476
477 if (*buffer->cur == '$')
478 {
479 if (!CPP_OPTION (pfile, dollars_in_ident))
480 return false;
481
482 buffer->cur++;
78b8811a 483 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1613e52b 484 {
78b8811a 485 CPP_OPTION (pfile, warn_dollars) = 0;
0527bc4e 486 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1613e52b
NB
487 }
488
489 return true;
490 }
bced6edf 491
1613e52b 492 /* Is this a syntactically valid UCN? */
af15a2fe 493 if (CPP_OPTION (pfile, extended_identifiers)
6baba9bb 494 && *buffer->cur == '\\'
1613e52b 495 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
bced6edf 496 {
1613e52b 497 buffer->cur += 2;
50668cf6
GK
498 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
499 state))
1613e52b
NB
500 return true;
501 buffer->cur -= 2;
bced6edf 502 }
bced6edf 503
1613e52b 504 return false;
bced6edf
NB
505}
506
507/* Lex an identifier starting at BUFFER->CUR - 1. */
0d9f234d 508static cpp_hashnode *
50668cf6
GK
509lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
510 struct normalize_state *nst)
45b966db 511{
93c80368 512 cpp_hashnode *result;
47e20491 513 const uchar *cur;
c6e83800
ZW
514 unsigned int len;
515 unsigned int hash = HT_HASHSTEP (0, *base);
2c3fcba6 516
c6e83800 517 cur = pfile->buffer->cur;
47e20491
GK
518 if (! starts_ucn)
519 while (ISIDNUM (*cur))
520 {
521 hash = HT_HASHSTEP (hash, *cur);
522 cur++;
523 }
524 pfile->buffer->cur = cur;
50668cf6 525 if (starts_ucn || forms_identifier_p (pfile, false, nst))
10cf9bde 526 {
47e20491
GK
527 /* Slower version for identifiers containing UCNs (or $). */
528 do {
529 while (ISIDNUM (*pfile->buffer->cur))
50668cf6
GK
530 {
531 pfile->buffer->cur++;
532 NORMALIZE_STATE_UPDATE_IDNUM (nst);
533 }
534 } while (forms_identifier_p (pfile, false, nst));
47e20491
GK
535 result = _cpp_interpret_identifier (pfile, base,
536 pfile->buffer->cur - base);
2c3fcba6 537 }
47e20491
GK
538 else
539 {
540 len = cur - base;
541 hash = HT_HASHFINISH (hash, len);
bced6edf 542
2bf41bf0
TT
543 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
544 base, len, hash, HT_ALLOC));
47e20491 545 }
2c3fcba6 546
bced6edf 547 /* Rarely, identifiers require diagnostics when lexed. */
2c3fcba6
ZW
548 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
549 && !pfile->state.skipping, 0))
550 {
551 /* It is allowed to poison the same identifier twice. */
552 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
0527bc4e 553 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
2c3fcba6
ZW
554 NODE_NAME (result));
555
556 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
557 replacement list of a variadic macro. */
558 if (result == pfile->spec_nodes.n__VA_ARGS__
559 && !pfile->state.va_args_ok)
0527bc4e 560 cpp_error (pfile, CPP_DL_PEDWARN,
6cf87ca4
ZW
561 "__VA_ARGS__ can only appear in the expansion"
562 " of a C99 variadic macro");
2c3fcba6
ZW
563 }
564
565 return result;
566}
567
bced6edf 568/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
45b966db 569static void
50668cf6
GK
570lex_number (cpp_reader *pfile, cpp_string *number,
571 struct normalize_state *nst)
45b966db 572{
562a5c27 573 const uchar *cur;
bced6edf
NB
574 const uchar *base;
575 uchar *dest;
45b966db 576
bced6edf
NB
577 base = pfile->buffer->cur - 1;
578 do
041c3194 579 {
bced6edf 580 cur = pfile->buffer->cur;
0d9f234d 581
bced6edf
NB
582 /* N.B. ISIDNUM does not include $. */
583 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
50668cf6
GK
584 {
585 cur++;
586 NORMALIZE_STATE_UPDATE_IDNUM (nst);
587 }
45b966db 588
10cf9bde 589 pfile->buffer->cur = cur;
45b966db 590 }
50668cf6 591 while (forms_identifier_p (pfile, false, nst));
93c80368 592
bced6edf
NB
593 number->len = cur - base;
594 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
595 memcpy (dest, base, number->len);
596 dest[number->len] = '\0';
597 number->text = dest;
93c80368
NB
598}
599
6338b358
NB
600/* Create a token of type TYPE with a literal spelling. */
601static void
6cf87ca4
ZW
602create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
603 unsigned int len, enum cpp_ttype type)
6338b358
NB
604{
605 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
606
607 memcpy (dest, base, len);
608 dest[len] = '\0';
609 token->type = type;
610 token->val.str.len = len;
611 token->val.str.text = dest;
612}
613
bced6edf 614/* Lexes a string, character constant, or angle-bracketed header file
6338b358 615 name. The stored string contains the spelling, including opening
b6baa67d 616 quote and leading any leading 'L', 'u' or 'U'. It returns the type
4bb09c26
JM
617 of the literal, or CPP_OTHER if it was not properly terminated, or
618 CPP_LESS for an unterminated header name which must be relexed as
619 normal tokens.
6338b358
NB
620
621 The spelling is NUL-terminated, but it is not guaranteed that this
622 is the first NUL since embedded NULs are preserved. */
041c3194 623static void
6cf87ca4 624lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
45b966db 625{
6338b358
NB
626 bool saw_NUL = false;
627 const uchar *cur;
bced6edf 628 cppchar_t terminator;
6338b358
NB
629 enum cpp_ttype type;
630
631 cur = base;
632 terminator = *cur++;
b6baa67d 633 if (terminator == 'L' || terminator == 'u' || terminator == 'U')
6338b358
NB
634 terminator = *cur++;
635 if (terminator == '\"')
b6baa67d
KVH
636 type = (*base == 'L' ? CPP_WSTRING :
637 *base == 'U' ? CPP_STRING32 :
638 *base == 'u' ? CPP_STRING16 : CPP_STRING);
6338b358 639 else if (terminator == '\'')
b6baa67d
KVH
640 type = (*base == 'L' ? CPP_WCHAR :
641 *base == 'U' ? CPP_CHAR32 :
642 *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
6338b358
NB
643 else
644 terminator = '>', type = CPP_HEADER_NAME;
93c80368 645
0d9f234d 646 for (;;)
45b966db 647 {
6338b358 648 cppchar_t c = *cur++;
7868b4a2 649
6f572ac2 650 /* In #include-style directives, terminators are not escapable. */
6338b358
NB
651 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
652 cur++;
653 else if (c == terminator)
bced6edf 654 break;
6338b358 655 else if (c == '\n')
0d9f234d 656 {
6338b358 657 cur--;
4bb09c26
JM
658 /* Unmatched quotes always yield undefined behavior, but
659 greedy lexing means that what appears to be an unterminated
660 header name may actually be a legitimate sequence of tokens. */
661 if (terminator == '>')
662 {
663 token->type = CPP_LESS;
664 return;
665 }
6338b358
NB
666 type = CPP_OTHER;
667 break;
45b966db 668 }
6338b358
NB
669 else if (c == '\0')
670 saw_NUL = true;
45b966db
ZW
671 }
672
6338b358 673 if (saw_NUL && !pfile->state.skipping)
0527bc4e
JDA
674 cpp_error (pfile, CPP_DL_WARNING,
675 "null character(s) preserved in literal");
45b966db 676
c663e301
JM
677 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
678 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
679 (int) terminator);
680
6338b358
NB
681 pfile->buffer->cur = cur;
682 create_literal (pfile, token, base, cur - base, type);
0d9f234d 683}
041c3194 684
631d0d36
MG
685/* Return the comment table. The client may not make any assumption
686 about the ordering of the table. */
687cpp_comment_table *
688cpp_get_comments (cpp_reader *pfile)
689{
690 return &pfile->comments;
691}
692
693/* Append a comment to the end of the comment table. */
694static void
695store_comment (cpp_reader *pfile, cpp_token *token)
696{
697 int len;
698
699 if (pfile->comments.allocated == 0)
700 {
701 pfile->comments.allocated = 256;
702 pfile->comments.entries = (cpp_comment *) xmalloc
703 (pfile->comments.allocated * sizeof (cpp_comment));
704 }
705
706 if (pfile->comments.count == pfile->comments.allocated)
707 {
708 pfile->comments.allocated *= 2;
709 pfile->comments.entries = (cpp_comment *) xrealloc
710 (pfile->comments.entries,
711 pfile->comments.allocated * sizeof (cpp_comment));
712 }
713
714 len = token->val.str.len;
715
716 /* Copy comment. Note, token may not be NULL terminated. */
717 pfile->comments.entries[pfile->comments.count].comment =
718 (char *) xmalloc (sizeof (char) * (len + 1));
719 memcpy (pfile->comments.entries[pfile->comments.count].comment,
720 token->val.str.text, len);
721 pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
722
723 /* Set source location. */
724 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
725
726 /* Increment the count of entries in the comment table. */
727 pfile->comments.count++;
728}
729
93c80368 730/* The stored comment includes the comment start and any terminator. */
9e62c811 731static void
6cf87ca4
ZW
732save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
733 cppchar_t type)
9e62c811 734{
041c3194 735 unsigned char *buffer;
477cdac7 736 unsigned int len, clen;
df383483 737
1c6d33ef 738 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
480709cc 739
3542203b
NB
740 /* C++ comments probably (not definitely) have moved past a new
741 line, which we don't want to save in the comment. */
480709cc 742 if (is_vspace (pfile->buffer->cur[-1]))
3542203b 743 len--;
477cdac7
JT
744
745 /* If we are currently in a directive, then we need to store all
746 C++ comments as C comments internally, and so we need to
747 allocate a little extra space in that case.
748
749 Note that the only time we encounter a directive here is
750 when we are saving comments in a "#define". */
751 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
752
753 buffer = _cpp_unaligned_alloc (pfile, clen);
df383483 754
041c3194 755 token->type = CPP_COMMENT;
477cdac7 756 token->val.str.len = clen;
0d9f234d 757 token->val.str.text = buffer;
45b966db 758
1c6d33ef
NB
759 buffer[0] = '/';
760 memcpy (buffer + 1, from, len - 1);
477cdac7 761
1eeeb6a4 762 /* Finish conversion to a C comment, if necessary. */
477cdac7
JT
763 if (pfile->state.in_directive && type == '/')
764 {
765 buffer[1] = '*';
766 buffer[clen - 2] = '*';
767 buffer[clen - 1] = '/';
768 }
631d0d36
MG
769
770 /* Finally store this comment for use by clients of libcpp. */
771 store_comment (pfile, token);
0d9f234d 772}
45b966db 773
5fddcffc
NB
774/* Allocate COUNT tokens for RUN. */
775void
6cf87ca4 776_cpp_init_tokenrun (tokenrun *run, unsigned int count)
5fddcffc 777{
72bb2c39 778 run->base = XNEWVEC (cpp_token, count);
5fddcffc
NB
779 run->limit = run->base + count;
780 run->next = NULL;
781}
782
783/* Returns the next tokenrun, or creates one if there is none. */
784static tokenrun *
6cf87ca4 785next_tokenrun (tokenrun *run)
5fddcffc
NB
786{
787 if (run->next == NULL)
788 {
72bb2c39 789 run->next = XNEW (tokenrun);
bdcbe496 790 run->next->prev = run;
5fddcffc
NB
791 _cpp_init_tokenrun (run->next, 250);
792 }
793
794 return run->next;
795}
796
5950c3c9
BE
797/* Look ahead in the input stream. */
798const cpp_token *
799cpp_peek_token (cpp_reader *pfile, int index)
800{
801 cpp_context *context = pfile->context;
802 const cpp_token *peektok;
803 int count;
804
805 /* First, scan through any pending cpp_context objects. */
806 while (context->prev)
807 {
808 ptrdiff_t sz = (context->direct_p
809 ? LAST (context).token - FIRST (context).token
810 : LAST (context).ptoken - FIRST (context).ptoken);
811
812 if (index < (int) sz)
813 return (context->direct_p
814 ? FIRST (context).token + index
815 : *(FIRST (context).ptoken + index));
816
817 index -= (int) sz;
818 context = context->prev;
819 }
820
821 /* We will have to read some new tokens after all (and do so
822 without invalidating preceding tokens). */
823 count = index;
824 pfile->keep_tokens++;
825
826 do
827 {
828 peektok = _cpp_lex_token (pfile);
829 if (peektok->type == CPP_EOF)
830 return peektok;
831 }
832 while (index--);
833
834 _cpp_backup_tokens_direct (pfile, count + 1);
835 pfile->keep_tokens--;
836
837 return peektok;
838}
839
4ed5bcfb
NB
840/* Allocate a single token that is invalidated at the same time as the
841 rest of the tokens on the line. Has its line and col set to the
842 same as the last lexed token, so that diagnostics appear in the
843 right place. */
844cpp_token *
6cf87ca4 845_cpp_temp_token (cpp_reader *pfile)
4ed5bcfb
NB
846{
847 cpp_token *old, *result;
5950c3c9
BE
848 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
849 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
4ed5bcfb
NB
850
851 old = pfile->cur_token - 1;
5950c3c9
BE
852 /* Any pre-existing lookaheads must not be clobbered. */
853 if (la)
854 {
855 if (sz <= la)
856 {
857 tokenrun *next = next_tokenrun (pfile->cur_run);
858
859 if (sz < la)
860 memmove (next->base + 1, next->base,
861 (la - sz) * sizeof (cpp_token));
862
863 next->base[0] = pfile->cur_run->limit[-1];
864 }
865
866 if (sz > 1)
867 memmove (pfile->cur_token + 1, pfile->cur_token,
868 MIN (la, sz - 1) * sizeof (cpp_token));
869 }
870
871 if (!sz && pfile->cur_token == pfile->cur_run->limit)
4ed5bcfb
NB
872 {
873 pfile->cur_run = next_tokenrun (pfile->cur_run);
874 pfile->cur_token = pfile->cur_run->base;
875 }
876
877 result = pfile->cur_token++;
12f9df4e 878 result->src_loc = old->src_loc;
4ed5bcfb
NB
879 return result;
880}
881
14baae01
NB
882/* Lex a token into RESULT (external interface). Takes care of issues
883 like directive handling, token lookahead, multiple include
a1f300c0 884 optimization and skipping. */
345894b4 885const cpp_token *
6cf87ca4 886_cpp_lex_token (cpp_reader *pfile)
5fddcffc 887{
bdcbe496 888 cpp_token *result;
5fddcffc 889
bdcbe496 890 for (;;)
5fddcffc 891 {
bdcbe496 892 if (pfile->cur_token == pfile->cur_run->limit)
5fddcffc 893 {
bdcbe496
NB
894 pfile->cur_run = next_tokenrun (pfile->cur_run);
895 pfile->cur_token = pfile->cur_run->base;
5fddcffc 896 }
ee380365
TT
897 /* We assume that the current token is somewhere in the current
898 run. */
899 if (pfile->cur_token < pfile->cur_run->base
900 || pfile->cur_token >= pfile->cur_run->limit)
901 abort ();
5fddcffc 902
bdcbe496 903 if (pfile->lookaheads)
14baae01
NB
904 {
905 pfile->lookaheads--;
906 result = pfile->cur_token++;
907 }
bdcbe496 908 else
14baae01 909 result = _cpp_lex_direct (pfile);
bdcbe496
NB
910
911 if (result->flags & BOL)
5fddcffc 912 {
bdcbe496
NB
913 /* Is this a directive. If _cpp_handle_directive returns
914 false, it is an assembler #. */
915 if (result->type == CPP_HASH
e808ec9c
NB
916 /* 6.10.3 p 11: Directives in a list of macro arguments
917 gives undefined behavior. This implementation
918 handles the directive as normal. */
bc4071dd 919 && pfile->state.parsing_args != 1)
21b11495 920 {
bc4071dd 921 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
21b11495 922 {
bc4071dd
RH
923 if (pfile->directive_result.type == CPP_PADDING)
924 continue;
21b11495 925 result = &pfile->directive_result;
21b11495
ZW
926 }
927 }
bc4071dd
RH
928 else if (pfile->state.in_deferred_pragma)
929 result = &pfile->directive_result;
21b11495 930
97293897 931 if (pfile->cb.line_change && !pfile->state.skipping)
6cf87ca4 932 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
5fddcffc 933 }
5fddcffc 934
bdcbe496 935 /* We don't skip tokens in directives. */
bc4071dd 936 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
bdcbe496 937 break;
5fddcffc 938
bdcbe496 939 /* Outside a directive, invalidate controlling macros. At file
14baae01 940 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
6356f892 941 get here and MI optimization works. */
5fddcffc 942 pfile->mi_valid = false;
bdcbe496
NB
943
944 if (!pfile->state.skipping || result->type == CPP_EOF)
945 break;
5fddcffc
NB
946 }
947
345894b4 948 return result;
5fddcffc
NB
949}
950
26aea073
NB
951/* Returns true if a fresh line has been loaded. */
952bool
6cf87ca4 953_cpp_get_fresh_line (cpp_reader *pfile)
004cb263 954{
22234f56
PB
955 int return_at_eof;
956
26aea073
NB
957 /* We can't get a new line until we leave the current directive. */
958 if (pfile->state.in_directive)
959 return false;
df383483 960
26aea073 961 for (;;)
1a76916c 962 {
26aea073 963 cpp_buffer *buffer = pfile->buffer;
1a76916c 964
26aea073
NB
965 if (!buffer->need_line)
966 return true;
967
968 if (buffer->next_line < buffer->rlimit)
004cb263 969 {
26aea073
NB
970 _cpp_clean_line (pfile);
971 return true;
972 }
004cb263 973
26aea073
NB
974 /* First, get out of parsing arguments state. */
975 if (pfile->state.parsing_args)
976 return false;
977
978 /* End of buffer. Non-empty files should end in a newline. */
979 if (buffer->buf != buffer->rlimit
980 && buffer->next_line > buffer->rlimit
981 && !buffer->from_stage3)
982 {
ed0e74e0 983 /* Clip to buffer size. */
26aea073 984 buffer->next_line = buffer->rlimit;
26aea073 985 }
22234f56
PB
986
987 return_at_eof = buffer->return_at_eof;
26aea073 988 _cpp_pop_buffer (pfile);
22234f56 989 if (pfile->buffer == NULL || return_at_eof)
a506c55c 990 return false;
26aea073 991 }
004cb263
NB
992}
993
/* Helper for the two-character operators lexed in _cpp_lex_direct.
   If the next unread character is CHAR, consume it and set
   result->type to THEN_TYPE; otherwise consume nothing and set
   result->type to ELSE_TYPE.  Uses the variables `result' and
   `buffer' of the function it is expanded in.  The arguments are
   parenthesized so that any expression may be passed safely.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      result->type = (ELSE_TYPE);               \
      if (*buffer->cur == (CHAR))               \
        {                                       \
          buffer->cur++;                        \
          result->type = (THEN_TYPE);           \
        }                                       \
    }                                           \
  while (0)
480709cc 1002
14baae01
NB
1003/* Lex a token into pfile->cur_token, which is also incremented, to
1004 get diagnostics pointing to the correct location.
1005
1006 Does not handle issues such as token lookahead, multiple-include
f1ba665b 1007 optimization, directives, skipping etc. This function is only
14baae01
NB
1008 suitable for use by _cpp_lex_token, and in special cases like
1009 lex_expansion_token which doesn't care for any of these issues.
1010
1011 When meeting a newline, returns CPP_EOF if parsing a directive,
1012 otherwise returns to the start of the token buffer if permissible.
1013 Returns the location of the lexed token. */
1014cpp_token *
6cf87ca4 1015_cpp_lex_direct (cpp_reader *pfile)
45b966db 1016{
0d9f234d 1017 cppchar_t c;
adb84b42 1018 cpp_buffer *buffer;
0d9f234d 1019 const unsigned char *comment_start;
14baae01 1020 cpp_token *result = pfile->cur_token++;
9ec7291f 1021
5fddcffc 1022 fresh_line:
26aea073 1023 result->flags = 0;
2be570f9 1024 buffer = pfile->buffer;
a506c55c 1025 if (buffer->need_line)
26aea073 1026 {
bc4071dd
RH
1027 if (pfile->state.in_deferred_pragma)
1028 {
1029 result->type = CPP_PRAGMA_EOL;
1030 pfile->state.in_deferred_pragma = false;
1031 if (!pfile->state.pragma_allow_expansion)
1032 pfile->state.prevent_expansion--;
1033 return result;
1034 }
26aea073
NB
1035 if (!_cpp_get_fresh_line (pfile))
1036 {
1037 result->type = CPP_EOF;
9ff7868d
NB
1038 if (!pfile->state.in_directive)
1039 {
1040 /* Tell the compiler the line number of the EOF token. */
500bee0a 1041 result->src_loc = pfile->line_table->highest_line;
9ff7868d
NB
1042 result->flags = BOL;
1043 }
26aea073
NB
1044 return result;
1045 }
1046 if (!pfile->keep_tokens)
1047 {
1048 pfile->cur_run = &pfile->base_run;
1049 result = pfile->base_run.base;
1050 pfile->cur_token = result + 1;
1051 }
1052 result->flags = BOL;
1053 if (pfile->state.parsing_args == 2)
1054 result->flags |= PREV_WHITE;
1055 }
a506c55c 1056 buffer = pfile->buffer;
5fddcffc 1057 update_tokens_line:
500bee0a 1058 result->src_loc = pfile->line_table->highest_line;
041c3194 1059
5fddcffc 1060 skipped_white:
26aea073
NB
1061 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1062 && !pfile->overlaid_buffer)
1063 {
1064 _cpp_process_line_notes (pfile, false);
500bee0a 1065 result->src_loc = pfile->line_table->highest_line;
26aea073 1066 }
480709cc 1067 c = *buffer->cur++;
12f9df4e 1068
500bee0a
PB
1069 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1070 CPP_BUF_COLUMN (buffer, buffer->cur));
5fddcffc 1071
0d9f234d 1072 switch (c)
45b966db 1073 {
4d6baafa
NB
1074 case ' ': case '\t': case '\f': case '\v': case '\0':
1075 result->flags |= PREV_WHITE;
26aea073
NB
1076 skip_whitespace (pfile, c);
1077 goto skipped_white;
0d9f234d 1078
26aea073 1079 case '\n':
12f9df4e
PB
1080 if (buffer->cur < buffer->rlimit)
1081 CPP_INCREMENT_LINE (pfile, 0);
26aea073
NB
1082 buffer->need_line = true;
1083 goto fresh_line;
46d07497 1084
0d9f234d
NB
1085 case '0': case '1': case '2': case '3': case '4':
1086 case '5': case '6': case '7': case '8': case '9':
50668cf6
GK
1087 {
1088 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1089 result->type = CPP_NUMBER;
1090 lex_number (pfile, &result->val.str, &nst);
1091 warn_about_normalization (pfile, result, &nst);
1092 break;
1093 }
46d07497 1094
0abc6a6a 1095 case 'L':
b6baa67d
KVH
1096 case 'u':
1097 case 'U':
1098 /* 'L', 'u' or 'U' may introduce wide characters or strings. */
1099 if (c == 'L' || CPP_OPTION (pfile, uliterals))
bced6edf 1100 {
b6baa67d
KVH
1101 if (*buffer->cur == '\'' || *buffer->cur == '"')
1102 {
1103 lex_string (pfile, result, buffer->cur - 1);
1104 break;
1105 }
bced6edf 1106 }
df383483 1107 /* Fall through. */
0abc6a6a 1108
0d9f234d
NB
1109 case '_':
1110 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1111 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1112 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
b6baa67d 1113 case 's': case 't': case 'v': case 'w': case 'x':
0d9f234d
NB
1114 case 'y': case 'z':
1115 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
0abc6a6a 1116 case 'G': case 'H': case 'I': case 'J': case 'K':
0d9f234d 1117 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
b6baa67d 1118 case 'S': case 'T': case 'V': case 'W': case 'X':
0d9f234d
NB
1119 case 'Y': case 'Z':
1120 result->type = CPP_NAME;
50668cf6
GK
1121 {
1122 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
9a0c6187
JM
1123 result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
1124 &nst);
50668cf6
GK
1125 warn_about_normalization (pfile, result, &nst);
1126 }
0d9f234d 1127
0d9f234d 1128 /* Convert named operators to their proper types. */
9a0c6187 1129 if (result->val.node.node->flags & NODE_OPERATOR)
0d9f234d
NB
1130 {
1131 result->flags |= NAMED_OP;
9a0c6187 1132 result->type = (enum cpp_ttype) result->val.node.node->directive_index;
0d9f234d
NB
1133 }
1134 break;
1135
1136 case '\'':
1137 case '"':
6338b358 1138 lex_string (pfile, result, buffer->cur - 1);
0d9f234d 1139 break;
041c3194 1140
0d9f234d 1141 case '/':
1c6d33ef
NB
1142 /* A potential block or line comment. */
1143 comment_start = buffer->cur;
6f572ac2
NB
1144 c = *buffer->cur;
1145
1c6d33ef
NB
1146 if (c == '*')
1147 {
26aea073 1148 if (_cpp_skip_block_comment (pfile))
0527bc4e 1149 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
0d9f234d 1150 }
480709cc 1151 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
12f9df4e 1152 || cpp_in_system_header (pfile)))
0d9f234d 1153 {
bdb05a7b
NB
1154 /* Warn about comments only if pedantically GNUC89, and not
1155 in system headers. */
1156 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 1157 && ! buffer->warned_cplusplus_comments)
041c3194 1158 {
0527bc4e 1159 cpp_error (pfile, CPP_DL_PEDWARN,
56508306 1160 "C++ style comments are not allowed in ISO C90");
0527bc4e 1161 cpp_error (pfile, CPP_DL_PEDWARN,
ebef4e8c 1162 "(this will be reported only once per input file)");
1c6d33ef
NB
1163 buffer->warned_cplusplus_comments = 1;
1164 }
0d9f234d 1165
01ef6563 1166 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
0527bc4e 1167 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1c6d33ef 1168 }
480709cc
NB
1169 else if (c == '=')
1170 {
6f572ac2 1171 buffer->cur++;
480709cc
NB
1172 result->type = CPP_DIV_EQ;
1173 break;
1174 }
1175 else
1176 {
480709cc
NB
1177 result->type = CPP_DIV;
1178 break;
1179 }
0d9f234d 1180
1c6d33ef
NB
1181 if (!pfile->state.save_comments)
1182 {
1183 result->flags |= PREV_WHITE;
5fddcffc 1184 goto update_tokens_line;
0d9f234d 1185 }
1c6d33ef
NB
1186
1187 /* Save the comment as a token in its own right. */
477cdac7 1188 save_comment (pfile, result, comment_start, c);
bdcbe496 1189 break;
0d9f234d
NB
1190
1191 case '<':
1192 if (pfile->state.angled_headers)
1193 {
6338b358 1194 lex_string (pfile, result, buffer->cur - 1);
4bb09c26
JM
1195 if (result->type != CPP_LESS)
1196 break;
0d9f234d 1197 }
45b966db 1198
6f572ac2
NB
1199 result->type = CPP_LESS;
1200 if (*buffer->cur == '=')
1201 buffer->cur++, result->type = CPP_LESS_EQ;
1202 else if (*buffer->cur == '<')
0d9f234d 1203 {
6f572ac2
NB
1204 buffer->cur++;
1205 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
0d9f234d 1206 }
6f572ac2 1207 else if (CPP_OPTION (pfile, digraphs))
480709cc 1208 {
6f572ac2
NB
1209 if (*buffer->cur == ':')
1210 {
1211 buffer->cur++;
1212 result->flags |= DIGRAPH;
1213 result->type = CPP_OPEN_SQUARE;
1214 }
1215 else if (*buffer->cur == '%')
1216 {
1217 buffer->cur++;
1218 result->flags |= DIGRAPH;
1219 result->type = CPP_OPEN_BRACE;
1220 }
480709cc 1221 }
0d9f234d
NB
1222 break;
1223
1224 case '>':
6f572ac2
NB
1225 result->type = CPP_GREATER;
1226 if (*buffer->cur == '=')
1227 buffer->cur++, result->type = CPP_GREATER_EQ;
1228 else if (*buffer->cur == '>')
0d9f234d 1229 {
6f572ac2
NB
1230 buffer->cur++;
1231 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1232 }
0d9f234d
NB
1233 break;
1234
cbcff6df 1235 case '%':
6f572ac2
NB
1236 result->type = CPP_MOD;
1237 if (*buffer->cur == '=')
1238 buffer->cur++, result->type = CPP_MOD_EQ;
1239 else if (CPP_OPTION (pfile, digraphs))
480709cc 1240 {
6f572ac2 1241 if (*buffer->cur == ':')
480709cc 1242 {
6f572ac2
NB
1243 buffer->cur++;
1244 result->flags |= DIGRAPH;
1245 result->type = CPP_HASH;
1246 if (*buffer->cur == '%' && buffer->cur[1] == ':')
9a0c6187 1247 buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
6f572ac2
NB
1248 }
1249 else if (*buffer->cur == '>')
1250 {
1251 buffer->cur++;
1252 result->flags |= DIGRAPH;
1253 result->type = CPP_CLOSE_BRACE;
480709cc 1254 }
480709cc 1255 }
0d9f234d
NB
1256 break;
1257
cbcff6df 1258 case '.':
480709cc 1259 result->type = CPP_DOT;
6f572ac2 1260 if (ISDIGIT (*buffer->cur))
480709cc 1261 {
50668cf6 1262 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
480709cc 1263 result->type = CPP_NUMBER;
50668cf6
GK
1264 lex_number (pfile, &result->val.str, &nst);
1265 warn_about_normalization (pfile, result, &nst);
480709cc 1266 }
6f572ac2
NB
1267 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1268 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1269 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1270 buffer->cur++, result->type = CPP_DOT_STAR;
0d9f234d 1271 break;
45b966db 1272
0d9f234d 1273 case '+':
6f572ac2
NB
1274 result->type = CPP_PLUS;
1275 if (*buffer->cur == '+')
1276 buffer->cur++, result->type = CPP_PLUS_PLUS;
1277 else if (*buffer->cur == '=')
1278 buffer->cur++, result->type = CPP_PLUS_EQ;
0d9f234d 1279 break;
04e3ec78 1280
0d9f234d 1281 case '-':
6f572ac2
NB
1282 result->type = CPP_MINUS;
1283 if (*buffer->cur == '>')
0d9f234d 1284 {
6f572ac2 1285 buffer->cur++;
480709cc 1286 result->type = CPP_DEREF;
6f572ac2
NB
1287 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1288 buffer->cur++, result->type = CPP_DEREF_STAR;
480709cc 1289 }
6f572ac2
NB
1290 else if (*buffer->cur == '-')
1291 buffer->cur++, result->type = CPP_MINUS_MINUS;
1292 else if (*buffer->cur == '=')
1293 buffer->cur++, result->type = CPP_MINUS_EQ;
0d9f234d 1294 break;
45b966db 1295
0d9f234d 1296 case '&':
6f572ac2
NB
1297 result->type = CPP_AND;
1298 if (*buffer->cur == '&')
1299 buffer->cur++, result->type = CPP_AND_AND;
1300 else if (*buffer->cur == '=')
1301 buffer->cur++, result->type = CPP_AND_EQ;
0d9f234d 1302 break;
df383483 1303
0d9f234d 1304 case '|':
6f572ac2
NB
1305 result->type = CPP_OR;
1306 if (*buffer->cur == '|')
1307 buffer->cur++, result->type = CPP_OR_OR;
1308 else if (*buffer->cur == '=')
1309 buffer->cur++, result->type = CPP_OR_EQ;
0d9f234d 1310 break;
45b966db 1311
0d9f234d 1312 case ':':
6f572ac2
NB
1313 result->type = CPP_COLON;
1314 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1315 buffer->cur++, result->type = CPP_SCOPE;
1316 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
0d9f234d 1317 {
6f572ac2 1318 buffer->cur++;
0d9f234d 1319 result->flags |= DIGRAPH;
480709cc
NB
1320 result->type = CPP_CLOSE_SQUARE;
1321 }
0d9f234d 1322 break;
45b966db 1323
480709cc
NB
1324 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1325 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1326 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1327 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
9a0c6187 1328 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
480709cc 1329
26aea073 1330 case '?': result->type = CPP_QUERY; break;
0d9f234d
NB
1331 case '~': result->type = CPP_COMPL; break;
1332 case ',': result->type = CPP_COMMA; break;
1333 case '(': result->type = CPP_OPEN_PAREN; break;
1334 case ')': result->type = CPP_CLOSE_PAREN; break;
1335 case '[': result->type = CPP_OPEN_SQUARE; break;
1336 case ']': result->type = CPP_CLOSE_SQUARE; break;
1337 case '{': result->type = CPP_OPEN_BRACE; break;
1338 case '}': result->type = CPP_CLOSE_BRACE; break;
1339 case ';': result->type = CPP_SEMICOLON; break;
1340
40f03658 1341 /* @ is a punctuator in Objective-C. */
cc937581 1342 case '@': result->type = CPP_ATSIGN; break;
0d9f234d 1343
0abc6a6a 1344 case '$':
1613e52b
NB
1345 case '\\':
1346 {
1347 const uchar *base = --buffer->cur;
50668cf6 1348 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
0abc6a6a 1349
50668cf6 1350 if (forms_identifier_p (pfile, true, &nst))
1613e52b
NB
1351 {
1352 result->type = CPP_NAME;
9a0c6187 1353 result->val.node.node = lex_identifier (pfile, base, true, &nst);
50668cf6 1354 warn_about_normalization (pfile, result, &nst);
1613e52b
NB
1355 break;
1356 }
1357 buffer->cur++;
1067694a 1358 }
1613e52b 1359
1067694a 1360 default:
6338b358
NB
1361 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1362 break;
0d9f234d 1363 }
bdcbe496
NB
1364
1365 return result;
0d9f234d
NB
1366}
1367
59325650
NB
1368/* An upper bound on the number of bytes needed to spell TOKEN.
1369 Does not include preceding whitespace. */
93c80368 1370unsigned int
6cf87ca4 1371cpp_token_len (const cpp_token *token)
0d9f234d 1372{
93c80368 1373 unsigned int len;
6d2c2047 1374
93c80368 1375 switch (TOKEN_SPELL (token))
041c3194 1376 {
cc955282 1377 default: len = 6; break;
6338b358 1378 case SPELL_LITERAL: len = token->val.str.len; break;
9a0c6187 1379 case SPELL_IDENT: len = NODE_LEN (token->val.node.node) * 10; break;
041c3194 1380 }
59325650
NB
1381
1382 return len;
6d2c2047
ZW
1383}
1384
/* Parse one UTF-8 sequence starting at NAME and write the matching
   \UXXXXXXXX escape (lower-case hex digits) to BUFFER.  Exactly 10
   bytes are always written to BUFFER; no terminating NUL is added.
   Returns the number of bytes read from NAME.

   NAME must point at the lead byte of a multi-byte sequence.  Any
   ill-formed sequence, i.e. a lone continuation byte in lead position
   or a trailing byte that is not a continuation byte, calls abort.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  int j;
  int ucn_len = 0;
  int ucn_len_c;
  unsigned t;
  unsigned long utf32;

  /* The number of leading 1 bits in the first byte is the length of
     the UTF-8 sequence.  */
  for (t = *name; t & 0x80; t <<= 1)
    ucn_len++;

  /* Ill-formed UTF-8: a single leading 1 bit marks a continuation
     byte (10xxxxxx), which cannot start a sequence.  */
  if (ucn_len == 1)
    abort ();

  /* Payload bits of the lead byte, then six more from each trailing
     byte.  */
  utf32 = *name & (0x7F >> ucn_len);
  for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
    {
      utf32 = (utf32 << 6) | (*++name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();
    }

  *buffer++ = '\\';
  *buffer++ = 'U';
  for (j = 7; j >= 0; j--)
    *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
  return ucn_len;
}
1418
cfc93532
MLI
1419/* Given a token TYPE corresponding to a digraph, return a pointer to
1420 the spelling of the digraph. */
1421static const unsigned char *
1422cpp_digraph2name (enum cpp_ttype type)
1423{
1424 return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
1425}
47e20491 1426
041c3194 1427/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885 1428 already contain the enough space to hold the token's spelling.
6cf87ca4 1429 Returns a pointer to the character after the last character written.
47e20491
GK
1430 FORSTRING is true if this is to be the spelling after translation
1431 phase 1 (this is different for UCNs).
6cf87ca4 1432 FIXME: Would be nice if we didn't need the PFILE argument. */
93c80368 1433unsigned char *
6cf87ca4 1434cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
47e20491 1435 unsigned char *buffer, bool forstring)
041c3194 1436{
96be6998 1437 switch (TOKEN_SPELL (token))
041c3194
ZW
1438 {
1439 case SPELL_OPERATOR:
1440 {
1441 const unsigned char *spelling;
1442 unsigned char c;
d6d5f795 1443
041c3194 1444 if (token->flags & DIGRAPH)
cfc93532 1445 spelling = cpp_digraph2name (token->type);
92936ecf
ZW
1446 else if (token->flags & NAMED_OP)
1447 goto spell_ident;
041c3194 1448 else
96be6998 1449 spelling = TOKEN_NAME (token);
df383483 1450
041c3194
ZW
1451 while ((c = *spelling++) != '\0')
1452 *buffer++ = c;
1453 }
1454 break;
d6d5f795 1455
47ad4138 1456 spell_ident:
041c3194 1457 case SPELL_IDENT:
47e20491
GK
1458 if (forstring)
1459 {
9a0c6187
JM
1460 memcpy (buffer, NODE_NAME (token->val.node.node),
1461 NODE_LEN (token->val.node.node));
1462 buffer += NODE_LEN (token->val.node.node);
47e20491
GK
1463 }
1464 else
1465 {
1466 size_t i;
9a0c6187 1467 const unsigned char * name = NODE_NAME (token->val.node.node);
47e20491 1468
9a0c6187 1469 for (i = 0; i < NODE_LEN (token->val.node.node); i++)
47e20491
GK
1470 if (name[i] & ~0x7F)
1471 {
1472 i += utf8_to_ucn (buffer, name + i) - 1;
1473 buffer += 10;
1474 }
1475 else
9a0c6187 1476 *buffer++ = NODE_NAME (token->val.node.node)[i];
47e20491 1477 }
041c3194 1478 break;
d6d5f795 1479
6338b358 1480 case SPELL_LITERAL:
47ad4138
ZW
1481 memcpy (buffer, token->val.str.text, token->val.str.len);
1482 buffer += token->val.str.len;
1483 break;
1484
041c3194 1485 case SPELL_NONE:
0527bc4e
JDA
1486 cpp_error (pfile, CPP_DL_ICE,
1487 "unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1488 break;
1489 }
d6d5f795 1490
041c3194
ZW
1491 return buffer;
1492}
d6d5f795 1493
5d8ebbd8
NB
1494/* Returns TOKEN spelt as a null-terminated string. The string is
1495 freed when the reader is destroyed. Useful for diagnostics. */
93c80368 1496unsigned char *
6cf87ca4 1497cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
59325650
NB
1498{
1499 unsigned int len = cpp_token_len (token) + 1;
ece54d54 1500 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
c5a04734 1501
47e20491 1502 end = cpp_spell_token (pfile, token, start, false);
93c80368 1503 end[0] = '\0';
c5a04734 1504
93c80368
NB
1505 return start;
1506}
c5a04734 1507
cfc93532
MLI
1508/* Returns a pointer to a string which spells the token defined by
1509 TYPE and FLAGS. Used by C front ends, which really should move to
1510 using cpp_token_as_text. */
93c80368 1511const char *
cfc93532 1512cpp_type2name (enum cpp_ttype type, unsigned char flags)
93c80368 1513{
cfc93532
MLI
1514 if (flags & DIGRAPH)
1515 return (const char *) cpp_digraph2name (type);
1516 else if (flags & NAMED_OP)
1517 return cpp_named_operator2name (type);
1518
93c80368
NB
1519 return (const char *) token_spellings[type].name;
1520}
c5a04734 1521
4ed5bcfb
NB
1522/* Writes the spelling of token to FP, without any preceding space.
1523 Separated from cpp_spell_token for efficiency - to avoid stdio
1524 double-buffering. */
93c80368 1525void
6cf87ca4 1526cpp_output_token (const cpp_token *token, FILE *fp)
93c80368 1527{
93c80368 1528 switch (TOKEN_SPELL (token))
c5a04734 1529 {
93c80368
NB
1530 case SPELL_OPERATOR:
1531 {
1532 const unsigned char *spelling;
3b681e9d 1533 int c;
c5a04734 1534
93c80368 1535 if (token->flags & DIGRAPH)
cfc93532 1536 spelling = cpp_digraph2name (token->type);
93c80368
NB
1537 else if (token->flags & NAMED_OP)
1538 goto spell_ident;
1539 else
1540 spelling = TOKEN_NAME (token);
041c3194 1541
3b681e9d
ZW
1542 c = *spelling;
1543 do
1544 putc (c, fp);
1545 while ((c = *++spelling) != '\0');
93c80368
NB
1546 }
1547 break;
041c3194 1548
93c80368
NB
1549 spell_ident:
1550 case SPELL_IDENT:
47e20491
GK
1551 {
1552 size_t i;
9a0c6187 1553 const unsigned char * name = NODE_NAME (token->val.node.node);
47e20491 1554
9a0c6187 1555 for (i = 0; i < NODE_LEN (token->val.node.node); i++)
47e20491
GK
1556 if (name[i] & ~0x7F)
1557 {
1558 unsigned char buffer[10];
1559 i += utf8_to_ucn (buffer, name + i) - 1;
1560 fwrite (buffer, 1, 10, fp);
1561 }
1562 else
9a0c6187 1563 fputc (NODE_NAME (token->val.node.node)[i], fp);
47e20491
GK
1564 }
1565 break;
041c3194 1566
6338b358 1567 case SPELL_LITERAL:
47ad4138
ZW
1568 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1569 break;
1570
93c80368
NB
1571 case SPELL_NONE:
1572 /* An error, most probably. */
1573 break;
041c3194 1574 }
c5a04734
ZW
1575}
1576
93c80368
NB
1577/* Compare two tokens. */
1578int
6cf87ca4 1579_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
c5a04734 1580{
93c80368
NB
1581 if (a->type == b->type && a->flags == b->flags)
1582 switch (TOKEN_SPELL (a))
1583 {
1584 default: /* Keep compiler happy. */
1585 case SPELL_OPERATOR:
9a0c6187 1586 /* token_no is used to track where multiple consecutive ##
aa508502 1587 tokens were originally located. */
9a0c6187 1588 return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
93c80368 1589 case SPELL_NONE:
9a0c6187
JM
1590 return (a->type != CPP_MACRO_ARG
1591 || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
93c80368 1592 case SPELL_IDENT:
9a0c6187 1593 return a->val.node.node == b->val.node.node;
6338b358 1594 case SPELL_LITERAL:
93c80368
NB
1595 return (a->val.str.len == b->val.str.len
1596 && !memcmp (a->val.str.text, b->val.str.text,
1597 a->val.str.len));
1598 }
c5a04734 1599
041c3194
ZW
1600 return 0;
1601}
1602
93c80368
NB
1603/* Returns nonzero if a space should be inserted to avoid an
1604 accidental token paste for output. For simplicity, it is
1605 conservative, and occasionally advises a space where one is not
1606 needed, e.g. "." and ".2". */
93c80368 1607int
6cf87ca4
ZW
1608cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1609 const cpp_token *token2)
c5a04734 1610{
93c80368
NB
1611 enum cpp_ttype a = token1->type, b = token2->type;
1612 cppchar_t c;
c5a04734 1613
93c80368
NB
1614 if (token1->flags & NAMED_OP)
1615 a = CPP_NAME;
1616 if (token2->flags & NAMED_OP)
1617 b = CPP_NAME;
c5a04734 1618
93c80368
NB
1619 c = EOF;
1620 if (token2->flags & DIGRAPH)
37b8524c 1621 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1622 else if (token_spellings[b].category == SPELL_OPERATOR)
1623 c = token_spellings[b].name[0];
c5a04734 1624
93c80368 1625 /* Quickly get everything that can paste with an '='. */
37b8524c 1626 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1627 return 1;
c5a04734 1628
93c80368 1629 switch (a)
c5a04734 1630 {
b52dbbf8
SE
1631 case CPP_GREATER: return c == '>';
1632 case CPP_LESS: return c == '<' || c == '%' || c == ':';
93c80368
NB
1633 case CPP_PLUS: return c == '+';
1634 case CPP_MINUS: return c == '-' || c == '>';
1635 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1636 case CPP_MOD: return c == ':' || c == '>';
1637 case CPP_AND: return c == '&';
1638 case CPP_OR: return c == '|';
1639 case CPP_COLON: return c == ':' || c == '>';
1640 case CPP_DEREF: return c == '*';
26ec42ee 1641 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1642 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1643 case CPP_NAME: return ((b == CPP_NUMBER
1644 && name_p (pfile, &token2->val.str))
1645 || b == CPP_NAME
1646 || b == CPP_CHAR || b == CPP_STRING); /* L */
1647 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1648 || c == '.' || c == '+' || c == '-');
1613e52b 1649 /* UCNs */
1067694a
NB
1650 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1651 && b == CPP_NAME)
1613e52b 1652 || (CPP_OPTION (pfile, objc)
1067694a 1653 && token1->val.str.text[0] == '@'
1613e52b 1654 && (b == CPP_NAME || b == CPP_STRING)));
93c80368 1655 default: break;
c5a04734 1656 }
c5a04734 1657
417f3e3a 1658 return 0;
c5a04734
ZW
1659}
1660
93c80368 1661/* Output all the remaining tokens on the current line, and a newline
4ed5bcfb
NB
1662 character, to FP. Leading whitespace is removed. If there are
1663 macros, special token padding is not performed. */
c5a04734 1664void
6cf87ca4 1665cpp_output_line (cpp_reader *pfile, FILE *fp)
c5a04734 1666{
4ed5bcfb 1667 const cpp_token *token;
96be6998 1668
4ed5bcfb
NB
1669 token = cpp_get_token (pfile);
1670 while (token->type != CPP_EOF)
96be6998 1671 {
4ed5bcfb
NB
1672 cpp_output_token (token, fp);
1673 token = cpp_get_token (pfile);
1674 if (token->flags & PREV_WHITE)
1675 putc (' ', fp);
96be6998
ZW
1676 }
1677
93c80368 1678 putc ('\n', fp);
041c3194 1679}
c5a04734 1680
5d6342eb
TT
1681/* Return a string representation of all the remaining tokens on the
1682 current line. The result is allocated using xmalloc and must be
1683 freed by the caller. */
1684unsigned char *
1685cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
1686{
1687 const cpp_token *token;
1688 unsigned int out = dir_name ? ustrlen (dir_name) : 0;
1689 unsigned int alloced = 120 + out;
1690 unsigned char *result = (unsigned char *) xmalloc (alloced);
1691
1692 /* If DIR_NAME is empty, there are no initial contents. */
1693 if (dir_name)
1694 {
1695 sprintf ((char *) result, "#%s ", dir_name);
1696 out += 2;
1697 }
1698
1699 token = cpp_get_token (pfile);
1700 while (token->type != CPP_EOF)
1701 {
1702 unsigned char *last;
1703 /* Include room for a possible space and the terminating nul. */
1704 unsigned int len = cpp_token_len (token) + 2;
1705
1706 if (out + len > alloced)
1707 {
1708 alloced *= 2;
1709 if (out + len > alloced)
1710 alloced = out + len;
1711 result = (unsigned char *) xrealloc (result, alloced);
1712 }
1713
1714 last = cpp_spell_token (pfile, token, &result[out], 0);
1715 out = last - result;
1716
1717 token = cpp_get_token (pfile);
1718 if (token->flags & PREV_WHITE)
1719 result[out++] = ' ';
1720 }
1721
1722 result[out] = '\0';
1723 return result;
1724}
1725
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the number of bytes between
   BUFF's cur and limit pointers.  All macro arguments are
   parenthesized so that any expression may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1740
c9e7a609
NB
1741/* Create a new allocation buffer. Place the control block at the end
1742 of the buffer, so that buffer overflows will cause immediate chaos. */
b8af0ca5 1743static _cpp_buff *
6cf87ca4 1744new_buff (size_t len)
b8af0ca5
NB
1745{
1746 _cpp_buff *result;
ece54d54 1747 unsigned char *base;
b8af0ca5 1748
1e013d2e
NB
1749 if (len < MIN_BUFF_SIZE)
1750 len = MIN_BUFF_SIZE;
c70f6ed3 1751 len = CPP_ALIGN (len);
b8af0ca5 1752
c3f829c1 1753 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
b8af0ca5
NB
1754 result = (_cpp_buff *) (base + len);
1755 result->base = base;
1756 result->cur = base;
1757 result->limit = base + len;
1758 result->next = NULL;
1759 return result;
1760}
1761
1762/* Place a chain of unwanted allocation buffers on the free list. */
1763void
6cf87ca4 1764_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
b8af0ca5
NB
1765{
1766 _cpp_buff *end = buff;
1767
1768 while (end->next)
1769 end = end->next;
1770 end->next = pfile->free_buffs;
1771 pfile->free_buffs = buff;
1772}
1773
1774/* Return a free buffer of size at least MIN_SIZE. */
1775_cpp_buff *
6cf87ca4 1776_cpp_get_buff (cpp_reader *pfile, size_t min_size)
b8af0ca5
NB
1777{
1778 _cpp_buff *result, **p;
1779
1780 for (p = &pfile->free_buffs;; p = &(*p)->next)
1781 {
6142088c 1782 size_t size;
1e013d2e
NB
1783
1784 if (*p == NULL)
b8af0ca5 1785 return new_buff (min_size);
1e013d2e
NB
1786 result = *p;
1787 size = result->limit - result->base;
1788 /* Return a buffer that's big enough, but don't waste one that's
1789 way too big. */
34f5271d 1790 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
b8af0ca5
NB
1791 break;
1792 }
1793
1794 *p = result->next;
1795 result->next = NULL;
1796 result->cur = result->base;
1797 return result;
1798}
1799
4fe9b91c 1800/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
1801 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1802 the excess bytes to the new buffer. Chains the new buffer after
1803 BUFF, and returns the new buffer. */
b8af0ca5 1804_cpp_buff *
6cf87ca4 1805_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
b8af0ca5 1806{
6142088c 1807 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
8c3b2693 1808 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
b8af0ca5 1809
8c3b2693
NB
1810 buff->next = new_buff;
1811 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1812 return new_buff;
1813}
1814
4fe9b91c 1815/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
1816 remaining bytes of the buffer pointed to by BUFF, and at least
1817 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1818 Chains the new buffer before the buffer pointed to by BUFF, and
1819 updates the pointer to point to the new buffer. */
1820void
6cf87ca4 1821_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
8c3b2693
NB
1822{
1823 _cpp_buff *new_buff, *old_buff = *pbuff;
1824 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1825
1826 new_buff = _cpp_get_buff (pfile, size);
1827 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1828 new_buff->next = old_buff;
1829 *pbuff = new_buff;
b8af0ca5
NB
1830}
1831
1832/* Free a chain of buffers starting at BUFF. */
1833void
5671bf27 1834_cpp_free_buff (_cpp_buff *buff)
b8af0ca5
NB
1835{
1836 _cpp_buff *next;
1837
1838 for (; buff; buff = next)
1839 {
1840 next = buff->next;
1841 free (buff->base);
1842 }
1843}
417f3e3a 1844
ece54d54
NB
1845/* Allocate permanent, unaligned storage of length LEN. */
1846unsigned char *
6cf87ca4 1847_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
ece54d54
NB
1848{
1849 _cpp_buff *buff = pfile->u_buff;
1850 unsigned char *result = buff->cur;
1851
1852 if (len > (size_t) (buff->limit - result))
1853 {
1854 buff = _cpp_get_buff (pfile, len);
1855 buff->next = pfile->u_buff;
1856 pfile->u_buff = buff;
1857 result = buff->cur;
1858 }
1859
1860 buff->cur = result + len;
1861 return result;
1862}
1863
87062813
NB
1864/* Allocate permanent, unaligned storage of length LEN from a_buff.
1865 That buffer is used for growing allocations when saving macro
1866 replacement lists in a #define, and when parsing an answer to an
1867 assertion in #assert, #unassert or #if (and therefore possibly
1868 whilst expanding macros). It therefore must not be used by any
1869 code that they might call: specifically the lexer and the guts of
1870 the macro expander.
1871
1872 All existing other uses clearly fit this restriction: storing
1873 registered pragmas during initialization. */
93c80368 1874unsigned char *
6cf87ca4 1875_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3fef5b2b 1876{
8c3b2693
NB
1877 _cpp_buff *buff = pfile->a_buff;
1878 unsigned char *result = buff->cur;
3fef5b2b 1879
8c3b2693 1880 if (len > (size_t) (buff->limit - result))
3fef5b2b 1881 {
8c3b2693
NB
1882 buff = _cpp_get_buff (pfile, len);
1883 buff->next = pfile->a_buff;
1884 pfile->a_buff = buff;
1885 result = buff->cur;
3fef5b2b 1886 }
041c3194 1887
8c3b2693 1888 buff->cur = result + len;
93c80368 1889 return result;
041c3194 1890}
d8044160
GK
1891
1892/* Say which field of TOK is in use. */
1893
1894enum cpp_token_fld_kind
1895cpp_token_val_index (cpp_token *tok)
1896{
1897 switch (TOKEN_SPELL (tok))
1898 {
1899 case SPELL_IDENT:
1900 return CPP_TOKEN_FLD_NODE;
1901 case SPELL_LITERAL:
1902 return CPP_TOKEN_FLD_STR;
aa508502
JM
1903 case SPELL_OPERATOR:
1904 if (tok->type == CPP_PASTE)
9a0c6187 1905 return CPP_TOKEN_FLD_TOKEN_NO;
aa508502
JM
1906 else
1907 return CPP_TOKEN_FLD_NONE;
d8044160
GK
1908 case SPELL_NONE:
1909 if (tok->type == CPP_MACRO_ARG)
1910 return CPP_TOKEN_FLD_ARG_NO;
1911 else if (tok->type == CPP_PADDING)
1912 return CPP_TOKEN_FLD_SOURCE;
21b11495 1913 else if (tok->type == CPP_PRAGMA)
bc4071dd 1914 return CPP_TOKEN_FLD_PRAGMA;
d8044160
GK
1915 /* else fall through */
1916 default:
1917 return CPP_TOKEN_FLD_NONE;
1918 }
1919}