]> git.ipfire.org Git - thirdparty/gcc.git/blame - libcpp/lex.c
2007-05-30 Russell Yanofsky <russ@yanofsky.org>
[thirdparty/gcc.git] / libcpp / lex.c
CommitLineData
0578f103 1/* CPP Library - lexical analysis.
3827dee5 2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
0578f103 3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7
8This program is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 2, or (at your option) any
11later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with this program; if not, write to the Free Software
2656917a 20Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
0578f103 21
22#include "config.h"
23#include "system.h"
0578f103 24#include "cpplib.h"
d856c8a6 25#include "internal.h"
0578f103 26
/* Category stored for each token type in the token_spellings table
   below and read back through TOKEN_SPELL.  Entries generated by OP
   get SPELL_OPERATOR; entries generated by TK get the category named
   in the token table (SPELL_IDENT, SPELL_LITERAL or SPELL_NONE).  */
79bd622b 27enum spell_type
241e762e 28{
79bd622b 29 SPELL_OPERATOR = 0,
79bd622b 30 SPELL_IDENT,
4970d4c2 31 SPELL_LITERAL,
79bd622b 32 SPELL_NONE
241e762e 33};
34
/* One row of the token_spellings table: the spelling category of a
   token type and the text associated with it (the operator spelling
   for OP entries, the stringified enumerator for TK entries).  */
79bd622b 35struct token_spelling
241e762e 36{
79bd622b 37 enum spell_type category;
38 const unsigned char *name;
241e762e 39};
40
/* Spellings of the six digraph forms.  NOTE(review): the index order
   presumably mirrors the order of the corresponding operator token
   types -- confirm against the code that indexes this table.  */
0ca849f9 41static const unsigned char *const digraph_spellings[] =
42{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
79bd622b 43
/* Build the per-token-type spelling table by expanding TTYPE_TABLE:
   OP (e, s) yields an operator entry whose name is the spelling S, and
   TK (e, s) yields an entry of category SPELL_<s> whose name is the
   stringified enumerator E.  OP and TK are only defined for the
   duration of the table initializer.  */
d6d3c909 44#define OP(e, s) { SPELL_OPERATOR, U s },
45#define TK(e, s) { SPELL_ ## s, U #e },
0ca849f9 46static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
79bd622b 47#undef OP
48#undef TK
49
/* Accessors for the table row selected by TOKEN's type.  */
50#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
e2f9a79f 52
f7fdd7a1 53static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54static int skip_line_comment (cpp_reader *);
55static void skip_whitespace (cpp_reader *, cppchar_t);
f7fdd7a1 56static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58static void create_literal (cpp_reader *, cpp_token *, const uchar *,
59 unsigned int, enum cpp_ttype);
60static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
61static int name_p (cpp_reader *, const cpp_string *);
f7fdd7a1 62static tokenrun *next_tokenrun (tokenrun *);
63
f7fdd7a1 64static _cpp_buff *new_buff (size_t);
bce8e0c0 65
e920deaf 66
f80e83a9 67/* Utility routine:
2c63d6c8 68
76faa4c0 69 Compares, the token TOKEN to the NUL-terminated string STRING.
70 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
f80e83a9 71int
f7fdd7a1 72cpp_ideq (const cpp_token *token, const char *string)
f80e83a9 73{
76faa4c0 74 if (token->type != CPP_NAME)
f80e83a9 75 return 0;
76faa4c0 76
b6d18b0a 77 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
bce8e0c0 78}
50fd6b48 79
a54e0bf8 80/* Record a note TYPE at byte POS into the current cleaned logical
81 line. */
1e0ef2fd 82static void
f7fdd7a1 83add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
338fa5f7 84{
a54e0bf8 85 if (buffer->notes_used == buffer->notes_cap)
86 {
87 buffer->notes_cap = buffer->notes_cap * 2 + 200;
720aca92 88 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89 buffer->notes_cap);
a54e0bf8 90 }
338fa5f7 91
a54e0bf8 92 buffer->notes[buffer->notes_used].pos = pos;
93 buffer->notes[buffer->notes_used].type = type;
94 buffer->notes_used++;
338fa5f7 95}
96
/* Prepare the next logical line of PFILE's current buffer.  On entry
   the line starts at buffer->next_line.  On exit buffer->cur and
   buffer->line_base point at its start, the note list has been rebuilt
   for this line (ending with a '\n' sentinel note), the line is
   terminated in place by a single '\n', and buffer->next_line points
   just past the terminator that was consumed.  */
a54e0bf8 97/* Returns with a logical line that contains no escaped newlines or
98 trigraphs. This is a time-critical inner loop. */
99void
f7fdd7a1 100_cpp_clean_line (cpp_reader *pfile)
0578f103 101{
a54e0bf8 102 cpp_buffer *buffer;
/* S is the read cursor, D the write cursor, P a scratch pointer used
   when scanning backwards from a newline.  */
103 const uchar *s;
104 uchar c, *d, *p;
1e0ef2fd 105
a54e0bf8 106 buffer = pfile->buffer;
/* Discard the notes of the previous line.  */
107 buffer->cur_note = buffer->notes_used = 0;
108 buffer->cur = buffer->line_base = buffer->next_line;
109 buffer->need_line = false;
/* Start one byte early because both loops pre-increment S.  */
110 s = buffer->next_line - 1;
1e0ef2fd 111
a54e0bf8 112 if (!buffer->from_stage3)
0578f103 113 {
/* Position of the most recent backslash seen on the fast path, or
   NULL if none has been seen yet.  */
5008f5c5 114 const uchar *pbackslash = NULL;
115
54d3be91 116 /* Short circuit for the common case of an un-escaped line with
117 no trigraphs. The primary win here is by not writing any
118 data back to memory until we have to. */
119 for (;;)
120 {
121 c = *++s;
5008f5c5 122 if (__builtin_expect (c == '\n', false)
123 || __builtin_expect (c == '\r', false))
54d3be91 124 {
/* D marks where the terminating '\n' will be stored at "done".  */
125 d = (uchar *) s;
126
5008f5c5 127 if (__builtin_expect (s == buffer->rlimit, false))
54d3be91 128 goto done;
129
130 /* DOS line ending? */
5008f5c5 131 if (__builtin_expect (c == '\r', false)
132 && s[1] == '\n')
133 {
134 s++;
135 if (s == buffer->rlimit)
136 goto done;
137 }
54d3be91 138
/* No backslash anywhere on this line, so the newline cannot be
   escaped.  */
5008f5c5 139 if (__builtin_expect (pbackslash == NULL, true))
54d3be91 140 goto done;
141
5008f5c5 142 /* Check for escaped newline. */
/* Step back over non-vertical whitespace before the newline; the
   newline is escaped only if the character before that whitespace is
   the last backslash recorded.  */
54d3be91 143 p = d;
5008f5c5 144 while (is_nvspace (p[-1]))
54d3be91 145 p--;
5008f5c5 146 if (p - 1 != pbackslash)
54d3be91 147 goto done;
148
149 /* Have an escaped newline; process it and proceed to
150 the slow path. */
/* The note type is ' ' when whitespace separated the backslash from
   the newline and '\\' otherwise.  Setting D to P - 2 makes the slow
   path's first "*++d" store land on the backslash itself.  */
151 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
152 d = p - 2;
153 buffer->next_line = p - 1;
154 break;
155 }
5008f5c5 156 if (__builtin_expect (c == '\\', false))
157 pbackslash = s;
158 else if (__builtin_expect (c == '?', false)
159 && __builtin_expect (s[1] == '?', false)
160 && _cpp_trigraph_map[s[2]])
54d3be91 161 {
162 /* Have a trigraph. We may or may not have to convert
163 it. Add a line note regardless, for -Wtrigraphs. */
164 add_line_note (buffer, s, s[2]);
165 if (CPP_OPTION (pfile, trigraphs))
166 {
167 /* We do, and that means we have to switch to the
168 slow path. */
169 d = (uchar *) s;
170 *d = _cpp_trigraph_map[s[2]];
171 s += 2;
172 break;
173 }
174 }
175 }
176
a54e0bf8 177
/* Slow path: copy every byte from S down to D, so that removed
   backslash-newlines and converted trigraphs close up in place.  */
178 for (;;)
4b912310 179 {
a54e0bf8 180 c = *++s;
181 *++d = c;
182
183 if (c == '\n' || c == '\r')
184 {
185 /* Handle DOS line endings. */
186 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
187 s++;
188 if (s == buffer->rlimit)
189 break;
190
191 /* Escaped? */
/* buffer->next_line is used here as the lower bound for the backward
   scan; it was last set where the previous escaped newline was
   removed.  */
192 p = d;
193 while (p != buffer->next_line && is_nvspace (p[-1]))
194 p--;
195 if (p == buffer->next_line || p[-1] != '\\')
196 break;
197
aad4a87f 198 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
a54e0bf8 199 d = p - 2;
200 buffer->next_line = p - 1;
201 }
202 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
203 {
204 /* Add a note regardless, for the benefit of -Wtrigraphs. */
aad4a87f 205 add_line_note (buffer, d, s[2]);
a54e0bf8 206 if (CPP_OPTION (pfile, trigraphs))
207 {
/* Overwrite the first '?' just copied with the replacement character
   and skip the other two source bytes.  */
208 *d = _cpp_trigraph_map[s[2]];
209 s += 2;
210 }
211 }
4b912310 212 }
0578f103 213 }
a54e0bf8 214 else
215 {
/* from_stage3 buffers get no trigraph or escaped-newline processing:
   just locate the end of the line.  */
216 do
217 s++;
218 while (*s != '\n' && *s != '\r');
219 d = (uchar *) s;
220
221 /* Handle DOS line endings. */
222 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
223 s++;
224 }
338fa5f7 225
54d3be91 226 done:
/* Terminate the cleaned line uniformly with '\n', whichever line
   ending was actually read.  */
a54e0bf8 227 *d = '\n';
aad4a87f 228 /* A sentinel note that should never be processed. */
229 add_line_note (buffer, d + 1, '\n');
a54e0bf8 230 buffer->next_line = s + 1;
0578f103 231}
232
3078f2b2 233/* Return true if the trigraph indicated by NOTE should be warned
234 about in a comment. */
235static bool
f7fdd7a1 236warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
3078f2b2 237{
238 const uchar *p;
239
240 /* Within comments we don't warn about trigraphs, unless the
241 trigraph forms an escaped newline, as that may change
7ef5b942 242 behavior. */
3078f2b2 243 if (note->type != '/')
244 return false;
245
246 /* If -trigraphs, then this was an escaped newline iff the next note
247 is coincident. */
248 if (CPP_OPTION (pfile, trigraphs))
249 return note[1].pos == note->pos;
250
251 /* Otherwise, see if this forms an escaped newline. */
252 p = note->pos + 3;
253 while (is_nvspace (*p))
254 p++;
255
256 /* There might have been escaped newlines between the trigraph and the
257 newline we found. Hence the position test. */
258 return (*p == '\n' && p < note[1].pos);
259}
260
a54e0bf8 261/* Process the notes created by add_line_note as far as the current
262 location. */
263void
f7fdd7a1 264_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
0578f103 265{
c808d026 266 cpp_buffer *buffer = pfile->buffer;
267
a54e0bf8 268 for (;;)
f80e83a9 269 {
a54e0bf8 270 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
271 unsigned int col;
396ffa86 272
a54e0bf8 273 if (note->pos > buffer->cur)
274 break;
396ffa86 275
a54e0bf8 276 buffer->cur_note++;
277 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
435fb09b 278
aad4a87f 279 if (note->type == '\\' || note->type == ' ')
a54e0bf8 280 {
aad4a87f 281 if (note->type == ' ' && !in_comment)
dbddc569 282 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
a54e0bf8 283 "backslash and newline separated by space");
aad4a87f 284
a54e0bf8 285 if (buffer->next_line > buffer->rlimit)
1e0ef2fd 286 {
dbddc569 287 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
a54e0bf8 288 "backslash-newline at end of file");
289 /* Prevent "no newline at end of file" warning. */
290 buffer->next_line = buffer->rlimit;
1e0ef2fd 291 }
a54e0bf8 292
293 buffer->line_base = note->pos;
610625e3 294 CPP_INCREMENT_LINE (pfile, 0);
338fa5f7 295 }
aad4a87f 296 else if (_cpp_trigraph_map[note->type])
297 {
3078f2b2 298 if (CPP_OPTION (pfile, warn_trigraphs)
299 && (!in_comment || warn_in_comment (pfile, note)))
aad4a87f 300 {
301 if (CPP_OPTION (pfile, trigraphs))
dbddc569 302 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
aad4a87f 303 "trigraph ??%c converted to %c",
304 note->type,
305 (int) _cpp_trigraph_map[note->type]);
306 else
1542b1ef 307 {
308 cpp_error_with_line
dbddc569 309 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1542b1ef 310 "trigraph ??%c ignored, use -trigraphs to enable",
311 note->type);
312 }
aad4a87f 313 }
314 }
315 else
316 abort ();
f80e83a9 317 }
0578f103 318}
319
338fa5f7 320/* Skip a C-style block comment. We find the end of the comment by
321 seeing if an asterisk is before every '/' we encounter. Returns
edaf8cb5 322 nonzero if comment terminated by EOF, zero otherwise.
323
324 Buffer->cur points to the initial asterisk of the comment. */
a54e0bf8 325bool
f7fdd7a1 326_cpp_skip_block_comment (cpp_reader *pfile)
0578f103 327{
f80e83a9 328 cpp_buffer *buffer = pfile->buffer;
54d3be91 329 const uchar *cur = buffer->cur;
330 uchar c;
338fa5f7 331
54d3be91 332 cur++;
333 if (*cur == '/')
334 cur++;
338fa5f7 335
a54e0bf8 336 for (;;)
337 {
338fa5f7 338 /* People like decorating comments with '*', so check for '/'
339 instead for efficiency. */
54d3be91 340 c = *cur++;
341
f80e83a9 342 if (c == '/')
0578f103 343 {
54d3be91 344 if (cur[-2] == '*')
338fa5f7 345 break;
f80e83a9 346
338fa5f7 347 /* Warn about potential nested comments, but not if the '/'
3fb1e43b 348 comes immediately before the true comment delimiter.
f80e83a9 349 Don't bother to get it right across escaped newlines. */
338fa5f7 350 if (CPP_OPTION (pfile, warn_comments)
54d3be91 351 && cur[0] == '*' && cur[1] != '/')
352 {
353 buffer->cur = cur;
d80d2074 354 cpp_error_with_line (pfile, CPP_DL_WARNING,
dbddc569 355 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
54d3be91 356 "\"/*\" within comment");
357 }
0578f103 358 }
a54e0bf8 359 else if (c == '\n')
360 {
610625e3 361 unsigned int cols;
54d3be91 362 buffer->cur = cur - 1;
a54e0bf8 363 _cpp_process_line_notes (pfile, true);
364 if (buffer->next_line >= buffer->rlimit)
365 return true;
366 _cpp_clean_line (pfile);
610625e3 367
368 cols = buffer->next_line - buffer->line_base;
369 CPP_INCREMENT_LINE (pfile, cols);
370
54d3be91 371 cur = buffer->cur;
a54e0bf8 372 }
0578f103 373 }
f80e83a9 374
54d3be91 375 buffer->cur = cur;
3078f2b2 376 _cpp_process_line_notes (pfile, true);
a54e0bf8 377 return false;
0578f103 378}
379
1c124f85 380/* Skip a C++ line comment, leaving buffer->cur pointing to the
d10cfa8d 381 terminating newline. Handles escaped newlines. Returns nonzero
1c124f85 382 if a multiline comment. */
f80e83a9 383static int
f7fdd7a1 384skip_line_comment (cpp_reader *pfile)
0578f103 385{
f669338a 386 cpp_buffer *buffer = pfile->buffer;
dbddc569 387 unsigned int orig_line = pfile->line_table->highest_line;
f80e83a9 388
a54e0bf8 389 while (*buffer->cur != '\n')
390 buffer->cur++;
1c124f85 391
a54e0bf8 392 _cpp_process_line_notes (pfile, true);
dbddc569 393 return orig_line != pfile->line_table->highest_line;
f80e83a9 394}
0578f103 395
a54e0bf8 396/* Skips whitespace, saving the next non-whitespace character. */
b86584f6 397static void
f7fdd7a1 398skip_whitespace (cpp_reader *pfile, cppchar_t c)
f80e83a9 399{
400 cpp_buffer *buffer = pfile->buffer;
fe9eb18b 401 bool saw_NUL = false;
0578f103 402
338fa5f7 403 do
f80e83a9 404 {
78719282 405 /* Horizontal space always OK. */
a54e0bf8 406 if (c == ' ' || c == '\t')
338fa5f7 407 ;
338fa5f7 408 /* Just \f \v or \0 left. */
78719282 409 else if (c == '\0')
fe9eb18b 410 saw_NUL = true;
79bd622b 411 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
dbddc569 412 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
73328dce 413 CPP_BUF_COL (buffer),
414 "%s in preprocessing directive",
415 c == '\f' ? "form feed" : "vertical tab");
338fa5f7 416
338fa5f7 417 c = *buffer->cur++;
0578f103 418 }
2c0e001b 419 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
338fa5f7 420 while (is_nvspace (c));
421
fe9eb18b 422 if (saw_NUL)
d80d2074 423 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
fe9eb18b 424
1c124f85 425 buffer->cur--;
f80e83a9 426}
0578f103 427
79bd622b 428/* See if the characters of a number token are valid in a name (no
429 '.', '+' or '-'). */
430static int
f7fdd7a1 431name_p (cpp_reader *pfile, const cpp_string *string)
79bd622b 432{
433 unsigned int i;
434
435 for (i = 0; i < string->len; i++)
436 if (!is_idchar (string->text[i]))
437 return 0;
438
b1a9ff83 439 return 1;
79bd622b 440}
441
bce47149 442/* After parsing an identifier or other sequence, produce a warning about
443 sequences not in NFC/NFKC. */
444static void
445warn_about_normalization (cpp_reader *pfile,
446 const cpp_token *token,
447 const struct normalize_state *s)
448{
449 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
450 && !pfile->state.skipping)
451 {
452 /* Make sure that the token is printed using UCNs, even
453 if we'd otherwise happily print UTF-8. */
720aca92 454 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
bce47149 455 size_t sz;
456
457 sz = cpp_spell_token (pfile, token, buf, false) - buf;
458 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
459 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
3827dee5 460 "`%.*s' is not in NFKC", (int) sz, buf);
bce47149 461 else
462 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
3827dee5 463 "`%.*s' is not in NFC", (int) sz, buf);
bce47149 464 }
465}
466
5bb46c08 467/* Returns TRUE if the sequence starting at buffer->cur is invalid in
2cbf1359 468 an identifier. FIRST is TRUE if this starts an identifier. */
5bb46c08 469static bool
bce47149 470forms_identifier_p (cpp_reader *pfile, int first,
471 struct normalize_state *state)
5bb46c08 472{
2cbf1359 473 cpp_buffer *buffer = pfile->buffer;
474
475 if (*buffer->cur == '$')
476 {
477 if (!CPP_OPTION (pfile, dollars_in_ident))
478 return false;
479
480 buffer->cur++;
f0c2775b 481 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
2cbf1359 482 {
f0c2775b 483 CPP_OPTION (pfile, warn_dollars) = 0;
d80d2074 484 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
2cbf1359 485 }
486
487 return true;
488 }
5bb46c08 489
2cbf1359 490 /* Is this a syntactically valid UCN? */
865c4e44 491 if (CPP_OPTION (pfile, extended_identifiers)
4e9d1e6d 492 && *buffer->cur == '\\'
2cbf1359 493 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
5bb46c08 494 {
2cbf1359 495 buffer->cur += 2;
bce47149 496 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
497 state))
2cbf1359 498 return true;
499 buffer->cur -= 2;
5bb46c08 500 }
5bb46c08 501
2cbf1359 502 return false;
5bb46c08 503}
504
505/* Lex an identifier starting at BUFFER->CUR - 1. */
338fa5f7 506static cpp_hashnode *
bce47149 507lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
508 struct normalize_state *nst)
0578f103 509{
79bd622b 510 cpp_hashnode *result;
bb1fa6bb 511 const uchar *cur;
3eb3f293 512 unsigned int len;
513 unsigned int hash = HT_HASHSTEP (0, *base);
66a5287e 514
3eb3f293 515 cur = pfile->buffer->cur;
bb1fa6bb 516 if (! starts_ucn)
517 while (ISIDNUM (*cur))
518 {
519 hash = HT_HASHSTEP (hash, *cur);
520 cur++;
521 }
522 pfile->buffer->cur = cur;
bce47149 523 if (starts_ucn || forms_identifier_p (pfile, false, nst))
78a11351 524 {
bb1fa6bb 525 /* Slower version for identifiers containing UCNs (or $). */
526 do {
527 while (ISIDNUM (*pfile->buffer->cur))
bce47149 528 {
529 pfile->buffer->cur++;
530 NORMALIZE_STATE_UPDATE_IDNUM (nst);
531 }
532 } while (forms_identifier_p (pfile, false, nst));
bb1fa6bb 533 result = _cpp_interpret_identifier (pfile, base,
534 pfile->buffer->cur - base);
66a5287e 535 }
bb1fa6bb 536 else
537 {
538 len = cur - base;
539 hash = HT_HASHFINISH (hash, len);
5bb46c08 540
bb1fa6bb 541 result = (cpp_hashnode *)
542 ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
543 }
66a5287e 544
5bb46c08 545 /* Rarely, identifiers require diagnostics when lexed. */
66a5287e 546 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
547 && !pfile->state.skipping, 0))
548 {
549 /* It is allowed to poison the same identifier twice. */
550 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
d80d2074 551 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
66a5287e 552 NODE_NAME (result));
553
554 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
555 replacement list of a variadic macro. */
556 if (result == pfile->spec_nodes.n__VA_ARGS__
557 && !pfile->state.va_args_ok)
d80d2074 558 cpp_error (pfile, CPP_DL_PEDWARN,
f7fdd7a1 559 "__VA_ARGS__ can only appear in the expansion"
560 " of a C99 variadic macro");
66a5287e 561 }
562
563 return result;
564}
565
5bb46c08 566/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
0578f103 567static void
bce47149 568lex_number (cpp_reader *pfile, cpp_string *number,
569 struct normalize_state *nst)
0578f103 570{
b6d18b0a 571 const uchar *cur;
5bb46c08 572 const uchar *base;
573 uchar *dest;
0578f103 574
5bb46c08 575 base = pfile->buffer->cur - 1;
576 do
f80e83a9 577 {
5bb46c08 578 cur = pfile->buffer->cur;
338fa5f7 579
5bb46c08 580 /* N.B. ISIDNUM does not include $. */
581 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
bce47149 582 {
583 cur++;
584 NORMALIZE_STATE_UPDATE_IDNUM (nst);
585 }
0578f103 586
78a11351 587 pfile->buffer->cur = cur;
0578f103 588 }
bce47149 589 while (forms_identifier_p (pfile, false, nst));
79bd622b 590
5bb46c08 591 number->len = cur - base;
592 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
593 memcpy (dest, base, number->len);
594 dest[number->len] = '\0';
595 number->text = dest;
79bd622b 596}
597
4970d4c2 598/* Create a token of type TYPE with a literal spelling. */
599static void
f7fdd7a1 600create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
601 unsigned int len, enum cpp_ttype type)
4970d4c2 602{
603 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
604
605 memcpy (dest, base, len);
606 dest[len] = '\0';
607 token->type = type;
608 token->val.str.len = len;
609 token->val.str.text = dest;
610}
611
5bb46c08 612/* Lexes a string, character constant, or angle-bracketed header file
4970d4c2 613 name. The stored string contains the spelling, including opening
614 quote and leading any leading 'L'. It returns the type of the
615 literal, or CPP_OTHER if it was not properly terminated.
616
617 The spelling is NUL-terminated, but it is not guaranteed that this
618 is the first NUL since embedded NULs are preserved. */
f80e83a9 619static void
f7fdd7a1 620lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
0578f103 621{
4970d4c2 622 bool saw_NUL = false;
623 const uchar *cur;
5bb46c08 624 cppchar_t terminator;
4970d4c2 625 enum cpp_ttype type;
626
627 cur = base;
628 terminator = *cur++;
629 if (terminator == 'L')
630 terminator = *cur++;
631 if (terminator == '\"')
632 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
633 else if (terminator == '\'')
634 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
635 else
636 terminator = '>', type = CPP_HEADER_NAME;
79bd622b 637
338fa5f7 638 for (;;)
0578f103 639 {
4970d4c2 640 cppchar_t c = *cur++;
4b0c16ee 641
edaf8cb5 642 /* In #include-style directives, terminators are not escapable. */
4970d4c2 643 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
644 cur++;
645 else if (c == terminator)
5bb46c08 646 break;
4970d4c2 647 else if (c == '\n')
338fa5f7 648 {
4970d4c2 649 cur--;
650 type = CPP_OTHER;
651 break;
0578f103 652 }
4970d4c2 653 else if (c == '\0')
654 saw_NUL = true;
0578f103 655 }
656
4970d4c2 657 if (saw_NUL && !pfile->state.skipping)
d80d2074 658 cpp_error (pfile, CPP_DL_WARNING,
659 "null character(s) preserved in literal");
0578f103 660
0b67f687 661 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
662 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
663 (int) terminator);
664
4970d4c2 665 pfile->buffer->cur = cur;
666 create_literal (pfile, token, base, cur - base, type);
338fa5f7 667}
f80e83a9 668
79bd622b 669/* The stored comment includes the comment start and any terminator. */
2c63d6c8 670static void
f7fdd7a1 671save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
672 cppchar_t type)
2c63d6c8 673{
f80e83a9 674 unsigned char *buffer;
d3f7919d 675 unsigned int len, clen;
b1a9ff83 676
f0495c2c 677 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1c124f85 678
a543b315 679 /* C++ comments probably (not definitely) have moved past a new
680 line, which we don't want to save in the comment. */
1c124f85 681 if (is_vspace (pfile->buffer->cur[-1]))
a543b315 682 len--;
d3f7919d 683
684 /* If we are currently in a directive, then we need to store all
685 C++ comments as C comments internally, and so we need to
686 allocate a little extra space in that case.
687
688 Note that the only time we encounter a directive here is
689 when we are saving comments in a "#define". */
690 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
691
692 buffer = _cpp_unaligned_alloc (pfile, clen);
b1a9ff83 693
f80e83a9 694 token->type = CPP_COMMENT;
d3f7919d 695 token->val.str.len = clen;
338fa5f7 696 token->val.str.text = buffer;
0578f103 697
f0495c2c 698 buffer[0] = '/';
699 memcpy (buffer + 1, from, len - 1);
d3f7919d 700
a113df96 701 /* Finish conversion to a C comment, if necessary. */
d3f7919d 702 if (pfile->state.in_directive && type == '/')
703 {
704 buffer[1] = '*';
705 buffer[clen - 2] = '*';
706 buffer[clen - 1] = '/';
707 }
338fa5f7 708}
0578f103 709
83dcbb5c 710/* Allocate COUNT tokens for RUN. */
711void
f7fdd7a1 712_cpp_init_tokenrun (tokenrun *run, unsigned int count)
83dcbb5c 713{
3b298764 714 run->base = XNEWVEC (cpp_token, count);
83dcbb5c 715 run->limit = run->base + count;
716 run->next = NULL;
717}
718
719/* Returns the next tokenrun, or creates one if there is none. */
720static tokenrun *
f7fdd7a1 721next_tokenrun (tokenrun *run)
83dcbb5c 722{
723 if (run->next == NULL)
724 {
3b298764 725 run->next = XNEW (tokenrun);
fb5ab82c 726 run->next->prev = run;
83dcbb5c 727 _cpp_init_tokenrun (run->next, 250);
728 }
729
730 return run->next;
731}
732
f9b5f742 733/* Allocate a single token that is invalidated at the same time as the
734 rest of the tokens on the line. Has its line and col set to the
735 same as the last lexed token, so that diagnostics appear in the
736 right place. */
737cpp_token *
f7fdd7a1 738_cpp_temp_token (cpp_reader *pfile)
f9b5f742 739{
740 cpp_token *old, *result;
741
742 old = pfile->cur_token - 1;
743 if (pfile->cur_token == pfile->cur_run->limit)
744 {
745 pfile->cur_run = next_tokenrun (pfile->cur_run);
746 pfile->cur_token = pfile->cur_run->base;
747 }
748
749 result = pfile->cur_token++;
610625e3 750 result->src_loc = old->src_loc;
f9b5f742 751 return result;
752}
753
10b4496a 754/* Lex a token into RESULT (external interface). Takes care of issues
755 like directive handling, token lookahead, multiple include
3fb1e43b 756 optimization and skipping. */
c00e481c 757const cpp_token *
f7fdd7a1 758_cpp_lex_token (cpp_reader *pfile)
83dcbb5c 759{
fb5ab82c 760 cpp_token *result;
83dcbb5c 761
fb5ab82c 762 for (;;)
83dcbb5c 763 {
fb5ab82c 764 if (pfile->cur_token == pfile->cur_run->limit)
83dcbb5c 765 {
fb5ab82c 766 pfile->cur_run = next_tokenrun (pfile->cur_run);
767 pfile->cur_token = pfile->cur_run->base;
83dcbb5c 768 }
e0ff7935 769 /* We assume that the current token is somewhere in the current
770 run. */
771 if (pfile->cur_token < pfile->cur_run->base
772 || pfile->cur_token >= pfile->cur_run->limit)
773 abort ();
83dcbb5c 774
fb5ab82c 775 if (pfile->lookaheads)
10b4496a 776 {
777 pfile->lookaheads--;
778 result = pfile->cur_token++;
779 }
fb5ab82c 780 else
10b4496a 781 result = _cpp_lex_direct (pfile);
fb5ab82c 782
783 if (result->flags & BOL)
83dcbb5c 784 {
fb5ab82c 785 /* Is this a directive. If _cpp_handle_directive returns
786 false, it is an assembler #. */
787 if (result->type == CPP_HASH
d6af0368 788 /* 6.10.3 p 11: Directives in a list of macro arguments
789 gives undefined behavior. This implementation
790 handles the directive as normal. */
b75b98aa 791 && pfile->state.parsing_args != 1)
d6d3c909 792 {
b75b98aa 793 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
d6d3c909 794 {
b75b98aa 795 if (pfile->directive_result.type == CPP_PADDING)
796 continue;
d6d3c909 797 result = &pfile->directive_result;
d6d3c909 798 }
799 }
b75b98aa 800 else if (pfile->state.in_deferred_pragma)
801 result = &pfile->directive_result;
d6d3c909 802
5621a364 803 if (pfile->cb.line_change && !pfile->state.skipping)
f7fdd7a1 804 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
83dcbb5c 805 }
83dcbb5c 806
fb5ab82c 807 /* We don't skip tokens in directives. */
b75b98aa 808 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
fb5ab82c 809 break;
83dcbb5c 810
fb5ab82c 811 /* Outside a directive, invalidate controlling macros. At file
10b4496a 812 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
7ef5b942 813 get here and MI optimization works. */
83dcbb5c 814 pfile->mi_valid = false;
fb5ab82c 815
816 if (!pfile->state.skipping || result->type == CPP_EOF)
817 break;
83dcbb5c 818 }
819
c00e481c 820 return result;
83dcbb5c 821}
822
a54e0bf8 823/* Returns true if a fresh line has been loaded. */
824bool
f7fdd7a1 825_cpp_get_fresh_line (cpp_reader *pfile)
0bb65704 826{
6e04daf1 827 int return_at_eof;
828
a54e0bf8 829 /* We can't get a new line until we leave the current directive. */
830 if (pfile->state.in_directive)
831 return false;
b1a9ff83 832
a54e0bf8 833 for (;;)
fb83e0d6 834 {
a54e0bf8 835 cpp_buffer *buffer = pfile->buffer;
fb83e0d6 836
a54e0bf8 837 if (!buffer->need_line)
838 return true;
839
840 if (buffer->next_line < buffer->rlimit)
0bb65704 841 {
a54e0bf8 842 _cpp_clean_line (pfile);
843 return true;
844 }
0bb65704 845
a54e0bf8 846 /* First, get out of parsing arguments state. */
847 if (pfile->state.parsing_args)
848 return false;
849
850 /* End of buffer. Non-empty files should end in a newline. */
851 if (buffer->buf != buffer->rlimit
852 && buffer->next_line > buffer->rlimit
853 && !buffer->from_stage3)
854 {
855 /* Only warn once. */
856 buffer->next_line = buffer->rlimit;
dbddc569 857 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
a54e0bf8 858 CPP_BUF_COLUMN (buffer, buffer->cur),
859 "no newline at end of file");
860 }
6e04daf1 861
862 return_at_eof = buffer->return_at_eof;
a54e0bf8 863 _cpp_pop_buffer (pfile);
6e04daf1 864 if (pfile->buffer == NULL || return_at_eof)
11b5269c 865 return false;
a54e0bf8 866 }
0bb65704 867}
868
/* Set result->type to THEN_TYPE and consume one character if the
   next character in the buffer is CHAR; otherwise set it to
   ELSE_TYPE and consume nothing.  Expects `result' and `buffer' to be
   in scope at the point of use.  The arguments are parenthesized so
   that expressions containing low-precedence operators are compared
   and assigned as a whole.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = (ELSE_TYPE);                       \
      if (*buffer->cur == (CHAR))                       \
        buffer->cur++, result->type = (THEN_TYPE);      \
    }                                                   \
  while (0)
1c124f85 877
10b4496a 878/* Lex a token into pfile->cur_token, which is also incremented, to
879 get diagnostics pointing to the correct location.
880
881 Does not handle issues such as token lookahead, multiple-include
4172d65e 882 optimization, directives, skipping etc. This function is only
10b4496a 883 suitable for use by _cpp_lex_token, and in special cases like
884 lex_expansion_token which doesn't care for any of these issues.
885
886 When meeting a newline, returns CPP_EOF if parsing a directive,
887 otherwise returns to the start of the token buffer if permissible.
888 Returns the location of the lexed token. */
889cpp_token *
f7fdd7a1 890_cpp_lex_direct (cpp_reader *pfile)
0578f103 891{
338fa5f7 892 cppchar_t c;
230f0943 893 cpp_buffer *buffer;
338fa5f7 894 const unsigned char *comment_start;
10b4496a 895 cpp_token *result = pfile->cur_token++;
0653b94e 896
83dcbb5c 897 fresh_line:
a54e0bf8 898 result->flags = 0;
82166c5c 899 buffer = pfile->buffer;
11b5269c 900 if (buffer->need_line)
a54e0bf8 901 {
b75b98aa 902 if (pfile->state.in_deferred_pragma)
903 {
904 result->type = CPP_PRAGMA_EOL;
905 pfile->state.in_deferred_pragma = false;
906 if (!pfile->state.pragma_allow_expansion)
907 pfile->state.prevent_expansion--;
908 return result;
909 }
a54e0bf8 910 if (!_cpp_get_fresh_line (pfile))
911 {
912 result->type = CPP_EOF;
2908f819 913 if (!pfile->state.in_directive)
914 {
915 /* Tell the compiler the line number of the EOF token. */
dbddc569 916 result->src_loc = pfile->line_table->highest_line;
2908f819 917 result->flags = BOL;
918 }
a54e0bf8 919 return result;
920 }
921 if (!pfile->keep_tokens)
922 {
923 pfile->cur_run = &pfile->base_run;
924 result = pfile->base_run.base;
925 pfile->cur_token = result + 1;
926 }
927 result->flags = BOL;
928 if (pfile->state.parsing_args == 2)
929 result->flags |= PREV_WHITE;
930 }
11b5269c 931 buffer = pfile->buffer;
83dcbb5c 932 update_tokens_line:
dbddc569 933 result->src_loc = pfile->line_table->highest_line;
f80e83a9 934
83dcbb5c 935 skipped_white:
a54e0bf8 936 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
937 && !pfile->overlaid_buffer)
938 {
939 _cpp_process_line_notes (pfile, false);
dbddc569 940 result->src_loc = pfile->line_table->highest_line;
a54e0bf8 941 }
1c124f85 942 c = *buffer->cur++;
610625e3 943
dbddc569 944 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
945 CPP_BUF_COLUMN (buffer, buffer->cur));
83dcbb5c 946
338fa5f7 947 switch (c)
0578f103 948 {
435fb09b 949 case ' ': case '\t': case '\f': case '\v': case '\0':
950 result->flags |= PREV_WHITE;
a54e0bf8 951 skip_whitespace (pfile, c);
952 goto skipped_white;
338fa5f7 953
a54e0bf8 954 case '\n':
610625e3 955 if (buffer->cur < buffer->rlimit)
956 CPP_INCREMENT_LINE (pfile, 0);
a54e0bf8 957 buffer->need_line = true;
958 goto fresh_line;
732cb4c9 959
338fa5f7 960 case '0': case '1': case '2': case '3': case '4':
961 case '5': case '6': case '7': case '8': case '9':
bce47149 962 {
963 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
964 result->type = CPP_NUMBER;
965 lex_number (pfile, &result->val.str, &nst);
966 warn_about_normalization (pfile, result, &nst);
967 break;
968 }
732cb4c9 969
78c551ad 970 case 'L':
971 /* 'L' may introduce wide characters or strings. */
5bb46c08 972 if (*buffer->cur == '\'' || *buffer->cur == '"')
973 {
4970d4c2 974 lex_string (pfile, result, buffer->cur - 1);
5bb46c08 975 break;
976 }
b1a9ff83 977 /* Fall through. */
78c551ad 978
338fa5f7 979 case '_':
980 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
981 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
982 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
983 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
984 case 'y': case 'z':
985 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
78c551ad 986 case 'G': case 'H': case 'I': case 'J': case 'K':
338fa5f7 987 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
988 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
989 case 'Y': case 'Z':
990 result->type = CPP_NAME;
bce47149 991 {
992 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
993 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
994 &nst);
995 warn_about_normalization (pfile, result, &nst);
996 }
338fa5f7 997
338fa5f7 998 /* Convert named operators to their proper types. */
78c551ad 999 if (result->val.node->flags & NODE_OPERATOR)
338fa5f7 1000 {
1001 result->flags |= NAMED_OP;
720aca92 1002 result->type = (enum cpp_ttype) result->val.node->directive_index;
338fa5f7 1003 }
1004 break;
1005
1006 case '\'':
1007 case '"':
4970d4c2 1008 lex_string (pfile, result, buffer->cur - 1);
338fa5f7 1009 break;
f80e83a9 1010
338fa5f7 1011 case '/':
f0495c2c 1012 /* A potential block or line comment. */
1013 comment_start = buffer->cur;
edaf8cb5 1014 c = *buffer->cur;
1015
f0495c2c 1016 if (c == '*')
1017 {
a54e0bf8 1018 if (_cpp_skip_block_comment (pfile))
d80d2074 1019 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
338fa5f7 1020 }
1c124f85 1021 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
610625e3 1022 || cpp_in_system_header (pfile)))
338fa5f7 1023 {
5db5d057 1024 /* Warn about comments only if pedantically GNUC89, and not
1025 in system headers. */
1026 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
66914e49 1027 && ! buffer->warned_cplusplus_comments)
f80e83a9 1028 {
d80d2074 1029 cpp_error (pfile, CPP_DL_PEDWARN,
ba059ac0 1030 "C++ style comments are not allowed in ISO C90");
d80d2074 1031 cpp_error (pfile, CPP_DL_PEDWARN,
73328dce 1032 "(this will be reported only once per input file)");
f0495c2c 1033 buffer->warned_cplusplus_comments = 1;
1034 }
338fa5f7 1035
e1caf668 1036 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
d80d2074 1037 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
f0495c2c 1038 }
1c124f85 1039 else if (c == '=')
1040 {
edaf8cb5 1041 buffer->cur++;
1c124f85 1042 result->type = CPP_DIV_EQ;
1043 break;
1044 }
1045 else
1046 {
1c124f85 1047 result->type = CPP_DIV;
1048 break;
1049 }
338fa5f7 1050
f0495c2c 1051 if (!pfile->state.save_comments)
1052 {
1053 result->flags |= PREV_WHITE;
83dcbb5c 1054 goto update_tokens_line;
338fa5f7 1055 }
f0495c2c 1056
1057 /* Save the comment as a token in its own right. */
d3f7919d 1058 save_comment (pfile, result, comment_start, c);
fb5ab82c 1059 break;
338fa5f7 1060
1061 case '<':
1062 if (pfile->state.angled_headers)
1063 {
4970d4c2 1064 lex_string (pfile, result, buffer->cur - 1);
1c124f85 1065 break;
338fa5f7 1066 }
0578f103 1067
edaf8cb5 1068 result->type = CPP_LESS;
1069 if (*buffer->cur == '=')
1070 buffer->cur++, result->type = CPP_LESS_EQ;
1071 else if (*buffer->cur == '<')
338fa5f7 1072 {
edaf8cb5 1073 buffer->cur++;
1074 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
338fa5f7 1075 }
edaf8cb5 1076 else if (CPP_OPTION (pfile, digraphs))
1c124f85 1077 {
edaf8cb5 1078 if (*buffer->cur == ':')
1079 {
1080 buffer->cur++;
1081 result->flags |= DIGRAPH;
1082 result->type = CPP_OPEN_SQUARE;
1083 }
1084 else if (*buffer->cur == '%')
1085 {
1086 buffer->cur++;
1087 result->flags |= DIGRAPH;
1088 result->type = CPP_OPEN_BRACE;
1089 }
1c124f85 1090 }
338fa5f7 1091 break;
1092
1093 case '>':
edaf8cb5 1094 result->type = CPP_GREATER;
1095 if (*buffer->cur == '=')
1096 buffer->cur++, result->type = CPP_GREATER_EQ;
1097 else if (*buffer->cur == '>')
338fa5f7 1098 {
edaf8cb5 1099 buffer->cur++;
1100 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1101 }
338fa5f7 1102 break;
1103
f669338a 1104 case '%':
edaf8cb5 1105 result->type = CPP_MOD;
1106 if (*buffer->cur == '=')
1107 buffer->cur++, result->type = CPP_MOD_EQ;
1108 else if (CPP_OPTION (pfile, digraphs))
1c124f85 1109 {
edaf8cb5 1110 if (*buffer->cur == ':')
1c124f85 1111 {
edaf8cb5 1112 buffer->cur++;
1113 result->flags |= DIGRAPH;
1114 result->type = CPP_HASH;
1115 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1116 buffer->cur += 2, result->type = CPP_PASTE;
1117 }
1118 else if (*buffer->cur == '>')
1119 {
1120 buffer->cur++;
1121 result->flags |= DIGRAPH;
1122 result->type = CPP_CLOSE_BRACE;
1c124f85 1123 }
1c124f85 1124 }
338fa5f7 1125 break;
1126
f669338a 1127 case '.':
1c124f85 1128 result->type = CPP_DOT;
edaf8cb5 1129 if (ISDIGIT (*buffer->cur))
1c124f85 1130 {
bce47149 1131 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1c124f85 1132 result->type = CPP_NUMBER;
bce47149 1133 lex_number (pfile, &result->val.str, &nst);
1134 warn_about_normalization (pfile, result, &nst);
1c124f85 1135 }
edaf8cb5 1136 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1137 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1138 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1139 buffer->cur++, result->type = CPP_DOT_STAR;
338fa5f7 1140 break;
0578f103 1141
338fa5f7 1142 case '+':
edaf8cb5 1143 result->type = CPP_PLUS;
1144 if (*buffer->cur == '+')
1145 buffer->cur++, result->type = CPP_PLUS_PLUS;
1146 else if (*buffer->cur == '=')
1147 buffer->cur++, result->type = CPP_PLUS_EQ;
338fa5f7 1148 break;
ac0749c7 1149
338fa5f7 1150 case '-':
edaf8cb5 1151 result->type = CPP_MINUS;
1152 if (*buffer->cur == '>')
338fa5f7 1153 {
edaf8cb5 1154 buffer->cur++;
1c124f85 1155 result->type = CPP_DEREF;
edaf8cb5 1156 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1157 buffer->cur++, result->type = CPP_DEREF_STAR;
1c124f85 1158 }
edaf8cb5 1159 else if (*buffer->cur == '-')
1160 buffer->cur++, result->type = CPP_MINUS_MINUS;
1161 else if (*buffer->cur == '=')
1162 buffer->cur++, result->type = CPP_MINUS_EQ;
338fa5f7 1163 break;
0578f103 1164
338fa5f7 1165 case '&':
edaf8cb5 1166 result->type = CPP_AND;
1167 if (*buffer->cur == '&')
1168 buffer->cur++, result->type = CPP_AND_AND;
1169 else if (*buffer->cur == '=')
1170 buffer->cur++, result->type = CPP_AND_EQ;
338fa5f7 1171 break;
b1a9ff83 1172
338fa5f7 1173 case '|':
edaf8cb5 1174 result->type = CPP_OR;
1175 if (*buffer->cur == '|')
1176 buffer->cur++, result->type = CPP_OR_OR;
1177 else if (*buffer->cur == '=')
1178 buffer->cur++, result->type = CPP_OR_EQ;
338fa5f7 1179 break;
0578f103 1180
338fa5f7 1181 case ':':
edaf8cb5 1182 result->type = CPP_COLON;
1183 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1184 buffer->cur++, result->type = CPP_SCOPE;
1185 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
338fa5f7 1186 {
edaf8cb5 1187 buffer->cur++;
338fa5f7 1188 result->flags |= DIGRAPH;
1c124f85 1189 result->type = CPP_CLOSE_SQUARE;
1190 }
338fa5f7 1191 break;
0578f103 1192
1c124f85 1193 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1194 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1195 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1196 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1197 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1198
a54e0bf8 1199 case '?': result->type = CPP_QUERY; break;
338fa5f7 1200 case '~': result->type = CPP_COMPL; break;
1201 case ',': result->type = CPP_COMMA; break;
1202 case '(': result->type = CPP_OPEN_PAREN; break;
1203 case ')': result->type = CPP_CLOSE_PAREN; break;
1204 case '[': result->type = CPP_OPEN_SQUARE; break;
1205 case ']': result->type = CPP_CLOSE_SQUARE; break;
1206 case '{': result->type = CPP_OPEN_BRACE; break;
1207 case '}': result->type = CPP_CLOSE_BRACE; break;
1208 case ';': result->type = CPP_SEMICOLON; break;
1209
7fd957fe 1210 /* @ is a punctuator in Objective-C. */
9ee99ac6 1211 case '@': result->type = CPP_ATSIGN; break;
338fa5f7 1212
78c551ad 1213 case '$':
2cbf1359 1214 case '\\':
1215 {
1216 const uchar *base = --buffer->cur;
bce47149 1217 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
78c551ad 1218
bce47149 1219 if (forms_identifier_p (pfile, true, &nst))
2cbf1359 1220 {
1221 result->type = CPP_NAME;
bce47149 1222 result->val.node = lex_identifier (pfile, base, true, &nst);
1223 warn_about_normalization (pfile, result, &nst);
2cbf1359 1224 break;
1225 }
1226 buffer->cur++;
bc205914 1227 }
2cbf1359 1228
bc205914 1229 default:
4970d4c2 1230 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1231 break;
338fa5f7 1232 }
fb5ab82c 1233
1234 return result;
338fa5f7 1235}
1236
b1280514 1237/* An upper bound on the number of bytes needed to spell TOKEN.
1238 Does not include preceding whitespace. */
79bd622b 1239unsigned int
f7fdd7a1 1240cpp_token_len (const cpp_token *token)
338fa5f7 1241{
79bd622b 1242 unsigned int len;
cfad5579 1243
79bd622b 1244 switch (TOKEN_SPELL (token))
f80e83a9 1245 {
b1280514 1246 default: len = 4; break;
4970d4c2 1247 case SPELL_LITERAL: len = token->val.str.len; break;
bb1fa6bb 1248 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
f80e83a9 1249 }
b1280514 1250
1251 return len;
cfad5579 1252}
1253
/* Decode the single UTF-8 sequence that starts at NAME and store the
   equivalent \U escape (backslash, 'U', eight lower-case hex digits)
   in BUFFER.  Return the number of bytes read out of NAME.  There are
   always 10 bytes written to BUFFER, and no terminating NUL.

   Aborts if a continuation byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *src = name;
  unsigned char *dst = buffer;
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int remaining;
  int shift;

  /* The number of leading 1 bits in the first byte is the length of
     the whole sequence.  */
  for (lead = *src; (lead & 0x80) != 0; lead <<= 1)
    seq_len++;

  /* Payload bits of the lead byte: whatever follows the length prefix.  */
  code_point = *src & (0x7F >> seq_len);

  /* Every continuation byte contributes its low six bits.  */
  for (remaining = seq_len - 1; remaining > 0; remaining--)
    {
      unsigned char trail = *++src;

      /* Ill-formed UTF-8.  */
      if ((trail & 0xC0) != 0x80)
	abort ();

      code_point = (code_point << 6) | (trail & 0x3F);
    }

  *dst++ = '\\';
  *dst++ = 'U';
  for (shift = 28; shift >= 0; shift -= 4)
    *dst++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1287
1288
f80e83a9 1289/* Write the spelling of a token TOKEN to BUFFER. The buffer must
c5ea33a8 1290 already contain the enough space to hold the token's spelling.
f7fdd7a1 1291 Returns a pointer to the character after the last character written.
bb1fa6bb 1292 FORSTRING is true if this is to be the spelling after translation
1293 phase 1 (this is different for UCNs).
f7fdd7a1 1294 FIXME: Would be nice if we didn't need the PFILE argument. */
79bd622b 1295unsigned char *
f7fdd7a1 1296cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
bb1fa6bb 1297 unsigned char *buffer, bool forstring)
f80e83a9 1298{
7e842f95 1299 switch (TOKEN_SPELL (token))
f80e83a9 1300 {
1301 case SPELL_OPERATOR:
1302 {
1303 const unsigned char *spelling;
1304 unsigned char c;
ab12a39c 1305
f80e83a9 1306 if (token->flags & DIGRAPH)
ee6c4e4b 1307 spelling
1308 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
31674461 1309 else if (token->flags & NAMED_OP)
1310 goto spell_ident;
f80e83a9 1311 else
7e842f95 1312 spelling = TOKEN_NAME (token);
b1a9ff83 1313
f80e83a9 1314 while ((c = *spelling++) != '\0')
1315 *buffer++ = c;
1316 }
1317 break;
ab12a39c 1318
8d27e472 1319 spell_ident:
f80e83a9 1320 case SPELL_IDENT:
bb1fa6bb 1321 if (forstring)
1322 {
1323 memcpy (buffer, NODE_NAME (token->val.node),
1324 NODE_LEN (token->val.node));
1325 buffer += NODE_LEN (token->val.node);
1326 }
1327 else
1328 {
1329 size_t i;
1330 const unsigned char * name = NODE_NAME (token->val.node);
1331
1332 for (i = 0; i < NODE_LEN (token->val.node); i++)
1333 if (name[i] & ~0x7F)
1334 {
1335 i += utf8_to_ucn (buffer, name + i) - 1;
1336 buffer += 10;
1337 }
1338 else
1339 *buffer++ = NODE_NAME (token->val.node)[i];
1340 }
f80e83a9 1341 break;
ab12a39c 1342
4970d4c2 1343 case SPELL_LITERAL:
8d27e472 1344 memcpy (buffer, token->val.str.text, token->val.str.len);
1345 buffer += token->val.str.len;
1346 break;
1347
f80e83a9 1348 case SPELL_NONE:
d80d2074 1349 cpp_error (pfile, CPP_DL_ICE,
1350 "unspellable token %s", TOKEN_NAME (token));
f80e83a9 1351 break;
1352 }
ab12a39c 1353
f80e83a9 1354 return buffer;
1355}
ab12a39c 1356
e484a1cc 1357/* Returns TOKEN spelt as a null-terminated string. The string is
1358 freed when the reader is destroyed. Useful for diagnostics. */
79bd622b 1359unsigned char *
f7fdd7a1 1360cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
b1280514 1361{
1362 unsigned int len = cpp_token_len (token) + 1;
1fdf6039 1363 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
6060326b 1364
bb1fa6bb 1365 end = cpp_spell_token (pfile, token, start, false);
79bd622b 1366 end[0] = '\0';
6060326b 1367
79bd622b 1368 return start;
1369}
6060326b 1370
e484a1cc 1371/* Used by C front ends, which really should move to using
1372 cpp_token_as_text. */
79bd622b 1373const char *
f7fdd7a1 1374cpp_type2name (enum cpp_ttype type)
79bd622b 1375{
1376 return (const char *) token_spellings[type].name;
1377}
6060326b 1378
f9b5f742 1379/* Writes the spelling of token to FP, without any preceding space.
1380 Separated from cpp_spell_token for efficiency - to avoid stdio
1381 double-buffering. */
79bd622b 1382void
f7fdd7a1 1383cpp_output_token (const cpp_token *token, FILE *fp)
79bd622b 1384{
79bd622b 1385 switch (TOKEN_SPELL (token))
6060326b 1386 {
79bd622b 1387 case SPELL_OPERATOR:
1388 {
1389 const unsigned char *spelling;
28874558 1390 int c;
6060326b 1391
79bd622b 1392 if (token->flags & DIGRAPH)
ee6c4e4b 1393 spelling
1394 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
79bd622b 1395 else if (token->flags & NAMED_OP)
1396 goto spell_ident;
1397 else
1398 spelling = TOKEN_NAME (token);
f80e83a9 1399
28874558 1400 c = *spelling;
1401 do
1402 putc (c, fp);
1403 while ((c = *++spelling) != '\0');
79bd622b 1404 }
1405 break;
f80e83a9 1406
79bd622b 1407 spell_ident:
1408 case SPELL_IDENT:
bb1fa6bb 1409 {
1410 size_t i;
1411 const unsigned char * name = NODE_NAME (token->val.node);
1412
1413 for (i = 0; i < NODE_LEN (token->val.node); i++)
1414 if (name[i] & ~0x7F)
1415 {
1416 unsigned char buffer[10];
1417 i += utf8_to_ucn (buffer, name + i) - 1;
1418 fwrite (buffer, 1, 10, fp);
1419 }
1420 else
1421 fputc (NODE_NAME (token->val.node)[i], fp);
1422 }
1423 break;
f80e83a9 1424
4970d4c2 1425 case SPELL_LITERAL:
8d27e472 1426 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1427 break;
1428
79bd622b 1429 case SPELL_NONE:
1430 /* An error, most probably. */
1431 break;
f80e83a9 1432 }
6060326b 1433}
1434
79bd622b 1435/* Compare two tokens. */
1436int
f7fdd7a1 1437_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
6060326b 1438{
79bd622b 1439 if (a->type == b->type && a->flags == b->flags)
1440 switch (TOKEN_SPELL (a))
1441 {
1442 default: /* Keep compiler happy. */
1443 case SPELL_OPERATOR:
1444 return 1;
79bd622b 1445 case SPELL_NONE:
588d632b 1446 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
79bd622b 1447 case SPELL_IDENT:
1448 return a->val.node == b->val.node;
4970d4c2 1449 case SPELL_LITERAL:
79bd622b 1450 return (a->val.str.len == b->val.str.len
1451 && !memcmp (a->val.str.text, b->val.str.text,
1452 a->val.str.len));
1453 }
6060326b 1454
f80e83a9 1455 return 0;
1456}
1457
79bd622b 1458/* Returns nonzero if a space should be inserted to avoid an
1459 accidental token paste for output. For simplicity, it is
1460 conservative, and occasionally advises a space where one is not
1461 needed, e.g. "." and ".2". */
79bd622b 1462int
f7fdd7a1 1463cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1464 const cpp_token *token2)
6060326b 1465{
79bd622b 1466 enum cpp_ttype a = token1->type, b = token2->type;
1467 cppchar_t c;
6060326b 1468
79bd622b 1469 if (token1->flags & NAMED_OP)
1470 a = CPP_NAME;
1471 if (token2->flags & NAMED_OP)
1472 b = CPP_NAME;
6060326b 1473
79bd622b 1474 c = EOF;
1475 if (token2->flags & DIGRAPH)
ee6c4e4b 1476 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
79bd622b 1477 else if (token_spellings[b].category == SPELL_OPERATOR)
1478 c = token_spellings[b].name[0];
6060326b 1479
79bd622b 1480 /* Quickly get everything that can paste with an '='. */
ee6c4e4b 1481 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
79bd622b 1482 return 1;
6060326b 1483
79bd622b 1484 switch (a)
6060326b 1485 {
e58c07f7 1486 case CPP_GREATER: return c == '>';
1487 case CPP_LESS: return c == '<' || c == '%' || c == ':';
79bd622b 1488 case CPP_PLUS: return c == '+';
1489 case CPP_MINUS: return c == '-' || c == '>';
1490 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1491 case CPP_MOD: return c == ':' || c == '>';
1492 case CPP_AND: return c == '&';
1493 case CPP_OR: return c == '|';
1494 case CPP_COLON: return c == ':' || c == '>';
1495 case CPP_DEREF: return c == '*';
efdcc728 1496 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
79bd622b 1497 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1498 case CPP_NAME: return ((b == CPP_NUMBER
1499 && name_p (pfile, &token2->val.str))
1500 || b == CPP_NAME
1501 || b == CPP_CHAR || b == CPP_STRING); /* L */
1502 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1503 || c == '.' || c == '+' || c == '-');
2cbf1359 1504 /* UCNs */
bc205914 1505 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1506 && b == CPP_NAME)
2cbf1359 1507 || (CPP_OPTION (pfile, objc)
bc205914 1508 && token1->val.str.text[0] == '@'
2cbf1359 1509 && (b == CPP_NAME || b == CPP_STRING)));
79bd622b 1510 default: break;
6060326b 1511 }
6060326b 1512
deb356cf 1513 return 0;
6060326b 1514}
1515
79bd622b 1516/* Output all the remaining tokens on the current line, and a newline
f9b5f742 1517 character, to FP. Leading whitespace is removed. If there are
1518 macros, special token padding is not performed. */
6060326b 1519void
f7fdd7a1 1520cpp_output_line (cpp_reader *pfile, FILE *fp)
6060326b 1521{
f9b5f742 1522 const cpp_token *token;
7e842f95 1523
f9b5f742 1524 token = cpp_get_token (pfile);
1525 while (token->type != CPP_EOF)
7e842f95 1526 {
f9b5f742 1527 cpp_output_token (token, fp);
1528 token = cpp_get_token (pfile);
1529 if (token->flags & PREV_WHITE)
1530 putc (' ', fp);
7e842f95 1531 }
1532
79bd622b 1533 putc ('\n', fp);
f80e83a9 1534}
6060326b 1535
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when BUFF
   is extended: MIN_EXTRA plus twice the room left between BUFF->cur
   and BUFF->limit.  Every macro argument is parenthesized so that any
   expression can be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1550
1785b647 1551/* Create a new allocation buffer. Place the control block at the end
1552 of the buffer, so that buffer overflows will cause immediate chaos. */
06c92cbc 1553static _cpp_buff *
f7fdd7a1 1554new_buff (size_t len)
06c92cbc 1555{
1556 _cpp_buff *result;
1fdf6039 1557 unsigned char *base;
06c92cbc 1558
084163dc 1559 if (len < MIN_BUFF_SIZE)
1560 len = MIN_BUFF_SIZE;
198b48a0 1561 len = CPP_ALIGN (len);
06c92cbc 1562
720aca92 1563 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
06c92cbc 1564 result = (_cpp_buff *) (base + len);
1565 result->base = base;
1566 result->cur = base;
1567 result->limit = base + len;
1568 result->next = NULL;
1569 return result;
1570}
1571
1572/* Place a chain of unwanted allocation buffers on the free list. */
1573void
f7fdd7a1 1574_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
06c92cbc 1575{
1576 _cpp_buff *end = buff;
1577
1578 while (end->next)
1579 end = end->next;
1580 end->next = pfile->free_buffs;
1581 pfile->free_buffs = buff;
1582}
1583
1584/* Return a free buffer of size at least MIN_SIZE. */
1585_cpp_buff *
f7fdd7a1 1586_cpp_get_buff (cpp_reader *pfile, size_t min_size)
06c92cbc 1587{
1588 _cpp_buff *result, **p;
1589
1590 for (p = &pfile->free_buffs;; p = &(*p)->next)
1591 {
4b31a107 1592 size_t size;
084163dc 1593
1594 if (*p == NULL)
06c92cbc 1595 return new_buff (min_size);
084163dc 1596 result = *p;
1597 size = result->limit - result->base;
1598 /* Return a buffer that's big enough, but don't waste one that's
1599 way too big. */
4085c149 1600 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
06c92cbc 1601 break;
1602 }
1603
1604 *p = result->next;
1605 result->next = NULL;
1606 result->cur = result->base;
1607 return result;
1608}
1609
20dd417a 1610/* Creates a new buffer with enough space to hold the uncommitted
e6a5f963 1611 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1612 the excess bytes to the new buffer. Chains the new buffer after
1613 BUFF, and returns the new buffer. */
06c92cbc 1614_cpp_buff *
f7fdd7a1 1615_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
06c92cbc 1616{
4b31a107 1617 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
e6a5f963 1618 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
06c92cbc 1619
e6a5f963 1620 buff->next = new_buff;
1621 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1622 return new_buff;
1623}
1624
20dd417a 1625/* Creates a new buffer with enough space to hold the uncommitted
e6a5f963 1626 remaining bytes of the buffer pointed to by BUFF, and at least
1627 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1628 Chains the new buffer before the buffer pointed to by BUFF, and
1629 updates the pointer to point to the new buffer. */
1630void
f7fdd7a1 1631_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
e6a5f963 1632{
1633 _cpp_buff *new_buff, *old_buff = *pbuff;
1634 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1635
1636 new_buff = _cpp_get_buff (pfile, size);
1637 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1638 new_buff->next = old_buff;
1639 *pbuff = new_buff;
06c92cbc 1640}
1641
1642/* Free a chain of buffers starting at BUFF. */
1643void
f82b06e0 1644_cpp_free_buff (_cpp_buff *buff)
06c92cbc 1645{
1646 _cpp_buff *next;
1647
1648 for (; buff; buff = next)
1649 {
1650 next = buff->next;
1651 free (buff->base);
1652 }
1653}
deb356cf 1654
1fdf6039 1655/* Allocate permanent, unaligned storage of length LEN. */
1656unsigned char *
f7fdd7a1 1657_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1fdf6039 1658{
1659 _cpp_buff *buff = pfile->u_buff;
1660 unsigned char *result = buff->cur;
1661
1662 if (len > (size_t) (buff->limit - result))
1663 {
1664 buff = _cpp_get_buff (pfile, len);
1665 buff->next = pfile->u_buff;
1666 pfile->u_buff = buff;
1667 result = buff->cur;
1668 }
1669
1670 buff->cur = result + len;
1671 return result;
1672}
1673
1e0ef2fd 1674/* Allocate permanent, unaligned storage of length LEN from a_buff.
1675 That buffer is used for growing allocations when saving macro
1676 replacement lists in a #define, and when parsing an answer to an
1677 assertion in #assert, #unassert or #if (and therefore possibly
1678 whilst expanding macros). It therefore must not be used by any
1679 code that they might call: specifically the lexer and the guts of
1680 the macro expander.
1681
1682 All existing other uses clearly fit this restriction: storing
1683 registered pragmas during initialization. */
79bd622b 1684unsigned char *
f7fdd7a1 1685_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
89b05ef6 1686{
e6a5f963 1687 _cpp_buff *buff = pfile->a_buff;
1688 unsigned char *result = buff->cur;
89b05ef6 1689
e6a5f963 1690 if (len > (size_t) (buff->limit - result))
89b05ef6 1691 {
e6a5f963 1692 buff = _cpp_get_buff (pfile, len);
1693 buff->next = pfile->a_buff;
1694 pfile->a_buff = buff;
1695 result = buff->cur;
89b05ef6 1696 }
f80e83a9 1697
e6a5f963 1698 buff->cur = result + len;
79bd622b 1699 return result;
f80e83a9 1700}
c39ed964 1701
1702/* Say which field of TOK is in use. */
1703
1704enum cpp_token_fld_kind
1705cpp_token_val_index (cpp_token *tok)
1706{
1707 switch (TOKEN_SPELL (tok))
1708 {
1709 case SPELL_IDENT:
1710 return CPP_TOKEN_FLD_NODE;
1711 case SPELL_LITERAL:
1712 return CPP_TOKEN_FLD_STR;
1713 case SPELL_NONE:
1714 if (tok->type == CPP_MACRO_ARG)
1715 return CPP_TOKEN_FLD_ARG_NO;
1716 else if (tok->type == CPP_PADDING)
1717 return CPP_TOKEN_FLD_SOURCE;
d6d3c909 1718 else if (tok->type == CPP_PRAGMA)
b75b98aa 1719 return CPP_TOKEN_FLD_PRAGMA;
c39ed964 1720 /* else fall through */
1721 default:
1722 return CPP_TOKEN_FLD_NONE;
1723 }
1724}