]> git.ipfire.org Git - thirdparty/gcc.git/blame - libcpp/lex.c
* cgraph.c (cgraph_create_edge, cgraph_set_call_stmt): Set proper cfun.
[thirdparty/gcc.git] / libcpp / lex.c
CommitLineData
0578f103 1/* CPP Library - lexical analysis.
6bc9506f 2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
3 Free Software Foundation, Inc.
0578f103 4 Contributed by Per Bothner, 1994-95.
5 Based on CCCP program by Paul Rubin, June 1986
6 Adapted to ANSI C, Richard Stallman, Jan 1987
7 Broken out to separate file, Zack Weinberg, Mar 2000
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
6bc9506f 11Free Software Foundation; either version 3, or (at your option) any
0578f103 12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
6bc9506f 20along with this program; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
0578f103 22
23#include "config.h"
24#include "system.h"
0578f103 25#include "cpplib.h"
d856c8a6 26#include "internal.h"
0578f103 27
/* Classifies how the spelling of a token type is recorded in the
   token_spellings table.  Entries produced by the OP table macro are
   always SPELL_OPERATOR; entries produced by the TK table macro pick
   one of these enumerators by pasting their second argument onto
   "SPELL_".  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
35
79bd622b 36struct token_spelling
241e762e 37{
79bd622b 38 enum spell_type category;
39 const unsigned char *name;
241e762e 40};
41
0ca849f9 42static const unsigned char *const digraph_spellings[] =
924bbf02 43{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
79bd622b 44
924bbf02 45#define OP(e, s) { SPELL_OPERATOR, UC s },
46#define TK(e, s) { SPELL_ ## s, UC #e },
0ca849f9 47static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
79bd622b 48#undef OP
49#undef TK
50
51#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
52#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
e2f9a79f 53
f7fdd7a1 54static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
55static int skip_line_comment (cpp_reader *);
56static void skip_whitespace (cpp_reader *, cppchar_t);
f7fdd7a1 57static void lex_string (cpp_reader *, cpp_token *, const uchar *);
58static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
956c6108 59static void store_comment (cpp_reader *, cpp_token *);
f7fdd7a1 60static void create_literal (cpp_reader *, cpp_token *, const uchar *,
61 unsigned int, enum cpp_ttype);
62static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
63static int name_p (cpp_reader *, const cpp_string *);
f7fdd7a1 64static tokenrun *next_tokenrun (tokenrun *);
65
f7fdd7a1 66static _cpp_buff *new_buff (size_t);
bce8e0c0 67
e920deaf 68
f80e83a9 69/* Utility routine:
2c63d6c8 70
76faa4c0 71 Compares, the token TOKEN to the NUL-terminated string STRING.
72 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
f80e83a9 73int
f7fdd7a1 74cpp_ideq (const cpp_token *token, const char *string)
f80e83a9 75{
76faa4c0 76 if (token->type != CPP_NAME)
f80e83a9 77 return 0;
76faa4c0 78
b6d18b0a 79 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
bce8e0c0 80}
50fd6b48 81
a54e0bf8 82/* Record a note TYPE at byte POS into the current cleaned logical
83 line. */
1e0ef2fd 84static void
f7fdd7a1 85add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
338fa5f7 86{
a54e0bf8 87 if (buffer->notes_used == buffer->notes_cap)
88 {
89 buffer->notes_cap = buffer->notes_cap * 2 + 200;
720aca92 90 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
91 buffer->notes_cap);
a54e0bf8 92 }
338fa5f7 93
a54e0bf8 94 buffer->notes[buffer->notes_used].pos = pos;
95 buffer->notes[buffer->notes_used].type = type;
96 buffer->notes_used++;
338fa5f7 97}
98
a54e0bf8 99/* Returns with a logical line that contains no escaped newlines or
100 trigraphs. This is a time-critical inner loop. */
/* The line starting at buffer->next_line is processed in place.  The
   list of line notes is reset, buffer->cur and buffer->line_base are
   pointed at the start of the line and buffer->need_line is cleared.
   Unless buffer->from_stage3 is set, backslash-newline pairs are
   spliced out and, when the trigraphs option is on, trigraphs are
   replaced; a line note is recorded for every splice and for every
   trigraph seen, converted or not.  On return the cleaned line is
   terminated by '\n', a sentinel note has been added just past that
   terminator, and buffer->next_line points just past the input that
   was consumed.

   NOTE(review): neither scanning loop tests for the end of the buffer
   except when it sees '\n' or '\r', so the buffer is presumably
   terminated by such a character at buffer->rlimit -- confirm against
   the code that fills the buffer.  */
101void
f7fdd7a1 102_cpp_clean_line (cpp_reader *pfile)
0578f103 103{
a54e0bf8 104 cpp_buffer *buffer;
105 const uchar *s;
106 uchar c, *d, *p;
1e0ef2fd 107
a54e0bf8 108 buffer = pfile->buffer;
109 buffer->cur_note = buffer->notes_used = 0;
110 buffer->cur = buffer->line_base = buffer->next_line;
111 buffer->need_line = false;
/* S is pre-incremented before every read, so start one byte before
   the line.  */
112 s = buffer->next_line - 1;
1e0ef2fd 113
a54e0bf8 114 if (!buffer->from_stage3)
0578f103 115 {
/* Most recent backslash seen by the fast loop, if any.  */
5008f5c5 116 const uchar *pbackslash = NULL;
117
54d3be91 118 /* Short circuit for the common case of an un-escaped line with
119 no trigraphs. The primary win here is by not writing any
120 data back to memory until we have to. */
121 for (;;)
122 {
123 c = *++s;
5008f5c5 124 if (__builtin_expect (c == '\n', false)
125 || __builtin_expect (c == '\r', false))
54d3be91 126 {
/* Nothing has been rewritten so far, so the cleaned line ends
   exactly where the line terminator was found.  */
127 d = (uchar *) s;
128
5008f5c5 129 if (__builtin_expect (s == buffer->rlimit, false))
54d3be91 130 goto done;
131
132 /* DOS line ending? */
5008f5c5 133 if (__builtin_expect (c == '\r', false)
134 && s[1] == '\n')
135 {
136 s++;
137 if (s == buffer->rlimit)
138 goto done;
139 }
54d3be91 140
/* No backslash was seen on this line, so the newline cannot be
   escaped.  */
5008f5c5 141 if (__builtin_expect (pbackslash == NULL, true))
54d3be91 142 goto done;
143
5008f5c5 144 /* Check for escaped newline. */
54d3be91 145 p = d;
5008f5c5 146 while (is_nvspace (p[-1]))
54d3be91 147 p--;
5008f5c5 148 if (p - 1 != pbackslash)
54d3be91 149 goto done;
150
151 /* Have an escaped newline; process it and proceed to
152 the slow path. */
/* The note type is ' ' when white space separated the backslash from
   the newline and '\\' otherwise.  D is set so that the slow loop's
   "*++d" overwrites the backslash, and buffer->next_line temporarily
   marks that position as the lower bound for the slow loop's backward
   scan; it receives its final value at "done".  */
153 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
154 d = p - 2;
155 buffer->next_line = p - 1;
156 break;
157 }
5008f5c5 158 if (__builtin_expect (c == '\\', false))
159 pbackslash = s;
160 else if (__builtin_expect (c == '?', false)
161 && __builtin_expect (s[1] == '?', false)
162 && _cpp_trigraph_map[s[2]])
54d3be91 163 {
164 /* Have a trigraph. We may or may not have to convert
165 it. Add a line note regardless, for -Wtrigraphs. */
166 add_line_note (buffer, s, s[2]);
167 if (CPP_OPTION (pfile, trigraphs))
168 {
169 /* We do, and that means we have to switch to the
170 slow path. */
171 d = (uchar *) s;
172 *d = _cpp_trigraph_map[s[2]];
173 s += 2;
174 break;
175 }
176 }
177 }
178
a54e0bf8 179
/* Slow path: copy the remainder of the line from S down to D,
   dropping escaped newlines and converting trigraphs on the way.  */
180 for (;;)
4b912310 181 {
a54e0bf8 182 c = *++s;
183 *++d = c;
184
185 if (c == '\n' || c == '\r')
186 {
187 /* Handle DOS line endings. */
188 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
189 s++;
190 if (s == buffer->rlimit)
191 break;
192
193 /* Escaped? */
194 p = d;
195 while (p != buffer->next_line && is_nvspace (p[-1]))
196 p--;
197 if (p == buffer->next_line || p[-1] != '\\')
198 break;
199
aad4a87f 200 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
a54e0bf8 201 d = p - 2;
202 buffer->next_line = p - 1;
203 }
204 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
205 {
206 /* Add a note regardless, for the benefit of -Wtrigraphs. */
aad4a87f 207 add_line_note (buffer, d, s[2]);
a54e0bf8 208 if (CPP_OPTION (pfile, trigraphs))
209 {
210 *d = _cpp_trigraph_map[s[2]];
211 s += 2;
212 }
213 }
4b912310 214 }
0578f103 215 }
a54e0bf8 216 else
217 {
/* With buffer->from_stage3 set nothing is spliced or converted: only
   the end of the line is located.  */
218 do
219 s++;
220 while (*s != '\n' && *s != '\r');
221 d = (uchar *) s;
222
223 /* Handle DOS line endings. */
224 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
225 s++;
226 }
338fa5f7 227
54d3be91 228 done:
/* D is the end of the cleaned line, S the last input byte consumed.  */
a54e0bf8 229 *d = '\n';
aad4a87f 230 /* A sentinel note that should never be processed. */
231 add_line_note (buffer, d + 1, '\n');
a54e0bf8 232 buffer->next_line = s + 1;
0578f103 233}
234
3078f2b2 235/* Return true if the trigraph indicated by NOTE should be warned
236 about in a comment. */
237static bool
f7fdd7a1 238warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
3078f2b2 239{
240 const uchar *p;
241
242 /* Within comments we don't warn about trigraphs, unless the
243 trigraph forms an escaped newline, as that may change
7ef5b942 244 behavior. */
3078f2b2 245 if (note->type != '/')
246 return false;
247
248 /* If -trigraphs, then this was an escaped newline iff the next note
249 is coincident. */
250 if (CPP_OPTION (pfile, trigraphs))
251 return note[1].pos == note->pos;
252
253 /* Otherwise, see if this forms an escaped newline. */
254 p = note->pos + 3;
255 while (is_nvspace (*p))
256 p++;
257
258 /* There might have been escaped newlines between the trigraph and the
259 newline we found. Hence the position test. */
260 return (*p == '\n' && p < note[1].pos);
261}
262
a54e0bf8 263/* Process the notes created by add_line_note as far as the current
264 location. */
265void
f7fdd7a1 266_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
0578f103 267{
c808d026 268 cpp_buffer *buffer = pfile->buffer;
269
a54e0bf8 270 for (;;)
f80e83a9 271 {
a54e0bf8 272 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
273 unsigned int col;
396ffa86 274
a54e0bf8 275 if (note->pos > buffer->cur)
276 break;
396ffa86 277
a54e0bf8 278 buffer->cur_note++;
279 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
435fb09b 280
aad4a87f 281 if (note->type == '\\' || note->type == ' ')
a54e0bf8 282 {
aad4a87f 283 if (note->type == ' ' && !in_comment)
dbddc569 284 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
a54e0bf8 285 "backslash and newline separated by space");
aad4a87f 286
a54e0bf8 287 if (buffer->next_line > buffer->rlimit)
1e0ef2fd 288 {
dbddc569 289 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
a54e0bf8 290 "backslash-newline at end of file");
291 /* Prevent "no newline at end of file" warning. */
292 buffer->next_line = buffer->rlimit;
1e0ef2fd 293 }
a54e0bf8 294
295 buffer->line_base = note->pos;
610625e3 296 CPP_INCREMENT_LINE (pfile, 0);
338fa5f7 297 }
aad4a87f 298 else if (_cpp_trigraph_map[note->type])
299 {
3078f2b2 300 if (CPP_OPTION (pfile, warn_trigraphs)
301 && (!in_comment || warn_in_comment (pfile, note)))
aad4a87f 302 {
303 if (CPP_OPTION (pfile, trigraphs))
dbddc569 304 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
aad4a87f 305 "trigraph ??%c converted to %c",
306 note->type,
307 (int) _cpp_trigraph_map[note->type]);
308 else
1542b1ef 309 {
310 cpp_error_with_line
dbddc569 311 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1542b1ef 312 "trigraph ??%c ignored, use -trigraphs to enable",
313 note->type);
314 }
aad4a87f 315 }
316 }
317 else
318 abort ();
f80e83a9 319 }
0578f103 320}
321
338fa5f7 322/* Skip a C-style block comment. We find the end of the comment by
323 seeing if an asterisk is before every '/' we encounter. Returns
edaf8cb5 324 nonzero if comment terminated by EOF, zero otherwise.
325
326 Buffer->cur points to the initial asterisk of the comment. */
a54e0bf8 327bool
f7fdd7a1 328_cpp_skip_block_comment (cpp_reader *pfile)
0578f103 329{
f80e83a9 330 cpp_buffer *buffer = pfile->buffer;
54d3be91 331 const uchar *cur = buffer->cur;
332 uchar c;
338fa5f7 333
54d3be91 334 cur++;
335 if (*cur == '/')
336 cur++;
338fa5f7 337
a54e0bf8 338 for (;;)
339 {
338fa5f7 340 /* People like decorating comments with '*', so check for '/'
341 instead for efficiency. */
54d3be91 342 c = *cur++;
343
f80e83a9 344 if (c == '/')
0578f103 345 {
54d3be91 346 if (cur[-2] == '*')
338fa5f7 347 break;
f80e83a9 348
338fa5f7 349 /* Warn about potential nested comments, but not if the '/'
3fb1e43b 350 comes immediately before the true comment delimiter.
f80e83a9 351 Don't bother to get it right across escaped newlines. */
338fa5f7 352 if (CPP_OPTION (pfile, warn_comments)
54d3be91 353 && cur[0] == '*' && cur[1] != '/')
354 {
355 buffer->cur = cur;
d80d2074 356 cpp_error_with_line (pfile, CPP_DL_WARNING,
dbddc569 357 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
54d3be91 358 "\"/*\" within comment");
359 }
0578f103 360 }
a54e0bf8 361 else if (c == '\n')
362 {
610625e3 363 unsigned int cols;
54d3be91 364 buffer->cur = cur - 1;
a54e0bf8 365 _cpp_process_line_notes (pfile, true);
366 if (buffer->next_line >= buffer->rlimit)
367 return true;
368 _cpp_clean_line (pfile);
610625e3 369
370 cols = buffer->next_line - buffer->line_base;
371 CPP_INCREMENT_LINE (pfile, cols);
372
54d3be91 373 cur = buffer->cur;
a54e0bf8 374 }
0578f103 375 }
f80e83a9 376
54d3be91 377 buffer->cur = cur;
3078f2b2 378 _cpp_process_line_notes (pfile, true);
a54e0bf8 379 return false;
0578f103 380}
381
1c124f85 382/* Skip a C++ line comment, leaving buffer->cur pointing to the
d10cfa8d 383 terminating newline. Handles escaped newlines. Returns nonzero
1c124f85 384 if a multiline comment. */
f80e83a9 385static int
f7fdd7a1 386skip_line_comment (cpp_reader *pfile)
0578f103 387{
f669338a 388 cpp_buffer *buffer = pfile->buffer;
4999c35b 389 source_location orig_line = pfile->line_table->highest_line;
f80e83a9 390
a54e0bf8 391 while (*buffer->cur != '\n')
392 buffer->cur++;
1c124f85 393
a54e0bf8 394 _cpp_process_line_notes (pfile, true);
dbddc569 395 return orig_line != pfile->line_table->highest_line;
f80e83a9 396}
0578f103 397
a54e0bf8 398/* Skips whitespace, saving the next non-whitespace character. */
b86584f6 399static void
f7fdd7a1 400skip_whitespace (cpp_reader *pfile, cppchar_t c)
f80e83a9 401{
402 cpp_buffer *buffer = pfile->buffer;
fe9eb18b 403 bool saw_NUL = false;
0578f103 404
338fa5f7 405 do
f80e83a9 406 {
78719282 407 /* Horizontal space always OK. */
a54e0bf8 408 if (c == ' ' || c == '\t')
338fa5f7 409 ;
338fa5f7 410 /* Just \f \v or \0 left. */
78719282 411 else if (c == '\0')
fe9eb18b 412 saw_NUL = true;
79bd622b 413 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
dbddc569 414 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
73328dce 415 CPP_BUF_COL (buffer),
416 "%s in preprocessing directive",
417 c == '\f' ? "form feed" : "vertical tab");
338fa5f7 418
338fa5f7 419 c = *buffer->cur++;
0578f103 420 }
2c0e001b 421 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
338fa5f7 422 while (is_nvspace (c));
423
fe9eb18b 424 if (saw_NUL)
d80d2074 425 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
fe9eb18b 426
1c124f85 427 buffer->cur--;
f80e83a9 428}
0578f103 429
79bd622b 430/* See if the characters of a number token are valid in a name (no
431 '.', '+' or '-'). */
432static int
f7fdd7a1 433name_p (cpp_reader *pfile, const cpp_string *string)
79bd622b 434{
435 unsigned int i;
436
437 for (i = 0; i < string->len; i++)
438 if (!is_idchar (string->text[i]))
439 return 0;
440
b1a9ff83 441 return 1;
79bd622b 442}
443
bce47149 444/* After parsing an identifier or other sequence, produce a warning about
445 sequences not in NFC/NFKC. */
446static void
447warn_about_normalization (cpp_reader *pfile,
448 const cpp_token *token,
449 const struct normalize_state *s)
450{
451 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
452 && !pfile->state.skipping)
453 {
454 /* Make sure that the token is printed using UCNs, even
455 if we'd otherwise happily print UTF-8. */
720aca92 456 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
bce47149 457 size_t sz;
458
459 sz = cpp_spell_token (pfile, token, buf, false) - buf;
460 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
461 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
3827dee5 462 "`%.*s' is not in NFKC", (int) sz, buf);
bce47149 463 else
464 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
3827dee5 465 "`%.*s' is not in NFC", (int) sz, buf);
bce47149 466 }
467}
468
5bb46c08 469/* Returns TRUE if the sequence starting at buffer->cur is invalid in
2cbf1359 470 an identifier. FIRST is TRUE if this starts an identifier. */
5bb46c08 471static bool
bce47149 472forms_identifier_p (cpp_reader *pfile, int first,
473 struct normalize_state *state)
5bb46c08 474{
2cbf1359 475 cpp_buffer *buffer = pfile->buffer;
476
477 if (*buffer->cur == '$')
478 {
479 if (!CPP_OPTION (pfile, dollars_in_ident))
480 return false;
481
482 buffer->cur++;
f0c2775b 483 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
2cbf1359 484 {
f0c2775b 485 CPP_OPTION (pfile, warn_dollars) = 0;
d80d2074 486 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
2cbf1359 487 }
488
489 return true;
490 }
5bb46c08 491
2cbf1359 492 /* Is this a syntactically valid UCN? */
865c4e44 493 if (CPP_OPTION (pfile, extended_identifiers)
4e9d1e6d 494 && *buffer->cur == '\\'
2cbf1359 495 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
5bb46c08 496 {
2cbf1359 497 buffer->cur += 2;
bce47149 498 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
499 state))
2cbf1359 500 return true;
501 buffer->cur -= 2;
5bb46c08 502 }
5bb46c08 503
2cbf1359 504 return false;
5bb46c08 505}
506
507/* Lex an identifier starting at BUFFER->CUR - 1. */
338fa5f7 508static cpp_hashnode *
bce47149 509lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
510 struct normalize_state *nst)
0578f103 511{
79bd622b 512 cpp_hashnode *result;
bb1fa6bb 513 const uchar *cur;
3eb3f293 514 unsigned int len;
515 unsigned int hash = HT_HASHSTEP (0, *base);
66a5287e 516
3eb3f293 517 cur = pfile->buffer->cur;
bb1fa6bb 518 if (! starts_ucn)
519 while (ISIDNUM (*cur))
520 {
521 hash = HT_HASHSTEP (hash, *cur);
522 cur++;
523 }
524 pfile->buffer->cur = cur;
bce47149 525 if (starts_ucn || forms_identifier_p (pfile, false, nst))
78a11351 526 {
bb1fa6bb 527 /* Slower version for identifiers containing UCNs (or $). */
528 do {
529 while (ISIDNUM (*pfile->buffer->cur))
bce47149 530 {
531 pfile->buffer->cur++;
532 NORMALIZE_STATE_UPDATE_IDNUM (nst);
533 }
534 } while (forms_identifier_p (pfile, false, nst));
bb1fa6bb 535 result = _cpp_interpret_identifier (pfile, base,
536 pfile->buffer->cur - base);
66a5287e 537 }
bb1fa6bb 538 else
539 {
540 len = cur - base;
541 hash = HT_HASHFINISH (hash, len);
5bb46c08 542
e297899b 543 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
544 base, len, hash, HT_ALLOC));
bb1fa6bb 545 }
66a5287e 546
5bb46c08 547 /* Rarely, identifiers require diagnostics when lexed. */
66a5287e 548 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
549 && !pfile->state.skipping, 0))
550 {
551 /* It is allowed to poison the same identifier twice. */
552 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
d80d2074 553 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
66a5287e 554 NODE_NAME (result));
555
556 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
557 replacement list of a variadic macro. */
558 if (result == pfile->spec_nodes.n__VA_ARGS__
559 && !pfile->state.va_args_ok)
d80d2074 560 cpp_error (pfile, CPP_DL_PEDWARN,
f7fdd7a1 561 "__VA_ARGS__ can only appear in the expansion"
562 " of a C99 variadic macro");
66a5287e 563 }
564
565 return result;
566}
567
5bb46c08 568/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
0578f103 569static void
bce47149 570lex_number (cpp_reader *pfile, cpp_string *number,
571 struct normalize_state *nst)
0578f103 572{
b6d18b0a 573 const uchar *cur;
5bb46c08 574 const uchar *base;
575 uchar *dest;
0578f103 576
5bb46c08 577 base = pfile->buffer->cur - 1;
578 do
f80e83a9 579 {
5bb46c08 580 cur = pfile->buffer->cur;
338fa5f7 581
5bb46c08 582 /* N.B. ISIDNUM does not include $. */
583 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
bce47149 584 {
585 cur++;
586 NORMALIZE_STATE_UPDATE_IDNUM (nst);
587 }
0578f103 588
78a11351 589 pfile->buffer->cur = cur;
0578f103 590 }
bce47149 591 while (forms_identifier_p (pfile, false, nst));
79bd622b 592
5bb46c08 593 number->len = cur - base;
594 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
595 memcpy (dest, base, number->len);
596 dest[number->len] = '\0';
597 number->text = dest;
79bd622b 598}
599
4970d4c2 600/* Create a token of type TYPE with a literal spelling. */
601static void
f7fdd7a1 602create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
603 unsigned int len, enum cpp_ttype type)
4970d4c2 604{
605 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
606
607 memcpy (dest, base, len);
608 dest[len] = '\0';
609 token->type = type;
610 token->val.str.len = len;
611 token->val.str.text = dest;
612}
613
5bb46c08 614/* Lexes a string, character constant, or angle-bracketed header file
4970d4c2 615 name. The stored string contains the spelling, including opening
924bbf02 616 quote and leading any leading 'L', 'u' or 'U'. It returns the type
7811eab5 617 of the literal, or CPP_OTHER if it was not properly terminated, or
618 CPP_LESS for an unterminated header name which must be relexed as
619 normal tokens.
4970d4c2 620
621 The spelling is NUL-terminated, but it is not guaranteed that this
622 is the first NUL since embedded NULs are preserved. */
f80e83a9 623static void
f7fdd7a1 624lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
0578f103 625{
4970d4c2 626 bool saw_NUL = false;
627 const uchar *cur;
5bb46c08 628 cppchar_t terminator;
4970d4c2 629 enum cpp_ttype type;
630
631 cur = base;
632 terminator = *cur++;
924bbf02 633 if (terminator == 'L' || terminator == 'u' || terminator == 'U')
4970d4c2 634 terminator = *cur++;
635 if (terminator == '\"')
924bbf02 636 type = (*base == 'L' ? CPP_WSTRING :
637 *base == 'U' ? CPP_STRING32 :
638 *base == 'u' ? CPP_STRING16 : CPP_STRING);
4970d4c2 639 else if (terminator == '\'')
924bbf02 640 type = (*base == 'L' ? CPP_WCHAR :
641 *base == 'U' ? CPP_CHAR32 :
642 *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
4970d4c2 643 else
644 terminator = '>', type = CPP_HEADER_NAME;
79bd622b 645
338fa5f7 646 for (;;)
0578f103 647 {
4970d4c2 648 cppchar_t c = *cur++;
4b0c16ee 649
edaf8cb5 650 /* In #include-style directives, terminators are not escapable. */
4970d4c2 651 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
652 cur++;
653 else if (c == terminator)
5bb46c08 654 break;
4970d4c2 655 else if (c == '\n')
338fa5f7 656 {
4970d4c2 657 cur--;
7811eab5 658 /* Unmatched quotes always yield undefined behavior, but
659 greedy lexing means that what appears to be an unterminated
660 header name may actually be a legitimate sequence of tokens. */
661 if (terminator == '>')
662 {
663 token->type = CPP_LESS;
664 return;
665 }
4970d4c2 666 type = CPP_OTHER;
667 break;
0578f103 668 }
4970d4c2 669 else if (c == '\0')
670 saw_NUL = true;
0578f103 671 }
672
4970d4c2 673 if (saw_NUL && !pfile->state.skipping)
d80d2074 674 cpp_error (pfile, CPP_DL_WARNING,
675 "null character(s) preserved in literal");
0578f103 676
0b67f687 677 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
678 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
679 (int) terminator);
680
4970d4c2 681 pfile->buffer->cur = cur;
682 create_literal (pfile, token, base, cur - base, type);
338fa5f7 683}
f80e83a9 684
956c6108 685/* Return the comment table. The client may not make any assumption
686 about the ordering of the table. */
687cpp_comment_table *
688cpp_get_comments (cpp_reader *pfile)
689{
690 return &pfile->comments;
691}
692
693/* Append a comment to the end of the comment table. */
694static void
695store_comment (cpp_reader *pfile, cpp_token *token)
696{
697 int len;
698
699 if (pfile->comments.allocated == 0)
700 {
701 pfile->comments.allocated = 256;
702 pfile->comments.entries = (cpp_comment *) xmalloc
703 (pfile->comments.allocated * sizeof (cpp_comment));
704 }
705
706 if (pfile->comments.count == pfile->comments.allocated)
707 {
708 pfile->comments.allocated *= 2;
709 pfile->comments.entries = (cpp_comment *) xrealloc
710 (pfile->comments.entries,
711 pfile->comments.allocated * sizeof (cpp_comment));
712 }
713
714 len = token->val.str.len;
715
716 /* Copy comment. Note, token may not be NULL terminated. */
717 pfile->comments.entries[pfile->comments.count].comment =
718 (char *) xmalloc (sizeof (char) * (len + 1));
719 memcpy (pfile->comments.entries[pfile->comments.count].comment,
720 token->val.str.text, len);
721 pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
722
723 /* Set source location. */
724 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
725
726 /* Increment the count of entries in the comment table. */
727 pfile->comments.count++;
728}
729
79bd622b 730/* The stored comment includes the comment start and any terminator. */
2c63d6c8 731static void
f7fdd7a1 732save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
733 cppchar_t type)
2c63d6c8 734{
f80e83a9 735 unsigned char *buffer;
d3f7919d 736 unsigned int len, clen;
b1a9ff83 737
f0495c2c 738 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1c124f85 739
a543b315 740 /* C++ comments probably (not definitely) have moved past a new
741 line, which we don't want to save in the comment. */
1c124f85 742 if (is_vspace (pfile->buffer->cur[-1]))
a543b315 743 len--;
d3f7919d 744
745 /* If we are currently in a directive, then we need to store all
746 C++ comments as C comments internally, and so we need to
747 allocate a little extra space in that case.
748
749 Note that the only time we encounter a directive here is
750 when we are saving comments in a "#define". */
751 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
752
753 buffer = _cpp_unaligned_alloc (pfile, clen);
b1a9ff83 754
f80e83a9 755 token->type = CPP_COMMENT;
d3f7919d 756 token->val.str.len = clen;
338fa5f7 757 token->val.str.text = buffer;
0578f103 758
f0495c2c 759 buffer[0] = '/';
760 memcpy (buffer + 1, from, len - 1);
d3f7919d 761
a113df96 762 /* Finish conversion to a C comment, if necessary. */
d3f7919d 763 if (pfile->state.in_directive && type == '/')
764 {
765 buffer[1] = '*';
766 buffer[clen - 2] = '*';
767 buffer[clen - 1] = '/';
768 }
956c6108 769
770 /* Finally store this comment for use by clients of libcpp. */
771 store_comment (pfile, token);
338fa5f7 772}
0578f103 773
83dcbb5c 774/* Allocate COUNT tokens for RUN. */
775void
f7fdd7a1 776_cpp_init_tokenrun (tokenrun *run, unsigned int count)
83dcbb5c 777{
3b298764 778 run->base = XNEWVEC (cpp_token, count);
83dcbb5c 779 run->limit = run->base + count;
780 run->next = NULL;
781}
782
783/* Returns the next tokenrun, or creates one if there is none. */
784static tokenrun *
f7fdd7a1 785next_tokenrun (tokenrun *run)
83dcbb5c 786{
787 if (run->next == NULL)
788 {
3b298764 789 run->next = XNEW (tokenrun);
fb5ab82c 790 run->next->prev = run;
83dcbb5c 791 _cpp_init_tokenrun (run->next, 250);
792 }
793
794 return run->next;
795}
796
89768577 797/* Look ahead in the input stream. */
798const cpp_token *
799cpp_peek_token (cpp_reader *pfile, int index)
800{
801 cpp_context *context = pfile->context;
802 const cpp_token *peektok;
803 int count;
804
805 /* First, scan through any pending cpp_context objects. */
806 while (context->prev)
807 {
808 ptrdiff_t sz = (context->direct_p
809 ? LAST (context).token - FIRST (context).token
810 : LAST (context).ptoken - FIRST (context).ptoken);
811
812 if (index < (int) sz)
813 return (context->direct_p
814 ? FIRST (context).token + index
815 : *(FIRST (context).ptoken + index));
816
817 index -= (int) sz;
818 context = context->prev;
819 }
820
821 /* We will have to read some new tokens after all (and do so
822 without invalidating preceding tokens). */
823 count = index;
824 pfile->keep_tokens++;
825
826 do
827 {
828 peektok = _cpp_lex_token (pfile);
829 if (peektok->type == CPP_EOF)
830 return peektok;
831 }
832 while (index--);
833
834 _cpp_backup_tokens_direct (pfile, count + 1);
835 pfile->keep_tokens--;
836
837 return peektok;
838}
839
f9b5f742 840/* Allocate a single token that is invalidated at the same time as the
841 rest of the tokens on the line. Has its line and col set to the
842 same as the last lexed token, so that diagnostics appear in the
843 right place. */
844cpp_token *
f7fdd7a1 845_cpp_temp_token (cpp_reader *pfile)
f9b5f742 846{
847 cpp_token *old, *result;
89768577 848 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
849 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
f9b5f742 850
851 old = pfile->cur_token - 1;
89768577 852 /* Any pre-existing lookaheads must not be clobbered. */
853 if (la)
854 {
855 if (sz <= la)
856 {
857 tokenrun *next = next_tokenrun (pfile->cur_run);
858
859 if (sz < la)
860 memmove (next->base + 1, next->base,
861 (la - sz) * sizeof (cpp_token));
862
863 next->base[0] = pfile->cur_run->limit[-1];
864 }
865
866 if (sz > 1)
867 memmove (pfile->cur_token + 1, pfile->cur_token,
868 MIN (la, sz - 1) * sizeof (cpp_token));
869 }
870
871 if (!sz && pfile->cur_token == pfile->cur_run->limit)
f9b5f742 872 {
873 pfile->cur_run = next_tokenrun (pfile->cur_run);
874 pfile->cur_token = pfile->cur_run->base;
875 }
876
877 result = pfile->cur_token++;
610625e3 878 result->src_loc = old->src_loc;
f9b5f742 879 return result;
880}
881
10b4496a 882/* Lex a token into RESULT (external interface). Takes care of issues
883 like directive handling, token lookahead, multiple include
3fb1e43b 884 optimization and skipping. */
c00e481c 885const cpp_token *
f7fdd7a1 886_cpp_lex_token (cpp_reader *pfile)
83dcbb5c 887{
fb5ab82c 888 cpp_token *result;
83dcbb5c 889
fb5ab82c 890 for (;;)
83dcbb5c 891 {
fb5ab82c 892 if (pfile->cur_token == pfile->cur_run->limit)
83dcbb5c 893 {
fb5ab82c 894 pfile->cur_run = next_tokenrun (pfile->cur_run);
895 pfile->cur_token = pfile->cur_run->base;
83dcbb5c 896 }
e0ff7935 897 /* We assume that the current token is somewhere in the current
898 run. */
899 if (pfile->cur_token < pfile->cur_run->base
900 || pfile->cur_token >= pfile->cur_run->limit)
901 abort ();
83dcbb5c 902
fb5ab82c 903 if (pfile->lookaheads)
10b4496a 904 {
905 pfile->lookaheads--;
906 result = pfile->cur_token++;
907 }
fb5ab82c 908 else
10b4496a 909 result = _cpp_lex_direct (pfile);
fb5ab82c 910
911 if (result->flags & BOL)
83dcbb5c 912 {
fb5ab82c 913 /* Is this a directive. If _cpp_handle_directive returns
914 false, it is an assembler #. */
915 if (result->type == CPP_HASH
d6af0368 916 /* 6.10.3 p 11: Directives in a list of macro arguments
917 gives undefined behavior. This implementation
918 handles the directive as normal. */
b75b98aa 919 && pfile->state.parsing_args != 1)
d6d3c909 920 {
b75b98aa 921 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
d6d3c909 922 {
b75b98aa 923 if (pfile->directive_result.type == CPP_PADDING)
924 continue;
d6d3c909 925 result = &pfile->directive_result;
d6d3c909 926 }
927 }
b75b98aa 928 else if (pfile->state.in_deferred_pragma)
929 result = &pfile->directive_result;
d6d3c909 930
5621a364 931 if (pfile->cb.line_change && !pfile->state.skipping)
f7fdd7a1 932 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
83dcbb5c 933 }
83dcbb5c 934
fb5ab82c 935 /* We don't skip tokens in directives. */
b75b98aa 936 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
fb5ab82c 937 break;
83dcbb5c 938
fb5ab82c 939 /* Outside a directive, invalidate controlling macros. At file
10b4496a 940 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
7ef5b942 941 get here and MI optimization works. */
83dcbb5c 942 pfile->mi_valid = false;
fb5ab82c 943
944 if (!pfile->state.skipping || result->type == CPP_EOF)
945 break;
83dcbb5c 946 }
947
c00e481c 948 return result;
83dcbb5c 949}
950
a54e0bf8 951/* Returns true if a fresh line has been loaded. */
952bool
f7fdd7a1 953_cpp_get_fresh_line (cpp_reader *pfile)
0bb65704 954{
6e04daf1 955 int return_at_eof;
956
a54e0bf8 957 /* We can't get a new line until we leave the current directive. */
958 if (pfile->state.in_directive)
959 return false;
b1a9ff83 960
a54e0bf8 961 for (;;)
fb83e0d6 962 {
a54e0bf8 963 cpp_buffer *buffer = pfile->buffer;
fb83e0d6 964
a54e0bf8 965 if (!buffer->need_line)
966 return true;
967
968 if (buffer->next_line < buffer->rlimit)
0bb65704 969 {
a54e0bf8 970 _cpp_clean_line (pfile);
971 return true;
972 }
0bb65704 973
a54e0bf8 974 /* First, get out of parsing arguments state. */
975 if (pfile->state.parsing_args)
976 return false;
977
978 /* End of buffer. Non-empty files should end in a newline. */
979 if (buffer->buf != buffer->rlimit
980 && buffer->next_line > buffer->rlimit
981 && !buffer->from_stage3)
982 {
0448520c 983 /* Clip to buffer size. */
a54e0bf8 984 buffer->next_line = buffer->rlimit;
a54e0bf8 985 }
6e04daf1 986
987 return_at_eof = buffer->return_at_eof;
a54e0bf8 988 _cpp_pop_buffer (pfile);
6e04daf1 989 if (pfile->buffer == NULL || return_at_eof)
11b5269c 990 return false;
a54e0bf8 991 }
0bb65704 992}
993
/* Two-way operator lexing helper.  Sets result->type to ELSE_TYPE;
   then, if the next character in the buffer equals CHAR, consumes it
   and sets result->type to THEN_TYPE instead.  Variables named
   `result' and `buffer' must be in scope where the macro is used.
   Every argument is parenthesized so that an expression argument
   (e.g. a conditional expression) cannot regroup with the
   surrounding operators.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = (ELSE_TYPE);                       \
      if (*buffer->cur == (CHAR))                       \
        buffer->cur++, result->type = (THEN_TYPE);      \
    }                                                   \
  while (0)
1c124f85 1002
10b4496a 1003/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimization, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.

   Returns a pointer to the lexed token, with its type, flags and
   src_loc filled in.  Note that this need not be the slot taken from
   pfile->cur_token on entry: when a fresh line is started and
   pfile->keep_tokens is zero, the token is placed in the first slot
   of pfile->base_run instead.  */
1014cpp_token *
f7fdd7a1 1015_cpp_lex_direct (cpp_reader *pfile)
0578f103 1016{
338fa5f7 1017 cppchar_t c;
230f0943 1018 cpp_buffer *buffer;
338fa5f7 1019 const unsigned char *comment_start;
10b4496a 1020 cpp_token *result = pfile->cur_token++;
0653b94e 1021
/* Entered at function start and again after every newline.  */
83dcbb5c 1022 fresh_line:
a54e0bf8 1023 result->flags = 0;
82166c5c 1024 buffer = pfile->buffer;
11b5269c 1025 if (buffer->need_line)
a54e0bf8 1026 {
/* The end of the line terminates a deferred pragma: report it as
   CPP_PRAGMA_EOL and leave deferred-pragma state.  */
b75b98aa 1027 if (pfile->state.in_deferred_pragma)
1028 {
1029 result->type = CPP_PRAGMA_EOL;
1030 pfile->state.in_deferred_pragma = false;
1031 if (!pfile->state.pragma_allow_expansion)
1032 pfile->state.prevent_expansion--;
1033 return result;
1034 }
/* No further line could be supplied: hand back CPP_EOF.  */
a54e0bf8 1035 if (!_cpp_get_fresh_line (pfile))
1036 {
1037 result->type = CPP_EOF;
2908f819 1038 if (!pfile->state.in_directive)
1039 {
1040 /* Tell the compiler the line number of the EOF token. */
dbddc569 1041 result->src_loc = pfile->line_table->highest_line;
2908f819 1042 result->flags = BOL;
1043 }
a54e0bf8 1044 return result;
1045 }
/* Tokens need not be kept: rewind to the start of the base token run
   and lex into its first slot.  */
1046 if (!pfile->keep_tokens)
1047 {
1048 pfile->cur_run = &pfile->base_run;
1049 result = pfile->base_run.base;
1050 pfile->cur_token = result + 1;
1051 }
/* First token of a line.  While parsing_args is 2 the line break also
   counts as preceding whitespace.  */
1052 result->flags = BOL;
1053 if (pfile->state.parsing_args == 2)
1054 result->flags |= PREV_WHITE;
1055 }
/* _cpp_get_fresh_line may have popped a buffer, so reload it.  */
11b5269c 1056 buffer = pfile->buffer;
/* Re-entered after a discarded comment to refresh the location.  */
83dcbb5c 1057 update_tokens_line:
dbddc569 1058 result->src_loc = pfile->line_table->highest_line;
f80e83a9 1059
/* Re-entered after horizontal whitespace has been skipped.  */
83dcbb5c 1060 skipped_white:
/* Handle line notes at or before the current position, unless an
   overlaid buffer is in use.  */
a54e0bf8 1061 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1062 && !pfile->overlaid_buffer)
1063 {
1064 _cpp_process_line_notes (pfile, false);
dbddc569 1065 result->src_loc = pfile->line_table->highest_line;
a54e0bf8 1066 }
1c124f85 1067 c = *buffer->cur++;
610625e3 1068
dbddc569 1069 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1070 CPP_BUF_COLUMN (buffer, buffer->cur));
83dcbb5c 1071
/* Dispatch on the first character of the token; buffer->cur already
   points one past it.  */
338fa5f7 1072 switch (c)
0578f103 1073 {
435fb09b 1074 case ' ': case '\t': case '\f': case '\v': case '\0':
1075 result->flags |= PREV_WHITE;
a54e0bf8 1076 skip_whitespace (pfile, c);
1077 goto skipped_white;
338fa5f7 1078
a54e0bf8 1079 case '\n':
/* The line counter is only advanced when the newline is not the last
   character before rlimit.  */
610625e3 1080 if (buffer->cur < buffer->rlimit)
1081 CPP_INCREMENT_LINE (pfile, 0);
a54e0bf8 1082 buffer->need_line = true;
1083 goto fresh_line;
732cb4c9 1084
338fa5f7 1085 case '0': case '1': case '2': case '3': case '4':
1086 case '5': case '6': case '7': case '8': case '9':
bce47149 1087 {
1088 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1089 result->type = CPP_NUMBER;
1090 lex_number (pfile, &result->val.str, &nst);
1091 warn_about_normalization (pfile, result, &nst);
1092 break;
1093 }
732cb4c9 1094
78c551ad 1095 case 'L':
924bbf02 1096 case 'u':
1097 case 'U':
1098 /* 'L', 'u' or 'U' may introduce wide characters or strings. */
1099 if (c == 'L' || CPP_OPTION (pfile, uliterals))
5bb46c08 1100 {
924bbf02 1101 if (*buffer->cur == '\'' || *buffer->cur == '"')
1102 {
1103 lex_string (pfile, result, buffer->cur - 1);
1104 break;
1105 }
5bb46c08 1106 }
b1a9ff83 1107 /* Fall through. */
78c551ad 1108
/* Identifier start characters.  'L', 'u' and 'U' are absent from the
   lists below because they have their own labels above.  */
338fa5f7 1109 case '_':
1110 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1111 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1112 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
924bbf02 1113 case 's': case 't': case 'v': case 'w': case 'x':
338fa5f7 1114 case 'y': case 'z':
1115 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
78c551ad 1116 case 'G': case 'H': case 'I': case 'J': case 'K':
338fa5f7 1117 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
924bbf02 1118 case 'S': case 'T': case 'V': case 'W': case 'X':
338fa5f7 1119 case 'Y': case 'Z':
1120 result->type = CPP_NAME;
bce47149 1121 {
1122 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1123 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1124 &nst);
1125 warn_about_normalization (pfile, result, &nst);
1126 }
338fa5f7 1127
338fa5f7 1128 /* Convert named operators to their proper types. */
78c551ad 1129 if (result->val.node->flags & NODE_OPERATOR)
338fa5f7 1130 {
1131 result->flags |= NAMED_OP;
720aca92 1132 result->type = (enum cpp_ttype) result->val.node->directive_index;
338fa5f7 1133 }
1134 break;
1135
1136 case '\'':
1137 case '"':
4970d4c2 1138 lex_string (pfile, result, buffer->cur - 1);
338fa5f7 1139 break;
f80e83a9 1140
338fa5f7 1141 case '/':
f0495c2c 1142 /* A potential block or line comment. */
1143 comment_start = buffer->cur;
edaf8cb5 1144 c = *buffer->cur;
1145
f0495c2c 1146 if (c == '*')
1147 {
a54e0bf8 1148 if (_cpp_skip_block_comment (pfile))
d80d2074 1149 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
338fa5f7 1150 }
1c124f85 1151 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
610625e3 1152 || cpp_in_system_header (pfile)))
338fa5f7 1153 {
5db5d057 1154 /* Warn about comments only if pedantically GNUC89, and not
in system headers. */
1156 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
66914e49 1157 && ! buffer->warned_cplusplus_comments)
f80e83a9 1158 {
d80d2074 1159 cpp_error (pfile, CPP_DL_PEDWARN,
ba059ac0 1160 "C++ style comments are not allowed in ISO C90");
d80d2074 1161 cpp_error (pfile, CPP_DL_PEDWARN,
73328dce 1162 "(this will be reported only once per input file)");
f0495c2c 1163 buffer->warned_cplusplus_comments = 1;
1164 }
338fa5f7 1165
e1caf668 1166 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
d80d2074 1167 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
f0495c2c 1168 }
1c124f85 1169 else if (c == '=')
1170 {
edaf8cb5 1171 buffer->cur++;
1c124f85 1172 result->type = CPP_DIV_EQ;
1173 break;
1174 }
1175 else
1176 {
1c124f85 1177 result->type = CPP_DIV;
1178 break;
1179 }
338fa5f7 1180
/* Only the two comment branches reach this point.  A comment that is
   not being saved is treated as whitespace and lexing continues with
   the next token.  */
f0495c2c 1181 if (!pfile->state.save_comments)
1182 {
1183 result->flags |= PREV_WHITE;
83dcbb5c 1184 goto update_tokens_line;
338fa5f7 1185 }
f0495c2c 1186
1187 /* Save the comment as a token in its own right. */
d3f7919d 1188 save_comment (pfile, result, comment_start, c);
fb5ab82c 1189 break;
338fa5f7 1190
1191 case '<':
/* With angled_headers set, try lex_string first; if the token type
   is still CPP_LESS afterwards, lex '<' as an ordinary operator.
   NOTE(review): presumably lex_string leaves CPP_LESS when no closing
   '>' is found - confirm against lex_string.  */
1192 if (pfile->state.angled_headers)
1193 {
4970d4c2 1194 lex_string (pfile, result, buffer->cur - 1);
7811eab5 1195 if (result->type != CPP_LESS)
1196 break;
338fa5f7 1197 }
0578f103 1198
edaf8cb5 1199 result->type = CPP_LESS;
1200 if (*buffer->cur == '=')
1201 buffer->cur++, result->type = CPP_LESS_EQ;
1202 else if (*buffer->cur == '<')
338fa5f7 1203 {
edaf8cb5 1204 buffer->cur++;
1205 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
338fa5f7 1206 }
edaf8cb5 1207 else if (CPP_OPTION (pfile, digraphs))
1c124f85 1208 {
edaf8cb5 1209 if (*buffer->cur == ':')
1210 {
1211 buffer->cur++;
1212 result->flags |= DIGRAPH;
1213 result->type = CPP_OPEN_SQUARE;
1214 }
1215 else if (*buffer->cur == '%')
1216 {
1217 buffer->cur++;
1218 result->flags |= DIGRAPH;
1219 result->type = CPP_OPEN_BRACE;
1220 }
1c124f85 1221 }
338fa5f7 1222 break;
1223
1224 case '>':
edaf8cb5 1225 result->type = CPP_GREATER;
1226 if (*buffer->cur == '=')
1227 buffer->cur++, result->type = CPP_GREATER_EQ;
1228 else if (*buffer->cur == '>')
338fa5f7 1229 {
edaf8cb5 1230 buffer->cur++;
1231 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1232 }
338fa5f7 1233 break;
1234
f669338a 1235 case '%':
edaf8cb5 1236 result->type = CPP_MOD;
1237 if (*buffer->cur == '=')
1238 buffer->cur++, result->type = CPP_MOD_EQ;
1239 else if (CPP_OPTION (pfile, digraphs))
1c124f85 1240 {
edaf8cb5 1241 if (*buffer->cur == ':')
1c124f85 1242 {
edaf8cb5 1243 buffer->cur++;
1244 result->flags |= DIGRAPH;
1245 result->type = CPP_HASH;
/* "%:%:" is the digraph spelling of the paste operator.  */
1246 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1247 buffer->cur += 2, result->type = CPP_PASTE;
1248 }
1249 else if (*buffer->cur == '>')
1250 {
1251 buffer->cur++;
1252 result->flags |= DIGRAPH;
1253 result->type = CPP_CLOSE_BRACE;
1c124f85 1254 }
1c124f85 1255 }
338fa5f7 1256 break;
1257
f669338a 1258 case '.':
1c124f85 1259 result->type = CPP_DOT;
/* A dot followed by a digit starts a number.  */
edaf8cb5 1260 if (ISDIGIT (*buffer->cur))
1c124f85 1261 {
bce47149 1262 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1c124f85 1263 result->type = CPP_NUMBER;
bce47149 1264 lex_number (pfile, &result->val.str, &nst);
1265 warn_about_normalization (pfile, result, &nst);
1c124f85 1266 }
edaf8cb5 1267 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1268 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1269 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1270 buffer->cur++, result->type = CPP_DOT_STAR;
338fa5f7 1271 break;
0578f103 1272
338fa5f7 1273 case '+':
edaf8cb5 1274 result->type = CPP_PLUS;
1275 if (*buffer->cur == '+')
1276 buffer->cur++, result->type = CPP_PLUS_PLUS;
1277 else if (*buffer->cur == '=')
1278 buffer->cur++, result->type = CPP_PLUS_EQ;
338fa5f7 1279 break;
ac0749c7 1280
338fa5f7 1281 case '-':
edaf8cb5 1282 result->type = CPP_MINUS;
1283 if (*buffer->cur == '>')
338fa5f7 1284 {
edaf8cb5 1285 buffer->cur++;
1c124f85 1286 result->type = CPP_DEREF;
edaf8cb5 1287 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1288 buffer->cur++, result->type = CPP_DEREF_STAR;
1c124f85 1289 }
edaf8cb5 1290 else if (*buffer->cur == '-')
1291 buffer->cur++, result->type = CPP_MINUS_MINUS;
1292 else if (*buffer->cur == '=')
1293 buffer->cur++, result->type = CPP_MINUS_EQ;
338fa5f7 1294 break;
0578f103 1295
338fa5f7 1296 case '&':
edaf8cb5 1297 result->type = CPP_AND;
1298 if (*buffer->cur == '&')
1299 buffer->cur++, result->type = CPP_AND_AND;
1300 else if (*buffer->cur == '=')
1301 buffer->cur++, result->type = CPP_AND_EQ;
338fa5f7 1302 break;
b1a9ff83 1303
338fa5f7 1304 case '|':
edaf8cb5 1305 result->type = CPP_OR;
1306 if (*buffer->cur == '|')
1307 buffer->cur++, result->type = CPP_OR_OR;
1308 else if (*buffer->cur == '=')
1309 buffer->cur++, result->type = CPP_OR_EQ;
338fa5f7 1310 break;
0578f103 1311
338fa5f7 1312 case ':':
edaf8cb5 1313 result->type = CPP_COLON;
1314 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1315 buffer->cur++, result->type = CPP_SCOPE;
1316 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
338fa5f7 1317 {
edaf8cb5 1318 buffer->cur++;
338fa5f7 1319 result->flags |= DIGRAPH;
1c124f85 1320 result->type = CPP_CLOSE_SQUARE;
1321 }
338fa5f7 1322 break;
0578f103 1323
1c124f85 1324 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1325 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1326 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1327 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1328 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1329
a54e0bf8 1330 case '?': result->type = CPP_QUERY; break;
338fa5f7 1331 case '~': result->type = CPP_COMPL; break;
1332 case ',': result->type = CPP_COMMA; break;
1333 case '(': result->type = CPP_OPEN_PAREN; break;
1334 case ')': result->type = CPP_CLOSE_PAREN; break;
1335 case '[': result->type = CPP_OPEN_SQUARE; break;
1336 case ']': result->type = CPP_CLOSE_SQUARE; break;
1337 case '{': result->type = CPP_OPEN_BRACE; break;
1338 case '}': result->type = CPP_CLOSE_BRACE; break;
1339 case ';': result->type = CPP_SEMICOLON; break;
1340
7fd957fe 1341 /* @ is a punctuator in Objective-C. */
9ee99ac6 1342 case '@': result->type = CPP_ATSIGN; break;
338fa5f7 1343
78c551ad 1344 case '$':
2cbf1359 1345 case '\\':
1346 {
/* Back up so that forms_identifier_p examines the '$' or '\\'
   itself.  */
1347 const uchar *base = --buffer->cur;
bce47149 1348 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
78c551ad 1349
bce47149 1350 if (forms_identifier_p (pfile, true, &nst))
2cbf1359 1351 {
1352 result->type = CPP_NAME;
bce47149 1353 result->val.node = lex_identifier (pfile, base, true, &nst);
1354 warn_about_normalization (pfile, result, &nst);
2cbf1359 1355 break;
1356 }
/* Not an identifier: step over the character again.  */
1357 buffer->cur++;
bc205914 1358 }
/* Fall through: the character becomes a one-character CPP_OTHER
   token.  */
2cbf1359 1359
bc205914 1360 default:
4970d4c2 1361 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1362 break;
338fa5f7 1363 }
fb5ab82c 1364
1365 return result;
338fa5f7 1366}
1367
b1280514 1368/* An upper bound on the number of bytes needed to spell TOKEN.
1369 Does not include preceding whitespace. */
79bd622b 1370unsigned int
f7fdd7a1 1371cpp_token_len (const cpp_token *token)
338fa5f7 1372{
79bd622b 1373 unsigned int len;
cfad5579 1374
79bd622b 1375 switch (TOKEN_SPELL (token))
f80e83a9 1376 {
cd740bd5 1377 default: len = 6; break;
4970d4c2 1378 case SPELL_LITERAL: len = token->val.str.len; break;
bb1fa6bb 1379 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
f80e83a9 1380 }
b1280514 1381
1382 return len;
cfad5579 1383}
1384
/* Decode the UTF-8 sequence starting at NAME and write it to BUFFER
   as a universal character name of the form \UXXXXXXXX (lower-case
   hexadecimal).  Exactly 10 bytes are stored in BUFFER and no NUL is
   appended.  Returns the number of bytes of NAME that made up the
   sequence, as given by the count of leading one bits in the first
   byte.  Aborts if a continuation byte does not have the form
   10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int k;

  /* The number of leading one bits in the first byte is the length
     of the whole sequence.  */
  for (lead = *name; (lead & 0x80) != 0; lead <<= 1)
    seq_len++;

  /* The payload bits of the lead byte are those below the length
     marker.  */
  code_point = *name & (0x7F >> seq_len);

  /* Each continuation byte contributes six more bits.  */
  for (k = 1; k < seq_len; k++)
    {
      const unsigned char trail = *++name;

      code_point = (code_point << 6) | (trail & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((trail & 0xC0) != 0x80)
        abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  /* Eight hex digits, most significant nibble first.  */
  for (k = 0; k < 8; k++)
    buffer[2 + k] = hex_digits[(code_point >> (28 - 4 * k)) & 0xF];

  return seq_len;
}
1418
1419
f80e83a9 1420/* Write the spelling of a token TOKEN to BUFFER. The buffer must
c5ea33a8 1421 already contain the enough space to hold the token's spelling.
f7fdd7a1 1422 Returns a pointer to the character after the last character written.
bb1fa6bb 1423 FORSTRING is true if this is to be the spelling after translation
1424 phase 1 (this is different for UCNs).
f7fdd7a1 1425 FIXME: Would be nice if we didn't need the PFILE argument. */
79bd622b 1426unsigned char *
f7fdd7a1 1427cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
bb1fa6bb 1428 unsigned char *buffer, bool forstring)
f80e83a9 1429{
7e842f95 1430 switch (TOKEN_SPELL (token))
f80e83a9 1431 {
1432 case SPELL_OPERATOR:
1433 {
1434 const unsigned char *spelling;
1435 unsigned char c;
ab12a39c 1436
f80e83a9 1437 if (token->flags & DIGRAPH)
ee6c4e4b 1438 spelling
1439 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
31674461 1440 else if (token->flags & NAMED_OP)
1441 goto spell_ident;
f80e83a9 1442 else
7e842f95 1443 spelling = TOKEN_NAME (token);
b1a9ff83 1444
f80e83a9 1445 while ((c = *spelling++) != '\0')
1446 *buffer++ = c;
1447 }
1448 break;
ab12a39c 1449
8d27e472 1450 spell_ident:
f80e83a9 1451 case SPELL_IDENT:
bb1fa6bb 1452 if (forstring)
1453 {
1454 memcpy (buffer, NODE_NAME (token->val.node),
1455 NODE_LEN (token->val.node));
1456 buffer += NODE_LEN (token->val.node);
1457 }
1458 else
1459 {
1460 size_t i;
1461 const unsigned char * name = NODE_NAME (token->val.node);
1462
1463 for (i = 0; i < NODE_LEN (token->val.node); i++)
1464 if (name[i] & ~0x7F)
1465 {
1466 i += utf8_to_ucn (buffer, name + i) - 1;
1467 buffer += 10;
1468 }
1469 else
1470 *buffer++ = NODE_NAME (token->val.node)[i];
1471 }
f80e83a9 1472 break;
ab12a39c 1473
4970d4c2 1474 case SPELL_LITERAL:
8d27e472 1475 memcpy (buffer, token->val.str.text, token->val.str.len);
1476 buffer += token->val.str.len;
1477 break;
1478
f80e83a9 1479 case SPELL_NONE:
d80d2074 1480 cpp_error (pfile, CPP_DL_ICE,
1481 "unspellable token %s", TOKEN_NAME (token));
f80e83a9 1482 break;
1483 }
ab12a39c 1484
f80e83a9 1485 return buffer;
1486}
ab12a39c 1487
e484a1cc 1488/* Returns TOKEN spelt as a null-terminated string. The string is
1489 freed when the reader is destroyed. Useful for diagnostics. */
79bd622b 1490unsigned char *
f7fdd7a1 1491cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
b1280514 1492{
1493 unsigned int len = cpp_token_len (token) + 1;
1fdf6039 1494 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
6060326b 1495
bb1fa6bb 1496 end = cpp_spell_token (pfile, token, start, false);
79bd622b 1497 end[0] = '\0';
6060326b 1498
79bd622b 1499 return start;
1500}
6060326b 1501
e484a1cc 1502/* Used by C front ends, which really should move to using
1503 cpp_token_as_text. */
79bd622b 1504const char *
f7fdd7a1 1505cpp_type2name (enum cpp_ttype type)
79bd622b 1506{
1507 return (const char *) token_spellings[type].name;
1508}
6060326b 1509
f9b5f742 1510/* Writes the spelling of token to FP, without any preceding space.
1511 Separated from cpp_spell_token for efficiency - to avoid stdio
1512 double-buffering. */
79bd622b 1513void
f7fdd7a1 1514cpp_output_token (const cpp_token *token, FILE *fp)
79bd622b 1515{
79bd622b 1516 switch (TOKEN_SPELL (token))
6060326b 1517 {
79bd622b 1518 case SPELL_OPERATOR:
1519 {
1520 const unsigned char *spelling;
28874558 1521 int c;
6060326b 1522
79bd622b 1523 if (token->flags & DIGRAPH)
ee6c4e4b 1524 spelling
1525 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
79bd622b 1526 else if (token->flags & NAMED_OP)
1527 goto spell_ident;
1528 else
1529 spelling = TOKEN_NAME (token);
f80e83a9 1530
28874558 1531 c = *spelling;
1532 do
1533 putc (c, fp);
1534 while ((c = *++spelling) != '\0');
79bd622b 1535 }
1536 break;
f80e83a9 1537
79bd622b 1538 spell_ident:
1539 case SPELL_IDENT:
bb1fa6bb 1540 {
1541 size_t i;
1542 const unsigned char * name = NODE_NAME (token->val.node);
1543
1544 for (i = 0; i < NODE_LEN (token->val.node); i++)
1545 if (name[i] & ~0x7F)
1546 {
1547 unsigned char buffer[10];
1548 i += utf8_to_ucn (buffer, name + i) - 1;
1549 fwrite (buffer, 1, 10, fp);
1550 }
1551 else
1552 fputc (NODE_NAME (token->val.node)[i], fp);
1553 }
1554 break;
f80e83a9 1555
4970d4c2 1556 case SPELL_LITERAL:
8d27e472 1557 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1558 break;
1559
79bd622b 1560 case SPELL_NONE:
1561 /* An error, most probably. */
1562 break;
f80e83a9 1563 }
6060326b 1564}
1565
79bd622b 1566/* Compare two tokens. */
1567int
f7fdd7a1 1568_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
6060326b 1569{
79bd622b 1570 if (a->type == b->type && a->flags == b->flags)
1571 switch (TOKEN_SPELL (a))
1572 {
1573 default: /* Keep compiler happy. */
1574 case SPELL_OPERATOR:
1575 return 1;
79bd622b 1576 case SPELL_NONE:
588d632b 1577 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
79bd622b 1578 case SPELL_IDENT:
1579 return a->val.node == b->val.node;
4970d4c2 1580 case SPELL_LITERAL:
79bd622b 1581 return (a->val.str.len == b->val.str.len
1582 && !memcmp (a->val.str.text, b->val.str.text,
1583 a->val.str.len));
1584 }
6060326b 1585
f80e83a9 1586 return 0;
1587}
1588
79bd622b 1589/* Returns nonzero if a space should be inserted to avoid an
1590 accidental token paste for output. For simplicity, it is
1591 conservative, and occasionally advises a space where one is not
1592 needed, e.g. "." and ".2". */
79bd622b 1593int
f7fdd7a1 1594cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1595 const cpp_token *token2)
6060326b 1596{
79bd622b 1597 enum cpp_ttype a = token1->type, b = token2->type;
1598 cppchar_t c;
6060326b 1599
79bd622b 1600 if (token1->flags & NAMED_OP)
1601 a = CPP_NAME;
1602 if (token2->flags & NAMED_OP)
1603 b = CPP_NAME;
6060326b 1604
79bd622b 1605 c = EOF;
1606 if (token2->flags & DIGRAPH)
ee6c4e4b 1607 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
79bd622b 1608 else if (token_spellings[b].category == SPELL_OPERATOR)
1609 c = token_spellings[b].name[0];
6060326b 1610
79bd622b 1611 /* Quickly get everything that can paste with an '='. */
ee6c4e4b 1612 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
79bd622b 1613 return 1;
6060326b 1614
79bd622b 1615 switch (a)
6060326b 1616 {
e58c07f7 1617 case CPP_GREATER: return c == '>';
1618 case CPP_LESS: return c == '<' || c == '%' || c == ':';
79bd622b 1619 case CPP_PLUS: return c == '+';
1620 case CPP_MINUS: return c == '-' || c == '>';
1621 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1622 case CPP_MOD: return c == ':' || c == '>';
1623 case CPP_AND: return c == '&';
1624 case CPP_OR: return c == '|';
1625 case CPP_COLON: return c == ':' || c == '>';
1626 case CPP_DEREF: return c == '*';
efdcc728 1627 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
79bd622b 1628 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1629 case CPP_NAME: return ((b == CPP_NUMBER
1630 && name_p (pfile, &token2->val.str))
1631 || b == CPP_NAME
1632 || b == CPP_CHAR || b == CPP_STRING); /* L */
1633 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1634 || c == '.' || c == '+' || c == '-');
2cbf1359 1635 /* UCNs */
bc205914 1636 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1637 && b == CPP_NAME)
2cbf1359 1638 || (CPP_OPTION (pfile, objc)
bc205914 1639 && token1->val.str.text[0] == '@'
2cbf1359 1640 && (b == CPP_NAME || b == CPP_STRING)));
79bd622b 1641 default: break;
6060326b 1642 }
6060326b 1643
deb356cf 1644 return 0;
6060326b 1645}
1646
79bd622b 1647/* Output all the remaining tokens on the current line, and a newline
f9b5f742 1648 character, to FP. Leading whitespace is removed. If there are
1649 macros, special token padding is not performed. */
6060326b 1650void
f7fdd7a1 1651cpp_output_line (cpp_reader *pfile, FILE *fp)
6060326b 1652{
f9b5f742 1653 const cpp_token *token;
7e842f95 1654
f9b5f742 1655 token = cpp_get_token (pfile);
1656 while (token->type != CPP_EOF)
7e842f95 1657 {
f9b5f742 1658 cpp_output_token (token, fp);
1659 token = cpp_get_token (pfile);
1660 if (token->flags & PREV_WHITE)
1661 putc (' ', fp);
7e842f95 1662 }
1663
79bd622b 1664 putc ('\n', fp);
f80e83a9 1665}
6060326b 1666
c0770282 1667/* Return a string representation of all the remaining tokens on the
1668 current line. The result is allocated using xmalloc and must be
1669 freed by the caller. */
1670unsigned char *
1671cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
1672{
1673 const cpp_token *token;
1674 unsigned int out = dir_name ? ustrlen (dir_name) : 0;
1675 unsigned int alloced = 120 + out;
1676 unsigned char *result = (unsigned char *) xmalloc (alloced);
1677
1678 /* If DIR_NAME is empty, there are no initial contents. */
1679 if (dir_name)
1680 {
1681 sprintf ((char *) result, "#%s ", dir_name);
1682 out += 2;
1683 }
1684
1685 token = cpp_get_token (pfile);
1686 while (token->type != CPP_EOF)
1687 {
1688 unsigned char *last;
1689 /* Include room for a possible space and the terminating nul. */
1690 unsigned int len = cpp_token_len (token) + 2;
1691
1692 if (out + len > alloced)
1693 {
1694 alloced *= 2;
1695 if (out + len > alloced)
1696 alloced = out + len;
1697 result = (unsigned char *) xrealloc (result, alloced);
1698 }
1699
1700 last = cpp_spell_token (pfile, token, &result[out], 0);
1701 out = last - result;
1702
1703 token = cpp_get_token (pfile);
1704 if (token->flags & PREV_WHITE)
1705 result[out++] = ' ';
1706 }
1707
1708 result[out] = '\0';
1709 return result;
1710}
1711
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the room left in BUFF.
   Macro arguments are parenthesized so that expression arguments are
   evaluated as a unit.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1726
1785b647 1727/* Create a new allocation buffer. Place the control block at the end
1728 of the buffer, so that buffer overflows will cause immediate chaos. */
06c92cbc 1729static _cpp_buff *
f7fdd7a1 1730new_buff (size_t len)
06c92cbc 1731{
1732 _cpp_buff *result;
1fdf6039 1733 unsigned char *base;
06c92cbc 1734
084163dc 1735 if (len < MIN_BUFF_SIZE)
1736 len = MIN_BUFF_SIZE;
198b48a0 1737 len = CPP_ALIGN (len);
06c92cbc 1738
720aca92 1739 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
06c92cbc 1740 result = (_cpp_buff *) (base + len);
1741 result->base = base;
1742 result->cur = base;
1743 result->limit = base + len;
1744 result->next = NULL;
1745 return result;
1746}
1747
1748/* Place a chain of unwanted allocation buffers on the free list. */
1749void
f7fdd7a1 1750_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
06c92cbc 1751{
1752 _cpp_buff *end = buff;
1753
1754 while (end->next)
1755 end = end->next;
1756 end->next = pfile->free_buffs;
1757 pfile->free_buffs = buff;
1758}
1759
1760/* Return a free buffer of size at least MIN_SIZE. */
1761_cpp_buff *
f7fdd7a1 1762_cpp_get_buff (cpp_reader *pfile, size_t min_size)
06c92cbc 1763{
1764 _cpp_buff *result, **p;
1765
1766 for (p = &pfile->free_buffs;; p = &(*p)->next)
1767 {
4b31a107 1768 size_t size;
084163dc 1769
1770 if (*p == NULL)
06c92cbc 1771 return new_buff (min_size);
084163dc 1772 result = *p;
1773 size = result->limit - result->base;
1774 /* Return a buffer that's big enough, but don't waste one that's
1775 way too big. */
4085c149 1776 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
06c92cbc 1777 break;
1778 }
1779
1780 *p = result->next;
1781 result->next = NULL;
1782 result->cur = result->base;
1783 return result;
1784}
1785
20dd417a 1786/* Creates a new buffer with enough space to hold the uncommitted
e6a5f963 1787 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1788 the excess bytes to the new buffer. Chains the new buffer after
1789 BUFF, and returns the new buffer. */
06c92cbc 1790_cpp_buff *
f7fdd7a1 1791_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
06c92cbc 1792{
4b31a107 1793 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
e6a5f963 1794 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
06c92cbc 1795
e6a5f963 1796 buff->next = new_buff;
1797 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1798 return new_buff;
1799}
1800
20dd417a 1801/* Creates a new buffer with enough space to hold the uncommitted
e6a5f963 1802 remaining bytes of the buffer pointed to by BUFF, and at least
1803 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1804 Chains the new buffer before the buffer pointed to by BUFF, and
1805 updates the pointer to point to the new buffer. */
1806void
f7fdd7a1 1807_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
e6a5f963 1808{
1809 _cpp_buff *new_buff, *old_buff = *pbuff;
1810 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1811
1812 new_buff = _cpp_get_buff (pfile, size);
1813 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1814 new_buff->next = old_buff;
1815 *pbuff = new_buff;
06c92cbc 1816}
1817
1818/* Free a chain of buffers starting at BUFF. */
1819void
f82b06e0 1820_cpp_free_buff (_cpp_buff *buff)
06c92cbc 1821{
1822 _cpp_buff *next;
1823
1824 for (; buff; buff = next)
1825 {
1826 next = buff->next;
1827 free (buff->base);
1828 }
1829}
deb356cf 1830
1fdf6039 1831/* Allocate permanent, unaligned storage of length LEN. */
1832unsigned char *
f7fdd7a1 1833_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1fdf6039 1834{
1835 _cpp_buff *buff = pfile->u_buff;
1836 unsigned char *result = buff->cur;
1837
1838 if (len > (size_t) (buff->limit - result))
1839 {
1840 buff = _cpp_get_buff (pfile, len);
1841 buff->next = pfile->u_buff;
1842 pfile->u_buff = buff;
1843 result = buff->cur;
1844 }
1845
1846 buff->cur = result + len;
1847 return result;
1848}
1849
1e0ef2fd 1850/* Allocate permanent, unaligned storage of length LEN from a_buff.
1851 That buffer is used for growing allocations when saving macro
1852 replacement lists in a #define, and when parsing an answer to an
1853 assertion in #assert, #unassert or #if (and therefore possibly
1854 whilst expanding macros). It therefore must not be used by any
1855 code that they might call: specifically the lexer and the guts of
1856 the macro expander.
1857
1858 All existing other uses clearly fit this restriction: storing
1859 registered pragmas during initialization. */
79bd622b 1860unsigned char *
f7fdd7a1 1861_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
89b05ef6 1862{
e6a5f963 1863 _cpp_buff *buff = pfile->a_buff;
1864 unsigned char *result = buff->cur;
89b05ef6 1865
e6a5f963 1866 if (len > (size_t) (buff->limit - result))
89b05ef6 1867 {
e6a5f963 1868 buff = _cpp_get_buff (pfile, len);
1869 buff->next = pfile->a_buff;
1870 pfile->a_buff = buff;
1871 result = buff->cur;
89b05ef6 1872 }
f80e83a9 1873
e6a5f963 1874 buff->cur = result + len;
79bd622b 1875 return result;
f80e83a9 1876}
c39ed964 1877
1878/* Say which field of TOK is in use. */
1879
1880enum cpp_token_fld_kind
1881cpp_token_val_index (cpp_token *tok)
1882{
1883 switch (TOKEN_SPELL (tok))
1884 {
1885 case SPELL_IDENT:
1886 return CPP_TOKEN_FLD_NODE;
1887 case SPELL_LITERAL:
1888 return CPP_TOKEN_FLD_STR;
1889 case SPELL_NONE:
1890 if (tok->type == CPP_MACRO_ARG)
1891 return CPP_TOKEN_FLD_ARG_NO;
1892 else if (tok->type == CPP_PADDING)
1893 return CPP_TOKEN_FLD_SOURCE;
d6d3c909 1894 else if (tok->type == CPP_PRAGMA)
b75b98aa 1895 return CPP_TOKEN_FLD_PRAGMA;
c39ed964 1896 /* else fall through */
1897 default:
1898 return CPP_TOKEN_FLD_NONE;
1899 }
1900}