]> git.ipfire.org Git - thirdparty/gcc.git/blame - libcpp/lex.c
Fix formatting
[thirdparty/gcc.git] / libcpp / lex.c
CommitLineData
0578f103 1/* CPP Library - lexical analysis.
6bc9506f 2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
3 Free Software Foundation, Inc.
0578f103 4 Contributed by Per Bothner, 1994-95.
5 Based on CCCP program by Paul Rubin, June 1986
6 Adapted to ANSI C, Richard Stallman, Jan 1987
7 Broken out to separate file, Zack Weinberg, Mar 2000
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
6bc9506f 11Free Software Foundation; either version 3, or (at your option) any
0578f103 12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
6bc9506f 20along with this program; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
0578f103 22
23#include "config.h"
24#include "system.h"
0578f103 25#include "cpplib.h"
d856c8a6 26#include "internal.h"
0578f103 27
79bd622b 28enum spell_type
241e762e 29{
79bd622b 30 SPELL_OPERATOR = 0,
79bd622b 31 SPELL_IDENT,
4970d4c2 32 SPELL_LITERAL,
79bd622b 33 SPELL_NONE
241e762e 34};
35
79bd622b 36struct token_spelling
241e762e 37{
79bd622b 38 enum spell_type category;
39 const unsigned char *name;
241e762e 40};
41
0ca849f9 42static const unsigned char *const digraph_spellings[] =
924bbf02 43{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
79bd622b 44
924bbf02 45#define OP(e, s) { SPELL_OPERATOR, UC s },
46#define TK(e, s) { SPELL_ ## s, UC #e },
0ca849f9 47static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
79bd622b 48#undef OP
49#undef TK
50
51#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
52#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
e2f9a79f 53
f7fdd7a1 54static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
55static int skip_line_comment (cpp_reader *);
56static void skip_whitespace (cpp_reader *, cppchar_t);
f7fdd7a1 57static void lex_string (cpp_reader *, cpp_token *, const uchar *);
58static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
956c6108 59static void store_comment (cpp_reader *, cpp_token *);
f7fdd7a1 60static void create_literal (cpp_reader *, cpp_token *, const uchar *,
61 unsigned int, enum cpp_ttype);
62static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
63static int name_p (cpp_reader *, const cpp_string *);
f7fdd7a1 64static tokenrun *next_tokenrun (tokenrun *);
65
f7fdd7a1 66static _cpp_buff *new_buff (size_t);
bce8e0c0 67
e920deaf 68
f80e83a9 69/* Utility routine:
2c63d6c8 70
76faa4c0 71 Compares, the token TOKEN to the NUL-terminated string STRING.
72 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
f80e83a9 73int
f7fdd7a1 74cpp_ideq (const cpp_token *token, const char *string)
f80e83a9 75{
76faa4c0 76 if (token->type != CPP_NAME)
f80e83a9 77 return 0;
76faa4c0 78
b6d18b0a 79 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
bce8e0c0 80}
50fd6b48 81
a54e0bf8 82/* Record a note TYPE at byte POS into the current cleaned logical
83 line. */
1e0ef2fd 84static void
f7fdd7a1 85add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
338fa5f7 86{
a54e0bf8 87 if (buffer->notes_used == buffer->notes_cap)
88 {
89 buffer->notes_cap = buffer->notes_cap * 2 + 200;
720aca92 90 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
91 buffer->notes_cap);
a54e0bf8 92 }
338fa5f7 93
a54e0bf8 94 buffer->notes[buffer->notes_used].pos = pos;
95 buffer->notes[buffer->notes_used].type = type;
96 buffer->notes_used++;
338fa5f7 97}
98
a54e0bf8 99/* Returns with a logical line that contains no escaped newlines or
100 trigraphs. This is a time-critical inner loop. */
101void
f7fdd7a1 102_cpp_clean_line (cpp_reader *pfile)
0578f103 103{
a54e0bf8 104 cpp_buffer *buffer;
105 const uchar *s;
106 uchar c, *d, *p;
1e0ef2fd 107
a54e0bf8 108 buffer = pfile->buffer;
109 buffer->cur_note = buffer->notes_used = 0;
110 buffer->cur = buffer->line_base = buffer->next_line;
111 buffer->need_line = false;
112 s = buffer->next_line - 1;
1e0ef2fd 113
a54e0bf8 114 if (!buffer->from_stage3)
0578f103 115 {
5008f5c5 116 const uchar *pbackslash = NULL;
117
54d3be91 118 /* Short circuit for the common case of an un-escaped line with
119 no trigraphs. The primary win here is by not writing any
120 data back to memory until we have to. */
121 for (;;)
122 {
123 c = *++s;
5008f5c5 124 if (__builtin_expect (c == '\n', false)
125 || __builtin_expect (c == '\r', false))
54d3be91 126 {
127 d = (uchar *) s;
128
5008f5c5 129 if (__builtin_expect (s == buffer->rlimit, false))
54d3be91 130 goto done;
131
132 /* DOS line ending? */
5008f5c5 133 if (__builtin_expect (c == '\r', false)
134 && s[1] == '\n')
135 {
136 s++;
137 if (s == buffer->rlimit)
138 goto done;
139 }
54d3be91 140
5008f5c5 141 if (__builtin_expect (pbackslash == NULL, true))
54d3be91 142 goto done;
143
5008f5c5 144 /* Check for escaped newline. */
54d3be91 145 p = d;
5008f5c5 146 while (is_nvspace (p[-1]))
54d3be91 147 p--;
5008f5c5 148 if (p - 1 != pbackslash)
54d3be91 149 goto done;
150
151 /* Have an escaped newline; process it and proceed to
152 the slow path. */
153 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
154 d = p - 2;
155 buffer->next_line = p - 1;
156 break;
157 }
5008f5c5 158 if (__builtin_expect (c == '\\', false))
159 pbackslash = s;
160 else if (__builtin_expect (c == '?', false)
161 && __builtin_expect (s[1] == '?', false)
162 && _cpp_trigraph_map[s[2]])
54d3be91 163 {
164 /* Have a trigraph. We may or may not have to convert
165 it. Add a line note regardless, for -Wtrigraphs. */
166 add_line_note (buffer, s, s[2]);
167 if (CPP_OPTION (pfile, trigraphs))
168 {
169 /* We do, and that means we have to switch to the
170 slow path. */
171 d = (uchar *) s;
172 *d = _cpp_trigraph_map[s[2]];
173 s += 2;
174 break;
175 }
176 }
177 }
178
a54e0bf8 179
180 for (;;)
4b912310 181 {
a54e0bf8 182 c = *++s;
183 *++d = c;
184
185 if (c == '\n' || c == '\r')
186 {
187 /* Handle DOS line endings. */
188 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
189 s++;
190 if (s == buffer->rlimit)
191 break;
192
193 /* Escaped? */
194 p = d;
195 while (p != buffer->next_line && is_nvspace (p[-1]))
196 p--;
197 if (p == buffer->next_line || p[-1] != '\\')
198 break;
199
aad4a87f 200 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
a54e0bf8 201 d = p - 2;
202 buffer->next_line = p - 1;
203 }
204 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
205 {
206 /* Add a note regardless, for the benefit of -Wtrigraphs. */
aad4a87f 207 add_line_note (buffer, d, s[2]);
a54e0bf8 208 if (CPP_OPTION (pfile, trigraphs))
209 {
210 *d = _cpp_trigraph_map[s[2]];
211 s += 2;
212 }
213 }
4b912310 214 }
0578f103 215 }
a54e0bf8 216 else
217 {
218 do
219 s++;
220 while (*s != '\n' && *s != '\r');
221 d = (uchar *) s;
222
223 /* Handle DOS line endings. */
224 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
225 s++;
226 }
338fa5f7 227
54d3be91 228 done:
a54e0bf8 229 *d = '\n';
aad4a87f 230 /* A sentinel note that should never be processed. */
231 add_line_note (buffer, d + 1, '\n');
a54e0bf8 232 buffer->next_line = s + 1;
0578f103 233}
234
3078f2b2 235/* Return true if the trigraph indicated by NOTE should be warned
236 about in a comment. */
237static bool
f7fdd7a1 238warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
3078f2b2 239{
240 const uchar *p;
241
242 /* Within comments we don't warn about trigraphs, unless the
243 trigraph forms an escaped newline, as that may change
7ef5b942 244 behavior. */
3078f2b2 245 if (note->type != '/')
246 return false;
247
248 /* If -trigraphs, then this was an escaped newline iff the next note
249 is coincident. */
250 if (CPP_OPTION (pfile, trigraphs))
251 return note[1].pos == note->pos;
252
253 /* Otherwise, see if this forms an escaped newline. */
254 p = note->pos + 3;
255 while (is_nvspace (*p))
256 p++;
257
258 /* There might have been escaped newlines between the trigraph and the
259 newline we found. Hence the position test. */
260 return (*p == '\n' && p < note[1].pos);
261}
262
a54e0bf8 263/* Process the notes created by add_line_note as far as the current
264 location. */
265void
f7fdd7a1 266_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
0578f103 267{
c808d026 268 cpp_buffer *buffer = pfile->buffer;
269
a54e0bf8 270 for (;;)
f80e83a9 271 {
a54e0bf8 272 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
273 unsigned int col;
396ffa86 274
a54e0bf8 275 if (note->pos > buffer->cur)
276 break;
396ffa86 277
a54e0bf8 278 buffer->cur_note++;
279 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
435fb09b 280
aad4a87f 281 if (note->type == '\\' || note->type == ' ')
a54e0bf8 282 {
aad4a87f 283 if (note->type == ' ' && !in_comment)
dbddc569 284 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
a54e0bf8 285 "backslash and newline separated by space");
aad4a87f 286
a54e0bf8 287 if (buffer->next_line > buffer->rlimit)
1e0ef2fd 288 {
dbddc569 289 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
a54e0bf8 290 "backslash-newline at end of file");
291 /* Prevent "no newline at end of file" warning. */
292 buffer->next_line = buffer->rlimit;
1e0ef2fd 293 }
a54e0bf8 294
295 buffer->line_base = note->pos;
610625e3 296 CPP_INCREMENT_LINE (pfile, 0);
338fa5f7 297 }
aad4a87f 298 else if (_cpp_trigraph_map[note->type])
299 {
3078f2b2 300 if (CPP_OPTION (pfile, warn_trigraphs)
301 && (!in_comment || warn_in_comment (pfile, note)))
aad4a87f 302 {
303 if (CPP_OPTION (pfile, trigraphs))
dbddc569 304 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
aad4a87f 305 "trigraph ??%c converted to %c",
306 note->type,
307 (int) _cpp_trigraph_map[note->type]);
308 else
1542b1ef 309 {
310 cpp_error_with_line
dbddc569 311 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1542b1ef 312 "trigraph ??%c ignored, use -trigraphs to enable",
313 note->type);
314 }
aad4a87f 315 }
316 }
317 else
318 abort ();
f80e83a9 319 }
0578f103 320}
321
338fa5f7 322/* Skip a C-style block comment. We find the end of the comment by
323 seeing if an asterisk is before every '/' we encounter. Returns
edaf8cb5 324 nonzero if comment terminated by EOF, zero otherwise.
325
326 Buffer->cur points to the initial asterisk of the comment. */
a54e0bf8 327bool
f7fdd7a1 328_cpp_skip_block_comment (cpp_reader *pfile)
0578f103 329{
f80e83a9 330 cpp_buffer *buffer = pfile->buffer;
54d3be91 331 const uchar *cur = buffer->cur;
332 uchar c;
338fa5f7 333
54d3be91 334 cur++;
335 if (*cur == '/')
336 cur++;
338fa5f7 337
a54e0bf8 338 for (;;)
339 {
338fa5f7 340 /* People like decorating comments with '*', so check for '/'
341 instead for efficiency. */
54d3be91 342 c = *cur++;
343
f80e83a9 344 if (c == '/')
0578f103 345 {
54d3be91 346 if (cur[-2] == '*')
338fa5f7 347 break;
f80e83a9 348
338fa5f7 349 /* Warn about potential nested comments, but not if the '/'
3fb1e43b 350 comes immediately before the true comment delimiter.
f80e83a9 351 Don't bother to get it right across escaped newlines. */
338fa5f7 352 if (CPP_OPTION (pfile, warn_comments)
54d3be91 353 && cur[0] == '*' && cur[1] != '/')
354 {
355 buffer->cur = cur;
d80d2074 356 cpp_error_with_line (pfile, CPP_DL_WARNING,
dbddc569 357 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
54d3be91 358 "\"/*\" within comment");
359 }
0578f103 360 }
a54e0bf8 361 else if (c == '\n')
362 {
610625e3 363 unsigned int cols;
54d3be91 364 buffer->cur = cur - 1;
a54e0bf8 365 _cpp_process_line_notes (pfile, true);
366 if (buffer->next_line >= buffer->rlimit)
367 return true;
368 _cpp_clean_line (pfile);
610625e3 369
370 cols = buffer->next_line - buffer->line_base;
371 CPP_INCREMENT_LINE (pfile, cols);
372
54d3be91 373 cur = buffer->cur;
a54e0bf8 374 }
0578f103 375 }
f80e83a9 376
54d3be91 377 buffer->cur = cur;
3078f2b2 378 _cpp_process_line_notes (pfile, true);
a54e0bf8 379 return false;
0578f103 380}
381
1c124f85 382/* Skip a C++ line comment, leaving buffer->cur pointing to the
d10cfa8d 383 terminating newline. Handles escaped newlines. Returns nonzero
1c124f85 384 if a multiline comment. */
f80e83a9 385static int
f7fdd7a1 386skip_line_comment (cpp_reader *pfile)
0578f103 387{
f669338a 388 cpp_buffer *buffer = pfile->buffer;
4999c35b 389 source_location orig_line = pfile->line_table->highest_line;
f80e83a9 390
a54e0bf8 391 while (*buffer->cur != '\n')
392 buffer->cur++;
1c124f85 393
a54e0bf8 394 _cpp_process_line_notes (pfile, true);
dbddc569 395 return orig_line != pfile->line_table->highest_line;
f80e83a9 396}
0578f103 397
a54e0bf8 398/* Skips whitespace, saving the next non-whitespace character. */
b86584f6 399static void
f7fdd7a1 400skip_whitespace (cpp_reader *pfile, cppchar_t c)
f80e83a9 401{
402 cpp_buffer *buffer = pfile->buffer;
fe9eb18b 403 bool saw_NUL = false;
0578f103 404
338fa5f7 405 do
f80e83a9 406 {
78719282 407 /* Horizontal space always OK. */
a54e0bf8 408 if (c == ' ' || c == '\t')
338fa5f7 409 ;
338fa5f7 410 /* Just \f \v or \0 left. */
78719282 411 else if (c == '\0')
fe9eb18b 412 saw_NUL = true;
79bd622b 413 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
dbddc569 414 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
73328dce 415 CPP_BUF_COL (buffer),
416 "%s in preprocessing directive",
417 c == '\f' ? "form feed" : "vertical tab");
338fa5f7 418
338fa5f7 419 c = *buffer->cur++;
0578f103 420 }
2c0e001b 421 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
338fa5f7 422 while (is_nvspace (c));
423
fe9eb18b 424 if (saw_NUL)
d80d2074 425 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
fe9eb18b 426
1c124f85 427 buffer->cur--;
f80e83a9 428}
0578f103 429
79bd622b 430/* See if the characters of a number token are valid in a name (no
431 '.', '+' or '-'). */
432static int
f7fdd7a1 433name_p (cpp_reader *pfile, const cpp_string *string)
79bd622b 434{
435 unsigned int i;
436
437 for (i = 0; i < string->len; i++)
438 if (!is_idchar (string->text[i]))
439 return 0;
440
b1a9ff83 441 return 1;
79bd622b 442}
443
bce47149 444/* After parsing an identifier or other sequence, produce a warning about
445 sequences not in NFC/NFKC. */
446static void
447warn_about_normalization (cpp_reader *pfile,
448 const cpp_token *token,
449 const struct normalize_state *s)
450{
451 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
452 && !pfile->state.skipping)
453 {
454 /* Make sure that the token is printed using UCNs, even
455 if we'd otherwise happily print UTF-8. */
720aca92 456 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
bce47149 457 size_t sz;
458
459 sz = cpp_spell_token (pfile, token, buf, false) - buf;
460 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
461 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
3827dee5 462 "`%.*s' is not in NFKC", (int) sz, buf);
bce47149 463 else
464 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
3827dee5 465 "`%.*s' is not in NFC", (int) sz, buf);
bce47149 466 }
467}
468
5bb46c08 469/* Returns TRUE if the sequence starting at buffer->cur is invalid in
2cbf1359 470 an identifier. FIRST is TRUE if this starts an identifier. */
5bb46c08 471static bool
bce47149 472forms_identifier_p (cpp_reader *pfile, int first,
473 struct normalize_state *state)
5bb46c08 474{
2cbf1359 475 cpp_buffer *buffer = pfile->buffer;
476
477 if (*buffer->cur == '$')
478 {
479 if (!CPP_OPTION (pfile, dollars_in_ident))
480 return false;
481
482 buffer->cur++;
f0c2775b 483 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
2cbf1359 484 {
f0c2775b 485 CPP_OPTION (pfile, warn_dollars) = 0;
d80d2074 486 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
2cbf1359 487 }
488
489 return true;
490 }
5bb46c08 491
2cbf1359 492 /* Is this a syntactically valid UCN? */
865c4e44 493 if (CPP_OPTION (pfile, extended_identifiers)
4e9d1e6d 494 && *buffer->cur == '\\'
2cbf1359 495 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
5bb46c08 496 {
2cbf1359 497 buffer->cur += 2;
bce47149 498 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
499 state))
2cbf1359 500 return true;
501 buffer->cur -= 2;
5bb46c08 502 }
5bb46c08 503
2cbf1359 504 return false;
5bb46c08 505}
506
507/* Lex an identifier starting at BUFFER->CUR - 1. */
338fa5f7 508static cpp_hashnode *
bce47149 509lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
510 struct normalize_state *nst)
0578f103 511{
79bd622b 512 cpp_hashnode *result;
bb1fa6bb 513 const uchar *cur;
3eb3f293 514 unsigned int len;
515 unsigned int hash = HT_HASHSTEP (0, *base);
66a5287e 516
3eb3f293 517 cur = pfile->buffer->cur;
bb1fa6bb 518 if (! starts_ucn)
519 while (ISIDNUM (*cur))
520 {
521 hash = HT_HASHSTEP (hash, *cur);
522 cur++;
523 }
524 pfile->buffer->cur = cur;
bce47149 525 if (starts_ucn || forms_identifier_p (pfile, false, nst))
78a11351 526 {
bb1fa6bb 527 /* Slower version for identifiers containing UCNs (or $). */
528 do {
529 while (ISIDNUM (*pfile->buffer->cur))
bce47149 530 {
531 pfile->buffer->cur++;
532 NORMALIZE_STATE_UPDATE_IDNUM (nst);
533 }
534 } while (forms_identifier_p (pfile, false, nst));
bb1fa6bb 535 result = _cpp_interpret_identifier (pfile, base,
536 pfile->buffer->cur - base);
66a5287e 537 }
bb1fa6bb 538 else
539 {
540 len = cur - base;
541 hash = HT_HASHFINISH (hash, len);
5bb46c08 542
e297899b 543 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
544 base, len, hash, HT_ALLOC));
bb1fa6bb 545 }
66a5287e 546
5bb46c08 547 /* Rarely, identifiers require diagnostics when lexed. */
66a5287e 548 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
549 && !pfile->state.skipping, 0))
550 {
551 /* It is allowed to poison the same identifier twice. */
552 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
d80d2074 553 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
66a5287e 554 NODE_NAME (result));
555
556 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
557 replacement list of a variadic macro. */
558 if (result == pfile->spec_nodes.n__VA_ARGS__
559 && !pfile->state.va_args_ok)
d80d2074 560 cpp_error (pfile, CPP_DL_PEDWARN,
f7fdd7a1 561 "__VA_ARGS__ can only appear in the expansion"
562 " of a C99 variadic macro");
66a5287e 563 }
564
565 return result;
566}
567
5bb46c08 568/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
0578f103 569static void
bce47149 570lex_number (cpp_reader *pfile, cpp_string *number,
571 struct normalize_state *nst)
0578f103 572{
b6d18b0a 573 const uchar *cur;
5bb46c08 574 const uchar *base;
575 uchar *dest;
0578f103 576
5bb46c08 577 base = pfile->buffer->cur - 1;
578 do
f80e83a9 579 {
5bb46c08 580 cur = pfile->buffer->cur;
338fa5f7 581
5bb46c08 582 /* N.B. ISIDNUM does not include $. */
583 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
bce47149 584 {
585 cur++;
586 NORMALIZE_STATE_UPDATE_IDNUM (nst);
587 }
0578f103 588
78a11351 589 pfile->buffer->cur = cur;
0578f103 590 }
bce47149 591 while (forms_identifier_p (pfile, false, nst));
79bd622b 592
5bb46c08 593 number->len = cur - base;
594 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
595 memcpy (dest, base, number->len);
596 dest[number->len] = '\0';
597 number->text = dest;
79bd622b 598}
599
4970d4c2 600/* Create a token of type TYPE with a literal spelling. */
601static void
f7fdd7a1 602create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
603 unsigned int len, enum cpp_ttype type)
4970d4c2 604{
605 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
606
607 memcpy (dest, base, len);
608 dest[len] = '\0';
609 token->type = type;
610 token->val.str.len = len;
611 token->val.str.text = dest;
612}
613
5bb46c08 614/* Lexes a string, character constant, or angle-bracketed header file
4970d4c2 615 name. The stored string contains the spelling, including opening
924bbf02 616 quote and leading any leading 'L', 'u' or 'U'. It returns the type
7811eab5 617 of the literal, or CPP_OTHER if it was not properly terminated, or
618 CPP_LESS for an unterminated header name which must be relexed as
619 normal tokens.
4970d4c2 620
621 The spelling is NUL-terminated, but it is not guaranteed that this
622 is the first NUL since embedded NULs are preserved. */
f80e83a9 623static void
f7fdd7a1 624lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
0578f103 625{
4970d4c2 626 bool saw_NUL = false;
627 const uchar *cur;
5bb46c08 628 cppchar_t terminator;
4970d4c2 629 enum cpp_ttype type;
630
631 cur = base;
632 terminator = *cur++;
924bbf02 633 if (terminator == 'L' || terminator == 'u' || terminator == 'U')
4970d4c2 634 terminator = *cur++;
635 if (terminator == '\"')
924bbf02 636 type = (*base == 'L' ? CPP_WSTRING :
637 *base == 'U' ? CPP_STRING32 :
638 *base == 'u' ? CPP_STRING16 : CPP_STRING);
4970d4c2 639 else if (terminator == '\'')
924bbf02 640 type = (*base == 'L' ? CPP_WCHAR :
641 *base == 'U' ? CPP_CHAR32 :
642 *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
4970d4c2 643 else
644 terminator = '>', type = CPP_HEADER_NAME;
79bd622b 645
338fa5f7 646 for (;;)
0578f103 647 {
4970d4c2 648 cppchar_t c = *cur++;
4b0c16ee 649
edaf8cb5 650 /* In #include-style directives, terminators are not escapable. */
4970d4c2 651 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
652 cur++;
653 else if (c == terminator)
5bb46c08 654 break;
4970d4c2 655 else if (c == '\n')
338fa5f7 656 {
4970d4c2 657 cur--;
7811eab5 658 /* Unmatched quotes always yield undefined behavior, but
659 greedy lexing means that what appears to be an unterminated
660 header name may actually be a legitimate sequence of tokens. */
661 if (terminator == '>')
662 {
663 token->type = CPP_LESS;
664 return;
665 }
4970d4c2 666 type = CPP_OTHER;
667 break;
0578f103 668 }
4970d4c2 669 else if (c == '\0')
670 saw_NUL = true;
0578f103 671 }
672
4970d4c2 673 if (saw_NUL && !pfile->state.skipping)
d80d2074 674 cpp_error (pfile, CPP_DL_WARNING,
675 "null character(s) preserved in literal");
0578f103 676
0b67f687 677 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
678 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
679 (int) terminator);
680
4970d4c2 681 pfile->buffer->cur = cur;
682 create_literal (pfile, token, base, cur - base, type);
338fa5f7 683}
f80e83a9 684
956c6108 685/* Return the comment table. The client may not make any assumption
686 about the ordering of the table. */
687cpp_comment_table *
688cpp_get_comments (cpp_reader *pfile)
689{
690 return &pfile->comments;
691}
692
693/* Append a comment to the end of the comment table. */
694static void
695store_comment (cpp_reader *pfile, cpp_token *token)
696{
697 int len;
698
699 if (pfile->comments.allocated == 0)
700 {
701 pfile->comments.allocated = 256;
702 pfile->comments.entries = (cpp_comment *) xmalloc
703 (pfile->comments.allocated * sizeof (cpp_comment));
704 }
705
706 if (pfile->comments.count == pfile->comments.allocated)
707 {
708 pfile->comments.allocated *= 2;
709 pfile->comments.entries = (cpp_comment *) xrealloc
710 (pfile->comments.entries,
711 pfile->comments.allocated * sizeof (cpp_comment));
712 }
713
714 len = token->val.str.len;
715
716 /* Copy comment. Note, token may not be NULL terminated. */
717 pfile->comments.entries[pfile->comments.count].comment =
718 (char *) xmalloc (sizeof (char) * (len + 1));
719 memcpy (pfile->comments.entries[pfile->comments.count].comment,
720 token->val.str.text, len);
721 pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
722
723 /* Set source location. */
724 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
725
726 /* Increment the count of entries in the comment table. */
727 pfile->comments.count++;
728}
729
79bd622b 730/* The stored comment includes the comment start and any terminator. */
2c63d6c8 731static void
f7fdd7a1 732save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
733 cppchar_t type)
2c63d6c8 734{
f80e83a9 735 unsigned char *buffer;
d3f7919d 736 unsigned int len, clen;
b1a9ff83 737
f0495c2c 738 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1c124f85 739
a543b315 740 /* C++ comments probably (not definitely) have moved past a new
741 line, which we don't want to save in the comment. */
1c124f85 742 if (is_vspace (pfile->buffer->cur[-1]))
a543b315 743 len--;
d3f7919d 744
745 /* If we are currently in a directive, then we need to store all
746 C++ comments as C comments internally, and so we need to
747 allocate a little extra space in that case.
748
749 Note that the only time we encounter a directive here is
750 when we are saving comments in a "#define". */
751 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
752
753 buffer = _cpp_unaligned_alloc (pfile, clen);
b1a9ff83 754
f80e83a9 755 token->type = CPP_COMMENT;
d3f7919d 756 token->val.str.len = clen;
338fa5f7 757 token->val.str.text = buffer;
0578f103 758
f0495c2c 759 buffer[0] = '/';
760 memcpy (buffer + 1, from, len - 1);
d3f7919d 761
a113df96 762 /* Finish conversion to a C comment, if necessary. */
d3f7919d 763 if (pfile->state.in_directive && type == '/')
764 {
765 buffer[1] = '*';
766 buffer[clen - 2] = '*';
767 buffer[clen - 1] = '/';
768 }
956c6108 769
770 /* Finally store this comment for use by clients of libcpp. */
771 store_comment (pfile, token);
338fa5f7 772}
0578f103 773
83dcbb5c 774/* Allocate COUNT tokens for RUN. */
775void
f7fdd7a1 776_cpp_init_tokenrun (tokenrun *run, unsigned int count)
83dcbb5c 777{
3b298764 778 run->base = XNEWVEC (cpp_token, count);
83dcbb5c 779 run->limit = run->base + count;
780 run->next = NULL;
781}
782
783/* Returns the next tokenrun, or creates one if there is none. */
784static tokenrun *
f7fdd7a1 785next_tokenrun (tokenrun *run)
83dcbb5c 786{
787 if (run->next == NULL)
788 {
3b298764 789 run->next = XNEW (tokenrun);
fb5ab82c 790 run->next->prev = run;
83dcbb5c 791 _cpp_init_tokenrun (run->next, 250);
792 }
793
794 return run->next;
795}
796
89768577 797/* Look ahead in the input stream. */
798const cpp_token *
799cpp_peek_token (cpp_reader *pfile, int index)
800{
801 cpp_context *context = pfile->context;
802 const cpp_token *peektok;
803 int count;
804
805 /* First, scan through any pending cpp_context objects. */
806 while (context->prev)
807 {
808 ptrdiff_t sz = (context->direct_p
809 ? LAST (context).token - FIRST (context).token
810 : LAST (context).ptoken - FIRST (context).ptoken);
811
812 if (index < (int) sz)
813 return (context->direct_p
814 ? FIRST (context).token + index
815 : *(FIRST (context).ptoken + index));
816
817 index -= (int) sz;
818 context = context->prev;
819 }
820
821 /* We will have to read some new tokens after all (and do so
822 without invalidating preceding tokens). */
823 count = index;
824 pfile->keep_tokens++;
825
826 do
827 {
828 peektok = _cpp_lex_token (pfile);
829 if (peektok->type == CPP_EOF)
830 return peektok;
831 }
832 while (index--);
833
834 _cpp_backup_tokens_direct (pfile, count + 1);
835 pfile->keep_tokens--;
836
837 return peektok;
838}
839
f9b5f742 840/* Allocate a single token that is invalidated at the same time as the
841 rest of the tokens on the line. Has its line and col set to the
842 same as the last lexed token, so that diagnostics appear in the
843 right place. */
844cpp_token *
f7fdd7a1 845_cpp_temp_token (cpp_reader *pfile)
f9b5f742 846{
847 cpp_token *old, *result;
89768577 848 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
849 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
f9b5f742 850
851 old = pfile->cur_token - 1;
89768577 852 /* Any pre-existing lookaheads must not be clobbered. */
853 if (la)
854 {
855 if (sz <= la)
856 {
857 tokenrun *next = next_tokenrun (pfile->cur_run);
858
859 if (sz < la)
860 memmove (next->base + 1, next->base,
861 (la - sz) * sizeof (cpp_token));
862
863 next->base[0] = pfile->cur_run->limit[-1];
864 }
865
866 if (sz > 1)
867 memmove (pfile->cur_token + 1, pfile->cur_token,
868 MIN (la, sz - 1) * sizeof (cpp_token));
869 }
870
871 if (!sz && pfile->cur_token == pfile->cur_run->limit)
f9b5f742 872 {
873 pfile->cur_run = next_tokenrun (pfile->cur_run);
874 pfile->cur_token = pfile->cur_run->base;
875 }
876
877 result = pfile->cur_token++;
610625e3 878 result->src_loc = old->src_loc;
f9b5f742 879 return result;
880}
881
10b4496a 882/* Lex a token into RESULT (external interface). Takes care of issues
883 like directive handling, token lookahead, multiple include
3fb1e43b 884 optimization and skipping. */
c00e481c 885const cpp_token *
f7fdd7a1 886_cpp_lex_token (cpp_reader *pfile)
83dcbb5c 887{
fb5ab82c 888 cpp_token *result;
83dcbb5c 889
fb5ab82c 890 for (;;)
83dcbb5c 891 {
fb5ab82c 892 if (pfile->cur_token == pfile->cur_run->limit)
83dcbb5c 893 {
fb5ab82c 894 pfile->cur_run = next_tokenrun (pfile->cur_run);
895 pfile->cur_token = pfile->cur_run->base;
83dcbb5c 896 }
e0ff7935 897 /* We assume that the current token is somewhere in the current
898 run. */
899 if (pfile->cur_token < pfile->cur_run->base
900 || pfile->cur_token >= pfile->cur_run->limit)
901 abort ();
83dcbb5c 902
fb5ab82c 903 if (pfile->lookaheads)
10b4496a 904 {
905 pfile->lookaheads--;
906 result = pfile->cur_token++;
907 }
fb5ab82c 908 else
10b4496a 909 result = _cpp_lex_direct (pfile);
fb5ab82c 910
911 if (result->flags & BOL)
83dcbb5c 912 {
fb5ab82c 913 /* Is this a directive. If _cpp_handle_directive returns
914 false, it is an assembler #. */
915 if (result->type == CPP_HASH
d6af0368 916 /* 6.10.3 p 11: Directives in a list of macro arguments
917 gives undefined behavior. This implementation
918 handles the directive as normal. */
b75b98aa 919 && pfile->state.parsing_args != 1)
d6d3c909 920 {
b75b98aa 921 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
d6d3c909 922 {
b75b98aa 923 if (pfile->directive_result.type == CPP_PADDING)
924 continue;
d6d3c909 925 result = &pfile->directive_result;
d6d3c909 926 }
927 }
b75b98aa 928 else if (pfile->state.in_deferred_pragma)
929 result = &pfile->directive_result;
d6d3c909 930
5621a364 931 if (pfile->cb.line_change && !pfile->state.skipping)
f7fdd7a1 932 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
83dcbb5c 933 }
83dcbb5c 934
fb5ab82c 935 /* We don't skip tokens in directives. */
b75b98aa 936 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
fb5ab82c 937 break;
83dcbb5c 938
fb5ab82c 939 /* Outside a directive, invalidate controlling macros. At file
10b4496a 940 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
7ef5b942 941 get here and MI optimization works. */
83dcbb5c 942 pfile->mi_valid = false;
fb5ab82c 943
944 if (!pfile->state.skipping || result->type == CPP_EOF)
945 break;
83dcbb5c 946 }
947
c00e481c 948 return result;
83dcbb5c 949}
950
a54e0bf8 951/* Returns true if a fresh line has been loaded. */
952bool
f7fdd7a1 953_cpp_get_fresh_line (cpp_reader *pfile)
0bb65704 954{
6e04daf1 955 int return_at_eof;
956
a54e0bf8 957 /* We can't get a new line until we leave the current directive. */
958 if (pfile->state.in_directive)
959 return false;
b1a9ff83 960
a54e0bf8 961 for (;;)
fb83e0d6 962 {
a54e0bf8 963 cpp_buffer *buffer = pfile->buffer;
fb83e0d6 964
a54e0bf8 965 if (!buffer->need_line)
966 return true;
967
968 if (buffer->next_line < buffer->rlimit)
0bb65704 969 {
a54e0bf8 970 _cpp_clean_line (pfile);
971 return true;
972 }
0bb65704 973
a54e0bf8 974 /* First, get out of parsing arguments state. */
975 if (pfile->state.parsing_args)
976 return false;
977
978 /* End of buffer. Non-empty files should end in a newline. */
979 if (buffer->buf != buffer->rlimit
980 && buffer->next_line > buffer->rlimit
981 && !buffer->from_stage3)
982 {
0448520c 983 /* Clip to buffer size. */
a54e0bf8 984 buffer->next_line = buffer->rlimit;
a54e0bf8 985 }
6e04daf1 986
987 return_at_eof = buffer->return_at_eof;
a54e0bf8 988 _cpp_pop_buffer (pfile);
6e04daf1 989 if (pfile->buffer == NULL || return_at_eof)
11b5269c 990 return false;
a54e0bf8 991 }
0bb65704 992}
993
/* Helper for _cpp_lex_direct; relies on `result' (the token being
   built) and `buffer' (the current cpp_buffer) being in scope.

   Sets result->type to ELSE_TYPE, then, if the next unread character
   is CHAR, consumes it and sets result->type to THEN_TYPE instead.
   Each argument is parenthesized so that compound expressions (for
   example a conditional expression) expand with the intended
   precedence.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = (ELSE_TYPE);                       \
      if (*buffer->cur == (CHAR))                       \
        buffer->cur++, result->type = (THEN_TYPE);      \
    }                                                   \
  while (0)
1c124f85 1002
10b4496a 1003/* Lex a token into pfile->cur_token, which is also incremented, to
1004 get diagnostics pointing to the correct location.
1005
1006 Does not handle issues such as token lookahead, multiple-include
4172d65e 1007 optimization, directives, skipping etc. This function is only
10b4496a 1008 suitable for use by _cpp_lex_token, and in special cases like
1009 lex_expansion_token which doesn't care for any of these issues.
1010
1011 When meeting a newline, returns CPP_EOF if parsing a directive,
1012 otherwise returns to the start of the token buffer if permissible.
1013 Returns a pointer to the lexed token. */
1014cpp_token *
f7fdd7a1 1015_cpp_lex_direct (cpp_reader *pfile)
0578f103 1016{
338fa5f7 1017 cppchar_t c;
230f0943 1018 cpp_buffer *buffer;
338fa5f7 1019 const unsigned char *comment_start;
10b4496a 1020 cpp_token *result = pfile->cur_token++;
0653b94e 1021
83dcbb5c 1022 fresh_line:
a54e0bf8 1023 result->flags = 0;
82166c5c 1024 buffer = pfile->buffer;
11b5269c 1025 if (buffer->need_line)
a54e0bf8 1026 {
/* The end of the line inside a deferred pragma yields a CPP_PRAGMA_EOL
   token and leaves deferred-pragma state.  */
b75b98aa 1027 if (pfile->state.in_deferred_pragma)
1028 {
1029 result->type = CPP_PRAGMA_EOL;
1030 pfile->state.in_deferred_pragma = false;
1031 if (!pfile->state.pragma_allow_expansion)
1032 pfile->state.prevent_expansion--;
1033 return result;
1034 }
a54e0bf8 1035 if (!_cpp_get_fresh_line (pfile))
1036 {
1037 result->type = CPP_EOF;
2908f819 1038 if (!pfile->state.in_directive)
1039 {
1040 /* Tell the compiler the line number of the EOF token. */
dbddc569 1041 result->src_loc = pfile->line_table->highest_line;
2908f819 1042 result->flags = BOL;
1043 }
a54e0bf8 1044 return result;
1045 }
/* When tokens are not being kept, reuse the base token run from its
   first slot for the new line.  */
1046 if (!pfile->keep_tokens)
1047 {
1048 pfile->cur_run = &pfile->base_run;
1049 result = pfile->base_run.base;
1050 pfile->cur_token = result + 1;
1051 }
1052 result->flags = BOL;
1053 if (pfile->state.parsing_args == 2)
1054 result->flags |= PREV_WHITE;
1055 }
/* Re-read pfile->buffer: _cpp_get_fresh_line may have popped buffers.  */
11b5269c 1056 buffer = pfile->buffer;
83dcbb5c 1057 update_tokens_line:
dbddc569 1058 result->src_loc = pfile->line_table->highest_line;
f80e83a9 1059
83dcbb5c 1060 skipped_white:
a54e0bf8 1061 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1062 && !pfile->overlaid_buffer)
1063 {
1064 _cpp_process_line_notes (pfile, false);
dbddc569 1065 result->src_loc = pfile->line_table->highest_line;
a54e0bf8 1066 }
/* C is the first character of the token; buffer->cur now points just
   past it, which is why the cases below pass buffer->cur - 1 as the
   token start.  */
1c124f85 1067 c = *buffer->cur++;
610625e3 1068
dbddc569 1069 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1070 CPP_BUF_COLUMN (buffer, buffer->cur));
83dcbb5c 1071
338fa5f7 1072 switch (c)
0578f103 1073 {
435fb09b 1074 case ' ': case '\t': case '\f': case '\v': case '\0':
1075 result->flags |= PREV_WHITE;
a54e0bf8 1076 skip_whitespace (pfile, c);
1077 goto skipped_white;
338fa5f7 1078
/* End of line: flag the buffer as needing a new line and start over.  */
a54e0bf8 1079 case '\n':
610625e3 1080 if (buffer->cur < buffer->rlimit)
1081 CPP_INCREMENT_LINE (pfile, 0);
a54e0bf8 1082 buffer->need_line = true;
1083 goto fresh_line;
732cb4c9 1084
338fa5f7 1085 case '0': case '1': case '2': case '3': case '4':
1086 case '5': case '6': case '7': case '8': case '9':
bce47149 1087 {
1088 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1089 result->type = CPP_NUMBER;
1090 lex_number (pfile, &result->val.str, &nst);
1091 warn_about_normalization (pfile, result, &nst);
1092 break;
1093 }
732cb4c9 1094
78c551ad 1095 case 'L':
924bbf02 1096 case 'u':
1097 case 'U':
1098 /* 'L', 'u' or 'U' may introduce wide characters or strings. */
1099 if (c == 'L' || CPP_OPTION (pfile, uliterals))
5bb46c08 1100 {
924bbf02 1101 if (*buffer->cur == '\'' || *buffer->cur == '"')
1102 {
1103 lex_string (pfile, result, buffer->cur - 1);
1104 break;
1105 }
5bb46c08 1106 }
b1a9ff83 1107 /* Fall through. */
78c551ad 1108
338fa5f7 1109 case '_':
1110 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1111 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1112 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
924bbf02 1113 case 's': case 't': case 'v': case 'w': case 'x':
338fa5f7 1114 case 'y': case 'z':
1115 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
78c551ad 1116 case 'G': case 'H': case 'I': case 'J': case 'K':
338fa5f7 1117 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
924bbf02 1118 case 'S': case 'T': case 'V': case 'W': case 'X':
338fa5f7 1119 case 'Y': case 'Z':
1120 result->type = CPP_NAME;
bce47149 1121 {
1122 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1123 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1124 &nst);
1125 warn_about_normalization (pfile, result, &nst);
1126 }
338fa5f7 1127
338fa5f7 1128 /* Convert named operators to their proper types. */
78c551ad 1129 if (result->val.node->flags & NODE_OPERATOR)
338fa5f7 1130 {
1131 result->flags |= NAMED_OP;
720aca92 1132 result->type = (enum cpp_ttype) result->val.node->directive_index;
338fa5f7 1133 }
1134 break;
1135
1136 case '\'':
1137 case '"':
4970d4c2 1138 lex_string (pfile, result, buffer->cur - 1);
338fa5f7 1139 break;
f80e83a9 1140
338fa5f7 1141 case '/':
f0495c2c 1142 /* A potential block or line comment. */
1143 comment_start = buffer->cur;
edaf8cb5 1144 c = *buffer->cur;
1145
f0495c2c 1146 if (c == '*')
1147 {
a54e0bf8 1148 if (_cpp_skip_block_comment (pfile))
d80d2074 1149 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
338fa5f7 1150 }
1c124f85 1151 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
610625e3 1152 || cpp_in_system_header (pfile)))
338fa5f7 1153 {
5db5d057 1154 /* Warn about comments only if pedantically GNUC89, and not
1155 in system headers. */
1156 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
66914e49 1157 && ! buffer->warned_cplusplus_comments)
f80e83a9 1158 {
d80d2074 1159 cpp_error (pfile, CPP_DL_PEDWARN,
ba059ac0 1160 "C++ style comments are not allowed in ISO C90");
d80d2074 1161 cpp_error (pfile, CPP_DL_PEDWARN,
73328dce 1162 "(this will be reported only once per input file)");
f0495c2c 1163 buffer->warned_cplusplus_comments = 1;
1164 }
338fa5f7 1165
e1caf668 1166 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
d80d2074 1167 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
f0495c2c 1168 }
1c124f85 1169 else if (c == '=')
1170 {
edaf8cb5 1171 buffer->cur++;
1c124f85 1172 result->type = CPP_DIV_EQ;
1173 break;
1174 }
1175 else
1176 {
1c124f85 1177 result->type = CPP_DIV;
1178 break;
1179 }
338fa5f7 1180
/* Only the two comment branches above reach this point; the operator
   branches have already left the switch.  */
f0495c2c 1181 if (!pfile->state.save_comments)
1182 {
1183 result->flags |= PREV_WHITE;
83dcbb5c 1184 goto update_tokens_line;
338fa5f7 1185 }
f0495c2c 1186
1187 /* Save the comment as a token in its own right. */
d3f7919d 1188 save_comment (pfile, result, comment_start, c);
fb5ab82c 1189 break;
338fa5f7 1190
1191 case '<':
1192 if (pfile->state.angled_headers)
1193 {
4970d4c2 1194 lex_string (pfile, result, buffer->cur - 1);
/* NOTE(review): a type other than CPP_LESS presumably means lex_string
   produced a header-name token; otherwise '<' is lexed as an operator
   below -- confirm against lex_string.  */
7811eab5 1195 if (result->type != CPP_LESS)
1196 break;
338fa5f7 1197 }
0578f103 1198
edaf8cb5 1199 result->type = CPP_LESS;
1200 if (*buffer->cur == '=')
1201 buffer->cur++, result->type = CPP_LESS_EQ;
1202 else if (*buffer->cur == '<')
338fa5f7 1203 {
edaf8cb5 1204 buffer->cur++;
1205 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
338fa5f7 1206 }
edaf8cb5 1207 else if (CPP_OPTION (pfile, digraphs))
1c124f85 1208 {
edaf8cb5 1209 if (*buffer->cur == ':')
1210 {
1211 buffer->cur++;
1212 result->flags |= DIGRAPH;
1213 result->type = CPP_OPEN_SQUARE;
1214 }
1215 else if (*buffer->cur == '%')
1216 {
1217 buffer->cur++;
1218 result->flags |= DIGRAPH;
1219 result->type = CPP_OPEN_BRACE;
1220 }
1c124f85 1221 }
338fa5f7 1222 break;
1223
1224 case '>':
edaf8cb5 1225 result->type = CPP_GREATER;
1226 if (*buffer->cur == '=')
1227 buffer->cur++, result->type = CPP_GREATER_EQ;
1228 else if (*buffer->cur == '>')
338fa5f7 1229 {
edaf8cb5 1230 buffer->cur++;
1231 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1232 }
338fa5f7 1233 break;
1234
f669338a 1235 case '%':
edaf8cb5 1236 result->type = CPP_MOD;
1237 if (*buffer->cur == '=')
1238 buffer->cur++, result->type = CPP_MOD_EQ;
1239 else if (CPP_OPTION (pfile, digraphs))
1c124f85 1240 {
edaf8cb5 1241 if (*buffer->cur == ':')
1c124f85 1242 {
edaf8cb5 1243 buffer->cur++;
1244 result->flags |= DIGRAPH;
1245 result->type = CPP_HASH;
/* "%:%:" is the digraph spelling of the paste operator.  */
1246 if (*buffer->cur == '%' && buffer->cur[1] == ':')
941f2388 1247 buffer->cur += 2, result->type = CPP_PASTE, result->val.arg_no = 0;
edaf8cb5 1248 }
1249 else if (*buffer->cur == '>')
1250 {
1251 buffer->cur++;
1252 result->flags |= DIGRAPH;
1253 result->type = CPP_CLOSE_BRACE;
1c124f85 1254 }
1c124f85 1255 }
338fa5f7 1256 break;
1257
f669338a 1258 case '.':
1c124f85 1259 result->type = CPP_DOT;
/* A '.' followed by a digit starts a number.  */
edaf8cb5 1260 if (ISDIGIT (*buffer->cur))
1c124f85 1261 {
bce47149 1262 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1c124f85 1263 result->type = CPP_NUMBER;
bce47149 1264 lex_number (pfile, &result->val.str, &nst);
1265 warn_about_normalization (pfile, result, &nst);
1c124f85 1266 }
edaf8cb5 1267 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1268 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1269 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1270 buffer->cur++, result->type = CPP_DOT_STAR;
338fa5f7 1271 break;
0578f103 1272
338fa5f7 1273 case '+':
edaf8cb5 1274 result->type = CPP_PLUS;
1275 if (*buffer->cur == '+')
1276 buffer->cur++, result->type = CPP_PLUS_PLUS;
1277 else if (*buffer->cur == '=')
1278 buffer->cur++, result->type = CPP_PLUS_EQ;
338fa5f7 1279 break;
ac0749c7 1280
338fa5f7 1281 case '-':
edaf8cb5 1282 result->type = CPP_MINUS;
1283 if (*buffer->cur == '>')
338fa5f7 1284 {
edaf8cb5 1285 buffer->cur++;
1c124f85 1286 result->type = CPP_DEREF;
edaf8cb5 1287 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1288 buffer->cur++, result->type = CPP_DEREF_STAR;
1c124f85 1289 }
edaf8cb5 1290 else if (*buffer->cur == '-')
1291 buffer->cur++, result->type = CPP_MINUS_MINUS;
1292 else if (*buffer->cur == '=')
1293 buffer->cur++, result->type = CPP_MINUS_EQ;
338fa5f7 1294 break;
0578f103 1295
338fa5f7 1296 case '&':
edaf8cb5 1297 result->type = CPP_AND;
1298 if (*buffer->cur == '&')
1299 buffer->cur++, result->type = CPP_AND_AND;
1300 else if (*buffer->cur == '=')
1301 buffer->cur++, result->type = CPP_AND_EQ;
338fa5f7 1302 break;
b1a9ff83 1303
338fa5f7 1304 case '|':
edaf8cb5 1305 result->type = CPP_OR;
1306 if (*buffer->cur == '|')
1307 buffer->cur++, result->type = CPP_OR_OR;
1308 else if (*buffer->cur == '=')
1309 buffer->cur++, result->type = CPP_OR_EQ;
338fa5f7 1310 break;
0578f103 1311
338fa5f7 1312 case ':':
edaf8cb5 1313 result->type = CPP_COLON;
1314 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1315 buffer->cur++, result->type = CPP_SCOPE;
1316 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
338fa5f7 1317 {
edaf8cb5 1318 buffer->cur++;
338fa5f7 1319 result->flags |= DIGRAPH;
1c124f85 1320 result->type = CPP_CLOSE_SQUARE;
1321 }
338fa5f7 1322 break;
0578f103 1323
1c124f85 1324 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1325 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1326 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1327 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
941f2388 1328 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.arg_no = 0; break;
1c124f85 1329
a54e0bf8 1330 case '?': result->type = CPP_QUERY; break;
338fa5f7 1331 case '~': result->type = CPP_COMPL; break;
1332 case ',': result->type = CPP_COMMA; break;
1333 case '(': result->type = CPP_OPEN_PAREN; break;
1334 case ')': result->type = CPP_CLOSE_PAREN; break;
1335 case '[': result->type = CPP_OPEN_SQUARE; break;
1336 case ']': result->type = CPP_CLOSE_SQUARE; break;
1337 case '{': result->type = CPP_OPEN_BRACE; break;
1338 case '}': result->type = CPP_CLOSE_BRACE; break;
1339 case ';': result->type = CPP_SEMICOLON; break;
1340
7fd957fe 1341 /* @ is a punctuator in Objective-C. */
9ee99ac6 1342 case '@': result->type = CPP_ATSIGN; break;
338fa5f7 1343
78c551ad 1344 case '$':
2cbf1359 1345 case '\\':
1346 {
/* Step back so forms_identifier_p examines the '$' or '\\' itself.  */
1347 const uchar *base = --buffer->cur;
bce47149 1348 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
78c551ad 1349
bce47149 1350 if (forms_identifier_p (pfile, true, &nst))
2cbf1359 1351 {
1352 result->type = CPP_NAME;
bce47149 1353 result->val.node = lex_identifier (pfile, base, true, &nst);
1354 warn_about_normalization (pfile, result, &nst);
2cbf1359 1355 break;
1356 }
/* Not an identifier: re-consume the character and fall through so it
   becomes a one-character CPP_OTHER token.  */
1357 buffer->cur++;
bc205914 1358 }
2cbf1359 1359
bc205914 1360 default:
4970d4c2 1361 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1362 break;
338fa5f7 1363 }
fb5ab82c 1364
1365 return result;
338fa5f7 1366}
1367
b1280514 1368/* An upper bound on the number of bytes needed to spell TOKEN.
1369 Does not include preceding whitespace. */
79bd622b 1370unsigned int
f7fdd7a1 1371cpp_token_len (const cpp_token *token)
338fa5f7 1372{
79bd622b 1373 unsigned int len;
cfad5579 1374
79bd622b 1375 switch (TOKEN_SPELL (token))
f80e83a9 1376 {
cd740bd5 1377 default: len = 6; break;
4970d4c2 1378 case SPELL_LITERAL: len = token->val.str.len; break;
bb1fa6bb 1379 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
f80e83a9 1380 }
b1280514 1381
1382 return len;
cfad5579 1383}
1384
/* Decode the UTF-8 sequence starting at NAME and store the matching
   escape, a backslash followed by 'U' and eight lowercase hex digits,
   in BUFFER.  Exactly 10 bytes are written and no terminating NUL is
   added.  Returns the number of bytes consumed from NAME.  Calls
   abort if a trailing byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *src = name;
  unsigned char *dst = buffer;
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int remaining;
  int shift;

  /* The number of leading one bits in the first byte gives the
     length of the whole sequence.  */
  for (lead = *src; (lead & 0x80) != 0; lead <<= 1)
    seq_len++;

  /* The payload bits of the lead byte sit below the length marker.  */
  code_point = *src & (0x7F >> seq_len);

  /* Every trailing byte contributes its low six bits.  */
  for (remaining = seq_len - 1; remaining > 0; remaining--)
    {
      src++;
      code_point = (code_point << 6) | (*src & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*src & 0xC0) != 0x80)
        abort ();
    }

  *dst++ = '\\';
  *dst++ = 'U';
  for (shift = 28; shift >= 0; shift -= 4)
    *dst++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1418
ba99525e 1419/* Given a token TYPE corresponding to a digraph, return a pointer to
1420 the spelling of the digraph. */
1421static const unsigned char *
1422cpp_digraph2name (enum cpp_ttype type)
1423{
1424 return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
1425}
bb1fa6bb 1426
f80e83a9 1427/* Write the spelling of a token TOKEN to BUFFER. The buffer must
c5ea33a8 1428 already contain the enough space to hold the token's spelling.
f7fdd7a1 1429 Returns a pointer to the character after the last character written.
bb1fa6bb 1430 FORSTRING is true if this is to be the spelling after translation
1431 phase 1 (this is different for UCNs).
f7fdd7a1 1432 FIXME: Would be nice if we didn't need the PFILE argument. */
79bd622b 1433unsigned char *
f7fdd7a1 1434cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
bb1fa6bb 1435 unsigned char *buffer, bool forstring)
f80e83a9 1436{
7e842f95 1437 switch (TOKEN_SPELL (token))
f80e83a9 1438 {
1439 case SPELL_OPERATOR:
1440 {
1441 const unsigned char *spelling;
1442 unsigned char c;
ab12a39c 1443
f80e83a9 1444 if (token->flags & DIGRAPH)
ba99525e 1445 spelling = cpp_digraph2name (token->type);
31674461 1446 else if (token->flags & NAMED_OP)
1447 goto spell_ident;
f80e83a9 1448 else
7e842f95 1449 spelling = TOKEN_NAME (token);
b1a9ff83 1450
f80e83a9 1451 while ((c = *spelling++) != '\0')
1452 *buffer++ = c;
1453 }
1454 break;
ab12a39c 1455
8d27e472 1456 spell_ident:
f80e83a9 1457 case SPELL_IDENT:
bb1fa6bb 1458 if (forstring)
1459 {
1460 memcpy (buffer, NODE_NAME (token->val.node),
1461 NODE_LEN (token->val.node));
1462 buffer += NODE_LEN (token->val.node);
1463 }
1464 else
1465 {
1466 size_t i;
1467 const unsigned char * name = NODE_NAME (token->val.node);
1468
1469 for (i = 0; i < NODE_LEN (token->val.node); i++)
1470 if (name[i] & ~0x7F)
1471 {
1472 i += utf8_to_ucn (buffer, name + i) - 1;
1473 buffer += 10;
1474 }
1475 else
1476 *buffer++ = NODE_NAME (token->val.node)[i];
1477 }
f80e83a9 1478 break;
ab12a39c 1479
4970d4c2 1480 case SPELL_LITERAL:
8d27e472 1481 memcpy (buffer, token->val.str.text, token->val.str.len);
1482 buffer += token->val.str.len;
1483 break;
1484
f80e83a9 1485 case SPELL_NONE:
d80d2074 1486 cpp_error (pfile, CPP_DL_ICE,
1487 "unspellable token %s", TOKEN_NAME (token));
f80e83a9 1488 break;
1489 }
ab12a39c 1490
f80e83a9 1491 return buffer;
1492}
ab12a39c 1493
e484a1cc 1494/* Returns TOKEN spelt as a null-terminated string. The string is
1495 freed when the reader is destroyed. Useful for diagnostics. */
79bd622b 1496unsigned char *
f7fdd7a1 1497cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
b1280514 1498{
1499 unsigned int len = cpp_token_len (token) + 1;
1fdf6039 1500 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
6060326b 1501
bb1fa6bb 1502 end = cpp_spell_token (pfile, token, start, false);
79bd622b 1503 end[0] = '\0';
6060326b 1504
79bd622b 1505 return start;
1506}
6060326b 1507
ba99525e 1508/* Returns a pointer to a string which spells the token defined by
1509 TYPE and FLAGS. Used by C front ends, which really should move to
1510 using cpp_token_as_text. */
79bd622b 1511const char *
ba99525e 1512cpp_type2name (enum cpp_ttype type, unsigned char flags)
79bd622b 1513{
ba99525e 1514 if (flags & DIGRAPH)
1515 return (const char *) cpp_digraph2name (type);
1516 else if (flags & NAMED_OP)
1517 return cpp_named_operator2name (type);
1518
79bd622b 1519 return (const char *) token_spellings[type].name;
1520}
6060326b 1521
f9b5f742 1522/* Writes the spelling of token to FP, without any preceding space.
1523 Separated from cpp_spell_token for efficiency - to avoid stdio
1524 double-buffering. */
79bd622b 1525void
f7fdd7a1 1526cpp_output_token (const cpp_token *token, FILE *fp)
79bd622b 1527{
79bd622b 1528 switch (TOKEN_SPELL (token))
6060326b 1529 {
79bd622b 1530 case SPELL_OPERATOR:
1531 {
1532 const unsigned char *spelling;
28874558 1533 int c;
6060326b 1534
79bd622b 1535 if (token->flags & DIGRAPH)
ba99525e 1536 spelling = cpp_digraph2name (token->type);
79bd622b 1537 else if (token->flags & NAMED_OP)
1538 goto spell_ident;
1539 else
1540 spelling = TOKEN_NAME (token);
f80e83a9 1541
28874558 1542 c = *spelling;
1543 do
1544 putc (c, fp);
1545 while ((c = *++spelling) != '\0');
79bd622b 1546 }
1547 break;
f80e83a9 1548
79bd622b 1549 spell_ident:
1550 case SPELL_IDENT:
bb1fa6bb 1551 {
1552 size_t i;
1553 const unsigned char * name = NODE_NAME (token->val.node);
1554
1555 for (i = 0; i < NODE_LEN (token->val.node); i++)
1556 if (name[i] & ~0x7F)
1557 {
1558 unsigned char buffer[10];
1559 i += utf8_to_ucn (buffer, name + i) - 1;
1560 fwrite (buffer, 1, 10, fp);
1561 }
1562 else
1563 fputc (NODE_NAME (token->val.node)[i], fp);
1564 }
1565 break;
f80e83a9 1566
4970d4c2 1567 case SPELL_LITERAL:
8d27e472 1568 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1569 break;
1570
79bd622b 1571 case SPELL_NONE:
1572 /* An error, most probably. */
1573 break;
f80e83a9 1574 }
6060326b 1575}
1576
79bd622b 1577/* Compare two tokens. */
1578int
f7fdd7a1 1579_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
6060326b 1580{
79bd622b 1581 if (a->type == b->type && a->flags == b->flags)
1582 switch (TOKEN_SPELL (a))
1583 {
1584 default: /* Keep compiler happy. */
1585 case SPELL_OPERATOR:
941f2388 1586 /* arg_no is used to track where multiple consecutive ##
1587 tokens were originally located. */
1588 return (a->type != CPP_PASTE || a->val.arg_no == b->val.arg_no);
79bd622b 1589 case SPELL_NONE:
588d632b 1590 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
79bd622b 1591 case SPELL_IDENT:
1592 return a->val.node == b->val.node;
4970d4c2 1593 case SPELL_LITERAL:
79bd622b 1594 return (a->val.str.len == b->val.str.len
1595 && !memcmp (a->val.str.text, b->val.str.text,
1596 a->val.str.len));
1597 }
6060326b 1598
f80e83a9 1599 return 0;
1600}
1601
79bd622b 1602/* Returns nonzero if a space should be inserted to avoid an
1603 accidental token paste for output. For simplicity, it is
1604 conservative, and occasionally advises a space where one is not
1605 needed, e.g. "." and ".2". */
79bd622b 1606int
f7fdd7a1 1607cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1608 const cpp_token *token2)
6060326b 1609{
79bd622b 1610 enum cpp_ttype a = token1->type, b = token2->type;
1611 cppchar_t c;
6060326b 1612
79bd622b 1613 if (token1->flags & NAMED_OP)
1614 a = CPP_NAME;
1615 if (token2->flags & NAMED_OP)
1616 b = CPP_NAME;
6060326b 1617
79bd622b 1618 c = EOF;
1619 if (token2->flags & DIGRAPH)
ee6c4e4b 1620 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
79bd622b 1621 else if (token_spellings[b].category == SPELL_OPERATOR)
1622 c = token_spellings[b].name[0];
6060326b 1623
79bd622b 1624 /* Quickly get everything that can paste with an '='. */
ee6c4e4b 1625 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
79bd622b 1626 return 1;
6060326b 1627
79bd622b 1628 switch (a)
6060326b 1629 {
e58c07f7 1630 case CPP_GREATER: return c == '>';
1631 case CPP_LESS: return c == '<' || c == '%' || c == ':';
79bd622b 1632 case CPP_PLUS: return c == '+';
1633 case CPP_MINUS: return c == '-' || c == '>';
1634 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1635 case CPP_MOD: return c == ':' || c == '>';
1636 case CPP_AND: return c == '&';
1637 case CPP_OR: return c == '|';
1638 case CPP_COLON: return c == ':' || c == '>';
1639 case CPP_DEREF: return c == '*';
efdcc728 1640 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
79bd622b 1641 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1642 case CPP_NAME: return ((b == CPP_NUMBER
1643 && name_p (pfile, &token2->val.str))
1644 || b == CPP_NAME
1645 || b == CPP_CHAR || b == CPP_STRING); /* L */
1646 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1647 || c == '.' || c == '+' || c == '-');
2cbf1359 1648 /* UCNs */
bc205914 1649 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1650 && b == CPP_NAME)
2cbf1359 1651 || (CPP_OPTION (pfile, objc)
bc205914 1652 && token1->val.str.text[0] == '@'
2cbf1359 1653 && (b == CPP_NAME || b == CPP_STRING)));
79bd622b 1654 default: break;
6060326b 1655 }
6060326b 1656
deb356cf 1657 return 0;
6060326b 1658}
1659
79bd622b 1660/* Output all the remaining tokens on the current line, and a newline
f9b5f742 1661 character, to FP. Leading whitespace is removed. If there are
1662 macros, special token padding is not performed. */
6060326b 1663void
f7fdd7a1 1664cpp_output_line (cpp_reader *pfile, FILE *fp)
6060326b 1665{
f9b5f742 1666 const cpp_token *token;
7e842f95 1667
f9b5f742 1668 token = cpp_get_token (pfile);
1669 while (token->type != CPP_EOF)
7e842f95 1670 {
f9b5f742 1671 cpp_output_token (token, fp);
1672 token = cpp_get_token (pfile);
1673 if (token->flags & PREV_WHITE)
1674 putc (' ', fp);
7e842f95 1675 }
1676
79bd622b 1677 putc ('\n', fp);
f80e83a9 1678}
6060326b 1679
c0770282 1680/* Return a string representation of all the remaining tokens on the
1681 current line. The result is allocated using xmalloc and must be
1682 freed by the caller. */
1683unsigned char *
1684cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
1685{
1686 const cpp_token *token;
1687 unsigned int out = dir_name ? ustrlen (dir_name) : 0;
1688 unsigned int alloced = 120 + out;
1689 unsigned char *result = (unsigned char *) xmalloc (alloced);
1690
1691 /* If DIR_NAME is empty, there are no initial contents. */
1692 if (dir_name)
1693 {
1694 sprintf ((char *) result, "#%s ", dir_name);
1695 out += 2;
1696 }
1697
1698 token = cpp_get_token (pfile);
1699 while (token->type != CPP_EOF)
1700 {
1701 unsigned char *last;
1702 /* Include room for a possible space and the terminating nul. */
1703 unsigned int len = cpp_token_len (token) + 2;
1704
1705 if (out + len > alloced)
1706 {
1707 alloced *= 2;
1708 if (out + len > alloced)
1709 alloced = out + len;
1710 result = (unsigned char *) xrealloc (result, alloced);
1711 }
1712
1713 last = cpp_spell_token (pfile, token, &result[out], 0);
1714 out = last - result;
1715
1716 token = cpp_get_token (pfile);
1717 if (token->flags & PREV_WHITE)
1718 result[out++] = ' ';
1719 }
1720
1721 result[out] = '\0';
1722 return result;
1723}
1724
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the room left in BUFF.
   Every macro argument is parenthesized so that compound expressions
   expand with the intended precedence.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
  ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1739
1785b647 1740/* Create a new allocation buffer. Place the control block at the end
1741 of the buffer, so that buffer overflows will cause immediate chaos. */
06c92cbc 1742static _cpp_buff *
f7fdd7a1 1743new_buff (size_t len)
06c92cbc 1744{
1745 _cpp_buff *result;
1fdf6039 1746 unsigned char *base;
06c92cbc 1747
084163dc 1748 if (len < MIN_BUFF_SIZE)
1749 len = MIN_BUFF_SIZE;
198b48a0 1750 len = CPP_ALIGN (len);
06c92cbc 1751
720aca92 1752 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
06c92cbc 1753 result = (_cpp_buff *) (base + len);
1754 result->base = base;
1755 result->cur = base;
1756 result->limit = base + len;
1757 result->next = NULL;
1758 return result;
1759}
1760
1761/* Place a chain of unwanted allocation buffers on the free list. */
1762void
f7fdd7a1 1763_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
06c92cbc 1764{
1765 _cpp_buff *end = buff;
1766
1767 while (end->next)
1768 end = end->next;
1769 end->next = pfile->free_buffs;
1770 pfile->free_buffs = buff;
1771}
1772
1773/* Return a free buffer of size at least MIN_SIZE. */
1774_cpp_buff *
f7fdd7a1 1775_cpp_get_buff (cpp_reader *pfile, size_t min_size)
06c92cbc 1776{
1777 _cpp_buff *result, **p;
1778
1779 for (p = &pfile->free_buffs;; p = &(*p)->next)
1780 {
4b31a107 1781 size_t size;
084163dc 1782
1783 if (*p == NULL)
06c92cbc 1784 return new_buff (min_size);
084163dc 1785 result = *p;
1786 size = result->limit - result->base;
1787 /* Return a buffer that's big enough, but don't waste one that's
1788 way too big. */
4085c149 1789 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
06c92cbc 1790 break;
1791 }
1792
1793 *p = result->next;
1794 result->next = NULL;
1795 result->cur = result->base;
1796 return result;
1797}
1798
20dd417a 1799/* Creates a new buffer with enough space to hold the uncommitted
e6a5f963 1800 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1801 the excess bytes to the new buffer. Chains the new buffer after
1802 BUFF, and returns the new buffer. */
06c92cbc 1803_cpp_buff *
f7fdd7a1 1804_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
06c92cbc 1805{
4b31a107 1806 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
e6a5f963 1807 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
06c92cbc 1808
e6a5f963 1809 buff->next = new_buff;
1810 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1811 return new_buff;
1812}
1813
20dd417a 1814/* Creates a new buffer with enough space to hold the uncommitted
e6a5f963 1815 remaining bytes of the buffer pointed to by BUFF, and at least
1816 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1817 Chains the new buffer before the buffer pointed to by BUFF, and
1818 updates the pointer to point to the new buffer. */
1819void
f7fdd7a1 1820_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
e6a5f963 1821{
1822 _cpp_buff *new_buff, *old_buff = *pbuff;
1823 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1824
1825 new_buff = _cpp_get_buff (pfile, size);
1826 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1827 new_buff->next = old_buff;
1828 *pbuff = new_buff;
06c92cbc 1829}
1830
1831/* Free a chain of buffers starting at BUFF. */
1832void
f82b06e0 1833_cpp_free_buff (_cpp_buff *buff)
06c92cbc 1834{
1835 _cpp_buff *next;
1836
1837 for (; buff; buff = next)
1838 {
1839 next = buff->next;
1840 free (buff->base);
1841 }
1842}
deb356cf 1843
1fdf6039 1844/* Allocate permanent, unaligned storage of length LEN. */
1845unsigned char *
f7fdd7a1 1846_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1fdf6039 1847{
1848 _cpp_buff *buff = pfile->u_buff;
1849 unsigned char *result = buff->cur;
1850
1851 if (len > (size_t) (buff->limit - result))
1852 {
1853 buff = _cpp_get_buff (pfile, len);
1854 buff->next = pfile->u_buff;
1855 pfile->u_buff = buff;
1856 result = buff->cur;
1857 }
1858
1859 buff->cur = result + len;
1860 return result;
1861}
1862
1e0ef2fd 1863/* Allocate permanent, unaligned storage of length LEN from a_buff.
1864 That buffer is used for growing allocations when saving macro
1865 replacement lists in a #define, and when parsing an answer to an
1866 assertion in #assert, #unassert or #if (and therefore possibly
1867 whilst expanding macros). It therefore must not be used by any
1868 code that they might call: specifically the lexer and the guts of
1869 the macro expander.
1870
1871 All existing other uses clearly fit this restriction: storing
1872 registered pragmas during initialization. */
79bd622b 1873unsigned char *
f7fdd7a1 1874_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
89b05ef6 1875{
e6a5f963 1876 _cpp_buff *buff = pfile->a_buff;
1877 unsigned char *result = buff->cur;
89b05ef6 1878
e6a5f963 1879 if (len > (size_t) (buff->limit - result))
89b05ef6 1880 {
e6a5f963 1881 buff = _cpp_get_buff (pfile, len);
1882 buff->next = pfile->a_buff;
1883 pfile->a_buff = buff;
1884 result = buff->cur;
89b05ef6 1885 }
f80e83a9 1886
e6a5f963 1887 buff->cur = result + len;
79bd622b 1888 return result;
f80e83a9 1889}
c39ed964 1890
1891/* Say which field of TOK is in use. */
1892
1893enum cpp_token_fld_kind
1894cpp_token_val_index (cpp_token *tok)
1895{
1896 switch (TOKEN_SPELL (tok))
1897 {
1898 case SPELL_IDENT:
1899 return CPP_TOKEN_FLD_NODE;
1900 case SPELL_LITERAL:
1901 return CPP_TOKEN_FLD_STR;
941f2388 1902 case SPELL_OPERATOR:
1903 if (tok->type == CPP_PASTE)
1904 return CPP_TOKEN_FLD_ARG_NO;
1905 else
1906 return CPP_TOKEN_FLD_NONE;
c39ed964 1907 case SPELL_NONE:
1908 if (tok->type == CPP_MACRO_ARG)
1909 return CPP_TOKEN_FLD_ARG_NO;
1910 else if (tok->type == CPP_PADDING)
1911 return CPP_TOKEN_FLD_SOURCE;
d6d3c909 1912 else if (tok->type == CPP_PRAGMA)
b75b98aa 1913 return CPP_TOKEN_FLD_PRAGMA;
c39ed964 1914 /* else fall through */
1915 default:
1916 return CPP_TOKEN_FLD_NONE;
1917 }
1918}