]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/cpplex.c
* Make-lang.in (JAVA_SRCS): Include java-tree.h.
[thirdparty/gcc.git] / gcc / cpplex.c
CommitLineData
0578f103 1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
6060326b 7 Single-pass line tokenization by Neil Booth, April 2000
0578f103 8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
f80e83a9 23/*
24
25Cleanups to do:-
26
f80e83a9 27o Check line numbers assigned to all errors.
f80e83a9 28o Distinguish integers, floats, and 'other' pp-numbers.
29o Store ints and char constants as binary values.
30o New command-line assertion syntax.
f80e83a9 31o Work towards functions in cpperror.c taking a message level parameter.
32 If we do this, merge the common code of do_warning and do_error.
33o Comment all functions, and describe macro expansion algorithm.
34o Move as much out of header files as possible.
35o Remove single quote pairs `', and some '', from diagnostics.
36o Correct pastability test for CPP_NAME and CPP_NUMBER.
37
38*/
39
0578f103 40#include "config.h"
41#include "system.h"
42#include "intl.h"
43#include "cpplib.h"
44#include "cpphash.h"
f80e83a9 45#include "symcat.h"
0578f103 46
/* Two file-local, read-only tokens.  Their initialisers name
   CPP_PLACEMARKER and CPP_EOF respectively; every other member is
   zero-initialised.  */
338fa5f7 47static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER,
48 0 UNION_INIT_ZERO};
241e762e 49static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
50
51/* Flags for cpp_context. */
52#define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
53#define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
54#define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
55#define CONTEXT_ARG (1 << 3) /* If an argument context. */
56
/* One entry of the reader's context stack (see CURRENT_CONTEXT, which
   indexes pfile->contexts).  Exactly one member of the union U is
   meaningful, depending on whether this is a macro or an argument
   context.  */
57typedef struct cpp_context cpp_context;
58struct cpp_context
59{
60 union
61 {
62 const cpp_toklist *list; /* Used for macro contexts only. */
63 const cpp_token **arg; /* Used for arg contexts only. */
64 } u;
65
66 /* Pushed token to be returned by next call to get_raw_token. */
67 const cpp_token *pushed_token;
68
fdf3a98f 69 struct macro_args *args; /* The arguments for a function-like
70 macro. NULL otherwise. */
241e762e 71 unsigned short posn; /* Current posn, index into u. */
72 unsigned short count; /* No. of tokens in u. */
73 unsigned short level; /* NOTE(review): meaning not shown in this part of the file - confirm. */
74 unsigned char flags; /* Bitmask of the CONTEXT_* flags above. */
75};
76
/* Storage for the arguments collected for a function-like macro (see
   the ARGS member of cpp_context).  The field notes below are
   inferred from the names and from the parse_arg / save_token
   prototypes only - TODO confirm against those functions.  */
77typedef struct macro_args macro_args;
78struct macro_args
79{
80 unsigned int *ends; /* Presumably one end index into TOKENS per argument - confirm. */
81 const cpp_token **tokens; /* Presumably the saved argument tokens - confirm. */
82 unsigned int capacity; /* Presumably allocated slots in TOKENS - confirm. */
83 unsigned int used; /* Presumably slots of TOKENS in use - confirm. */
84 unsigned short level; /* NOTE(review): meaning not shown here - confirm. */
85};
86
87static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
88static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
89 macro_args *, unsigned int *));
90static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
91static void save_token PARAMS ((macro_args *, const cpp_token *));
92static int pop_context PARAMS ((cpp_reader *));
93static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
94static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
95static void free_macro_args PARAMS ((macro_args *));
6cae2504 96static void dump_param_spelling PARAMS ((FILE *, const cpp_toklist *,
50fd6b48 97 unsigned int));
f80e83a9 98static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
2c63d6c8 99 unsigned int));
e2f9a79f 100
338fa5f7 101static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
102static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
103static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
104
f80e83a9 105static int skip_block_comment PARAMS ((cpp_reader *));
338fa5f7 106static int skip_line_comment PARAMS ((cpp_buffer *));
107static void adjust_column PARAMS ((cpp_reader *));
108static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
109static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
110static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t));
111static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
112static void unterminated PARAMS ((cpp_reader *, unsigned int, int));
113static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
114static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
f80e83a9 115static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
338fa5f7 116static void check_long_token PARAMS ((cpp_buffer *,
117 cpp_token *,
118 cppchar_t,
119 enum cpp_ttype));
120static void lex_token PARAMS ((cpp_reader *, cpp_token *));
f80e83a9 121static int lex_next PARAMS ((cpp_reader *, int));
338fa5f7 122
123static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
f80e83a9 124static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
125 const cpp_token *));
6bc03ce3 126
f80e83a9 127static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
128static void expand_context_stack PARAMS ((cpp_reader *));
d2efb5ed 129static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
f80e83a9 130 unsigned char *));
6cae2504 131static void output_token PARAMS ((cpp_reader *, FILE *, const cpp_token *,
132 const cpp_token *, int));
6bc03ce3 133typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
134 cpp_token *));
f80e83a9 135static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
136 unsigned int));
137static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
138static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
139 const cpp_token *));
140static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
141static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
142 const cpp_token *));
143static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
144 const cpp_token *, int *));
145static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
146static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
147static cpp_token *get_temp_token PARAMS ((cpp_reader *));
148static void release_temp_tokens PARAMS ((cpp_reader *));
149static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
150static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
6bc03ce3 151
/* Non-zero if C is a '+' or '-' that may continue a pp-number whose
   previous character was PREVC: PREVC must be 'e' or 'E', or - unless
   the c89 option is set - 'p' or 'P'.  The expansion refers to a
   variable named pfile, which must be in scope at the point of use.  */
f80e83a9 152#define VALID_SIGN(c, prevc) \
153 (((c) == '+' || (c) == '-') && \
154 ((prevc) == 'e' || (prevc) == 'E' \
155 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
156
e916a356 157/* An upper bound on the number of bytes needed to spell a token,
158 including preceding whitespace. */
6cae2504 159static inline size_t TOKEN_LEN PARAMS ((const cpp_token *));
160static inline size_t
161TOKEN_LEN (token)
162 const cpp_token *token;
163{
164 size_t len;
165
166 switch (TOKEN_SPELL (token))
167 {
168 default: len = 0; break;
169 case SPELL_STRING: len = token->val.str.len; break;
170 case SPELL_IDENT: len = token->val.node->length; break;
171 }
172 return len + 5;
173}
e916a356 174
/* Non-zero if context C has the CONTEXT_ARG flag set.  */
241e762e 175#define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
/* Address of the context at index cur_context in PFILE's context array.  */
176#define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
/* Non-zero if macro context C's list has the VAR_ARGS flag and the
   token at index posn - 1 carries, in val.aux, the index of the last
   parameter (paramc - 1).  Only meaningful when C is a macro context
   (it reads u.list) and posn is at least 1.  */
e0a859f1 177#define ON_REST_ARG(c) \
58fe658a 178 (((c)->u.list->flags & VAR_ARGS) \
179 && (c)->u.list->tokens[(c)->posn - 1].val.aux \
180 == (unsigned int) ((c)->u.list->paramc - 1))
241e762e 181
/* Set D's flags to S's PREV_WHITE, BOL and PASTE_LEFT bits only; if
   BOL is among them, also copy S's column and line into D.  */
182#define ASSIGN_FLAGS_AND_POS(d, s) \
183 do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
184 if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
185 } while (0)
186
187/* Replace D's PREV_WHITE and BOL bits with F, leaving D's other
   flags alone; if F includes BOL, also copy S's column and line
   into D.  F is flags, just consisting of PREV_WHITE | BOL.  Note
   that F is evaluated more than once. */
188#define MODIFY_FLAGS_AND_POS(d, s, f) \
189 do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
190 if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
191 } while (0)
192
/* Build the token spelling table by expanding TTYPE_TABLE with
   temporary definitions of OP and TK.  An OP (e, s) entry becomes
   { SPELL_OPERATOR, U s }; a TK (e, s) entry becomes { s, U "e" },
   i.e. S is used as the spelling class and the enumerator's own name,
   stringified, as the text.  Both helper macros are undefined again
   once the table has been built.  */
7e842f95 193#define OP(e, s) { SPELL_OPERATOR, U s },
194#define TK(e, s) { s, U STRINGX (e) },
6bc03ce3 195
f80e83a9 196const struct token_spelling
7e842f95 197_cpp_token_spellings [N_TTYPES] = {TTYPE_TABLE };
f80e83a9 198
7e842f95 199#undef OP
200#undef TK
6bc03ce3 201
e2f9a79f 202/* Notify the compiler proper that the current line number has jumped,
203 or the current file name has changed. */
204
205static void
50fd6b48 206output_line_command (pfile, print, line)
0578f103 207 cpp_reader *pfile;
e2f9a79f 208 cpp_printer *print;
50fd6b48 209 unsigned int line;
0578f103 210{
f80e83a9 211 cpp_buffer *ip = CPP_BUFFER (pfile);
e2f9a79f 212
f80e83a9 213 if (line == 0)
214 return;
215
216 /* End the previous line of text. */
217 if (pfile->need_newline)
6cae2504 218 {
219 putc ('\n', print->outf);
220 print->lineno++;
221 }
f80e83a9 222 pfile->need_newline = 0;
223
e2f9a79f 224 if (CPP_OPTION (pfile, no_line_commands))
225 return;
226
e2f9a79f 227 /* If the current file has not changed, we can output a few newlines
228 instead if we want to increase the line number by a small amount.
229 We cannot do this if print->lineno is zero, because that means we
230 haven't output any line commands yet. (The very first line
6cae2504 231 command output is a `same_file' command.)
232
233 'nominal_fname' values are unique, so they can be compared by
234 comparing pointers. */
235 if (ip->nominal_fname == print->last_fname && print->lineno > 0
e2f9a79f 236 && line >= print->lineno && line < print->lineno + 8)
0578f103 237 {
e2f9a79f 238 while (line > print->lineno)
0578f103 239 {
e2f9a79f 240 putc ('\n', print->outf);
241 print->lineno++;
0578f103 242 }
e2f9a79f 243 return;
0578f103 244 }
e2f9a79f 245
6cae2504 246 fprintf (print->outf, "# %u \"%s\"%s\n", line, ip->nominal_fname,
247 cpp_syshdr_flags (pfile, ip));
248
249 print->last_fname = ip->nominal_fname;
e2f9a79f 250 print->lineno = line;
251}
252
6cae2504 253/* Like fprintf, but writes to a printer object. You should be sure
254 always to generate a complete line when you use this function.

   If PFILE has a partial line of output pending (need_newline), a
   newline is written to PRINT's stream and PRINT's line counter is
   bumped before FMT is formatted.  The text produced by FMT itself
   is not inspected, so PRINT's line counter is not advanced for any
   newlines it contains. */
e2f9a79f 255void
6cae2504 256cpp_printf VPARAMS ((cpp_reader *pfile, cpp_printer *print,
257 const char *fmt, ...))
258{
259 va_list ap;
/* Without ANSI prototypes the named arguments are not declared by the
   function header, so they are declared here as locals...  */
260#ifndef ANSI_PROTOTYPES
261 cpp_reader *pfile;
262 cpp_printer *print;
263 const char *fmt;
264#endif
0578f103 265
6cae2504 266 VA_START (ap, fmt);
267
/* ...and fetched from the argument list, in declaration order.  */
268#ifndef ANSI_PROTOTYPES
269 pfile = va_arg (ap, cpp_reader *);
270 print = va_arg (ap, cpp_printer *);
271 fmt = va_arg (ap, const char *);
272#endif
273
274 /* End the previous line of text. */
275 if (pfile->need_newline)
bad6bf53 276 {
277 putc ('\n', print->outf);
278 print->lineno++;
279 }
6cae2504 280 pfile->need_newline = 0;
281
282 vfprintf (print->outf, fmt, ap);
283 va_end (ap);
0578f103 284}
285
d1678bc6 286/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
e2f9a79f 287
288void
289cpp_scan_buffer_nooutput (pfile)
290 cpp_reader *pfile;
291{
f80e83a9 292 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
dfa5c7fa 293 const cpp_token *token;
f80e83a9 294
dfa5c7fa 295 /* In no-output mode, we can ignore everything but directives. */
e2f9a79f 296 for (;;)
297 {
dfa5c7fa 298 token = _cpp_get_token (pfile);
299
f80e83a9 300 if (token->type == CPP_EOF)
301 {
302 cpp_pop_buffer (pfile);
303 if (CPP_BUFFER (pfile) == stop)
304 break;
305 }
dfa5c7fa 306
307 if (token->type == CPP_HASH && token->flags & BOL
308 && pfile->token_list.directive)
309 {
310 process_directive (pfile, token);
311 continue;
312 }
313
f80e83a9 314 _cpp_skip_rest_of_line (pfile);
e2f9a79f 315 }
e2f9a79f 316}
317
d1678bc6 318/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
e2f9a79f 319void
320cpp_scan_buffer (pfile, print)
321 cpp_reader *pfile;
322 cpp_printer *print;
323{
d1678bc6 324 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
f80e83a9 325 const cpp_token *token, *prev = 0;
e2f9a79f 326
327 for (;;)
328 {
dfa5c7fa 329 token = _cpp_get_token (pfile);
f80e83a9 330 if (token->type == CPP_EOF)
e2f9a79f 331 {
f80e83a9 332 cpp_pop_buffer (pfile);
e9d42dd4 333
f80e83a9 334 if (CPP_BUFFER (pfile) == stop)
e2f9a79f 335 return;
dfa5c7fa 336
f80e83a9 337 prev = 0;
338 continue;
339 }
340
341 if (token->flags & BOL)
342 {
338fa5f7 343 output_line_command (pfile, print, token->line);
bad6bf53 344 prev = 0;
345
dfa5c7fa 346 if (token->type == CPP_HASH && pfile->token_list.directive)
347 {
348 process_directive (pfile, token);
349 continue;
350 }
e2f9a79f 351 }
f80e83a9 352
dfa5c7fa 353 if (token->type != CPP_PLACEMARKER)
241e762e 354 {
6cae2504 355 output_token (pfile, print->outf, token, prev, 1);
356 pfile->need_newline = 1;
241e762e 357 }
358
241e762e 359 prev = token;
360 }
241e762e 361}
362
f80e83a9 363/* Helper routine used by parse_include, which can't see spell_token.
364 Reinterpret the current line as an h-char-sequence (< ... >); we are
365 looking at the first token after the <. */
366const cpp_token *
367_cpp_glue_header_name (pfile)
0578f103 368 cpp_reader *pfile;
369{
f80e83a9 370 const cpp_token *t;
371 cpp_token *hdr;
6cae2504 372 U_CHAR *buf, *p;
373 size_t len, avail;
374
375 avail = 40;
376 len = 0;
377 buf = xmalloc (avail);
f80e83a9 378
379 for (;;)
380 {
deb356cf 381 t = _cpp_get_token (pfile);
f80e83a9 382 if (t->type == CPP_GREATER || t->type == CPP_EOF)
383 break;
384
6cae2504 385 if (len + TOKEN_LEN (t) > avail)
386 {
387 avail = len + TOKEN_LEN (t) + 40;
388 buf = xrealloc (buf, avail);
389 }
390
f80e83a9 391 if (t->flags & PREV_WHITE)
6cae2504 392 buf[len++] = ' ';
393
394 p = spell_token (pfile, t, buf + len);
395 len = (size_t) (p - buf); /* p known >= buf */
f80e83a9 396 }
397
398 if (t->type == CPP_EOF)
399 cpp_error (pfile, "missing terminating > character");
0578f103 400
6cae2504 401 buf = xrealloc (buf, len);
f80e83a9 402
403 hdr = get_temp_token (pfile);
404 hdr->type = CPP_HEADER_NAME;
405 hdr->flags = 0;
76faa4c0 406 hdr->val.str.text = buf;
407 hdr->val.str.len = len;
f80e83a9 408 return hdr;
0578f103 409}
410
50fd6b48 411/* Token-buffer helper functions. */
412
d2efb5ed 413/* Expand a token list's string space. It is *vital* that
414 list->tokens_used is correct, to get pointer fix-up right. */
f80e83a9 415void
416_cpp_expand_name_space (list, len)
50fd6b48 417 cpp_toklist *list;
6060326b 418 unsigned int len;
419{
e916a356 420 const U_CHAR *old_namebuf;
e916a356 421
422 old_namebuf = list->namebuf;
6060326b 423 list->name_cap += len;
424 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
e916a356 425
426 /* Fix up token text pointers. */
6dafd80a 427 if (list->namebuf != old_namebuf)
e916a356 428 {
429 unsigned int i;
430
431 for (i = 0; i < list->tokens_used; i++)
7e842f95 432 if (TOKEN_SPELL (&list->tokens[i]) == SPELL_STRING)
76faa4c0 433 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
e916a356 434 }
50fd6b48 435}
436
f80e83a9 437/* If there is not enough room for LEN more characters, expand the
438 list by just enough to have room for LEN characters. */
439void
440_cpp_reserve_name_space (list, len)
441 cpp_toklist *list;
442 unsigned int len;
443{
444 unsigned int room = list->name_cap - list->name_used;
445
446 if (room < len)
447 _cpp_expand_name_space (list, len - room);
448}
449
50fd6b48 450/* Expand the number of tokens in a list. */
d2efb5ed 451void
452_cpp_expand_token_space (list, count)
50fd6b48 453 cpp_toklist *list;
d2efb5ed 454 unsigned int count;
50fd6b48 455{
d2efb5ed 456 unsigned int n;
457
458 list->tokens_cap += count;
459 n = list->tokens_cap;
bce8e0c0 460 if (list->flags & LIST_OFFSET)
d2efb5ed 461 list->tokens--, n++;
50fd6b48 462 list->tokens = (cpp_token *)
d2efb5ed 463 xrealloc (list->tokens, n * sizeof (cpp_token));
bce8e0c0 464 if (list->flags & LIST_OFFSET)
465 list->tokens++; /* Skip the dummy. */
50fd6b48 466}
467
d2efb5ed 468/* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
469 an extra token in front of the token list, as this allows the lexer
470 to always peek at the previous token without worrying about
471 underflowing the list, and some initial space. Otherwise, no
472 token- or name-space is allocated, and there is no dummy token. */
bce8e0c0 473void
d2efb5ed 474_cpp_init_toklist (list, flags)
50fd6b48 475 cpp_toklist *list;
d2efb5ed 476 int flags;
50fd6b48 477{
d2efb5ed 478 if (flags == NO_DUMMY_TOKEN)
479 {
480 list->tokens_cap = 0;
f80e83a9 481 list->tokens = 0;
d2efb5ed 482 list->name_cap = 0;
f80e83a9 483 list->namebuf = 0;
d2efb5ed 484 list->flags = 0;
485 }
486 else
487 {
488 /* Initialize token space. Put a dummy token before the start
489 that will fail matches. */
490 list->tokens_cap = 256; /* 4K's worth. */
491 list->tokens = (cpp_token *)
492 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
493 list->tokens[0].type = CPP_EOF;
494 list->tokens++;
495
496 /* Initialize name space. */
497 list->name_cap = 1024;
f80e83a9 498 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
d2efb5ed 499 list->flags = LIST_OFFSET;
500 }
bce8e0c0 501
bce8e0c0 502 _cpp_clear_toklist (list);
503}
50fd6b48 504
bce8e0c0 505/* Clear a token list. */
506void
507_cpp_clear_toklist (list)
508 cpp_toklist *list;
509{
6060326b 510 list->tokens_used = 0;
511 list->name_used = 0;
f80e83a9 512 list->directive = 0;
513 list->paramc = 0;
514 list->params_len = 0;
bce8e0c0 515 list->flags &= LIST_OFFSET; /* clear all but that one */
516}
517
518/* Free a token list. Does not free the list itself, which may be
519 embedded in a larger structure. */
520void
521_cpp_free_toklist (list)
f80e83a9 522 const cpp_toklist *list;
bce8e0c0 523{
bce8e0c0 524 if (list->flags & LIST_OFFSET)
525 free (list->tokens - 1); /* Backup over dummy token. */
526 else
527 free (list->tokens);
528 free (list->namebuf);
50fd6b48 529}
530
bce8e0c0 531/* Compare two tokens. */
532int
533_cpp_equiv_tokens (a, b)
534 const cpp_token *a, *b;
535{
f80e83a9 536 if (a->type == b->type && a->flags == b->flags)
7e842f95 537 switch (TOKEN_SPELL (a))
f80e83a9 538 {
539 default: /* Keep compiler happy. */
540 case SPELL_OPERATOR:
541 return 1;
542 case SPELL_CHAR:
543 case SPELL_NONE:
544 return a->val.aux == b->val.aux; /* arg_no or character. */
545 case SPELL_IDENT:
76faa4c0 546 return a->val.node == b->val.node;
f80e83a9 547 case SPELL_STRING:
76faa4c0 548 return (a->val.str.len == b->val.str.len
549 && !memcmp (a->val.str.text, b->val.str.text,
550 a->val.str.len));
f80e83a9 551 }
bce8e0c0 552
f80e83a9 553 return 0;
bce8e0c0 554}
555
556/* Compare two token lists. */
557int
558_cpp_equiv_toklists (a, b)
559 const cpp_toklist *a, *b;
560{
561 unsigned int i;
562
f80e83a9 563 if (a->tokens_used != b->tokens_used
564 || a->flags != b->flags
565 || a->paramc != b->paramc)
bce8e0c0 566 return 0;
567
568 for (i = 0; i < a->tokens_used; i++)
569 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
570 return 0;
571 return 1;
572}
573
f80e83a9 574/* Utility routine:
2c63d6c8 575
76faa4c0 576 Compares, the token TOKEN to the NUL-terminated string STRING.
577 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
bce8e0c0 578
f80e83a9 579int
76faa4c0 580cpp_ideq (token, string)
581 const cpp_token *token;
f80e83a9 582 const char *string;
583{
76faa4c0 584 if (token->type != CPP_NAME)
f80e83a9 585 return 0;
76faa4c0 586
587 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
bce8e0c0 588}
50fd6b48 589
/* The six digraph spellings.  NOTE(review): the code that indexes
   this table is outside this part of the file; the order presumably
   has to match it - confirm before reordering.  */
f80e83a9 590static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
591 U":>", U"<%", U"%>"};
0578f103 592
338fa5f7 593/* Call when meeting a newline. Returns the character after the newline
594 (or carriage-return newline combination), or EOF. */
595static cppchar_t
596handle_newline (buffer, newline_char)
597 cpp_buffer *buffer;
598 cppchar_t newline_char;
599{
600 cppchar_t next = EOF;
601
602 buffer->col_adjust = 0;
603 buffer->lineno++;
604 buffer->line_base = buffer->cur;
605
606 /* Handle CR-LF and LF-CR combinations, get the next character. */
607 if (buffer->cur < buffer->rlimit)
608 {
609 next = *buffer->cur++;
610 if (next + newline_char == '\r' + '\n')
611 {
612 buffer->line_base = buffer->cur;
613 if (buffer->cur < buffer->rlimit)
614 next = *buffer->cur++;
615 else
616 next = EOF;
617 }
618 }
619
620 buffer->read_ahead = next;
621 return next;
622}
623
624/* Subroutine of skip_escaped_newlines; called when a trigraph is
625 encountered. It warns if necessary, and returns true if the
626 trigraph should be honoured. FROM_CHAR is the third character of a
627 trigraph, and presumed to be the previous character for position
628 reporting. */
0578f103 629static int
338fa5f7 630trigraph_ok (pfile, from_char)
0578f103 631 cpp_reader *pfile;
338fa5f7 632 cppchar_t from_char;
0578f103 633{
f80e83a9 634 int accept = CPP_OPTION (pfile, trigraphs);
635
636 if (CPP_OPTION (pfile, warn_trigraphs))
0578f103 637 {
338fa5f7 638 cpp_buffer *buffer = pfile->buffer;
f80e83a9 639 if (accept)
338fa5f7 640 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
f80e83a9 641 "trigraph ??%c converted to %c",
338fa5f7 642 (int) from_char,
643 (int) _cpp_trigraph_map[from_char]);
0578f103 644 else
338fa5f7 645 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
646 "trigraph ??%c ignored", (int) from_char);
0578f103 647 }
338fa5f7 648
f80e83a9 649 return accept;
0578f103 650}
651
338fa5f7 652/* Set the type of the token being built to T and mark the
   look-ahead character as consumed by resetting buffer->read_ahead
   to EOF.  Assumes local variables buffer and result. */
653#define ACCEPT_CHAR(t) \
654 do { result->type = t; buffer->read_ahead = EOF; } while (0)
655
656/* Remember, and later return to, the current read position of the
   buffer.  Both macros assume local variables buffer and saved_cur.

   When we move to multibyte character sets, add to these something
657 that saves and restores the state of the multibyte conversion
658 library. This probably involves saving and restoring a "cookie".
659 In the case of glibc it is an 8-byte structure, so is not a high
660 overhead operation. In any case, it's out of the fast path. */
661#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
662#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
663
664/* Skips any escaped newlines introduced by NEXT, which is either a
665 '?' or a '\\'. Returns the next character, which will also have
666 been placed in buffer->read_ahead. */
667static cppchar_t
668skip_escaped_newlines (buffer, next)
669 cpp_buffer *buffer;
670 cppchar_t next;
0578f103 671{
338fa5f7 672 cppchar_t next1;
673 const unsigned char *saved_cur;
674 int space;
f80e83a9 675
338fa5f7 676 do
f80e83a9 677 {
338fa5f7 678 if (buffer->cur == buffer->rlimit)
679 break;
680
681 SAVE_STATE ();
682 if (next == '?')
683 {
684 next1 = *buffer->cur++;
685 if (next1 != '?' || buffer->cur == buffer->rlimit)
686 {
687 RESTORE_STATE ();
688 break;
689 }
f80e83a9 690
338fa5f7 691 next1 = *buffer->cur++;
692 if (!_cpp_trigraph_map[next1] || !trigraph_ok (buffer->pfile, next1))
693 {
694 RESTORE_STATE ();
695 break;
696 }
0578f103 697
338fa5f7 698 /* We have a full trigraph here. */
699 next = _cpp_trigraph_map[next1];
700 if (next != '\\' || buffer->cur == buffer->rlimit)
701 break;
702 SAVE_STATE ();
703 }
704
705 /* We have a backslash, and room for at least one more character. */
706 space = 0;
707 do
708 {
709 next1 = *buffer->cur++;
710 if (!is_nvspace (next1))
711 break;
712 space = 1;
713 }
714 while (buffer->cur < buffer->rlimit);
715
716 if (!is_vspace (next1))
717 {
718 RESTORE_STATE ();
719 break;
720 }
0578f103 721
338fa5f7 722 if (space)
723 cpp_warning (buffer->pfile,
724 "backslash and newline separated by space");
725
726 next = handle_newline (buffer, next1);
727 if (next == EOF)
728 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
f80e83a9 729 }
338fa5f7 730 while (next == '\\' || next == '?');
0578f103 731
338fa5f7 732 buffer->read_ahead = next;
733 return next;
0578f103 734}
735
338fa5f7 736/* Obtain the next character, after trigraph conversion and skipping
737 an arbitrary string of escaped newlines. The common case of no
738 trigraphs or escaped newlines falls through quickly. */
739static cppchar_t
740get_effective_char (buffer)
741 cpp_buffer *buffer;
852d1b04 742{
338fa5f7 743 cppchar_t next = EOF;
744
745 if (buffer->cur < buffer->rlimit)
746 {
747 next = *buffer->cur++;
748
749 /* '?' can introduce trigraphs (and therefore backslash); '\\'
750 can introduce escaped newlines, which we want to skip, or
751 UCNs, which, depending upon lexer state, we will handle in
752 the future. */
753 if (next == '?' || next == '\\')
754 next = skip_escaped_newlines (buffer, next);
755 }
756
757 buffer->read_ahead = next;
758 return next;
852d1b04 759}
760
338fa5f7 761/* Skip a C-style block comment. We find the end of the comment by
762 seeing if an asterisk is before every '/' we encounter. Returns
763 non-zero if comment terminated by EOF, zero otherwise.

   Reading starts at the current position of PFILE's buffer.  Line
   numbers are kept up to date through handle_newline, and column
   tracking through adjust_column for tabs.  buffer->read_ahead is
   reset to EOF on return. */
f80e83a9 764static int
765skip_block_comment (pfile)
0578f103 766 cpp_reader *pfile;
767{
f80e83a9 768 cpp_buffer *buffer = pfile->buffer;
/* PREVC is deliberately left unset: it is assigned before every read
   inside the loop, and the final test only looks at it when C is '/',
   which cannot be the case if the loop body never ran (C is still
   EOF then).  */
338fa5f7 769 cppchar_t c = EOF, prevc;
770
771 while (buffer->cur != buffer->rlimit)
0578f103 772 {
338fa5f7 773 prevc = c, c = *buffer->cur++;
774
/* Re-entry point for paths that have already fetched the next
   character into C themselves.  */
775 next_char:
776 /* FIXME: For speed, create a new character class of characters
777 of no interest inside block comments. */
778 if (c == '?' || c == '\\')
779 c = skip_escaped_newlines (buffer, c);
f80e83a9 780
338fa5f7 781 /* People like decorating comments with '*', so check for '/'
782 instead for efficiency. */
f80e83a9 783 if (c == '/')
0578f103 784 {
/* Star followed by slash: the comment is closed.  */
338fa5f7 785 if (prevc == '*')
786 break;
f80e83a9 787
338fa5f7 788 /* Warn about potential nested comments, but not if the '/'
789 comes immediately before the true comment delimiter.
f80e83a9 790 Don't bother to get it right across escaped newlines. */
338fa5f7 791 if (CPP_OPTION (pfile, warn_comments)
792 && buffer->cur != buffer->rlimit)
0578f103 793 {
/* Look one character ahead for a star, and one more to see whether
   that star is itself the start of the real terminator.  */
338fa5f7 794 prevc = c, c = *buffer->cur++;
795 if (c == '*' && buffer->cur != buffer->rlimit)
796 {
797 prevc = c, c = *buffer->cur++;
798 if (c != '/')
799 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
800 CPP_BUF_COL (buffer),
801 "\"/*\" within comment");
802 }
/* C now holds the last character read ahead; examine it without
   fetching another one.  */
803 goto next_char;
0578f103 804 }
0578f103 805 }
78719282 806 else if (is_vspace (c))
0578f103 807 {
/* handle_newline returns the character after the newline, so it is
   examined directly.  NOTE(review): that character is EOF when the
   newline was the last thing in the buffer, in which case the tests
   at next_char, including is_vspace, see EOF - confirm they tolerate
   it.  */
338fa5f7 808 prevc = c, c = handle_newline (buffer, c);
809 goto next_char;
0578f103 810 }
b86584f6 811 else if (c == '\t')
338fa5f7 812 adjust_column (pfile);
0578f103 813 }
f80e83a9 814
338fa5f7 815 buffer->read_ahead = EOF;
/* Zero only if the last two characters seen were star and slash.  */
816 return c != '/' || prevc != '*';
0578f103 817}
818
241e762e 819/* Skip a C++ line comment. Handles escaped newlines. Returns
338fa5f7 820 non-zero if a multiline comment. The following new line, if any,
821 is left in buffer->read_ahead. */
f80e83a9 822static int
338fa5f7 823skip_line_comment (buffer)
824 cpp_buffer *buffer;
0578f103 825{
338fa5f7 826 unsigned int orig_lineno = buffer->lineno;
827 cppchar_t c;
f80e83a9 828
338fa5f7 829 do
f80e83a9 830 {
338fa5f7 831 c = EOF;
832 if (buffer->cur == buffer->rlimit)
833 break;
f80e83a9 834
338fa5f7 835 c = *buffer->cur++;
836 if (c == '?' || c == '\\')
837 c = skip_escaped_newlines (buffer, c);
f80e83a9 838 }
338fa5f7 839 while (!is_vspace (c));
0578f103 840
338fa5f7 841 buffer->read_ahead = c; /* Leave any newline for caller. */
842 return orig_lineno != buffer->lineno;
f80e83a9 843}
0578f103 844
338fa5f7 845/* pfile->buffer->cur is one beyond the \t character. Update
846 col_adjust so we track the column correctly. */
b86584f6 847static void
338fa5f7 848adjust_column (pfile)
b86584f6 849 cpp_reader *pfile;
b86584f6 850{
338fa5f7 851 cpp_buffer *buffer = pfile->buffer;
852 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
b86584f6 853
854 /* Round it up to multiple of the tabstop, but subtract 1 since the
855 tab itself occupies a character position. */
338fa5f7 856 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
857 - col % CPP_OPTION (pfile, tabstop)) - 1;
b86584f6 858}
859
338fa5f7 860/* Skips whitespace, saving the next non-whitespace character.
861 Adjusts pfile->col_adjust to account for tabs. Without this,
862 tokens might be assigned an incorrect column. */
f80e83a9 863static void
338fa5f7 864skip_whitespace (pfile, c)
f80e83a9 865 cpp_reader *pfile;
338fa5f7 866 cppchar_t c;
f80e83a9 867{
868 cpp_buffer *buffer = pfile->buffer;
338fa5f7 869 unsigned int warned = 0;
0578f103 870
338fa5f7 871 do
f80e83a9 872 {
78719282 873 /* Horizontal space always OK. */
874 if (c == ' ')
338fa5f7 875 ;
78719282 876 else if (c == '\t')
338fa5f7 877 adjust_column (pfile);
878 /* Just \f \v or \0 left. */
78719282 879 else if (c == '\0')
f80e83a9 880 {
78719282 881 if (!warned)
338fa5f7 882 {
883 cpp_warning (pfile, "null character(s) ignored");
884 warned = 1;
885 }
0578f103 886 }
338fa5f7 887 else if (IN_DIRECTIVE (pfile) && CPP_PEDANTIC (pfile))
78719282 888 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
889 CPP_BUF_COL (buffer),
890 "%s in preprocessing directive",
891 c == '\f' ? "form feed" : "vertical tab");
338fa5f7 892
893 c = EOF;
894 if (buffer->cur == buffer->rlimit)
895 break;
896 c = *buffer->cur++;
0578f103 897 }
338fa5f7 898 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
899 while (is_nvspace (c));
900
901 /* Remember the next character. */
902 buffer->read_ahead = c;
f80e83a9 903}
0578f103 904
338fa5f7 905/* Parse an identifier, skipping embedded backslash-newlines.
906 Calculate the hash value of the token while parsing, for improved
907 performance. The hashing algorithm *must* match cpp_lookup(). */
908
909static cpp_hashnode *
910parse_identifier (pfile, c)
0578f103 911 cpp_reader *pfile;
338fa5f7 912 cppchar_t c;
0578f103 913{
338fa5f7 914 cpp_buffer *buffer = pfile->buffer;
915 unsigned int r = 0, saw_dollar = 0;
916 unsigned int orig_used = pfile->token_list.name_used;
f80e83a9 917
338fa5f7 918 do
f80e83a9 919 {
338fa5f7 920 do
f80e83a9 921 {
338fa5f7 922 if (pfile->token_list.name_used == pfile->token_list.name_cap)
923 _cpp_expand_name_space (&pfile->token_list,
924 pfile->token_list.name_used + 256);
925 pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
926 r = HASHSTEP (r, c);
0578f103 927
338fa5f7 928 if (c == '$')
929 saw_dollar++;
71aa9da4 930
338fa5f7 931 c = EOF;
932 if (buffer->cur == buffer->rlimit)
933 break;
71aa9da4 934
338fa5f7 935 c = *buffer->cur++;
936 }
937 while (is_idchar (c));
71aa9da4 938
338fa5f7 939 /* Potential escaped newline? */
940 if (c != '?' && c != '\\')
941 break;
942 c = skip_escaped_newlines (buffer, c);
f80e83a9 943 }
338fa5f7 944 while (is_idchar (c));
945
946 /* $ is not a identifier character in the standard, but is commonly
947 accepted as an extension. Don't warn about it in skipped
948 conditional blocks. */
949 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
950 cpp_pedwarn (pfile, "'$' character(s) in identifier");
951
952 /* Remember the next character. */
953 buffer->read_ahead = c;
954 return _cpp_lookup_with_hash (pfile, &pfile->token_list.namebuf[orig_used],
955 pfile->token_list.name_used - orig_used, r);
0578f103 956}
957
338fa5f7 958/* Parse a number, skipping embedded backslash-newlines. */
0578f103 959static void
338fa5f7 960parse_number (pfile, number, c)
0578f103 961 cpp_reader *pfile;
338fa5f7 962 cpp_string *number;
963 cppchar_t c;
0578f103 964{
338fa5f7 965 cppchar_t prevc;
f80e83a9 966 cpp_buffer *buffer = pfile->buffer;
338fa5f7 967 unsigned int orig_used = pfile->token_list.name_used;
0578f103 968
338fa5f7 969 do
f80e83a9 970 {
338fa5f7 971 do
972 {
973 if (pfile->token_list.name_used == pfile->token_list.name_cap)
974 _cpp_expand_name_space (&pfile->token_list,
975 pfile->token_list.name_used + 256);
976 pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
977
978 prevc = c;
979 c = EOF;
980 if (buffer->cur == buffer->rlimit)
981 break;
0578f103 982
338fa5f7 983 c = *buffer->cur++;
984 }
985 while (is_numchar (c) || c == '.' || VALID_SIGN (c, prevc));
0578f103 986
338fa5f7 987 /* Potential escaped newline? */
988 if (c != '?' && c != '\\')
989 break;
990 c = skip_escaped_newlines (buffer, c);
0578f103 991 }
338fa5f7 992 while (is_numchar (c) || c == '.' || VALID_SIGN (c, prevc));
993
994 /* Remember the next character. */
995 buffer->read_ahead = c;
852d1b04 996
338fa5f7 997 number->text = &pfile->token_list.namebuf[orig_used];
998 number->len = pfile->token_list.name_used - orig_used;
999}
1000
1001/* Subroutine of parse_string. Emits error for unterminated strings. */
1002static void
1003unterminated (pfile, line, term)
1004 cpp_reader *pfile;
1005 unsigned int line;
1006 int term;
1007{
1008 cpp_error (pfile, "missing terminating %c character", term);
1009
1010 if (term == '\"' && pfile->mls_line && pfile->mls_line != line)
f80e83a9 1011 {
338fa5f7 1012 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_column,
1013 "possible start of unterminated string literal");
1014 pfile->mls_line = 0;
f80e83a9 1015 }
0578f103 1016}
1017
338fa5f7 1018/* Parses a string, character constant, or angle-bracketed header file
1019 name. Handles embedded trigraphs and escaped newlines.
0578f103 1020
338fa5f7 1021 Multi-line strings are allowed, but they are deprecated within
1022 directives. */
f80e83a9 1023static void
338fa5f7 1024parse_string (pfile, token, terminator)
0578f103 1025 cpp_reader *pfile;
f80e83a9 1026 cpp_token *token;
338fa5f7 1027 cppchar_t terminator;
0578f103 1028{
f80e83a9 1029 cpp_buffer *buffer = pfile->buffer;
338fa5f7 1030 unsigned int orig_used = pfile->token_list.name_used;
1031 cppchar_t c;
1032 unsigned int nulls = 0;
1033
1034 for (;;)
0578f103 1035 {
338fa5f7 1036 if (buffer->cur == buffer->rlimit)
1037 {
1038 c = EOF;
1039 unterminated (pfile, token->line, terminator);
1040 break;
1041 }
1042 c = *buffer->cur++;
1043
1044 have_char:
1045 /* Handle trigraphs, escaped newlines etc. */
1046 if (c == '?' || c == '\\')
1047 c = skip_escaped_newlines (buffer, c);
0578f103 1048
338fa5f7 1049 if (c == terminator)
0578f103 1050 {
338fa5f7 1051 unsigned int u = pfile->token_list.name_used;
01f93cf7 1052
338fa5f7 1053 /* An odd number of consecutive backslashes represents an
1054 escaped terminator. */
1055 while (u > orig_used && pfile->token_list.namebuf[u - 1] == '\\')
1056 u--;
1057
1058 if ((pfile->token_list.name_used - u) % 2 == 0)
0578f103 1059 {
338fa5f7 1060 c = EOF;
1061 break;
0578f103 1062 }
338fa5f7 1063 }
1064 else if (is_vspace (c))
1065 {
1066 /* In assembly language, silently terminate string and
1067 character literals at end of line. This is a kludge
1068 around not knowing where comments are. */
1069 if (CPP_OPTION (pfile, lang_asm) && terminator != '>')
1070 break;
0578f103 1071
338fa5f7 1072 /* Character constants and header names may not extend over
1073 multiple lines. In Standard C, neither may strings.
1074 Unfortunately, we accept multiline strings as an
1075 extension. (Deprecatedly even in directives - otherwise,
1076 glibc's longlong.h breaks.) */
1077 if (terminator != '"')
0578f103 1078 {
338fa5f7 1079 unterminated (pfile, token->line, terminator);
1080 break;
0578f103 1081 }
0578f103 1082
338fa5f7 1083 if (pfile->mls_line == 0)
1084 {
1085 pfile->mls_line = token->line;
1086 pfile->mls_column = token->col;
1087 if (CPP_PEDANTIC (pfile))
1088 cpp_pedwarn (pfile, "multi-line string constant");
f80e83a9 1089 }
338fa5f7 1090
1091 handle_newline (buffer, c); /* Stores to read_ahead. */
1092 c = '\n';
1093 }
1094 else if (c == '\0')
1095 {
1096 if (nulls++ == 0)
1097 cpp_warning (pfile, "null character(s) preserved in literal");
0578f103 1098 }
0578f103 1099
338fa5f7 1100 if (pfile->token_list.name_used == pfile->token_list.name_cap)
1101 _cpp_expand_name_space (&pfile->token_list,
1102 pfile->token_list.name_used + 256);
9fb5b53d 1103
338fa5f7 1104 pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
1105 /* If we had a new line, the next character is in read_ahead. */
1106 if (c != '\n')
1107 continue;
1108 c = buffer->read_ahead;
1109 if (c != EOF)
1110 goto have_char;
0578f103 1111 }
1112
338fa5f7 1113 buffer->read_ahead = c;
0578f103 1114
338fa5f7 1115 token->val.str.text = &pfile->token_list.namebuf[orig_used];
1116 token->val.str.len = pfile->token_list.name_used - orig_used;
1117}
f80e83a9 1118
338fa5f7 1119/* For output routine simplicity, the stored comment includes the
1120 comment start and any terminator. */
2c63d6c8 1121static void
338fa5f7 1122save_comment (pfile, token, from)
1123 cpp_reader *pfile;
f80e83a9 1124 cpp_token *token;
1125 const unsigned char *from;
2c63d6c8 1126{
f80e83a9 1127 unsigned char *buffer;
338fa5f7 1128 unsigned int len;
1129 cpp_toklist *list = &pfile->token_list;
1130
1131#define COMMENT_START_LEN 2
1132 len = pfile->buffer->cur - from + COMMENT_START_LEN;
1133 _cpp_reserve_name_space (list, len);
1134 buffer = list->namebuf + list->name_used;
1135 list->name_used += len;
f80e83a9 1136
f80e83a9 1137 token->type = CPP_COMMENT;
76faa4c0 1138 token->val.str.len = len;
338fa5f7 1139 token->val.str.text = buffer;
0578f103 1140
338fa5f7 1141 /* from[-1] is '/' or '*' depending on the comment type. */
1142 *buffer++ = '/';
1143 *buffer++ = from[-1];
1144 memcpy (buffer, from, len - COMMENT_START_LEN);
1145}
0578f103 1146
338fa5f7 1147/* A helper routine for lex_token. With some long tokens, we need
1148 to read ahead to see if that is the token we have, but back-track
1149 if not. */
1150static void
1151check_long_token (buffer, result, wanted, type)
1152 cpp_buffer *buffer;
1153 cpp_token *result;
1154 cppchar_t wanted;
1155 enum cpp_ttype type;
1156{
1157 const unsigned char *saved_cur;
1158 cppchar_t c = buffer->read_ahead;
1159
1160 SAVE_STATE ();
1161 if (get_effective_char (buffer) == wanted)
1162 ACCEPT_CHAR (type);
f80e83a9 1163 else
c4357c92 1164 {
338fa5f7 1165 /* Restore state. */
1166 RESTORE_STATE ();
1167 buffer->read_ahead = c;
c4357c92 1168 }
0578f103 1169}
1170
f80e83a9 1171static void
338fa5f7 1172lex_token (pfile, result)
0578f103 1173 cpp_reader *pfile;
338fa5f7 1174 cpp_token *result;
0578f103 1175{
338fa5f7 1176 cppchar_t c;
f80e83a9 1177 cpp_buffer *buffer = pfile->buffer;
338fa5f7 1178 const unsigned char *comment_start;
0653b94e 1179
338fa5f7 1180 result->flags = 0;
1181 next_char:
1182 result->line = CPP_BUF_LINE (buffer);
1183 next_char2:
1184 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
f80e83a9 1185
338fa5f7 1186 c = buffer->read_ahead;
1187 if (c == EOF && buffer->cur < buffer->rlimit)
1188 {
1189 c = *buffer->cur++;
1190 result->col++;
1191 }
0578f103 1192
338fa5f7 1193 do_switch:
1194 buffer->read_ahead = EOF;
1195 switch (c)
0578f103 1196 {
338fa5f7 1197 case EOF:
1198 /* Non-empty files should end in a newline. Testing
1199 skip_newlines ensures we only emit the warning once. */
1200 if (buffer->cur != buffer->line_base && buffer->cur != buffer->buf
1201 && pfile->state.skip_newlines)
1202 cpp_pedwarn_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer),
1203 "no newline at end of file");
1204 result->type = CPP_EOF;
1205 break;
0578f103 1206
338fa5f7 1207 case ' ': case '\t': case '\f': case '\v': case '\0':
1208 skip_whitespace (pfile, c);
1209 result->flags |= PREV_WHITE;
1210 goto next_char2;
1211
1212 case '\n': case '\r':
1213 result->type = CPP_EOF;
1214 handle_newline (buffer, c);
1215 /* Handling here will change significantly when moving to
1216 token-at-a-time. */
1217 if (pfile->state.skip_newlines)
0578f103 1218 {
338fa5f7 1219 result->flags &= ~PREV_WHITE; /* Clear any whitespace flag. */
1220 goto next_char;
0578f103 1221 }
338fa5f7 1222 break;
732cb4c9 1223
338fa5f7 1224 case '?':
1225 case '\\':
1226 /* These could start an escaped newline, or '?' a trigraph. Let
1227 skip_escaped_newlines do all the work. */
1228 {
1229 unsigned int lineno = buffer->lineno;
1230
1231 c = skip_escaped_newlines (buffer, c);
1232 if (lineno != buffer->lineno)
1233 /* We had at least one escaped newline of some sort, and the
1234 next character is in buffer->read_ahead. Update the
1235 token's line and column. */
1236 goto next_char;
1237
1238 /* We are either the original '?' or '\\', or a trigraph. */
1239 result->type = CPP_QUERY;
1240 buffer->read_ahead = EOF;
1241 if (c == '\\')
1242 result->type = CPP_BACKSLASH;
1243 else if (c != '?')
1244 goto do_switch;
1245 }
1246 break;
732cb4c9 1247
338fa5f7 1248 make_number:
1249 case '0': case '1': case '2': case '3': case '4':
1250 case '5': case '6': case '7': case '8': case '9':
1251 result->type = CPP_NUMBER;
1252 parse_number (pfile, &result->val.str, c);
1253 break;
732cb4c9 1254
338fa5f7 1255 case '$':
1256 if (!CPP_OPTION (pfile, dollars_in_ident))
1257 goto random_char;
1258 /* Fall through... */
1259
1260 case '_':
1261 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1262 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1263 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1264 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1265 case 'y': case 'z':
1266 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1267 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1268 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1269 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1270 case 'Y': case 'Z':
1271 result->type = CPP_NAME;
1272 result->val.node = parse_identifier (pfile, c);
1273
1274 /* 'L' may introduce wide characters or strings. */
1275 if (result->val.node == pfile->spec_nodes->n_L)
1276 {
1277 c = buffer->read_ahead; /* For make_string. */
1278 if (c == '\'' || c == '"')
71aa9da4 1279 {
338fa5f7 1280 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1281 goto make_string;
71aa9da4 1282 }
338fa5f7 1283 }
1284 /* Convert named operators to their proper types. */
1285 else if (result->val.node->type == T_OPERATOR)
1286 {
1287 result->flags |= NAMED_OP;
1288 result->type = result->val.node->value.code;
1289 }
1290 break;
1291
1292 case '\'':
1293 case '"':
1294 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1295 make_string:
1296 parse_string (pfile, result, c);
1297 break;
f80e83a9 1298
338fa5f7 1299 case '/':
1300 result->type = CPP_DIV;
1301 c = get_effective_char (buffer);
1302 if (c == '=')
1303 ACCEPT_CHAR (CPP_DIV_EQ);
1304 else if (c == '*')
1305 {
1306 comment_start = buffer->cur;
0578f103 1307
338fa5f7 1308 /* Skip_block_comment updates buffer->read_ahead. */
1309 if (skip_block_comment (pfile))
1310 cpp_error_with_line (pfile, result->line, result->col,
1311 "unterminated comment");
1312 if (!pfile->state.save_comments)
31674461 1313 {
338fa5f7 1314 result->flags |= PREV_WHITE;
1315 goto next_char;
31674461 1316 }
1317
338fa5f7 1318 /* Save the comment as a token in its own right. */
1319 save_comment (pfile, result, comment_start);
1320 }
1321 else if (c == '/')
1322 {
1323 /* We silently allow C++ comments in system headers,
1324 irrespective of conformance mode, because lots of
1325 broken systems do that and trying to clean it up in
1326 fixincludes is a nightmare. */
1327 if (CPP_IN_SYSTEM_HEADER (pfile))
1328 goto do_line_comment;
1329 if (CPP_OPTION (pfile, cplusplus_comments))
f80e83a9 1330 {
338fa5f7 1331 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1332 && ! buffer->warned_cplusplus_comments)
f80e83a9 1333 {
338fa5f7 1334 cpp_pedwarn (pfile,
1335 "C++ style comments are not allowed in ISO C89");
1336 cpp_pedwarn (pfile,
1337 "(this will be reported only once per input file)");
1338 buffer->warned_cplusplus_comments = 1;
f80e83a9 1339 }
f80e83a9 1340
338fa5f7 1341 do_line_comment:
1342 comment_start = buffer->cur;
1343
1344 /* Skip_line_comment updates buffer->read_ahead. */
1345 if (skip_line_comment (buffer))
1346 cpp_warning_with_line (pfile, result->line, result->col,
1347 "multi-line comment");
1348
1349 if (!pfile->state.save_comments)
f80e83a9 1350 {
338fa5f7 1351 result->flags |= PREV_WHITE;
1352 goto next_char;
f80e83a9 1353 }
338fa5f7 1354
1355 /* Save the comment as a token in its own right. */
1356 save_comment (pfile, result, comment_start);
0578f103 1357 }
338fa5f7 1358 }
1359 break;
1360
1361 case '<':
1362 if (pfile->state.angled_headers)
1363 {
1364 result->type = CPP_HEADER_NAME;
1365 c = '>'; /* terminator. */
1366 goto make_string;
1367 }
0578f103 1368
338fa5f7 1369 result->type = CPP_LESS;
1370 c = get_effective_char (buffer);
1371 if (c == '=')
1372 ACCEPT_CHAR (CPP_LESS_EQ);
1373 else if (c == '<')
1374 {
1375 ACCEPT_CHAR (CPP_LSHIFT);
1376 if (get_effective_char (buffer) == '=')
1377 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1378 }
1379 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1380 {
1381 ACCEPT_CHAR (CPP_MIN);
1382 if (get_effective_char (buffer) == '=')
1383 ACCEPT_CHAR (CPP_MIN_EQ);
1384 }
1385 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1386 {
1387 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1388 result->flags |= DIGRAPH;
1389 }
1390 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1391 {
1392 ACCEPT_CHAR (CPP_OPEN_BRACE);
1393 result->flags |= DIGRAPH;
1394 }
1395 break;
1396
1397 case '>':
1398 result->type = CPP_GREATER;
1399 c = get_effective_char (buffer);
1400 if (c == '=')
1401 ACCEPT_CHAR (CPP_GREATER_EQ);
1402 else if (c == '>')
1403 {
1404 ACCEPT_CHAR (CPP_RSHIFT);
1405 if (get_effective_char (buffer) == '=')
1406 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1407 }
1408 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1409 {
1410 ACCEPT_CHAR (CPP_MAX);
1411 if (get_effective_char (buffer) == '=')
1412 ACCEPT_CHAR (CPP_MAX_EQ);
1413 }
1414 break;
1415
1416 case '.':
1417 {
1418 const unsigned char *saved_cur;
1419 cppchar_t c1;
1420
1421 /* Save state to avoid needing to pass 2 chars to parse_number. */
1422 SAVE_STATE ();
1423 c1 = get_effective_char (buffer);
1424 /* All known character sets have 0...9 contiguous. */
1425 if (c1 >= '0' && c1 <= '9')
1426 {
1427 RESTORE_STATE ();
1428 goto make_number;
1429 }
1430
1431 result->type = CPP_DOT;
1432 if (c1 == '.')
1433 {
1434 if (get_effective_char (buffer) == '.')
1435 ACCEPT_CHAR (CPP_ELLIPSIS);
1436 else
1437 {
1438 buffer->read_ahead = EOF;
1439 RESTORE_STATE ();
1440 }
1441 }
1442 else if (c1 == '*' && CPP_OPTION (pfile, cplusplus))
1443 ACCEPT_CHAR (CPP_DOT_STAR);
1444 }
1445 break;
1446
1447 case '%':
1448 result->type = CPP_MOD;
1449 c = get_effective_char (buffer);
1450 if (c == '=')
1451 ACCEPT_CHAR (CPP_MOD_EQ);
1452 else if (CPP_OPTION (pfile, digraphs))
1453 {
1454 if (c == ':')
0578f103 1455 {
338fa5f7 1456 result->flags |= DIGRAPH;
1457 ACCEPT_CHAR (CPP_HASH);
1458 if (get_effective_char (buffer) == '%')
1459 check_long_token (buffer, result, ':', CPP_PASTE);
f80e83a9 1460 }
338fa5f7 1461 else if (c == '>')
f80e83a9 1462 {
338fa5f7 1463 result->flags |= DIGRAPH;
1464 ACCEPT_CHAR (CPP_CLOSE_BRACE);
c4abf88d 1465 }
338fa5f7 1466 }
1467 break;
0578f103 1468
338fa5f7 1469 case '+':
1470 result->type = CPP_PLUS;
1471 c = get_effective_char (buffer);
1472 if (c == '=')
1473 ACCEPT_CHAR (CPP_PLUS_EQ);
1474 else if (c == '+')
1475 ACCEPT_CHAR (CPP_PLUS_PLUS);
1476 break;
ac0749c7 1477
338fa5f7 1478 case '-':
1479 result->type = CPP_MINUS;
1480 c = get_effective_char (buffer);
1481 if (c == '>')
1482 {
1483 ACCEPT_CHAR (CPP_DEREF);
1484 if (CPP_OPTION (pfile, cplusplus)
1485 && get_effective_char (buffer) == '*')
1486 ACCEPT_CHAR (CPP_DEREF_STAR);
1487 }
1488 else if (c == '=')
1489 ACCEPT_CHAR (CPP_MINUS_EQ);
1490 else if (c == '-')
1491 ACCEPT_CHAR (CPP_MINUS_MINUS);
1492 break;
0578f103 1493
338fa5f7 1494 case '*':
1495 result->type = CPP_MULT;
1496 if (get_effective_char (buffer) == '=')
1497 ACCEPT_CHAR (CPP_MULT_EQ);
1498 break;
ac0749c7 1499
338fa5f7 1500 case '=':
1501 result->type = CPP_EQ;
1502 if (get_effective_char (buffer) == '=')
1503 ACCEPT_CHAR (CPP_EQ_EQ);
1504 break;
c4abf88d 1505
338fa5f7 1506 case '!':
1507 result->type = CPP_NOT;
1508 if (get_effective_char (buffer) == '=')
1509 ACCEPT_CHAR (CPP_NOT_EQ);
1510 break;
0578f103 1511
338fa5f7 1512 case '&':
1513 result->type = CPP_AND;
1514 c = get_effective_char (buffer);
1515 if (c == '=')
1516 ACCEPT_CHAR (CPP_AND_EQ);
1517 else if (c == '&')
1518 ACCEPT_CHAR (CPP_AND_AND);
1519 break;
1520
1521 case '#':
1522 result->type = CPP_HASH;
1523 if (get_effective_char (buffer) == '#')
1524 ACCEPT_CHAR (CPP_PASTE);
1525 break;
0578f103 1526
338fa5f7 1527 case '|':
1528 result->type = CPP_OR;
1529 c = get_effective_char (buffer);
1530 if (c == '=')
1531 ACCEPT_CHAR (CPP_OR_EQ);
1532 else if (c == '|')
1533 ACCEPT_CHAR (CPP_OR_OR);
1534 break;
0578f103 1535
338fa5f7 1536 case '^':
1537 result->type = CPP_XOR;
1538 if (get_effective_char (buffer) == '=')
1539 ACCEPT_CHAR (CPP_XOR_EQ);
1540 break;
0578f103 1541
338fa5f7 1542 case ':':
1543 result->type = CPP_COLON;
1544 c = get_effective_char (buffer);
1545 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1546 ACCEPT_CHAR (CPP_SCOPE);
1547 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1548 {
1549 result->flags |= DIGRAPH;
1550 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1551 }
1552 break;
0578f103 1553
338fa5f7 1554 case '~': result->type = CPP_COMPL; break;
1555 case ',': result->type = CPP_COMMA; break;
1556 case '(': result->type = CPP_OPEN_PAREN; break;
1557 case ')': result->type = CPP_CLOSE_PAREN; break;
1558 case '[': result->type = CPP_OPEN_SQUARE; break;
1559 case ']': result->type = CPP_CLOSE_SQUARE; break;
1560 case '{': result->type = CPP_OPEN_BRACE; break;
1561 case '}': result->type = CPP_CLOSE_BRACE; break;
1562 case ';': result->type = CPP_SEMICOLON; break;
1563
1564 case '@':
1565 if (CPP_OPTION (pfile, objc))
1566 {
1567 /* In Objective C, '@' may begin keywords or strings, like
1568 @keyword or @"string". It would be nice to call
1569 get_effective_char here and test the result. However, we
1570 would then need to pass 2 characters to parse_identifier,
1571 making it ugly and slowing down its main loop. Instead,
1572 we assume we have an identifier, and recover if not. */
1573 result->type = CPP_NAME;
1574 result->val.node = parse_identifier (pfile, c);
1575 if (result->val.node->length != 1)
1576 break;
ac0749c7 1577
338fa5f7 1578 /* OK, so it wasn't an identifier. Maybe a string? */
1579 if (buffer->read_ahead == '"')
f80e83a9 1580 {
338fa5f7 1581 c = '"';
1582 ACCEPT_CHAR (CPP_OSTRING);
1583 goto make_string;
f80e83a9 1584 }
338fa5f7 1585 }
1586 goto random_char;
1587
1588 random_char:
1589 default:
1590 result->type = CPP_OTHER;
1591 result->val.aux = c;
1592 break;
1593 }
1594}
1595
1596/*
1597 * The tokenizer's main loop. Returns a token list, representing a
1598 * logical line in the input file. On EOF after some tokens have
1599 * been processed, we return immediately. Then in next call, or if
1600 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1601 * token is placed in the list.
1602 */
1603
1604static void
1605lex_line (pfile, list)
1606 cpp_reader *pfile;
1607 cpp_toklist *list;
1608{
1609 unsigned int first_token;
1610 cpp_token *cur_token, *first;
1611 cpp_buffer *buffer = pfile->buffer;
0578f103 1612
338fa5f7 1613 if (!(list->flags & LIST_OFFSET))
1614 (abort) ();
1615
1616 pfile->state.in_lex_line = 1;
1617 if (pfile->buffer->cur == pfile->buffer->buf)
1618 list->flags |= BEG_OF_FILE;
1619
1620 retry:
1621 pfile->state.in_directive = 0;
1622 pfile->state.angled_headers = 0;
1623 pfile->state.skip_newlines = 1;
1624 pfile->state.save_comments = ! CPP_OPTION (pfile, discard_comments);
1625 first_token = list->tokens_used;
1626 list->file = buffer->nominal_fname;
1627
1628 do
1629 {
1630 if (list->tokens_used >= list->tokens_cap)
1631 _cpp_expand_token_space (list, 256);
1632
1633 cur_token = list->tokens + list->tokens_used;
1634 lex_token (pfile, cur_token);
1635
1636 if (pfile->state.skip_newlines)
1637 {
1638 pfile->state.skip_newlines = 0;
1639 list->line = buffer->lineno;
1640 if (cur_token->type == CPP_HASH)
f80e83a9 1641 {
338fa5f7 1642 pfile->state.in_directive = 1;
1643 pfile->state.save_comments = 0;
1644 pfile->state.indented = cur_token->flags & PREV_WHITE;
f80e83a9 1645 }
338fa5f7 1646 /* 6.10.3.10: Within the sequence of preprocessing tokens
1647 making up the invocation of a function-like macro, new
1648 line is considered a normal white-space character. */
1649 else if (first_token != 0)
1650 cur_token->flags |= PREV_WHITE;
1651 }
1652 else if (IN_DIRECTIVE (pfile) && list->tokens_used == first_token + 1)
1653 {
1654 if (cur_token->type == CPP_NUMBER)
1655 list->directive = _cpp_check_linemarker (pfile, cur_token);
f80e83a9 1656 else
338fa5f7 1657 list->directive = _cpp_check_directive (pfile, cur_token);
f80e83a9 1658 }
cfad5579 1659
338fa5f7 1660 /* _cpp_get_line assumes list->tokens_used refers to the current
1661 token being lexed. So do this after _cpp_check_directive to
1662 get the warnings therein correct. */
1663 list->tokens_used++;
f80e83a9 1664 }
338fa5f7 1665 while (cur_token->type != CPP_EOF);
cfad5579 1666
f80e83a9 1667 /* All tokens are allocated, so the memory location is fixed. */
1668 first = &list->tokens[first_token];
338fa5f7 1669 first->flags |= BOL;
1670 pfile->first_directive_token = first;
1671
f80e83a9 1672 /* Don't complain about the null directive, nor directives in
1673 assembly source: we don't know where the comments are, and # may
1674 introduce assembler pseudo-ops. Don't complain about invalid
1675 directives in skipped conditional groups (6.10 p4). */
338fa5f7 1676 if (IN_DIRECTIVE (pfile) && !KNOWN_DIRECTIVE (list) && !pfile->skipping
1677 && !CPP_OPTION (pfile, lang_asm))
f80e83a9 1678 {
338fa5f7 1679 if (cur_token > first + 1)
1680 {
1681 if (first[1].type == CPP_NAME)
1682 cpp_error_with_line (pfile, first->line, first->col,
1683 "invalid preprocessing directive #%s",
1684 first[1].val.node->name);
1685 else
1686 cpp_error_with_line (pfile, first->line, first->col,
1687 "invalid preprocessing directive");
1688 }
0653b94e 1689
1690 /* Discard this line to prevent further errors from cc1. */
1691 _cpp_clear_toklist (list);
1692 goto retry;
f80e83a9 1693 }
1694
be6e77fe 1695 /* Drop the EOF unless really at EOF or in a directive. */
1696 if (cur_token != first && !KNOWN_DIRECTIVE (list)
1697 && pfile->done_initializing)
1698 list->tokens_used--;
1699
338fa5f7 1700 pfile->state.in_lex_line = 0;
cfad5579 1701}
1702
f80e83a9 1703/* Write the spelling of a token TOKEN, with any appropriate
6cae2504 1704 whitespace before it, to FP. PREV is the previous token, which
1705 is used to determine if we need to shove in an extra space in order
1706 to avoid accidental token paste. If WHITE is 0, do not insert any
1707 leading whitespace. */
f80e83a9 1708static void
6cae2504 1709output_token (pfile, fp, token, prev, white)
f80e83a9 1710 cpp_reader *pfile;
6cae2504 1711 FILE *fp;
f80e83a9 1712 const cpp_token *token, *prev;
6cae2504 1713 int white;
f80e83a9 1714{
6cae2504 1715 if (white)
f80e83a9 1716 {
6cae2504 1717 int dummy;
1718
1719 if (token->col && (token->flags & BOL))
1720 {
1721 /* Supply enough whitespace to put this token in its original
1722 column. Don't bother trying to reconstruct tabs; we can't
1723 get it right in general, and nothing ought to care. (Yes,
1724 some things do care; the fault lies with them.) */
1725 unsigned int spaces = token->col - 1;
1726
1727 while (spaces--)
1728 putc (' ', fp);
1729 }
1730 else if (token->flags & PREV_WHITE)
1731 putc (' ', fp);
1732 else
1733 /* Check for and prevent accidental token pasting.
1734 In addition to the cases handled by can_paste, consider
1735
1736 a + ++b - if there is not a space between the + and ++, it
1737 will be misparsed as a++ + b. But + ## ++ doesn't produce
1738 a valid token. */
1739 if (prev
1740 && (can_paste (pfile, prev, token, &dummy) != CPP_EOF
1741 || (prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1742 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS)))
1743 putc (' ', fp);
f80e83a9 1744 }
6cae2504 1745
1746 switch (TOKEN_SPELL (token))
f80e83a9 1747 {
6cae2504 1748 case SPELL_OPERATOR:
1749 {
1750 const unsigned char *spelling;
1751
1752 if (token->flags & DIGRAPH)
1753 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1754 else if (token->flags & NAMED_OP)
1755 goto spell_ident;
1756 else
1757 spelling = TOKEN_NAME (token);
1758
1759 ufputs (spelling, fp);
1760 }
1761 break;
1762
1763 case SPELL_IDENT:
1764 spell_ident:
1765 ufputs (token->val.node->name, fp);
1766 break;
1767
1768 case SPELL_STRING:
1769 {
71aa9da4 1770 int left, right, tag;
1771 switch (token->type)
1772 {
1773 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1774 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1775 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1776 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1777 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1778 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1779 default: left = '\0'; right = '\0'; tag = '\0'; break;
1780 }
1781 if (tag) putc (tag, fp);
1782 if (left) putc (left, fp);
6cae2504 1783 fwrite (token->val.str.text, 1, token->val.str.len, fp);
71aa9da4 1784 if (right) putc (right, fp);
6cae2504 1785 }
1786 break;
1787
1788 case SPELL_CHAR:
1789 putc (token->val.aux, fp);
1790 break;
1791
1792 case SPELL_NONE:
1793 /* Placemarker or EOF - no output. (Macro args are handled
1794 elsewhere. */
1795 break;
f80e83a9 1796 }
6cae2504 1797}
1798
1799/* Dump the original user's spelling of argument index ARG_NO to the
1800 macro whose expansion is LIST. */
1801static void
1802dump_param_spelling (fp, list, arg_no)
1803 FILE *fp;
1804 const cpp_toklist *list;
1805 unsigned int arg_no;
1806{
1807 const U_CHAR *param = list->namebuf;
1808
1809 while (arg_no--)
1810 param += ustrlen (param) + 1;
1811 ufputs (param, fp);
1812}
1813
1814/* Output all the tokens of LIST, starting at TOKEN, to FP. */
1815void
1816cpp_output_list (pfile, fp, list, token)
1817 cpp_reader *pfile;
1818 FILE *fp;
1819 const cpp_toklist *list;
1820 const cpp_token *token;
1821{
1822 const cpp_token *limit = list->tokens + list->tokens_used;
1823 const cpp_token *prev = 0;
1824 int white = 0;
ab12a39c 1825
6cae2504 1826 while (token < limit)
1827 {
1828 /* XXX Find some way we can write macro args from inside
1829 output_token/spell_token. */
1830 if (token->type == CPP_MACRO_ARG)
1831 {
1832 if (white && token->flags & PREV_WHITE)
1833 putc (' ', fp);
1834 if (token->flags & STRINGIFY_ARG)
1835 putc ('#', fp);
1836 dump_param_spelling (fp, list, token->val.aux);
1837 }
1838 else
1839 output_token (pfile, fp, token, prev, white);
1840 if (token->flags & PASTE_LEFT)
1841 fputs (" ##", fp);
1842 prev = token;
1843 token++;
1844 white = 1;
1845 }
f80e83a9 1846}
ab12a39c 1847
6cae2504 1848
f80e83a9 1849/* Write the spelling of a token TOKEN to BUFFER. The buffer must
c5ea33a8 1850 already contain the enough space to hold the token's spelling.
1851 Returns a pointer to the character after the last character
1852 written. */
ab12a39c 1853
f80e83a9 1854static unsigned char *
1855spell_token (pfile, token, buffer)
1856 cpp_reader *pfile; /* Would be nice to be rid of this... */
1857 const cpp_token *token;
1858 unsigned char *buffer;
1859{
7e842f95 1860 switch (TOKEN_SPELL (token))
f80e83a9 1861 {
1862 case SPELL_OPERATOR:
1863 {
1864 const unsigned char *spelling;
1865 unsigned char c;
ab12a39c 1866
f80e83a9 1867 if (token->flags & DIGRAPH)
1868 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
31674461 1869 else if (token->flags & NAMED_OP)
1870 goto spell_ident;
f80e83a9 1871 else
7e842f95 1872 spelling = TOKEN_NAME (token);
f80e83a9 1873
1874 while ((c = *spelling++) != '\0')
1875 *buffer++ = c;
1876 }
1877 break;
ab12a39c 1878
f80e83a9 1879 case SPELL_IDENT:
31674461 1880 spell_ident:
76faa4c0 1881 memcpy (buffer, token->val.node->name, token->val.node->length);
1882 buffer += token->val.node->length;
f80e83a9 1883 break;
ab12a39c 1884
f80e83a9 1885 case SPELL_STRING:
1886 {
71aa9da4 1887 int left, right, tag;
1888 switch (token->type)
1889 {
1890 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1891 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1892 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1893 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1894 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1895 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1896 default: left = '\0'; right = '\0'; tag = '\0'; break;
1897 }
1898 if (tag) *buffer++ = tag;
1899 if (left) *buffer++ = left;
76faa4c0 1900 memcpy (buffer, token->val.str.text, token->val.str.len);
1901 buffer += token->val.str.len;
71aa9da4 1902 if (right) *buffer++ = right;
f80e83a9 1903 }
1904 break;
ab12a39c 1905
f80e83a9 1906 case SPELL_CHAR:
1907 *buffer++ = token->val.aux;
1908 break;
ab12a39c 1909
f80e83a9 1910 case SPELL_NONE:
7e842f95 1911 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
f80e83a9 1912 break;
1913 }
ab12a39c 1914
f80e83a9 1915 return buffer;
1916}
ab12a39c 1917
fdf3a98f 1918/* Macro expansion algorithm.
1919
1920Macro expansion is implemented by a single-pass algorithm; there are
1921no rescan passes involved. cpp_get_token expands just enough to be
1922able to return a token to the caller, a consequence is that when it
1923returns the preprocessor can be in a state of mid-expansion. The
1924algorithm does not work by fully expanding a macro invocation into
1925some kind of token list, and then returning them one by one.
1926
1927Our expansion state is recorded in a context stack. We start out with
1928a single context on the stack, let's call it base context. This
1929consists of the token list returned by lex_line that forms the next
1930logical line in the source file.
1931
1932The current level in the context stack is stored in the cur_context
1933member of the cpp_reader structure. The context it references keeps,
1934amongst other things, a count of how many tokens form that context and
1935our position within those tokens.
1936
1937Fundamentally, calling cpp_get_token will return the next token from
1938the current context. If we're at the end of the current context, that
1939context is popped from the stack first, unless it is the base context,
1940in which case the next logical line is lexed from the source file.
1941
1942However, before returning the token, if it is a CPP_NAME token
1943_cpp_get_token checks to see if it is a macro and if it is enabled.
1944Each time it encounters a macro name, it calls push_macro_context.
1945This function checks that the macro should be expanded (with
1946is_macro_enabled), and if so pushes a new macro context on the stack
1947which becomes the current context. It then loops back to read the
1948first token of the macro context.
1949
1950A macro context basically consists of the token list representing the
1951macro's replacement list, which was saved in the hash table by
1952save_macro_expansion when its #define statement was parsed. If the
1953macro is function-like, it also contains the tokens that form the
1954arguments to the macro. I say more about macro arguments below, but
1955for now just saying that each argument is a set of pointers to tokens
1956is enough.
1957
1958When taking tokens from a macro context, we may get a CPP_MACRO_ARG
1959token. This represents an argument passed to the macro, with the
1960argument number stored in the token's AUX field. The argument should
be substituted; this is achieved by pushing an "argument context". An
argument context just refers to the tokens forming the argument,
1963which are obtained directly from the macro context. The STRINGIFY
1964flag on a CPP_MACRO_ARG token indicates that the argument should be
1965stringified.
1966
Here are a few simple rules the context stack obeys:-
1968
1969 1) The lex_line token list is always context zero.
1970
1971 2) Context 1, if it exists, must be a macro context.
1972
1973 3) An argument context can only appear above a macro context.
1974
1975 4) A macro context can appear above the base context, another macro
1976 context, or an argument context.
1977
1978 5) These imply that the minimal level of an argument context is 2.
1979
1980The only tricky thing left is ensuring that macros are enabled and
1981disabled correctly. The algorithm controls macro expansion by the
1982level of the context a token is taken from in the context stack. If a
1983token is taken from a level equal to no_expand_level (a member of
1984struct cpp_reader), no expansion is performed.
1985
1986When popping a context off the stack, if no_expand_level equals the
1987level of the popped context, it is reduced by one to match the new
1988context level, so that expansion is still disabled. It does not
1989increase if a context is pushed, though. It starts out life as
1990UINT_MAX, which has the effect that initially macro expansion is
1991enabled. I explain how this mechanism works below.
1992
1993The standard requires:-
1994
1995 1) Arguments to be fully expanded before substitution.
1996
1997 2) Stringified arguments to not be expanded, nor the tokens
1998 immediately surrounding a ## operator.
1999
2000 3) Continual rescanning until there are no more macros left to
2001 replace.
2002
2003 4) Once a macro has been expanded in stage 1) or 3), it cannot be
2004 expanded again during later rescans. This prevents infinite
2005 recursion.
2006
2007The first thing to observe is that stage 3) is mostly redundant.
2008Since a macro is disabled once it has been expanded, how can a rescan
2009find an unexpanded macro name? There are only two cases where this is
2010possible:-
2011
2012 a) If the macro name results from a token paste operation.
2013
2014 b) If the macro in question is a function-like macro that hasn't
2015 already been expanded because previously there was not the required
2016 '(' token immediately following it. This is only possible when an
2017 argument is substituted, and after substitution the last token of
2018 the argument can bind with a parenthesis appearing in the tokens
2019 following the substitution. Note that if the '(' appears within the
2020 argument, the ')' must too, as expanding macro arguments cannot
2021 "suck in" tokens outside the argument.
2022
2023So we tackle this as follows. When parsing the macro invocation for
2024arguments, we record the tokens forming each argument as a list of
2025pointers to those tokens. We do not expand any tokens that are "raw",
2026i.e. directly from the macro invocation, but other tokens that come
2027from (nested) argument substitution are fully expanded.
2028
2029This is achieved by setting the no_expand_level to that of the macro
2030invocation. A CPP_MACRO_ARG token never appears in the list of tokens
2031forming an argument, because parse_args (indirectly) calls
2032get_raw_token which automatically pushes argument contexts and traces
2033into them. Since these contexts are at a higher level than the
2034no_expand_level, they get fully macro expanded.
2035
2036"Raw" and non-raw tokens are separated in arguments by null pointers,
2037with the policy that the initial state of an argument is raw. If the
2038first token is not raw, it should be preceded by a null pointer. When
2039tracing through the tokens of an argument context, each time
2040get_raw_token encounters a null pointer, it toggles the flag
2041CONTEXT_RAW.
2042
2043This flag, when set, indicates to is_macro_disabled that we are
2044reading raw tokens which should be macro-expanded. Similarly, if
2045clear, is_macro_disabled suppresses re-expansion.
2046
2047It's probably time for an example.
2048
2049#define hash #
2050#define str(x) #x
2051#define xstr(y) str(y hash)
2052str(hash) // "hash"
2053xstr(hash) // "# hash"
2054
2055In the invocation of str, parse_args turns off macro expansion and so
2056parses the argument as <hash>. This is the only token (pointer)
2057passed as the argument to str. Since <hash> is raw there is no need
2058for an initial null pointer. stringify_arg is called from
2059get_raw_token when tracing through the expansion of str, since the
2060argument has the STRINGIFY flag set. stringify_arg turns off
2061macro expansion by setting the no_expand_level to that of the argument
2062context. Thus it gets the token <hash> and stringifies it to "hash"
2063correctly.
2064
2065Similarly xstr is passed <hash>. However, when parse_args is parsing
2066the invocation of str() in xstr's expansion, get_raw_token encounters
2067a CPP_MACRO_ARG token for y. Transparently to parse_args, it pushes
2068an argument context, and enters the tokens of the argument,
2069i.e. <hash>. This is at a higher context level than parse_args
2070disabled, and so is_macro_disabled permits expansion of it and a macro
2071context is pushed on top of the argument context. This contains the
2072<#> token, and the end result is that <hash> is macro expanded.
2073However, after popping off the argument context, the <hash> of xstr's
2074expansion does not get macro expanded because we're back at the
2075no_expand_level. The end result is that the argument passed to str is
2076<NULL> <#> <NULL> <hash>. Note the nulls - policy is we start off
2077raw, <#> is not raw, but then <hash> is.
2078
2079*/
ab12a39c 2080
f80e83a9 2081
2082/* Free the storage allocated for macro arguments. */
2083static void
2084free_macro_args (args)
2085 macro_args *args;
6060326b 2086{
f80e83a9 2087 if (args->tokens)
deb356cf 2088 free ((PTR) args->tokens);
f80e83a9 2089 free (args->ends);
2090 free (args);
6060326b 2091}
2092
f80e83a9 2093/* Determines if a macro has been already used (and is therefore
2094 disabled). */
6060326b 2095static int
f80e83a9 2096is_macro_disabled (pfile, expansion, token)
6060326b 2097 cpp_reader *pfile;
f80e83a9 2098 const cpp_toklist *expansion;
2099 const cpp_token *token;
6060326b 2100{
f80e83a9 2101 cpp_context *context = CURRENT_CONTEXT (pfile);
2102
2103 /* Arguments on either side of ## are inserted in place without
2104 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2105 occurs during a later rescan pass. The effect is that we expand
2106 iff we would as part of the macro's expansion list, so we should
2107 drop to the macro's context. */
2108 if (IS_ARG_CONTEXT (context))
6060326b 2109 {
f80e83a9 2110 if (token->flags & PASTED)
2111 context--;
2112 else if (!(context->flags & CONTEXT_RAW))
2113 return 1;
2114 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2115 context--;
6060326b 2116 }
6060326b 2117
f80e83a9 2118 /* Have we already used this macro? */
2119 while (context->level > 0)
6060326b 2120 {
f80e83a9 2121 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2122 return 1;
2123 /* Raw argument tokens are judged based on the token list they
2124 came from. */
2125 if (context->flags & CONTEXT_RAW)
2126 context = pfile->contexts + context->level;
2127 else
2128 context--;
2129 }
6060326b 2130
f80e83a9 2131 /* Function-like macros may be disabled if the '(' is not in the
2132 current context. We check this without disrupting the context
2133 stack. */
2134 if (expansion->paramc >= 0)
2135 {
2136 const cpp_token *next;
2137 unsigned int prev_nme;
6060326b 2138
f80e83a9 2139 context = CURRENT_CONTEXT (pfile);
2140 /* Drop down any contexts we're at the end of: the '(' may
2141 appear in lower macro expansions, or in the rest of the file. */
2142 while (context->posn == context->count && context > pfile->contexts)
2143 {
2144 context--;
2145 /* If we matched, we are disabled, as we appear in the
2146 expansion of each macro we meet. */
2147 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2148 return 1;
2149 }
6060326b 2150
f80e83a9 2151 prev_nme = pfile->no_expand_level;
2152 pfile->no_expand_level = context - pfile->contexts;
deb356cf 2153 next = _cpp_get_token (pfile);
f80e83a9 2154 restore_macro_expansion (pfile, prev_nme);
2155 if (next->type != CPP_OPEN_PAREN)
2156 {
2157 _cpp_push_token (pfile, next);
1449850f 2158 if (CPP_WTRADITIONAL (pfile))
f80e83a9 2159 cpp_warning (pfile,
31674461 2160 "function macro %s must be used with arguments in traditional C",
2161 token->val.node->name);
f80e83a9 2162 return 1;
2163 }
6060326b 2164 }
6060326b 2165
f80e83a9 2166 return 0;
6060326b 2167}
2168
f80e83a9 2169/* Add a token to the set of tokens forming the arguments to the macro
2170 being parsed in parse_args. */
2171static void
2172save_token (args, token)
2173 macro_args *args;
2174 const cpp_token *token;
6060326b 2175{
f80e83a9 2176 if (args->used == args->capacity)
2177 {
2178 args->capacity += args->capacity + 100;
2179 args->tokens = (const cpp_token **)
deb356cf 2180 xrealloc ((PTR) args->tokens,
2181 args->capacity * sizeof (const cpp_token *));
f80e83a9 2182 }
2183 args->tokens[args->used++] = token;
6060326b 2184}
2185
f80e83a9 2186/* Take and save raw tokens until we finish one argument. Empty
2187 arguments are saved as a single CPP_PLACEMARKER token. */
2188static const cpp_token *
2189parse_arg (pfile, var_args, paren_context, args, pcount)
6060326b 2190 cpp_reader *pfile;
f80e83a9 2191 int var_args;
2192 unsigned int paren_context;
2193 macro_args *args;
2194 unsigned int *pcount;
6060326b 2195{
f80e83a9 2196 const cpp_token *token;
2197 unsigned int paren = 0, count = 0;
2198 int raw, was_raw = 1;
6060326b 2199
f80e83a9 2200 for (count = 0;; count++)
6060326b 2201 {
deb356cf 2202 token = _cpp_get_token (pfile);
6060326b 2203
f80e83a9 2204 switch (token->type)
6060326b 2205 {
f80e83a9 2206 default:
2207 break;
6060326b 2208
f80e83a9 2209 case CPP_OPEN_PAREN:
2210 paren++;
2211 break;
2212
2213 case CPP_CLOSE_PAREN:
2214 if (paren-- != 0)
2215 break;
2216 goto out;
2217
2218 case CPP_COMMA:
2219 /* Commas are not terminators within parantheses or var_args. */
2220 if (paren || var_args)
2221 break;
2222 goto out;
2223
2224 case CPP_EOF: /* Error reported by caller. */
2225 goto out;
6060326b 2226 }
6060326b 2227
f80e83a9 2228 raw = pfile->cur_context <= paren_context;
2229 if (raw != was_raw)
2230 {
2231 was_raw = raw;
2232 save_token (args, 0);
2233 count++;
6060326b 2234 }
f80e83a9 2235 save_token (args, token);
6060326b 2236 }
6060326b 2237
2238 out:
f80e83a9 2239 if (count == 0)
2240 {
2241 /* Duplicate the placemarker. Then we can set its flags and
2242 position and safely be using more than one. */
2243 save_token (args, duplicate_token (pfile, &placemarker_token));
2244 count++;
2245 }
2246
2247 *pcount = count;
2248 return token;
6060326b 2249}
2250
/* Nonzero if the argument that starts at offset O of arglist A is
   empty, i.e. it is a single PLACEMARKER token, or a null pointer
   followed by a PLACEMARKER.  When the entry at O is a null pointer
   it is stepped over and the entry after it is examined instead.
   Both A and O are evaluated more than once.  */

#define empty_argument(A, O) \
  ((A)->tokens[(O) + ((A)->tokens[O] == 0)]->type == CPP_PLACEMARKER)
2258
2259/* Parse the arguments making up a macro invocation. Nested arguments
2260 are automatically macro expanded, but immediate macros are not
2261 expanded; this enables e.g. operator # to work correctly. Returns
2262 non-zero on error. */
6060326b 2263static int
f80e83a9 2264parse_args (pfile, hp, args)
6060326b 2265 cpp_reader *pfile;
f80e83a9 2266 cpp_hashnode *hp;
2267 macro_args *args;
6060326b 2268{
f80e83a9 2269 const cpp_token *token;
2270 const cpp_toklist *macro;
2271 unsigned int total = 0;
2272 unsigned int paren_context = pfile->cur_context;
2273 int argc = 0;
2274
2275 macro = hp->value.expansion;
2276 do
6060326b 2277 {
f80e83a9 2278 unsigned int count;
6060326b 2279
f80e83a9 2280 token = parse_arg (pfile, (argc + 1 == macro->paramc
2281 && (macro->flags & VAR_ARGS)),
2282 paren_context, args, &count);
2283 if (argc < macro->paramc)
6060326b 2284 {
f80e83a9 2285 total += count;
2286 args->ends[argc] = total;
6060326b 2287 }
f80e83a9 2288 argc++;
6060326b 2289 }
f80e83a9 2290 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
6060326b 2291
f80e83a9 2292 if (token->type == CPP_EOF)
2293 {
31674461 2294 cpp_error(pfile, "unterminated argument list for macro \"%s\"", hp->name);
f80e83a9 2295 return 1;
2296 }
2297 else if (argc < macro->paramc)
2298 {
2299 /* A rest argument is allowed to not appear in the invocation at all.
2300 e.g. #define debug(format, args...) ...
2301 debug("string");
2302 This is exactly the same as if the rest argument had received no
57ba19ba 2303 tokens - debug("string",); This extension is deprecated. */
35677230 2304
2305 if (argc + 1 == macro->paramc && (macro->flags & VAR_ARGS))
f80e83a9 2306 {
2307 /* Duplicate the placemarker. Then we can set its flags and
2308 position and safely be using more than one. */
35677230 2309 cpp_token *pm = duplicate_token (pfile, &placemarker_token);
2310 pm->flags = VOID_REST;
2311 save_token (args, pm);
f80e83a9 2312 args->ends[argc] = total + 1;
35677230 2313
2314 if (CPP_OPTION (pfile, c99) && CPP_PEDANTIC (pfile))
2315 cpp_pedwarn (pfile, "ISO C99 requires rest arguments to be used");
2316
f80e83a9 2317 return 0;
2318 }
2319 else
2320 {
31674461 2321 cpp_error (pfile, "not enough arguments for macro \"%s\"", hp->name);
f80e83a9 2322 return 1;
2323 }
2324 }
2325 /* An empty argument to an empty function-like macro is fine. */
2326 else if (argc > macro->paramc
2327 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2328 {
31674461 2329 cpp_error (pfile, "too many arguments for macro \"%s\"", hp->name);
f80e83a9 2330 return 1;
2331 }
6060326b 2332
f80e83a9 2333 return 0;
2334}
2335
2336/* Adds backslashes before all backslashes and double quotes appearing
2337 in strings. Non-printable characters are converted to octal. */
2338static U_CHAR *
2339quote_string (dest, src, len)
2340 U_CHAR *dest;
2341 const U_CHAR *src;
2342 unsigned int len;
2343{
2344 while (len--)
6060326b 2345 {
f80e83a9 2346 U_CHAR c = *src++;
6060326b 2347
f80e83a9 2348 if (c == '\\' || c == '"')
3b304865 2349 {
f80e83a9 2350 *dest++ = '\\';
2351 *dest++ = c;
2352 }
2353 else
2354 {
2355 if (ISPRINT (c))
2356 *dest++ = c;
2357 else
2358 {
2359 sprintf ((char *) dest, "\\%03o", c);
2360 dest += 4;
2361 }
3b304865 2362 }
6060326b 2363 }
6060326b 2364
f80e83a9 2365 return dest;
6060326b 2366}
2367
f80e83a9 2368/* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2369 CPP_STRING token containing TEXT in quoted form. */
2370static cpp_token *
2371make_string_token (token, text, len)
2372 cpp_token *token;
2373 const U_CHAR *text;
2374 unsigned int len;
2375{
2376 U_CHAR *buf;
2377
2378 buf = (U_CHAR *) xmalloc (len * 4);
2379 token->type = CPP_STRING;
2380 token->flags = 0;
76faa4c0 2381 token->val.str.text = buf;
2382 token->val.str.len = quote_string (buf, text, len) - buf;
f80e83a9 2383 return token;
2384}
2385
2386/* Allocates and converts a temporary token to a CPP_NUMBER token,
2387 evaluating to NUMBER. */
2388static cpp_token *
2389alloc_number_token (pfile, number)
6060326b 2390 cpp_reader *pfile;
f80e83a9 2391 int number;
6060326b 2392{
f80e83a9 2393 cpp_token *result;
2394 char *buf;
2395
2396 result = get_temp_token (pfile);
2397 buf = xmalloc (20);
2398 sprintf (buf, "%d", number);
2399
2400 result->type = CPP_NUMBER;
2401 result->flags = 0;
76faa4c0 2402 result->val.str.text = (U_CHAR *) buf;
2403 result->val.str.len = strlen (buf);
f80e83a9 2404 return result;
2405}
6060326b 2406
f80e83a9 2407/* Returns a temporary token from the temporary token store of PFILE. */
2408static cpp_token *
2409get_temp_token (pfile)
2410 cpp_reader *pfile;
2411{
2412 if (pfile->temp_used == pfile->temp_alloced)
2413 {
2414 if (pfile->temp_used == pfile->temp_cap)
2415 {
2416 pfile->temp_cap += pfile->temp_cap + 20;
2417 pfile->temp_tokens = (cpp_token **) xrealloc
2418 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2419 }
2420 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2421 (sizeof (cpp_token));
2422 }
6060326b 2423
f80e83a9 2424 return pfile->temp_tokens[pfile->temp_used++];
2425}
2426
2427/* Release (not free) for re-use the temporary tokens of PFILE. */
2428static void
2429release_temp_tokens (pfile)
2430 cpp_reader *pfile;
2431{
2432 while (pfile->temp_used)
6060326b 2433 {
f80e83a9 2434 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
6060326b 2435
7e842f95 2436 if (TOKEN_SPELL (token) == SPELL_STRING)
6060326b 2437 {
76faa4c0 2438 free ((char *) token->val.str.text);
2439 token->val.str.text = 0;
6060326b 2440 }
2441 }
f80e83a9 2442}
6060326b 2443
f80e83a9 2444/* Free all of PFILE's dynamically-allocated temporary tokens. */
2445void
2446_cpp_free_temp_tokens (pfile)
2447 cpp_reader *pfile;
2448{
2449 if (pfile->temp_tokens)
6060326b 2450 {
f80e83a9 2451 /* It is possible, though unlikely (looking for '(' of a funlike
2452 macro into EOF), that we haven't released the tokens yet. */
2453 release_temp_tokens (pfile);
2454 while (pfile->temp_alloced)
2455 free (pfile->temp_tokens[--pfile->temp_alloced]);
2456 free (pfile->temp_tokens);
6060326b 2457 }
2458
f80e83a9 2459 if (pfile->date)
2460 {
76faa4c0 2461 free ((char *) pfile->date->val.str.text);
f80e83a9 2462 free (pfile->date);
76faa4c0 2463 free ((char *) pfile->time->val.str.text);
f80e83a9 2464 free (pfile->time);
2465 }
6060326b 2466}
2467
f80e83a9 2468/* Copy TOKEN into a temporary token from PFILE's store. */
2469static cpp_token *
2470duplicate_token (pfile, token)
2471 cpp_reader *pfile;
2472 const cpp_token *token;
2473{
2474 cpp_token *result = get_temp_token (pfile);
6060326b 2475
f80e83a9 2476 *result = *token;
7e842f95 2477 if (TOKEN_SPELL (token) == SPELL_STRING)
f80e83a9 2478 {
76faa4c0 2479 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2480 memcpy (buff, token->val.str.text, token->val.str.len);
2481 result->val.str.text = buff;
f80e83a9 2482 }
2483 return result;
2484}
6060326b 2485
f80e83a9 2486/* Determine whether two tokens can be pasted together, and if so,
2487 what the resulting token is. Returns CPP_EOF if the tokens cannot
2488 be pasted, or the appropriate type for the merged token if they
2489 can. */
2490static enum cpp_ttype
2491can_paste (pfile, token1, token2, digraph)
2492 cpp_reader * pfile;
2493 const cpp_token *token1, *token2;
2494 int* digraph;
6060326b 2495{
f80e83a9 2496 enum cpp_ttype a = token1->type, b = token2->type;
2497 int cxx = CPP_OPTION (pfile, cplusplus);
6060326b 2498
31674461 2499 /* Treat named operators as if they were ordinary NAMEs. */
2500 if (token1->flags & NAMED_OP)
2501 a = CPP_NAME;
2502 if (token2->flags & NAMED_OP)
2503 b = CPP_NAME;
2504
f80e83a9 2505 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2506 return a + (CPP_EQ_EQ - CPP_EQ);
6060326b 2507
f80e83a9 2508 switch (a)
6060326b 2509 {
f80e83a9 2510 case CPP_GREATER:
2511 if (b == a) return CPP_RSHIFT;
2512 if (b == CPP_QUERY && cxx) return CPP_MAX;
2513 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2514 break;
2515 case CPP_LESS:
2516 if (b == a) return CPP_LSHIFT;
2517 if (b == CPP_QUERY && cxx) return CPP_MIN;
2518 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
27fdc0b6 2519 if (CPP_OPTION (pfile, digraphs))
2520 {
2521 if (b == CPP_COLON)
2522 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2523 if (b == CPP_MOD)
2524 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2525 }
f80e83a9 2526 break;
6060326b 2527
f80e83a9 2528 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2529 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2530 case CPP_OR: if (b == a) return CPP_OR_OR; break;
6060326b 2531
f80e83a9 2532 case CPP_MINUS:
2533 if (b == a) return CPP_MINUS_MINUS;
2534 if (b == CPP_GREATER) return CPP_DEREF;
2535 break;
2536 case CPP_COLON:
2537 if (b == a && cxx) return CPP_SCOPE;
27fdc0b6 2538 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
f80e83a9 2539 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2540 break;
2541
2542 case CPP_MOD:
27fdc0b6 2543 if (CPP_OPTION (pfile, digraphs))
2544 {
2545 if (b == CPP_GREATER)
2546 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2547 if (b == CPP_COLON)
2548 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2549 }
f80e83a9 2550 break;
2551 case CPP_DEREF:
2552 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2553 break;
2554 case CPP_DOT:
2555 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2556 if (b == CPP_NUMBER) return CPP_NUMBER;
2557 break;
2558
2559 case CPP_HASH:
2560 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2561 /* %:%: digraph */
2562 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2563 break;
2564
2565 case CPP_NAME:
2566 if (b == CPP_NAME) return CPP_NAME;
2567 if (b == CPP_NUMBER
76faa4c0 2568 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
f80e83a9 2569 if (b == CPP_CHAR
76faa4c0 2570 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
f80e83a9 2571 if (b == CPP_STRING
76faa4c0 2572 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
f80e83a9 2573 break;
2574
2575 case CPP_NUMBER:
2576 if (b == CPP_NUMBER) return CPP_NUMBER;
2577 if (b == CPP_NAME) return CPP_NUMBER;
2578 if (b == CPP_DOT) return CPP_NUMBER;
2579 /* Numbers cannot have length zero, so this is safe. */
2580 if ((b == CPP_PLUS || b == CPP_MINUS)
76faa4c0 2581 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
f80e83a9 2582 return CPP_NUMBER;
2583 break;
2584
71aa9da4 2585 case CPP_OTHER:
2586 if (CPP_OPTION (pfile, objc) && token1->val.aux == '@')
2587 {
2588 if (b == CPP_NAME) return CPP_NAME;
2589 if (b == CPP_STRING) return CPP_OSTRING;
2590 }
2591
f80e83a9 2592 default:
2593 break;
6060326b 2594 }
2595
f80e83a9 2596 return CPP_EOF;
2597}
2598
2599/* Check if TOKEN is to be ##-pasted with the token after it. */
2600static const cpp_token *
2601maybe_paste_with_next (pfile, token)
2602 cpp_reader *pfile;
2603 const cpp_token *token;
2604{
2605 cpp_token *pasted;
2606 const cpp_token *second;
2607 cpp_context *context = CURRENT_CONTEXT (pfile);
2608
2609 /* Is this token on the LHS of ## ? */
f80e83a9 2610
deb356cf 2611 while ((token->flags & PASTE_LEFT)
2612 || ((context->flags & CONTEXT_PASTEL)
2613 && context->posn == context->count))
6060326b 2614 {
deb356cf 2615 /* Suppress macro expansion for next token, but don't conflict
2616 with the other method of suppression. If it is an argument,
2617 macro expansion within the argument will still occur. */
2618 pfile->paste_level = pfile->cur_context;
2619 second = _cpp_get_token (pfile);
2620 pfile->paste_level = 0;
2621
2622 /* Ignore placemarker argument tokens (cannot be from an empty
2623 macro since macros are not expanded). */
2624 if (token->type == CPP_PLACEMARKER)
2625 pasted = duplicate_token (pfile, second);
2626 else if (second->type == CPP_PLACEMARKER)
f80e83a9 2627 {
e0a859f1 2628 /* GCC has special extended semantics for , ## b where b is
2629 a varargs parameter: the comma disappears if b was given
2630 no actual arguments (not merely if b is an empty
2631 argument). */
2632 if (token->type == CPP_COMMA && second->flags & VOID_REST)
35677230 2633 pasted = duplicate_token (pfile, second);
deb356cf 2634 else
2635 pasted = duplicate_token (pfile, token);
f80e83a9 2636 }
2637 else
f80e83a9 2638 {
deb356cf 2639 int digraph = 0;
2640 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
6060326b 2641
deb356cf 2642 if (type == CPP_EOF)
2643 {
2644 if (CPP_OPTION (pfile, warn_paste))
e0a859f1 2645 {
2646 /* Do not complain about , ## <whatever> if
2647 <whatever> came from a variable argument, because
2648 the author probably intended the ## to trigger
2649 the special extended semantics (see above). */
2650 if (token->type == CPP_COMMA
2651 && IS_ARG_CONTEXT (CURRENT_CONTEXT (pfile))
58fe658a 2652 && ON_REST_ARG (CURRENT_CONTEXT (pfile) - 1))
e0a859f1 2653 /* no warning */;
2654 else
2655 cpp_warning (pfile,
deb356cf 2656 "pasting would not give a valid preprocessing token");
e0a859f1 2657 }
deb356cf 2658 _cpp_push_token (pfile, second);
58fe658a 2659 /* A short term hack to safely clear the PASTE_LEFT flag. */
2660 pasted = duplicate_token (pfile, token);
2661 pasted->flags &= ~PASTE_LEFT;
2662 return pasted;
deb356cf 2663 }
6060326b 2664
deb356cf 2665 if (type == CPP_NAME || type == CPP_NUMBER)
2666 {
2667 /* Join spellings. */
2668 U_CHAR *buf, *end;
2669
2670 pasted = get_temp_token (pfile);
2671 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2672 end = spell_token (pfile, token, buf);
2673 end = spell_token (pfile, second, end);
2674 *end = '\0';
f80e83a9 2675
deb356cf 2676 if (type == CPP_NAME)
2677 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2678 else
2679 {
2680 pasted->val.str.text = uxstrdup (buf);
2681 pasted->val.str.len = end - buf;
2682 }
2683 }
71aa9da4 2684 else if (type == CPP_WCHAR || type == CPP_WSTRING
2685 || type == CPP_OSTRING)
deb356cf 2686 pasted = duplicate_token (pfile, second);
76faa4c0 2687 else
2688 {
deb356cf 2689 pasted = get_temp_token (pfile);
2690 pasted->val.integer = 0;
76faa4c0 2691 }
deb356cf 2692
2693 pasted->type = type;
2694 pasted->flags = digraph ? DIGRAPH : 0;
31674461 2695
2696 if (type == CPP_NAME && pasted->val.node->type == T_OPERATOR)
2697 {
2698 pasted->type = pasted->val.node->value.code;
2699 pasted->flags |= NAMED_OP;
2700 }
f80e83a9 2701 }
2702
deb356cf 2703 /* The pasted token gets the whitespace flags and position of the
2704 first token, the PASTE_LEFT flag of the second token, plus the
2705 PASTED flag to indicate it is the result of a paste. However, we
2706 want to preserve the DIGRAPH flag. */
2707 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2708 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2709 | (second->flags & PASTE_LEFT) | PASTED);
2710 pasted->col = token->col;
2711 pasted->line = token->line;
2712
2713 /* See if there is another token to be pasted onto the one we just
2714 constructed. */
2715 token = pasted;
2716 context = CURRENT_CONTEXT (pfile);
2717 /* and loop */
f80e83a9 2718 }
deb356cf 2719 return token;
f80e83a9 2720}
2721
2722/* Convert a token sequence to a single string token according to the
2723 rules of the ISO C #-operator. */
2724#define INIT_SIZE 200
2725static cpp_token *
2726stringify_arg (pfile, token)
6060326b 2727 cpp_reader *pfile;
f80e83a9 2728 const cpp_token *token;
6060326b 2729{
f80e83a9 2730 cpp_token *result;
2731 unsigned char *main_buf;
2732 unsigned int prev_value, backslash_count = 0;
2733 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
6060326b 2734
deb356cf 2735 push_arg_context (pfile, token);
f80e83a9 2736 prev_value = prevent_macro_expansion (pfile);
2737 main_buf = (unsigned char *) xmalloc (buf_cap);
6060326b 2738
f80e83a9 2739 result = get_temp_token (pfile);
2740 ASSIGN_FLAGS_AND_POS (result, token);
2741
deb356cf 2742 for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
6060326b 2743 {
f80e83a9 2744 int escape;
2745 unsigned char *buf;
2746 unsigned int len = TOKEN_LEN (token);
6060326b 2747
7e842f95 2748 if (token->type == CPP_PLACEMARKER)
2749 continue;
2750
f80e83a9 2751 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2752 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2753 if (escape)
2754 len *= 4 + 1;
2755
2756 if (buf_used + len > buf_cap)
6060326b 2757 {
f80e83a9 2758 buf_cap = buf_used + len + INIT_SIZE;
2759 main_buf = xrealloc (main_buf, buf_cap);
2760 }
6060326b 2761
f80e83a9 2762 if (whitespace && (token->flags & PREV_WHITE))
2763 main_buf[buf_used++] = ' ';
6060326b 2764
f80e83a9 2765 if (escape)
2766 buf = (unsigned char *) xmalloc (len);
2767 else
2768 buf = main_buf + buf_used;
2769
2770 len = spell_token (pfile, token, buf) - buf;
2771 if (escape)
2772 {
2773 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2774 free (buf);
2775 }
2776 else
2777 buf_used += len;
6060326b 2778
f80e83a9 2779 whitespace = 1;
2780 if (token->type == CPP_BACKSLASH)
2781 backslash_count++;
2782 else
2783 backslash_count = 0;
2784 }
6060326b 2785
f80e83a9 2786 /* Ignore the final \ of invalid string literals. */
2787 if (backslash_count & 1)
2788 {
2789 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2790 buf_used--;
2791 }
6060326b 2792
f80e83a9 2793 result->type = CPP_STRING;
76faa4c0 2794 result->val.str.text = main_buf;
2795 result->val.str.len = buf_used;
f80e83a9 2796 restore_macro_expansion (pfile, prev_value);
2797 return result;
2798}
6060326b 2799
f80e83a9 2800/* Allocate more room on the context stack of PFILE. */
2801static void
2802expand_context_stack (pfile)
2803 cpp_reader *pfile;
2804{
2805 pfile->context_cap += pfile->context_cap + 20;
2806 pfile->contexts = (cpp_context *)
2807 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2808}
6060326b 2809
f80e83a9 2810/* Push the context of macro NODE onto the context stack. TOKEN is
2811 the CPP_NAME token invoking the macro. */
deb356cf 2812static int
2813push_macro_context (pfile, token)
f80e83a9 2814 cpp_reader *pfile;
f80e83a9 2815 const cpp_token *token;
2816{
2817 unsigned char orig_flags;
2818 macro_args *args;
2819 cpp_context *context;
deb356cf 2820 cpp_hashnode *node = token->val.node;
6060326b 2821
f80e83a9 2822 /* Token's flags may change when parsing args containing a nested
2823 invocation of this macro. */
2824 orig_flags = token->flags & (PREV_WHITE | BOL);
2825 args = 0;
2826 if (node->value.expansion->paramc >= 0)
6060326b 2827 {
f80e83a9 2828 unsigned int error, prev_nme;
2829
2830 /* Allocate room for the argument contexts, and parse them. */
2831 args = (macro_args *) xmalloc (sizeof (macro_args));
2832 args->ends = (unsigned int *)
2833 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2834 args->tokens = 0;
2835 args->capacity = 0;
2836 args->used = 0;
f80e83a9 2837
2838 prev_nme = prevent_macro_expansion (pfile);
2839 pfile->args = args;
2840 error = parse_args (pfile, node, args);
2841 pfile->args = 0;
2842 restore_macro_expansion (pfile, prev_nme);
2843 if (error)
2844 {
2845 free_macro_args (args);
deb356cf 2846 return 1;
f80e83a9 2847 }
e2ab8477 2848 /* Set the level after the call to parse_args. */
2849 args->level = pfile->cur_context;
6060326b 2850 }
6060326b 2851
f80e83a9 2852 /* Now push its context. */
2853 pfile->cur_context++;
2854 if (pfile->cur_context == pfile->context_cap)
2855 expand_context_stack (pfile);
2856
2857 context = CURRENT_CONTEXT (pfile);
2858 context->u.list = node->value.expansion;
2859 context->args = args;
2860 context->posn = 0;
2861 context->count = context->u.list->tokens_used;
2862 context->level = pfile->cur_context;
2863 context->flags = 0;
2864 context->pushed_token = 0;
2865
2866 /* Set the flags of the first token. We know there must
2867 be one, empty macros are a single placemarker token. */
2868 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2869
deb356cf 2870 return 0;
6060326b 2871}
2872
f80e83a9 2873/* Push an argument to the current macro onto the context stack.
2874 TOKEN is the MACRO_ARG token representing the argument expansion. */
deb356cf 2875static void
f80e83a9 2876push_arg_context (pfile, token)
2877 cpp_reader *pfile;
2878 const cpp_token *token;
6060326b 2879{
f80e83a9 2880 cpp_context *context;
2881 macro_args *args;
2882
2883 pfile->cur_context++;
2884 if (pfile->cur_context == pfile->context_cap)
2885 expand_context_stack (pfile);
2886
2887 context = CURRENT_CONTEXT (pfile);
2888 args = context[-1].args;
2889
2890 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2891 context->u.arg = args->tokens + context->count;
2892 context->count = args->ends[token->val.aux] - context->count;
2893 context->args = 0;
2894 context->posn = 0;
2895 context->level = args->level;
2896 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2897 context->pushed_token = 0;
2898
2899 /* Set the flags of the first token. There is one. */
2900 {
2901 const cpp_token *first = context->u.arg[0];
2902 if (!first)
2903 first = context->u.arg[1];
6060326b 2904
f80e83a9 2905 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2906 token->flags & (PREV_WHITE | BOL));
2907 }
6060326b 2908
f80e83a9 2909 if (token->flags & PASTE_LEFT)
2910 context->flags |= CONTEXT_PASTEL;
2911 if (pfile->paste_level)
2912 context->flags |= CONTEXT_PASTER;
6060326b 2913}
2914
f80e83a9 2915/* "Unget" a token. It is effectively inserted in the token queue and
2916 will be returned by the next call to get_raw_token. */
6060326b 2917void
f80e83a9 2918_cpp_push_token (pfile, token)
6060326b 2919 cpp_reader *pfile;
f80e83a9 2920 const cpp_token *token;
6060326b 2921{
f80e83a9 2922 cpp_context *context = CURRENT_CONTEXT (pfile);
7e842f95 2923
2924 if (context->posn > 0)
2925 {
2926 const cpp_token *prev;
2927 if (IS_ARG_CONTEXT (context))
2928 prev = context->u.arg[context->posn - 1];
2929 else
2930 prev = &context->u.list->tokens[context->posn - 1];
2931
2932 if (prev == token)
2933 {
2934 context->posn--;
2935 return;
2936 }
2937 }
2938
f80e83a9 2939 if (context->pushed_token)
2940 cpp_ice (pfile, "two tokens pushed in a row");
2941 if (token->type != CPP_EOF)
2942 context->pushed_token = token;
2943 /* Don't push back a directive's CPP_EOF, step back instead. */
2944 else if (pfile->cur_context == 0)
2945 pfile->contexts[0].posn--;
2946}
6060326b 2947
f80e83a9 2948/* Handle a preprocessing directive. TOKEN is the CPP_HASH token
2949 introducing the directive. */
2950static void
2951process_directive (pfile, token)
2952 cpp_reader *pfile;
2953 const cpp_token *token;
2954{
2955 const struct directive *d = pfile->token_list.directive;
2956 int prev_nme = 0;
2957
2958 /* Skip over the directive name. */
2959 if (token[1].type == CPP_NAME)
2960 _cpp_get_raw_token (pfile);
2961 else if (token[1].type != CPP_NUMBER)
7e842f95 2962 cpp_ice (pfile, "directive begins with %s?!", TOKEN_NAME (token));
f80e83a9 2963
f80e83a9 2964 if (! (d->flags & EXPAND))
2965 prev_nme = prevent_macro_expansion (pfile);
2966 (void) (*d->handler) (pfile);
2967 if (! (d->flags & EXPAND))
2968 restore_macro_expansion (pfile, prev_nme);
2969 _cpp_skip_rest_of_line (pfile);
2970}
6060326b 2971
f80e83a9 2972/* The external interface to return the next token. All macro
2973 expansion and directive processing is handled internally, the
2974 caller only ever sees the output after preprocessing. */
2975const cpp_token *
2976cpp_get_token (pfile)
2977 cpp_reader *pfile;
2978{
2979 const cpp_token *token;
deb356cf 2980 /* Loop till we hit a non-directive, non-placemarker token. */
f80e83a9 2981 for (;;)
2982 {
deb356cf 2983 token = _cpp_get_token (pfile);
2984
2985 if (token->type == CPP_PLACEMARKER)
2986 continue;
2987
2988 if (token->type == CPP_HASH && token->flags & BOL
f80e83a9 2989 && pfile->token_list.directive)
6060326b 2990 {
f80e83a9 2991 process_directive (pfile, token);
2992 continue;
6060326b 2993 }
2994
deb356cf 2995 return token;
2996 }
2997}
2998
2999/* The internal interface to return the next token. There are two
3000 differences between the internal and external interfaces: the
3001 internal interface may return a PLACEMARKER token, and it does not
3002 process directives. */
3003const cpp_token *
3004_cpp_get_token (pfile)
3005 cpp_reader *pfile;
3006{
c2cce424 3007 const cpp_token *token, *old_token;
deb356cf 3008 cpp_hashnode *node;
3009
3010 /* Loop until we hit a non-macro token. */
3011 for (;;)
3012 {
3013 token = get_raw_token (pfile);
3014
f80e83a9 3015 /* Short circuit EOF. */
3016 if (token->type == CPP_EOF)
3017 return token;
deb356cf 3018
3019 /* If we are skipping... */
3020 if (pfile->skipping)
6060326b 3021 {
deb356cf 3022 /* we still have to process directives, */
3023 if (pfile->token_list.directive)
3024 return token;
3025
3026 /* but everything else is ignored. */
f80e83a9 3027 _cpp_skip_rest_of_line (pfile);
3028 continue;
3029 }
6060326b 3030
deb356cf 3031 /* If there's a potential control macro and we get here, then that
3032 #ifndef didn't cover the entire file and its argument shouldn't
3033 be taken as a control macro. */
3034 pfile->potential_control_macro = 0;
6060326b 3035
db54da8f 3036 /* If we are rescanning preprocessed input, no macro expansion or
3037 token pasting may occur. */
3038 if (CPP_OPTION (pfile, preprocessed))
3039 return token;
3040
c2cce424 3041 old_token = token;
3042
deb356cf 3043 /* See if there's a token to paste with this one. */
3044 if (!pfile->paste_level)
3045 token = maybe_paste_with_next (pfile, token);
6060326b 3046
deb356cf 3047 /* If it isn't a macro, return it now. */
31674461 3048 if (token->type != CPP_NAME || token->val.node->type == T_VOID)
deb356cf 3049 return token;
6060326b 3050
31674461 3051 /* Is macro expansion disabled in general, or are we in the
c2cce424 3052 middle of a token paste, or was this token just pasted?
3053 (Note we don't check token->flags & PASTED, because that
3054 counts tokens that were pasted at some point in the past,
3055 we're only interested in tokens that were pasted by this call
3056 to maybe_paste_with_next.) */
3057 if (pfile->no_expand_level == pfile->cur_context
3058 || pfile->paste_level
3059 || (token != old_token
3060 && pfile->no_expand_level + 1 == pfile->cur_context))
deb356cf 3061 return token;
c2cce424 3062
deb356cf 3063 node = token->val.node;
3064 if (node->type != T_MACRO)
3065 return special_symbol (pfile, node, token);
6060326b 3066
f80e83a9 3067 if (is_macro_disabled (pfile, node->value.expansion, token))
3068 return token;
6060326b 3069
deb356cf 3070 if (push_macro_context (pfile, token))
3071 return token;
3072 /* else loop */
f80e83a9 3073 }
f80e83a9 3074}
6060326b 3075
f80e83a9 3076/* Returns the next raw token, i.e. without performing macro
3077 expansion. Argument contexts are automatically entered. */
3078static const cpp_token *
3079get_raw_token (pfile)
3080 cpp_reader *pfile;
3081{
3082 const cpp_token *result;
deb356cf 3083 cpp_context *context;
6060326b 3084
deb356cf 3085 for (;;)
f80e83a9 3086 {
deb356cf 3087 context = CURRENT_CONTEXT (pfile);
3088 if (context->pushed_token)
f80e83a9 3089 {
deb356cf 3090 result = context->pushed_token;
3091 context->pushed_token = 0;
35677230 3092 return result; /* Cannot be a CPP_MACRO_ARG */
deb356cf 3093 }
3094 else if (context->posn == context->count)
3095 {
3096 if (pop_context (pfile))
3097 return &eof_token;
3098 continue;
3099 }
35677230 3100 else if (IS_ARG_CONTEXT (context))
deb356cf 3101 {
35677230 3102 result = context->u.arg[context->posn++];
3103 if (result == 0)
6060326b 3104 {
35677230 3105 context->flags ^= CONTEXT_RAW;
f80e83a9 3106 result = context->u.arg[context->posn++];
6060326b 3107 }
35677230 3108 return result; /* Cannot be a CPP_MACRO_ARG */
f80e83a9 3109 }
6060326b 3110
35677230 3111 result = &context->u.list->tokens[context->posn++];
3112
deb356cf 3113 if (result->type != CPP_MACRO_ARG)
3114 return result;
3115
3116 if (result->flags & STRINGIFY_ARG)
3117 return stringify_arg (pfile, result);
3118
3119 push_arg_context (pfile, result);
3120 }
f80e83a9 3121}
6060326b 3122
f80e83a9 3123/* Internal interface to get the token without macro expanding. */
3124const cpp_token *
3125_cpp_get_raw_token (pfile)
3126 cpp_reader *pfile;
3127{
3128 int prev_nme = prevent_macro_expansion (pfile);
deb356cf 3129 const cpp_token *result = _cpp_get_token (pfile);
f80e83a9 3130 restore_macro_expansion (pfile, prev_nme);
3131 return result;
3132}
6060326b 3133
f80e83a9 3134/* A thin wrapper to lex_line. CLEAR is non-zero if the current token
3135 list should be overwritten, or zero if we need to append
3136 (typically, if we are within the arguments to a macro, or looking
3137 for the '(' to start a function-like macro invocation). */
3138static int
3139lex_next (pfile, clear)
3140 cpp_reader *pfile;
3141 int clear;
3142{
3143 cpp_toklist *list = &pfile->token_list;
3144 const cpp_token *old_list = list->tokens;
3145 unsigned int old_used = list->tokens_used;
6060326b 3146
f80e83a9 3147 if (clear)
6060326b 3148 {
f80e83a9 3149 /* Release all temporary tokens. */
3150 _cpp_clear_toklist (list);
3151 pfile->contexts[0].posn = 0;
3152 if (pfile->temp_used)
3153 release_temp_tokens (pfile);
6060326b 3154 }
f80e83a9 3155 lex_line (pfile, list);
3156 pfile->contexts[0].count = list->tokens_used;
6060326b 3157
f80e83a9 3158 if (!clear && pfile->args)
6060326b 3159 {
f80e83a9 3160 /* Fix up argument token pointers. */
3161 if (old_list != list->tokens)
3162 {
3163 unsigned int i;
3164
3165 for (i = 0; i < pfile->args->used; i++)
3166 {
3167 const cpp_token *token = pfile->args->tokens[i];
3168 if (token >= old_list && token < old_list + old_used)
3169 pfile->args->tokens[i] = (const cpp_token *)
3170 ((char *) token + ((char *) list->tokens - (char *) old_list));
3171 }
3172 }
3173
3174 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3175 tokens within the list of arguments that would otherwise act as
3176 preprocessing directives, the behavior is undefined.
3177
3178 This implementation will report a hard error and treat the
3179 'sequence of preprocessing tokens' as part of the macro argument,
3180 not a directive.
3181
3182 Note if pfile->args == 0, we're OK since we're only inside a
3183 macro argument after a '('. */
3184 if (list->directive)
3185 {
3186 cpp_error_with_line (pfile, list->tokens[old_used].line,
3187 list->tokens[old_used].col,
3188 "#%s may not be used inside a macro argument",
3189 list->directive->name);
78719282 3190 return 1;
f80e83a9 3191 }
6060326b 3192 }
3193
f80e83a9 3194 return 0;
6060326b 3195}
3196
deb356cf 3197/* Pops a context off the context stack. If we're at the bottom, lexes
3198 the next logical line. Returns EOF if we're at the end of the
e0a859f1 3199 argument list to the # operator, or we should not "overflow"
f80e83a9 3200 into the rest of the file (e.g. 6.10.3.1.1). */
3201static int
deb356cf 3202pop_context (pfile)
f80e83a9 3203 cpp_reader *pfile;
3204{
3205 cpp_context *context;
89b05ef6 3206
f80e83a9 3207 if (pfile->cur_context == 0)
deb356cf 3208 {
3209 /* If we are currently processing a directive, do not advance. 6.10
3210 paragraph 2: A new-line character ends the directive even if it
3211 occurs within what would otherwise be an invocation of a
3212 function-like macro. */
3213 if (pfile->token_list.directive)
3214 return 1;
3215
3216 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3217 }
f80e83a9 3218
3219 /* Argument contexts, when parsing args or handling # operator
3220 return CPP_EOF at the end. */
3221 context = CURRENT_CONTEXT (pfile);
3222 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3223 return 1;
3224
3225 /* Free resources when leaving macro contexts. */
3226 if (context->args)
3227 free_macro_args (context->args);
3228
3229 if (pfile->cur_context == pfile->no_expand_level)
3230 pfile->no_expand_level--;
3231 pfile->cur_context--;
3232
3233 return 0;
3234}
3235
f80e83a9 3236/* Turn off macro expansion at the current context level. */
3237static unsigned int
3238prevent_macro_expansion (pfile)
3239 cpp_reader *pfile;
3240{
3241 unsigned int prev_value = pfile->no_expand_level;
3242 pfile->no_expand_level = pfile->cur_context;
3243 return prev_value;
3244}
3245
3246/* Restore macro expansion to its previous state. */
3247static void
3248restore_macro_expansion (pfile, prev_value)
3249 cpp_reader *pfile;
3250 unsigned int prev_value;
3251{
3252 pfile->no_expand_level = prev_value;
3253}
3254
3255/* Used by cpperror.c to obtain the correct line and column to report
3256 in a diagnostic. */
3257unsigned int
3258_cpp_get_line (pfile, pcol)
3259 cpp_reader *pfile;
3260 unsigned int *pcol;
3261{
3262 unsigned int index;
3263 const cpp_token *cur_token;
3264
338fa5f7 3265 if (pfile->state.in_lex_line)
f80e83a9 3266 index = pfile->token_list.tokens_used;
3267 else
c2cce424 3268 {
338fa5f7 3269 index = pfile->contexts[0].posn;
3270
3271 if (index == 0)
3272 {
3273 if (pcol)
3274 *pcol = 0;
3275 return 0;
3276 }
3277 index--;
c2cce424 3278 }
3279
338fa5f7 3280 cur_token = &pfile->token_list.tokens[index];
f80e83a9 3281 if (pcol)
3282 *pcol = cur_token->col;
3283 return cur_token->line;
3284}
3285
/* Expands to two arguments: the string literal STR as a U_CHAR
   pointer, and its length excluding the terminating NUL.  */
#define DSC(str) (const U_CHAR *)str, sizeof str - 1

/* Three-letter month abbreviations, indexed by struct tm's tm_mon.  */
static const char * const monthnames[] =
{
  "Jan", "Feb", "Mar", "Apr",
  "May", "Jun", "Jul", "Aug",
  "Sep", "Oct", "Nov", "Dec",
};
3292
3293/* Handle builtin macros like __FILE__. */
3294static const cpp_token *
3295special_symbol (pfile, node, token)
3296 cpp_reader *pfile;
3297 cpp_hashnode *node;
d2efb5ed 3298 const cpp_token *token;
89b05ef6 3299{
f80e83a9 3300 cpp_token *result;
3301 cpp_buffer *ip;
89b05ef6 3302
f80e83a9 3303 switch (node->type)
89b05ef6 3304 {
f80e83a9 3305 case T_FILE:
3306 case T_BASE_FILE:
89b05ef6 3307 {
f80e83a9 3308 const char *file;
89b05ef6 3309
f80e83a9 3310 ip = CPP_BUFFER (pfile);
3311 if (ip == 0)
3312 file = "";
89b05ef6 3313 else
f80e83a9 3314 {
3315 if (node->type == T_BASE_FILE)
3316 while (CPP_PREV_BUFFER (ip) != NULL)
3317 ip = CPP_PREV_BUFFER (ip);
3318
3319 file = ip->nominal_fname;
3320 }
3321 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3322 strlen (file));
3323 }
3324 break;
89b05ef6 3325
f80e83a9 3326 case T_INCLUDE_LEVEL:
241e762e 3327 /* pfile->include_depth counts the primary source as level 1,
3328 but historically __INCLUDE_DEPTH__ has called the primary
3329 source level 0. */
3330 result = alloc_number_token (pfile, pfile->include_depth - 1);
89b05ef6 3331 break;
3332
f80e83a9 3333 case T_SPECLINE:
3334 /* If __LINE__ is embedded in a macro, it must expand to the
3335 line of the macro's invocation, not its definition.
3336 Otherwise things like assert() will not work properly. */
3337 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
de3d9756 3338 break;
3339
f80e83a9 3340 case T_STDC:
89b05ef6 3341 {
f80e83a9 3342 int stdc = 1;
89b05ef6 3343
f80e83a9 3344#ifdef STDC_0_IN_SYSTEM_HEADERS
3345 if (CPP_IN_SYSTEM_HEADER (pfile)
76faa4c0 3346 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
f80e83a9 3347 stdc = 0;
3348#endif
3349 result = alloc_number_token (pfile, stdc);
89b05ef6 3350 }
3351 break;
3352
f80e83a9 3353 case T_DATE:
3354 case T_TIME:
3355 if (pfile->date == 0)
3356 {
3357 /* Allocate __DATE__ and __TIME__ from permanent storage,
3358 and save them in pfile so we don't have to do this again.
3359 We don't generate these strings at init time because
3360 time() and localtime() are very slow on some systems. */
3361 time_t tt = time (NULL);
3362 struct tm *tb = localtime (&tt);
3363
3364 pfile->date = make_string_token
3365 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3366 pfile->time = make_string_token
3367 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3368
76faa4c0 3369 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
f80e83a9 3370 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
76faa4c0 3371 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
f80e83a9 3372 tb->tm_hour, tb->tm_min, tb->tm_sec);
3373 }
3374 result = node->type == T_DATE ? pfile->date: pfile->time;
89b05ef6 3375 break;
3376
f80e83a9 3377 case T_POISON:
76faa4c0 3378 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
f80e83a9 3379 return token;
3380
3381 default:
3382 cpp_ice (pfile, "invalid special hash type");
3383 return token;
89b05ef6 3384 }
3385
f80e83a9 3386 ASSIGN_FLAGS_AND_POS (result, token);
3387 return result;
3388}
3389#undef DSC
3390
b3954366 3391/* Allocate pfile->input_buffer, and initialize _cpp_trigraph_map[]
f80e83a9 3392 if it hasn't happened already. */
3393
3394void
3395_cpp_init_input_buffer (pfile)
3396 cpp_reader *pfile;
3397{
deb356cf 3398 cpp_context *base;
3399
deb356cf 3400 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3401 pfile->no_expand_level = UINT_MAX;
f80e83a9 3402 pfile->context_cap = 20;
f80e83a9 3403 pfile->cur_context = 0;
f80e83a9 3404
deb356cf 3405 pfile->contexts = (cpp_context *)
3406 xmalloc (pfile->context_cap * sizeof (cpp_context));
f80e83a9 3407
deb356cf 3408 /* Clear the base context. */
3409 base = &pfile->contexts[0];
3410 base->u.list = &pfile->token_list;
3411 base->posn = 0;
3412 base->count = 0;
3413 base->args = 0;
3414 base->level = 0;
3415 base->flags = 0;
3416 base->pushed_token = 0;
f80e83a9 3417}
3418
3419/* Moves to the end of the directive line, popping contexts as
3420 necessary. */
3421void
3422_cpp_skip_rest_of_line (pfile)
3423 cpp_reader *pfile;
3424{
deb356cf 3425 /* Discard all stacked contexts. */
3426 int i;
3427 for (i = pfile->cur_context; i > 0; i--)
3428 if (pfile->contexts[i].args)
3429 free_macro_args (pfile->contexts[i].args);
3430
3431 if (pfile->no_expand_level <= pfile->cur_context)
3432 pfile->no_expand_level = 0;
3433 pfile->cur_context = 0;
f80e83a9 3434
deb356cf 3435 /* Clear the base context, and clear the directive pointer so that
3436 get_raw_token will advance to the next line. */
3437 pfile->contexts[0].count = 0;
3438 pfile->contexts[0].posn = 0;
f80e83a9 3439 pfile->token_list.directive = 0;
6060326b 3440}
3441
f80e83a9 3442/* Directive handler wrapper used by the command line option
3443 processor. */
3444void
a336277c 3445_cpp_run_directive (pfile, dir, buf, count, name)
f80e83a9 3446 cpp_reader *pfile;
3447 const struct directive *dir;
3448 const char *buf;
3449 size_t count;
a336277c 3450 const char *name;
f80e83a9 3451{
3452 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3453 {
3454 unsigned int prev_lvl = 0;
deb356cf 3455
a336277c 3456 if (name)
3457 CPP_BUFFER (pfile)->nominal_fname = name;
3458 else
3459 CPP_BUFFER (pfile)->nominal_fname = _("<command line>");
3460 CPP_BUFFER (pfile)->lineno = (unsigned int)-1;
3461
deb356cf 3462 /* Scan the line now, else prevent_macro_expansion won't work. */
3463 lex_next (pfile, 1);
f80e83a9 3464 if (! (dir->flags & EXPAND))
3465 prev_lvl = prevent_macro_expansion (pfile);
3466
3467 (void) (*dir->handler) (pfile);
3468
3469 if (! (dir->flags & EXPAND))
3470 restore_macro_expansion (pfile, prev_lvl);
3471
3472 _cpp_skip_rest_of_line (pfile);
3473 cpp_pop_buffer (pfile);
3474 }
3475}