]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/cpplex.c
For PR java/4295:
[thirdparty/gcc.git] / gcc / cpplex.c
CommitLineData
45b966db
ZW
1/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
93c80368
NB
23/* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
27
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
041c3194 36
45b966db
ZW
37#include "config.h"
38#include "system.h"
45b966db
ZW
39#include "cpplib.h"
40#include "cpphash.h"
41
c8a96070
NB
42/* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
45#ifdef CROSS_COMPILE
46#undef MULTIBYTE_CHARS
47#endif
48
49#ifdef MULTIBYTE_CHARS
50#include "mbchar.h"
51#include <locale.h>
52#endif
53
93c80368
NB
54/* Tokens with SPELL_STRING store their spelling in the token list,
55 and it's length in the token->val.name.len. */
56enum spell_type
f9a0e96c 57{
93c80368
NB
58 SPELL_OPERATOR = 0,
59 SPELL_CHAR,
60 SPELL_IDENT,
47ad4138 61 SPELL_NUMBER,
93c80368
NB
62 SPELL_STRING,
63 SPELL_NONE
f9a0e96c
ZW
64};
65
93c80368 66struct token_spelling
f9a0e96c 67{
93c80368
NB
68 enum spell_type category;
69 const unsigned char *name;
f9a0e96c
ZW
70};
71
8206c799
ZW
72static const unsigned char *const digraph_spellings[] =
73{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
93c80368
NB
74
75#define OP(e, s) { SPELL_OPERATOR, U s },
76#define TK(e, s) { s, U STRINGX (e) },
8206c799 77static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
93c80368
NB
78#undef OP
79#undef TK
80
81#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
82#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
f2d5f0cc 83
1444f2ed 84static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
29401c30
NB
85static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
86static cppchar_t get_effective_char PARAMS ((cpp_reader *));
0d9f234d 87
041c3194 88static int skip_block_comment PARAMS ((cpp_reader *));
cbcff6df 89static int skip_line_comment PARAMS ((cpp_reader *));
0d9f234d
NB
90static void adjust_column PARAMS ((cpp_reader *));
91static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
2c3fcba6
ZW
92static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
93static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
94 const U_CHAR *));
93c80368
NB
95static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
96static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
0d9f234d 97static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
93c80368 98static void unterminated PARAMS ((cpp_reader *, int));
0d9f234d
NB
99static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
100static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
29401c30 101static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
cbcff6df 102static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
93c80368 103static int name_p PARAMS ((cpp_reader *, const cpp_string *));
62729350
NB
104static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
105 const unsigned char *, unsigned int *));
5fddcffc 106static tokenrun *next_tokenrun PARAMS ((tokenrun *));
f617b8e2 107
c8a96070 108static unsigned int hex_digit_value PARAMS ((unsigned int));
6142088c 109static _cpp_buff *new_buff PARAMS ((size_t));
15dad1d9 110
041c3194 111/* Utility routine:
9e62c811 112
bfb9dc7f
ZW
113 Compares, the token TOKEN to the NUL-terminated string STRING.
114 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
15dad1d9 115
041c3194 116int
bfb9dc7f
ZW
117cpp_ideq (token, string)
118 const cpp_token *token;
041c3194
ZW
119 const char *string;
120{
bfb9dc7f 121 if (token->type != CPP_NAME)
041c3194 122 return 0;
bfb9dc7f 123
a28c5035 124 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
15dad1d9 125}
1368ee70 126
0d9f234d
NB
127/* Call when meeting a newline. Returns the character after the newline
128 (or carriage-return newline combination), or EOF. */
129static cppchar_t
1444f2ed
NB
130handle_newline (pfile, newline_char)
131 cpp_reader *pfile;
0d9f234d
NB
132 cppchar_t newline_char;
133{
1444f2ed 134 cpp_buffer *buffer;
0d9f234d
NB
135 cppchar_t next = EOF;
136
1444f2ed 137 pfile->line++;
1444f2ed 138 buffer = pfile->buffer;
0d9f234d 139 buffer->col_adjust = 0;
0d9f234d
NB
140 buffer->line_base = buffer->cur;
141
142 /* Handle CR-LF and LF-CR combinations, get the next character. */
143 if (buffer->cur < buffer->rlimit)
144 {
145 next = *buffer->cur++;
146 if (next + newline_char == '\r' + '\n')
147 {
148 buffer->line_base = buffer->cur;
149 if (buffer->cur < buffer->rlimit)
150 next = *buffer->cur++;
151 else
152 next = EOF;
153 }
154 }
155
156 buffer->read_ahead = next;
157 return next;
158}
159
160/* Subroutine of skip_escaped_newlines; called when a trigraph is
161 encountered. It warns if necessary, and returns true if the
162 trigraph should be honoured. FROM_CHAR is the third character of a
163 trigraph, and presumed to be the previous character for position
164 reporting. */
45b966db 165static int
0d9f234d 166trigraph_ok (pfile, from_char)
45b966db 167 cpp_reader *pfile;
0d9f234d 168 cppchar_t from_char;
45b966db 169{
041c3194
ZW
170 int accept = CPP_OPTION (pfile, trigraphs);
171
cbcff6df
NB
172 /* Don't warn about trigraphs in comments. */
173 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
45b966db 174 {
0d9f234d 175 cpp_buffer *buffer = pfile->buffer;
67821e3a 176
041c3194 177 if (accept)
67821e3a 178 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
041c3194 179 "trigraph ??%c converted to %c",
0d9f234d
NB
180 (int) from_char,
181 (int) _cpp_trigraph_map[from_char]);
4a5b68a2
NB
182 else if (buffer->cur != buffer->last_Wtrigraphs)
183 {
184 buffer->last_Wtrigraphs = buffer->cur;
67821e3a 185 cpp_warning_with_line (pfile, pfile->line,
4a5b68a2
NB
186 CPP_BUF_COL (buffer) - 2,
187 "trigraph ??%c ignored", (int) from_char);
188 }
45b966db 189 }
0d9f234d 190
041c3194 191 return accept;
45b966db
ZW
192}
193
0d9f234d
NB
/* Give the token being lexed type T and clear the read-ahead
   character.  Assumes local variables buffer and result.  */
#define ACCEPT_CHAR(t) \
  do { result->type = (t); buffer->read_ahead = EOF; } while (0)

/* Save and restore the read position of the buffer.  Both assume
   local variables buffer and saved_cur.

   When we move to multibyte character sets, add to these something
   that saves and restores the state of the multibyte conversion
   library.  This probably involves saving and restoring a "cookie".
   In the case of glibc it is an 8-byte structure, so is not a high
   overhead operation.  In any case, it's out of the fast path.  */
#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
205
206/* Skips any escaped newlines introduced by NEXT, which is either a
207 '?' or a '\\'. Returns the next character, which will also have
a5c3cccd
NB
208 been placed in buffer->read_ahead. This routine performs
209 preprocessing stages 1 and 2 of the ISO C standard. */
0d9f234d 210static cppchar_t
29401c30
NB
211skip_escaped_newlines (pfile, next)
212 cpp_reader *pfile;
0d9f234d 213 cppchar_t next;
45b966db 214{
29401c30
NB
215 cpp_buffer *buffer = pfile->buffer;
216
a5c3cccd
NB
217 /* Only do this if we apply stages 1 and 2. */
218 if (!buffer->from_stage3)
041c3194 219 {
a5c3cccd
NB
220 cppchar_t next1;
221 const unsigned char *saved_cur;
222 int space;
223
224 do
0d9f234d 225 {
a5c3cccd
NB
226 if (buffer->cur == buffer->rlimit)
227 break;
228
229 SAVE_STATE ();
230 if (next == '?')
0d9f234d 231 {
a5c3cccd
NB
232 next1 = *buffer->cur++;
233 if (next1 != '?' || buffer->cur == buffer->rlimit)
234 {
235 RESTORE_STATE ();
236 break;
237 }
238
239 next1 = *buffer->cur++;
240 if (!_cpp_trigraph_map[next1]
29401c30 241 || !trigraph_ok (pfile, next1))
a5c3cccd
NB
242 {
243 RESTORE_STATE ();
244 break;
245 }
246
247 /* We have a full trigraph here. */
248 next = _cpp_trigraph_map[next1];
249 if (next != '\\' || buffer->cur == buffer->rlimit)
250 break;
251 SAVE_STATE ();
252 }
253
254 /* We have a backslash, and room for at least one more character. */
255 space = 0;
256 do
257 {
258 next1 = *buffer->cur++;
259 if (!is_nvspace (next1))
260 break;
261 space = 1;
0d9f234d 262 }
a5c3cccd 263 while (buffer->cur < buffer->rlimit);
041c3194 264
a5c3cccd 265 if (!is_vspace (next1))
0d9f234d
NB
266 {
267 RESTORE_STATE ();
268 break;
269 }
45b966db 270
29401c30
NB
271 if (space && !pfile->state.lexing_comment)
272 cpp_warning (pfile, "backslash and newline separated by space");
0d9f234d 273
29401c30 274 next = handle_newline (pfile, next1);
a5c3cccd 275 if (next == EOF)
29401c30 276 cpp_pedwarn (pfile, "backslash-newline at end of file");
0d9f234d 277 }
a5c3cccd 278 while (next == '\\' || next == '?');
041c3194 279 }
45b966db 280
0d9f234d
NB
281 buffer->read_ahead = next;
282 return next;
45b966db
ZW
283}
284
0d9f234d
NB
285/* Obtain the next character, after trigraph conversion and skipping
286 an arbitrary string of escaped newlines. The common case of no
287 trigraphs or escaped newlines falls through quickly. */
288static cppchar_t
29401c30
NB
289get_effective_char (pfile)
290 cpp_reader *pfile;
64aaf407 291{
29401c30 292 cpp_buffer *buffer = pfile->buffer;
0d9f234d
NB
293 cppchar_t next = EOF;
294
295 if (buffer->cur < buffer->rlimit)
296 {
297 next = *buffer->cur++;
298
299 /* '?' can introduce trigraphs (and therefore backslash); '\\'
300 can introduce escaped newlines, which we want to skip, or
301 UCNs, which, depending upon lexer state, we will handle in
302 the future. */
303 if (next == '?' || next == '\\')
29401c30 304 next = skip_escaped_newlines (pfile, next);
0d9f234d
NB
305 }
306
307 buffer->read_ahead = next;
308 return next;
64aaf407
NB
309}
310
0d9f234d
NB
311/* Skip a C-style block comment. We find the end of the comment by
312 seeing if an asterisk is before every '/' we encounter. Returns
313 non-zero if comment terminated by EOF, zero otherwise. */
041c3194
ZW
314static int
315skip_block_comment (pfile)
45b966db
ZW
316 cpp_reader *pfile;
317{
041c3194 318 cpp_buffer *buffer = pfile->buffer;
d8090680 319 cppchar_t c = EOF, prevc = EOF;
0d9f234d 320
cbcff6df 321 pfile->state.lexing_comment = 1;
0d9f234d 322 while (buffer->cur != buffer->rlimit)
45b966db 323 {
0d9f234d
NB
324 prevc = c, c = *buffer->cur++;
325
326 next_char:
327 /* FIXME: For speed, create a new character class of characters
93c80368 328 of interest inside block comments. */
0d9f234d 329 if (c == '?' || c == '\\')
29401c30 330 c = skip_escaped_newlines (pfile, c);
041c3194 331
0d9f234d
NB
332 /* People like decorating comments with '*', so check for '/'
333 instead for efficiency. */
041c3194 334 if (c == '/')
45b966db 335 {
0d9f234d
NB
336 if (prevc == '*')
337 break;
041c3194 338
0d9f234d
NB
339 /* Warn about potential nested comments, but not if the '/'
340 comes immediately before the true comment delimeter.
041c3194 341 Don't bother to get it right across escaped newlines. */
0d9f234d
NB
342 if (CPP_OPTION (pfile, warn_comments)
343 && buffer->cur != buffer->rlimit)
45b966db 344 {
0d9f234d
NB
345 prevc = c, c = *buffer->cur++;
346 if (c == '*' && buffer->cur != buffer->rlimit)
347 {
348 prevc = c, c = *buffer->cur++;
349 if (c != '/')
67821e3a
NB
350 cpp_warning_with_line (pfile, pfile->line,
351 CPP_BUF_COL (buffer) - 2,
0d9f234d
NB
352 "\"/*\" within comment");
353 }
354 goto next_char;
45b966db 355 }
45b966db 356 }
91fcd158 357 else if (is_vspace (c))
45b966db 358 {
1444f2ed 359 prevc = c, c = handle_newline (pfile, c);
0d9f234d 360 goto next_char;
45b966db 361 }
52fadca8 362 else if (c == '\t')
0d9f234d 363 adjust_column (pfile);
45b966db 364 }
041c3194 365
cbcff6df 366 pfile->state.lexing_comment = 0;
0d9f234d
NB
367 buffer->read_ahead = EOF;
368 return c != '/' || prevc != '*';
45b966db
ZW
369}
370
f9a0e96c 371/* Skip a C++ line comment. Handles escaped newlines. Returns
0d9f234d
NB
372 non-zero if a multiline comment. The following new line, if any,
373 is left in buffer->read_ahead. */
041c3194 374static int
cbcff6df
NB
375skip_line_comment (pfile)
376 cpp_reader *pfile;
45b966db 377{
cbcff6df 378 cpp_buffer *buffer = pfile->buffer;
67821e3a 379 unsigned int orig_line = pfile->line;
0d9f234d 380 cppchar_t c;
041c3194 381
cbcff6df 382 pfile->state.lexing_comment = 1;
0d9f234d 383 do
041c3194 384 {
0d9f234d
NB
385 c = EOF;
386 if (buffer->cur == buffer->rlimit)
387 break;
041c3194 388
0d9f234d
NB
389 c = *buffer->cur++;
390 if (c == '?' || c == '\\')
29401c30 391 c = skip_escaped_newlines (pfile, c);
041c3194 392 }
0d9f234d 393 while (!is_vspace (c));
45b966db 394
cbcff6df 395 pfile->state.lexing_comment = 0;
0d9f234d 396 buffer->read_ahead = c; /* Leave any newline for caller. */
67821e3a 397 return orig_line != pfile->line;
041c3194 398}
45b966db 399
0d9f234d
NB
400/* pfile->buffer->cur is one beyond the \t character. Update
401 col_adjust so we track the column correctly. */
52fadca8 402static void
0d9f234d 403adjust_column (pfile)
52fadca8 404 cpp_reader *pfile;
52fadca8 405{
0d9f234d
NB
406 cpp_buffer *buffer = pfile->buffer;
407 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
52fadca8
NB
408
409 /* Round it up to multiple of the tabstop, but subtract 1 since the
410 tab itself occupies a character position. */
0d9f234d
NB
411 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
412 - col % CPP_OPTION (pfile, tabstop)) - 1;
52fadca8
NB
413}
414
0d9f234d
NB
415/* Skips whitespace, saving the next non-whitespace character.
416 Adjusts pfile->col_adjust to account for tabs. Without this,
417 tokens might be assigned an incorrect column. */
041c3194 418static void
0d9f234d 419skip_whitespace (pfile, c)
041c3194 420 cpp_reader *pfile;
0d9f234d 421 cppchar_t c;
041c3194
ZW
422{
423 cpp_buffer *buffer = pfile->buffer;
0d9f234d 424 unsigned int warned = 0;
45b966db 425
0d9f234d 426 do
041c3194 427 {
91fcd158
NB
428 /* Horizontal space always OK. */
429 if (c == ' ')
0d9f234d 430 ;
91fcd158 431 else if (c == '\t')
0d9f234d
NB
432 adjust_column (pfile);
433 /* Just \f \v or \0 left. */
91fcd158 434 else if (c == '\0')
041c3194 435 {
91fcd158 436 if (!warned)
0d9f234d
NB
437 {
438 cpp_warning (pfile, "null character(s) ignored");
439 warned = 1;
440 }
45b966db 441 }
93c80368 442 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
67821e3a 443 cpp_pedwarn_with_line (pfile, pfile->line,
91fcd158
NB
444 CPP_BUF_COL (buffer),
445 "%s in preprocessing directive",
446 c == '\f' ? "form feed" : "vertical tab");
0d9f234d
NB
447
448 c = EOF;
449 if (buffer->cur == buffer->rlimit)
450 break;
451 c = *buffer->cur++;
45b966db 452 }
ec5c56db 453 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
454 while (is_nvspace (c));
455
456 /* Remember the next character. */
457 buffer->read_ahead = c;
041c3194 458}
45b966db 459
93c80368
NB
460/* See if the characters of a number token are valid in a name (no
461 '.', '+' or '-'). */
462static int
463name_p (pfile, string)
464 cpp_reader *pfile;
465 const cpp_string *string;
466{
467 unsigned int i;
468
469 for (i = 0; i < string->len; i++)
470 if (!is_idchar (string->text[i]))
471 return 0;
472
473 return 1;
474}
475
2c3fcba6
ZW
476/* Parse an identifier, skipping embedded backslash-newlines. This is
477 a critical inner loop. The common case is an identifier which has
478 not been split by backslash-newline, does not contain a dollar
479 sign, and has already been scanned (roughly 10:1 ratio of
480 seen:unseen identifiers in normal code; the distribution is
481 Poisson-like). Second most common case is a new identifier, not
482 split and no dollar sign. The other possibilities are rare and
483 have been relegated to parse_identifier_slow. */
0d9f234d
NB
484
485static cpp_hashnode *
2c3fcba6 486parse_identifier (pfile)
45b966db 487 cpp_reader *pfile;
45b966db 488{
93c80368 489 cpp_hashnode *result;
2c3fcba6
ZW
490 const U_CHAR *cur, *rlimit;
491
492 /* Fast-path loop. Skim over a normal identifier.
493 N.B. ISIDNUM does not include $. */
494 cur = pfile->buffer->cur - 1;
495 rlimit = pfile->buffer->rlimit;
496 do
497 cur++;
498 while (cur < rlimit && ISIDNUM (*cur));
499
500 /* Check for slow-path cases. */
501 if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
502 result = parse_identifier_slow (pfile, cur);
503 else
504 {
505 const U_CHAR *base = pfile->buffer->cur - 1;
506 result = (cpp_hashnode *)
507 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
508 pfile->buffer->cur = cur;
509 }
510
511 /* Rarely, identifiers require diagnostics when lexed.
512 XXX Has to be forced out of the fast path. */
513 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
514 && !pfile->state.skipping, 0))
515 {
516 /* It is allowed to poison the same identifier twice. */
517 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
518 cpp_error (pfile, "attempt to use poisoned \"%s\"",
519 NODE_NAME (result));
520
521 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
522 replacement list of a variadic macro. */
523 if (result == pfile->spec_nodes.n__VA_ARGS__
524 && !pfile->state.va_args_ok)
525 cpp_pedwarn (pfile,
526 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
527 }
528
529 return result;
530}
531
532/* Slow path. This handles identifiers which have been split, and
533 identifiers which contain dollar signs. The part of the identifier
534 from PFILE->buffer->cur-1 to CUR has already been scanned. */
535static cpp_hashnode *
536parse_identifier_slow (pfile, cur)
537 cpp_reader *pfile;
538 const U_CHAR *cur;
539{
0d9f234d 540 cpp_buffer *buffer = pfile->buffer;
2c3fcba6 541 const U_CHAR *base = buffer->cur - 1;
2a967f3d 542 struct obstack *stack = &pfile->hash_table->stack;
2c3fcba6
ZW
543 unsigned int c, saw_dollar = 0, len;
544
545 /* Copy the part of the token which is known to be okay. */
546 obstack_grow (stack, base, cur - base);
041c3194 547
2c3fcba6
ZW
548 /* Now process the part which isn't. We are looking at one of
549 '$', '\\', or '?' on entry to this loop. */
550 c = *cur++;
551 buffer->cur = cur;
0d9f234d 552 do
041c3194 553 {
2c3fcba6
ZW
554 while (is_idchar (c))
555 {
556 obstack_1grow (stack, c);
45b966db 557
2c3fcba6
ZW
558 if (c == '$')
559 saw_dollar++;
ba89d661 560
2c3fcba6
ZW
561 c = EOF;
562 if (buffer->cur == buffer->rlimit)
563 break;
ba89d661 564
2c3fcba6
ZW
565 c = *buffer->cur++;
566 }
ba89d661 567
0d9f234d
NB
568 /* Potential escaped newline? */
569 if (c != '?' && c != '\\')
2c3fcba6 570 break;
29401c30 571 c = skip_escaped_newlines (pfile, c);
041c3194 572 }
0d9f234d
NB
573 while (is_idchar (c));
574
93c80368
NB
575 /* Remember the next character. */
576 buffer->read_ahead = c;
577
4fe9b91c 578 /* $ is not an identifier character in the standard, but is commonly
0d9f234d
NB
579 accepted as an extension. Don't warn about it in skipped
580 conditional blocks. */
cef0d199 581 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
0d9f234d
NB
582 cpp_pedwarn (pfile, "'$' character(s) in identifier");
583
93c80368 584 /* Identifiers are null-terminated. */
2a967f3d
NB
585 len = obstack_object_size (stack);
586 obstack_1grow (stack, '\0');
93c80368 587
2c3fcba6 588 return (cpp_hashnode *)
2a967f3d 589 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
45b966db
ZW
590}
591
0d9f234d 592/* Parse a number, skipping embedded backslash-newlines. */
45b966db 593static void
93c80368 594parse_number (pfile, number, c, leading_period)
45b966db 595 cpp_reader *pfile;
0d9f234d
NB
596 cpp_string *number;
597 cppchar_t c;
93c80368 598 int leading_period;
45b966db 599{
041c3194 600 cpp_buffer *buffer = pfile->buffer;
93c80368 601 unsigned char *dest, *limit;
45b966db 602
ece54d54
NB
603 dest = BUFF_FRONT (pfile->u_buff);
604 limit = BUFF_LIMIT (pfile->u_buff);
cbcff6df 605
93c80368
NB
606 /* Place a leading period. */
607 if (leading_period)
608 {
ece54d54
NB
609 if (dest == limit)
610 {
8c3b2693 611 _cpp_extend_buff (pfile, &pfile->u_buff, 1);
ece54d54
NB
612 dest = BUFF_FRONT (pfile->u_buff);
613 limit = BUFF_LIMIT (pfile->u_buff);
614 }
93c80368
NB
615 *dest++ = '.';
616 }
617
0d9f234d 618 do
041c3194 619 {
0d9f234d
NB
620 do
621 {
93c80368 622 /* Need room for terminating null. */
ece54d54
NB
623 if ((size_t) (limit - dest) < 2)
624 {
625 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
8c3b2693 626 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
ece54d54
NB
627 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
628 limit = BUFF_LIMIT (pfile->u_buff);
629 }
93c80368 630 *dest++ = c;
0d9f234d 631
0d9f234d
NB
632 c = EOF;
633 if (buffer->cur == buffer->rlimit)
634 break;
45b966db 635
0d9f234d
NB
636 c = *buffer->cur++;
637 }
93c80368 638 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
45b966db 639
0d9f234d
NB
640 /* Potential escaped newline? */
641 if (c != '?' && c != '\\')
642 break;
29401c30 643 c = skip_escaped_newlines (pfile, c);
45b966db 644 }
93c80368 645 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
cbcff6df 646
0d9f234d
NB
647 /* Remember the next character. */
648 buffer->read_ahead = c;
64aaf407 649
93c80368
NB
650 /* Null-terminate the number. */
651 *dest = '\0';
652
ece54d54 653 number->text = BUFF_FRONT (pfile->u_buff);
93c80368 654 number->len = dest - number->text;
ece54d54 655 BUFF_FRONT (pfile->u_buff) = dest + 1;
0d9f234d
NB
656}
657
658/* Subroutine of parse_string. Emits error for unterminated strings. */
659static void
93c80368 660unterminated (pfile, term)
0d9f234d 661 cpp_reader *pfile;
0d9f234d
NB
662 int term;
663{
664 cpp_error (pfile, "missing terminating %c character", term);
665
50410426 666 if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
041c3194 667 {
50410426 668 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
0d9f234d 669 "possible start of unterminated string literal");
50410426 670 pfile->mls_line = 0;
041c3194 671 }
45b966db
ZW
672}
673
93c80368
NB
674/* Subroutine of parse_string. */
675static int
676unescaped_terminator_p (pfile, dest)
677 cpp_reader *pfile;
678 const unsigned char *dest;
679{
680 const unsigned char *start, *temp;
681
682 /* In #include-style directives, terminators are not escapeable. */
683 if (pfile->state.angled_headers)
684 return 1;
685
ece54d54 686 start = BUFF_FRONT (pfile->u_buff);
93c80368
NB
687
688 /* An odd number of consecutive backslashes represents an escaped
689 terminator. */
690 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
691 ;
692
693 return ((dest - temp) & 1) == 0;
694}
695
0d9f234d 696/* Parses a string, character constant, or angle-bracketed header file
7868b4a2
NB
697 name. Handles embedded trigraphs and escaped newlines. The stored
698 string is guaranteed NUL-terminated, but it is not guaranteed that
699 this is the first NUL since embedded NULs are preserved.
45b966db 700
7868b4a2 701 Multi-line strings are allowed, but they are deprecated. */
041c3194 702static void
0d9f234d 703parse_string (pfile, token, terminator)
45b966db 704 cpp_reader *pfile;
041c3194 705 cpp_token *token;
0d9f234d 706 cppchar_t terminator;
45b966db 707{
041c3194 708 cpp_buffer *buffer = pfile->buffer;
93c80368 709 unsigned char *dest, *limit;
0d9f234d 710 cppchar_t c;
d82fc108 711 bool warned_nulls = false, warned_multi = false;
0d9f234d 712
ece54d54
NB
713 dest = BUFF_FRONT (pfile->u_buff);
714 limit = BUFF_LIMIT (pfile->u_buff);
93c80368 715
0d9f234d 716 for (;;)
45b966db 717 {
0d9f234d 718 if (buffer->cur == buffer->rlimit)
7868b4a2
NB
719 c = EOF;
720 else
721 c = *buffer->cur++;
722
723 have_char:
724 /* We need space for the terminating NUL. */
ece54d54
NB
725 if ((size_t) (limit - dest) < 1)
726 {
727 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
8c3b2693 728 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
ece54d54
NB
729 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
730 limit = BUFF_LIMIT (pfile->u_buff);
731 }
7868b4a2
NB
732
733 if (c == EOF)
0d9f234d 734 {
93c80368 735 unterminated (pfile, terminator);
0d9f234d
NB
736 break;
737 }
0d9f234d 738
0d9f234d
NB
739 /* Handle trigraphs, escaped newlines etc. */
740 if (c == '?' || c == '\\')
29401c30 741 c = skip_escaped_newlines (pfile, c);
45b966db 742
93c80368 743 if (c == terminator && unescaped_terminator_p (pfile, dest))
45b966db 744 {
93c80368
NB
745 c = EOF;
746 break;
0d9f234d
NB
747 }
748 else if (is_vspace (c))
749 {
750 /* In assembly language, silently terminate string and
751 character literals at end of line. This is a kludge
752 around not knowing where comments are. */
bdb05a7b 753 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
0d9f234d 754 break;
45b966db 755
0d9f234d
NB
756 /* Character constants and header names may not extend over
757 multiple lines. In Standard C, neither may strings.
758 Unfortunately, we accept multiline strings as an
16eb2788
NB
759 extension, except in #include family directives. */
760 if (terminator != '"' || pfile->state.angled_headers)
45b966db 761 {
93c80368 762 unterminated (pfile, terminator);
0d9f234d 763 break;
45b966db 764 }
45b966db 765
d82fc108
NB
766 if (!warned_multi)
767 {
768 warned_multi = true;
769 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
770 }
771
50410426
NB
772 if (pfile->mls_line == 0)
773 {
774 pfile->mls_line = token->line;
775 pfile->mls_col = token->col;
776 }
0d9f234d 777
1444f2ed 778 c = handle_newline (pfile, c);
7868b4a2
NB
779 *dest++ = '\n';
780 goto have_char;
0d9f234d 781 }
d82fc108 782 else if (c == '\0' && !warned_nulls)
0d9f234d 783 {
d82fc108
NB
784 warned_nulls = true;
785 cpp_warning (pfile, "null character(s) preserved in literal");
45b966db 786 }
45b966db 787
93c80368 788 *dest++ = c;
45b966db
ZW
789 }
790
93c80368 791 /* Remember the next character. */
0d9f234d 792 buffer->read_ahead = c;
7868b4a2 793 *dest = '\0';
45b966db 794
ece54d54
NB
795 token->val.str.text = BUFF_FRONT (pfile->u_buff);
796 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
797 BUFF_FRONT (pfile->u_buff) = dest + 1;
0d9f234d 798}
041c3194 799
93c80368 800/* The stored comment includes the comment start and any terminator. */
9e62c811 801static void
0d9f234d
NB
802save_comment (pfile, token, from)
803 cpp_reader *pfile;
041c3194
ZW
804 cpp_token *token;
805 const unsigned char *from;
9e62c811 806{
041c3194 807 unsigned char *buffer;
0d9f234d 808 unsigned int len;
0d9f234d 809
1c6d33ef 810 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
3542203b
NB
811 /* C++ comments probably (not definitely) have moved past a new
812 line, which we don't want to save in the comment. */
813 if (pfile->buffer->read_ahead != EOF)
814 len--;
ece54d54 815 buffer = _cpp_unaligned_alloc (pfile, len);
041c3194 816
041c3194 817 token->type = CPP_COMMENT;
bfb9dc7f 818 token->val.str.len = len;
0d9f234d 819 token->val.str.text = buffer;
45b966db 820
1c6d33ef
NB
821 buffer[0] = '/';
822 memcpy (buffer + 1, from, len - 1);
0d9f234d 823}
45b966db 824
14baae01 825/* Subroutine of _cpp_lex_direct to handle '%'. A little tricky, since we
cbcff6df 826 want to avoid stepping back when lexing %:%X. */
0d9f234d 827static void
29401c30
NB
828lex_percent (pfile, result)
829 cpp_reader *pfile;
0d9f234d 830 cpp_token *result;
0d9f234d 831{
29401c30 832 cpp_buffer *buffer= pfile->buffer;
cbcff6df
NB
833 cppchar_t c;
834
835 result->type = CPP_MOD;
836 /* Parsing %:%X could leave an extra character. */
837 if (buffer->extra_char == EOF)
29401c30 838 c = get_effective_char (pfile);
cbcff6df
NB
839 else
840 {
841 c = buffer->read_ahead = buffer->extra_char;
842 buffer->extra_char = EOF;
843 }
844
845 if (c == '=')
846 ACCEPT_CHAR (CPP_MOD_EQ);
29401c30 847 else if (CPP_OPTION (pfile, digraphs))
cbcff6df
NB
848 {
849 if (c == ':')
850 {
851 result->flags |= DIGRAPH;
852 ACCEPT_CHAR (CPP_HASH);
29401c30 853 if (get_effective_char (pfile) == '%')
cbcff6df 854 {
29401c30 855 buffer->extra_char = get_effective_char (pfile);
cbcff6df
NB
856 if (buffer->extra_char == ':')
857 {
858 buffer->extra_char = EOF;
859 ACCEPT_CHAR (CPP_PASTE);
860 }
861 else
862 /* We'll catch the extra_char when we're called back. */
863 buffer->read_ahead = '%';
864 }
865 }
866 else if (c == '>')
867 {
868 result->flags |= DIGRAPH;
869 ACCEPT_CHAR (CPP_CLOSE_BRACE);
870 }
871 }
872}
873
14baae01 874/* Subroutine of _cpp_lex_direct to handle '.'. This is tricky, since we
cbcff6df
NB
875 want to avoid stepping back when lexing '...' or '.123'. In the
876 latter case we should also set a flag for parse_number. */
877static void
878lex_dot (pfile, result)
879 cpp_reader *pfile;
880 cpp_token *result;
881{
882 cpp_buffer *buffer = pfile->buffer;
883 cppchar_t c;
884
885 /* Parsing ..X could leave an extra character. */
886 if (buffer->extra_char == EOF)
29401c30 887 c = get_effective_char (pfile);
cbcff6df
NB
888 else
889 {
890 c = buffer->read_ahead = buffer->extra_char;
891 buffer->extra_char = EOF;
892 }
0d9f234d 893
cbcff6df
NB
894 /* All known character sets have 0...9 contiguous. */
895 if (c >= '0' && c <= '9')
896 {
897 result->type = CPP_NUMBER;
93c80368 898 parse_number (pfile, &result->val.str, c, 1);
cbcff6df 899 }
041c3194 900 else
ea4a453b 901 {
cbcff6df
NB
902 result->type = CPP_DOT;
903 if (c == '.')
904 {
29401c30 905 buffer->extra_char = get_effective_char (pfile);
cbcff6df
NB
906 if (buffer->extra_char == '.')
907 {
908 buffer->extra_char = EOF;
909 ACCEPT_CHAR (CPP_ELLIPSIS);
910 }
911 else
912 /* We'll catch the extra_char when we're called back. */
913 buffer->read_ahead = '.';
914 }
915 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
916 ACCEPT_CHAR (CPP_DOT_STAR);
ea4a453b 917 }
45b966db
ZW
918}
919
5fddcffc
NB
920/* Allocate COUNT tokens for RUN. */
921void
922_cpp_init_tokenrun (run, count)
923 tokenrun *run;
924 unsigned int count;
925{
926 run->base = xnewvec (cpp_token, count);
927 run->limit = run->base + count;
928 run->next = NULL;
929}
930
931/* Returns the next tokenrun, or creates one if there is none. */
932static tokenrun *
933next_tokenrun (run)
934 tokenrun *run;
935{
936 if (run->next == NULL)
937 {
938 run->next = xnew (tokenrun);
bdcbe496 939 run->next->prev = run;
5fddcffc
NB
940 _cpp_init_tokenrun (run->next, 250);
941 }
942
943 return run->next;
944}
945
4ed5bcfb
NB
946/* Allocate a single token that is invalidated at the same time as the
947 rest of the tokens on the line. Has its line and col set to the
948 same as the last lexed token, so that diagnostics appear in the
949 right place. */
950cpp_token *
951_cpp_temp_token (pfile)
952 cpp_reader *pfile;
953{
954 cpp_token *old, *result;
955
956 old = pfile->cur_token - 1;
957 if (pfile->cur_token == pfile->cur_run->limit)
958 {
959 pfile->cur_run = next_tokenrun (pfile->cur_run);
960 pfile->cur_token = pfile->cur_run->base;
961 }
962
963 result = pfile->cur_token++;
964 result->line = old->line;
965 result->col = old->col;
966 return result;
967}
968
14baae01
NB
969/* Lex a token into RESULT (external interface). Takes care of issues
970 like directive handling, token lookahead, multiple include
971 opimisation and skipping. */
345894b4
NB
972const cpp_token *
973_cpp_lex_token (pfile)
45b966db 974 cpp_reader *pfile;
5fddcffc 975{
bdcbe496 976 cpp_token *result;
5fddcffc 977
bdcbe496 978 for (;;)
5fddcffc 979 {
bdcbe496 980 if (pfile->cur_token == pfile->cur_run->limit)
5fddcffc 981 {
bdcbe496
NB
982 pfile->cur_run = next_tokenrun (pfile->cur_run);
983 pfile->cur_token = pfile->cur_run->base;
5fddcffc
NB
984 }
985
bdcbe496 986 if (pfile->lookaheads)
14baae01
NB
987 {
988 pfile->lookaheads--;
989 result = pfile->cur_token++;
990 }
bdcbe496 991 else
14baae01 992 result = _cpp_lex_direct (pfile);
bdcbe496
NB
993
994 if (result->flags & BOL)
5fddcffc 995 {
bdcbe496
NB
996 /* Is this a directive. If _cpp_handle_directive returns
997 false, it is an assembler #. */
998 if (result->type == CPP_HASH
999 && !pfile->state.parsing_args
1000 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1001 continue;
97293897
NB
1002 if (pfile->cb.line_change && !pfile->state.skipping)
1003 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
5fddcffc 1004 }
5fddcffc 1005
bdcbe496
NB
1006 /* We don't skip tokens in directives. */
1007 if (pfile->state.in_directive)
1008 break;
5fddcffc 1009
bdcbe496 1010 /* Outside a directive, invalidate controlling macros. At file
14baae01 1011 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
bdcbe496 1012 get here and MI optimisation works. */
5fddcffc 1013 pfile->mi_valid = false;
bdcbe496
NB
1014
1015 if (!pfile->state.skipping || result->type == CPP_EOF)
1016 break;
5fddcffc
NB
1017 }
1018
345894b4 1019 return result;
5fddcffc
NB
1020}
1021
14baae01
NB
1022/* Lex a token into pfile->cur_token, which is also incremented, to
1023 get diagnostics pointing to the correct location.
1024
1025 Does not handle issues such as token lookahead, multiple-include
1026 optimisation, directives, skipping etc. This function is only
1027 suitable for use by _cpp_lex_token, and in special cases like
1028 lex_expansion_token which doesn't care for any of these issues.
1029
1030 When meeting a newline, returns CPP_EOF if parsing a directive,
1031 otherwise returns to the start of the token buffer if permissible.
1032 Returns the location of the lexed token. */
1033cpp_token *
1034_cpp_lex_direct (pfile)
5fddcffc 1035 cpp_reader *pfile;
45b966db 1036{
0d9f234d 1037 cppchar_t c;
adb84b42 1038 cpp_buffer *buffer;
0d9f234d 1039 const unsigned char *comment_start;
14baae01 1040 cpp_token *result = pfile->cur_token++;
9ec7291f 1041
5fddcffc 1042 fresh_line:
adb84b42 1043 buffer = pfile->buffer;
bd969772
NB
1044 result->flags = buffer->saved_flags;
1045 buffer->saved_flags = 0;
5fddcffc 1046 update_tokens_line:
1444f2ed 1047 result->line = pfile->line;
041c3194 1048
5fddcffc 1049 skipped_white:
0d9f234d
NB
1050 c = buffer->read_ahead;
1051 if (c == EOF && buffer->cur < buffer->rlimit)
5fddcffc
NB
1052 c = *buffer->cur++;
1053 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
0d9f234d 1054 buffer->read_ahead = EOF;
5fddcffc
NB
1055
1056 trigraph:
0d9f234d 1057 switch (c)
45b966db 1058 {
0d9f234d 1059 case EOF:
bdcbe496 1060 buffer->saved_flags = BOL;
5fddcffc 1061 if (!pfile->state.parsing_args && !pfile->state.in_directive)
ef6e958a 1062 {
bdcbe496 1063 if (buffer->cur != buffer->line_base)
5fddcffc
NB
1064 {
1065 /* Non-empty files should end in a newline. Don't warn
1066 for command line and _Pragma buffers. */
1067 if (!buffer->from_stage3)
1068 cpp_pedwarn (pfile, "no newline at end of file");
1069 handle_newline (pfile, '\n');
7364fdd8 1070 }
bdcbe496
NB
1071
1072 /* Don't pop the last buffer. */
1073 if (buffer->prev)
1074 {
1075 unsigned char stop = buffer->return_at_eof;
1076
1077 _cpp_pop_buffer (pfile);
1078 if (!stop)
1079 goto fresh_line;
1080 }
ef6e958a 1081 }
0d9f234d 1082 result->type = CPP_EOF;
5fddcffc 1083 break;
45b966db 1084
0d9f234d
NB
1085 case ' ': case '\t': case '\f': case '\v': case '\0':
1086 skip_whitespace (pfile, c);
1087 result->flags |= PREV_WHITE;
5fddcffc 1088 goto skipped_white;
0d9f234d
NB
1089
1090 case '\n': case '\r':
bdcbe496
NB
1091 handle_newline (pfile, c);
1092 buffer->saved_flags = BOL;
1093 if (! pfile->state.in_directive)
45b966db 1094 {
4ed5bcfb
NB
1095 if (pfile->state.parsing_args == 2)
1096 buffer->saved_flags |= PREV_WHITE;
bdcbe496
NB
1097 if (!pfile->keep_tokens)
1098 {
1099 pfile->cur_run = &pfile->base_run;
1100 result = pfile->base_run.base;
1101 pfile->cur_token = result + 1;
1102 }
1103 goto fresh_line;
45b966db 1104 }
5fddcffc
NB
1105 result->type = CPP_EOF;
1106 break;
46d07497 1107
0d9f234d
NB
1108 case '?':
1109 case '\\':
1110 /* These could start an escaped newline, or '?' a trigraph. Let
1111 skip_escaped_newlines do all the work. */
1112 {
67821e3a 1113 unsigned int line = pfile->line;
0d9f234d 1114
29401c30 1115 c = skip_escaped_newlines (pfile, c);
67821e3a 1116 if (line != pfile->line)
0d9f234d
NB
1117 /* We had at least one escaped newline of some sort, and the
1118 next character is in buffer->read_ahead. Update the
1119 token's line and column. */
5fddcffc 1120 goto update_tokens_line;
0d9f234d
NB
1121
1122 /* We are either the original '?' or '\\', or a trigraph. */
1123 result->type = CPP_QUERY;
1124 buffer->read_ahead = EOF;
1125 if (c == '\\')
12c4f523 1126 goto random_char;
0d9f234d 1127 else if (c != '?')
5fddcffc 1128 goto trigraph;
0d9f234d
NB
1129 }
1130 break;
46d07497 1131
0d9f234d
NB
1132 case '0': case '1': case '2': case '3': case '4':
1133 case '5': case '6': case '7': case '8': case '9':
1134 result->type = CPP_NUMBER;
93c80368 1135 parse_number (pfile, &result->val.str, c, 0);
0d9f234d 1136 break;
46d07497 1137
0d9f234d
NB
1138 case '$':
1139 if (!CPP_OPTION (pfile, dollars_in_ident))
1140 goto random_char;
ec5c56db 1141 /* Fall through... */
0d9f234d
NB
1142
1143 case '_':
1144 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1145 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1146 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1147 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1148 case 'y': case 'z':
1149 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1150 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1151 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1152 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1153 case 'Y': case 'Z':
1154 result->type = CPP_NAME;
2c3fcba6 1155 result->val.node = parse_identifier (pfile);
0d9f234d
NB
1156
1157 /* 'L' may introduce wide characters or strings. */
93c80368 1158 if (result->val.node == pfile->spec_nodes.n_L)
0d9f234d 1159 {
2c3fcba6
ZW
1160 c = buffer->read_ahead;
1161 if (c == EOF && buffer->cur < buffer->rlimit)
1162 c = *buffer->cur;
0d9f234d 1163 if (c == '\'' || c == '"')
ba89d661 1164 {
2c3fcba6 1165 buffer->cur++;
0d9f234d
NB
1166 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1167 goto make_string;
ba89d661 1168 }
0d9f234d
NB
1169 }
1170 /* Convert named operators to their proper types. */
93c80368 1171 else if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
1172 {
1173 result->flags |= NAMED_OP;
93c80368 1174 result->type = result->val.node->value.operator;
0d9f234d
NB
1175 }
1176 break;
1177
1178 case '\'':
1179 case '"':
1180 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1181 make_string:
1182 parse_string (pfile, result, c);
1183 break;
041c3194 1184
0d9f234d 1185 case '/':
1c6d33ef
NB
1186 /* A potential block or line comment. */
1187 comment_start = buffer->cur;
0d9f234d 1188 result->type = CPP_DIV;
29401c30 1189 c = get_effective_char (pfile);
0d9f234d
NB
1190 if (c == '=')
1191 ACCEPT_CHAR (CPP_DIV_EQ);
1c6d33ef
NB
1192 if (c != '/' && c != '*')
1193 break;
e61fc951 1194
1c6d33ef
NB
1195 if (c == '*')
1196 {
0d9f234d 1197 if (skip_block_comment (pfile))
67821e3a 1198 cpp_error (pfile, "unterminated comment");
0d9f234d 1199 }
1c6d33ef 1200 else
0d9f234d 1201 {
1c6d33ef
NB
1202 if (!CPP_OPTION (pfile, cplusplus_comments)
1203 && !CPP_IN_SYSTEM_HEADER (pfile))
1204 break;
1205
bdb05a7b
NB
1206 /* Warn about comments only if pedantically GNUC89, and not
1207 in system headers. */
1208 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 1209 && ! buffer->warned_cplusplus_comments)
041c3194 1210 {
1c6d33ef
NB
1211 cpp_pedwarn (pfile,
1212 "C++ style comments are not allowed in ISO C89");
1213 cpp_pedwarn (pfile,
1214 "(this will be reported only once per input file)");
1215 buffer->warned_cplusplus_comments = 1;
1216 }
0d9f234d 1217
a94c1199 1218 /* Skip_line_comment updates buffer->read_ahead. */
01ef6563 1219 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
50410426 1220 cpp_warning (pfile, "multi-line comment");
1c6d33ef 1221 }
0d9f234d 1222
1c6d33ef
NB
1223 /* Skipping the comment has updated buffer->read_ahead. */
1224 if (!pfile->state.save_comments)
1225 {
1226 result->flags |= PREV_WHITE;
5fddcffc 1227 goto update_tokens_line;
0d9f234d 1228 }
1c6d33ef
NB
1229
1230 /* Save the comment as a token in its own right. */
1231 save_comment (pfile, result, comment_start);
bdcbe496 1232 break;
0d9f234d
NB
1233
1234 case '<':
1235 if (pfile->state.angled_headers)
1236 {
1237 result->type = CPP_HEADER_NAME;
1238 c = '>'; /* terminator. */
1239 goto make_string;
1240 }
45b966db 1241
0d9f234d 1242 result->type = CPP_LESS;
29401c30 1243 c = get_effective_char (pfile);
0d9f234d
NB
1244 if (c == '=')
1245 ACCEPT_CHAR (CPP_LESS_EQ);
1246 else if (c == '<')
1247 {
1248 ACCEPT_CHAR (CPP_LSHIFT);
29401c30 1249 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1250 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1251 }
1252 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1253 {
1254 ACCEPT_CHAR (CPP_MIN);
29401c30 1255 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1256 ACCEPT_CHAR (CPP_MIN_EQ);
1257 }
1258 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1259 {
1260 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1261 result->flags |= DIGRAPH;
1262 }
1263 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1264 {
1265 ACCEPT_CHAR (CPP_OPEN_BRACE);
1266 result->flags |= DIGRAPH;
1267 }
1268 break;
1269
1270 case '>':
1271 result->type = CPP_GREATER;
29401c30 1272 c = get_effective_char (pfile);
0d9f234d
NB
1273 if (c == '=')
1274 ACCEPT_CHAR (CPP_GREATER_EQ);
1275 else if (c == '>')
1276 {
1277 ACCEPT_CHAR (CPP_RSHIFT);
29401c30 1278 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1279 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1280 }
1281 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1282 {
1283 ACCEPT_CHAR (CPP_MAX);
29401c30 1284 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1285 ACCEPT_CHAR (CPP_MAX_EQ);
1286 }
1287 break;
1288
cbcff6df 1289 case '%':
29401c30 1290 lex_percent (pfile, result);
0d9f234d
NB
1291 break;
1292
cbcff6df
NB
1293 case '.':
1294 lex_dot (pfile, result);
0d9f234d 1295 break;
45b966db 1296
0d9f234d
NB
1297 case '+':
1298 result->type = CPP_PLUS;
29401c30 1299 c = get_effective_char (pfile);
0d9f234d
NB
1300 if (c == '=')
1301 ACCEPT_CHAR (CPP_PLUS_EQ);
1302 else if (c == '+')
1303 ACCEPT_CHAR (CPP_PLUS_PLUS);
1304 break;
04e3ec78 1305
0d9f234d
NB
1306 case '-':
1307 result->type = CPP_MINUS;
29401c30 1308 c = get_effective_char (pfile);
0d9f234d
NB
1309 if (c == '>')
1310 {
1311 ACCEPT_CHAR (CPP_DEREF);
1312 if (CPP_OPTION (pfile, cplusplus)
29401c30 1313 && get_effective_char (pfile) == '*')
0d9f234d
NB
1314 ACCEPT_CHAR (CPP_DEREF_STAR);
1315 }
1316 else if (c == '=')
1317 ACCEPT_CHAR (CPP_MINUS_EQ);
1318 else if (c == '-')
1319 ACCEPT_CHAR (CPP_MINUS_MINUS);
1320 break;
45b966db 1321
0d9f234d
NB
1322 case '*':
1323 result->type = CPP_MULT;
29401c30 1324 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1325 ACCEPT_CHAR (CPP_MULT_EQ);
1326 break;
04e3ec78 1327
0d9f234d
NB
1328 case '=':
1329 result->type = CPP_EQ;
29401c30 1330 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1331 ACCEPT_CHAR (CPP_EQ_EQ);
1332 break;
f8f769ea 1333
0d9f234d
NB
1334 case '!':
1335 result->type = CPP_NOT;
29401c30 1336 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1337 ACCEPT_CHAR (CPP_NOT_EQ);
1338 break;
45b966db 1339
0d9f234d
NB
1340 case '&':
1341 result->type = CPP_AND;
29401c30 1342 c = get_effective_char (pfile);
0d9f234d
NB
1343 if (c == '=')
1344 ACCEPT_CHAR (CPP_AND_EQ);
1345 else if (c == '&')
1346 ACCEPT_CHAR (CPP_AND_AND);
1347 break;
1348
1349 case '#':
a949941c 1350 result->type = CPP_HASH;
5fddcffc
NB
1351 if (get_effective_char (pfile) == '#')
1352 ACCEPT_CHAR (CPP_PASTE);
0d9f234d 1353 break;
45b966db 1354
0d9f234d
NB
1355 case '|':
1356 result->type = CPP_OR;
29401c30 1357 c = get_effective_char (pfile);
0d9f234d
NB
1358 if (c == '=')
1359 ACCEPT_CHAR (CPP_OR_EQ);
1360 else if (c == '|')
1361 ACCEPT_CHAR (CPP_OR_OR);
1362 break;
45b966db 1363
0d9f234d
NB
1364 case '^':
1365 result->type = CPP_XOR;
29401c30 1366 if (get_effective_char (pfile) == '=')
0d9f234d
NB
1367 ACCEPT_CHAR (CPP_XOR_EQ);
1368 break;
45b966db 1369
0d9f234d
NB
1370 case ':':
1371 result->type = CPP_COLON;
29401c30 1372 c = get_effective_char (pfile);
0d9f234d
NB
1373 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1374 ACCEPT_CHAR (CPP_SCOPE);
1375 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1376 {
1377 result->flags |= DIGRAPH;
1378 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1379 }
1380 break;
45b966db 1381
0d9f234d
NB
1382 case '~': result->type = CPP_COMPL; break;
1383 case ',': result->type = CPP_COMMA; break;
1384 case '(': result->type = CPP_OPEN_PAREN; break;
1385 case ')': result->type = CPP_CLOSE_PAREN; break;
1386 case '[': result->type = CPP_OPEN_SQUARE; break;
1387 case ']': result->type = CPP_CLOSE_SQUARE; break;
1388 case '{': result->type = CPP_OPEN_BRACE; break;
1389 case '}': result->type = CPP_CLOSE_BRACE; break;
1390 case ';': result->type = CPP_SEMICOLON; break;
1391
cc937581
ZW
1392 /* @ is a punctuator in Objective C. */
1393 case '@': result->type = CPP_ATSIGN; break;
0d9f234d
NB
1394
1395 random_char:
1396 default:
1397 result->type = CPP_OTHER;
6c53ebff 1398 result->val.c = c;
0d9f234d
NB
1399 break;
1400 }
bdcbe496
NB
1401
1402 return result;
0d9f234d
NB
1403}
1404
93c80368
NB
1405/* An upper bound on the number of bytes needed to spell a token,
1406 including preceding whitespace. */
1407unsigned int
1408cpp_token_len (token)
1409 const cpp_token *token;
0d9f234d 1410{
93c80368 1411 unsigned int len;
6d2c2047 1412
93c80368 1413 switch (TOKEN_SPELL (token))
041c3194 1414 {
a28c5035 1415 default: len = 0; break;
47ad4138 1416 case SPELL_NUMBER:
a28c5035
NB
1417 case SPELL_STRING: len = token->val.str.len; break;
1418 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
041c3194 1419 }
47ad4138 1420 /* 1 for whitespace, 4 for comment delimiters. */
93c80368 1421 return len + 5;
6d2c2047
ZW
1422}
1423
041c3194 1424/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1425 already contain the enough space to hold the token's spelling.
1426 Returns a pointer to the character after the last character
1427 written. */
93c80368
NB
1428unsigned char *
1429cpp_spell_token (pfile, token, buffer)
041c3194
ZW
1430 cpp_reader *pfile; /* Would be nice to be rid of this... */
1431 const cpp_token *token;
1432 unsigned char *buffer;
1433{
96be6998 1434 switch (TOKEN_SPELL (token))
041c3194
ZW
1435 {
1436 case SPELL_OPERATOR:
1437 {
1438 const unsigned char *spelling;
1439 unsigned char c;
d6d5f795 1440
041c3194 1441 if (token->flags & DIGRAPH)
37b8524c
JDA
1442 spelling
1443 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1444 else if (token->flags & NAMED_OP)
1445 goto spell_ident;
041c3194 1446 else
96be6998 1447 spelling = TOKEN_NAME (token);
041c3194
ZW
1448
1449 while ((c = *spelling++) != '\0')
1450 *buffer++ = c;
1451 }
1452 break;
d6d5f795 1453
47ad4138
ZW
1454 case SPELL_CHAR:
1455 *buffer++ = token->val.c;
1456 break;
1457
1458 spell_ident:
041c3194 1459 case SPELL_IDENT:
a28c5035
NB
1460 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1461 buffer += NODE_LEN (token->val.node);
041c3194 1462 break;
d6d5f795 1463
47ad4138
ZW
1464 case SPELL_NUMBER:
1465 memcpy (buffer, token->val.str.text, token->val.str.len);
1466 buffer += token->val.str.len;
1467 break;
1468
041c3194
ZW
1469 case SPELL_STRING:
1470 {
ba89d661
ZW
1471 int left, right, tag;
1472 switch (token->type)
1473 {
1474 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1475 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
ba89d661
ZW
1476 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1477 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1478 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
47ad4138
ZW
1479 default:
1480 cpp_ice (pfile, "unknown string token %s\n", TOKEN_NAME (token));
1481 return buffer;
ba89d661
ZW
1482 }
1483 if (tag) *buffer++ = tag;
47ad4138 1484 *buffer++ = left;
bfb9dc7f
ZW
1485 memcpy (buffer, token->val.str.text, token->val.str.len);
1486 buffer += token->val.str.len;
47ad4138 1487 *buffer++ = right;
041c3194
ZW
1488 }
1489 break;
d6d5f795 1490
041c3194 1491 case SPELL_NONE:
96be6998 1492 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1493 break;
1494 }
d6d5f795 1495
041c3194
ZW
1496 return buffer;
1497}
d6d5f795 1498
93c80368
NB
1499/* Returns a token as a null-terminated string. The string is
1500 temporary, and automatically freed later. Useful for diagnostics. */
1501unsigned char *
1502cpp_token_as_text (pfile, token)
c5a04734 1503 cpp_reader *pfile;
041c3194 1504 const cpp_token *token;
c5a04734 1505{
93c80368 1506 unsigned int len = cpp_token_len (token);
ece54d54 1507 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
c5a04734 1508
93c80368
NB
1509 end = cpp_spell_token (pfile, token, start);
1510 end[0] = '\0';
c5a04734 1511
93c80368
NB
1512 return start;
1513}
c5a04734 1514
93c80368
NB
1515/* Used by C front ends. Should really move to using cpp_token_as_text. */
1516const char *
1517cpp_type2name (type)
1518 enum cpp_ttype type;
1519{
1520 return (const char *) token_spellings[type].name;
1521}
c5a04734 1522
4ed5bcfb
NB
1523/* Writes the spelling of token to FP, without any preceding space.
1524 Separated from cpp_spell_token for efficiency - to avoid stdio
1525 double-buffering. */
93c80368
NB
1526void
1527cpp_output_token (token, fp)
1528 const cpp_token *token;
1529 FILE *fp;
1530{
93c80368 1531 switch (TOKEN_SPELL (token))
c5a04734 1532 {
93c80368
NB
1533 case SPELL_OPERATOR:
1534 {
1535 const unsigned char *spelling;
3b681e9d 1536 int c;
c5a04734 1537
93c80368 1538 if (token->flags & DIGRAPH)
37b8524c
JDA
1539 spelling
1540 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1541 else if (token->flags & NAMED_OP)
1542 goto spell_ident;
1543 else
1544 spelling = TOKEN_NAME (token);
041c3194 1545
3b681e9d
ZW
1546 c = *spelling;
1547 do
1548 putc (c, fp);
1549 while ((c = *++spelling) != '\0');
93c80368
NB
1550 }
1551 break;
041c3194 1552
47ad4138
ZW
1553 case SPELL_CHAR:
1554 putc (token->val.c, fp);
1555 break;
1556
93c80368
NB
1557 spell_ident:
1558 case SPELL_IDENT:
3b681e9d 1559 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
93c80368 1560 break;
041c3194 1561
47ad4138
ZW
1562 case SPELL_NUMBER:
1563 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1564 break;
1565
93c80368
NB
1566 case SPELL_STRING:
1567 {
1568 int left, right, tag;
1569 switch (token->type)
1570 {
1571 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1572 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
93c80368
NB
1573 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1574 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1575 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
47ad4138
ZW
1576 default:
1577 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1578 return;
93c80368
NB
1579 }
1580 if (tag) putc (tag, fp);
47ad4138 1581 putc (left, fp);
93c80368 1582 fwrite (token->val.str.text, 1, token->val.str.len, fp);
47ad4138 1583 putc (right, fp);
93c80368
NB
1584 }
1585 break;
c5a04734 1586
93c80368
NB
1587 case SPELL_NONE:
1588 /* An error, most probably. */
1589 break;
041c3194 1590 }
c5a04734
ZW
1591}
1592
93c80368
NB
1593/* Compare two tokens. */
1594int
1595_cpp_equiv_tokens (a, b)
1596 const cpp_token *a, *b;
c5a04734 1597{
93c80368
NB
1598 if (a->type == b->type && a->flags == b->flags)
1599 switch (TOKEN_SPELL (a))
1600 {
1601 default: /* Keep compiler happy. */
1602 case SPELL_OPERATOR:
1603 return 1;
1604 case SPELL_CHAR:
6c53ebff 1605 return a->val.c == b->val.c; /* Character. */
93c80368 1606 case SPELL_NONE:
56051c0a 1607 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1608 case SPELL_IDENT:
1609 return a->val.node == b->val.node;
47ad4138 1610 case SPELL_NUMBER:
93c80368
NB
1611 case SPELL_STRING:
1612 return (a->val.str.len == b->val.str.len
1613 && !memcmp (a->val.str.text, b->val.str.text,
1614 a->val.str.len));
1615 }
c5a04734 1616
041c3194
ZW
1617 return 0;
1618}
1619
93c80368
NB
1620/* Returns nonzero if a space should be inserted to avoid an
1621 accidental token paste for output. For simplicity, it is
1622 conservative, and occasionally advises a space where one is not
1623 needed, e.g. "." and ".2". */
041c3194 1624
93c80368
NB
1625int
1626cpp_avoid_paste (pfile, token1, token2)
c5a04734 1627 cpp_reader *pfile;
93c80368 1628 const cpp_token *token1, *token2;
c5a04734 1629{
93c80368
NB
1630 enum cpp_ttype a = token1->type, b = token2->type;
1631 cppchar_t c;
c5a04734 1632
93c80368
NB
1633 if (token1->flags & NAMED_OP)
1634 a = CPP_NAME;
1635 if (token2->flags & NAMED_OP)
1636 b = CPP_NAME;
c5a04734 1637
93c80368
NB
1638 c = EOF;
1639 if (token2->flags & DIGRAPH)
37b8524c 1640 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1641 else if (token_spellings[b].category == SPELL_OPERATOR)
1642 c = token_spellings[b].name[0];
c5a04734 1643
93c80368 1644 /* Quickly get everything that can paste with an '='. */
37b8524c 1645 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1646 return 1;
c5a04734 1647
93c80368 1648 switch (a)
c5a04734 1649 {
93c80368
NB
1650 case CPP_GREATER: return c == '>' || c == '?';
1651 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1652 case CPP_PLUS: return c == '+';
1653 case CPP_MINUS: return c == '-' || c == '>';
1654 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1655 case CPP_MOD: return c == ':' || c == '>';
1656 case CPP_AND: return c == '&';
1657 case CPP_OR: return c == '|';
1658 case CPP_COLON: return c == ':' || c == '>';
1659 case CPP_DEREF: return c == '*';
26ec42ee 1660 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1661 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1662 case CPP_NAME: return ((b == CPP_NUMBER
1663 && name_p (pfile, &token2->val.str))
1664 || b == CPP_NAME
1665 || b == CPP_CHAR || b == CPP_STRING); /* L */
1666 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1667 || c == '.' || c == '+' || c == '-');
1668 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
6c53ebff 1669 && token1->val.c == '@'
93c80368
NB
1670 && (b == CPP_NAME || b == CPP_STRING));
1671 default: break;
c5a04734 1672 }
c5a04734 1673
417f3e3a 1674 return 0;
c5a04734
ZW
1675}
1676
93c80368 1677/* Output all the remaining tokens on the current line, and a newline
4ed5bcfb
NB
1678 character, to FP. Leading whitespace is removed. If there are
1679 macros, special token padding is not performed. */
c5a04734 1680void
93c80368 1681cpp_output_line (pfile, fp)
c5a04734 1682 cpp_reader *pfile;
93c80368 1683 FILE *fp;
c5a04734 1684{
4ed5bcfb 1685 const cpp_token *token;
96be6998 1686
4ed5bcfb
NB
1687 token = cpp_get_token (pfile);
1688 while (token->type != CPP_EOF)
96be6998 1689 {
4ed5bcfb
NB
1690 cpp_output_token (token, fp);
1691 token = cpp_get_token (pfile);
1692 if (token->flags & PREV_WHITE)
1693 putc (' ', fp);
96be6998
ZW
1694 }
1695
93c80368 1696 putc ('\n', fp);
041c3194 1697}
c5a04734 1698
c8a96070
NB
/* Return the numeric value (0-15) of the hexadecimal digit C, which
   may be in either case.  Aborts if C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;

  /* Callers must only pass characters that are hex digits.  */
  abort ();
}
1712
62729350
NB
1713/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1714 failure if cpplib is not parsing C++ or C99. Such failure is
1715 silent, and no variables are updated. Otherwise returns 0, and
1716 warns if -Wtraditional.
c8a96070
NB
1717
1718 [lex.charset]: The character designated by the universal character
1719 name \UNNNNNNNN is that character whose character short name in
1720 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1721 universal character name \uNNNN is that character whose character
1722 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1723 for a universal character name is less than 0x20 or in the range
1724 0x7F-0x9F (inclusive), or if the universal character name
1725 designates a character in the basic source character set, then the
1726 program is ill-formed.
1727
1728 We assume that wchar_t is Unicode, so we don't need to do any
62729350 1729 mapping. Is this ever wrong?
c8a96070 1730
62729350
NB
1731 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1732 LIMIT is the end of the string or charconst. PSTR is updated to
1733 point after the UCS on return, and the UCS is written into PC. */
1734
1735static int
1736maybe_read_ucs (pfile, pstr, limit, pc)
c8a96070
NB
1737 cpp_reader *pfile;
1738 const unsigned char **pstr;
1739 const unsigned char *limit;
62729350 1740 unsigned int *pc;
c8a96070
NB
1741{
1742 const unsigned char *p = *pstr;
62729350
NB
1743 unsigned int code = 0;
1744 unsigned int c = *pc, length;
1745
1746 /* Only attempt to interpret a UCS for C++ and C99. */
1747 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1748 return 1;
c8a96070 1749
62729350
NB
1750 if (CPP_WTRADITIONAL (pfile))
1751 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
c8a96070 1752
f8710242
NB
1753 length = (c == 'u' ? 4: 8);
1754
1755 if ((size_t) (limit - p) < length)
1756 {
1757 cpp_error (pfile, "incomplete universal-character-name");
1758 /* Skip to the end to avoid more diagnostics. */
1759 p = limit;
1760 }
1761 else
1762 {
1763 for (; length; length--, p++)
c8a96070 1764 {
f8710242
NB
1765 c = *p;
1766 if (ISXDIGIT (c))
1767 code = (code << 4) + hex_digit_value (c);
1768 else
1769 {
1770 cpp_error (pfile,
1771 "non-hex digit '%c' in universal-character-name", c);
1772 /* We shouldn't skip in case there are multibyte chars. */
1773 break;
1774 }
c8a96070 1775 }
c8a96070
NB
1776 }
1777
1778#ifdef TARGET_EBCDIC
1779 cpp_error (pfile, "universal-character-name on EBCDIC target");
1780 code = 0x3f; /* EBCDIC invalid character */
1781#else
f8710242
NB
1782 /* True extended characters are OK. */
1783 if (code >= 0xa0
1784 && !(code & 0x80000000)
1785 && !(code >= 0xD800 && code <= 0xDFFF))
1786 ;
1787 /* The standard permits $, @ and ` to be specified as UCNs. We use
1788 hex escapes so that this also works with EBCDIC hosts. */
1789 else if (code == 0x24 || code == 0x40 || code == 0x60)
1790 ;
1791 /* Don't give another error if one occurred above. */
1792 else if (length == 0)
1793 cpp_error (pfile, "universal-character-name out of range");
c8a96070
NB
1794#endif
1795
1796 *pstr = p;
62729350
NB
1797 *pc = code;
1798 return 0;
c8a96070
NB
1799}
1800
1801/* Interpret an escape sequence, and return its value. PSTR points to
1802 the input pointer, which is just after the backslash. LIMIT is how
62729350
NB
1803 much text we have. MASK is a bitmask for the precision for the
1804 destination type (char or wchar_t). TRADITIONAL, if true, does not
1805 interpret escapes that did not exist in traditional C.
c8a96070 1806
62729350
NB
1807 Handles all relevant diagnostics. */
1808
1809unsigned int
1810cpp_parse_escape (pfile, pstr, limit, mask, traditional)
c8a96070
NB
1811 cpp_reader *pfile;
1812 const unsigned char **pstr;
1813 const unsigned char *limit;
62729350 1814 unsigned HOST_WIDE_INT mask;
c8a96070
NB
1815 int traditional;
1816{
1817 int unknown = 0;
1818 const unsigned char *str = *pstr;
1819 unsigned int c = *str++;
1820
1821 switch (c)
1822 {
1823 case '\\': case '\'': case '"': case '?': break;
1824 case 'b': c = TARGET_BS; break;
1825 case 'f': c = TARGET_FF; break;
1826 case 'n': c = TARGET_NEWLINE; break;
1827 case 'r': c = TARGET_CR; break;
1828 case 't': c = TARGET_TAB; break;
1829 case 'v': c = TARGET_VT; break;
1830
1831 case '(': case '{': case '[': case '%':
1832 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1833 '\%' is used to prevent SCCS from getting confused. */
1834 unknown = CPP_PEDANTIC (pfile);
1835 break;
1836
1837 case 'a':
1838 if (CPP_WTRADITIONAL (pfile))
1839 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1840 if (!traditional)
1841 c = TARGET_BELL;
1842 break;
1843
1844 case 'e': case 'E':
1845 if (CPP_PEDANTIC (pfile))
1846 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1847 c = TARGET_ESC;
1848 break;
1849
c8a96070 1850 case 'u': case 'U':
62729350 1851 unknown = maybe_read_ucs (pfile, &str, limit, &c);
c8a96070
NB
1852 break;
1853
1854 case 'x':
1855 if (CPP_WTRADITIONAL (pfile))
1856 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1857
1858 if (!traditional)
1859 {
1860 unsigned int i = 0, overflow = 0;
1861 int digits_found = 0;
1862
1863 while (str < limit)
1864 {
1865 c = *str;
1866 if (! ISXDIGIT (c))
1867 break;
1868 str++;
1869 overflow |= i ^ (i << 4 >> 4);
1870 i = (i << 4) + hex_digit_value (c);
1871 digits_found = 1;
1872 }
1873
1874 if (!digits_found)
1875 cpp_error (pfile, "\\x used with no following hex digits");
1876
1877 if (overflow | (i != (i & mask)))
1878 {
1879 cpp_pedwarn (pfile, "hex escape sequence out of range");
1880 i &= mask;
1881 }
1882 c = i;
1883 }
1884 break;
1885
1886 case '0': case '1': case '2': case '3':
1887 case '4': case '5': case '6': case '7':
1888 {
1889 unsigned int i = c - '0';
1890 int count = 0;
1891
1892 while (str < limit && ++count < 3)
1893 {
1894 c = *str;
1895 if (c < '0' || c > '7')
1896 break;
1897 str++;
1898 i = (i << 3) + c - '0';
1899 }
1900
1901 if (i != (i & mask))
1902 {
1903 cpp_pedwarn (pfile, "octal escape sequence out of range");
1904 i &= mask;
1905 }
1906 c = i;
1907 }
1908 break;
1909
1910 default:
1911 unknown = 1;
1912 break;
1913 }
1914
1915 if (unknown)
1916 {
1917 if (ISGRAPH (c))
1918 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1919 else
1920 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1921 }
1922
62729350
NB
1923 if (c > mask)
1924 cpp_pedwarn (pfile, "escape sequence out of range for character");
1925
c8a96070
NB
1926 *pstr = str;
1927 return c;
1928}
1929
/* Fall back to the plain character type sizes when no separate
   maximum sizes have been defined.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1937
1938/* Interpret a (possibly wide) character constant in TOKEN.
1939 WARN_MULTI warns about multi-character charconsts, if not
1940 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1941 that did not exist in traditional C. PCHARS_SEEN points to a
1942 variable that is filled in with the number of characters seen. */
1943HOST_WIDE_INT
1944cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1945 cpp_reader *pfile;
1946 const cpp_token *token;
1947 int warn_multi;
1948 int traditional;
1949 unsigned int *pchars_seen;
1950{
1951 const unsigned char *str = token->val.str.text;
1952 const unsigned char *limit = str + token->val.str.len;
1953 unsigned int chars_seen = 0;
1954 unsigned int width, max_chars, c;
2a967f3d
NB
1955 unsigned HOST_WIDE_INT mask;
1956 HOST_WIDE_INT result = 0;
c8a96070
NB
1957
1958#ifdef MULTIBYTE_CHARS
1959 (void) local_mbtowc (NULL, NULL, 0);
1960#endif
1961
1962 /* Width in bits. */
1963 if (token->type == CPP_CHAR)
1964 width = MAX_CHAR_TYPE_SIZE;
1965 else
1966 width = MAX_WCHAR_TYPE_SIZE;
1967
1968 if (width < HOST_BITS_PER_WIDE_INT)
1969 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1970 else
1971 mask = ~0;
1972 max_chars = HOST_BITS_PER_WIDE_INT / width;
1973
1974 while (str < limit)
1975 {
1976#ifdef MULTIBYTE_CHARS
1977 wchar_t wc;
1978 int char_len;
1979
1980 char_len = local_mbtowc (&wc, str, limit - str);
1981 if (char_len == -1)
1982 {
1983 cpp_warning (pfile, "ignoring invalid multibyte character");
1984 c = *str++;
1985 }
1986 else
1987 {
1988 str += char_len;
1989 c = wc;
1990 }
1991#else
1992 c = *str++;
1993#endif
1994
1995 if (c == '\\')
62729350 1996 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
c8a96070
NB
1997
1998#ifdef MAP_CHARACTER
1999 if (ISPRINT (c))
2000 c = MAP_CHARACTER (c);
2001#endif
2002
2003 /* Merge character into result; ignore excess chars. */
2004 if (++chars_seen <= max_chars)
2005 {
2006 if (width < HOST_BITS_PER_WIDE_INT)
2007 result = (result << width) | (c & mask);
2008 else
2009 result = c;
2010 }
2011 }
2012
2013 if (chars_seen == 0)
2014 cpp_error (pfile, "empty character constant");
2015 else if (chars_seen > max_chars)
2016 {
2017 chars_seen = max_chars;
f8710242 2018 cpp_warning (pfile, "character constant too long");
c8a96070
NB
2019 }
2020 else if (chars_seen > 1 && !traditional && warn_multi)
2021 cpp_warning (pfile, "multi-character character constant");
2022
2023 /* If char type is signed, sign-extend the constant. The
2024 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2025 if (token->type == CPP_CHAR && chars_seen)
2026 {
2027 unsigned int nbits = chars_seen * width;
2028 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2029
2030 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2031 || ((result >> (nbits - 1)) & 1) == 0)
2032 result &= mask;
2033 else
2034 result |= ~mask;
2035 }
2036
2037 *pchars_seen = chars_seen;
2038 return result;
2039}
2040
1e013d2e
NB
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest data area new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the exclusive upper limit on the
   size of a free buffer that _cpp_get_buff will hand out for a request
   of MIN_SIZE bytes.  EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size
   requested when extending BUFF: MIN_EXTRA plus twice the distance
   between BUFF's cur and limit pointers.  Every macro argument is
   parenthesized so that arbitrary expressions may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (8000 + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)
417f3e3a 2051
/* Probe structure for alignment: the union member U follows a single
   char, so its offset is the alignment the compiler requires for a
   union holding a double and a pointer.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* Alignment that new_buff rounds buffer lengths up to.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to a multiple of ALIGN.  The mask arithmetic is only
   correct when ALIGN is a power of two.  ALIGN is evaluated twice.  */
#define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
2064
c9e7a609
NB
2065/* Create a new allocation buffer. Place the control block at the end
2066 of the buffer, so that buffer overflows will cause immediate chaos. */
b8af0ca5
NB
2067static _cpp_buff *
2068new_buff (len)
6142088c 2069 size_t len;
b8af0ca5
NB
2070{
2071 _cpp_buff *result;
ece54d54 2072 unsigned char *base;
b8af0ca5 2073
1e013d2e
NB
2074 if (len < MIN_BUFF_SIZE)
2075 len = MIN_BUFF_SIZE;
b8af0ca5
NB
2076 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
2077
2078 base = xmalloc (len + sizeof (_cpp_buff));
2079 result = (_cpp_buff *) (base + len);
2080 result->base = base;
2081 result->cur = base;
2082 result->limit = base + len;
2083 result->next = NULL;
2084 return result;
2085}
2086
2087/* Place a chain of unwanted allocation buffers on the free list. */
2088void
2089_cpp_release_buff (pfile, buff)
2090 cpp_reader *pfile;
2091 _cpp_buff *buff;
2092{
2093 _cpp_buff *end = buff;
2094
2095 while (end->next)
2096 end = end->next;
2097 end->next = pfile->free_buffs;
2098 pfile->free_buffs = buff;
2099}
2100
2101/* Return a free buffer of size at least MIN_SIZE. */
2102_cpp_buff *
2103_cpp_get_buff (pfile, min_size)
2104 cpp_reader *pfile;
6142088c 2105 size_t min_size;
b8af0ca5
NB
2106{
2107 _cpp_buff *result, **p;
2108
2109 for (p = &pfile->free_buffs;; p = &(*p)->next)
2110 {
6142088c 2111 size_t size;
1e013d2e
NB
2112
2113 if (*p == NULL)
b8af0ca5 2114 return new_buff (min_size);
1e013d2e
NB
2115 result = *p;
2116 size = result->limit - result->base;
2117 /* Return a buffer that's big enough, but don't waste one that's
2118 way too big. */
2119 if (size >= min_size && size < BUFF_SIZE_UPPER_BOUND (min_size))
b8af0ca5
NB
2120 break;
2121 }
2122
2123 *p = result->next;
2124 result->next = NULL;
2125 result->cur = result->base;
2126 return result;
2127}
2128
4fe9b91c 2129/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
2130 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2131 the excess bytes to the new buffer. Chains the new buffer after
2132 BUFF, and returns the new buffer. */
b8af0ca5 2133_cpp_buff *
8c3b2693 2134_cpp_append_extend_buff (pfile, buff, min_extra)
b8af0ca5
NB
2135 cpp_reader *pfile;
2136 _cpp_buff *buff;
6142088c 2137 size_t min_extra;
b8af0ca5 2138{
6142088c 2139 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
8c3b2693 2140 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
b8af0ca5 2141
8c3b2693
NB
2142 buff->next = new_buff;
2143 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2144 return new_buff;
2145}
2146
4fe9b91c 2147/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
2148 remaining bytes of the buffer pointed to by BUFF, and at least
2149 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2150 Chains the new buffer before the buffer pointed to by BUFF, and
2151 updates the pointer to point to the new buffer. */
2152void
2153_cpp_extend_buff (pfile, pbuff, min_extra)
2154 cpp_reader *pfile;
2155 _cpp_buff **pbuff;
2156 size_t min_extra;
2157{
2158 _cpp_buff *new_buff, *old_buff = *pbuff;
2159 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2160
2161 new_buff = _cpp_get_buff (pfile, size);
2162 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2163 new_buff->next = old_buff;
2164 *pbuff = new_buff;
b8af0ca5
NB
2165}
2166
2167/* Free a chain of buffers starting at BUFF. */
2168void
2169_cpp_free_buff (buff)
2170 _cpp_buff *buff;
2171{
2172 _cpp_buff *next;
2173
2174 for (; buff; buff = next)
2175 {
2176 next = buff->next;
2177 free (buff->base);
2178 }
2179}
417f3e3a 2180
ece54d54
NB
2181/* Allocate permanent, unaligned storage of length LEN. */
2182unsigned char *
2183_cpp_unaligned_alloc (pfile, len)
2184 cpp_reader *pfile;
2185 size_t len;
2186{
2187 _cpp_buff *buff = pfile->u_buff;
2188 unsigned char *result = buff->cur;
2189
2190 if (len > (size_t) (buff->limit - result))
2191 {
2192 buff = _cpp_get_buff (pfile, len);
2193 buff->next = pfile->u_buff;
2194 pfile->u_buff = buff;
2195 result = buff->cur;
2196 }
2197
2198 buff->cur = result + len;
2199 return result;
2200}
2201
8c3b2693 2202/* Allocate permanent, unaligned storage of length LEN. */
93c80368 2203unsigned char *
8c3b2693
NB
2204_cpp_aligned_alloc (pfile, len)
2205 cpp_reader *pfile;
2206 size_t len;
3fef5b2b 2207{
8c3b2693
NB
2208 _cpp_buff *buff = pfile->a_buff;
2209 unsigned char *result = buff->cur;
3fef5b2b 2210
8c3b2693 2211 if (len > (size_t) (buff->limit - result))
3fef5b2b 2212 {
8c3b2693
NB
2213 buff = _cpp_get_buff (pfile, len);
2214 buff->next = pfile->a_buff;
2215 pfile->a_buff = buff;
2216 result = buff->cur;
3fef5b2b 2217 }
041c3194 2218
8c3b2693 2219 buff->cur = result + len;
93c80368 2220 return result;
041c3194 2221}