]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/cpplex.c
Daily bump.
[thirdparty/gcc.git] / gcc / cpplex.c
CommitLineData
45b966db 1/* CPP Library - lexical analysis.
5d8ebbd8 2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
45b966db
ZW
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23#include "config.h"
24#include "system.h"
4977bab6
ZW
25#include "coretypes.h"
26#include "tm.h"
45b966db
ZW
27#include "cpplib.h"
28#include "cpphash.h"
29
c8a96070
NB
30#ifdef MULTIBYTE_CHARS
31#include "mbchar.h"
32#include <locale.h>
33#endif
34
93c80368
NB
35/* Tokens with SPELL_STRING store their spelling in the token list,
36 and its length in the token->val.name.len.
   NOTE(review): other code in this file reads the length through
   token->val.str.len, so the field name above may be stale -- confirm.

   One of these categories is recorded for every token type in the
   token_spellings table and is retrieved with TOKEN_SPELL. */
37enum spell_type
f9a0e96c 38{
93c80368
NB
39 SPELL_OPERATOR = 0,
40 SPELL_CHAR,
41 SPELL_IDENT,
47ad4138 42 SPELL_NUMBER,
93c80368
NB
43 SPELL_STRING,
44 SPELL_NONE
f9a0e96c
ZW
45};
46
/* One entry of the token_spellings table: how a token type is spelled
   (CATEGORY) and the text associated with it (NAME). */
93c80368 47struct token_spelling
f9a0e96c 48{
93c80368
NB
49 enum spell_type category; /* Always SPELL_OPERATOR for OP entries; given explicitly for TK entries. */
50 const unsigned char *name; /* OP: the spelling string; TK: the stringified enumerator name. */
f9a0e96c
ZW
51};
52
8206c799
ZW
/* Spellings of the digraphs. How this table is indexed is decided by
   its users, which are not part of this excerpt. */
53static const unsigned char *const digraph_spellings[] =
54{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
93c80368
NB
55
/* Expand every TTYPE_TABLE entry into a token_spelling initializer.
   OP entries are always SPELL_OPERATOR and carry the spelling S; TK
   entries take their category from S and use the stringified
   enumerator E as the name. U presumably adapts a string literal to
   const unsigned char * -- confirm against its definition. */
56#define OP(e, s) { SPELL_OPERATOR, U s },
57#define TK(e, s) { s, U STRINGX (e) },
8206c799 58static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
93c80368
NB
59#undef OP
60#undef TK
61
/* Look up the spelling category / name of TOKEN through its type. */
62#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
63#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Rewind the read pointer to the recorded backup position. Expands to
   a reference to a variable named `buffer', which must be in scope
   wherever the macro is used. */
480709cc 64#define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
f2d5f0cc 65
87062813
NB
/* Forward declarations of the file-local (static) lexer helpers.
   PARAMS wraps each parameter list; the definitions below use
   old-style (K&R) parameter declarations. */
66static void handle_newline PARAMS ((cpp_reader *));
67static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
29401c30 68static cppchar_t get_effective_char PARAMS ((cpp_reader *));
0d9f234d 69
041c3194 70static int skip_block_comment PARAMS ((cpp_reader *));
cbcff6df 71static int skip_line_comment PARAMS ((cpp_reader *));
0d9f234d 72static void adjust_column PARAMS ((cpp_reader *));
4d6baafa 73static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
2c3fcba6 74static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
562a5c27 75static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
df383483 76 unsigned int *));
10cf9bde 77static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
562a5c27 78static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
0d9f234d 79static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
87062813 80static bool trigraph_p PARAMS ((cpp_reader *));
562a5c27 81static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
477cdac7 82 cppchar_t));
004cb263 83static bool continue_after_nul PARAMS ((cpp_reader *));
93c80368 84static int name_p PARAMS ((cpp_reader *, const cpp_string *));
62729350 85static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
625458d0 86 const unsigned char *, cppchar_t *));
5fddcffc 87static tokenrun *next_tokenrun PARAMS ((tokenrun *));
f617b8e2 88
c8a96070 89static unsigned int hex_digit_value PARAMS ((unsigned int));
6142088c 90static _cpp_buff *new_buff PARAMS ((size_t));
15dad1d9 91
9d10c9a9
NB
/* Prepare for multibyte character conversions.  When MULTIBYTE_CHARS
   is defined, LC_CTYPE is switched to the locale selected by the
   environment and GET_ENVIRONMENT is applied to literal_codeset with
   the name "LANG".  Without MULTIBYTE_CHARS this does nothing.  */
void
_cpp_init_mbchar ()
{
#ifdef MULTIBYTE_CHARS
  setlocale (LC_CTYPE, "");
  GET_ENVIRONMENT (literal_codeset, "LANG");
#endif
}
101
041c3194 102/* Utility routine:
9e62c811 103
bfb9dc7f
ZW
104 Compares, the token TOKEN to the NUL-terminated string STRING.
105 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
041c3194 106int
bfb9dc7f
ZW
107cpp_ideq (token, string)
108 const cpp_token *token;
041c3194
ZW
109 const char *string;
110{
bfb9dc7f 111 if (token->type != CPP_NAME)
041c3194 112 return 0;
bfb9dc7f 113
562a5c27 114 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
15dad1d9 115}
1368ee70 116
87062813
NB
117/* Call when meeting a newline, assumed to be in buffer->cur[-1].
118 Returns with buffer->cur pointing to the character immediately
119 following the newline (combination). */
120static void
121handle_newline (pfile)
1444f2ed 122 cpp_reader *pfile;
0d9f234d 123{
87062813 124 cpp_buffer *buffer = pfile->buffer;
0d9f234d 125
87062813 126 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
8d9afc4e 127 only accept CR-LF; maybe we should fall back to that behavior? */
4d6baafa 128 if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
87062813 129 buffer->cur++;
0d9f234d 130
87062813
NB
131 buffer->line_base = buffer->cur;
132 buffer->col_adjust = 0;
133 pfile->line++;
0d9f234d
NB
134}
135
87062813
NB
136/* Subroutine of skip_escaped_newlines; called when a 3-character
137 sequence beginning with "??" is encountered. buffer->cur points to
138 the second '?'.
139
140 Warn if necessary, and returns true if the sequence forms a
8d9afc4e 141 trigraph and the trigraph should be honored. */
87062813
NB
142static bool
143trigraph_p (pfile)
45b966db 144 cpp_reader *pfile;
45b966db 145{
87062813
NB
146 cpp_buffer *buffer = pfile->buffer;
147 cppchar_t from_char = buffer->cur[1];
148 bool accept;
149
150 if (!_cpp_trigraph_map[from_char])
151 return false;
152
153 accept = CPP_OPTION (pfile, trigraphs);
154
cbcff6df
NB
155 /* Don't warn about trigraphs in comments. */
156 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
45b966db 157 {
041c3194 158 if (accept)
ebef4e8c
NB
159 cpp_error_with_line (pfile, DL_WARNING,
160 pfile->line, CPP_BUF_COL (buffer) - 1,
161 "trigraph ??%c converted to %c",
162 (int) from_char,
163 (int) _cpp_trigraph_map[from_char]);
4a5b68a2
NB
164 else if (buffer->cur != buffer->last_Wtrigraphs)
165 {
166 buffer->last_Wtrigraphs = buffer->cur;
ebef4e8c
NB
167 cpp_error_with_line (pfile, DL_WARNING,
168 pfile->line, CPP_BUF_COL (buffer) - 1,
169 "trigraph ??%c ignored", (int) from_char);
4a5b68a2 170 }
45b966db 171 }
0d9f234d 172
041c3194 173 return accept;
45b966db
ZW
174}
175
87062813 176/* Skips any escaped newlines introduced by '?' or a '\\', assumed to
480709cc
NB
177 lie in buffer->cur[-1]. Returns the next byte, which will be in
178 buffer->cur[-1]. This routine performs preprocessing stages 1 and
179 2 of the ISO C standard. */
0d9f234d 180static cppchar_t
87062813 181skip_escaped_newlines (pfile)
29401c30 182 cpp_reader *pfile;
45b966db 183{
29401c30 184 cpp_buffer *buffer = pfile->buffer;
87062813 185 cppchar_t next = buffer->cur[-1];
29401c30 186
a5c3cccd
NB
187 /* Only do this if we apply stages 1 and 2. */
188 if (!buffer->from_stage3)
041c3194 189 {
a5c3cccd 190 const unsigned char *saved_cur;
87062813 191 cppchar_t next1;
a5c3cccd
NB
192
193 do
0d9f234d 194 {
a5c3cccd 195 if (next == '?')
0d9f234d 196 {
4d6baafa 197 if (buffer->cur[0] != '?' || !trigraph_p (pfile))
87062813 198 break;
a5c3cccd 199
87062813
NB
200 /* Translate the trigraph. */
201 next = _cpp_trigraph_map[buffer->cur[1]];
202 buffer->cur += 2;
4d6baafa 203 if (next != '\\')
a5c3cccd 204 break;
a5c3cccd
NB
205 }
206
4d6baafa
NB
207 if (buffer->cur == buffer->rlimit)
208 break;
209
87062813
NB
210 /* We have a backslash, and room for at least one more
211 character. Skip horizontal whitespace. */
212 saved_cur = buffer->cur;
a5c3cccd 213 do
87062813
NB
214 next1 = *buffer->cur++;
215 while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
041c3194 216
a5c3cccd 217 if (!is_vspace (next1))
0d9f234d 218 {
87062813 219 buffer->cur = saved_cur;
0d9f234d
NB
220 break;
221 }
45b966db 222
87062813
NB
223 if (saved_cur != buffer->cur - 1
224 && !pfile->state.lexing_comment)
ebef4e8c
NB
225 cpp_error (pfile, DL_WARNING,
226 "backslash and newline separated by space");
0d9f234d 227
87062813 228 handle_newline (pfile);
480709cc 229 buffer->backup_to = buffer->cur;
87062813
NB
230 if (buffer->cur == buffer->rlimit)
231 {
ebef4e8c
NB
232 cpp_error (pfile, DL_PEDWARN,
233 "backslash-newline at end of file");
87062813
NB
234 next = EOF;
235 }
236 else
237 next = *buffer->cur++;
0d9f234d 238 }
a5c3cccd 239 while (next == '\\' || next == '?');
041c3194 240 }
45b966db 241
0d9f234d 242 return next;
45b966db
ZW
243}
244
0d9f234d 245/* Obtain the next character, after trigraph conversion and skipping
87062813
NB
246 an arbitrarily long string of escaped newlines. The common case of
247 no trigraphs or escaped newlines falls through quickly. On return,
480709cc
NB
248 buffer->backup_to points to where to return to if the character is
249 not to be processed. */
0d9f234d 250static cppchar_t
29401c30
NB
251get_effective_char (pfile)
252 cpp_reader *pfile;
64aaf407 253{
4d6baafa 254 cppchar_t next;
480709cc 255 cpp_buffer *buffer = pfile->buffer;
0d9f234d 256
480709cc 257 buffer->backup_to = buffer->cur;
4d6baafa
NB
258 next = *buffer->cur++;
259 if (__builtin_expect (next == '?' || next == '\\', 0))
260 next = skip_escaped_newlines (pfile);
0d9f234d 261
df383483 262 return next;
64aaf407
NB
263}
264
0d9f234d
NB
265/* Skip a C-style block comment. We find the end of the comment by
266 seeing if an asterisk is before every '/' we encounter. Returns
da7d8304 267 nonzero if comment terminated by EOF, zero otherwise. */
041c3194
ZW
268static int
269skip_block_comment (pfile)
45b966db
ZW
270 cpp_reader *pfile;
271{
041c3194 272 cpp_buffer *buffer = pfile->buffer;
d8090680 273 cppchar_t c = EOF, prevc = EOF;
0d9f234d 274
cbcff6df 275 pfile->state.lexing_comment = 1;
0d9f234d 276 while (buffer->cur != buffer->rlimit)
45b966db 277 {
0d9f234d
NB
278 prevc = c, c = *buffer->cur++;
279
0d9f234d 280 /* FIXME: For speed, create a new character class of characters
93c80368 281 of interest inside block comments. */
0d9f234d 282 if (c == '?' || c == '\\')
87062813 283 c = skip_escaped_newlines (pfile);
041c3194 284
0d9f234d
NB
285 /* People like decorating comments with '*', so check for '/'
286 instead for efficiency. */
041c3194 287 if (c == '/')
45b966db 288 {
0d9f234d
NB
289 if (prevc == '*')
290 break;
041c3194 291
0d9f234d 292 /* Warn about potential nested comments, but not if the '/'
a1f300c0 293 comes immediately before the true comment delimiter.
041c3194 294 Don't bother to get it right across escaped newlines. */
0d9f234d 295 if (CPP_OPTION (pfile, warn_comments)
87062813 296 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
ebef4e8c
NB
297 cpp_error_with_line (pfile, DL_WARNING,
298 pfile->line, CPP_BUF_COL (buffer),
299 "\"/*\" within comment");
45b966db 300 }
91fcd158 301 else if (is_vspace (c))
87062813 302 handle_newline (pfile);
52fadca8 303 else if (c == '\t')
0d9f234d 304 adjust_column (pfile);
45b966db 305 }
041c3194 306
cbcff6df 307 pfile->state.lexing_comment = 0;
0d9f234d 308 return c != '/' || prevc != '*';
45b966db
ZW
309}
310
480709cc 311/* Skip a C++ line comment, leaving buffer->cur pointing to the
da7d8304 312 terminating newline. Handles escaped newlines. Returns nonzero
480709cc 313 if a multiline comment. */
041c3194 314static int
cbcff6df
NB
315skip_line_comment (pfile)
316 cpp_reader *pfile;
45b966db 317{
cbcff6df 318 cpp_buffer *buffer = pfile->buffer;
67821e3a 319 unsigned int orig_line = pfile->line;
0d9f234d 320 cppchar_t c;
64cdc383
MH
321#ifdef MULTIBYTE_CHARS
322 wchar_t wc;
323 int char_len;
324#endif
041c3194 325
cbcff6df 326 pfile->state.lexing_comment = 1;
64cdc383
MH
327#ifdef MULTIBYTE_CHARS
328 /* Reset multibyte conversion state. */
329 (void) local_mbtowc (NULL, NULL, 0);
330#endif
0d9f234d 331 do
041c3194 332 {
0d9f234d 333 if (buffer->cur == buffer->rlimit)
480709cc 334 goto at_eof;
041c3194 335
64cdc383
MH
336#ifdef MULTIBYTE_CHARS
337 char_len = local_mbtowc (&wc, (const char *) buffer->cur,
338 buffer->rlimit - buffer->cur);
339 if (char_len == -1)
340 {
341 cpp_error (pfile, DL_WARNING,
342 "ignoring invalid multibyte character");
343 char_len = 1;
344 c = *buffer->cur++;
345 }
346 else
347 {
348 buffer->cur += char_len;
349 c = wc;
350 }
351#else
0d9f234d 352 c = *buffer->cur++;
64cdc383 353#endif
0d9f234d 354 if (c == '?' || c == '\\')
87062813 355 c = skip_escaped_newlines (pfile);
041c3194 356 }
0d9f234d 357 while (!is_vspace (c));
45b966db 358
480709cc
NB
359 /* Step back over the newline, except at EOF. */
360 buffer->cur--;
361 at_eof:
362
cbcff6df 363 pfile->state.lexing_comment = 0;
67821e3a 364 return orig_line != pfile->line;
041c3194 365}
45b966db 366
0d9f234d
NB
367/* pfile->buffer->cur is one beyond the \t character. Update
368 col_adjust so we track the column correctly. */
52fadca8 369static void
0d9f234d 370adjust_column (pfile)
52fadca8 371 cpp_reader *pfile;
52fadca8 372{
0d9f234d
NB
373 cpp_buffer *buffer = pfile->buffer;
374 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
52fadca8
NB
375
376 /* Round it up to multiple of the tabstop, but subtract 1 since the
377 tab itself occupies a character position. */
0d9f234d
NB
378 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
379 - col % CPP_OPTION (pfile, tabstop)) - 1;
52fadca8
NB
380}
381
0d9f234d
NB
382/* Skips whitespace, saving the next non-whitespace character.
383 Adjusts pfile->col_adjust to account for tabs. Without this,
384 tokens might be assigned an incorrect column. */
4d6baafa 385static int
0d9f234d 386skip_whitespace (pfile, c)
041c3194 387 cpp_reader *pfile;
0d9f234d 388 cppchar_t c;
041c3194
ZW
389{
390 cpp_buffer *buffer = pfile->buffer;
0d9f234d 391 unsigned int warned = 0;
45b966db 392
0d9f234d 393 do
041c3194 394 {
91fcd158
NB
395 /* Horizontal space always OK. */
396 if (c == ' ')
0d9f234d 397 ;
91fcd158 398 else if (c == '\t')
0d9f234d
NB
399 adjust_column (pfile);
400 /* Just \f \v or \0 left. */
91fcd158 401 else if (c == '\0')
041c3194 402 {
4d6baafa
NB
403 if (buffer->cur - 1 == buffer->rlimit)
404 return 0;
91fcd158 405 if (!warned)
0d9f234d 406 {
ebef4e8c 407 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
0d9f234d
NB
408 warned = 1;
409 }
45b966db 410 }
93c80368 411 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
ebef4e8c
NB
412 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
413 CPP_BUF_COL (buffer),
414 "%s in preprocessing directive",
415 c == '\f' ? "form feed" : "vertical tab");
0d9f234d 416
0d9f234d 417 c = *buffer->cur++;
45b966db 418 }
ec5c56db 419 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
420 while (is_nvspace (c));
421
480709cc 422 buffer->cur--;
4d6baafa 423 return 1;
041c3194 424}
45b966db 425
93c80368
NB
426/* See if the characters of a number token are valid in a name (no
427 '.', '+' or '-'). */
428static int
429name_p (pfile, string)
430 cpp_reader *pfile;
431 const cpp_string *string;
432{
433 unsigned int i;
434
435 for (i = 0; i < string->len; i++)
436 if (!is_idchar (string->text[i]))
437 return 0;
438
df383483 439 return 1;
93c80368
NB
440}
441
2c3fcba6
ZW
442/* Parse an identifier, skipping embedded backslash-newlines. This is
443 a critical inner loop. The common case is an identifier which has
444 not been split by backslash-newline, does not contain a dollar
445 sign, and has already been scanned (roughly 10:1 ratio of
446 seen:unseen identifiers in normal code; the distribution is
447 Poisson-like). Second most common case is a new identifier, not
448 split and no dollar sign. The other possibilities are rare and
10cf9bde 449 have been relegated to parse_slow. */
0d9f234d 450static cpp_hashnode *
2c3fcba6 451parse_identifier (pfile)
45b966db 452 cpp_reader *pfile;
45b966db 453{
93c80368 454 cpp_hashnode *result;
562a5c27 455 const uchar *cur, *base;
2c3fcba6
ZW
456
457 /* Fast-path loop. Skim over a normal identifier.
458 N.B. ISIDNUM does not include $. */
4d6baafa
NB
459 cur = pfile->buffer->cur;
460 while (ISIDNUM (*cur))
2c3fcba6 461 cur++;
2c3fcba6
ZW
462
463 /* Check for slow-path cases. */
4d6baafa 464 if (*cur == '?' || *cur == '\\' || *cur == '$')
10cf9bde
NB
465 {
466 unsigned int len;
467
468 base = parse_slow (pfile, cur, 0, &len);
469 result = (cpp_hashnode *)
470 ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
471 }
2c3fcba6
ZW
472 else
473 {
10cf9bde
NB
474 base = pfile->buffer->cur - 1;
475 pfile->buffer->cur = cur;
2c3fcba6
ZW
476 result = (cpp_hashnode *)
477 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
2c3fcba6
ZW
478 }
479
480 /* Rarely, identifiers require diagnostics when lexed.
481 XXX Has to be forced out of the fast path. */
482 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
483 && !pfile->state.skipping, 0))
484 {
485 /* It is allowed to poison the same identifier twice. */
486 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
ebef4e8c 487 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
2c3fcba6
ZW
488 NODE_NAME (result));
489
490 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
491 replacement list of a variadic macro. */
492 if (result == pfile->spec_nodes.n__VA_ARGS__
493 && !pfile->state.va_args_ok)
ebef4e8c 494 cpp_error (pfile, DL_PEDWARN,
2c3fcba6
ZW
495 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
496 }
497
498 return result;
499}
500
10cf9bde
NB
501/* Slow path. This handles numbers and identifiers which have been
502 split, or contain dollar signs. The part of the token from
503 PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
504 1 if it's a number, and 2 if it has a leading period. Returns a
505 pointer to the token's NUL-terminated spelling in permanent
506 storage, and sets PLEN to its length. */
562a5c27 507static uchar *
10cf9bde 508parse_slow (pfile, cur, number_p, plen)
2c3fcba6 509 cpp_reader *pfile;
562a5c27 510 const uchar *cur;
10cf9bde
NB
511 int number_p;
512 unsigned int *plen;
2c3fcba6 513{
0d9f234d 514 cpp_buffer *buffer = pfile->buffer;
562a5c27 515 const uchar *base = buffer->cur - 1;
2a967f3d 516 struct obstack *stack = &pfile->hash_table->stack;
10cf9bde
NB
517 unsigned int c, prevc, saw_dollar = 0;
518
519 /* Place any leading period. */
520 if (number_p == 2)
521 obstack_1grow (stack, '.');
2c3fcba6
ZW
522
523 /* Copy the part of the token which is known to be okay. */
524 obstack_grow (stack, base, cur - base);
041c3194 525
2c3fcba6
ZW
526 /* Now process the part which isn't. We are looking at one of
527 '$', '\\', or '?' on entry to this loop. */
10cf9bde 528 prevc = cur[-1];
2c3fcba6
ZW
529 c = *cur++;
530 buffer->cur = cur;
10cf9bde 531 for (;;)
041c3194 532 {
10cf9bde
NB
533 /* Potential escaped newline? */
534 buffer->backup_to = buffer->cur - 1;
535 if (c == '?' || c == '\\')
536 c = skip_escaped_newlines (pfile);
537
538 if (!is_idchar (c))
539 {
540 if (!number_p)
541 break;
542 if (c != '.' && !VALID_SIGN (c, prevc))
543 break;
544 }
545
546 /* Handle normal identifier characters in this loop. */
547 do
df383483 548 {
10cf9bde 549 prevc = c;
df383483 550 obstack_1grow (stack, c);
45b966db 551
df383483
KH
552 if (c == '$')
553 saw_dollar++;
ba89d661 554
df383483
KH
555 c = *buffer->cur++;
556 }
10cf9bde 557 while (is_idchar (c));
041c3194 558 }
0d9f234d 559
4d6baafa 560 /* Step back over the unwanted char. */
480709cc 561 BACKUP ();
93c80368 562
4fe9b91c 563 /* $ is not an identifier character in the standard, but is commonly
0d9f234d
NB
564 accepted as an extension. Don't warn about it in skipped
565 conditional blocks. */
cef0d199 566 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
ebef4e8c 567 cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
0d9f234d 568
10cf9bde
NB
569 /* Identifiers and numbers are null-terminated. */
570 *plen = obstack_object_size (stack);
2a967f3d 571 obstack_1grow (stack, '\0');
10cf9bde 572 return obstack_finish (stack);
45b966db
ZW
573}
574
5d8ebbd8 575/* Parse a number, beginning with character C, skipping embedded
da7d8304 576 backslash-newlines. LEADING_PERIOD is nonzero if there was a "."
5d8ebbd8 577 before C. Place the result in NUMBER. */
45b966db 578static void
10cf9bde 579parse_number (pfile, number, leading_period)
45b966db 580 cpp_reader *pfile;
0d9f234d 581 cpp_string *number;
93c80368 582 int leading_period;
45b966db 583{
562a5c27 584 const uchar *cur;
45b966db 585
10cf9bde
NB
586 /* Fast-path loop. Skim over a normal number.
587 N.B. ISIDNUM does not include $. */
588 cur = pfile->buffer->cur;
589 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
590 cur++;
cbcff6df 591
10cf9bde
NB
592 /* Check for slow-path cases. */
593 if (*cur == '?' || *cur == '\\' || *cur == '$')
594 number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
595 else
041c3194 596 {
562a5c27
NB
597 const uchar *base = pfile->buffer->cur - 1;
598 uchar *dest;
0d9f234d 599
10cf9bde
NB
600 number->len = cur - base + leading_period;
601 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
602 dest[number->len] = '\0';
603 number->text = dest;
45b966db 604
10cf9bde
NB
605 if (leading_period)
606 *dest++ = '.';
607 memcpy (dest, base, cur - base);
608 pfile->buffer->cur = cur;
45b966db 609 }
0d9f234d
NB
610}
611
93c80368
NB
612/* Subroutine of parse_string. */
613static int
614unescaped_terminator_p (pfile, dest)
615 cpp_reader *pfile;
616 const unsigned char *dest;
617{
618 const unsigned char *start, *temp;
619
95bd1dd7 620 /* In #include-style directives, terminators are not escapable. */
93c80368
NB
621 if (pfile->state.angled_headers)
622 return 1;
623
ece54d54 624 start = BUFF_FRONT (pfile->u_buff);
93c80368
NB
625
626 /* An odd number of consecutive backslashes represents an escaped
627 terminator. */
628 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
629 ;
630
631 return ((dest - temp) & 1) == 0;
632}
633
0d9f234d 634/* Parses a string, character constant, or angle-bracketed header file
7868b4a2
NB
635 name. Handles embedded trigraphs and escaped newlines. The stored
636 string is guaranteed NUL-terminated, but it is not guaranteed that
637 this is the first NUL since embedded NULs are preserved.
45b966db 638
87062813
NB
639 When this function returns, buffer->cur points to the next
640 character to be processed. */
041c3194 641static void
0d9f234d 642parse_string (pfile, token, terminator)
45b966db 643 cpp_reader *pfile;
041c3194 644 cpp_token *token;
0d9f234d 645 cppchar_t terminator;
45b966db 646{
041c3194 647 cpp_buffer *buffer = pfile->buffer;
93c80368 648 unsigned char *dest, *limit;
0d9f234d 649 cppchar_t c;
d4e6133f 650 bool warned_nulls = false;
64cdc383
MH
651#ifdef MULTIBYTE_CHARS
652 wchar_t wc;
653 int char_len;
654#endif
0d9f234d 655
ece54d54
NB
656 dest = BUFF_FRONT (pfile->u_buff);
657 limit = BUFF_LIMIT (pfile->u_buff);
93c80368 658
64cdc383
MH
659#ifdef MULTIBYTE_CHARS
660 /* Reset multibyte conversion state. */
661 (void) local_mbtowc (NULL, NULL, 0);
662#endif
0d9f234d 663 for (;;)
45b966db 664 {
87062813 665 /* We need room for another char, possibly the terminating NUL. */
ece54d54
NB
666 if ((size_t) (limit - dest) < 1)
667 {
668 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
8c3b2693 669 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
ece54d54
NB
670 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
671 limit = BUFF_LIMIT (pfile->u_buff);
672 }
7868b4a2 673
64cdc383
MH
674#ifdef MULTIBYTE_CHARS
675 char_len = local_mbtowc (&wc, (const char *) buffer->cur,
676 buffer->rlimit - buffer->cur);
677 if (char_len == -1)
678 {
679 cpp_error (pfile, DL_WARNING,
df383483 680 "ignoring invalid multibyte character");
64cdc383
MH
681 char_len = 1;
682 c = *buffer->cur++;
683 }
684 else
685 {
686 buffer->cur += char_len;
687 c = wc;
688 }
689#else
87062813 690 c = *buffer->cur++;
64cdc383
MH
691#endif
692
693 /* Handle trigraphs, escaped newlines etc. */
0d9f234d 694 if (c == '?' || c == '\\')
87062813 695 c = skip_escaped_newlines (pfile);
45b966db 696
87062813 697 if (c == terminator)
45b966db 698 {
87062813
NB
699 if (unescaped_terminator_p (pfile, dest))
700 break;
0d9f234d
NB
701 }
702 else if (is_vspace (c))
703 {
d4e6133f
NB
704 /* No string literal may extend over multiple lines. In
705 assembly language, suppress the error except for <>
706 includes. This is a kludge around not knowing where
707 comments are. */
708 unterminated:
709 if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
ebef4e8c 710 cpp_error (pfile, DL_ERROR, "missing terminating %c character",
625458d0 711 (int) terminator);
d4e6133f
NB
712 buffer->cur--;
713 break;
0d9f234d 714 }
4d6baafa 715 else if (c == '\0')
0d9f234d 716 {
4d6baafa 717 if (buffer->cur - 1 == buffer->rlimit)
d4e6133f 718 goto unterminated;
4d6baafa
NB
719 if (!warned_nulls)
720 {
721 warned_nulls = true;
ebef4e8c
NB
722 cpp_error (pfile, DL_WARNING,
723 "null character(s) preserved in literal");
4d6baafa 724 }
45b966db 725 }
64cdc383
MH
726#ifdef MULTIBYTE_CHARS
727 if (char_len > 1)
728 {
729 for ( ; char_len > 0; --char_len)
730 *dest++ = (*buffer->cur - char_len);
731 }
732 else
733#endif
734 *dest++ = c;
45b966db
ZW
735 }
736
7868b4a2 737 *dest = '\0';
45b966db 738
ece54d54
NB
739 token->val.str.text = BUFF_FRONT (pfile->u_buff);
740 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
741 BUFF_FRONT (pfile->u_buff) = dest + 1;
0d9f234d 742}
041c3194 743
93c80368 744/* The stored comment includes the comment start and any terminator. */
9e62c811 745static void
477cdac7 746save_comment (pfile, token, from, type)
0d9f234d 747 cpp_reader *pfile;
041c3194
ZW
748 cpp_token *token;
749 const unsigned char *from;
477cdac7 750 cppchar_t type;
9e62c811 751{
041c3194 752 unsigned char *buffer;
477cdac7 753 unsigned int len, clen;
df383483 754
1c6d33ef 755 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
480709cc 756
3542203b
NB
757 /* C++ comments probably (not definitely) have moved past a new
758 line, which we don't want to save in the comment. */
480709cc 759 if (is_vspace (pfile->buffer->cur[-1]))
3542203b 760 len--;
477cdac7
JT
761
762 /* If we are currently in a directive, then we need to store all
763 C++ comments as C comments internally, and so we need to
764 allocate a little extra space in that case.
765
766 Note that the only time we encounter a directive here is
767 when we are saving comments in a "#define". */
768 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
769
770 buffer = _cpp_unaligned_alloc (pfile, clen);
df383483 771
041c3194 772 token->type = CPP_COMMENT;
477cdac7 773 token->val.str.len = clen;
0d9f234d 774 token->val.str.text = buffer;
45b966db 775
1c6d33ef
NB
776 buffer[0] = '/';
777 memcpy (buffer + 1, from, len - 1);
477cdac7 778
1eeeb6a4 779 /* Finish conversion to a C comment, if necessary. */
477cdac7
JT
780 if (pfile->state.in_directive && type == '/')
781 {
782 buffer[1] = '*';
783 buffer[clen - 2] = '*';
784 buffer[clen - 1] = '/';
785 }
0d9f234d 786}
45b966db 787
5fddcffc
NB
788/* Allocate COUNT tokens for RUN. */
789void
790_cpp_init_tokenrun (run, count)
791 tokenrun *run;
792 unsigned int count;
793{
794 run->base = xnewvec (cpp_token, count);
795 run->limit = run->base + count;
796 run->next = NULL;
797}
798
799/* Returns the next tokenrun, or creates one if there is none. */
800static tokenrun *
801next_tokenrun (run)
802 tokenrun *run;
803{
804 if (run->next == NULL)
805 {
806 run->next = xnew (tokenrun);
bdcbe496 807 run->next->prev = run;
5fddcffc
NB
808 _cpp_init_tokenrun (run->next, 250);
809 }
810
811 return run->next;
812}
813
4ed5bcfb
NB
814/* Allocate a single token that is invalidated at the same time as the
815 rest of the tokens on the line. Has its line and col set to the
816 same as the last lexed token, so that diagnostics appear in the
817 right place. */
818cpp_token *
819_cpp_temp_token (pfile)
820 cpp_reader *pfile;
821{
822 cpp_token *old, *result;
823
824 old = pfile->cur_token - 1;
825 if (pfile->cur_token == pfile->cur_run->limit)
826 {
827 pfile->cur_run = next_tokenrun (pfile->cur_run);
828 pfile->cur_token = pfile->cur_run->base;
829 }
830
831 result = pfile->cur_token++;
832 result->line = old->line;
833 result->col = old->col;
834 return result;
835}
836
14baae01
NB
837/* Lex a token into RESULT (external interface). Takes care of issues
838 like directive handling, token lookahead, multiple include
a1f300c0 839 optimization and skipping. */
345894b4
NB
840const cpp_token *
841_cpp_lex_token (pfile)
45b966db 842 cpp_reader *pfile;
5fddcffc 843{
bdcbe496 844 cpp_token *result;
5fddcffc 845
bdcbe496 846 for (;;)
5fddcffc 847 {
bdcbe496 848 if (pfile->cur_token == pfile->cur_run->limit)
5fddcffc 849 {
bdcbe496
NB
850 pfile->cur_run = next_tokenrun (pfile->cur_run);
851 pfile->cur_token = pfile->cur_run->base;
5fddcffc
NB
852 }
853
bdcbe496 854 if (pfile->lookaheads)
14baae01
NB
855 {
856 pfile->lookaheads--;
857 result = pfile->cur_token++;
858 }
bdcbe496 859 else
14baae01 860 result = _cpp_lex_direct (pfile);
bdcbe496
NB
861
862 if (result->flags & BOL)
5fddcffc 863 {
bdcbe496
NB
864 /* Is this a directive. If _cpp_handle_directive returns
865 false, it is an assembler #. */
866 if (result->type == CPP_HASH
e808ec9c
NB
867 /* 6.10.3 p 11: Directives in a list of macro arguments
868 gives undefined behavior. This implementation
869 handles the directive as normal. */
870 && pfile->state.parsing_args != 1
bdcbe496
NB
871 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
872 continue;
97293897
NB
873 if (pfile->cb.line_change && !pfile->state.skipping)
874 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
5fddcffc 875 }
5fddcffc 876
bdcbe496
NB
877 /* We don't skip tokens in directives. */
878 if (pfile->state.in_directive)
879 break;
5fddcffc 880
bdcbe496 881 /* Outside a directive, invalidate controlling macros. At file
14baae01 882 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
bdcbe496 883 get here and MI optimisation works. */
5fddcffc 884 pfile->mi_valid = false;
bdcbe496
NB
885
886 if (!pfile->state.skipping || result->type == CPP_EOF)
887 break;
5fddcffc
NB
888 }
889
345894b4 890 return result;
5fddcffc
NB
891}
892
004cb263
NB
893/* A NUL terminates the current buffer. For ISO preprocessing this is
894 EOF, but for traditional preprocessing it indicates we need a line
895 refill. Returns TRUE to continue preprocessing a new buffer, FALSE
896 to return a CPP_EOF to the caller. */
897static bool
898continue_after_nul (pfile)
899 cpp_reader *pfile;
900{
901 cpp_buffer *buffer = pfile->buffer;
902 bool more = false;
df383483 903
004cb263
NB
904 buffer->saved_flags = BOL;
905 if (CPP_OPTION (pfile, traditional))
1a76916c
NB
906 {
907 if (pfile->state.in_directive)
908 return false;
909
910 _cpp_remove_overlay (pfile);
911 more = _cpp_read_logical_line_trad (pfile);
912 _cpp_overlay_buffer (pfile, pfile->out.base,
913 pfile->out.cur - pfile->out.base);
914 pfile->line = pfile->out.first_line;
915 }
004cb263
NB
916 else
917 {
918 /* Stop parsing arguments with a CPP_EOF. When we finally come
919 back here, do the work of popping the buffer. */
920 if (!pfile->state.parsing_args)
921 {
922 if (buffer->cur != buffer->line_base)
923 {
924 /* Non-empty files should end in a newline. Don't warn
925 for command line and _Pragma buffers. */
926 if (!buffer->from_stage3)
927 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
928 handle_newline (pfile);
929 }
930
931 /* Similarly, finish an in-progress directive with CPP_EOF
932 before popping the buffer. */
933 if (!pfile->state.in_directive && buffer->prev)
934 {
935 more = !buffer->return_at_eof;
936 _cpp_pop_buffer (pfile);
937 }
938 }
939 }
940
941 return more;
942}
943
480709cc
NB
/* Choose between two token types by peeking at the next effective
   character.  If it equals CHAR, RESULT's type becomes THEN_TYPE and
   the character stays read; otherwise BACKUP () is invoked and the
   type becomes ELSE_TYPE.

   This macro is not self-contained: it relies on variables named
   PFILE and RESULT being in scope at the point of expansion, and on
   whatever BACKUP itself requires.  Arguments are parenthesized so
   that arbitrary expressions may be passed safely.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do {                                          \
    if (get_effective_char (pfile) == (CHAR))   \
      result->type = (THEN_TYPE);               \
    else                                        \
      {                                         \
        BACKUP ();                              \
        result->type = (ELSE_TYPE);             \
      }                                         \
  } while (0)
954
14baae01
NB
955/* Lex a token into pfile->cur_token, which is also incremented, to
956 get diagnostics pointing to the correct location.
957
958 Does not handle issues such as token lookahead, multiple-include
959 optimisation, directives, skipping etc. This function is only
960 suitable for use by _cpp_lex_token, and in special cases like
961 lex_expansion_token which doesn't care for any of these issues.
962
963 When meeting a newline, returns CPP_EOF if parsing a directive,
964 otherwise returns to the start of the token buffer if permissible.
965 Returns the location of the lexed token. */
966cpp_token *
967_cpp_lex_direct (pfile)
5fddcffc 968 cpp_reader *pfile;
45b966db 969{
0d9f234d 970 cppchar_t c;
adb84b42 971 cpp_buffer *buffer;
0d9f234d 972 const unsigned char *comment_start;
14baae01 973 cpp_token *result = pfile->cur_token++;
9ec7291f 974
5fddcffc 975 fresh_line:
adb84b42 976 buffer = pfile->buffer;
bd969772
NB
977 result->flags = buffer->saved_flags;
978 buffer->saved_flags = 0;
5fddcffc 979 update_tokens_line:
1444f2ed 980 result->line = pfile->line;
041c3194 981
5fddcffc 982 skipped_white:
480709cc 983 c = *buffer->cur++;
5fddcffc 984 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
5fddcffc
NB
985
986 trigraph:
0d9f234d 987 switch (c)
45b966db 988 {
4d6baafa
NB
989 case ' ': case '\t': case '\f': case '\v': case '\0':
990 result->flags |= PREV_WHITE;
991 if (skip_whitespace (pfile, c))
992 goto skipped_white;
993
004cb263 994 /* End of buffer. */
4d6baafa 995 buffer->cur--;
004cb263
NB
996 if (continue_after_nul (pfile))
997 goto fresh_line;
0d9f234d 998 result->type = CPP_EOF;
5fddcffc 999 break;
45b966db 1000
0d9f234d 1001 case '\n': case '\r':
87062813 1002 handle_newline (pfile);
bdcbe496
NB
1003 buffer->saved_flags = BOL;
1004 if (! pfile->state.in_directive)
45b966db 1005 {
4ed5bcfb
NB
1006 if (pfile->state.parsing_args == 2)
1007 buffer->saved_flags |= PREV_WHITE;
bdcbe496
NB
1008 if (!pfile->keep_tokens)
1009 {
1010 pfile->cur_run = &pfile->base_run;
1011 result = pfile->base_run.base;
1012 pfile->cur_token = result + 1;
1013 }
1014 goto fresh_line;
45b966db 1015 }
5fddcffc
NB
1016 result->type = CPP_EOF;
1017 break;
46d07497 1018
0d9f234d
NB
1019 case '?':
1020 case '\\':
1021 /* These could start an escaped newline, or '?' a trigraph. Let
1022 skip_escaped_newlines do all the work. */
1023 {
67821e3a 1024 unsigned int line = pfile->line;
0d9f234d 1025
87062813 1026 c = skip_escaped_newlines (pfile);
67821e3a 1027 if (line != pfile->line)
87062813 1028 {
480709cc 1029 buffer->cur--;
87062813
NB
1030 /* We had at least one escaped newline of some sort.
1031 Update the token's line and column. */
5fddcffc 1032 goto update_tokens_line;
87062813 1033 }
480709cc 1034 }
0d9f234d 1035
480709cc
NB
1036 /* We are either the original '?' or '\\', or a trigraph. */
1037 if (c == '?')
0d9f234d 1038 result->type = CPP_QUERY;
480709cc
NB
1039 else if (c == '\\')
1040 goto random_char;
1041 else
1042 goto trigraph;
0d9f234d 1043 break;
46d07497 1044
0d9f234d
NB
1045 case '0': case '1': case '2': case '3': case '4':
1046 case '5': case '6': case '7': case '8': case '9':
1047 result->type = CPP_NUMBER;
10cf9bde 1048 parse_number (pfile, &result->val.str, 0);
0d9f234d 1049 break;
46d07497 1050
0abc6a6a
NB
1051 case 'L':
1052 /* 'L' may introduce wide characters or strings. */
df383483
KH
1053 {
1054 const unsigned char *pos = buffer->cur;
0d9f234d 1055
df383483
KH
1056 c = get_effective_char (pfile);
1057 if (c == '\'' || c == '"')
1058 {
1059 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1060 parse_string (pfile, result, c);
1061 break;
1062 }
1063 buffer->cur = pos;
1064 }
1065 /* Fall through. */
0abc6a6a
NB
1066
1067 start_ident:
0d9f234d
NB
1068 case '_':
1069 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1070 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1071 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1072 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1073 case 'y': case 'z':
1074 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
0abc6a6a 1075 case 'G': case 'H': case 'I': case 'J': case 'K':
0d9f234d
NB
1076 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1077 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1078 case 'Y': case 'Z':
1079 result->type = CPP_NAME;
2c3fcba6 1080 result->val.node = parse_identifier (pfile);
0d9f234d 1081
0d9f234d 1082 /* Convert named operators to their proper types. */
0abc6a6a 1083 if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
1084 {
1085 result->flags |= NAMED_OP;
4977bab6 1086 result->type = result->val.node->directive_index;
0d9f234d
NB
1087 }
1088 break;
1089
1090 case '\'':
1091 case '"':
1092 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
0d9f234d
NB
1093 parse_string (pfile, result, c);
1094 break;
041c3194 1095
0d9f234d 1096 case '/':
1c6d33ef
NB
1097 /* A potential block or line comment. */
1098 comment_start = buffer->cur;
29401c30 1099 c = get_effective_char (pfile);
480709cc 1100
1c6d33ef
NB
1101 if (c == '*')
1102 {
0d9f234d 1103 if (skip_block_comment (pfile))
ebef4e8c 1104 cpp_error (pfile, DL_ERROR, "unterminated comment");
0d9f234d 1105 }
480709cc
NB
1106 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1107 || CPP_IN_SYSTEM_HEADER (pfile)))
0d9f234d 1108 {
bdb05a7b
NB
1109 /* Warn about comments only if pedantically GNUC89, and not
1110 in system headers. */
1111 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 1112 && ! buffer->warned_cplusplus_comments)
041c3194 1113 {
ebef4e8c 1114 cpp_error (pfile, DL_PEDWARN,
56508306 1115 "C++ style comments are not allowed in ISO C90");
ebef4e8c
NB
1116 cpp_error (pfile, DL_PEDWARN,
1117 "(this will be reported only once per input file)");
1c6d33ef
NB
1118 buffer->warned_cplusplus_comments = 1;
1119 }
0d9f234d 1120
01ef6563 1121 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
ebef4e8c 1122 cpp_error (pfile, DL_WARNING, "multi-line comment");
1c6d33ef 1123 }
480709cc
NB
1124 else if (c == '=')
1125 {
1126 result->type = CPP_DIV_EQ;
1127 break;
1128 }
1129 else
1130 {
1131 BACKUP ();
1132 result->type = CPP_DIV;
1133 break;
1134 }
0d9f234d 1135
1c6d33ef
NB
1136 if (!pfile->state.save_comments)
1137 {
1138 result->flags |= PREV_WHITE;
5fddcffc 1139 goto update_tokens_line;
0d9f234d 1140 }
1c6d33ef
NB
1141
1142 /* Save the comment as a token in its own right. */
477cdac7 1143 save_comment (pfile, result, comment_start, c);
bdcbe496 1144 break;
0d9f234d
NB
1145
1146 case '<':
1147 if (pfile->state.angled_headers)
1148 {
1149 result->type = CPP_HEADER_NAME;
480709cc
NB
1150 parse_string (pfile, result, '>');
1151 break;
0d9f234d 1152 }
45b966db 1153
29401c30 1154 c = get_effective_char (pfile);
0d9f234d 1155 if (c == '=')
480709cc 1156 result->type = CPP_LESS_EQ;
0d9f234d 1157 else if (c == '<')
480709cc 1158 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
0d9f234d 1159 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
480709cc 1160 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
0d9f234d
NB
1161 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1162 {
480709cc 1163 result->type = CPP_OPEN_SQUARE;
0d9f234d
NB
1164 result->flags |= DIGRAPH;
1165 }
1166 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1167 {
480709cc 1168 result->type = CPP_OPEN_BRACE;
0d9f234d
NB
1169 result->flags |= DIGRAPH;
1170 }
480709cc
NB
1171 else
1172 {
1173 BACKUP ();
1174 result->type = CPP_LESS;
1175 }
0d9f234d
NB
1176 break;
1177
1178 case '>':
29401c30 1179 c = get_effective_char (pfile);
0d9f234d 1180 if (c == '=')
480709cc 1181 result->type = CPP_GREATER_EQ;
0d9f234d 1182 else if (c == '>')
480709cc 1183 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
0d9f234d 1184 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
480709cc
NB
1185 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1186 else
0d9f234d 1187 {
480709cc
NB
1188 BACKUP ();
1189 result->type = CPP_GREATER;
0d9f234d
NB
1190 }
1191 break;
1192
cbcff6df 1193 case '%':
480709cc
NB
1194 c = get_effective_char (pfile);
1195 if (c == '=')
1196 result->type = CPP_MOD_EQ;
1197 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1198 {
1199 result->flags |= DIGRAPH;
1200 result->type = CPP_HASH;
1201 if (get_effective_char (pfile) == '%')
1202 {
1203 const unsigned char *pos = buffer->cur;
1204
1205 if (get_effective_char (pfile) == ':')
1206 result->type = CPP_PASTE;
1207 else
1208 buffer->cur = pos - 1;
1209 }
1210 else
1211 BACKUP ();
1212 }
1213 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1214 {
1215 result->flags |= DIGRAPH;
1216 result->type = CPP_CLOSE_BRACE;
1217 }
1218 else
1219 {
1220 BACKUP ();
1221 result->type = CPP_MOD;
1222 }
0d9f234d
NB
1223 break;
1224
cbcff6df 1225 case '.':
480709cc
NB
1226 result->type = CPP_DOT;
1227 c = get_effective_char (pfile);
1228 if (c == '.')
1229 {
1230 const unsigned char *pos = buffer->cur;
1231
1232 if (get_effective_char (pfile) == '.')
1233 result->type = CPP_ELLIPSIS;
1234 else
1235 buffer->cur = pos - 1;
1236 }
1237 /* All known character sets have 0...9 contiguous. */
0df6c2c7 1238 else if (ISDIGIT (c))
480709cc
NB
1239 {
1240 result->type = CPP_NUMBER;
10cf9bde 1241 parse_number (pfile, &result->val.str, 1);
480709cc
NB
1242 }
1243 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1244 result->type = CPP_DOT_STAR;
1245 else
1246 BACKUP ();
0d9f234d 1247 break;
45b966db 1248
0d9f234d 1249 case '+':
29401c30 1250 c = get_effective_char (pfile);
480709cc
NB
1251 if (c == '+')
1252 result->type = CPP_PLUS_PLUS;
1253 else if (c == '=')
1254 result->type = CPP_PLUS_EQ;
1255 else
1256 {
1257 BACKUP ();
1258 result->type = CPP_PLUS;
1259 }
0d9f234d 1260 break;
04e3ec78 1261
0d9f234d 1262 case '-':
29401c30 1263 c = get_effective_char (pfile);
0d9f234d
NB
1264 if (c == '>')
1265 {
480709cc
NB
1266 result->type = CPP_DEREF;
1267 if (CPP_OPTION (pfile, cplusplus))
1268 {
1269 if (get_effective_char (pfile) == '*')
1270 result->type = CPP_DEREF_STAR;
1271 else
1272 BACKUP ();
1273 }
0d9f234d 1274 }
0d9f234d 1275 else if (c == '-')
480709cc
NB
1276 result->type = CPP_MINUS_MINUS;
1277 else if (c == '=')
1278 result->type = CPP_MINUS_EQ;
1279 else
1280 {
1281 BACKUP ();
1282 result->type = CPP_MINUS;
1283 }
0d9f234d 1284 break;
45b966db 1285
0d9f234d 1286 case '&':
29401c30 1287 c = get_effective_char (pfile);
480709cc
NB
1288 if (c == '&')
1289 result->type = CPP_AND_AND;
1290 else if (c == '=')
1291 result->type = CPP_AND_EQ;
1292 else
1293 {
1294 BACKUP ();
1295 result->type = CPP_AND;
1296 }
0d9f234d 1297 break;
df383483 1298
0d9f234d 1299 case '|':
29401c30 1300 c = get_effective_char (pfile);
480709cc
NB
1301 if (c == '|')
1302 result->type = CPP_OR_OR;
1303 else if (c == '=')
1304 result->type = CPP_OR_EQ;
1305 else
1306 {
1307 BACKUP ();
1308 result->type = CPP_OR;
1309 }
0d9f234d 1310 break;
45b966db 1311
0d9f234d 1312 case ':':
29401c30 1313 c = get_effective_char (pfile);
0d9f234d 1314 if (c == ':' && CPP_OPTION (pfile, cplusplus))
480709cc 1315 result->type = CPP_SCOPE;
0d9f234d
NB
1316 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1317 {
1318 result->flags |= DIGRAPH;
480709cc
NB
1319 result->type = CPP_CLOSE_SQUARE;
1320 }
1321 else
1322 {
1323 BACKUP ();
1324 result->type = CPP_COLON;
0d9f234d
NB
1325 }
1326 break;
45b966db 1327
480709cc
NB
1328 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1329 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1330 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1331 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1332 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1333
0d9f234d
NB
1334 case '~': result->type = CPP_COMPL; break;
1335 case ',': result->type = CPP_COMMA; break;
1336 case '(': result->type = CPP_OPEN_PAREN; break;
1337 case ')': result->type = CPP_CLOSE_PAREN; break;
1338 case '[': result->type = CPP_OPEN_SQUARE; break;
1339 case ']': result->type = CPP_CLOSE_SQUARE; break;
1340 case '{': result->type = CPP_OPEN_BRACE; break;
1341 case '}': result->type = CPP_CLOSE_BRACE; break;
1342 case ';': result->type = CPP_SEMICOLON; break;
1343
40f03658 1344 /* @ is a punctuator in Objective-C. */
cc937581 1345 case '@': result->type = CPP_ATSIGN; break;
0d9f234d 1346
0abc6a6a
NB
1347 case '$':
1348 if (CPP_OPTION (pfile, dollars_in_ident))
1349 goto start_ident;
1350 /* Fall through... */
1351
0d9f234d
NB
1352 random_char:
1353 default:
1354 result->type = CPP_OTHER;
6c53ebff 1355 result->val.c = c;
0d9f234d
NB
1356 break;
1357 }
bdcbe496
NB
1358
1359 return result;
0d9f234d
NB
1360}
1361
5d8ebbd8 1362/* An upper bound on the number of bytes needed to spell TOKEN,
93c80368
NB
1363 including preceding whitespace. */
1364unsigned int
1365cpp_token_len (token)
1366 const cpp_token *token;
0d9f234d 1367{
93c80368 1368 unsigned int len;
6d2c2047 1369
93c80368 1370 switch (TOKEN_SPELL (token))
041c3194 1371 {
a28c5035 1372 default: len = 0; break;
47ad4138 1373 case SPELL_NUMBER:
a28c5035
NB
1374 case SPELL_STRING: len = token->val.str.len; break;
1375 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
041c3194 1376 }
47ad4138 1377 /* 1 for whitespace, 4 for comment delimiters. */
93c80368 1378 return len + 5;
6d2c2047
ZW
1379}
1380
041c3194 1381/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1382 already contain the enough space to hold the token's spelling.
1383 Returns a pointer to the character after the last character
1384 written. */
93c80368
NB
1385unsigned char *
1386cpp_spell_token (pfile, token, buffer)
041c3194
ZW
1387 cpp_reader *pfile; /* Would be nice to be rid of this... */
1388 const cpp_token *token;
1389 unsigned char *buffer;
1390{
96be6998 1391 switch (TOKEN_SPELL (token))
041c3194
ZW
1392 {
1393 case SPELL_OPERATOR:
1394 {
1395 const unsigned char *spelling;
1396 unsigned char c;
d6d5f795 1397
041c3194 1398 if (token->flags & DIGRAPH)
37b8524c
JDA
1399 spelling
1400 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1401 else if (token->flags & NAMED_OP)
1402 goto spell_ident;
041c3194 1403 else
96be6998 1404 spelling = TOKEN_NAME (token);
df383483 1405
041c3194
ZW
1406 while ((c = *spelling++) != '\0')
1407 *buffer++ = c;
1408 }
1409 break;
d6d5f795 1410
47ad4138
ZW
1411 case SPELL_CHAR:
1412 *buffer++ = token->val.c;
1413 break;
1414
1415 spell_ident:
041c3194 1416 case SPELL_IDENT:
a28c5035
NB
1417 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1418 buffer += NODE_LEN (token->val.node);
041c3194 1419 break;
d6d5f795 1420
47ad4138
ZW
1421 case SPELL_NUMBER:
1422 memcpy (buffer, token->val.str.text, token->val.str.len);
1423 buffer += token->val.str.len;
1424 break;
1425
041c3194
ZW
1426 case SPELL_STRING:
1427 {
ba89d661
ZW
1428 int left, right, tag;
1429 switch (token->type)
1430 {
1431 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1432 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
ba89d661
ZW
1433 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1434 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1435 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
47ad4138 1436 default:
ebef4e8c
NB
1437 cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1438 TOKEN_NAME (token));
47ad4138 1439 return buffer;
ba89d661
ZW
1440 }
1441 if (tag) *buffer++ = tag;
47ad4138 1442 *buffer++ = left;
bfb9dc7f
ZW
1443 memcpy (buffer, token->val.str.text, token->val.str.len);
1444 buffer += token->val.str.len;
47ad4138 1445 *buffer++ = right;
041c3194
ZW
1446 }
1447 break;
d6d5f795 1448
041c3194 1449 case SPELL_NONE:
ebef4e8c 1450 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1451 break;
1452 }
d6d5f795 1453
041c3194
ZW
1454 return buffer;
1455}
d6d5f795 1456
5d8ebbd8
NB
1457/* Returns TOKEN spelt as a null-terminated string. The string is
1458 freed when the reader is destroyed. Useful for diagnostics. */
93c80368
NB
1459unsigned char *
1460cpp_token_as_text (pfile, token)
c5a04734 1461 cpp_reader *pfile;
041c3194 1462 const cpp_token *token;
c5a04734 1463{
93c80368 1464 unsigned int len = cpp_token_len (token);
ece54d54 1465 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
c5a04734 1466
93c80368
NB
1467 end = cpp_spell_token (pfile, token, start);
1468 end[0] = '\0';
c5a04734 1469
93c80368
NB
1470 return start;
1471}
c5a04734 1472
5d8ebbd8
NB
1473/* Used by C front ends, which really should move to using
1474 cpp_token_as_text. */
93c80368
NB
1475const char *
1476cpp_type2name (type)
1477 enum cpp_ttype type;
1478{
1479 return (const char *) token_spellings[type].name;
1480}
c5a04734 1481
4ed5bcfb
NB
1482/* Writes the spelling of token to FP, without any preceding space.
1483 Separated from cpp_spell_token for efficiency - to avoid stdio
1484 double-buffering. */
93c80368
NB
1485void
1486cpp_output_token (token, fp)
1487 const cpp_token *token;
1488 FILE *fp;
1489{
93c80368 1490 switch (TOKEN_SPELL (token))
c5a04734 1491 {
93c80368
NB
1492 case SPELL_OPERATOR:
1493 {
1494 const unsigned char *spelling;
3b681e9d 1495 int c;
c5a04734 1496
93c80368 1497 if (token->flags & DIGRAPH)
37b8524c
JDA
1498 spelling
1499 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1500 else if (token->flags & NAMED_OP)
1501 goto spell_ident;
1502 else
1503 spelling = TOKEN_NAME (token);
041c3194 1504
3b681e9d
ZW
1505 c = *spelling;
1506 do
1507 putc (c, fp);
1508 while ((c = *++spelling) != '\0');
93c80368
NB
1509 }
1510 break;
041c3194 1511
47ad4138
ZW
1512 case SPELL_CHAR:
1513 putc (token->val.c, fp);
1514 break;
1515
93c80368
NB
1516 spell_ident:
1517 case SPELL_IDENT:
3b681e9d 1518 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
93c80368 1519 break;
041c3194 1520
47ad4138
ZW
1521 case SPELL_NUMBER:
1522 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1523 break;
1524
93c80368
NB
1525 case SPELL_STRING:
1526 {
1527 int left, right, tag;
1528 switch (token->type)
1529 {
1530 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1531 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
93c80368
NB
1532 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1533 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1534 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
47ad4138
ZW
1535 default:
1536 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1537 return;
93c80368
NB
1538 }
1539 if (tag) putc (tag, fp);
47ad4138 1540 putc (left, fp);
93c80368 1541 fwrite (token->val.str.text, 1, token->val.str.len, fp);
47ad4138 1542 putc (right, fp);
93c80368
NB
1543 }
1544 break;
c5a04734 1545
93c80368
NB
1546 case SPELL_NONE:
1547 /* An error, most probably. */
1548 break;
041c3194 1549 }
c5a04734
ZW
1550}
1551
93c80368
NB
1552/* Compare two tokens. */
1553int
1554_cpp_equiv_tokens (a, b)
1555 const cpp_token *a, *b;
c5a04734 1556{
93c80368
NB
1557 if (a->type == b->type && a->flags == b->flags)
1558 switch (TOKEN_SPELL (a))
1559 {
1560 default: /* Keep compiler happy. */
1561 case SPELL_OPERATOR:
1562 return 1;
1563 case SPELL_CHAR:
6c53ebff 1564 return a->val.c == b->val.c; /* Character. */
93c80368 1565 case SPELL_NONE:
56051c0a 1566 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1567 case SPELL_IDENT:
1568 return a->val.node == b->val.node;
47ad4138 1569 case SPELL_NUMBER:
93c80368
NB
1570 case SPELL_STRING:
1571 return (a->val.str.len == b->val.str.len
1572 && !memcmp (a->val.str.text, b->val.str.text,
1573 a->val.str.len));
1574 }
c5a04734 1575
041c3194
ZW
1576 return 0;
1577}
1578
93c80368
NB
1579/* Returns nonzero if a space should be inserted to avoid an
1580 accidental token paste for output. For simplicity, it is
1581 conservative, and occasionally advises a space where one is not
1582 needed, e.g. "." and ".2". */
93c80368
NB
1583int
1584cpp_avoid_paste (pfile, token1, token2)
c5a04734 1585 cpp_reader *pfile;
93c80368 1586 const cpp_token *token1, *token2;
c5a04734 1587{
93c80368
NB
1588 enum cpp_ttype a = token1->type, b = token2->type;
1589 cppchar_t c;
c5a04734 1590
93c80368
NB
1591 if (token1->flags & NAMED_OP)
1592 a = CPP_NAME;
1593 if (token2->flags & NAMED_OP)
1594 b = CPP_NAME;
c5a04734 1595
93c80368
NB
1596 c = EOF;
1597 if (token2->flags & DIGRAPH)
37b8524c 1598 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1599 else if (token_spellings[b].category == SPELL_OPERATOR)
1600 c = token_spellings[b].name[0];
c5a04734 1601
93c80368 1602 /* Quickly get everything that can paste with an '='. */
37b8524c 1603 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1604 return 1;
c5a04734 1605
93c80368 1606 switch (a)
c5a04734 1607 {
93c80368
NB
1608 case CPP_GREATER: return c == '>' || c == '?';
1609 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1610 case CPP_PLUS: return c == '+';
1611 case CPP_MINUS: return c == '-' || c == '>';
1612 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1613 case CPP_MOD: return c == ':' || c == '>';
1614 case CPP_AND: return c == '&';
1615 case CPP_OR: return c == '|';
1616 case CPP_COLON: return c == ':' || c == '>';
1617 case CPP_DEREF: return c == '*';
26ec42ee 1618 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1619 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1620 case CPP_NAME: return ((b == CPP_NUMBER
1621 && name_p (pfile, &token2->val.str))
1622 || b == CPP_NAME
1623 || b == CPP_CHAR || b == CPP_STRING); /* L */
1624 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1625 || c == '.' || c == '+' || c == '-');
1626 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
6c53ebff 1627 && token1->val.c == '@'
93c80368
NB
1628 && (b == CPP_NAME || b == CPP_STRING));
1629 default: break;
c5a04734 1630 }
c5a04734 1631
417f3e3a 1632 return 0;
c5a04734
ZW
1633}
1634
93c80368 1635/* Output all the remaining tokens on the current line, and a newline
4ed5bcfb
NB
1636 character, to FP. Leading whitespace is removed. If there are
1637 macros, special token padding is not performed. */
c5a04734 1638void
93c80368 1639cpp_output_line (pfile, fp)
c5a04734 1640 cpp_reader *pfile;
93c80368 1641 FILE *fp;
c5a04734 1642{
4ed5bcfb 1643 const cpp_token *token;
96be6998 1644
4ed5bcfb
NB
1645 token = cpp_get_token (pfile);
1646 while (token->type != CPP_EOF)
96be6998 1647 {
4ed5bcfb
NB
1648 cpp_output_token (token, fp);
1649 token = cpp_get_token (pfile);
1650 if (token->flags & PREV_WHITE)
1651 putc (' ', fp);
96be6998
ZW
1652 }
1653
93c80368 1654 putc ('\n', fp);
041c3194 1655}
c5a04734 1656
c8a96070
NB
/* Returns the value of the hexadecimal digit C.  Aborts if C is not
   a hexadecimal digit, so callers must check first.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1667
62729350
NB
1668/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1669 failure if cpplib is not parsing C++ or C99. Such failure is
1670 silent, and no variables are updated. Otherwise returns 0, and
1671 warns if -Wtraditional.
c8a96070
NB
1672
1673 [lex.charset]: The character designated by the universal character
1674 name \UNNNNNNNN is that character whose character short name in
1675 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1676 universal character name \uNNNN is that character whose character
1677 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1678 for a universal character name is less than 0x20 or in the range
1679 0x7F-0x9F (inclusive), or if the universal character name
1680 designates a character in the basic source character set, then the
1681 program is ill-formed.
1682
1683 We assume that wchar_t is Unicode, so we don't need to do any
62729350 1684 mapping. Is this ever wrong?
c8a96070 1685
62729350
NB
1686 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1687 LIMIT is the end of the string or charconst. PSTR is updated to
1688 point after the UCS on return, and the UCS is written into PC. */
1689
1690static int
1691maybe_read_ucs (pfile, pstr, limit, pc)
c8a96070
NB
1692 cpp_reader *pfile;
1693 const unsigned char **pstr;
1694 const unsigned char *limit;
625458d0 1695 cppchar_t *pc;
c8a96070
NB
1696{
1697 const unsigned char *p = *pstr;
62729350
NB
1698 unsigned int code = 0;
1699 unsigned int c = *pc, length;
1700
1701 /* Only attempt to interpret a UCS for C++ and C99. */
1702 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1703 return 1;
c8a96070 1704
62729350 1705 if (CPP_WTRADITIONAL (pfile))
ebef4e8c
NB
1706 cpp_error (pfile, DL_WARNING,
1707 "the meaning of '\\%c' is different in traditional C", c);
c8a96070 1708
f8710242
NB
1709 length = (c == 'u' ? 4: 8);
1710
1711 if ((size_t) (limit - p) < length)
1712 {
ebef4e8c 1713 cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
f8710242
NB
1714 /* Skip to the end to avoid more diagnostics. */
1715 p = limit;
1716 }
1717 else
1718 {
1719 for (; length; length--, p++)
c8a96070 1720 {
f8710242
NB
1721 c = *p;
1722 if (ISXDIGIT (c))
1723 code = (code << 4) + hex_digit_value (c);
1724 else
1725 {
ebef4e8c 1726 cpp_error (pfile, DL_ERROR,
f8710242
NB
1727 "non-hex digit '%c' in universal-character-name", c);
1728 /* We shouldn't skip in case there are multibyte chars. */
1729 break;
1730 }
c8a96070 1731 }
c8a96070
NB
1732 }
1733
783e2989
NB
1734 if (CPP_OPTION (pfile, EBCDIC))
1735 {
1736 cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1737 code = 0x3f; /* EBCDIC invalid character */
1738 }
1739 /* True extended characters are OK. */
1740 else if (code >= 0xa0
1741 && !(code & 0x80000000)
1742 && !(code >= 0xD800 && code <= 0xDFFF))
f8710242
NB
1743 ;
1744 /* The standard permits $, @ and ` to be specified as UCNs. We use
1745 hex escapes so that this also works with EBCDIC hosts. */
1746 else if (code == 0x24 || code == 0x40 || code == 0x60)
1747 ;
1748 /* Don't give another error if one occurred above. */
1749 else if (length == 0)
ebef4e8c 1750 cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
c8a96070
NB
1751
1752 *pstr = p;
62729350
NB
1753 *pc = code;
1754 return 0;
c8a96070
NB
1755}
1756
4268e8bb
NB
1757/* Returns the value of an escape sequence, truncated to the correct
1758 target precision. PSTR points to the input pointer, which is just
1759 after the backslash. LIMIT is how much text we have. WIDE is true
1760 if the escape sequence is part of a wide character constant or
1761 string literal. Handles all relevant diagnostics. */
1762cppchar_t
1763cpp_parse_escape (pfile, pstr, limit, wide)
c8a96070
NB
1764 cpp_reader *pfile;
1765 const unsigned char **pstr;
1766 const unsigned char *limit;
4268e8bb 1767 int wide;
c8a96070 1768{
783e2989
NB
1769 /* Values of \a \b \e \f \n \r \t \v respectively. */
1770 static const uchar ascii[] = { 7, 8, 27, 12, 10, 13, 9, 11 };
1771 static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13, 5, 11 };
1772
c8a96070 1773 int unknown = 0;
783e2989 1774 const unsigned char *str = *pstr, *charconsts;
4268e8bb
NB
1775 cppchar_t c, mask;
1776 unsigned int width;
1777
783e2989
NB
1778 if (CPP_OPTION (pfile, EBCDIC))
1779 charconsts = ebcdic;
1780 else
1781 charconsts = ascii;
1782
4268e8bb
NB
1783 if (wide)
1784 width = CPP_OPTION (pfile, wchar_precision);
1785 else
1786 width = CPP_OPTION (pfile, char_precision);
1787 if (width < BITS_PER_CPPCHAR_T)
1788 mask = ((cppchar_t) 1 << width) - 1;
1789 else
1790 mask = ~0;
c8a96070 1791
4268e8bb 1792 c = *str++;
c8a96070
NB
1793 switch (c)
1794 {
1795 case '\\': case '\'': case '"': case '?': break;
783e2989
NB
1796 case 'b': c = charconsts[1]; break;
1797 case 'f': c = charconsts[3]; break;
1798 case 'n': c = charconsts[4]; break;
1799 case 'r': c = charconsts[5]; break;
1800 case 't': c = charconsts[6]; break;
1801 case 'v': c = charconsts[7]; break;
c8a96070
NB
1802
1803 case '(': case '{': case '[': case '%':
1804 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1805 '\%' is used to prevent SCCS from getting confused. */
1806 unknown = CPP_PEDANTIC (pfile);
1807 break;
1808
1809 case 'a':
1810 if (CPP_WTRADITIONAL (pfile))
ebef4e8c
NB
1811 cpp_error (pfile, DL_WARNING,
1812 "the meaning of '\\a' is different in traditional C");
783e2989 1813 c = charconsts[0];
c8a96070
NB
1814 break;
1815
1816 case 'e': case 'E':
1817 if (CPP_PEDANTIC (pfile))
ebef4e8c 1818 cpp_error (pfile, DL_PEDWARN,
625458d0 1819 "non-ISO-standard escape sequence, '\\%c'", (int) c);
783e2989 1820 c = charconsts[2];
c8a96070 1821 break;
df383483 1822
c8a96070 1823 case 'u': case 'U':
62729350 1824 unknown = maybe_read_ucs (pfile, &str, limit, &c);
c8a96070
NB
1825 break;
1826
1827 case 'x':
1828 if (CPP_WTRADITIONAL (pfile))
ebef4e8c
NB
1829 cpp_error (pfile, DL_WARNING,
1830 "the meaning of '\\x' is different in traditional C");
c8a96070 1831
df383483
KH
1832 {
1833 cppchar_t i = 0, overflow = 0;
1834 int digits_found = 0;
c8a96070 1835
df383483
KH
1836 while (str < limit)
1837 {
1838 c = *str;
1839 if (! ISXDIGIT (c))
1840 break;
1841 str++;
1842 overflow |= i ^ (i << 4 >> 4);
1843 i = (i << 4) + hex_digit_value (c);
1844 digits_found = 1;
1845 }
c8a96070 1846
df383483
KH
1847 if (!digits_found)
1848 cpp_error (pfile, DL_ERROR,
ebef4e8c 1849 "\\x used with no following hex digits");
c8a96070 1850
df383483
KH
1851 if (overflow | (i != (i & mask)))
1852 {
1853 cpp_error (pfile, DL_PEDWARN,
1854 "hex escape sequence out of range");
1855 i &= mask;
1856 }
1857 c = i;
1858 }
c8a96070
NB
1859 break;
1860
1861 case '0': case '1': case '2': case '3':
1862 case '4': case '5': case '6': case '7':
1863 {
4268e8bb
NB
1864 size_t count = 0;
1865 cppchar_t i = c - '0';
c8a96070
NB
1866
1867 while (str < limit && ++count < 3)
1868 {
1869 c = *str;
1870 if (c < '0' || c > '7')
1871 break;
1872 str++;
1873 i = (i << 3) + c - '0';
1874 }
1875
1876 if (i != (i & mask))
1877 {
ebef4e8c
NB
1878 cpp_error (pfile, DL_PEDWARN,
1879 "octal escape sequence out of range");
c8a96070
NB
1880 i &= mask;
1881 }
1882 c = i;
1883 }
1884 break;
1885
1886 default:
1887 unknown = 1;
1888 break;
1889 }
1890
1891 if (unknown)
1892 {
1893 if (ISGRAPH (c))
625458d0
NB
1894 cpp_error (pfile, DL_PEDWARN,
1895 "unknown escape sequence '\\%c'", (int) c);
c8a96070 1896 else
625458d0
NB
1897 cpp_error (pfile, DL_PEDWARN,
1898 "unknown escape sequence: '\\%03o'", (int) c);
c8a96070
NB
1899 }
1900
62729350 1901 if (c > mask)
4268e8bb 1902 {
639e8b0c 1903 cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
4268e8bb
NB
1904 c &= mask;
1905 }
62729350 1906
c8a96070
NB
1907 *pstr = str;
1908 return c;
1909}
1910
c8a96070 1911/* Interpret a (possibly wide) character constant in TOKEN.
4268e8bb
NB
1912 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
1913 points to a variable that is filled in with the number of
1914 characters seen, and UNSIGNEDP to a variable that indicates whether
1915 the result has signed type. */
1916cppchar_t
a5a49440 1917cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
c8a96070
NB
1918 cpp_reader *pfile;
1919 const cpp_token *token;
c8a96070 1920 unsigned int *pchars_seen;
4268e8bb 1921 int *unsignedp;
c8a96070
NB
1922{
1923 const unsigned char *str = token->val.str.text;
1924 const unsigned char *limit = str + token->val.str.len;
1925 unsigned int chars_seen = 0;
639e8b0c 1926 size_t width, max_chars;
4268e8bb 1927 cppchar_t c, mask, result = 0;
a47ed310 1928 bool unsigned_p;
c8a96070
NB
1929
1930#ifdef MULTIBYTE_CHARS
1931 (void) local_mbtowc (NULL, NULL, 0);
1932#endif
1933
1934 /* Width in bits. */
1935 if (token->type == CPP_CHAR)
a47ed310 1936 {
4268e8bb 1937 width = CPP_OPTION (pfile, char_precision);
2443d4e1 1938 max_chars = CPP_OPTION (pfile, int_precision) / width;
44a147ad 1939 unsigned_p = CPP_OPTION (pfile, unsigned_char);
a47ed310 1940 }
c8a96070 1941 else
a47ed310 1942 {
4268e8bb 1943 width = CPP_OPTION (pfile, wchar_precision);
2443d4e1 1944 max_chars = 1;
44a147ad 1945 unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
a47ed310 1946 }
c8a96070 1947
4268e8bb
NB
1948 if (width < BITS_PER_CPPCHAR_T)
1949 mask = ((cppchar_t) 1 << width) - 1;
c8a96070
NB
1950 else
1951 mask = ~0;
c8a96070
NB
1952
1953 while (str < limit)
1954 {
1955#ifdef MULTIBYTE_CHARS
1956 wchar_t wc;
1957 int char_len;
1958
fc69c47c 1959 char_len = local_mbtowc (&wc, (const char *)str, limit - str);
c8a96070
NB
1960 if (char_len == -1)
1961 {
ebef4e8c
NB
1962 cpp_error (pfile, DL_WARNING,
1963 "ignoring invalid multibyte character");
c8a96070
NB
1964 c = *str++;
1965 }
1966 else
1967 {
1968 str += char_len;
1969 c = wc;
1970 }
1971#else
1972 c = *str++;
1973#endif
1974
1975 if (c == '\\')
4268e8bb 1976 c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
c8a96070
NB
1977
1978#ifdef MAP_CHARACTER
1979 if (ISPRINT (c))
1980 c = MAP_CHARACTER (c);
1981#endif
df383483 1982
639e8b0c
NB
1983 chars_seen++;
1984
a5a49440
NB
1985 /* Truncate the character, scale the result and merge the two. */
1986 c &= mask;
639e8b0c 1987 if (width < BITS_PER_CPPCHAR_T)
a5a49440 1988 result = (result << width) | c;
639e8b0c
NB
1989 else
1990 result = c;
c8a96070
NB
1991 }
1992
1993 if (chars_seen == 0)
ebef4e8c 1994 cpp_error (pfile, DL_ERROR, "empty character constant");
639e8b0c 1995 else if (chars_seen > 1)
c8a96070 1996 {
639e8b0c
NB
1997 /* Multichar charconsts are of type int and therefore signed. */
1998 unsigned_p = 0;
a5a49440 1999
639e8b0c
NB
2000 if (chars_seen > max_chars)
2001 {
2002 chars_seen = max_chars;
2003 cpp_error (pfile, DL_WARNING,
2004 "character constant too long for its type");
2005 }
a5a49440 2006 else if (CPP_OPTION (pfile, warn_multichar))
639e8b0c 2007 cpp_error (pfile, DL_WARNING, "multi-character character constant");
c8a96070
NB
2008 }
2009
b9e2d17b
NB
2010 /* Sign-extend or truncate the constant to cppchar_t. The value is
2011 in WIDTH bits, but for multi-char charconsts it's value is the
2012 full target type's width. */
2013 if (chars_seen > 1)
2014 width *= max_chars;
2015 if (width < BITS_PER_CPPCHAR_T)
a5a49440 2016 {
b9e2d17b
NB
2017 mask = ((cppchar_t) 1 << width) - 1;
2018 if (unsigned_p || !(result & (1 << (width - 1))))
2019 result &= mask;
2020 else
2021 result |= ~mask;
a5a49440
NB
2022 }
2023
c8a96070 2024 *pchars_seen = chars_seen;
4268e8bb 2025 *unsignedp = unsigned_p;
c8a96070
NB
2026 return result;
2027}
2028
1e013d2e
NB
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the bytes between BUFF's cur
   and limit.  Every macro argument is parenthesized so that argument
   expressions containing low-precedence operators expand correctly.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2043
c9e7a609
NB
2044/* Create a new allocation buffer. Place the control block at the end
2045 of the buffer, so that buffer overflows will cause immediate chaos. */
b8af0ca5
NB
2046static _cpp_buff *
2047new_buff (len)
6142088c 2048 size_t len;
b8af0ca5
NB
2049{
2050 _cpp_buff *result;
ece54d54 2051 unsigned char *base;
b8af0ca5 2052
1e013d2e
NB
2053 if (len < MIN_BUFF_SIZE)
2054 len = MIN_BUFF_SIZE;
c70f6ed3 2055 len = CPP_ALIGN (len);
b8af0ca5
NB
2056
2057 base = xmalloc (len + sizeof (_cpp_buff));
2058 result = (_cpp_buff *) (base + len);
2059 result->base = base;
2060 result->cur = base;
2061 result->limit = base + len;
2062 result->next = NULL;
2063 return result;
2064}
2065
2066/* Place a chain of unwanted allocation buffers on the free list. */
2067void
2068_cpp_release_buff (pfile, buff)
2069 cpp_reader *pfile;
2070 _cpp_buff *buff;
2071{
2072 _cpp_buff *end = buff;
2073
2074 while (end->next)
2075 end = end->next;
2076 end->next = pfile->free_buffs;
2077 pfile->free_buffs = buff;
2078}
2079
2080/* Return a free buffer of size at least MIN_SIZE. */
2081_cpp_buff *
2082_cpp_get_buff (pfile, min_size)
2083 cpp_reader *pfile;
6142088c 2084 size_t min_size;
b8af0ca5
NB
2085{
2086 _cpp_buff *result, **p;
2087
2088 for (p = &pfile->free_buffs;; p = &(*p)->next)
2089 {
6142088c 2090 size_t size;
1e013d2e
NB
2091
2092 if (*p == NULL)
b8af0ca5 2093 return new_buff (min_size);
1e013d2e
NB
2094 result = *p;
2095 size = result->limit - result->base;
2096 /* Return a buffer that's big enough, but don't waste one that's
2097 way too big. */
34f5271d 2098 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
b8af0ca5
NB
2099 break;
2100 }
2101
2102 *p = result->next;
2103 result->next = NULL;
2104 result->cur = result->base;
2105 return result;
2106}
2107
4fe9b91c 2108/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
2109 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2110 the excess bytes to the new buffer. Chains the new buffer after
2111 BUFF, and returns the new buffer. */
b8af0ca5 2112_cpp_buff *
8c3b2693 2113_cpp_append_extend_buff (pfile, buff, min_extra)
b8af0ca5
NB
2114 cpp_reader *pfile;
2115 _cpp_buff *buff;
6142088c 2116 size_t min_extra;
b8af0ca5 2117{
6142088c 2118 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
8c3b2693 2119 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
b8af0ca5 2120
8c3b2693
NB
2121 buff->next = new_buff;
2122 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2123 return new_buff;
2124}
2125
4fe9b91c 2126/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
2127 remaining bytes of the buffer pointed to by BUFF, and at least
2128 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2129 Chains the new buffer before the buffer pointed to by BUFF, and
2130 updates the pointer to point to the new buffer. */
2131void
2132_cpp_extend_buff (pfile, pbuff, min_extra)
2133 cpp_reader *pfile;
2134 _cpp_buff **pbuff;
2135 size_t min_extra;
2136{
2137 _cpp_buff *new_buff, *old_buff = *pbuff;
2138 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2139
2140 new_buff = _cpp_get_buff (pfile, size);
2141 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2142 new_buff->next = old_buff;
2143 *pbuff = new_buff;
b8af0ca5
NB
2144}
2145
2146/* Free a chain of buffers starting at BUFF. */
2147void
2148_cpp_free_buff (buff)
2149 _cpp_buff *buff;
2150{
2151 _cpp_buff *next;
2152
2153 for (; buff; buff = next)
2154 {
2155 next = buff->next;
2156 free (buff->base);
2157 }
2158}
417f3e3a 2159
ece54d54
NB
2160/* Allocate permanent, unaligned storage of length LEN. */
2161unsigned char *
2162_cpp_unaligned_alloc (pfile, len)
2163 cpp_reader *pfile;
2164 size_t len;
2165{
2166 _cpp_buff *buff = pfile->u_buff;
2167 unsigned char *result = buff->cur;
2168
2169 if (len > (size_t) (buff->limit - result))
2170 {
2171 buff = _cpp_get_buff (pfile, len);
2172 buff->next = pfile->u_buff;
2173 pfile->u_buff = buff;
2174 result = buff->cur;
2175 }
2176
2177 buff->cur = result + len;
2178 return result;
2179}
2180
87062813
NB
2181/* Allocate permanent, unaligned storage of length LEN from a_buff.
2182 That buffer is used for growing allocations when saving macro
2183 replacement lists in a #define, and when parsing an answer to an
2184 assertion in #assert, #unassert or #if (and therefore possibly
2185 whilst expanding macros). It therefore must not be used by any
2186 code that they might call: specifically the lexer and the guts of
2187 the macro expander.
2188
2189 All existing other uses clearly fit this restriction: storing
2190 registered pragmas during initialization. */
93c80368 2191unsigned char *
8c3b2693
NB
2192_cpp_aligned_alloc (pfile, len)
2193 cpp_reader *pfile;
2194 size_t len;
3fef5b2b 2195{
8c3b2693
NB
2196 _cpp_buff *buff = pfile->a_buff;
2197 unsigned char *result = buff->cur;
3fef5b2b 2198
8c3b2693 2199 if (len > (size_t) (buff->limit - result))
3fef5b2b 2200 {
8c3b2693
NB
2201 buff = _cpp_get_buff (pfile, len);
2202 buff->next = pfile->a_buff;
2203 pfile->a_buff = buff;
2204 result = buff->cur;
3fef5b2b 2205 }
041c3194 2206
8c3b2693 2207 buff->cur = result + len;
93c80368 2208 return result;
041c3194 2209}