]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/cpplex.c
c-common.c (c_common_init_options): Use C89 for Objective-C, and set the options...
[thirdparty/gcc.git] / gcc / cpplex.c
CommitLineData
45b966db 1/* CPP Library - lexical analysis.
5d8ebbd8 2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
45b966db
ZW
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23#include "config.h"
24#include "system.h"
45b966db
ZW
25#include "cpplib.h"
26#include "cpphash.h"
27
c8a96070
NB
28#ifdef MULTIBYTE_CHARS
29#include "mbchar.h"
30#include <locale.h>
31#endif
32
/* Spelling categories for tokens.  Tokens with SPELL_STRING store
   their spelling in the token list, and its length in
   token->val.name.len.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_NUMBER,
  SPELL_STRING,
  SPELL_NONE
};
44
93c80368 45struct token_spelling
f9a0e96c 46{
93c80368
NB
47 enum spell_type category;
48 const unsigned char *name;
f9a0e96c
ZW
49};
50
8206c799
ZW
51static const unsigned char *const digraph_spellings[] =
52{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
93c80368
NB
53
54#define OP(e, s) { SPELL_OPERATOR, U s },
55#define TK(e, s) { s, U STRINGX (e) },
8206c799 56static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
93c80368
NB
57#undef OP
58#undef TK
59
/* Spelling category and name of TOKEN, looked up in token_spellings
   by the token's type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)

/* Rewind the read pointer to the recorded backup position.  Expands
   to code that uses a local variable named `buffer', which the
   enclosing function must provide.  */
#define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
f2d5f0cc 63
87062813
NB
64static void handle_newline PARAMS ((cpp_reader *));
65static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
29401c30 66static cppchar_t get_effective_char PARAMS ((cpp_reader *));
0d9f234d 67
041c3194 68static int skip_block_comment PARAMS ((cpp_reader *));
cbcff6df 69static int skip_line_comment PARAMS ((cpp_reader *));
0d9f234d 70static void adjust_column PARAMS ((cpp_reader *));
4d6baafa 71static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
2c3fcba6 72static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
562a5c27 73static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
10cf9bde
NB
74 unsigned int *));
75static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
562a5c27 76static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
0d9f234d 77static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
87062813 78static bool trigraph_p PARAMS ((cpp_reader *));
562a5c27 79static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
477cdac7 80 cppchar_t));
93c80368 81static int name_p PARAMS ((cpp_reader *, const cpp_string *));
62729350 82static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
625458d0 83 const unsigned char *, cppchar_t *));
5fddcffc 84static tokenrun *next_tokenrun PARAMS ((tokenrun *));
f617b8e2 85
c8a96070 86static unsigned int hex_digit_value PARAMS ((unsigned int));
6142088c 87static _cpp_buff *new_buff PARAMS ((size_t));
15dad1d9 88
041c3194 89/* Utility routine:
9e62c811 90
bfb9dc7f
ZW
91 Compares, the token TOKEN to the NUL-terminated string STRING.
92 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
041c3194 93int
bfb9dc7f
ZW
94cpp_ideq (token, string)
95 const cpp_token *token;
041c3194
ZW
96 const char *string;
97{
bfb9dc7f 98 if (token->type != CPP_NAME)
041c3194 99 return 0;
bfb9dc7f 100
562a5c27 101 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
15dad1d9 102}
1368ee70 103
87062813
NB
104/* Call when meeting a newline, assumed to be in buffer->cur[-1].
105 Returns with buffer->cur pointing to the character immediately
106 following the newline (combination). */
107static void
108handle_newline (pfile)
1444f2ed 109 cpp_reader *pfile;
0d9f234d 110{
87062813 111 cpp_buffer *buffer = pfile->buffer;
0d9f234d 112
87062813 113 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
4d6baafa
NB
114 only accept CR-LF; maybe we should fall back to that behaviour? */
115 if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
87062813 116 buffer->cur++;
0d9f234d 117
87062813
NB
118 buffer->line_base = buffer->cur;
119 buffer->col_adjust = 0;
120 pfile->line++;
0d9f234d
NB
121}
122
87062813
NB
123/* Subroutine of skip_escaped_newlines; called when a 3-character
124 sequence beginning with "??" is encountered. buffer->cur points to
125 the second '?'.
126
127 Warn if necessary, and returns true if the sequence forms a
128 trigraph and the trigraph should be honoured. */
129static bool
130trigraph_p (pfile)
45b966db 131 cpp_reader *pfile;
45b966db 132{
87062813
NB
133 cpp_buffer *buffer = pfile->buffer;
134 cppchar_t from_char = buffer->cur[1];
135 bool accept;
136
137 if (!_cpp_trigraph_map[from_char])
138 return false;
139
140 accept = CPP_OPTION (pfile, trigraphs);
141
cbcff6df
NB
142 /* Don't warn about trigraphs in comments. */
143 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
45b966db 144 {
041c3194 145 if (accept)
ebef4e8c
NB
146 cpp_error_with_line (pfile, DL_WARNING,
147 pfile->line, CPP_BUF_COL (buffer) - 1,
148 "trigraph ??%c converted to %c",
149 (int) from_char,
150 (int) _cpp_trigraph_map[from_char]);
4a5b68a2
NB
151 else if (buffer->cur != buffer->last_Wtrigraphs)
152 {
153 buffer->last_Wtrigraphs = buffer->cur;
ebef4e8c
NB
154 cpp_error_with_line (pfile, DL_WARNING,
155 pfile->line, CPP_BUF_COL (buffer) - 1,
156 "trigraph ??%c ignored", (int) from_char);
4a5b68a2 157 }
45b966db 158 }
0d9f234d 159
041c3194 160 return accept;
45b966db
ZW
161}
162
87062813 163/* Skips any escaped newlines introduced by '?' or a '\\', assumed to
480709cc
NB
164 lie in buffer->cur[-1]. Returns the next byte, which will be in
165 buffer->cur[-1]. This routine performs preprocessing stages 1 and
166 2 of the ISO C standard. */
0d9f234d 167static cppchar_t
87062813 168skip_escaped_newlines (pfile)
29401c30 169 cpp_reader *pfile;
45b966db 170{
29401c30 171 cpp_buffer *buffer = pfile->buffer;
87062813 172 cppchar_t next = buffer->cur[-1];
29401c30 173
a5c3cccd
NB
174 /* Only do this if we apply stages 1 and 2. */
175 if (!buffer->from_stage3)
041c3194 176 {
a5c3cccd 177 const unsigned char *saved_cur;
87062813 178 cppchar_t next1;
a5c3cccd
NB
179
180 do
0d9f234d 181 {
a5c3cccd 182 if (next == '?')
0d9f234d 183 {
4d6baafa 184 if (buffer->cur[0] != '?' || !trigraph_p (pfile))
87062813 185 break;
a5c3cccd 186
87062813
NB
187 /* Translate the trigraph. */
188 next = _cpp_trigraph_map[buffer->cur[1]];
189 buffer->cur += 2;
4d6baafa 190 if (next != '\\')
a5c3cccd 191 break;
a5c3cccd
NB
192 }
193
4d6baafa
NB
194 if (buffer->cur == buffer->rlimit)
195 break;
196
87062813
NB
197 /* We have a backslash, and room for at least one more
198 character. Skip horizontal whitespace. */
199 saved_cur = buffer->cur;
a5c3cccd 200 do
87062813
NB
201 next1 = *buffer->cur++;
202 while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
041c3194 203
a5c3cccd 204 if (!is_vspace (next1))
0d9f234d 205 {
87062813 206 buffer->cur = saved_cur;
0d9f234d
NB
207 break;
208 }
45b966db 209
87062813
NB
210 if (saved_cur != buffer->cur - 1
211 && !pfile->state.lexing_comment)
ebef4e8c
NB
212 cpp_error (pfile, DL_WARNING,
213 "backslash and newline separated by space");
0d9f234d 214
87062813 215 handle_newline (pfile);
480709cc 216 buffer->backup_to = buffer->cur;
87062813
NB
217 if (buffer->cur == buffer->rlimit)
218 {
ebef4e8c
NB
219 cpp_error (pfile, DL_PEDWARN,
220 "backslash-newline at end of file");
87062813
NB
221 next = EOF;
222 }
223 else
224 next = *buffer->cur++;
0d9f234d 225 }
a5c3cccd 226 while (next == '\\' || next == '?');
041c3194 227 }
45b966db 228
0d9f234d 229 return next;
45b966db
ZW
230}
231
0d9f234d 232/* Obtain the next character, after trigraph conversion and skipping
87062813
NB
233 an arbitrarily long string of escaped newlines. The common case of
234 no trigraphs or escaped newlines falls through quickly. On return,
480709cc
NB
235 buffer->backup_to points to where to return to if the character is
236 not to be processed. */
0d9f234d 237static cppchar_t
29401c30
NB
238get_effective_char (pfile)
239 cpp_reader *pfile;
64aaf407 240{
4d6baafa 241 cppchar_t next;
480709cc 242 cpp_buffer *buffer = pfile->buffer;
0d9f234d 243
480709cc 244 buffer->backup_to = buffer->cur;
4d6baafa
NB
245 next = *buffer->cur++;
246 if (__builtin_expect (next == '?' || next == '\\', 0))
247 next = skip_escaped_newlines (pfile);
0d9f234d 248
480709cc 249 return next;
64aaf407
NB
250}
251
0d9f234d
NB
252/* Skip a C-style block comment. We find the end of the comment by
253 seeing if an asterisk is before every '/' we encounter. Returns
254 non-zero if comment terminated by EOF, zero otherwise. */
041c3194
ZW
255static int
256skip_block_comment (pfile)
45b966db
ZW
257 cpp_reader *pfile;
258{
041c3194 259 cpp_buffer *buffer = pfile->buffer;
d8090680 260 cppchar_t c = EOF, prevc = EOF;
0d9f234d 261
cbcff6df 262 pfile->state.lexing_comment = 1;
0d9f234d 263 while (buffer->cur != buffer->rlimit)
45b966db 264 {
0d9f234d
NB
265 prevc = c, c = *buffer->cur++;
266
0d9f234d 267 /* FIXME: For speed, create a new character class of characters
93c80368 268 of interest inside block comments. */
0d9f234d 269 if (c == '?' || c == '\\')
87062813 270 c = skip_escaped_newlines (pfile);
041c3194 271
0d9f234d
NB
272 /* People like decorating comments with '*', so check for '/'
273 instead for efficiency. */
041c3194 274 if (c == '/')
45b966db 275 {
0d9f234d
NB
276 if (prevc == '*')
277 break;
041c3194 278
0d9f234d 279 /* Warn about potential nested comments, but not if the '/'
a1f300c0 280 comes immediately before the true comment delimiter.
041c3194 281 Don't bother to get it right across escaped newlines. */
0d9f234d 282 if (CPP_OPTION (pfile, warn_comments)
87062813 283 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
ebef4e8c
NB
284 cpp_error_with_line (pfile, DL_WARNING,
285 pfile->line, CPP_BUF_COL (buffer),
286 "\"/*\" within comment");
45b966db 287 }
91fcd158 288 else if (is_vspace (c))
87062813 289 handle_newline (pfile);
52fadca8 290 else if (c == '\t')
0d9f234d 291 adjust_column (pfile);
45b966db 292 }
041c3194 293
cbcff6df 294 pfile->state.lexing_comment = 0;
0d9f234d 295 return c != '/' || prevc != '*';
45b966db
ZW
296}
297
480709cc
NB
298/* Skip a C++ line comment, leaving buffer->cur pointing to the
299 terminating newline. Handles escaped newlines. Returns non-zero
300 if a multiline comment. */
041c3194 301static int
cbcff6df
NB
302skip_line_comment (pfile)
303 cpp_reader *pfile;
45b966db 304{
cbcff6df 305 cpp_buffer *buffer = pfile->buffer;
67821e3a 306 unsigned int orig_line = pfile->line;
0d9f234d 307 cppchar_t c;
64cdc383
MH
308#ifdef MULTIBYTE_CHARS
309 wchar_t wc;
310 int char_len;
311#endif
041c3194 312
cbcff6df 313 pfile->state.lexing_comment = 1;
64cdc383
MH
314#ifdef MULTIBYTE_CHARS
315 /* Reset multibyte conversion state. */
316 (void) local_mbtowc (NULL, NULL, 0);
317#endif
0d9f234d 318 do
041c3194 319 {
0d9f234d 320 if (buffer->cur == buffer->rlimit)
480709cc 321 goto at_eof;
041c3194 322
64cdc383
MH
323#ifdef MULTIBYTE_CHARS
324 char_len = local_mbtowc (&wc, (const char *) buffer->cur,
325 buffer->rlimit - buffer->cur);
326 if (char_len == -1)
327 {
328 cpp_error (pfile, DL_WARNING,
329 "ignoring invalid multibyte character");
330 char_len = 1;
331 c = *buffer->cur++;
332 }
333 else
334 {
335 buffer->cur += char_len;
336 c = wc;
337 }
338#else
0d9f234d 339 c = *buffer->cur++;
64cdc383 340#endif
0d9f234d 341 if (c == '?' || c == '\\')
87062813 342 c = skip_escaped_newlines (pfile);
041c3194 343 }
0d9f234d 344 while (!is_vspace (c));
45b966db 345
480709cc
NB
346 /* Step back over the newline, except at EOF. */
347 buffer->cur--;
348 at_eof:
349
cbcff6df 350 pfile->state.lexing_comment = 0;
67821e3a 351 return orig_line != pfile->line;
041c3194 352}
45b966db 353
0d9f234d
NB
354/* pfile->buffer->cur is one beyond the \t character. Update
355 col_adjust so we track the column correctly. */
52fadca8 356static void
0d9f234d 357adjust_column (pfile)
52fadca8 358 cpp_reader *pfile;
52fadca8 359{
0d9f234d
NB
360 cpp_buffer *buffer = pfile->buffer;
361 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
52fadca8
NB
362
363 /* Round it up to multiple of the tabstop, but subtract 1 since the
364 tab itself occupies a character position. */
0d9f234d
NB
365 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
366 - col % CPP_OPTION (pfile, tabstop)) - 1;
52fadca8
NB
367}
368
0d9f234d
NB
369/* Skips whitespace, saving the next non-whitespace character.
370 Adjusts pfile->col_adjust to account for tabs. Without this,
371 tokens might be assigned an incorrect column. */
4d6baafa 372static int
0d9f234d 373skip_whitespace (pfile, c)
041c3194 374 cpp_reader *pfile;
0d9f234d 375 cppchar_t c;
041c3194
ZW
376{
377 cpp_buffer *buffer = pfile->buffer;
0d9f234d 378 unsigned int warned = 0;
45b966db 379
0d9f234d 380 do
041c3194 381 {
91fcd158
NB
382 /* Horizontal space always OK. */
383 if (c == ' ')
0d9f234d 384 ;
91fcd158 385 else if (c == '\t')
0d9f234d
NB
386 adjust_column (pfile);
387 /* Just \f \v or \0 left. */
91fcd158 388 else if (c == '\0')
041c3194 389 {
4d6baafa
NB
390 if (buffer->cur - 1 == buffer->rlimit)
391 return 0;
91fcd158 392 if (!warned)
0d9f234d 393 {
ebef4e8c 394 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
0d9f234d
NB
395 warned = 1;
396 }
45b966db 397 }
93c80368 398 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
ebef4e8c
NB
399 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
400 CPP_BUF_COL (buffer),
401 "%s in preprocessing directive",
402 c == '\f' ? "form feed" : "vertical tab");
0d9f234d 403
0d9f234d 404 c = *buffer->cur++;
45b966db 405 }
ec5c56db 406 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
407 while (is_nvspace (c));
408
480709cc 409 buffer->cur--;
4d6baafa 410 return 1;
041c3194 411}
45b966db 412
93c80368
NB
413/* See if the characters of a number token are valid in a name (no
414 '.', '+' or '-'). */
415static int
416name_p (pfile, string)
417 cpp_reader *pfile;
418 const cpp_string *string;
419{
420 unsigned int i;
421
422 for (i = 0; i < string->len; i++)
423 if (!is_idchar (string->text[i]))
424 return 0;
425
426 return 1;
427}
428
2c3fcba6
ZW
429/* Parse an identifier, skipping embedded backslash-newlines. This is
430 a critical inner loop. The common case is an identifier which has
431 not been split by backslash-newline, does not contain a dollar
432 sign, and has already been scanned (roughly 10:1 ratio of
433 seen:unseen identifiers in normal code; the distribution is
434 Poisson-like). Second most common case is a new identifier, not
435 split and no dollar sign. The other possibilities are rare and
10cf9bde 436 have been relegated to parse_slow. */
0d9f234d 437static cpp_hashnode *
2c3fcba6 438parse_identifier (pfile)
45b966db 439 cpp_reader *pfile;
45b966db 440{
93c80368 441 cpp_hashnode *result;
562a5c27 442 const uchar *cur, *base;
2c3fcba6
ZW
443
444 /* Fast-path loop. Skim over a normal identifier.
445 N.B. ISIDNUM does not include $. */
4d6baafa
NB
446 cur = pfile->buffer->cur;
447 while (ISIDNUM (*cur))
2c3fcba6 448 cur++;
2c3fcba6
ZW
449
450 /* Check for slow-path cases. */
4d6baafa 451 if (*cur == '?' || *cur == '\\' || *cur == '$')
10cf9bde
NB
452 {
453 unsigned int len;
454
455 base = parse_slow (pfile, cur, 0, &len);
456 result = (cpp_hashnode *)
457 ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
458 }
2c3fcba6
ZW
459 else
460 {
10cf9bde
NB
461 base = pfile->buffer->cur - 1;
462 pfile->buffer->cur = cur;
2c3fcba6
ZW
463 result = (cpp_hashnode *)
464 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
2c3fcba6
ZW
465 }
466
467 /* Rarely, identifiers require diagnostics when lexed.
468 XXX Has to be forced out of the fast path. */
469 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
470 && !pfile->state.skipping, 0))
471 {
472 /* It is allowed to poison the same identifier twice. */
473 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
ebef4e8c 474 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
2c3fcba6
ZW
475 NODE_NAME (result));
476
477 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
478 replacement list of a variadic macro. */
479 if (result == pfile->spec_nodes.n__VA_ARGS__
480 && !pfile->state.va_args_ok)
ebef4e8c 481 cpp_error (pfile, DL_PEDWARN,
2c3fcba6
ZW
482 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
483 }
484
485 return result;
486}
487
10cf9bde
NB
488/* Slow path. This handles numbers and identifiers which have been
489 split, or contain dollar signs. The part of the token from
490 PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
491 1 if it's a number, and 2 if it has a leading period. Returns a
492 pointer to the token's NUL-terminated spelling in permanent
493 storage, and sets PLEN to its length. */
562a5c27 494static uchar *
10cf9bde 495parse_slow (pfile, cur, number_p, plen)
2c3fcba6 496 cpp_reader *pfile;
562a5c27 497 const uchar *cur;
10cf9bde
NB
498 int number_p;
499 unsigned int *plen;
2c3fcba6 500{
0d9f234d 501 cpp_buffer *buffer = pfile->buffer;
562a5c27 502 const uchar *base = buffer->cur - 1;
2a967f3d 503 struct obstack *stack = &pfile->hash_table->stack;
10cf9bde
NB
504 unsigned int c, prevc, saw_dollar = 0;
505
506 /* Place any leading period. */
507 if (number_p == 2)
508 obstack_1grow (stack, '.');
2c3fcba6
ZW
509
510 /* Copy the part of the token which is known to be okay. */
511 obstack_grow (stack, base, cur - base);
041c3194 512
2c3fcba6
ZW
513 /* Now process the part which isn't. We are looking at one of
514 '$', '\\', or '?' on entry to this loop. */
10cf9bde 515 prevc = cur[-1];
2c3fcba6
ZW
516 c = *cur++;
517 buffer->cur = cur;
10cf9bde 518 for (;;)
041c3194 519 {
10cf9bde
NB
520 /* Potential escaped newline? */
521 buffer->backup_to = buffer->cur - 1;
522 if (c == '?' || c == '\\')
523 c = skip_escaped_newlines (pfile);
524
525 if (!is_idchar (c))
526 {
527 if (!number_p)
528 break;
529 if (c != '.' && !VALID_SIGN (c, prevc))
530 break;
531 }
532
533 /* Handle normal identifier characters in this loop. */
534 do
2c3fcba6 535 {
10cf9bde 536 prevc = c;
2c3fcba6 537 obstack_1grow (stack, c);
45b966db 538
2c3fcba6
ZW
539 if (c == '$')
540 saw_dollar++;
ba89d661 541
2c3fcba6
ZW
542 c = *buffer->cur++;
543 }
10cf9bde 544 while (is_idchar (c));
041c3194 545 }
0d9f234d 546
4d6baafa 547 /* Step back over the unwanted char. */
480709cc 548 BACKUP ();
93c80368 549
4fe9b91c 550 /* $ is not an identifier character in the standard, but is commonly
0d9f234d
NB
551 accepted as an extension. Don't warn about it in skipped
552 conditional blocks. */
cef0d199 553 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
ebef4e8c 554 cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
0d9f234d 555
10cf9bde
NB
556 /* Identifiers and numbers are null-terminated. */
557 *plen = obstack_object_size (stack);
2a967f3d 558 obstack_1grow (stack, '\0');
10cf9bde 559 return obstack_finish (stack);
45b966db
ZW
560}
561
5d8ebbd8
NB
562/* Parse a number, beginning with character C, skipping embedded
563 backslash-newlines. LEADING_PERIOD is non-zero if there was a "."
564 before C. Place the result in NUMBER. */
45b966db 565static void
10cf9bde 566parse_number (pfile, number, leading_period)
45b966db 567 cpp_reader *pfile;
0d9f234d 568 cpp_string *number;
93c80368 569 int leading_period;
45b966db 570{
562a5c27 571 const uchar *cur;
45b966db 572
10cf9bde
NB
573 /* Fast-path loop. Skim over a normal number.
574 N.B. ISIDNUM does not include $. */
575 cur = pfile->buffer->cur;
576 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
577 cur++;
cbcff6df 578
10cf9bde
NB
579 /* Check for slow-path cases. */
580 if (*cur == '?' || *cur == '\\' || *cur == '$')
581 number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
582 else
041c3194 583 {
562a5c27
NB
584 const uchar *base = pfile->buffer->cur - 1;
585 uchar *dest;
0d9f234d 586
10cf9bde
NB
587 number->len = cur - base + leading_period;
588 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
589 dest[number->len] = '\0';
590 number->text = dest;
45b966db 591
10cf9bde
NB
592 if (leading_period)
593 *dest++ = '.';
594 memcpy (dest, base, cur - base);
595 pfile->buffer->cur = cur;
45b966db 596 }
0d9f234d
NB
597}
598
93c80368
NB
599/* Subroutine of parse_string. */
600static int
601unescaped_terminator_p (pfile, dest)
602 cpp_reader *pfile;
603 const unsigned char *dest;
604{
605 const unsigned char *start, *temp;
606
607 /* In #include-style directives, terminators are not escapeable. */
608 if (pfile->state.angled_headers)
609 return 1;
610
ece54d54 611 start = BUFF_FRONT (pfile->u_buff);
93c80368
NB
612
613 /* An odd number of consecutive backslashes represents an escaped
614 terminator. */
615 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
616 ;
617
618 return ((dest - temp) & 1) == 0;
619}
620
0d9f234d 621/* Parses a string, character constant, or angle-bracketed header file
7868b4a2
NB
622 name. Handles embedded trigraphs and escaped newlines. The stored
623 string is guaranteed NUL-terminated, but it is not guaranteed that
624 this is the first NUL since embedded NULs are preserved.
45b966db 625
87062813
NB
626 When this function returns, buffer->cur points to the next
627 character to be processed. */
041c3194 628static void
0d9f234d 629parse_string (pfile, token, terminator)
45b966db 630 cpp_reader *pfile;
041c3194 631 cpp_token *token;
0d9f234d 632 cppchar_t terminator;
45b966db 633{
041c3194 634 cpp_buffer *buffer = pfile->buffer;
93c80368 635 unsigned char *dest, *limit;
0d9f234d 636 cppchar_t c;
d4e6133f 637 bool warned_nulls = false;
64cdc383
MH
638#ifdef MULTIBYTE_CHARS
639 wchar_t wc;
640 int char_len;
641#endif
0d9f234d 642
ece54d54
NB
643 dest = BUFF_FRONT (pfile->u_buff);
644 limit = BUFF_LIMIT (pfile->u_buff);
93c80368 645
64cdc383
MH
646#ifdef MULTIBYTE_CHARS
647 /* Reset multibyte conversion state. */
648 (void) local_mbtowc (NULL, NULL, 0);
649#endif
0d9f234d 650 for (;;)
45b966db 651 {
87062813 652 /* We need room for another char, possibly the terminating NUL. */
ece54d54
NB
653 if ((size_t) (limit - dest) < 1)
654 {
655 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
8c3b2693 656 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
ece54d54
NB
657 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
658 limit = BUFF_LIMIT (pfile->u_buff);
659 }
7868b4a2 660
64cdc383
MH
661#ifdef MULTIBYTE_CHARS
662 char_len = local_mbtowc (&wc, (const char *) buffer->cur,
663 buffer->rlimit - buffer->cur);
664 if (char_len == -1)
665 {
666 cpp_error (pfile, DL_WARNING,
667 "ignoring invalid multibyte character");
668 char_len = 1;
669 c = *buffer->cur++;
670 }
671 else
672 {
673 buffer->cur += char_len;
674 c = wc;
675 }
676#else
87062813 677 c = *buffer->cur++;
64cdc383
MH
678#endif
679
680 /* Handle trigraphs, escaped newlines etc. */
0d9f234d 681 if (c == '?' || c == '\\')
87062813 682 c = skip_escaped_newlines (pfile);
45b966db 683
87062813 684 if (c == terminator)
45b966db 685 {
87062813
NB
686 if (unescaped_terminator_p (pfile, dest))
687 break;
0d9f234d
NB
688 }
689 else if (is_vspace (c))
690 {
d4e6133f
NB
691 /* No string literal may extend over multiple lines. In
692 assembly language, suppress the error except for <>
693 includes. This is a kludge around not knowing where
694 comments are. */
695 unterminated:
696 if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
ebef4e8c 697 cpp_error (pfile, DL_ERROR, "missing terminating %c character",
625458d0 698 (int) terminator);
d4e6133f
NB
699 buffer->cur--;
700 break;
0d9f234d 701 }
4d6baafa 702 else if (c == '\0')
0d9f234d 703 {
4d6baafa 704 if (buffer->cur - 1 == buffer->rlimit)
d4e6133f 705 goto unterminated;
4d6baafa
NB
706 if (!warned_nulls)
707 {
708 warned_nulls = true;
ebef4e8c
NB
709 cpp_error (pfile, DL_WARNING,
710 "null character(s) preserved in literal");
4d6baafa 711 }
45b966db 712 }
64cdc383
MH
713#ifdef MULTIBYTE_CHARS
714 if (char_len > 1)
715 {
716 for ( ; char_len > 0; --char_len)
717 *dest++ = (*buffer->cur - char_len);
718 }
719 else
720#endif
721 *dest++ = c;
45b966db
ZW
722 }
723
7868b4a2 724 *dest = '\0';
45b966db 725
ece54d54
NB
726 token->val.str.text = BUFF_FRONT (pfile->u_buff);
727 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
728 BUFF_FRONT (pfile->u_buff) = dest + 1;
0d9f234d 729}
041c3194 730
93c80368 731/* The stored comment includes the comment start and any terminator. */
9e62c811 732static void
477cdac7 733save_comment (pfile, token, from, type)
0d9f234d 734 cpp_reader *pfile;
041c3194
ZW
735 cpp_token *token;
736 const unsigned char *from;
477cdac7 737 cppchar_t type;
9e62c811 738{
041c3194 739 unsigned char *buffer;
477cdac7 740 unsigned int len, clen;
0d9f234d 741
1c6d33ef 742 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
480709cc 743
3542203b
NB
744 /* C++ comments probably (not definitely) have moved past a new
745 line, which we don't want to save in the comment. */
480709cc 746 if (is_vspace (pfile->buffer->cur[-1]))
3542203b 747 len--;
477cdac7
JT
748
749 /* If we are currently in a directive, then we need to store all
750 C++ comments as C comments internally, and so we need to
751 allocate a little extra space in that case.
752
753 Note that the only time we encounter a directive here is
754 when we are saving comments in a "#define". */
755 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
756
757 buffer = _cpp_unaligned_alloc (pfile, clen);
041c3194 758
041c3194 759 token->type = CPP_COMMENT;
477cdac7 760 token->val.str.len = clen;
0d9f234d 761 token->val.str.text = buffer;
45b966db 762
1c6d33ef
NB
763 buffer[0] = '/';
764 memcpy (buffer + 1, from, len - 1);
477cdac7 765
1eeeb6a4 766 /* Finish conversion to a C comment, if necessary. */
477cdac7
JT
767 if (pfile->state.in_directive && type == '/')
768 {
769 buffer[1] = '*';
770 buffer[clen - 2] = '*';
771 buffer[clen - 1] = '/';
772 }
0d9f234d 773}
45b966db 774
5fddcffc
NB
775/* Allocate COUNT tokens for RUN. */
776void
777_cpp_init_tokenrun (run, count)
778 tokenrun *run;
779 unsigned int count;
780{
781 run->base = xnewvec (cpp_token, count);
782 run->limit = run->base + count;
783 run->next = NULL;
784}
785
786/* Returns the next tokenrun, or creates one if there is none. */
787static tokenrun *
788next_tokenrun (run)
789 tokenrun *run;
790{
791 if (run->next == NULL)
792 {
793 run->next = xnew (tokenrun);
bdcbe496 794 run->next->prev = run;
5fddcffc
NB
795 _cpp_init_tokenrun (run->next, 250);
796 }
797
798 return run->next;
799}
800
4ed5bcfb
NB
801/* Allocate a single token that is invalidated at the same time as the
802 rest of the tokens on the line. Has its line and col set to the
803 same as the last lexed token, so that diagnostics appear in the
804 right place. */
805cpp_token *
806_cpp_temp_token (pfile)
807 cpp_reader *pfile;
808{
809 cpp_token *old, *result;
810
811 old = pfile->cur_token - 1;
812 if (pfile->cur_token == pfile->cur_run->limit)
813 {
814 pfile->cur_run = next_tokenrun (pfile->cur_run);
815 pfile->cur_token = pfile->cur_run->base;
816 }
817
818 result = pfile->cur_token++;
819 result->line = old->line;
820 result->col = old->col;
821 return result;
822}
823
14baae01
NB
824/* Lex a token into RESULT (external interface). Takes care of issues
825 like directive handling, token lookahead, multiple include
a1f300c0 826 optimization and skipping. */
345894b4
NB
827const cpp_token *
828_cpp_lex_token (pfile)
45b966db 829 cpp_reader *pfile;
5fddcffc 830{
bdcbe496 831 cpp_token *result;
5fddcffc 832
bdcbe496 833 for (;;)
5fddcffc 834 {
bdcbe496 835 if (pfile->cur_token == pfile->cur_run->limit)
5fddcffc 836 {
bdcbe496
NB
837 pfile->cur_run = next_tokenrun (pfile->cur_run);
838 pfile->cur_token = pfile->cur_run->base;
5fddcffc
NB
839 }
840
bdcbe496 841 if (pfile->lookaheads)
14baae01
NB
842 {
843 pfile->lookaheads--;
844 result = pfile->cur_token++;
845 }
bdcbe496 846 else
14baae01 847 result = _cpp_lex_direct (pfile);
bdcbe496
NB
848
849 if (result->flags & BOL)
5fddcffc 850 {
bdcbe496
NB
851 /* Is this a directive. If _cpp_handle_directive returns
852 false, it is an assembler #. */
853 if (result->type == CPP_HASH
e808ec9c
NB
854 /* 6.10.3 p 11: Directives in a list of macro arguments
855 gives undefined behavior. This implementation
856 handles the directive as normal. */
857 && pfile->state.parsing_args != 1
bdcbe496
NB
858 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
859 continue;
97293897
NB
860 if (pfile->cb.line_change && !pfile->state.skipping)
861 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
5fddcffc 862 }
5fddcffc 863
bdcbe496
NB
864 /* We don't skip tokens in directives. */
865 if (pfile->state.in_directive)
866 break;
5fddcffc 867
bdcbe496 868 /* Outside a directive, invalidate controlling macros. At file
14baae01 869 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
bdcbe496 870 get here and MI optimisation works. */
5fddcffc 871 pfile->mi_valid = false;
bdcbe496
NB
872
873 if (!pfile->state.skipping || result->type == CPP_EOF)
874 break;
5fddcffc
NB
875 }
876
345894b4 877 return result;
5fddcffc
NB
878}
879
/* Set result->type to THEN_TYPE if the next effective character is
   CHAR.  Otherwise un-read that character with BACKUP () and set
   result->type to ELSE_TYPE.  Expects `pfile', `buffer' and `result'
   to be in scope where it is expanded.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)	\
  do {						\
    if (get_effective_char (pfile) != CHAR)	\
      {						\
	BACKUP ();				\
	result->type = ELSE_TYPE;		\
      }						\
    else					\
      result->type = THEN_TYPE;			\
  } while (0)
890
14baae01
NB
891/* Lex a token into pfile->cur_token, which is also incremented, to
892 get diagnostics pointing to the correct location.
893
894 Does not handle issues such as token lookahead, multiple-include
895 optimisation, directives, skipping etc. This function is only
896 suitable for use by _cpp_lex_token, and in special cases like
897 lex_expansion_token which doesn't care for any of these issues.
898
899 When meeting a newline, returns CPP_EOF if parsing a directive,
900 otherwise returns to the start of the token buffer if permissible.
901 Returns the location of the lexed token. */
902cpp_token *
903_cpp_lex_direct (pfile)
5fddcffc 904 cpp_reader *pfile;
45b966db 905{
0d9f234d 906 cppchar_t c;
adb84b42 907 cpp_buffer *buffer;
0d9f234d 908 const unsigned char *comment_start;
14baae01 909 cpp_token *result = pfile->cur_token++;
9ec7291f 910
5fddcffc 911 fresh_line:
adb84b42 912 buffer = pfile->buffer;
bd969772
NB
913 result->flags = buffer->saved_flags;
914 buffer->saved_flags = 0;
5fddcffc 915 update_tokens_line:
1444f2ed 916 result->line = pfile->line;
041c3194 917
5fddcffc 918 skipped_white:
480709cc 919 c = *buffer->cur++;
5fddcffc 920 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
5fddcffc
NB
921
922 trigraph:
0d9f234d 923 switch (c)
45b966db 924 {
4d6baafa
NB
925 case ' ': case '\t': case '\f': case '\v': case '\0':
926 result->flags |= PREV_WHITE;
927 if (skip_whitespace (pfile, c))
928 goto skipped_white;
929
930 /* EOF. */
931 buffer->cur--;
bdcbe496 932 buffer->saved_flags = BOL;
387f9e32 933 if (!pfile->state.parsing_args)
ef6e958a 934 {
bdcbe496 935 if (buffer->cur != buffer->line_base)
5fddcffc
NB
936 {
937 /* Non-empty files should end in a newline. Don't warn
938 for command line and _Pragma buffers. */
939 if (!buffer->from_stage3)
ebef4e8c 940 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
87062813 941 handle_newline (pfile);
7364fdd8 942 }
bdcbe496
NB
943
944 /* Don't pop the last buffer. */
387f9e32 945 if (!pfile->state.in_directive && buffer->prev)
bdcbe496
NB
946 {
947 unsigned char stop = buffer->return_at_eof;
948
949 _cpp_pop_buffer (pfile);
950 if (!stop)
951 goto fresh_line;
952 }
ef6e958a 953 }
0d9f234d 954 result->type = CPP_EOF;
5fddcffc 955 break;
45b966db 956
0d9f234d 957 case '\n': case '\r':
87062813 958 handle_newline (pfile);
bdcbe496
NB
959 buffer->saved_flags = BOL;
960 if (! pfile->state.in_directive)
45b966db 961 {
4ed5bcfb
NB
962 if (pfile->state.parsing_args == 2)
963 buffer->saved_flags |= PREV_WHITE;
bdcbe496
NB
964 if (!pfile->keep_tokens)
965 {
966 pfile->cur_run = &pfile->base_run;
967 result = pfile->base_run.base;
968 pfile->cur_token = result + 1;
969 }
970 goto fresh_line;
45b966db 971 }
5fddcffc
NB
972 result->type = CPP_EOF;
973 break;
46d07497 974
0d9f234d
NB
975 case '?':
976 case '\\':
977 /* These could start an escaped newline, or '?' a trigraph. Let
978 skip_escaped_newlines do all the work. */
979 {
67821e3a 980 unsigned int line = pfile->line;
0d9f234d 981
87062813 982 c = skip_escaped_newlines (pfile);
67821e3a 983 if (line != pfile->line)
87062813 984 {
480709cc 985 buffer->cur--;
87062813
NB
986 /* We had at least one escaped newline of some sort.
987 Update the token's line and column. */
5fddcffc 988 goto update_tokens_line;
87062813 989 }
480709cc 990 }
0d9f234d 991
480709cc
NB
992 /* We are either the original '?' or '\\', or a trigraph. */
993 if (c == '?')
0d9f234d 994 result->type = CPP_QUERY;
480709cc
NB
995 else if (c == '\\')
996 goto random_char;
997 else
998 goto trigraph;
0d9f234d 999 break;
46d07497 1000
0d9f234d
NB
1001 case '0': case '1': case '2': case '3': case '4':
1002 case '5': case '6': case '7': case '8': case '9':
1003 result->type = CPP_NUMBER;
10cf9bde 1004 parse_number (pfile, &result->val.str, 0);
0d9f234d 1005 break;
46d07497 1006
0abc6a6a
NB
1007 case 'L':
1008 /* 'L' may introduce wide characters or strings. */
1009 {
1010 const unsigned char *pos = buffer->cur;
0d9f234d 1011
0abc6a6a
NB
1012 c = get_effective_char (pfile);
1013 if (c == '\'' || c == '"')
1014 {
1015 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1016 parse_string (pfile, result, c);
1017 break;
1018 }
1019 buffer->cur = pos;
1020 }
1021 /* Fall through. */
1022
1023 start_ident:
0d9f234d
NB
1024 case '_':
1025 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1026 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1027 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1028 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1029 case 'y': case 'z':
1030 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
0abc6a6a 1031 case 'G': case 'H': case 'I': case 'J': case 'K':
0d9f234d
NB
1032 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1033 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1034 case 'Y': case 'Z':
1035 result->type = CPP_NAME;
2c3fcba6 1036 result->val.node = parse_identifier (pfile);
0d9f234d 1037
0d9f234d 1038 /* Convert named operators to their proper types. */
0abc6a6a 1039 if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
1040 {
1041 result->flags |= NAMED_OP;
93c80368 1042 result->type = result->val.node->value.operator;
0d9f234d
NB
1043 }
1044 break;
1045
1046 case '\'':
1047 case '"':
1048 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
0d9f234d
NB
1049 parse_string (pfile, result, c);
1050 break;
041c3194 1051
0d9f234d 1052 case '/':
1c6d33ef
NB
1053 /* A potential block or line comment. */
1054 comment_start = buffer->cur;
29401c30 1055 c = get_effective_char (pfile);
480709cc 1056
1c6d33ef
NB
1057 if (c == '*')
1058 {
0d9f234d 1059 if (skip_block_comment (pfile))
ebef4e8c 1060 cpp_error (pfile, DL_ERROR, "unterminated comment");
0d9f234d 1061 }
480709cc
NB
1062 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1063 || CPP_IN_SYSTEM_HEADER (pfile)))
0d9f234d 1064 {
bdb05a7b
NB
1065 /* Warn about comments only if pedantically GNUC89, and not
1066 in system headers. */
1067 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 1068 && ! buffer->warned_cplusplus_comments)
041c3194 1069 {
ebef4e8c 1070 cpp_error (pfile, DL_PEDWARN,
1c6d33ef 1071 "C++ style comments are not allowed in ISO C89");
ebef4e8c
NB
1072 cpp_error (pfile, DL_PEDWARN,
1073 "(this will be reported only once per input file)");
1c6d33ef
NB
1074 buffer->warned_cplusplus_comments = 1;
1075 }
0d9f234d 1076
01ef6563 1077 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
ebef4e8c 1078 cpp_error (pfile, DL_WARNING, "multi-line comment");
1c6d33ef 1079 }
480709cc
NB
1080 else if (c == '=')
1081 {
1082 result->type = CPP_DIV_EQ;
1083 break;
1084 }
1085 else
1086 {
1087 BACKUP ();
1088 result->type = CPP_DIV;
1089 break;
1090 }
0d9f234d 1091
1c6d33ef
NB
1092 if (!pfile->state.save_comments)
1093 {
1094 result->flags |= PREV_WHITE;
5fddcffc 1095 goto update_tokens_line;
0d9f234d 1096 }
1c6d33ef
NB
1097
1098 /* Save the comment as a token in its own right. */
477cdac7 1099 save_comment (pfile, result, comment_start, c);
bdcbe496 1100 break;
0d9f234d
NB
1101
1102 case '<':
1103 if (pfile->state.angled_headers)
1104 {
1105 result->type = CPP_HEADER_NAME;
480709cc
NB
1106 parse_string (pfile, result, '>');
1107 break;
0d9f234d 1108 }
45b966db 1109
29401c30 1110 c = get_effective_char (pfile);
0d9f234d 1111 if (c == '=')
480709cc 1112 result->type = CPP_LESS_EQ;
0d9f234d 1113 else if (c == '<')
480709cc 1114 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
0d9f234d 1115 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
480709cc 1116 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
0d9f234d
NB
1117 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1118 {
480709cc 1119 result->type = CPP_OPEN_SQUARE;
0d9f234d
NB
1120 result->flags |= DIGRAPH;
1121 }
1122 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1123 {
480709cc 1124 result->type = CPP_OPEN_BRACE;
0d9f234d
NB
1125 result->flags |= DIGRAPH;
1126 }
480709cc
NB
1127 else
1128 {
1129 BACKUP ();
1130 result->type = CPP_LESS;
1131 }
0d9f234d
NB
1132 break;
1133
1134 case '>':
29401c30 1135 c = get_effective_char (pfile);
0d9f234d 1136 if (c == '=')
480709cc 1137 result->type = CPP_GREATER_EQ;
0d9f234d 1138 else if (c == '>')
480709cc 1139 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
0d9f234d 1140 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
480709cc
NB
1141 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1142 else
0d9f234d 1143 {
480709cc
NB
1144 BACKUP ();
1145 result->type = CPP_GREATER;
0d9f234d
NB
1146 }
1147 break;
1148
cbcff6df 1149 case '%':
480709cc
NB
1150 c = get_effective_char (pfile);
1151 if (c == '=')
1152 result->type = CPP_MOD_EQ;
1153 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1154 {
1155 result->flags |= DIGRAPH;
1156 result->type = CPP_HASH;
1157 if (get_effective_char (pfile) == '%')
1158 {
1159 const unsigned char *pos = buffer->cur;
1160
1161 if (get_effective_char (pfile) == ':')
1162 result->type = CPP_PASTE;
1163 else
1164 buffer->cur = pos - 1;
1165 }
1166 else
1167 BACKUP ();
1168 }
1169 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1170 {
1171 result->flags |= DIGRAPH;
1172 result->type = CPP_CLOSE_BRACE;
1173 }
1174 else
1175 {
1176 BACKUP ();
1177 result->type = CPP_MOD;
1178 }
0d9f234d
NB
1179 break;
1180
cbcff6df 1181 case '.':
480709cc
NB
1182 result->type = CPP_DOT;
1183 c = get_effective_char (pfile);
1184 if (c == '.')
1185 {
1186 const unsigned char *pos = buffer->cur;
1187
1188 if (get_effective_char (pfile) == '.')
1189 result->type = CPP_ELLIPSIS;
1190 else
1191 buffer->cur = pos - 1;
1192 }
1193 /* All known character sets have 0...9 contiguous. */
0df6c2c7 1194 else if (ISDIGIT (c))
480709cc
NB
1195 {
1196 result->type = CPP_NUMBER;
10cf9bde 1197 parse_number (pfile, &result->val.str, 1);
480709cc
NB
1198 }
1199 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1200 result->type = CPP_DOT_STAR;
1201 else
1202 BACKUP ();
0d9f234d 1203 break;
45b966db 1204
0d9f234d 1205 case '+':
29401c30 1206 c = get_effective_char (pfile);
480709cc
NB
1207 if (c == '+')
1208 result->type = CPP_PLUS_PLUS;
1209 else if (c == '=')
1210 result->type = CPP_PLUS_EQ;
1211 else
1212 {
1213 BACKUP ();
1214 result->type = CPP_PLUS;
1215 }
0d9f234d 1216 break;
04e3ec78 1217
0d9f234d 1218 case '-':
29401c30 1219 c = get_effective_char (pfile);
0d9f234d
NB
1220 if (c == '>')
1221 {
480709cc
NB
1222 result->type = CPP_DEREF;
1223 if (CPP_OPTION (pfile, cplusplus))
1224 {
1225 if (get_effective_char (pfile) == '*')
1226 result->type = CPP_DEREF_STAR;
1227 else
1228 BACKUP ();
1229 }
0d9f234d 1230 }
0d9f234d 1231 else if (c == '-')
480709cc
NB
1232 result->type = CPP_MINUS_MINUS;
1233 else if (c == '=')
1234 result->type = CPP_MINUS_EQ;
1235 else
1236 {
1237 BACKUP ();
1238 result->type = CPP_MINUS;
1239 }
0d9f234d 1240 break;
45b966db 1241
0d9f234d 1242 case '&':
29401c30 1243 c = get_effective_char (pfile);
480709cc
NB
1244 if (c == '&')
1245 result->type = CPP_AND_AND;
1246 else if (c == '=')
1247 result->type = CPP_AND_EQ;
1248 else
1249 {
1250 BACKUP ();
1251 result->type = CPP_AND;
1252 }
0d9f234d
NB
1253 break;
1254
0d9f234d 1255 case '|':
29401c30 1256 c = get_effective_char (pfile);
480709cc
NB
1257 if (c == '|')
1258 result->type = CPP_OR_OR;
1259 else if (c == '=')
1260 result->type = CPP_OR_EQ;
1261 else
1262 {
1263 BACKUP ();
1264 result->type = CPP_OR;
1265 }
0d9f234d 1266 break;
45b966db 1267
0d9f234d 1268 case ':':
29401c30 1269 c = get_effective_char (pfile);
0d9f234d 1270 if (c == ':' && CPP_OPTION (pfile, cplusplus))
480709cc 1271 result->type = CPP_SCOPE;
0d9f234d
NB
1272 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1273 {
1274 result->flags |= DIGRAPH;
480709cc
NB
1275 result->type = CPP_CLOSE_SQUARE;
1276 }
1277 else
1278 {
1279 BACKUP ();
1280 result->type = CPP_COLON;
0d9f234d
NB
1281 }
1282 break;
45b966db 1283
480709cc
NB
1284 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1285 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1286 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1287 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1288 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1289
0d9f234d
NB
1290 case '~': result->type = CPP_COMPL; break;
1291 case ',': result->type = CPP_COMMA; break;
1292 case '(': result->type = CPP_OPEN_PAREN; break;
1293 case ')': result->type = CPP_CLOSE_PAREN; break;
1294 case '[': result->type = CPP_OPEN_SQUARE; break;
1295 case ']': result->type = CPP_CLOSE_SQUARE; break;
1296 case '{': result->type = CPP_OPEN_BRACE; break;
1297 case '}': result->type = CPP_CLOSE_BRACE; break;
1298 case ';': result->type = CPP_SEMICOLON; break;
1299
cc937581
ZW
1300 /* @ is a punctuator in Objective C. */
1301 case '@': result->type = CPP_ATSIGN; break;
0d9f234d 1302
0abc6a6a
NB
1303 case '$':
1304 if (CPP_OPTION (pfile, dollars_in_ident))
1305 goto start_ident;
1306 /* Fall through... */
1307
0d9f234d
NB
1308 random_char:
1309 default:
1310 result->type = CPP_OTHER;
6c53ebff 1311 result->val.c = c;
0d9f234d
NB
1312 break;
1313 }
bdcbe496
NB
1314
1315 return result;
0d9f234d
NB
1316}
1317
5d8ebbd8 1318/* An upper bound on the number of bytes needed to spell TOKEN,
93c80368
NB
1319 including preceding whitespace. */
1320unsigned int
1321cpp_token_len (token)
1322 const cpp_token *token;
0d9f234d 1323{
93c80368 1324 unsigned int len;
6d2c2047 1325
93c80368 1326 switch (TOKEN_SPELL (token))
041c3194 1327 {
a28c5035 1328 default: len = 0; break;
47ad4138 1329 case SPELL_NUMBER:
a28c5035
NB
1330 case SPELL_STRING: len = token->val.str.len; break;
1331 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
041c3194 1332 }
47ad4138 1333 /* 1 for whitespace, 4 for comment delimiters. */
93c80368 1334 return len + 5;
6d2c2047
ZW
1335}
1336
041c3194 1337/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1338 already contain the enough space to hold the token's spelling.
1339 Returns a pointer to the character after the last character
1340 written. */
93c80368
NB
1341unsigned char *
1342cpp_spell_token (pfile, token, buffer)
041c3194
ZW
1343 cpp_reader *pfile; /* Would be nice to be rid of this... */
1344 const cpp_token *token;
1345 unsigned char *buffer;
1346{
96be6998 1347 switch (TOKEN_SPELL (token))
041c3194
ZW
1348 {
1349 case SPELL_OPERATOR:
1350 {
1351 const unsigned char *spelling;
1352 unsigned char c;
d6d5f795 1353
041c3194 1354 if (token->flags & DIGRAPH)
37b8524c
JDA
1355 spelling
1356 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1357 else if (token->flags & NAMED_OP)
1358 goto spell_ident;
041c3194 1359 else
96be6998 1360 spelling = TOKEN_NAME (token);
041c3194
ZW
1361
1362 while ((c = *spelling++) != '\0')
1363 *buffer++ = c;
1364 }
1365 break;
d6d5f795 1366
47ad4138
ZW
1367 case SPELL_CHAR:
1368 *buffer++ = token->val.c;
1369 break;
1370
1371 spell_ident:
041c3194 1372 case SPELL_IDENT:
a28c5035
NB
1373 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1374 buffer += NODE_LEN (token->val.node);
041c3194 1375 break;
d6d5f795 1376
47ad4138
ZW
1377 case SPELL_NUMBER:
1378 memcpy (buffer, token->val.str.text, token->val.str.len);
1379 buffer += token->val.str.len;
1380 break;
1381
041c3194
ZW
1382 case SPELL_STRING:
1383 {
ba89d661
ZW
1384 int left, right, tag;
1385 switch (token->type)
1386 {
1387 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1388 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
ba89d661
ZW
1389 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1390 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1391 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
47ad4138 1392 default:
ebef4e8c
NB
1393 cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1394 TOKEN_NAME (token));
47ad4138 1395 return buffer;
ba89d661
ZW
1396 }
1397 if (tag) *buffer++ = tag;
47ad4138 1398 *buffer++ = left;
bfb9dc7f
ZW
1399 memcpy (buffer, token->val.str.text, token->val.str.len);
1400 buffer += token->val.str.len;
47ad4138 1401 *buffer++ = right;
041c3194
ZW
1402 }
1403 break;
d6d5f795 1404
041c3194 1405 case SPELL_NONE:
ebef4e8c 1406 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1407 break;
1408 }
d6d5f795 1409
041c3194
ZW
1410 return buffer;
1411}
d6d5f795 1412
5d8ebbd8
NB
1413/* Returns TOKEN spelt as a null-terminated string. The string is
1414 freed when the reader is destroyed. Useful for diagnostics. */
93c80368
NB
1415unsigned char *
1416cpp_token_as_text (pfile, token)
c5a04734 1417 cpp_reader *pfile;
041c3194 1418 const cpp_token *token;
c5a04734 1419{
93c80368 1420 unsigned int len = cpp_token_len (token);
ece54d54 1421 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
c5a04734 1422
93c80368
NB
1423 end = cpp_spell_token (pfile, token, start);
1424 end[0] = '\0';
c5a04734 1425
93c80368
NB
1426 return start;
1427}
c5a04734 1428
5d8ebbd8
NB
1429/* Used by C front ends, which really should move to using
1430 cpp_token_as_text. */
93c80368
NB
1431const char *
1432cpp_type2name (type)
1433 enum cpp_ttype type;
1434{
1435 return (const char *) token_spellings[type].name;
1436}
c5a04734 1437
4ed5bcfb
NB
1438/* Writes the spelling of token to FP, without any preceding space.
1439 Separated from cpp_spell_token for efficiency - to avoid stdio
1440 double-buffering. */
93c80368
NB
1441void
1442cpp_output_token (token, fp)
1443 const cpp_token *token;
1444 FILE *fp;
1445{
93c80368 1446 switch (TOKEN_SPELL (token))
c5a04734 1447 {
93c80368
NB
1448 case SPELL_OPERATOR:
1449 {
1450 const unsigned char *spelling;
3b681e9d 1451 int c;
c5a04734 1452
93c80368 1453 if (token->flags & DIGRAPH)
37b8524c
JDA
1454 spelling
1455 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1456 else if (token->flags & NAMED_OP)
1457 goto spell_ident;
1458 else
1459 spelling = TOKEN_NAME (token);
041c3194 1460
3b681e9d
ZW
1461 c = *spelling;
1462 do
1463 putc (c, fp);
1464 while ((c = *++spelling) != '\0');
93c80368
NB
1465 }
1466 break;
041c3194 1467
47ad4138
ZW
1468 case SPELL_CHAR:
1469 putc (token->val.c, fp);
1470 break;
1471
93c80368
NB
1472 spell_ident:
1473 case SPELL_IDENT:
3b681e9d 1474 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
93c80368 1475 break;
041c3194 1476
47ad4138
ZW
1477 case SPELL_NUMBER:
1478 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1479 break;
1480
93c80368
NB
1481 case SPELL_STRING:
1482 {
1483 int left, right, tag;
1484 switch (token->type)
1485 {
1486 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1487 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
93c80368
NB
1488 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1489 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1490 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
47ad4138
ZW
1491 default:
1492 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1493 return;
93c80368
NB
1494 }
1495 if (tag) putc (tag, fp);
47ad4138 1496 putc (left, fp);
93c80368 1497 fwrite (token->val.str.text, 1, token->val.str.len, fp);
47ad4138 1498 putc (right, fp);
93c80368
NB
1499 }
1500 break;
c5a04734 1501
93c80368
NB
1502 case SPELL_NONE:
1503 /* An error, most probably. */
1504 break;
041c3194 1505 }
c5a04734
ZW
1506}
1507
93c80368
NB
1508/* Compare two tokens. */
1509int
1510_cpp_equiv_tokens (a, b)
1511 const cpp_token *a, *b;
c5a04734 1512{
93c80368
NB
1513 if (a->type == b->type && a->flags == b->flags)
1514 switch (TOKEN_SPELL (a))
1515 {
1516 default: /* Keep compiler happy. */
1517 case SPELL_OPERATOR:
1518 return 1;
1519 case SPELL_CHAR:
6c53ebff 1520 return a->val.c == b->val.c; /* Character. */
93c80368 1521 case SPELL_NONE:
56051c0a 1522 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1523 case SPELL_IDENT:
1524 return a->val.node == b->val.node;
47ad4138 1525 case SPELL_NUMBER:
93c80368
NB
1526 case SPELL_STRING:
1527 return (a->val.str.len == b->val.str.len
1528 && !memcmp (a->val.str.text, b->val.str.text,
1529 a->val.str.len));
1530 }
c5a04734 1531
041c3194
ZW
1532 return 0;
1533}
1534
93c80368
NB
1535/* Returns nonzero if a space should be inserted to avoid an
1536 accidental token paste for output. For simplicity, it is
1537 conservative, and occasionally advises a space where one is not
1538 needed, e.g. "." and ".2". */
93c80368
NB
1539int
1540cpp_avoid_paste (pfile, token1, token2)
c5a04734 1541 cpp_reader *pfile;
93c80368 1542 const cpp_token *token1, *token2;
c5a04734 1543{
93c80368
NB
1544 enum cpp_ttype a = token1->type, b = token2->type;
1545 cppchar_t c;
c5a04734 1546
93c80368
NB
1547 if (token1->flags & NAMED_OP)
1548 a = CPP_NAME;
1549 if (token2->flags & NAMED_OP)
1550 b = CPP_NAME;
c5a04734 1551
93c80368
NB
1552 c = EOF;
1553 if (token2->flags & DIGRAPH)
37b8524c 1554 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1555 else if (token_spellings[b].category == SPELL_OPERATOR)
1556 c = token_spellings[b].name[0];
c5a04734 1557
93c80368 1558 /* Quickly get everything that can paste with an '='. */
37b8524c 1559 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1560 return 1;
c5a04734 1561
93c80368 1562 switch (a)
c5a04734 1563 {
93c80368
NB
1564 case CPP_GREATER: return c == '>' || c == '?';
1565 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1566 case CPP_PLUS: return c == '+';
1567 case CPP_MINUS: return c == '-' || c == '>';
1568 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1569 case CPP_MOD: return c == ':' || c == '>';
1570 case CPP_AND: return c == '&';
1571 case CPP_OR: return c == '|';
1572 case CPP_COLON: return c == ':' || c == '>';
1573 case CPP_DEREF: return c == '*';
26ec42ee 1574 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1575 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1576 case CPP_NAME: return ((b == CPP_NUMBER
1577 && name_p (pfile, &token2->val.str))
1578 || b == CPP_NAME
1579 || b == CPP_CHAR || b == CPP_STRING); /* L */
1580 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1581 || c == '.' || c == '+' || c == '-');
1582 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
6c53ebff 1583 && token1->val.c == '@'
93c80368
NB
1584 && (b == CPP_NAME || b == CPP_STRING));
1585 default: break;
c5a04734 1586 }
c5a04734 1587
417f3e3a 1588 return 0;
c5a04734
ZW
1589}
1590
93c80368 1591/* Output all the remaining tokens on the current line, and a newline
4ed5bcfb
NB
1592 character, to FP. Leading whitespace is removed. If there are
1593 macros, special token padding is not performed. */
c5a04734 1594void
93c80368 1595cpp_output_line (pfile, fp)
c5a04734 1596 cpp_reader *pfile;
93c80368 1597 FILE *fp;
c5a04734 1598{
4ed5bcfb 1599 const cpp_token *token;
96be6998 1600
4ed5bcfb
NB
1601 token = cpp_get_token (pfile);
1602 while (token->type != CPP_EOF)
96be6998 1603 {
4ed5bcfb
NB
1604 cpp_output_token (token, fp);
1605 token = cpp_get_token (pfile);
1606 if (token->flags & PREV_WHITE)
1607 putc (' ', fp);
96be6998
ZW
1608 }
1609
93c80368 1610 putc ('\n', fp);
041c3194 1611}
c5a04734 1612
c8a96070
NB
/* Return the numeric value of the hexadecimal digit C.  Aborts if C
   is not a hexadecimal digit according to hex_p.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1623
62729350
NB
1624/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1625 failure if cpplib is not parsing C++ or C99. Such failure is
1626 silent, and no variables are updated. Otherwise returns 0, and
1627 warns if -Wtraditional.
c8a96070
NB
1628
1629 [lex.charset]: The character designated by the universal character
1630 name \UNNNNNNNN is that character whose character short name in
1631 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1632 universal character name \uNNNN is that character whose character
1633 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1634 for a universal character name is less than 0x20 or in the range
1635 0x7F-0x9F (inclusive), or if the universal character name
1636 designates a character in the basic source character set, then the
1637 program is ill-formed.
1638
1639 We assume that wchar_t is Unicode, so we don't need to do any
62729350 1640 mapping. Is this ever wrong?
c8a96070 1641
62729350
NB
1642 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1643 LIMIT is the end of the string or charconst. PSTR is updated to
1644 point after the UCS on return, and the UCS is written into PC. */
1645
1646static int
1647maybe_read_ucs (pfile, pstr, limit, pc)
c8a96070
NB
1648 cpp_reader *pfile;
1649 const unsigned char **pstr;
1650 const unsigned char *limit;
625458d0 1651 cppchar_t *pc;
c8a96070
NB
1652{
1653 const unsigned char *p = *pstr;
62729350
NB
1654 unsigned int code = 0;
1655 unsigned int c = *pc, length;
1656
1657 /* Only attempt to interpret a UCS for C++ and C99. */
1658 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1659 return 1;
c8a96070 1660
62729350 1661 if (CPP_WTRADITIONAL (pfile))
ebef4e8c
NB
1662 cpp_error (pfile, DL_WARNING,
1663 "the meaning of '\\%c' is different in traditional C", c);
c8a96070 1664
f8710242
NB
1665 length = (c == 'u' ? 4: 8);
1666
1667 if ((size_t) (limit - p) < length)
1668 {
ebef4e8c 1669 cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
f8710242
NB
1670 /* Skip to the end to avoid more diagnostics. */
1671 p = limit;
1672 }
1673 else
1674 {
1675 for (; length; length--, p++)
c8a96070 1676 {
f8710242
NB
1677 c = *p;
1678 if (ISXDIGIT (c))
1679 code = (code << 4) + hex_digit_value (c);
1680 else
1681 {
ebef4e8c 1682 cpp_error (pfile, DL_ERROR,
f8710242
NB
1683 "non-hex digit '%c' in universal-character-name", c);
1684 /* We shouldn't skip in case there are multibyte chars. */
1685 break;
1686 }
c8a96070 1687 }
c8a96070
NB
1688 }
1689
1690#ifdef TARGET_EBCDIC
ebef4e8c 1691 cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
c8a96070
NB
1692 code = 0x3f; /* EBCDIC invalid character */
1693#else
f8710242
NB
1694 /* True extended characters are OK. */
1695 if (code >= 0xa0
1696 && !(code & 0x80000000)
1697 && !(code >= 0xD800 && code <= 0xDFFF))
1698 ;
1699 /* The standard permits $, @ and ` to be specified as UCNs. We use
1700 hex escapes so that this also works with EBCDIC hosts. */
1701 else if (code == 0x24 || code == 0x40 || code == 0x60)
1702 ;
1703 /* Don't give another error if one occurred above. */
1704 else if (length == 0)
ebef4e8c 1705 cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
c8a96070
NB
1706#endif
1707
1708 *pstr = p;
62729350
NB
1709 *pc = code;
1710 return 0;
c8a96070
NB
1711}
1712
4268e8bb
NB
1713/* Returns the value of an escape sequence, truncated to the correct
1714 target precision. PSTR points to the input pointer, which is just
1715 after the backslash. LIMIT is how much text we have. WIDE is true
1716 if the escape sequence is part of a wide character constant or
1717 string literal. Handles all relevant diagnostics. */
1718cppchar_t
1719cpp_parse_escape (pfile, pstr, limit, wide)
c8a96070
NB
1720 cpp_reader *pfile;
1721 const unsigned char **pstr;
1722 const unsigned char *limit;
4268e8bb 1723 int wide;
c8a96070
NB
1724{
1725 int unknown = 0;
1726 const unsigned char *str = *pstr;
4268e8bb
NB
1727 cppchar_t c, mask;
1728 unsigned int width;
1729
1730 if (wide)
1731 width = CPP_OPTION (pfile, wchar_precision);
1732 else
1733 width = CPP_OPTION (pfile, char_precision);
1734 if (width < BITS_PER_CPPCHAR_T)
1735 mask = ((cppchar_t) 1 << width) - 1;
1736 else
1737 mask = ~0;
c8a96070 1738
4268e8bb 1739 c = *str++;
c8a96070
NB
1740 switch (c)
1741 {
1742 case '\\': case '\'': case '"': case '?': break;
1743 case 'b': c = TARGET_BS; break;
1744 case 'f': c = TARGET_FF; break;
1745 case 'n': c = TARGET_NEWLINE; break;
1746 case 'r': c = TARGET_CR; break;
1747 case 't': c = TARGET_TAB; break;
1748 case 'v': c = TARGET_VT; break;
1749
1750 case '(': case '{': case '[': case '%':
1751 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1752 '\%' is used to prevent SCCS from getting confused. */
1753 unknown = CPP_PEDANTIC (pfile);
1754 break;
1755
1756 case 'a':
1757 if (CPP_WTRADITIONAL (pfile))
ebef4e8c
NB
1758 cpp_error (pfile, DL_WARNING,
1759 "the meaning of '\\a' is different in traditional C");
001e3fee 1760 c = TARGET_BELL;
c8a96070
NB
1761 break;
1762
1763 case 'e': case 'E':
1764 if (CPP_PEDANTIC (pfile))
ebef4e8c 1765 cpp_error (pfile, DL_PEDWARN,
625458d0 1766 "non-ISO-standard escape sequence, '\\%c'", (int) c);
c8a96070
NB
1767 c = TARGET_ESC;
1768 break;
1769
c8a96070 1770 case 'u': case 'U':
62729350 1771 unknown = maybe_read_ucs (pfile, &str, limit, &c);
c8a96070
NB
1772 break;
1773
1774 case 'x':
1775 if (CPP_WTRADITIONAL (pfile))
ebef4e8c
NB
1776 cpp_error (pfile, DL_WARNING,
1777 "the meaning of '\\x' is different in traditional C");
c8a96070 1778
c8a96070 1779 {
4268e8bb 1780 cppchar_t i = 0, overflow = 0;
c8a96070
NB
1781 int digits_found = 0;
1782
1783 while (str < limit)
1784 {
1785 c = *str;
1786 if (! ISXDIGIT (c))
1787 break;
1788 str++;
1789 overflow |= i ^ (i << 4 >> 4);
1790 i = (i << 4) + hex_digit_value (c);
1791 digits_found = 1;
1792 }
1793
1794 if (!digits_found)
ebef4e8c
NB
1795 cpp_error (pfile, DL_ERROR,
1796 "\\x used with no following hex digits");
c8a96070
NB
1797
1798 if (overflow | (i != (i & mask)))
1799 {
ebef4e8c
NB
1800 cpp_error (pfile, DL_PEDWARN,
1801 "hex escape sequence out of range");
c8a96070
NB
1802 i &= mask;
1803 }
1804 c = i;
1805 }
1806 break;
1807
1808 case '0': case '1': case '2': case '3':
1809 case '4': case '5': case '6': case '7':
1810 {
4268e8bb
NB
1811 size_t count = 0;
1812 cppchar_t i = c - '0';
c8a96070
NB
1813
1814 while (str < limit && ++count < 3)
1815 {
1816 c = *str;
1817 if (c < '0' || c > '7')
1818 break;
1819 str++;
1820 i = (i << 3) + c - '0';
1821 }
1822
1823 if (i != (i & mask))
1824 {
ebef4e8c
NB
1825 cpp_error (pfile, DL_PEDWARN,
1826 "octal escape sequence out of range");
c8a96070
NB
1827 i &= mask;
1828 }
1829 c = i;
1830 }
1831 break;
1832
1833 default:
1834 unknown = 1;
1835 break;
1836 }
1837
1838 if (unknown)
1839 {
1840 if (ISGRAPH (c))
625458d0
NB
1841 cpp_error (pfile, DL_PEDWARN,
1842 "unknown escape sequence '\\%c'", (int) c);
c8a96070 1843 else
625458d0
NB
1844 cpp_error (pfile, DL_PEDWARN,
1845 "unknown escape sequence: '\\%03o'", (int) c);
c8a96070
NB
1846 }
1847
62729350 1848 if (c > mask)
4268e8bb 1849 {
639e8b0c 1850 cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
4268e8bb
NB
1851 c &= mask;
1852 }
62729350 1853
c8a96070
NB
1854 *pstr = str;
1855 return c;
1856}
1857
c8a96070 1858/* Interpret a (possibly wide) character constant in TOKEN.
4268e8bb
NB
1859 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
1860 points to a variable that is filled in with the number of
1861 characters seen, and UNSIGNEDP to a variable that indicates whether
1862 the result has signed type. */
1863cppchar_t
a5a49440 1864cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
c8a96070
NB
1865 cpp_reader *pfile;
1866 const cpp_token *token;
c8a96070 1867 unsigned int *pchars_seen;
4268e8bb 1868 int *unsignedp;
c8a96070
NB
1869{
1870 const unsigned char *str = token->val.str.text;
1871 const unsigned char *limit = str + token->val.str.len;
1872 unsigned int chars_seen = 0;
639e8b0c 1873 size_t width, max_chars;
4268e8bb 1874 cppchar_t c, mask, result = 0;
a47ed310 1875 bool unsigned_p;
c8a96070
NB
1876
1877#ifdef MULTIBYTE_CHARS
1878 (void) local_mbtowc (NULL, NULL, 0);
1879#endif
1880
1881 /* Width in bits. */
1882 if (token->type == CPP_CHAR)
a47ed310 1883 {
4268e8bb 1884 width = CPP_OPTION (pfile, char_precision);
2443d4e1 1885 max_chars = CPP_OPTION (pfile, int_precision) / width;
44a147ad 1886 unsigned_p = CPP_OPTION (pfile, unsigned_char);
a47ed310 1887 }
c8a96070 1888 else
a47ed310 1889 {
4268e8bb 1890 width = CPP_OPTION (pfile, wchar_precision);
2443d4e1 1891 max_chars = 1;
44a147ad 1892 unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
a47ed310 1893 }
c8a96070 1894
4268e8bb
NB
1895 if (width < BITS_PER_CPPCHAR_T)
1896 mask = ((cppchar_t) 1 << width) - 1;
c8a96070
NB
1897 else
1898 mask = ~0;
c8a96070
NB
1899
1900 while (str < limit)
1901 {
1902#ifdef MULTIBYTE_CHARS
1903 wchar_t wc;
1904 int char_len;
1905
1906 char_len = local_mbtowc (&wc, str, limit - str);
1907 if (char_len == -1)
1908 {
ebef4e8c
NB
1909 cpp_error (pfile, DL_WARNING,
1910 "ignoring invalid multibyte character");
c8a96070
NB
1911 c = *str++;
1912 }
1913 else
1914 {
1915 str += char_len;
1916 c = wc;
1917 }
1918#else
1919 c = *str++;
1920#endif
1921
1922 if (c == '\\')
4268e8bb 1923 c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
c8a96070
NB
1924
1925#ifdef MAP_CHARACTER
1926 if (ISPRINT (c))
1927 c = MAP_CHARACTER (c);
1928#endif
1929
639e8b0c
NB
1930 chars_seen++;
1931
a5a49440
NB
1932 /* Truncate the character, scale the result and merge the two. */
1933 c &= mask;
639e8b0c 1934 if (width < BITS_PER_CPPCHAR_T)
a5a49440 1935 result = (result << width) | c;
639e8b0c
NB
1936 else
1937 result = c;
c8a96070
NB
1938 }
1939
1940 if (chars_seen == 0)
ebef4e8c 1941 cpp_error (pfile, DL_ERROR, "empty character constant");
639e8b0c 1942 else if (chars_seen > 1)
c8a96070 1943 {
639e8b0c
NB
1944 /* Multichar charconsts are of type int and therefore signed. */
1945 unsigned_p = 0;
a5a49440 1946
639e8b0c
NB
1947 if (chars_seen > max_chars)
1948 {
1949 chars_seen = max_chars;
1950 cpp_error (pfile, DL_WARNING,
1951 "character constant too long for its type");
1952 }
a5a49440 1953 else if (CPP_OPTION (pfile, warn_multichar))
639e8b0c 1954 cpp_error (pfile, DL_WARNING, "multi-character character constant");
c8a96070
NB
1955 }
1956
b9e2d17b
NB
1957 /* Sign-extend or truncate the constant to cppchar_t. The value is
1958 in WIDTH bits, but for multi-char charconsts it's value is the
1959 full target type's width. */
1960 if (chars_seen > 1)
1961 width *= max_chars;
1962 if (width < BITS_PER_CPPCHAR_T)
a5a49440 1963 {
b9e2d17b
NB
1964 mask = ((cppchar_t) 1 << width) - 1;
1965 if (unsigned_p || !(result & (1 << (width - 1))))
1966 result &= mask;
1967 else
1968 result |= ~mask;
a5a49440
NB
1969 }
1970
c8a96070 1971 *pchars_seen = chars_seen;
4268e8bb 1972 *unsignedp = unsigned_p;
c8a96070
NB
1973 return result;
1974}
1975
1e013d2e
NB
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).  */

/* Smallest data area a freshly allocated buffer is given.  */
#define MIN_BUFF_SIZE 8000

/* Largest recycled buffer considered acceptable for a request of
   MIN_SIZE bytes; anything bigger is left on the free list.  */
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)

/* Size to request when extending BUFF: MIN_EXTRA bytes plus twice the
   span between BUFF's cur and limit pointers.  MIN_EXTRA is
   parenthesized so that an argument containing a low-precedence
   operator (e.g. a conditional expression) is added as a whole.
   BUFF is evaluated twice.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
  ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1990
/* Alignment probe.  The lone leading char makes the compiler insert
   whatever padding the union U requires, so the offset of U is the
   alignment demanded by the stricter of its two members.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* Alignment used when sizing buffers: the offset at which U lands
   inside struct dummy.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to a multiple of ALIGN using a mask, which is only
   meaningful when ALIGN is a power of two.  ALIGN is evaluated
   twice.  */
#define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
2003
c9e7a609
NB
2004/* Create a new allocation buffer. Place the control block at the end
2005 of the buffer, so that buffer overflows will cause immediate chaos. */
b8af0ca5
NB
2006static _cpp_buff *
2007new_buff (len)
6142088c 2008 size_t len;
b8af0ca5
NB
2009{
2010 _cpp_buff *result;
ece54d54 2011 unsigned char *base;
b8af0ca5 2012
1e013d2e
NB
2013 if (len < MIN_BUFF_SIZE)
2014 len = MIN_BUFF_SIZE;
b8af0ca5
NB
2015 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
2016
2017 base = xmalloc (len + sizeof (_cpp_buff));
2018 result = (_cpp_buff *) (base + len);
2019 result->base = base;
2020 result->cur = base;
2021 result->limit = base + len;
2022 result->next = NULL;
2023 return result;
2024}
2025
2026/* Place a chain of unwanted allocation buffers on the free list. */
2027void
2028_cpp_release_buff (pfile, buff)
2029 cpp_reader *pfile;
2030 _cpp_buff *buff;
2031{
2032 _cpp_buff *end = buff;
2033
2034 while (end->next)
2035 end = end->next;
2036 end->next = pfile->free_buffs;
2037 pfile->free_buffs = buff;
2038}
2039
2040/* Return a free buffer of size at least MIN_SIZE. */
2041_cpp_buff *
2042_cpp_get_buff (pfile, min_size)
2043 cpp_reader *pfile;
6142088c 2044 size_t min_size;
b8af0ca5
NB
2045{
2046 _cpp_buff *result, **p;
2047
2048 for (p = &pfile->free_buffs;; p = &(*p)->next)
2049 {
6142088c 2050 size_t size;
1e013d2e
NB
2051
2052 if (*p == NULL)
b8af0ca5 2053 return new_buff (min_size);
1e013d2e
NB
2054 result = *p;
2055 size = result->limit - result->base;
2056 /* Return a buffer that's big enough, but don't waste one that's
2057 way too big. */
34f5271d 2058 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
b8af0ca5
NB
2059 break;
2060 }
2061
2062 *p = result->next;
2063 result->next = NULL;
2064 result->cur = result->base;
2065 return result;
2066}
2067
4fe9b91c 2068/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
2069 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2070 the excess bytes to the new buffer. Chains the new buffer after
2071 BUFF, and returns the new buffer. */
b8af0ca5 2072_cpp_buff *
8c3b2693 2073_cpp_append_extend_buff (pfile, buff, min_extra)
b8af0ca5
NB
2074 cpp_reader *pfile;
2075 _cpp_buff *buff;
6142088c 2076 size_t min_extra;
b8af0ca5 2077{
6142088c 2078 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
8c3b2693 2079 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
b8af0ca5 2080
8c3b2693
NB
2081 buff->next = new_buff;
2082 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2083 return new_buff;
2084}
2085
4fe9b91c 2086/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
2087 remaining bytes of the buffer pointed to by BUFF, and at least
2088 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2089 Chains the new buffer before the buffer pointed to by BUFF, and
2090 updates the pointer to point to the new buffer. */
2091void
2092_cpp_extend_buff (pfile, pbuff, min_extra)
2093 cpp_reader *pfile;
2094 _cpp_buff **pbuff;
2095 size_t min_extra;
2096{
2097 _cpp_buff *new_buff, *old_buff = *pbuff;
2098 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2099
2100 new_buff = _cpp_get_buff (pfile, size);
2101 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2102 new_buff->next = old_buff;
2103 *pbuff = new_buff;
b8af0ca5
NB
2104}
2105
2106/* Free a chain of buffers starting at BUFF. */
2107void
2108_cpp_free_buff (buff)
2109 _cpp_buff *buff;
2110{
2111 _cpp_buff *next;
2112
2113 for (; buff; buff = next)
2114 {
2115 next = buff->next;
2116 free (buff->base);
2117 }
2118}
417f3e3a 2119
ece54d54
NB
2120/* Allocate permanent, unaligned storage of length LEN. */
2121unsigned char *
2122_cpp_unaligned_alloc (pfile, len)
2123 cpp_reader *pfile;
2124 size_t len;
2125{
2126 _cpp_buff *buff = pfile->u_buff;
2127 unsigned char *result = buff->cur;
2128
2129 if (len > (size_t) (buff->limit - result))
2130 {
2131 buff = _cpp_get_buff (pfile, len);
2132 buff->next = pfile->u_buff;
2133 pfile->u_buff = buff;
2134 result = buff->cur;
2135 }
2136
2137 buff->cur = result + len;
2138 return result;
2139}
2140
87062813
NB
2141/* Allocate permanent, unaligned storage of length LEN from a_buff.
2142 That buffer is used for growing allocations when saving macro
2143 replacement lists in a #define, and when parsing an answer to an
2144 assertion in #assert, #unassert or #if (and therefore possibly
2145 whilst expanding macros). It therefore must not be used by any
2146 code that they might call: specifically the lexer and the guts of
2147 the macro expander.
2148
2149 All existing other uses clearly fit this restriction: storing
2150 registered pragmas during initialization. */
93c80368 2151unsigned char *
8c3b2693
NB
2152_cpp_aligned_alloc (pfile, len)
2153 cpp_reader *pfile;
2154 size_t len;
3fef5b2b 2155{
8c3b2693
NB
2156 _cpp_buff *buff = pfile->a_buff;
2157 unsigned char *result = buff->cur;
3fef5b2b 2158
8c3b2693 2159 if (len > (size_t) (buff->limit - result))
3fef5b2b 2160 {
8c3b2693
NB
2161 buff = _cpp_get_buff (pfile, len);
2162 buff->next = pfile->a_buff;
2163 pfile->a_buff = buff;
2164 result = buff->cur;
3fef5b2b 2165 }
041c3194 2166
8c3b2693 2167 buff->cur = result + len;
93c80368 2168 return result;
041c3194 2169}