]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/cpplex.c
re PR c++/4934 (Seg fault on legal code)
[thirdparty/gcc.git] / gcc / cpplex.c
CommitLineData
45b966db 1/* CPP Library - lexical analysis.
5d8ebbd8 2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
45b966db
ZW
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
c5a04734 7 Single-pass line tokenization by Neil Booth, April 2000
45b966db
ZW
8
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23#include "config.h"
24#include "system.h"
45b966db
ZW
25#include "cpplib.h"
26#include "cpphash.h"
27
c8a96070
NB
28/* MULTIBYTE_CHARS support only works for native compilers.
29 ??? Ideally what we want is to model widechar support after
30 the current floating point support. */
31#ifdef CROSS_COMPILE
32#undef MULTIBYTE_CHARS
33#endif
34
35#ifdef MULTIBYTE_CHARS
36#include "mbchar.h"
37#include <locale.h>
38#endif
39
/* How a token's spelling is obtained.  Tokens with SPELL_STRING store
   their spelling in the token list, and its length in
   token->val.str.len.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_NUMBER,
  SPELL_STRING,
  SPELL_NONE
};
51
93c80368 52struct token_spelling
f9a0e96c 53{
93c80368
NB
54 enum spell_type category;
55 const unsigned char *name;
f9a0e96c
ZW
56};
57
8206c799
ZW
58static const unsigned char *const digraph_spellings[] =
59{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
93c80368
NB
60
61#define OP(e, s) { SPELL_OPERATOR, U s },
62#define TK(e, s) { s, U STRINGX (e) },
8206c799 63static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
93c80368
NB
64#undef OP
65#undef TK
66
67#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
68#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
480709cc 69#define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
f2d5f0cc 70
87062813
NB
71static void handle_newline PARAMS ((cpp_reader *));
72static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
29401c30 73static cppchar_t get_effective_char PARAMS ((cpp_reader *));
0d9f234d 74
041c3194 75static int skip_block_comment PARAMS ((cpp_reader *));
cbcff6df 76static int skip_line_comment PARAMS ((cpp_reader *));
0d9f234d 77static void adjust_column PARAMS ((cpp_reader *));
4d6baafa 78static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
2c3fcba6 79static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
10cf9bde
NB
80static U_CHAR *parse_slow PARAMS ((cpp_reader *, const U_CHAR *, int,
81 unsigned int *));
82static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
93c80368 83static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
0d9f234d 84static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
87062813 85static bool trigraph_p PARAMS ((cpp_reader *));
0d9f234d 86static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
93c80368 87static int name_p PARAMS ((cpp_reader *, const cpp_string *));
62729350
NB
88static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
89 const unsigned char *, unsigned int *));
5fddcffc 90static tokenrun *next_tokenrun PARAMS ((tokenrun *));
f617b8e2 91
c8a96070 92static unsigned int hex_digit_value PARAMS ((unsigned int));
6142088c 93static _cpp_buff *new_buff PARAMS ((size_t));
15dad1d9 94
041c3194 95/* Utility routine:
9e62c811 96
bfb9dc7f
ZW
97 Compares, the token TOKEN to the NUL-terminated string STRING.
98 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
041c3194 99int
bfb9dc7f
ZW
100cpp_ideq (token, string)
101 const cpp_token *token;
041c3194
ZW
102 const char *string;
103{
bfb9dc7f 104 if (token->type != CPP_NAME)
041c3194 105 return 0;
bfb9dc7f 106
a28c5035 107 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
15dad1d9 108}
1368ee70 109
87062813
NB
110/* Call when meeting a newline, assumed to be in buffer->cur[-1].
111 Returns with buffer->cur pointing to the character immediately
112 following the newline (combination). */
113static void
114handle_newline (pfile)
1444f2ed 115 cpp_reader *pfile;
0d9f234d 116{
87062813 117 cpp_buffer *buffer = pfile->buffer;
0d9f234d 118
87062813 119 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
4d6baafa
NB
120 only accept CR-LF; maybe we should fall back to that behaviour? */
121 if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
87062813 122 buffer->cur++;
0d9f234d 123
87062813
NB
124 buffer->line_base = buffer->cur;
125 buffer->col_adjust = 0;
126 pfile->line++;
0d9f234d
NB
127}
128
87062813
NB
129/* Subroutine of skip_escaped_newlines; called when a 3-character
130 sequence beginning with "??" is encountered. buffer->cur points to
131 the second '?'.
132
133 Warn if necessary, and returns true if the sequence forms a
134 trigraph and the trigraph should be honoured. */
135static bool
136trigraph_p (pfile)
45b966db 137 cpp_reader *pfile;
45b966db 138{
87062813
NB
139 cpp_buffer *buffer = pfile->buffer;
140 cppchar_t from_char = buffer->cur[1];
141 bool accept;
142
143 if (!_cpp_trigraph_map[from_char])
144 return false;
145
146 accept = CPP_OPTION (pfile, trigraphs);
147
cbcff6df
NB
148 /* Don't warn about trigraphs in comments. */
149 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
45b966db 150 {
041c3194 151 if (accept)
87062813 152 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 1,
041c3194 153 "trigraph ??%c converted to %c",
0d9f234d
NB
154 (int) from_char,
155 (int) _cpp_trigraph_map[from_char]);
4a5b68a2
NB
156 else if (buffer->cur != buffer->last_Wtrigraphs)
157 {
158 buffer->last_Wtrigraphs = buffer->cur;
67821e3a 159 cpp_warning_with_line (pfile, pfile->line,
87062813 160 CPP_BUF_COL (buffer) - 1,
4a5b68a2
NB
161 "trigraph ??%c ignored", (int) from_char);
162 }
45b966db 163 }
0d9f234d 164
041c3194 165 return accept;
45b966db
ZW
166}
167
87062813 168/* Skips any escaped newlines introduced by '?' or a '\\', assumed to
480709cc
NB
169 lie in buffer->cur[-1]. Returns the next byte, which will be in
170 buffer->cur[-1]. This routine performs preprocessing stages 1 and
171 2 of the ISO C standard. */
0d9f234d 172static cppchar_t
87062813 173skip_escaped_newlines (pfile)
29401c30 174 cpp_reader *pfile;
45b966db 175{
29401c30 176 cpp_buffer *buffer = pfile->buffer;
87062813 177 cppchar_t next = buffer->cur[-1];
29401c30 178
a5c3cccd
NB
179 /* Only do this if we apply stages 1 and 2. */
180 if (!buffer->from_stage3)
041c3194 181 {
a5c3cccd 182 const unsigned char *saved_cur;
87062813 183 cppchar_t next1;
a5c3cccd
NB
184
185 do
0d9f234d 186 {
a5c3cccd 187 if (next == '?')
0d9f234d 188 {
4d6baafa 189 if (buffer->cur[0] != '?' || !trigraph_p (pfile))
87062813 190 break;
a5c3cccd 191
87062813
NB
192 /* Translate the trigraph. */
193 next = _cpp_trigraph_map[buffer->cur[1]];
194 buffer->cur += 2;
4d6baafa 195 if (next != '\\')
a5c3cccd 196 break;
a5c3cccd
NB
197 }
198
4d6baafa
NB
199 if (buffer->cur == buffer->rlimit)
200 break;
201
87062813
NB
202 /* We have a backslash, and room for at least one more
203 character. Skip horizontal whitespace. */
204 saved_cur = buffer->cur;
a5c3cccd 205 do
87062813
NB
206 next1 = *buffer->cur++;
207 while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
041c3194 208
a5c3cccd 209 if (!is_vspace (next1))
0d9f234d 210 {
87062813 211 buffer->cur = saved_cur;
0d9f234d
NB
212 break;
213 }
45b966db 214
87062813
NB
215 if (saved_cur != buffer->cur - 1
216 && !pfile->state.lexing_comment)
29401c30 217 cpp_warning (pfile, "backslash and newline separated by space");
0d9f234d 218
87062813 219 handle_newline (pfile);
480709cc 220 buffer->backup_to = buffer->cur;
87062813
NB
221 if (buffer->cur == buffer->rlimit)
222 {
223 cpp_pedwarn (pfile, "backslash-newline at end of file");
224 next = EOF;
225 }
226 else
227 next = *buffer->cur++;
0d9f234d 228 }
a5c3cccd 229 while (next == '\\' || next == '?');
041c3194 230 }
45b966db 231
0d9f234d 232 return next;
45b966db
ZW
233}
234
0d9f234d 235/* Obtain the next character, after trigraph conversion and skipping
87062813
NB
236 an arbitrarily long string of escaped newlines. The common case of
237 no trigraphs or escaped newlines falls through quickly. On return,
480709cc
NB
238 buffer->backup_to points to where to return to if the character is
239 not to be processed. */
0d9f234d 240static cppchar_t
29401c30
NB
241get_effective_char (pfile)
242 cpp_reader *pfile;
64aaf407 243{
4d6baafa 244 cppchar_t next;
480709cc 245 cpp_buffer *buffer = pfile->buffer;
0d9f234d 246
480709cc 247 buffer->backup_to = buffer->cur;
4d6baafa
NB
248 next = *buffer->cur++;
249 if (__builtin_expect (next == '?' || next == '\\', 0))
250 next = skip_escaped_newlines (pfile);
0d9f234d 251
480709cc 252 return next;
64aaf407
NB
253}
254
0d9f234d
NB
255/* Skip a C-style block comment. We find the end of the comment by
256 seeing if an asterisk is before every '/' we encounter. Returns
257 non-zero if comment terminated by EOF, zero otherwise. */
041c3194
ZW
258static int
259skip_block_comment (pfile)
45b966db
ZW
260 cpp_reader *pfile;
261{
041c3194 262 cpp_buffer *buffer = pfile->buffer;
d8090680 263 cppchar_t c = EOF, prevc = EOF;
0d9f234d 264
cbcff6df 265 pfile->state.lexing_comment = 1;
0d9f234d 266 while (buffer->cur != buffer->rlimit)
45b966db 267 {
0d9f234d
NB
268 prevc = c, c = *buffer->cur++;
269
0d9f234d 270 /* FIXME: For speed, create a new character class of characters
93c80368 271 of interest inside block comments. */
0d9f234d 272 if (c == '?' || c == '\\')
87062813 273 c = skip_escaped_newlines (pfile);
041c3194 274
0d9f234d
NB
275 /* People like decorating comments with '*', so check for '/'
276 instead for efficiency. */
041c3194 277 if (c == '/')
45b966db 278 {
0d9f234d
NB
279 if (prevc == '*')
280 break;
041c3194 281
0d9f234d 282 /* Warn about potential nested comments, but not if the '/'
a1f300c0 283 comes immediately before the true comment delimiter.
041c3194 284 Don't bother to get it right across escaped newlines. */
0d9f234d 285 if (CPP_OPTION (pfile, warn_comments)
87062813
NB
286 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
287 cpp_warning_with_line (pfile,
288 pfile->line, CPP_BUF_COL (buffer),
289 "\"/*\" within comment");
45b966db 290 }
91fcd158 291 else if (is_vspace (c))
87062813 292 handle_newline (pfile);
52fadca8 293 else if (c == '\t')
0d9f234d 294 adjust_column (pfile);
45b966db 295 }
041c3194 296
cbcff6df 297 pfile->state.lexing_comment = 0;
0d9f234d 298 return c != '/' || prevc != '*';
45b966db
ZW
299}
300
480709cc
NB
301/* Skip a C++ line comment, leaving buffer->cur pointing to the
302 terminating newline. Handles escaped newlines. Returns non-zero
303 if a multiline comment. */
041c3194 304static int
cbcff6df
NB
305skip_line_comment (pfile)
306 cpp_reader *pfile;
45b966db 307{
cbcff6df 308 cpp_buffer *buffer = pfile->buffer;
67821e3a 309 unsigned int orig_line = pfile->line;
0d9f234d 310 cppchar_t c;
041c3194 311
cbcff6df 312 pfile->state.lexing_comment = 1;
0d9f234d 313 do
041c3194 314 {
0d9f234d 315 if (buffer->cur == buffer->rlimit)
480709cc 316 goto at_eof;
041c3194 317
0d9f234d
NB
318 c = *buffer->cur++;
319 if (c == '?' || c == '\\')
87062813 320 c = skip_escaped_newlines (pfile);
041c3194 321 }
0d9f234d 322 while (!is_vspace (c));
45b966db 323
480709cc
NB
324 /* Step back over the newline, except at EOF. */
325 buffer->cur--;
326 at_eof:
327
cbcff6df 328 pfile->state.lexing_comment = 0;
67821e3a 329 return orig_line != pfile->line;
041c3194 330}
45b966db 331
0d9f234d
NB
332/* pfile->buffer->cur is one beyond the \t character. Update
333 col_adjust so we track the column correctly. */
52fadca8 334static void
0d9f234d 335adjust_column (pfile)
52fadca8 336 cpp_reader *pfile;
52fadca8 337{
0d9f234d
NB
338 cpp_buffer *buffer = pfile->buffer;
339 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
52fadca8
NB
340
341 /* Round it up to multiple of the tabstop, but subtract 1 since the
342 tab itself occupies a character position. */
0d9f234d
NB
343 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
344 - col % CPP_OPTION (pfile, tabstop)) - 1;
52fadca8
NB
345}
346
0d9f234d
NB
347/* Skips whitespace, saving the next non-whitespace character.
348 Adjusts pfile->col_adjust to account for tabs. Without this,
349 tokens might be assigned an incorrect column. */
4d6baafa 350static int
0d9f234d 351skip_whitespace (pfile, c)
041c3194 352 cpp_reader *pfile;
0d9f234d 353 cppchar_t c;
041c3194
ZW
354{
355 cpp_buffer *buffer = pfile->buffer;
0d9f234d 356 unsigned int warned = 0;
45b966db 357
0d9f234d 358 do
041c3194 359 {
91fcd158
NB
360 /* Horizontal space always OK. */
361 if (c == ' ')
0d9f234d 362 ;
91fcd158 363 else if (c == '\t')
0d9f234d
NB
364 adjust_column (pfile);
365 /* Just \f \v or \0 left. */
91fcd158 366 else if (c == '\0')
041c3194 367 {
4d6baafa
NB
368 if (buffer->cur - 1 == buffer->rlimit)
369 return 0;
91fcd158 370 if (!warned)
0d9f234d
NB
371 {
372 cpp_warning (pfile, "null character(s) ignored");
373 warned = 1;
374 }
45b966db 375 }
93c80368 376 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
67821e3a 377 cpp_pedwarn_with_line (pfile, pfile->line,
91fcd158
NB
378 CPP_BUF_COL (buffer),
379 "%s in preprocessing directive",
380 c == '\f' ? "form feed" : "vertical tab");
0d9f234d 381
0d9f234d 382 c = *buffer->cur++;
45b966db 383 }
ec5c56db 384 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
385 while (is_nvspace (c));
386
480709cc 387 buffer->cur--;
4d6baafa 388 return 1;
041c3194 389}
45b966db 390
93c80368
NB
391/* See if the characters of a number token are valid in a name (no
392 '.', '+' or '-'). */
393static int
394name_p (pfile, string)
395 cpp_reader *pfile;
396 const cpp_string *string;
397{
398 unsigned int i;
399
400 for (i = 0; i < string->len; i++)
401 if (!is_idchar (string->text[i]))
402 return 0;
403
404 return 1;
405}
406
2c3fcba6
ZW
407/* Parse an identifier, skipping embedded backslash-newlines. This is
408 a critical inner loop. The common case is an identifier which has
409 not been split by backslash-newline, does not contain a dollar
410 sign, and has already been scanned (roughly 10:1 ratio of
411 seen:unseen identifiers in normal code; the distribution is
412 Poisson-like). Second most common case is a new identifier, not
413 split and no dollar sign. The other possibilities are rare and
10cf9bde 414 have been relegated to parse_slow. */
0d9f234d 415static cpp_hashnode *
2c3fcba6 416parse_identifier (pfile)
45b966db 417 cpp_reader *pfile;
45b966db 418{
93c80368 419 cpp_hashnode *result;
10cf9bde 420 const U_CHAR *cur, *base;
2c3fcba6
ZW
421
422 /* Fast-path loop. Skim over a normal identifier.
423 N.B. ISIDNUM does not include $. */
4d6baafa
NB
424 cur = pfile->buffer->cur;
425 while (ISIDNUM (*cur))
2c3fcba6 426 cur++;
2c3fcba6
ZW
427
428 /* Check for slow-path cases. */
4d6baafa 429 if (*cur == '?' || *cur == '\\' || *cur == '$')
10cf9bde
NB
430 {
431 unsigned int len;
432
433 base = parse_slow (pfile, cur, 0, &len);
434 result = (cpp_hashnode *)
435 ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
436 }
2c3fcba6
ZW
437 else
438 {
10cf9bde
NB
439 base = pfile->buffer->cur - 1;
440 pfile->buffer->cur = cur;
2c3fcba6
ZW
441 result = (cpp_hashnode *)
442 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
2c3fcba6
ZW
443 }
444
445 /* Rarely, identifiers require diagnostics when lexed.
446 XXX Has to be forced out of the fast path. */
447 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
448 && !pfile->state.skipping, 0))
449 {
450 /* It is allowed to poison the same identifier twice. */
451 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
452 cpp_error (pfile, "attempt to use poisoned \"%s\"",
453 NODE_NAME (result));
454
455 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
456 replacement list of a variadic macro. */
457 if (result == pfile->spec_nodes.n__VA_ARGS__
458 && !pfile->state.va_args_ok)
459 cpp_pedwarn (pfile,
460 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
461 }
462
463 return result;
464}
465
10cf9bde
NB
466/* Slow path. This handles numbers and identifiers which have been
467 split, or contain dollar signs. The part of the token from
468 PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
469 1 if it's a number, and 2 if it has a leading period. Returns a
470 pointer to the token's NUL-terminated spelling in permanent
471 storage, and sets PLEN to its length. */
472static U_CHAR *
473parse_slow (pfile, cur, number_p, plen)
2c3fcba6
ZW
474 cpp_reader *pfile;
475 const U_CHAR *cur;
10cf9bde
NB
476 int number_p;
477 unsigned int *plen;
2c3fcba6 478{
0d9f234d 479 cpp_buffer *buffer = pfile->buffer;
2c3fcba6 480 const U_CHAR *base = buffer->cur - 1;
2a967f3d 481 struct obstack *stack = &pfile->hash_table->stack;
10cf9bde
NB
482 unsigned int c, prevc, saw_dollar = 0;
483
484 /* Place any leading period. */
485 if (number_p == 2)
486 obstack_1grow (stack, '.');
2c3fcba6
ZW
487
488 /* Copy the part of the token which is known to be okay. */
489 obstack_grow (stack, base, cur - base);
041c3194 490
2c3fcba6
ZW
491 /* Now process the part which isn't. We are looking at one of
492 '$', '\\', or '?' on entry to this loop. */
10cf9bde 493 prevc = cur[-1];
2c3fcba6
ZW
494 c = *cur++;
495 buffer->cur = cur;
10cf9bde 496 for (;;)
041c3194 497 {
10cf9bde
NB
498 /* Potential escaped newline? */
499 buffer->backup_to = buffer->cur - 1;
500 if (c == '?' || c == '\\')
501 c = skip_escaped_newlines (pfile);
502
503 if (!is_idchar (c))
504 {
505 if (!number_p)
506 break;
507 if (c != '.' && !VALID_SIGN (c, prevc))
508 break;
509 }
510
511 /* Handle normal identifier characters in this loop. */
512 do
2c3fcba6 513 {
10cf9bde 514 prevc = c;
2c3fcba6 515 obstack_1grow (stack, c);
45b966db 516
2c3fcba6
ZW
517 if (c == '$')
518 saw_dollar++;
ba89d661 519
2c3fcba6
ZW
520 c = *buffer->cur++;
521 }
10cf9bde 522 while (is_idchar (c));
041c3194 523 }
0d9f234d 524
4d6baafa 525 /* Step back over the unwanted char. */
480709cc 526 BACKUP ();
93c80368 527
4fe9b91c 528 /* $ is not an identifier character in the standard, but is commonly
0d9f234d
NB
529 accepted as an extension. Don't warn about it in skipped
530 conditional blocks. */
cef0d199 531 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
10cf9bde 532 cpp_pedwarn (pfile, "'$' character(s) in identifier or number");
0d9f234d 533
10cf9bde
NB
534 /* Identifiers and numbers are null-terminated. */
535 *plen = obstack_object_size (stack);
2a967f3d 536 obstack_1grow (stack, '\0');
10cf9bde 537 return obstack_finish (stack);
45b966db
ZW
538}
539
5d8ebbd8
NB
540/* Parse a number, beginning with character C, skipping embedded
541 backslash-newlines. LEADING_PERIOD is non-zero if there was a "."
542 before C. Place the result in NUMBER. */
45b966db 543static void
10cf9bde 544parse_number (pfile, number, leading_period)
45b966db 545 cpp_reader *pfile;
0d9f234d 546 cpp_string *number;
93c80368 547 int leading_period;
45b966db 548{
10cf9bde 549 const U_CHAR *cur;
45b966db 550
10cf9bde
NB
551 /* Fast-path loop. Skim over a normal number.
552 N.B. ISIDNUM does not include $. */
553 cur = pfile->buffer->cur;
554 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
555 cur++;
cbcff6df 556
10cf9bde
NB
557 /* Check for slow-path cases. */
558 if (*cur == '?' || *cur == '\\' || *cur == '$')
559 number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
560 else
041c3194 561 {
10cf9bde
NB
562 const U_CHAR *base = pfile->buffer->cur - 1;
563 U_CHAR *dest;
0d9f234d 564
10cf9bde
NB
565 number->len = cur - base + leading_period;
566 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
567 dest[number->len] = '\0';
568 number->text = dest;
45b966db 569
10cf9bde
NB
570 if (leading_period)
571 *dest++ = '.';
572 memcpy (dest, base, cur - base);
573 pfile->buffer->cur = cur;
45b966db 574 }
0d9f234d
NB
575}
576
93c80368
NB
577/* Subroutine of parse_string. */
578static int
579unescaped_terminator_p (pfile, dest)
580 cpp_reader *pfile;
581 const unsigned char *dest;
582{
583 const unsigned char *start, *temp;
584
585 /* In #include-style directives, terminators are not escapeable. */
586 if (pfile->state.angled_headers)
587 return 1;
588
ece54d54 589 start = BUFF_FRONT (pfile->u_buff);
93c80368
NB
590
591 /* An odd number of consecutive backslashes represents an escaped
592 terminator. */
593 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
594 ;
595
596 return ((dest - temp) & 1) == 0;
597}
598
0d9f234d 599/* Parses a string, character constant, or angle-bracketed header file
7868b4a2
NB
600 name. Handles embedded trigraphs and escaped newlines. The stored
601 string is guaranteed NUL-terminated, but it is not guaranteed that
602 this is the first NUL since embedded NULs are preserved.
45b966db 603
87062813
NB
604 When this function returns, buffer->cur points to the next
605 character to be processed. */
041c3194 606static void
0d9f234d 607parse_string (pfile, token, terminator)
45b966db 608 cpp_reader *pfile;
041c3194 609 cpp_token *token;
0d9f234d 610 cppchar_t terminator;
45b966db 611{
041c3194 612 cpp_buffer *buffer = pfile->buffer;
93c80368 613 unsigned char *dest, *limit;
0d9f234d 614 cppchar_t c;
d4e6133f 615 bool warned_nulls = false;
0d9f234d 616
ece54d54
NB
617 dest = BUFF_FRONT (pfile->u_buff);
618 limit = BUFF_LIMIT (pfile->u_buff);
93c80368 619
0d9f234d 620 for (;;)
45b966db 621 {
87062813 622 /* We need room for another char, possibly the terminating NUL. */
ece54d54
NB
623 if ((size_t) (limit - dest) < 1)
624 {
625 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
8c3b2693 626 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
ece54d54
NB
627 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
628 limit = BUFF_LIMIT (pfile->u_buff);
629 }
7868b4a2 630
0d9f234d 631 /* Handle trigraphs, escaped newlines etc. */
87062813 632 c = *buffer->cur++;
0d9f234d 633 if (c == '?' || c == '\\')
87062813 634 c = skip_escaped_newlines (pfile);
45b966db 635
87062813 636 if (c == terminator)
45b966db 637 {
87062813
NB
638 if (unescaped_terminator_p (pfile, dest))
639 break;
0d9f234d
NB
640 }
641 else if (is_vspace (c))
642 {
d4e6133f
NB
643 /* No string literal may extend over multiple lines. In
644 assembly language, suppress the error except for <>
645 includes. This is a kludge around not knowing where
646 comments are. */
647 unterminated:
648 if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
649 cpp_error (pfile, "missing terminating %c character", terminator);
650 buffer->cur--;
651 break;
0d9f234d 652 }
4d6baafa 653 else if (c == '\0')
0d9f234d 654 {
4d6baafa 655 if (buffer->cur - 1 == buffer->rlimit)
d4e6133f 656 goto unterminated;
4d6baafa
NB
657 if (!warned_nulls)
658 {
659 warned_nulls = true;
660 cpp_warning (pfile, "null character(s) preserved in literal");
661 }
45b966db 662 }
45b966db 663
93c80368 664 *dest++ = c;
45b966db
ZW
665 }
666
7868b4a2 667 *dest = '\0';
45b966db 668
ece54d54
NB
669 token->val.str.text = BUFF_FRONT (pfile->u_buff);
670 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
671 BUFF_FRONT (pfile->u_buff) = dest + 1;
0d9f234d 672}
041c3194 673
93c80368 674/* The stored comment includes the comment start and any terminator. */
9e62c811 675static void
0d9f234d
NB
676save_comment (pfile, token, from)
677 cpp_reader *pfile;
041c3194
ZW
678 cpp_token *token;
679 const unsigned char *from;
9e62c811 680{
041c3194 681 unsigned char *buffer;
0d9f234d 682 unsigned int len;
0d9f234d 683
1c6d33ef 684 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
480709cc 685
3542203b
NB
686 /* C++ comments probably (not definitely) have moved past a new
687 line, which we don't want to save in the comment. */
480709cc 688 if (is_vspace (pfile->buffer->cur[-1]))
3542203b 689 len--;
ece54d54 690 buffer = _cpp_unaligned_alloc (pfile, len);
041c3194 691
041c3194 692 token->type = CPP_COMMENT;
bfb9dc7f 693 token->val.str.len = len;
0d9f234d 694 token->val.str.text = buffer;
45b966db 695
1c6d33ef
NB
696 buffer[0] = '/';
697 memcpy (buffer + 1, from, len - 1);
0d9f234d 698}
45b966db 699
5fddcffc
NB
700/* Allocate COUNT tokens for RUN. */
701void
702_cpp_init_tokenrun (run, count)
703 tokenrun *run;
704 unsigned int count;
705{
706 run->base = xnewvec (cpp_token, count);
707 run->limit = run->base + count;
708 run->next = NULL;
709}
710
711/* Returns the next tokenrun, or creates one if there is none. */
712static tokenrun *
713next_tokenrun (run)
714 tokenrun *run;
715{
716 if (run->next == NULL)
717 {
718 run->next = xnew (tokenrun);
bdcbe496 719 run->next->prev = run;
5fddcffc
NB
720 _cpp_init_tokenrun (run->next, 250);
721 }
722
723 return run->next;
724}
725
4ed5bcfb
NB
726/* Allocate a single token that is invalidated at the same time as the
727 rest of the tokens on the line. Has its line and col set to the
728 same as the last lexed token, so that diagnostics appear in the
729 right place. */
730cpp_token *
731_cpp_temp_token (pfile)
732 cpp_reader *pfile;
733{
734 cpp_token *old, *result;
735
736 old = pfile->cur_token - 1;
737 if (pfile->cur_token == pfile->cur_run->limit)
738 {
739 pfile->cur_run = next_tokenrun (pfile->cur_run);
740 pfile->cur_token = pfile->cur_run->base;
741 }
742
743 result = pfile->cur_token++;
744 result->line = old->line;
745 result->col = old->col;
746 return result;
747}
748
14baae01
NB
749/* Lex a token into RESULT (external interface). Takes care of issues
750 like directive handling, token lookahead, multiple include
a1f300c0 751 optimization and skipping. */
345894b4
NB
752const cpp_token *
753_cpp_lex_token (pfile)
45b966db 754 cpp_reader *pfile;
5fddcffc 755{
bdcbe496 756 cpp_token *result;
5fddcffc 757
bdcbe496 758 for (;;)
5fddcffc 759 {
bdcbe496 760 if (pfile->cur_token == pfile->cur_run->limit)
5fddcffc 761 {
bdcbe496
NB
762 pfile->cur_run = next_tokenrun (pfile->cur_run);
763 pfile->cur_token = pfile->cur_run->base;
5fddcffc
NB
764 }
765
bdcbe496 766 if (pfile->lookaheads)
14baae01
NB
767 {
768 pfile->lookaheads--;
769 result = pfile->cur_token++;
770 }
bdcbe496 771 else
14baae01 772 result = _cpp_lex_direct (pfile);
bdcbe496
NB
773
774 if (result->flags & BOL)
5fddcffc 775 {
bdcbe496
NB
776 /* Is this a directive. If _cpp_handle_directive returns
777 false, it is an assembler #. */
778 if (result->type == CPP_HASH
e808ec9c
NB
779 /* 6.10.3 p 11: Directives in a list of macro arguments
780 gives undefined behavior. This implementation
781 handles the directive as normal. */
782 && pfile->state.parsing_args != 1
bdcbe496
NB
783 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
784 continue;
97293897
NB
785 if (pfile->cb.line_change && !pfile->state.skipping)
786 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
5fddcffc 787 }
5fddcffc 788
bdcbe496
NB
789 /* We don't skip tokens in directives. */
790 if (pfile->state.in_directive)
791 break;
5fddcffc 792
bdcbe496 793 /* Outside a directive, invalidate controlling macros. At file
14baae01 794 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
bdcbe496 795 get here and MI optimisation works. */
5fddcffc 796 pfile->mi_valid = false;
bdcbe496
NB
797
798 if (!pfile->state.skipping || result->type == CPP_EOF)
799 break;
5fddcffc
NB
800 }
801
345894b4 802 return result;
5fddcffc
NB
803}
804
/* If the next effective character is CHAR, consume it and set the
   token type to THEN_TYPE; otherwise rewind and use ELSE_TYPE.
   Requires `pfile', `buffer' and `result' to be in scope.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)	\
  do {						\
    if (get_effective_char (pfile) == CHAR)	\
      result->type = THEN_TYPE;			\
    else					\
      {						\
        BACKUP ();				\
        result->type = ELSE_TYPE;		\
      }						\
  } while (0)
815
14baae01
NB
816/* Lex a token into pfile->cur_token, which is also incremented, to
817 get diagnostics pointing to the correct location.
818
819 Does not handle issues such as token lookahead, multiple-include
820 optimisation, directives, skipping etc. This function is only
821 suitable for use by _cpp_lex_token, and in special cases like
822 lex_expansion_token which doesn't care for any of these issues.
823
824 When meeting a newline, returns CPP_EOF if parsing a directive,
825 otherwise returns to the start of the token buffer if permissible.
826 Returns the location of the lexed token. */
827cpp_token *
828_cpp_lex_direct (pfile)
5fddcffc 829 cpp_reader *pfile;
45b966db 830{
0d9f234d 831 cppchar_t c;
adb84b42 832 cpp_buffer *buffer;
0d9f234d 833 const unsigned char *comment_start;
14baae01 834 cpp_token *result = pfile->cur_token++;
9ec7291f 835
5fddcffc 836 fresh_line:
adb84b42 837 buffer = pfile->buffer;
bd969772
NB
838 result->flags = buffer->saved_flags;
839 buffer->saved_flags = 0;
5fddcffc 840 update_tokens_line:
1444f2ed 841 result->line = pfile->line;
041c3194 842
5fddcffc 843 skipped_white:
480709cc 844 c = *buffer->cur++;
5fddcffc 845 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
5fddcffc
NB
846
847 trigraph:
0d9f234d 848 switch (c)
45b966db 849 {
4d6baafa
NB
850 case ' ': case '\t': case '\f': case '\v': case '\0':
851 result->flags |= PREV_WHITE;
852 if (skip_whitespace (pfile, c))
853 goto skipped_white;
854
855 /* EOF. */
856 buffer->cur--;
bdcbe496 857 buffer->saved_flags = BOL;
5fddcffc 858 if (!pfile->state.parsing_args && !pfile->state.in_directive)
ef6e958a 859 {
bdcbe496 860 if (buffer->cur != buffer->line_base)
5fddcffc
NB
861 {
862 /* Non-empty files should end in a newline. Don't warn
863 for command line and _Pragma buffers. */
864 if (!buffer->from_stage3)
865 cpp_pedwarn (pfile, "no newline at end of file");
87062813 866 handle_newline (pfile);
7364fdd8 867 }
bdcbe496
NB
868
869 /* Don't pop the last buffer. */
870 if (buffer->prev)
871 {
872 unsigned char stop = buffer->return_at_eof;
873
874 _cpp_pop_buffer (pfile);
875 if (!stop)
876 goto fresh_line;
877 }
ef6e958a 878 }
0d9f234d 879 result->type = CPP_EOF;
5fddcffc 880 break;
45b966db 881
0d9f234d 882 case '\n': case '\r':
87062813 883 handle_newline (pfile);
bdcbe496
NB
884 buffer->saved_flags = BOL;
885 if (! pfile->state.in_directive)
45b966db 886 {
4ed5bcfb
NB
887 if (pfile->state.parsing_args == 2)
888 buffer->saved_flags |= PREV_WHITE;
bdcbe496
NB
889 if (!pfile->keep_tokens)
890 {
891 pfile->cur_run = &pfile->base_run;
892 result = pfile->base_run.base;
893 pfile->cur_token = result + 1;
894 }
895 goto fresh_line;
45b966db 896 }
5fddcffc
NB
897 result->type = CPP_EOF;
898 break;
46d07497 899
0d9f234d
NB
900 case '?':
901 case '\\':
902 /* These could start an escaped newline, or '?' a trigraph. Let
903 skip_escaped_newlines do all the work. */
904 {
67821e3a 905 unsigned int line = pfile->line;
0d9f234d 906
87062813 907 c = skip_escaped_newlines (pfile);
67821e3a 908 if (line != pfile->line)
87062813 909 {
480709cc 910 buffer->cur--;
87062813
NB
911 /* We had at least one escaped newline of some sort.
912 Update the token's line and column. */
5fddcffc 913 goto update_tokens_line;
87062813 914 }
480709cc 915 }
0d9f234d 916
480709cc
NB
917 /* We are either the original '?' or '\\', or a trigraph. */
918 if (c == '?')
0d9f234d 919 result->type = CPP_QUERY;
480709cc
NB
920 else if (c == '\\')
921 goto random_char;
922 else
923 goto trigraph;
0d9f234d 924 break;
46d07497 925
0d9f234d
NB
926 case '0': case '1': case '2': case '3': case '4':
927 case '5': case '6': case '7': case '8': case '9':
928 result->type = CPP_NUMBER;
10cf9bde 929 parse_number (pfile, &result->val.str, 0);
0d9f234d 930 break;
46d07497 931
0abc6a6a
NB
932 case 'L':
933 /* 'L' may introduce wide characters or strings. */
934 {
935 const unsigned char *pos = buffer->cur;
0d9f234d 936
0abc6a6a
NB
937 c = get_effective_char (pfile);
938 if (c == '\'' || c == '"')
939 {
940 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
941 parse_string (pfile, result, c);
942 break;
943 }
944 buffer->cur = pos;
945 }
946 /* Fall through. */
947
948 start_ident:
0d9f234d
NB
949 case '_':
950 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
951 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
952 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
953 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
954 case 'y': case 'z':
955 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
0abc6a6a 956 case 'G': case 'H': case 'I': case 'J': case 'K':
0d9f234d
NB
957 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
958 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
959 case 'Y': case 'Z':
960 result->type = CPP_NAME;
2c3fcba6 961 result->val.node = parse_identifier (pfile);
0d9f234d 962
0d9f234d 963 /* Convert named operators to their proper types. */
0abc6a6a 964 if (result->val.node->flags & NODE_OPERATOR)
0d9f234d
NB
965 {
966 result->flags |= NAMED_OP;
93c80368 967 result->type = result->val.node->value.operator;
0d9f234d
NB
968 }
969 break;
970
971 case '\'':
972 case '"':
973 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
0d9f234d
NB
974 parse_string (pfile, result, c);
975 break;
041c3194 976
0d9f234d 977 case '/':
1c6d33ef
NB
978 /* A potential block or line comment. */
979 comment_start = buffer->cur;
29401c30 980 c = get_effective_char (pfile);
480709cc 981
1c6d33ef
NB
982 if (c == '*')
983 {
0d9f234d 984 if (skip_block_comment (pfile))
67821e3a 985 cpp_error (pfile, "unterminated comment");
0d9f234d 986 }
480709cc
NB
987 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
988 || CPP_IN_SYSTEM_HEADER (pfile)))
0d9f234d 989 {
bdb05a7b
NB
990 /* Warn about comments only if pedantically GNUC89, and not
991 in system headers. */
992 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 993 && ! buffer->warned_cplusplus_comments)
041c3194 994 {
1c6d33ef
NB
995 cpp_pedwarn (pfile,
996 "C++ style comments are not allowed in ISO C89");
997 cpp_pedwarn (pfile,
998 "(this will be reported only once per input file)");
999 buffer->warned_cplusplus_comments = 1;
1000 }
0d9f234d 1001
01ef6563 1002 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
50410426 1003 cpp_warning (pfile, "multi-line comment");
1c6d33ef 1004 }
480709cc
NB
1005 else if (c == '=')
1006 {
1007 result->type = CPP_DIV_EQ;
1008 break;
1009 }
1010 else
1011 {
1012 BACKUP ();
1013 result->type = CPP_DIV;
1014 break;
1015 }
0d9f234d 1016
1c6d33ef
NB
1017 if (!pfile->state.save_comments)
1018 {
1019 result->flags |= PREV_WHITE;
5fddcffc 1020 goto update_tokens_line;
0d9f234d 1021 }
1c6d33ef
NB
1022
1023 /* Save the comment as a token in its own right. */
1024 save_comment (pfile, result, comment_start);
bdcbe496 1025 break;
0d9f234d
NB
1026
1027 case '<':
1028 if (pfile->state.angled_headers)
1029 {
1030 result->type = CPP_HEADER_NAME;
480709cc
NB
1031 parse_string (pfile, result, '>');
1032 break;
0d9f234d 1033 }
45b966db 1034
29401c30 1035 c = get_effective_char (pfile);
0d9f234d 1036 if (c == '=')
480709cc 1037 result->type = CPP_LESS_EQ;
0d9f234d 1038 else if (c == '<')
480709cc 1039 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
0d9f234d 1040 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
480709cc 1041 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
0d9f234d
NB
1042 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1043 {
480709cc 1044 result->type = CPP_OPEN_SQUARE;
0d9f234d
NB
1045 result->flags |= DIGRAPH;
1046 }
1047 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1048 {
480709cc 1049 result->type = CPP_OPEN_BRACE;
0d9f234d
NB
1050 result->flags |= DIGRAPH;
1051 }
480709cc
NB
1052 else
1053 {
1054 BACKUP ();
1055 result->type = CPP_LESS;
1056 }
0d9f234d
NB
1057 break;
1058
1059 case '>':
29401c30 1060 c = get_effective_char (pfile);
0d9f234d 1061 if (c == '=')
480709cc 1062 result->type = CPP_GREATER_EQ;
0d9f234d 1063 else if (c == '>')
480709cc 1064 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
0d9f234d 1065 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
480709cc
NB
1066 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1067 else
0d9f234d 1068 {
480709cc
NB
1069 BACKUP ();
1070 result->type = CPP_GREATER;
0d9f234d
NB
1071 }
1072 break;
1073
cbcff6df 1074 case '%':
480709cc
NB
1075 c = get_effective_char (pfile);
1076 if (c == '=')
1077 result->type = CPP_MOD_EQ;
1078 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1079 {
1080 result->flags |= DIGRAPH;
1081 result->type = CPP_HASH;
1082 if (get_effective_char (pfile) == '%')
1083 {
1084 const unsigned char *pos = buffer->cur;
1085
1086 if (get_effective_char (pfile) == ':')
1087 result->type = CPP_PASTE;
1088 else
1089 buffer->cur = pos - 1;
1090 }
1091 else
1092 BACKUP ();
1093 }
1094 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1095 {
1096 result->flags |= DIGRAPH;
1097 result->type = CPP_CLOSE_BRACE;
1098 }
1099 else
1100 {
1101 BACKUP ();
1102 result->type = CPP_MOD;
1103 }
0d9f234d
NB
1104 break;
1105
cbcff6df 1106 case '.':
480709cc
NB
1107 result->type = CPP_DOT;
1108 c = get_effective_char (pfile);
1109 if (c == '.')
1110 {
1111 const unsigned char *pos = buffer->cur;
1112
1113 if (get_effective_char (pfile) == '.')
1114 result->type = CPP_ELLIPSIS;
1115 else
1116 buffer->cur = pos - 1;
1117 }
1118 /* All known character sets have 0...9 contiguous. */
0df6c2c7 1119 else if (ISDIGIT (c))
480709cc
NB
1120 {
1121 result->type = CPP_NUMBER;
10cf9bde 1122 parse_number (pfile, &result->val.str, 1);
480709cc
NB
1123 }
1124 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1125 result->type = CPP_DOT_STAR;
1126 else
1127 BACKUP ();
0d9f234d 1128 break;
45b966db 1129
0d9f234d 1130 case '+':
29401c30 1131 c = get_effective_char (pfile);
480709cc
NB
1132 if (c == '+')
1133 result->type = CPP_PLUS_PLUS;
1134 else if (c == '=')
1135 result->type = CPP_PLUS_EQ;
1136 else
1137 {
1138 BACKUP ();
1139 result->type = CPP_PLUS;
1140 }
0d9f234d 1141 break;
04e3ec78 1142
0d9f234d 1143 case '-':
29401c30 1144 c = get_effective_char (pfile);
0d9f234d
NB
1145 if (c == '>')
1146 {
480709cc
NB
1147 result->type = CPP_DEREF;
1148 if (CPP_OPTION (pfile, cplusplus))
1149 {
1150 if (get_effective_char (pfile) == '*')
1151 result->type = CPP_DEREF_STAR;
1152 else
1153 BACKUP ();
1154 }
0d9f234d 1155 }
0d9f234d 1156 else if (c == '-')
480709cc
NB
1157 result->type = CPP_MINUS_MINUS;
1158 else if (c == '=')
1159 result->type = CPP_MINUS_EQ;
1160 else
1161 {
1162 BACKUP ();
1163 result->type = CPP_MINUS;
1164 }
0d9f234d 1165 break;
45b966db 1166
0d9f234d 1167 case '&':
29401c30 1168 c = get_effective_char (pfile);
480709cc
NB
1169 if (c == '&')
1170 result->type = CPP_AND_AND;
1171 else if (c == '=')
1172 result->type = CPP_AND_EQ;
1173 else
1174 {
1175 BACKUP ();
1176 result->type = CPP_AND;
1177 }
0d9f234d
NB
1178 break;
1179
0d9f234d 1180 case '|':
29401c30 1181 c = get_effective_char (pfile);
480709cc
NB
1182 if (c == '|')
1183 result->type = CPP_OR_OR;
1184 else if (c == '=')
1185 result->type = CPP_OR_EQ;
1186 else
1187 {
1188 BACKUP ();
1189 result->type = CPP_OR;
1190 }
0d9f234d 1191 break;
45b966db 1192
0d9f234d 1193 case ':':
29401c30 1194 c = get_effective_char (pfile);
0d9f234d 1195 if (c == ':' && CPP_OPTION (pfile, cplusplus))
480709cc 1196 result->type = CPP_SCOPE;
0d9f234d
NB
1197 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1198 {
1199 result->flags |= DIGRAPH;
480709cc
NB
1200 result->type = CPP_CLOSE_SQUARE;
1201 }
1202 else
1203 {
1204 BACKUP ();
1205 result->type = CPP_COLON;
0d9f234d
NB
1206 }
1207 break;
45b966db 1208
480709cc
NB
1209 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1210 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1211 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1212 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1213 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1214
0d9f234d
NB
1215 case '~': result->type = CPP_COMPL; break;
1216 case ',': result->type = CPP_COMMA; break;
1217 case '(': result->type = CPP_OPEN_PAREN; break;
1218 case ')': result->type = CPP_CLOSE_PAREN; break;
1219 case '[': result->type = CPP_OPEN_SQUARE; break;
1220 case ']': result->type = CPP_CLOSE_SQUARE; break;
1221 case '{': result->type = CPP_OPEN_BRACE; break;
1222 case '}': result->type = CPP_CLOSE_BRACE; break;
1223 case ';': result->type = CPP_SEMICOLON; break;
1224
cc937581
ZW
1225 /* @ is a punctuator in Objective C. */
1226 case '@': result->type = CPP_ATSIGN; break;
0d9f234d 1227
0abc6a6a
NB
1228 case '$':
1229 if (CPP_OPTION (pfile, dollars_in_ident))
1230 goto start_ident;
1231 /* Fall through... */
1232
0d9f234d
NB
1233 random_char:
1234 default:
1235 result->type = CPP_OTHER;
6c53ebff 1236 result->val.c = c;
0d9f234d
NB
1237 break;
1238 }
bdcbe496
NB
1239
1240 return result;
0d9f234d
NB
1241}
1242
5d8ebbd8 1243/* An upper bound on the number of bytes needed to spell TOKEN,
93c80368
NB
1244 including preceding whitespace. */
1245unsigned int
1246cpp_token_len (token)
1247 const cpp_token *token;
0d9f234d 1248{
93c80368 1249 unsigned int len;
6d2c2047 1250
93c80368 1251 switch (TOKEN_SPELL (token))
041c3194 1252 {
a28c5035 1253 default: len = 0; break;
47ad4138 1254 case SPELL_NUMBER:
a28c5035
NB
1255 case SPELL_STRING: len = token->val.str.len; break;
1256 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
041c3194 1257 }
47ad4138 1258 /* 1 for whitespace, 4 for comment delimiters. */
93c80368 1259 return len + 5;
6d2c2047
ZW
1260}
1261
041c3194 1262/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885
ZW
1263 already contain the enough space to hold the token's spelling.
1264 Returns a pointer to the character after the last character
1265 written. */
93c80368
NB
1266unsigned char *
1267cpp_spell_token (pfile, token, buffer)
041c3194
ZW
1268 cpp_reader *pfile; /* Would be nice to be rid of this... */
1269 const cpp_token *token;
1270 unsigned char *buffer;
1271{
96be6998 1272 switch (TOKEN_SPELL (token))
041c3194
ZW
1273 {
1274 case SPELL_OPERATOR:
1275 {
1276 const unsigned char *spelling;
1277 unsigned char c;
d6d5f795 1278
041c3194 1279 if (token->flags & DIGRAPH)
37b8524c
JDA
1280 spelling
1281 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
92936ecf
ZW
1282 else if (token->flags & NAMED_OP)
1283 goto spell_ident;
041c3194 1284 else
96be6998 1285 spelling = TOKEN_NAME (token);
041c3194
ZW
1286
1287 while ((c = *spelling++) != '\0')
1288 *buffer++ = c;
1289 }
1290 break;
d6d5f795 1291
47ad4138
ZW
1292 case SPELL_CHAR:
1293 *buffer++ = token->val.c;
1294 break;
1295
1296 spell_ident:
041c3194 1297 case SPELL_IDENT:
a28c5035
NB
1298 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1299 buffer += NODE_LEN (token->val.node);
041c3194 1300 break;
d6d5f795 1301
47ad4138
ZW
1302 case SPELL_NUMBER:
1303 memcpy (buffer, token->val.str.text, token->val.str.len);
1304 buffer += token->val.str.len;
1305 break;
1306
041c3194
ZW
1307 case SPELL_STRING:
1308 {
ba89d661
ZW
1309 int left, right, tag;
1310 switch (token->type)
1311 {
1312 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1313 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
ba89d661
ZW
1314 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1315 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1316 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
47ad4138
ZW
1317 default:
1318 cpp_ice (pfile, "unknown string token %s\n", TOKEN_NAME (token));
1319 return buffer;
ba89d661
ZW
1320 }
1321 if (tag) *buffer++ = tag;
47ad4138 1322 *buffer++ = left;
bfb9dc7f
ZW
1323 memcpy (buffer, token->val.str.text, token->val.str.len);
1324 buffer += token->val.str.len;
47ad4138 1325 *buffer++ = right;
041c3194
ZW
1326 }
1327 break;
d6d5f795 1328
041c3194 1329 case SPELL_NONE:
1f978f5f 1330 cpp_ice (pfile, "unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
1331 break;
1332 }
d6d5f795 1333
041c3194
ZW
1334 return buffer;
1335}
d6d5f795 1336
5d8ebbd8
NB
1337/* Returns TOKEN spelt as a null-terminated string. The string is
1338 freed when the reader is destroyed. Useful for diagnostics. */
93c80368
NB
1339unsigned char *
1340cpp_token_as_text (pfile, token)
c5a04734 1341 cpp_reader *pfile;
041c3194 1342 const cpp_token *token;
c5a04734 1343{
93c80368 1344 unsigned int len = cpp_token_len (token);
ece54d54 1345 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
c5a04734 1346
93c80368
NB
1347 end = cpp_spell_token (pfile, token, start);
1348 end[0] = '\0';
c5a04734 1349
93c80368
NB
1350 return start;
1351}
c5a04734 1352
5d8ebbd8
NB
1353/* Used by C front ends, which really should move to using
1354 cpp_token_as_text. */
93c80368
NB
1355const char *
1356cpp_type2name (type)
1357 enum cpp_ttype type;
1358{
1359 return (const char *) token_spellings[type].name;
1360}
c5a04734 1361
4ed5bcfb
NB
1362/* Writes the spelling of token to FP, without any preceding space.
1363 Separated from cpp_spell_token for efficiency - to avoid stdio
1364 double-buffering. */
93c80368
NB
1365void
1366cpp_output_token (token, fp)
1367 const cpp_token *token;
1368 FILE *fp;
1369{
93c80368 1370 switch (TOKEN_SPELL (token))
c5a04734 1371 {
93c80368
NB
1372 case SPELL_OPERATOR:
1373 {
1374 const unsigned char *spelling;
3b681e9d 1375 int c;
c5a04734 1376
93c80368 1377 if (token->flags & DIGRAPH)
37b8524c
JDA
1378 spelling
1379 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
93c80368
NB
1380 else if (token->flags & NAMED_OP)
1381 goto spell_ident;
1382 else
1383 spelling = TOKEN_NAME (token);
041c3194 1384
3b681e9d
ZW
1385 c = *spelling;
1386 do
1387 putc (c, fp);
1388 while ((c = *++spelling) != '\0');
93c80368
NB
1389 }
1390 break;
041c3194 1391
47ad4138
ZW
1392 case SPELL_CHAR:
1393 putc (token->val.c, fp);
1394 break;
1395
93c80368
NB
1396 spell_ident:
1397 case SPELL_IDENT:
3b681e9d 1398 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
93c80368 1399 break;
041c3194 1400
47ad4138
ZW
1401 case SPELL_NUMBER:
1402 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1403 break;
1404
93c80368
NB
1405 case SPELL_STRING:
1406 {
1407 int left, right, tag;
1408 switch (token->type)
1409 {
1410 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1411 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
93c80368
NB
1412 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1413 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1414 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
47ad4138
ZW
1415 default:
1416 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1417 return;
93c80368
NB
1418 }
1419 if (tag) putc (tag, fp);
47ad4138 1420 putc (left, fp);
93c80368 1421 fwrite (token->val.str.text, 1, token->val.str.len, fp);
47ad4138 1422 putc (right, fp);
93c80368
NB
1423 }
1424 break;
c5a04734 1425
93c80368
NB
1426 case SPELL_NONE:
1427 /* An error, most probably. */
1428 break;
041c3194 1429 }
c5a04734
ZW
1430}
1431
93c80368
NB
1432/* Compare two tokens. */
1433int
1434_cpp_equiv_tokens (a, b)
1435 const cpp_token *a, *b;
c5a04734 1436{
93c80368
NB
1437 if (a->type == b->type && a->flags == b->flags)
1438 switch (TOKEN_SPELL (a))
1439 {
1440 default: /* Keep compiler happy. */
1441 case SPELL_OPERATOR:
1442 return 1;
1443 case SPELL_CHAR:
6c53ebff 1444 return a->val.c == b->val.c; /* Character. */
93c80368 1445 case SPELL_NONE:
56051c0a 1446 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
93c80368
NB
1447 case SPELL_IDENT:
1448 return a->val.node == b->val.node;
47ad4138 1449 case SPELL_NUMBER:
93c80368
NB
1450 case SPELL_STRING:
1451 return (a->val.str.len == b->val.str.len
1452 && !memcmp (a->val.str.text, b->val.str.text,
1453 a->val.str.len));
1454 }
c5a04734 1455
041c3194
ZW
1456 return 0;
1457}
1458
93c80368
NB
1459/* Returns nonzero if a space should be inserted to avoid an
1460 accidental token paste for output. For simplicity, it is
1461 conservative, and occasionally advises a space where one is not
1462 needed, e.g. "." and ".2". */
93c80368
NB
1463int
1464cpp_avoid_paste (pfile, token1, token2)
c5a04734 1465 cpp_reader *pfile;
93c80368 1466 const cpp_token *token1, *token2;
c5a04734 1467{
93c80368
NB
1468 enum cpp_ttype a = token1->type, b = token2->type;
1469 cppchar_t c;
c5a04734 1470
93c80368
NB
1471 if (token1->flags & NAMED_OP)
1472 a = CPP_NAME;
1473 if (token2->flags & NAMED_OP)
1474 b = CPP_NAME;
c5a04734 1475
93c80368
NB
1476 c = EOF;
1477 if (token2->flags & DIGRAPH)
37b8524c 1478 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
1479 else if (token_spellings[b].category == SPELL_OPERATOR)
1480 c = token_spellings[b].name[0];
c5a04734 1481
93c80368 1482 /* Quickly get everything that can paste with an '='. */
37b8524c 1483 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 1484 return 1;
c5a04734 1485
93c80368 1486 switch (a)
c5a04734 1487 {
93c80368
NB
1488 case CPP_GREATER: return c == '>' || c == '?';
1489 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1490 case CPP_PLUS: return c == '+';
1491 case CPP_MINUS: return c == '-' || c == '>';
1492 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1493 case CPP_MOD: return c == ':' || c == '>';
1494 case CPP_AND: return c == '&';
1495 case CPP_OR: return c == '|';
1496 case CPP_COLON: return c == ':' || c == '>';
1497 case CPP_DEREF: return c == '*';
26ec42ee 1498 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
1499 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1500 case CPP_NAME: return ((b == CPP_NUMBER
1501 && name_p (pfile, &token2->val.str))
1502 || b == CPP_NAME
1503 || b == CPP_CHAR || b == CPP_STRING); /* L */
1504 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1505 || c == '.' || c == '+' || c == '-');
1506 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
6c53ebff 1507 && token1->val.c == '@'
93c80368
NB
1508 && (b == CPP_NAME || b == CPP_STRING));
1509 default: break;
c5a04734 1510 }
c5a04734 1511
417f3e3a 1512 return 0;
c5a04734
ZW
1513}
1514
93c80368 1515/* Output all the remaining tokens on the current line, and a newline
4ed5bcfb
NB
1516 character, to FP. Leading whitespace is removed. If there are
1517 macros, special token padding is not performed. */
c5a04734 1518void
93c80368 1519cpp_output_line (pfile, fp)
c5a04734 1520 cpp_reader *pfile;
93c80368 1521 FILE *fp;
c5a04734 1522{
4ed5bcfb 1523 const cpp_token *token;
96be6998 1524
4ed5bcfb
NB
1525 token = cpp_get_token (pfile);
1526 while (token->type != CPP_EOF)
96be6998 1527 {
4ed5bcfb
NB
1528 cpp_output_token (token, fp);
1529 token = cpp_get_token (pfile);
1530 if (token->flags & PREV_WHITE)
1531 putc (' ', fp);
96be6998
ZW
1532 }
1533
93c80368 1534 putc ('\n', fp);
041c3194 1535}
c5a04734 1536
c8a96070
NB
/* Returns the value of the hexadecimal digit C.  Aborts if C is not
   a hexadecimal digit according to hex_p.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1547
62729350
NB
1548/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1549 failure if cpplib is not parsing C++ or C99. Such failure is
1550 silent, and no variables are updated. Otherwise returns 0, and
1551 warns if -Wtraditional.
c8a96070
NB
1552
1553 [lex.charset]: The character designated by the universal character
1554 name \UNNNNNNNN is that character whose character short name in
1555 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1556 universal character name \uNNNN is that character whose character
1557 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1558 for a universal character name is less than 0x20 or in the range
1559 0x7F-0x9F (inclusive), or if the universal character name
1560 designates a character in the basic source character set, then the
1561 program is ill-formed.
1562
1563 We assume that wchar_t is Unicode, so we don't need to do any
62729350 1564 mapping. Is this ever wrong?
c8a96070 1565
62729350
NB
1566 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1567 LIMIT is the end of the string or charconst. PSTR is updated to
1568 point after the UCS on return, and the UCS is written into PC. */
1569
1570static int
1571maybe_read_ucs (pfile, pstr, limit, pc)
c8a96070
NB
1572 cpp_reader *pfile;
1573 const unsigned char **pstr;
1574 const unsigned char *limit;
62729350 1575 unsigned int *pc;
c8a96070
NB
1576{
1577 const unsigned char *p = *pstr;
62729350
NB
1578 unsigned int code = 0;
1579 unsigned int c = *pc, length;
1580
1581 /* Only attempt to interpret a UCS for C++ and C99. */
1582 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1583 return 1;
c8a96070 1584
62729350 1585 if (CPP_WTRADITIONAL (pfile))
f458d1d5 1586 cpp_warning (pfile, "the meaning of '\\%c' is different in traditional C", c);
c8a96070 1587
f8710242
NB
1588 length = (c == 'u' ? 4: 8);
1589
1590 if ((size_t) (limit - p) < length)
1591 {
1592 cpp_error (pfile, "incomplete universal-character-name");
1593 /* Skip to the end to avoid more diagnostics. */
1594 p = limit;
1595 }
1596 else
1597 {
1598 for (; length; length--, p++)
c8a96070 1599 {
f8710242
NB
1600 c = *p;
1601 if (ISXDIGIT (c))
1602 code = (code << 4) + hex_digit_value (c);
1603 else
1604 {
1605 cpp_error (pfile,
1606 "non-hex digit '%c' in universal-character-name", c);
1607 /* We shouldn't skip in case there are multibyte chars. */
1608 break;
1609 }
c8a96070 1610 }
c8a96070
NB
1611 }
1612
1613#ifdef TARGET_EBCDIC
1614 cpp_error (pfile, "universal-character-name on EBCDIC target");
1615 code = 0x3f; /* EBCDIC invalid character */
1616#else
f8710242
NB
1617 /* True extended characters are OK. */
1618 if (code >= 0xa0
1619 && !(code & 0x80000000)
1620 && !(code >= 0xD800 && code <= 0xDFFF))
1621 ;
1622 /* The standard permits $, @ and ` to be specified as UCNs. We use
1623 hex escapes so that this also works with EBCDIC hosts. */
1624 else if (code == 0x24 || code == 0x40 || code == 0x60)
1625 ;
1626 /* Don't give another error if one occurred above. */
1627 else if (length == 0)
1628 cpp_error (pfile, "universal-character-name out of range");
c8a96070
NB
1629#endif
1630
1631 *pstr = p;
62729350
NB
1632 *pc = code;
1633 return 0;
c8a96070
NB
1634}
1635
1636/* Interpret an escape sequence, and return its value. PSTR points to
1637 the input pointer, which is just after the backslash. LIMIT is how
62729350 1638 much text we have. MASK is a bitmask for the precision for the
f458d1d5 1639 destination type (char or wchar_t).
c8a96070 1640
62729350 1641 Handles all relevant diagnostics. */
62729350 1642unsigned int
f458d1d5 1643cpp_parse_escape (pfile, pstr, limit, mask)
c8a96070
NB
1644 cpp_reader *pfile;
1645 const unsigned char **pstr;
1646 const unsigned char *limit;
62729350 1647 unsigned HOST_WIDE_INT mask;
c8a96070
NB
1648{
1649 int unknown = 0;
1650 const unsigned char *str = *pstr;
1651 unsigned int c = *str++;
1652
1653 switch (c)
1654 {
1655 case '\\': case '\'': case '"': case '?': break;
1656 case 'b': c = TARGET_BS; break;
1657 case 'f': c = TARGET_FF; break;
1658 case 'n': c = TARGET_NEWLINE; break;
1659 case 'r': c = TARGET_CR; break;
1660 case 't': c = TARGET_TAB; break;
1661 case 'v': c = TARGET_VT; break;
1662
1663 case '(': case '{': case '[': case '%':
1664 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1665 '\%' is used to prevent SCCS from getting confused. */
1666 unknown = CPP_PEDANTIC (pfile);
1667 break;
1668
1669 case 'a':
1670 if (CPP_WTRADITIONAL (pfile))
f458d1d5 1671 cpp_warning (pfile, "the meaning of '\\a' is different in traditional C");
001e3fee 1672 c = TARGET_BELL;
c8a96070
NB
1673 break;
1674
1675 case 'e': case 'E':
1676 if (CPP_PEDANTIC (pfile))
1677 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1678 c = TARGET_ESC;
1679 break;
1680
c8a96070 1681 case 'u': case 'U':
62729350 1682 unknown = maybe_read_ucs (pfile, &str, limit, &c);
c8a96070
NB
1683 break;
1684
1685 case 'x':
1686 if (CPP_WTRADITIONAL (pfile))
f458d1d5 1687 cpp_warning (pfile, "the meaning of '\\x' is different in traditional C");
c8a96070 1688
c8a96070
NB
1689 {
1690 unsigned int i = 0, overflow = 0;
1691 int digits_found = 0;
1692
1693 while (str < limit)
1694 {
1695 c = *str;
1696 if (! ISXDIGIT (c))
1697 break;
1698 str++;
1699 overflow |= i ^ (i << 4 >> 4);
1700 i = (i << 4) + hex_digit_value (c);
1701 digits_found = 1;
1702 }
1703
1704 if (!digits_found)
1705 cpp_error (pfile, "\\x used with no following hex digits");
1706
1707 if (overflow | (i != (i & mask)))
1708 {
1709 cpp_pedwarn (pfile, "hex escape sequence out of range");
1710 i &= mask;
1711 }
1712 c = i;
1713 }
1714 break;
1715
1716 case '0': case '1': case '2': case '3':
1717 case '4': case '5': case '6': case '7':
1718 {
1719 unsigned int i = c - '0';
1720 int count = 0;
1721
1722 while (str < limit && ++count < 3)
1723 {
1724 c = *str;
1725 if (c < '0' || c > '7')
1726 break;
1727 str++;
1728 i = (i << 3) + c - '0';
1729 }
1730
1731 if (i != (i & mask))
1732 {
1733 cpp_pedwarn (pfile, "octal escape sequence out of range");
1734 i &= mask;
1735 }
1736 c = i;
1737 }
1738 break;
1739
1740 default:
1741 unknown = 1;
1742 break;
1743 }
1744
1745 if (unknown)
1746 {
1747 if (ISGRAPH (c))
1748 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1749 else
1750 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1751 }
1752
62729350
NB
1753 if (c > mask)
1754 cpp_pedwarn (pfile, "escape sequence out of range for character");
1755
c8a96070
NB
1756 *pstr = str;
1757 return c;
1758}
1759
1760#ifndef MAX_CHAR_TYPE_SIZE
1761#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1762#endif
1763
1764#ifndef MAX_WCHAR_TYPE_SIZE
1765#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1766#endif
1767
1768/* Interpret a (possibly wide) character constant in TOKEN.
f458d1d5
ZW
1769 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN points
1770 to a variable that is filled in with the number of characters seen. */
c8a96070 1771HOST_WIDE_INT
f458d1d5 1772cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
c8a96070
NB
1773 cpp_reader *pfile;
1774 const cpp_token *token;
1775 int warn_multi;
c8a96070
NB
1776 unsigned int *pchars_seen;
1777{
1778 const unsigned char *str = token->val.str.text;
1779 const unsigned char *limit = str + token->val.str.len;
1780 unsigned int chars_seen = 0;
1781 unsigned int width, max_chars, c;
2a967f3d
NB
1782 unsigned HOST_WIDE_INT mask;
1783 HOST_WIDE_INT result = 0;
a47ed310 1784 bool unsigned_p;
c8a96070
NB
1785
1786#ifdef MULTIBYTE_CHARS
1787 (void) local_mbtowc (NULL, NULL, 0);
1788#endif
1789
1790 /* Width in bits. */
1791 if (token->type == CPP_CHAR)
a47ed310
NB
1792 {
1793 width = MAX_CHAR_TYPE_SIZE;
1794 unsigned_p = CPP_OPTION (pfile, signed_char) == 0;
1795 }
c8a96070 1796 else
a47ed310
NB
1797 {
1798 width = MAX_WCHAR_TYPE_SIZE;
1799 unsigned_p = WCHAR_UNSIGNED;
1800 }
c8a96070
NB
1801
1802 if (width < HOST_BITS_PER_WIDE_INT)
1803 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1804 else
1805 mask = ~0;
1806 max_chars = HOST_BITS_PER_WIDE_INT / width;
1807
1808 while (str < limit)
1809 {
1810#ifdef MULTIBYTE_CHARS
1811 wchar_t wc;
1812 int char_len;
1813
1814 char_len = local_mbtowc (&wc, str, limit - str);
1815 if (char_len == -1)
1816 {
1817 cpp_warning (pfile, "ignoring invalid multibyte character");
1818 c = *str++;
1819 }
1820 else
1821 {
1822 str += char_len;
1823 c = wc;
1824 }
1825#else
1826 c = *str++;
1827#endif
1828
1829 if (c == '\\')
f458d1d5 1830 c = cpp_parse_escape (pfile, &str, limit, mask);
c8a96070
NB
1831
1832#ifdef MAP_CHARACTER
1833 if (ISPRINT (c))
1834 c = MAP_CHARACTER (c);
1835#endif
1836
1837 /* Merge character into result; ignore excess chars. */
1838 if (++chars_seen <= max_chars)
1839 {
1840 if (width < HOST_BITS_PER_WIDE_INT)
1841 result = (result << width) | (c & mask);
1842 else
1843 result = c;
1844 }
1845 }
1846
1847 if (chars_seen == 0)
1848 cpp_error (pfile, "empty character constant");
1849 else if (chars_seen > max_chars)
1850 {
1851 chars_seen = max_chars;
f8710242 1852 cpp_warning (pfile, "character constant too long");
c8a96070 1853 }
f458d1d5 1854 else if (chars_seen > 1 && warn_multi)
c8a96070
NB
1855 cpp_warning (pfile, "multi-character character constant");
1856
a47ed310
NB
1857 /* If relevant type is signed, sign-extend the constant. */
1858 if (chars_seen)
c8a96070
NB
1859 {
1860 unsigned int nbits = chars_seen * width;
c8a96070 1861
e1e7d56b 1862 mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits);
a47ed310 1863 if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0)
c8a96070
NB
1864 result &= mask;
1865 else
1866 result |= ~mask;
1867 }
1868
1869 *pchars_seen = chars_seen;
1870 return result;
1871}
1872
1e013d2e
NB
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).  */

/* Smallest buffer new_buff will allocate.  */
#define MIN_BUFF_SIZE 8000

/* Largest free buffer _cpp_get_buff will hand out for a request of
   MIN_SIZE bytes.  */
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)

/* Size requested when extending BUFF: MIN_EXTRA plus twice the
   distance from BUFF's cur pointer to its limit.  Both arguments are
   parenthesized so that any expression may be passed.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif

/* Exists only so that the offset of its union member can be
   measured; see DEFAULT_ALIGNMENT.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* Offset of a double/pointer union placed after a single char.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to a multiple of ALIGN.  The mask arithmetic requires
   ALIGN to be a power of two.  */
#define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
1900
c9e7a609
NB
1901/* Create a new allocation buffer. Place the control block at the end
1902 of the buffer, so that buffer overflows will cause immediate chaos. */
b8af0ca5
NB
1903static _cpp_buff *
1904new_buff (len)
6142088c 1905 size_t len;
b8af0ca5
NB
1906{
1907 _cpp_buff *result;
ece54d54 1908 unsigned char *base;
b8af0ca5 1909
1e013d2e
NB
1910 if (len < MIN_BUFF_SIZE)
1911 len = MIN_BUFF_SIZE;
b8af0ca5
NB
1912 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
1913
1914 base = xmalloc (len + sizeof (_cpp_buff));
1915 result = (_cpp_buff *) (base + len);
1916 result->base = base;
1917 result->cur = base;
1918 result->limit = base + len;
1919 result->next = NULL;
1920 return result;
1921}
1922
1923/* Place a chain of unwanted allocation buffers on the free list. */
1924void
1925_cpp_release_buff (pfile, buff)
1926 cpp_reader *pfile;
1927 _cpp_buff *buff;
1928{
1929 _cpp_buff *end = buff;
1930
1931 while (end->next)
1932 end = end->next;
1933 end->next = pfile->free_buffs;
1934 pfile->free_buffs = buff;
1935}
1936
1937/* Return a free buffer of size at least MIN_SIZE. */
1938_cpp_buff *
1939_cpp_get_buff (pfile, min_size)
1940 cpp_reader *pfile;
6142088c 1941 size_t min_size;
b8af0ca5
NB
1942{
1943 _cpp_buff *result, **p;
1944
1945 for (p = &pfile->free_buffs;; p = &(*p)->next)
1946 {
6142088c 1947 size_t size;
1e013d2e
NB
1948
1949 if (*p == NULL)
b8af0ca5 1950 return new_buff (min_size);
1e013d2e
NB
1951 result = *p;
1952 size = result->limit - result->base;
1953 /* Return a buffer that's big enough, but don't waste one that's
1954 way too big. */
34f5271d 1955 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
b8af0ca5
NB
1956 break;
1957 }
1958
1959 *p = result->next;
1960 result->next = NULL;
1961 result->cur = result->base;
1962 return result;
1963}
1964
4fe9b91c 1965/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
1966 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1967 the excess bytes to the new buffer. Chains the new buffer after
1968 BUFF, and returns the new buffer. */
b8af0ca5 1969_cpp_buff *
8c3b2693 1970_cpp_append_extend_buff (pfile, buff, min_extra)
b8af0ca5
NB
1971 cpp_reader *pfile;
1972 _cpp_buff *buff;
6142088c 1973 size_t min_extra;
b8af0ca5 1974{
6142088c 1975 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
8c3b2693 1976 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
b8af0ca5 1977
8c3b2693
NB
1978 buff->next = new_buff;
1979 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1980 return new_buff;
1981}
1982
4fe9b91c 1983/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
1984 remaining bytes of the buffer pointed to by BUFF, and at least
1985 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1986 Chains the new buffer before the buffer pointed to by BUFF, and
1987 updates the pointer to point to the new buffer. */
1988void
1989_cpp_extend_buff (pfile, pbuff, min_extra)
1990 cpp_reader *pfile;
1991 _cpp_buff **pbuff;
1992 size_t min_extra;
1993{
1994 _cpp_buff *new_buff, *old_buff = *pbuff;
1995 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1996
1997 new_buff = _cpp_get_buff (pfile, size);
1998 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1999 new_buff->next = old_buff;
2000 *pbuff = new_buff;
b8af0ca5
NB
2001}
2002
2003/* Free a chain of buffers starting at BUFF. */
2004void
2005_cpp_free_buff (buff)
2006 _cpp_buff *buff;
2007{
2008 _cpp_buff *next;
2009
2010 for (; buff; buff = next)
2011 {
2012 next = buff->next;
2013 free (buff->base);
2014 }
2015}
417f3e3a 2016
ece54d54
NB
2017/* Allocate permanent, unaligned storage of length LEN. */
2018unsigned char *
2019_cpp_unaligned_alloc (pfile, len)
2020 cpp_reader *pfile;
2021 size_t len;
2022{
2023 _cpp_buff *buff = pfile->u_buff;
2024 unsigned char *result = buff->cur;
2025
2026 if (len > (size_t) (buff->limit - result))
2027 {
2028 buff = _cpp_get_buff (pfile, len);
2029 buff->next = pfile->u_buff;
2030 pfile->u_buff = buff;
2031 result = buff->cur;
2032 }
2033
2034 buff->cur = result + len;
2035 return result;
2036}
2037
87062813
NB
2038/* Allocate permanent, unaligned storage of length LEN from a_buff.
2039 That buffer is used for growing allocations when saving macro
2040 replacement lists in a #define, and when parsing an answer to an
2041 assertion in #assert, #unassert or #if (and therefore possibly
2042 whilst expanding macros). It therefore must not be used by any
2043 code that they might call: specifically the lexer and the guts of
2044 the macro expander.
2045
2046 All existing other uses clearly fit this restriction: storing
2047 registered pragmas during initialization. */
93c80368 2048unsigned char *
8c3b2693
NB
2049_cpp_aligned_alloc (pfile, len)
2050 cpp_reader *pfile;
2051 size_t len;
3fef5b2b 2052{
8c3b2693
NB
2053 _cpp_buff *buff = pfile->a_buff;
2054 unsigned char *result = buff->cur;
3fef5b2b 2055
8c3b2693 2056 if (len > (size_t) (buff->limit - result))
3fef5b2b 2057 {
8c3b2693
NB
2058 buff = _cpp_get_buff (pfile, len);
2059 buff->next = pfile->a_buff;
2060 pfile->a_buff = buff;
2061 result = buff->cur;
3fef5b2b 2062 }
041c3194 2063
8c3b2693 2064 buff->cur = result + len;
93c80368 2065 return result;
041c3194 2066}