]>
Commit | Line | Data |
---|---|---|
0578f103 | 1 | /* CPP Library - lexical analysis. |
e484a1cc | 2 | Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc. |
0578f103 | 3 | Contributed by Per Bothner, 1994-95. |
4 | Based on CCCP program by Paul Rubin, June 1986 | |
5 | Adapted to ANSI C, Richard Stallman, Jan 1987 | |
6 | Broken out to separate file, Zack Weinberg, Mar 2000 | |
6060326b | 7 | Single-pass line tokenization by Neil Booth, April 2000 |
0578f103 | 8 | |
9 | This program is free software; you can redistribute it and/or modify it | |
10 | under the terms of the GNU General Public License as published by the | |
11 | Free Software Foundation; either version 2, or (at your option) any | |
12 | later version. | |
13 | ||
14 | This program is distributed in the hope that it will be useful, | |
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | GNU General Public License for more details. | |
18 | ||
19 | You should have received a copy of the GNU General Public License | |
20 | along with this program; if not, write to the Free Software | |
21 | Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | |
22 | ||
23 | #include "config.h" | |
24 | #include "system.h" | |
0578f103 | 25 | #include "cpplib.h" |
26 | #include "cpphash.h" | |
27 | ||
8330799c | 28 | /* MULTIBYTE_CHARS support only works for native compilers. |
29 | ??? Ideally what we want is to model widechar support after | |
30 | the current floating point support. */ | |
31 | #ifdef CROSS_COMPILE | |
32 | #undef MULTIBYTE_CHARS | |
33 | #endif | |
34 | ||
35 | #ifdef MULTIBYTE_CHARS | |
36 | #include "mbchar.h" | |
37 | #include <locale.h> | |
38 | #endif | |
39 | ||
79bd622b | 40 | /* Tokens with SPELL_STRING store their spelling in the token list, |
41 | and its length in the token->val.name.len. */ | |
42 | enum spell_type | |
241e762e | 43 | { |
79bd622b | 44 | SPELL_OPERATOR = 0, |
45 | SPELL_CHAR, | |
46 | SPELL_IDENT, | |
8d27e472 | 47 | SPELL_NUMBER, |
79bd622b | 48 | SPELL_STRING, |
49 | SPELL_NONE | |
241e762e | 50 | }; |
51 | /* One entry of the token_spellings table: the spelling category of a token type together with its name. */||
79bd622b | 52 | struct token_spelling |
241e762e | 53 | { |
79bd622b | 54 | enum spell_type category; |
55 | const unsigned char *name; | |
241e762e | 56 | }; |
57 | ||
0ca849f9 | 58 | /* Spellings of the digraph tokens. */ static const unsigned char *const digraph_spellings[] = |
59 | { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" }; | |
79bd622b | 60 | /* Build the token_spellings table: TTYPE_TABLE (defined elsewhere) is expanded with OP and TK temporarily defined so that each of its entries becomes one { category, name } initializer. */ |
61 | #define OP(e, s) { SPELL_OPERATOR, U s }, | |
62 | #define TK(e, s) { s, U STRINGX (e) }, | |
0ca849f9 | 63 | static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE }; |
79bd622b | 64 | #undef OP |
65 | #undef TK | |
66 | /* TOKEN_SPELL and TOKEN_NAME look up the spelling category and the name recorded for TOKEN's type. BACKUP resets the cur pointer of a local variable named buffer to its backup_to mark. */||
67 | #define TOKEN_SPELL(token) (token_spellings[(token)->type].category) | |
68 | #define TOKEN_NAME(token) (token_spellings[(token)->type].name) | |
1c124f85 | 69 | #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0) |
e2f9a79f | 70 | /* Forward declarations of the file-local (static) helper functions. */ |
1e0ef2fd | 71 | static void handle_newline PARAMS ((cpp_reader *)); |
72 | static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *)); | |
c808d026 | 73 | static cppchar_t get_effective_char PARAMS ((cpp_reader *)); |
338fa5f7 | 74 | |
f80e83a9 | 75 | static int skip_block_comment PARAMS ((cpp_reader *)); |
f669338a | 76 | static int skip_line_comment PARAMS ((cpp_reader *)); |
338fa5f7 | 77 | static void adjust_column PARAMS ((cpp_reader *)); |
435fb09b | 78 | static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t)); |
66a5287e | 79 | static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *)); |
78a11351 | 80 | static U_CHAR *parse_slow PARAMS ((cpp_reader *, const U_CHAR *, int, |
81 | unsigned int *)); | |
82 | static void parse_number PARAMS ((cpp_reader *, cpp_string *, int)); | |
79bd622b | 83 | static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *)); |
338fa5f7 | 84 | static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t)); |
1e0ef2fd | 85 | static bool trigraph_p PARAMS ((cpp_reader *)); |
d3f7919d | 86 | static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *, |
87 | cppchar_t)); | |
79bd622b | 88 | static int name_p PARAMS ((cpp_reader *, const cpp_string *)); |
c8342759 | 89 | static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **, |
90 | const unsigned char *, unsigned int *)); | |
83dcbb5c | 91 | static tokenrun *next_tokenrun PARAMS ((tokenrun *)); |
e916a356 | 92 | |
8330799c | 93 | static unsigned int hex_digit_value PARAMS ((unsigned int)); |
4b31a107 | 94 | static _cpp_buff *new_buff PARAMS ((size_t)); |
bce8e0c0 | 95 | |
f80e83a9 | 96 | /* Utility routine: |
2c63d6c8 | 97 | |
76faa4c0 | 98 | Compares the token TOKEN to the NUL-terminated string STRING. |
99 | Returns 1 if TOKEN is a CPP_NAME whose node name compares equal to STRING, and 0 otherwise (including when TOKEN is not a CPP_NAME). */ | |
f80e83a9 | 100 | int |
76faa4c0 | 101 | cpp_ideq (token, string) |
102 | const cpp_token *token; | |
f80e83a9 | 103 | const char *string; |
104 | { | |
76faa4c0 | 105 | if (token->type != CPP_NAME) |
f80e83a9 | 106 | return 0; |
76faa4c0 | 107 | |
c86dbc5b | 108 | return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string); |
bce8e0c0 | 109 | } |
50fd6b48 | 110 | |
1e0ef2fd | 111 | /* Call when meeting a newline, assumed to be in buffer->cur[-1]. |
112 | Returns with buffer->cur pointing to the character immediately | |
113 | following the newline (combination), with the buffer's line_base and col_adjust reset and pfile->line incremented. */ | |
114 | static void | |
115 | handle_newline (pfile) | |
36a0aa7c | 116 | cpp_reader *pfile; |
338fa5f7 | 117 | { |
1e0ef2fd | 118 | cpp_buffer *buffer = pfile->buffer; |
338fa5f7 | 119 | |
1e0ef2fd | 120 | /* Handle CR-LF and LF-CR. Most other implementations (e.g. java) |
435fb09b | 121 | only accept CR-LF; maybe we should fall back to that behaviour? Comparing the sum of the two characters matches either order. */ |
122 | if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n') | |
1e0ef2fd | 123 | buffer->cur++; |
338fa5f7 | 124 | /* The new line starts here. */ |
1e0ef2fd | 125 | buffer->line_base = buffer->cur; |
126 | buffer->col_adjust = 0; | |
127 | pfile->line++; | |
338fa5f7 | 128 | } |
129 | ||
1e0ef2fd | 130 | /* Subroutine of skip_escaped_newlines; called when a 3-character |
131 | sequence beginning with "??" is encountered. buffer->cur points to | |
132 | the second '?'. | |
133 | ||
134 | Warns if necessary, and returns true if the sequence forms a | |
135 | trigraph and the trigraph should be honoured. */ | |
136 | static bool | |
137 | trigraph_p (pfile) | |
0578f103 | 138 | cpp_reader *pfile; |
0578f103 | 139 | { |
1e0ef2fd | 140 | cpp_buffer *buffer = pfile->buffer; |
141 | cppchar_t from_char = buffer->cur[1]; | |
142 | bool accept; | |
143 | /* A third character with no entry in the map does not form a trigraph. */||
144 | if (!_cpp_trigraph_map[from_char]) | |
145 | return false; | |
146 | ||
147 | accept = CPP_OPTION (pfile, trigraphs); /* Honour trigraphs only when the option is on. */ | |
148 | ||
f669338a | 149 | /* Don't warn about trigraphs in comments. */ |
150 | if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment) | |
0578f103 | 151 | { |
f80e83a9 | 152 | if (accept) |
73328dce | 153 | cpp_error_with_line (pfile, DL_WARNING, |
154 | pfile->line, CPP_BUF_COL (buffer) - 1, | |
155 | "trigraph ??%c converted to %c", | |
156 | (int) from_char, | |
157 | (int) _cpp_trigraph_map[from_char]); | |
4b912310 | 158 | else if (buffer->cur != buffer->last_Wtrigraphs) |
159 | { | |
160 | buffer->last_Wtrigraphs = buffer->cur; /* Remember this spot so the same ignored trigraph is not reported again. */ | |
73328dce | 161 | cpp_error_with_line (pfile, DL_WARNING, |
162 | pfile->line, CPP_BUF_COL (buffer) - 1, | |
163 | "trigraph ??%c ignored", (int) from_char); | |
4b912310 | 164 | } |
0578f103 | 165 | } |
338fa5f7 | 166 | |
f80e83a9 | 167 | return accept; |
0578f103 | 168 | } |
169 | ||
1e0ef2fd | 170 | /* Skips any escaped newlines introduced by '?' or a '\\', assumed to |
1c124f85 | 171 | lie in buffer->cur[-1]. Returns the next byte, which will be in |
172 | buffer->cur[-1]. This routine performs preprocessing stages 1 and | |
173 | 2 of the ISO C standard. If a backslash-newline is the last thing in the buffer, a pedwarn is issued and EOF is returned instead. */ | |
338fa5f7 | 174 | static cppchar_t |
1e0ef2fd | 175 | skip_escaped_newlines (pfile) |
c808d026 | 176 | cpp_reader *pfile; |
0578f103 | 177 | { |
c808d026 | 178 | cpp_buffer *buffer = pfile->buffer; |
1e0ef2fd | 179 | cppchar_t next = buffer->cur[-1]; |
c808d026 | 180 | |
396ffa86 | 181 | /* Only do this if we apply stages 1 and 2. */ |
182 | if (!buffer->from_stage3) | |
f80e83a9 | 183 | { |
396ffa86 | 184 | const unsigned char *saved_cur; |
1e0ef2fd | 185 | cppchar_t next1; |
396ffa86 | 186 | |
187 | do | |
338fa5f7 | 188 | { |
396ffa86 | 189 | if (next == '?') |
338fa5f7 | 190 | { |
435fb09b | 191 | if (buffer->cur[0] != '?' || !trigraph_p (pfile)) |
1e0ef2fd | 192 | break; |
396ffa86 | 193 | |
1e0ef2fd | 194 | /* Translate the trigraph and step over its last two characters. */ |
195 | next = _cpp_trigraph_map[buffer->cur[1]]; | |
196 | buffer->cur += 2; | |
435fb09b | 197 | if (next != '\\') |
396ffa86 | 198 | break; |
396ffa86 | 199 | } |
200 | ||
435fb09b | 201 | if (buffer->cur == buffer->rlimit) |
202 | break; | |
203 | ||
1e0ef2fd | 204 | /* We have a backslash, and room for at least one more |
205 | character. Skip horizontal whitespace. */ | |
206 | saved_cur = buffer->cur; | |
396ffa86 | 207 | do |
1e0ef2fd | 208 | next1 = *buffer->cur++; |
209 | while (is_nvspace (next1) && buffer->cur < buffer->rlimit); | |
f80e83a9 | 210 | /* If what follows is not a newline, this is not an escaped newline: restore the position and stop. */ |
396ffa86 | 211 | if (!is_vspace (next1)) |
338fa5f7 | 212 | { |
1e0ef2fd | 213 | buffer->cur = saved_cur; |
338fa5f7 | 214 | break; |
215 | } | |
0578f103 | 216 | /* If more than the newline character itself was consumed, horizontal space preceded it. */ |
1e0ef2fd | 217 | if (saved_cur != buffer->cur - 1 |
218 | && !pfile->state.lexing_comment) | |
73328dce | 219 | cpp_error (pfile, DL_WARNING, |
220 | "backslash and newline separated by space"); | |
338fa5f7 | 221 | |
1e0ef2fd | 222 | handle_newline (pfile); |
1c124f85 | 223 | buffer->backup_to = buffer->cur; /* Move the backup mark past the escaped newline. */ |
1e0ef2fd | 224 | if (buffer->cur == buffer->rlimit) |
225 | { | |
73328dce | 226 | cpp_error (pfile, DL_PEDWARN, |
227 | "backslash-newline at end of file"); | |
1e0ef2fd | 228 | next = EOF; |
229 | } | |
230 | else | |
231 | next = *buffer->cur++; | |
338fa5f7 | 232 | } |
396ffa86 | 233 | while (next == '\\' || next == '?'); |
f80e83a9 | 234 | } |
0578f103 | 235 | |
338fa5f7 | 236 | return next; |
0578f103 | 237 | } |
238 | ||
338fa5f7 | 239 | /* Obtain the next character, after trigraph conversion and skipping |
1e0ef2fd | 240 | an arbitrarily long string of escaped newlines. The common case of |
241 | no trigraphs or escaped newlines falls through quickly. On return, | |
1c124f85 | 242 | buffer->backup_to points to where to return to if the character is |
243 | not to be processed. */ | |
338fa5f7 | 244 | static cppchar_t |
c808d026 | 245 | get_effective_char (pfile) |
246 | cpp_reader *pfile; | |
852d1b04 | 247 | { |
435fb09b | 248 | cppchar_t next; |
1c124f85 | 249 | cpp_buffer *buffer = pfile->buffer; |
338fa5f7 | 250 | /* Mark the current position first; skip_escaped_newlines moves the mark forward when it skips an escaped newline. */ |
1c124f85 | 251 | buffer->backup_to = buffer->cur; |
435fb09b | 252 | next = *buffer->cur++; |
253 | if (__builtin_expect (next == '?' || next == '\\', 0)) /* Hint to the compiler that this branch is the unlikely one. */ | |
254 | next = skip_escaped_newlines (pfile); | |
338fa5f7 | 255 | |
1c124f85 | 256 | return next; |
852d1b04 | 257 | } |
258 | ||
338fa5f7 | 259 | /* Skip a C-style block comment. We find the end of the comment by |
260 | seeing if an asterisk is before every '/' we encounter. Returns | |
261 | non-zero if comment terminated by EOF, zero otherwise. Newlines and tabs inside the comment still update the line and column bookkeeping. */ | |
f80e83a9 | 262 | static int |
263 | skip_block_comment (pfile) | |
0578f103 | 264 | cpp_reader *pfile; |
265 | { | |
f80e83a9 | 266 | cpp_buffer *buffer = pfile->buffer; |
63e1abce | 267 | cppchar_t c = EOF, prevc = EOF; |
338fa5f7 | 268 | /* Flag that we are inside a comment; trigraph_p and skip_escaped_newlines check this flag before warning. */ |
f669338a | 269 | pfile->state.lexing_comment = 1; |
338fa5f7 | 270 | while (buffer->cur != buffer->rlimit) |
0578f103 | 271 | { |
338fa5f7 | 272 | prevc = c, c = *buffer->cur++; |
273 | ||
338fa5f7 | 274 | /* FIXME: For speed, create a new character class of characters |
79bd622b | 275 | of interest inside block comments. */ |
338fa5f7 | 276 | if (c == '?' || c == '\\') |
1e0ef2fd | 277 | c = skip_escaped_newlines (pfile); |
f80e83a9 | 278 | |
338fa5f7 | 279 | /* People like decorating comments with '*', so check for '/' |
280 | instead for efficiency. */ | |
f80e83a9 | 281 | if (c == '/') |
0578f103 | 282 | { |
338fa5f7 | 283 | if (prevc == '*') |
284 | break; | |
f80e83a9 | 285 | |
338fa5f7 | 286 | /* Warn about potential nested comments, but not if the '/' |
3fb1e43b | 287 | comes immediately before the true comment delimiter. |
f80e83a9 | 288 | Don't bother to get it right across escaped newlines. */ |
338fa5f7 | 289 | if (CPP_OPTION (pfile, warn_comments) |
1e0ef2fd | 290 | && buffer->cur[0] == '*' && buffer->cur[1] != '/') |
73328dce | 291 | cpp_error_with_line (pfile, DL_WARNING, |
292 | pfile->line, CPP_BUF_COL (buffer), | |
293 | "\"/*\" within comment"); | |
0578f103 | 294 | } |
78719282 | 295 | else if (is_vspace (c)) |
1e0ef2fd | 296 | handle_newline (pfile); |
b86584f6 | 297 | else if (c == '\t') |
338fa5f7 | 298 | adjust_column (pfile); |
0578f103 | 299 | } |
f80e83a9 | 300 | |
f669338a | 301 | pfile->state.lexing_comment = 0; |
338fa5f7 | 302 | return c != '/' || prevc != '*'; |
0578f103 | 303 | } |
304 | ||
1c124f85 | 305 | /* Skip a C++ line comment, leaving buffer->cur pointing to the |
306 | terminating newline (or at the end of the buffer if that is reached first). Handles escaped newlines. Returns non-zero | |
307 | if a multiline comment, i.e. if pfile->line changed while skipping. */ | |
f80e83a9 | 308 | static int |
f669338a | 309 | skip_line_comment (pfile) |
310 | cpp_reader *pfile; | |
0578f103 | 311 | { |
f669338a | 312 | cpp_buffer *buffer = pfile->buffer; |
1ea7ed21 | 313 | unsigned int orig_line = pfile->line; |
338fa5f7 | 314 | cppchar_t c; |
f80e83a9 | 315 | |
f669338a | 316 | pfile->state.lexing_comment = 1; |
338fa5f7 | 317 | do |
f80e83a9 | 318 | { |
338fa5f7 | 319 | if (buffer->cur == buffer->rlimit) |
1c124f85 | 320 | goto at_eof; |
f80e83a9 | 321 | /* An escaped newline is skipped by skip_escaped_newlines and so does not end the comment. */ |
338fa5f7 | 322 | c = *buffer->cur++; |
323 | if (c == '?' || c == '\\') | |
1e0ef2fd | 324 | c = skip_escaped_newlines (pfile); |
f80e83a9 | 325 | } |
338fa5f7 | 326 | while (!is_vspace (c)); |
0578f103 | 327 | |
1c124f85 | 328 | /* Step back over the newline, except at EOF. */ |
329 | buffer->cur--; | |
330 | at_eof: | |
331 | ||
f669338a | 332 | pfile->state.lexing_comment = 0; |
1ea7ed21 | 333 | return orig_line != pfile->line; |
f80e83a9 | 334 | } |
0578f103 | 335 | |
338fa5f7 | 336 | /* Called with pfile->buffer->cur one beyond a \t character. Adds to the buffer's |
337 | col_adjust the extra columns the tab spans, so we track the column correctly. */ | |
b86584f6 | 338 | static void |
338fa5f7 | 339 | adjust_column (pfile) |
b86584f6 | 340 | cpp_reader *pfile; |
b86584f6 | 341 | { |
338fa5f7 | 342 | cpp_buffer *buffer = pfile->buffer; |
343 | unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */ | |
b86584f6 | 344 | |
345 | /* Round it up to a multiple of the tabstop, but subtract 1 since the | |
346 | tab itself occupies a character position. */ | |
338fa5f7 | 347 | buffer->col_adjust += (CPP_OPTION (pfile, tabstop) |
348 | - col % CPP_OPTION (pfile, tabstop)) - 1; | |
b86584f6 | 349 | } |
350 | ||
338fa5f7 | 351 | /* Skips whitespace starting with C, leaving buffer->cur pointing at the next non-whitespace character. |
352 | Adjusts the buffer's col_adjust (via adjust_column) to account for tabs. Without this, | |
353 | tokens might be assigned an incorrect column. Returns 0 if the NUL at the end of the buffer (rlimit) is reached, 1 otherwise. */ | |
435fb09b | 354 | static int |
338fa5f7 | 355 | skip_whitespace (pfile, c) |
f80e83a9 | 356 | cpp_reader *pfile; |
338fa5f7 | 357 | cppchar_t c; |
f80e83a9 | 358 | { |
359 | cpp_buffer *buffer = pfile->buffer; | |
338fa5f7 | 360 | unsigned int warned = 0; |
0578f103 | 361 | |
338fa5f7 | 362 | do |
f80e83a9 | 363 | { |
78719282 | 364 | /* Horizontal space always OK. */ |
365 | if (c == ' ') | |
338fa5f7 | 366 | ; |
78719282 | 367 | else if (c == '\t') |
338fa5f7 | 368 | adjust_column (pfile); |
369 | /* Just \f \v or \0 left. */ | |
78719282 | 370 | else if (c == '\0') |
f80e83a9 | 371 | { |
435fb09b | 372 | if (buffer->cur - 1 == buffer->rlimit) /* This NUL is the one sitting at rlimit, not one embedded in the input. */ |
373 | return 0; | |
78719282 | 374 | if (!warned) |
338fa5f7 | 375 | { |
73328dce | 376 | cpp_error (pfile, DL_WARNING, "null character(s) ignored"); |
338fa5f7 | 377 | warned = 1; |
378 | } | |
0578f103 | 379 | } |
79bd622b | 380 | else if (pfile->state.in_directive && CPP_PEDANTIC (pfile)) |
73328dce | 381 | cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, |
382 | CPP_BUF_COL (buffer), | |
383 | "%s in preprocessing directive", | |
384 | c == '\f' ? "form feed" : "vertical tab"); | |
338fa5f7 | 385 | |
338fa5f7 | 386 | c = *buffer->cur++; |
0578f103 | 387 | } |
2c0e001b | 388 | /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */ |
338fa5f7 | 389 | while (is_nvspace (c)); |
390 | ||
1c124f85 | 391 | buffer->cur--; /* Leave the non-whitespace character unread. */ |
435fb09b | 392 | return 1; |
f80e83a9 | 393 | } |
0578f103 | 394 | |
79bd622b | 395 | /* See if the characters of a number token are valid in a name (no |
396 | '.', '+' or '-'). Returns 1 if every character of STRING satisfies is_idchar, 0 otherwise. PFILE is not used. */ | |
397 | static int | |
398 | name_p (pfile, string) | |
399 | cpp_reader *pfile; | |
400 | const cpp_string *string; | |
401 | { | |
402 | unsigned int i; | |
403 | ||
404 | for (i = 0; i < string->len; i++) | |
405 | if (!is_idchar (string->text[i])) | |
406 | return 0; | |
407 | ||
408 | return 1; | |
409 | } | |
410 | ||
66a5287e | 411 | /* Parse an identifier, skipping embedded backslash-newlines. This is |
412 | a critical inner loop. The common case is an identifier which has | |
413 | not been split by backslash-newline, does not contain a dollar | |
414 | sign, and has already been scanned (roughly 10:1 ratio of | |
415 | seen:unseen identifiers in normal code; the distribution is | |
416 | Poisson-like). Second most common case is a new identifier, not | |
417 | split and no dollar sign. The other possibilities are rare and | |
78a11351 | 418 | have been relegated to parse_slow. On entry the identifier's first character is at buffer->cur[-1]. Returns the identifier's hash table node. */ |
338fa5f7 | 419 | static cpp_hashnode * |
66a5287e | 420 | parse_identifier (pfile) |
0578f103 | 421 | cpp_reader *pfile; |
0578f103 | 422 | { |
79bd622b | 423 | cpp_hashnode *result; |
78a11351 | 424 | const U_CHAR *cur, *base; |
66a5287e | 425 | |
426 | /* Fast-path loop. Skim over a normal identifier. | |
427 | N.B. ISIDNUM does not include $. */ | |
435fb09b | 428 | cur = pfile->buffer->cur; |
429 | while (ISIDNUM (*cur)) | |
66a5287e | 430 | cur++; |
66a5287e | 431 | |
432 | /* Check for slow-path cases. */ | |
435fb09b | 433 | if (*cur == '?' || *cur == '\\' || *cur == '$') |
78a11351 | 434 | { |
435 | unsigned int len; | |
436 | /* parse_slow hands back the spelling already copied into permanent storage, along with its length. */||
437 | base = parse_slow (pfile, cur, 0, &len); | |
438 | result = (cpp_hashnode *) | |
439 | ht_lookup (pfile->hash_table, base, len, HT_ALLOCED); | |
440 | } | |
66a5287e | 441 | else |
442 | { | |
78a11351 | 443 | base = pfile->buffer->cur - 1; /* The identifier lies contiguously in the buffer, from its already-consumed first character up to CUR. */ |
444 | pfile->buffer->cur = cur; | |
66a5287e | 445 | result = (cpp_hashnode *) |
446 | ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC); | |
66a5287e | 447 | } |
448 | ||
449 | /* Rarely, identifiers require diagnostics when lexed. | |
450 | XXX Has to be forced out of the fast path. */ | |
451 | if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC) | |
452 | && !pfile->state.skipping, 0)) | |
453 | { | |
454 | /* It is allowed to poison the same identifier twice. */ | |
455 | if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok) | |
73328dce | 456 | cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"", |
66a5287e | 457 | NODE_NAME (result)); |
458 | ||
459 | /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the | |
460 | replacement list of a variadic macro. */ | |
461 | if (result == pfile->spec_nodes.n__VA_ARGS__ | |
462 | && !pfile->state.va_args_ok) | |
73328dce | 463 | cpp_error (pfile, DL_PEDWARN, |
66a5287e | 464 | "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro"); |
465 | } | |
466 | ||
467 | return result; | |
468 | } | |
469 | ||
78a11351 | 470 | /* Slow path. This handles numbers and identifiers which have been |
471 | split, or contain dollar signs. The part of the token from | |
472 | PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is 0 | |
473 | for an identifier, 1 for a number, and 2 for a number with a leading period. Returns a | |
474 | pointer to the token's NUL-terminated spelling in permanent | |
475 | storage, and sets *PLEN to its length (not counting the NUL). */ | |
476 | static U_CHAR * | |
477 | parse_slow (pfile, cur, number_p, plen) | |
66a5287e | 478 | cpp_reader *pfile; |
479 | const U_CHAR *cur; | |
78a11351 | 480 | int number_p; |
481 | unsigned int *plen; | |
66a5287e | 482 | { |
338fa5f7 | 483 | cpp_buffer *buffer = pfile->buffer; |
66a5287e | 484 | const U_CHAR *base = buffer->cur - 1; |
0d086e18 | 485 | struct obstack *stack = &pfile->hash_table->stack; |
78a11351 | 486 | unsigned int c, prevc, saw_dollar = 0; |
487 | ||
488 | /* Place any leading period. */ | |
489 | if (number_p == 2) | |
490 | obstack_1grow (stack, '.'); | |
66a5287e | 491 | |
492 | /* Copy the part of the token which is known to be okay. */ | |
493 | obstack_grow (stack, base, cur - base); | |
f80e83a9 | 494 | |
66a5287e | 495 | /* Now process the part which isn't. We are looking at one of |
496 | '$', '\\', or '?' on entry to this loop. */ | |
78a11351 | 497 | prevc = cur[-1]; |
66a5287e | 498 | c = *cur++; |
499 | buffer->cur = cur; | |
78a11351 | 500 | for (;;) |
f80e83a9 | 501 | { |
78a11351 | 502 | /* Potential escaped newline? First record where BACKUP () should return to. */ |
503 | buffer->backup_to = buffer->cur - 1; | |
504 | if (c == '?' || c == '\\') | |
505 | c = skip_escaped_newlines (pfile); | |
506 | /* In a number, a '.' or a sign that VALID_SIGN accepts after PREVC also continues the token. */||
507 | if (!is_idchar (c)) | |
508 | { | |
509 | if (!number_p) | |
510 | break; | |
511 | if (c != '.' && !VALID_SIGN (c, prevc)) | |
512 | break; | |
513 | } | |
514 | ||
515 | /* Handle normal identifier characters in this loop. */ | |
516 | do | |
66a5287e | 517 | { |
78a11351 | 518 | prevc = c; |
66a5287e | 519 | obstack_1grow (stack, c); |
0578f103 | 520 | |
66a5287e | 521 | if (c == '$') |
522 | saw_dollar++; | |
71aa9da4 | 523 | |
66a5287e | 524 | c = *buffer->cur++; |
525 | } | |
78a11351 | 526 | while (is_idchar (c)); |
f80e83a9 | 527 | } |
338fa5f7 | 528 | |
435fb09b | 529 | /* Step back over the unwanted char. */ |
1c124f85 | 530 | BACKUP (); |
79bd622b | 531 | |
20dd417a | 532 | /* $ is not an identifier character in the standard, but is commonly |
338fa5f7 | 533 | accepted as an extension. Don't warn about it in skipped |
534 | conditional blocks. */ | |
5e878517 | 535 | if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping) |
73328dce | 536 | cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number"); |
338fa5f7 | 537 | |
78a11351 | 538 | /* Identifiers and numbers are null-terminated. The length is taken before the NUL is appended. */ |
539 | *plen = obstack_object_size (stack); | |
0d086e18 | 540 | obstack_1grow (stack, '\0'); |
78a11351 | 541 | return obstack_finish (stack); |
0578f103 | 542 | } |
543 | ||
e484a1cc | 544 | /* Parse a number whose first character is at buffer->cur[-1], skipping embedded |
545 | backslash-newlines. LEADING_PERIOD is 1 if there was a "." | |
546 | before that character and 0 otherwise (it is added to lengths, so other values are not supported). Place the NUL-terminated spelling and its length in NUMBER. */ | |
0578f103 | 547 | static void |
78a11351 | 548 | parse_number (pfile, number, leading_period) |
0578f103 | 549 | cpp_reader *pfile; |
338fa5f7 | 550 | cpp_string *number; |
79bd622b | 551 | int leading_period; |
0578f103 | 552 | { |
78a11351 | 553 | const U_CHAR *cur; |
0578f103 | 554 | |
78a11351 | 555 | /* Fast-path loop. Skim over a normal number. |
556 | N.B. ISIDNUM does not include $. */ | |
557 | cur = pfile->buffer->cur; | |
558 | while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1])) | |
559 | cur++; | |
f669338a | 560 | |
78a11351 | 561 | /* Check for slow-path cases. */ |
562 | if (*cur == '?' || *cur == '\\' || *cur == '$') | |
563 | number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len); | |
564 | else | |
f80e83a9 | 565 | { |
78a11351 | 566 | const U_CHAR *base = pfile->buffer->cur - 1; |
567 | U_CHAR *dest; | |
338fa5f7 | 568 | /* Copy the spelling, plus any leading period and a terminating NUL, into freshly allocated storage. */ |
78a11351 | 569 | number->len = cur - base + leading_period; |
570 | dest = _cpp_unaligned_alloc (pfile, number->len + 1); | |
571 | dest[number->len] = '\0'; | |
572 | number->text = dest; | |
0578f103 | 573 | |
78a11351 | 574 | if (leading_period) |
575 | *dest++ = '.'; | |
576 | memcpy (dest, base, cur - base); | |
577 | pfile->buffer->cur = cur; | |
0578f103 | 578 | } |
338fa5f7 | 579 | } |
580 | ||
79bd622b | 581 | /* Subroutine of parse_string. DEST points just past the characters stored so far, which start at BUFF_FRONT (pfile->u_buff). Returns non-zero if a terminator seen at this point is not escaped, i.e. it is preceded by an even number of backslashes, or if state.angled_headers is set. */ |
582 | static int | |
583 | unescaped_terminator_p (pfile, dest) | |
584 | cpp_reader *pfile; | |
585 | const unsigned char *dest; | |
586 | { | |
587 | const unsigned char *start, *temp; | |
588 | ||
589 | /* In #include-style directives, terminators are not escapable. */ | |
590 | if (pfile->state.angled_headers) | |
591 | return 1; | |
592 | ||
1fdf6039 | 593 | start = BUFF_FRONT (pfile->u_buff); |
79bd622b | 594 | |
595 | /* An odd number of consecutive backslashes represents an escaped | |
596 | terminator. Walk TEMP back over the run of backslashes that ends at DEST. */ | |
597 | for (temp = dest; temp > start && temp[-1] == '\\'; temp--) | |
598 | ; | |
599 | ||
600 | return ((dest - temp) & 1) == 0; | |
601 | } | |
602 | ||
338fa5f7 | 603 | /* Parses a string, character constant, or angle-bracketed header file |
4b0c16ee | 604 | name. Handles embedded trigraphs and escaped newlines. The stored |
605 | string is guaranteed NUL-terminated, but it is not guaranteed that | |
606 | this is the first NUL since embedded NULs are preserved. | |
0578f103 | 607 | The spelling is accumulated in pfile->u_buff and recorded in TOKEN->val.str; the closing TERMINATOR is not stored. |
1e0ef2fd | 608 | When this function returns, buffer->cur points to the next |
609 | character to be processed. */ | |
f80e83a9 | 610 | static void |
338fa5f7 | 611 | parse_string (pfile, token, terminator) |
0578f103 | 612 | cpp_reader *pfile; |
f80e83a9 | 613 | cpp_token *token; |
338fa5f7 | 614 | cppchar_t terminator; |
0578f103 | 615 | { |
f80e83a9 | 616 | cpp_buffer *buffer = pfile->buffer; |
79bd622b | 617 | unsigned char *dest, *limit; |
338fa5f7 | 618 | cppchar_t c; |
34cf330f | 619 | bool warned_nulls = false; |
338fa5f7 | 620 | |
1fdf6039 | 621 | dest = BUFF_FRONT (pfile->u_buff); |
622 | limit = BUFF_LIMIT (pfile->u_buff); | |
79bd622b | 623 | |
338fa5f7 | 624 | for (;;) |
0578f103 | 625 | { |
1e0ef2fd | 626 | /* We need room for another char, possibly the terminating NUL. The buffer front and limit are re-read after extending, and DEST is re-based on the new front. */ |
1fdf6039 | 627 | if ((size_t) (limit - dest) < 1) |
628 | { | |
629 | size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff); | |
e6a5f963 | 630 | _cpp_extend_buff (pfile, &pfile->u_buff, 2); |
1fdf6039 | 631 | dest = BUFF_FRONT (pfile->u_buff) + len_so_far; |
632 | limit = BUFF_LIMIT (pfile->u_buff); | |
633 | } | |
4b0c16ee | 634 | |
338fa5f7 | 635 | /* Handle trigraphs, escaped newlines etc. */ |
1e0ef2fd | 636 | c = *buffer->cur++; |
338fa5f7 | 637 | if (c == '?' || c == '\\') |
1e0ef2fd | 638 | c = skip_escaped_newlines (pfile); |
0578f103 | 639 | /* A terminator that is escaped falls through and is stored like any other character. */ |
1e0ef2fd | 640 | if (c == terminator) |
0578f103 | 641 | { |
1e0ef2fd | 642 | if (unescaped_terminator_p (pfile, dest)) |
643 | break; | |
338fa5f7 | 644 | } |
645 | else if (is_vspace (c)) | |
646 | { | |
34cf330f | 647 | /* No string literal may extend over multiple lines. In |
648 | assembly language, suppress the error except for <> | |
649 | includes. This is a kludge around not knowing where | |
650 | comments are. */ | |
651 | unterminated: | |
652 | if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>') | |
73328dce | 653 | cpp_error (pfile, DL_ERROR, "missing terminating %c character", |
654 | terminator); | |
34cf330f | 655 | buffer->cur--; |
656 | break; | |
338fa5f7 | 657 | } |
435fb09b | 658 | else if (c == '\0') |
338fa5f7 | 659 | { |
435fb09b | 660 | if (buffer->cur - 1 == buffer->rlimit) |
34cf330f | 661 | goto unterminated; |
435fb09b | 662 | if (!warned_nulls) |
663 | { | |
664 | warned_nulls = true; | |
73328dce | 665 | cpp_error (pfile, DL_WARNING, |
666 | "null character(s) preserved in literal"); | |
435fb09b | 667 | } |
0578f103 | 668 | } |
0578f103 | 669 | |
79bd622b | 670 | *dest++ = c; |
0578f103 | 671 | } |
672 | ||
4b0c16ee | 673 | *dest = '\0'; |
0578f103 | 674 | /* Hand the text to the token and advance the buffer front past it and its NUL. */ |
1fdf6039 | 675 | token->val.str.text = BUFF_FRONT (pfile->u_buff); |
676 | token->val.str.len = dest - BUFF_FRONT (pfile->u_buff); | |
677 | BUFF_FRONT (pfile->u_buff) = dest + 1; | |
338fa5f7 | 678 | } |
f80e83a9 | 679 | |
79bd622b | 680 | /* Save the comment just lexed in TOKEN as a CPP_COMMENT. FROM points one past the comment's initial '/' and the comment extends up to pfile->buffer->cur; TYPE is '/' for a C++ comment. The stored comment includes the comment start and any terminator, and is not NUL-terminated. */ |
2c63d6c8 | 681 | static void |
d3f7919d | 682 | save_comment (pfile, token, from, type) |
338fa5f7 | 683 | cpp_reader *pfile; |
f80e83a9 | 684 | cpp_token *token; |
685 | const unsigned char *from; | |
d3f7919d | 686 | cppchar_t type; |
2c63d6c8 | 687 | { |
f80e83a9 | 688 | unsigned char *buffer; |
d3f7919d | 689 | unsigned int len, clen; |
338fa5f7 | 690 | |
f0495c2c | 691 | len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */ |
1c124f85 | 692 | |
a543b315 | 693 | /* C++ comments probably (not definitely) have moved past a new |
694 | line, which we don't want to save in the comment. */ | |
1c124f85 | 695 | if (is_vspace (pfile->buffer->cur[-1])) |
a543b315 | 696 | len--; |
d3f7919d | 697 | |
698 | /* If we are currently in a directive, then we need to store all | |
699 | C++ comments as C comments internally, and so we need to | |
700 | allocate a little extra space in that case. | |
701 | ||
702 | Note that the only time we encounter a directive here is | |
703 | when we are saving comments in a "#define". */ | |
704 | clen = (pfile->state.in_directive && type == '/') ? len + 2 : len; | |
705 | ||
706 | buffer = _cpp_unaligned_alloc (pfile, clen); | |
f80e83a9 | 707 | |
f80e83a9 | 708 | token->type = CPP_COMMENT; |
d3f7919d | 709 | token->val.str.len = clen; |
338fa5f7 | 710 | token->val.str.text = buffer; |
0578f103 | 711 | |
f0495c2c | 712 | buffer[0] = '/'; |
713 | memcpy (buffer + 1, from, len - 1); | |
d3f7919d | 714 | |
715 | /* Finish conversion to a C comment, if necessary: overwrite the second character with '*' and fill the two extra bytes with the closing delimiter. */ | |
716 | if (pfile->state.in_directive && type == '/') | |
717 | { | |
718 | buffer[1] = '*'; | |
719 | buffer[clen - 2] = '*'; | |
720 | buffer[clen - 1] = '/'; | |
721 | } | |
338fa5f7 | 722 | } |
0578f103 | 723 | |
83dcbb5c | 724 | /* Allocate COUNT tokens for RUN. Sets RUN's base and limit to bracket the new array and its next pointer to NULL; its prev pointer is left untouched. */ |
725 | void | |
726 | _cpp_init_tokenrun (run, count) | |
727 | tokenrun *run; | |
728 | unsigned int count; | |
729 | { | |
730 | run->base = xnewvec (cpp_token, count); | |
731 | run->limit = run->base + count; | |
732 | run->next = NULL; | |
733 | } | |
734 | ||
735 | /* Returns the tokenrun after RUN. If there is none, first creates one with room for 250 tokens and links it after RUN. */ | |
736 | static tokenrun * | |
737 | next_tokenrun (run) | |
738 | tokenrun *run; | |
739 | { | |
740 | if (run->next == NULL) | |
741 | { | |
742 | run->next = xnew (tokenrun); | |
fb5ab82c | 743 | run->next->prev = run; |
83dcbb5c | 744 | _cpp_init_tokenrun (run->next, 250); |
745 | } | |
746 | ||
747 | return run->next; | |
748 | } | |
749 | ||
f9b5f742 | 750 | /* Allocate a single token that is invalidated at the same time as the |
751 | rest of the tokens on the line. Has its line and col set to the | |
752 | same as the last lexed token, so that diagnostics appear in the | |
753 | right place. */ | |
754 | cpp_token * | |
755 | _cpp_temp_token (pfile) | |
756 | cpp_reader *pfile; | |
757 | { | |
758 | cpp_token *old, *result; | |
759 | /* Take the previous token's address before possibly switching runs below. NOTE(review): assumes cur_token - 1 is a valid, already-lexed token; confirm with callers. */||
760 | old = pfile->cur_token - 1; | |
761 | if (pfile->cur_token == pfile->cur_run->limit) | |
762 | { | |
763 | pfile->cur_run = next_tokenrun (pfile->cur_run); | |
764 | pfile->cur_token = pfile->cur_run->base; | |
765 | } | |
766 | ||
767 | result = pfile->cur_token++; | |
768 | result->line = old->line; | |
769 | result->col = old->col; | |
770 | return result; | |
771 | } | |
772 | ||
10b4496a | 773 | /* Lex a token into RESULT (external interface). Takes care of issues |
774 | like directive handling, token lookahead, multiple include | |
3fb1e43b | 775 | optimization and skipping. Returns a pointer to the lexed token. */ |
c00e481c | 776 | const cpp_token * |
777 | _cpp_lex_token (pfile) | |
0578f103 | 778 | cpp_reader *pfile; |
83dcbb5c | 779 | { |
fb5ab82c | 780 | cpp_token *result; |
83dcbb5c | 781 | /* Loop until there is a token to return: handled directives and tokens in skipped blocks are passed over. */ |
fb5ab82c | 782 | for (;;) |
83dcbb5c | 783 | { |
fb5ab82c | 784 | if (pfile->cur_token == pfile->cur_run->limit) |
83dcbb5c | 785 | { |
fb5ab82c | 786 | pfile->cur_run = next_tokenrun (pfile->cur_run); |
787 | pfile->cur_token = pfile->cur_run->base; | |
83dcbb5c | 788 | } |
789 | /* With lookaheads pending, take the token at cur_token instead of lexing a new one (presumably it was lexed earlier and then backed up over). */||
fb5ab82c | 790 | if (pfile->lookaheads) |
10b4496a | 791 | { |
792 | pfile->lookaheads--; | |
793 | result = pfile->cur_token++; | |
794 | } | |
fb5ab82c | 795 | else |
10b4496a | 796 | result = _cpp_lex_direct (pfile); |
fb5ab82c | 797 | |
798 | if (result->flags & BOL) | |
83dcbb5c | 799 | { |
fb5ab82c | 800 | /* Is this a directive? If _cpp_handle_directive returns |
801 | false, it is an assembler #. */ | |
802 | if (result->type == CPP_HASH | |
d6af0368 | 803 | /* 6.10.3 p 11: Directives in a list of macro arguments |
804 | gives undefined behavior. This implementation | |
805 | handles the directive as normal. */ | |
806 | && pfile->state.parsing_args != 1 | |
fb5ab82c | 807 | && _cpp_handle_directive (pfile, result->flags & PREV_WHITE)) |
808 | continue; | |
5621a364 | 809 | if (pfile->cb.line_change && !pfile->state.skipping) |
810 | (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args); | |
83dcbb5c | 811 | } |
83dcbb5c | 812 | |
fb5ab82c | 813 | /* We don't skip tokens in directives. */ |
814 | if (pfile->state.in_directive) | |
815 | break; | |
83dcbb5c | 816 | |
fb5ab82c | 817 | /* Outside a directive, invalidate controlling macros. At file |
10b4496a | 818 | EOF, _cpp_lex_direct takes care of popping the buffer, so we never |
fb5ab82c | 819 | get here and MI optimisation works. */ |
83dcbb5c | 820 | pfile->mi_valid = false; |
fb5ab82c | 821 | /* While skipping, keep looping; only a CPP_EOF token is returned. */ |
822 | if (!pfile->state.skipping || result->type == CPP_EOF) | |
823 | break; | |
83dcbb5c | 824 | } |
825 | ||
c00e481c | 826 | return result; |
83dcbb5c | 827 | } |
828 | ||
1c124f85 | 829 | #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) /* Sets result->type to THEN_TYPE when get_effective_char (pfile) returns CHAR; otherwise calls BACKUP () and sets ELSE_TYPE. Relies on `pfile' and `result' being in scope at the point of use. */ \ |
830 | do { \ | |
831 | if (get_effective_char (pfile) == CHAR) \ | |
832 | result->type = THEN_TYPE; \ | |
833 | else \ | |
834 | { \ | |
835 | BACKUP (); \ | |
836 | result->type = ELSE_TYPE; \ | |
837 | } \ | |
838 | } while (0) | |
839 | ||
10b4496a | 840 | /* Lex a token into pfile->cur_token, which is also incremented, to |
841 | get diagnostics pointing to the correct location. | |
842 | ||
843 | Does not handle issues such as token lookahead, multiple-include | |
844 | optimisation, directives, skipping etc. This function is only | |
845 | suitable for use by _cpp_lex_token, and in special cases like | |
846 | lex_expansion_token which doesn't care for any of these issues. | |
847 | ||
848 | When meeting a newline, returns CPP_EOF if parsing a directive, | |
849 | otherwise returns to the start of the token buffer if permissible. | |
850 | Returns a pointer to the lexed token. */ | |
851 | cpp_token * | |
852 | _cpp_lex_direct (pfile) | |
83dcbb5c | 853 | cpp_reader *pfile; |
0578f103 | 854 | { |
338fa5f7 | 855 | cppchar_t c; |
230f0943 | 856 | cpp_buffer *buffer; |
338fa5f7 | 857 | const unsigned char *comment_start; |
10b4496a | 858 | cpp_token *result = pfile->cur_token++; |
0653b94e | 859 |
83dcbb5c | 860 | fresh_line: /* Re-entered after a newline outside a directive, and after popping a buffer. */ |
230f0943 | 861 | buffer = pfile->buffer; |
8c2e2fc5 | 862 | result->flags = buffer->saved_flags; |
863 | buffer->saved_flags = 0; | |
83dcbb5c | 864 | update_tokens_line: |
36a0aa7c | 865 | result->line = pfile->line; |
f80e83a9 | 866 |
83dcbb5c | 867 | skipped_white: |
1c124f85 | 868 | c = *buffer->cur++; |
83dcbb5c | 869 | result->col = CPP_BUF_COLUMN (buffer, buffer->cur); |
83dcbb5c | 870 |
871 | trigraph: | |
338fa5f7 | 872 | switch (c) |
0578f103 | 873 | { |
435fb09b | 874 | case ' ': case '\t': case '\f': case '\v': case '\0': |
875 | result->flags |= PREV_WHITE; | |
876 | if (skip_whitespace (pfile, c)) | |
877 | goto skipped_white; | |
878 | ||
879 | /* skip_whitespace returned zero, which is handled as EOF. */ | |
880 | buffer->cur--; | |
fb5ab82c | 881 | buffer->saved_flags = BOL; |
83dcbb5c | 882 | if (!pfile->state.parsing_args && !pfile->state.in_directive) |
4dfe8b74 | 883 | { |
fb5ab82c | 884 | if (buffer->cur != buffer->line_base) |
83dcbb5c | 885 | { |
886 | /* Non-empty files should end in a newline. Don't warn | |
887 | for command line and _Pragma buffers. */ | |
888 | if (!buffer->from_stage3) | |
73328dce | 889 | cpp_error (pfile, DL_PEDWARN, "no newline at end of file"); |
1e0ef2fd | 890 | handle_newline (pfile); |
5475a165 | 891 | } |
fb5ab82c | 892 |
893 | /* Don't pop the last buffer. */ | |
894 | if (buffer->prev) | |
895 | { | |
896 | unsigned char stop = buffer->return_at_eof; | |
897 | ||
898 | _cpp_pop_buffer (pfile); | |
899 | if (!stop) | |
900 | goto fresh_line; | |
901 | } | |
4dfe8b74 | 902 | } |
338fa5f7 | 903 | result->type = CPP_EOF; |
83dcbb5c | 904 | break; |
0578f103 | 905 |
338fa5f7 | 906 | case '\n': case '\r': |
1e0ef2fd | 907 | handle_newline (pfile); |
fb5ab82c | 908 | buffer->saved_flags = BOL; |
909 | if (! pfile->state.in_directive) | |
0578f103 | 910 | { |
f9b5f742 | 911 | if (pfile->state.parsing_args == 2) |
912 | buffer->saved_flags |= PREV_WHITE; | |
fb5ab82c | 913 | if (!pfile->keep_tokens) /* Restart token storage at the beginning of the base run. */ |
914 | { | |
915 | pfile->cur_run = &pfile->base_run; | |
916 | result = pfile->base_run.base; | |
917 | pfile->cur_token = result + 1; | |
918 | } | |
919 | goto fresh_line; | |
0578f103 | 920 | } |
83dcbb5c | 921 | result->type = CPP_EOF; |
922 | break; | |
732cb4c9 | 923 |
338fa5f7 | 924 | case '?': |
925 | case '\\': | |
926 | /* These could start an escaped newline, or '?' a trigraph. Let | |
927 | skip_escaped_newlines do all the work. */ | |
928 | { | |
1ea7ed21 | 929 | unsigned int line = pfile->line; |
338fa5f7 | 930 |
1e0ef2fd | 931 | c = skip_escaped_newlines (pfile); |
1ea7ed21 | 932 | if (line != pfile->line) |
1e0ef2fd | 933 | { |
1c124f85 | 934 | buffer->cur--; |
1e0ef2fd | 935 | /* We had at least one escaped newline of some sort. |
936 | Update the token's line and column. */ | |
83dcbb5c | 937 | goto update_tokens_line; |
1e0ef2fd | 938 | } |
1c124f85 | 939 | } |
338fa5f7 | 940 |
1c124f85 | 941 | /* We are either the original '?' or '\\', or a trigraph. */ |
942 | if (c == '?') | |
338fa5f7 | 943 | result->type = CPP_QUERY; |
1c124f85 | 944 | else if (c == '\\') |
945 | goto random_char; | |
946 | else | |
947 | goto trigraph; | |
338fa5f7 | 948 | break; |
732cb4c9 | 949 |
338fa5f7 | 950 | case '0': case '1': case '2': case '3': case '4': |
951 | case '5': case '6': case '7': case '8': case '9': | |
952 | result->type = CPP_NUMBER; | |
78a11351 | 953 | parse_number (pfile, &result->val.str, 0); |
338fa5f7 | 954 | break; |
732cb4c9 | 955 |
78c551ad | 956 | case 'L': |
957 | /* 'L' may introduce wide characters or strings. */ | |
958 | { | |
959 | const unsigned char *pos = buffer->cur; | |
338fa5f7 | 960 |
78c551ad | 961 | c = get_effective_char (pfile); |
962 | if (c == '\'' || c == '"') | |
963 | { | |
964 | result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR); | |
965 | parse_string (pfile, result, c); | |
966 | break; | |
967 | } | |
968 | buffer->cur = pos; | |
969 | } | |
970 | /* Fall through. */ | |
971 | ||
972 | start_ident: | |
338fa5f7 | 973 | case '_': |
974 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
975 | case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': | |
976 | case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': | |
977 | case 's': case 't': case 'u': case 'v': case 'w': case 'x': | |
978 | case 'y': case 'z': | |
979 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
78c551ad | 980 | case 'G': case 'H': case 'I': case 'J': case 'K': |
338fa5f7 | 981 | case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': |
982 | case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
983 | case 'Y': case 'Z': | |
984 | result->type = CPP_NAME; | |
66a5287e | 985 | result->val.node = parse_identifier (pfile); |
338fa5f7 | 986 |
338fa5f7 | 987 | /* Convert named operators to their proper types. The identifier node stays in val.node and NAMED_OP is set in the flags. */ |
78c551ad | 988 | if (result->val.node->flags & NODE_OPERATOR) |
338fa5f7 | 989 | { |
990 | result->flags |= NAMED_OP; | |
79bd622b | 991 | result->type = result->val.node->value.operator; |
338fa5f7 | 992 | } |
993 | break; | |
994 | ||
995 | case '\'': | |
996 | case '"': | |
997 | result->type = c == '"' ? CPP_STRING: CPP_CHAR; | |
338fa5f7 | 998 | parse_string (pfile, result, c); |
999 | break; | |
f80e83a9 | 1000 |
338fa5f7 | 1001 | case '/': |
f0495c2c | 1002 | /* A potential block or line comment. */ |
1003 | comment_start = buffer->cur; | |
c808d026 | 1004 | c = get_effective_char (pfile); |
1c124f85 | 1005 |
f0495c2c | 1006 | if (c == '*') |
1007 | { | |
338fa5f7 | 1008 | if (skip_block_comment (pfile)) |
73328dce | 1009 | cpp_error (pfile, DL_ERROR, "unterminated comment"); |
338fa5f7 | 1010 | } |
1c124f85 | 1011 | else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments) |
1012 | || CPP_IN_SYSTEM_HEADER (pfile))) | |
338fa5f7 | 1013 | { |
5db5d057 | 1014 | /* Warn about comments only if pedantically GNUC89, and not |
1015 | in system headers. The warned_cplusplus_comments flag limits the warning to once per buffer. */ | |
1016 | if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile) | |
66914e49 | 1017 | && ! buffer->warned_cplusplus_comments) |
f80e83a9 | 1018 | { |
73328dce | 1019 | cpp_error (pfile, DL_PEDWARN, |
f0495c2c | 1020 | "C++ style comments are not allowed in ISO C89"); |
73328dce | 1021 | cpp_error (pfile, DL_PEDWARN, |
1022 | "(this will be reported only once per input file)"); | |
f0495c2c | 1023 | buffer->warned_cplusplus_comments = 1; |
1024 | } | |
338fa5f7 | 1025 |
e1caf668 | 1026 | if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments)) |
73328dce | 1027 | cpp_error (pfile, DL_WARNING, "multi-line comment"); |
f0495c2c | 1028 | } |
1c124f85 | 1029 | else if (c == '=') |
1030 | { | |
1031 | result->type = CPP_DIV_EQ; | |
1032 | break; | |
1033 | } | |
1034 | else | |
1035 | { | |
1036 | BACKUP (); | |
1037 | result->type = CPP_DIV; | |
1038 | break; | |
1039 | } | |
338fa5f7 | 1040 |
f0495c2c | 1041 | if (!pfile->state.save_comments) |
1042 | { | |
1043 | result->flags |= PREV_WHITE; /* A discarded comment is recorded as preceding whitespace. */ | |
83dcbb5c | 1044 | goto update_tokens_line; |
338fa5f7 | 1045 | } |
f0495c2c | 1046 |
1047 | /* Save the comment as a token in its own right. */ | |
d3f7919d | 1048 | save_comment (pfile, result, comment_start, c); |
fb5ab82c | 1049 | break; |
338fa5f7 | 1050 |
1051 | case '<': | |
1052 | if (pfile->state.angled_headers) | |
1053 | { | |
1054 | result->type = CPP_HEADER_NAME; | |
1c124f85 | 1055 | parse_string (pfile, result, '>'); |
1056 | break; | |
338fa5f7 | 1057 | } |
0578f103 | 1058 |
c808d026 | 1059 | c = get_effective_char (pfile); |
338fa5f7 | 1060 | if (c == '=') |
1c124f85 | 1061 | result->type = CPP_LESS_EQ; |
338fa5f7 | 1062 | else if (c == '<') |
1c124f85 | 1063 | IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT); |
338fa5f7 | 1064 | else if (c == '?' && CPP_OPTION (pfile, cplusplus)) |
1c124f85 | 1065 | IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN); |
338fa5f7 | 1066 | else if (c == ':' && CPP_OPTION (pfile, digraphs)) |
1067 | { | |
1c124f85 | 1068 | result->type = CPP_OPEN_SQUARE; |
338fa5f7 | 1069 | result->flags |= DIGRAPH; |
1070 | } | |
1071 | else if (c == '%' && CPP_OPTION (pfile, digraphs)) | |
1072 | { | |
1c124f85 | 1073 | result->type = CPP_OPEN_BRACE; |
338fa5f7 | 1074 | result->flags |= DIGRAPH; |
1075 | } | |
1c124f85 | 1076 | else |
1077 | { | |
1078 | BACKUP (); | |
1079 | result->type = CPP_LESS; | |
1080 | } | |
338fa5f7 | 1081 | break; |
1082 | ||
1083 | case '>': | |
c808d026 | 1084 | c = get_effective_char (pfile); |
338fa5f7 | 1085 | if (c == '=') |
1c124f85 | 1086 | result->type = CPP_GREATER_EQ; |
338fa5f7 | 1087 | else if (c == '>') |
1c124f85 | 1088 | IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT); |
338fa5f7 | 1089 | else if (c == '?' && CPP_OPTION (pfile, cplusplus)) |
1c124f85 | 1090 | IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX); |
1091 | else | |
338fa5f7 | 1092 | { |
1c124f85 | 1093 | BACKUP (); |
1094 | result->type = CPP_GREATER; | |
338fa5f7 | 1095 | } |
1096 | break; | |
1097 | ||
f669338a | 1098 | case '%': |
1c124f85 | 1099 | c = get_effective_char (pfile); |
1100 | if (c == '=') | |
1101 | result->type = CPP_MOD_EQ; | |
1102 | else if (CPP_OPTION (pfile, digraphs) && c == ':') | |
1103 | { | |
1104 | result->flags |= DIGRAPH; | |
1105 | result->type = CPP_HASH; | |
1106 | if (get_effective_char (pfile) == '%') | |
1107 | { | |
1108 | const unsigned char *pos = buffer->cur; | |
1109 | ||
1110 | if (get_effective_char (pfile) == ':') | |
1111 | result->type = CPP_PASTE; | |
1112 | else | |
1113 | buffer->cur = pos - 1; | |
1114 | } | |
1115 | else | |
1116 | BACKUP (); | |
1117 | } | |
1118 | else if (CPP_OPTION (pfile, digraphs) && c == '>') | |
1119 | { | |
1120 | result->flags |= DIGRAPH; | |
1121 | result->type = CPP_CLOSE_BRACE; | |
1122 | } | |
1123 | else | |
1124 | { | |
1125 | BACKUP (); | |
1126 | result->type = CPP_MOD; | |
1127 | } | |
338fa5f7 | 1128 | break; |
1129 | ||
f669338a | 1130 | case '.': |
1c124f85 | 1131 | result->type = CPP_DOT; |
1132 | c = get_effective_char (pfile); | |
1133 | if (c == '.') | |
1134 | { | |
1135 | const unsigned char *pos = buffer->cur; | |
1136 | ||
1137 | if (get_effective_char (pfile) == '.') | |
1138 | result->type = CPP_ELLIPSIS; | |
1139 | else | |
1140 | buffer->cur = pos - 1; | |
1141 | } | |
1142 | /* All known character sets have 0...9 contiguous. */ | |
66a33570 | 1143 | else if (ISDIGIT (c)) |
1c124f85 | 1144 | { |
1145 | result->type = CPP_NUMBER; | |
78a11351 | 1146 | parse_number (pfile, &result->val.str, 1); |
1c124f85 | 1147 | } |
1148 | else if (c == '*' && CPP_OPTION (pfile, cplusplus)) | |
1149 | result->type = CPP_DOT_STAR; | |
1150 | else | |
1151 | BACKUP (); | |
338fa5f7 | 1152 | break; |
0578f103 | 1153 |
338fa5f7 | 1154 | case '+': |
c808d026 | 1155 | c = get_effective_char (pfile); |
1c124f85 | 1156 | if (c == '+') |
1157 | result->type = CPP_PLUS_PLUS; | |
1158 | else if (c == '=') | |
1159 | result->type = CPP_PLUS_EQ; | |
1160 | else | |
1161 | { | |
1162 | BACKUP (); | |
1163 | result->type = CPP_PLUS; | |
1164 | } | |
338fa5f7 | 1165 | break; |
ac0749c7 | 1166 |
338fa5f7 | 1167 | case '-': |
c808d026 | 1168 | c = get_effective_char (pfile); |
338fa5f7 | 1169 | if (c == '>') |
1170 | { | |
1c124f85 | 1171 | result->type = CPP_DEREF; |
1172 | if (CPP_OPTION (pfile, cplusplus)) | |
1173 | { | |
1174 | if (get_effective_char (pfile) == '*') | |
1175 | result->type = CPP_DEREF_STAR; | |
1176 | else | |
1177 | BACKUP (); | |
1178 | } | |
338fa5f7 | 1179 | } |
338fa5f7 | 1180 | else if (c == '-') |
1c124f85 | 1181 | result->type = CPP_MINUS_MINUS; |
1182 | else if (c == '=') | |
1183 | result->type = CPP_MINUS_EQ; | |
1184 | else | |
1185 | { | |
1186 | BACKUP (); | |
1187 | result->type = CPP_MINUS; | |
1188 | } | |
338fa5f7 | 1189 | break; |
0578f103 | 1190 |
338fa5f7 | 1191 | case '&': |
c808d026 | 1192 | c = get_effective_char (pfile); |
1c124f85 | 1193 | if (c == '&') |
1194 | result->type = CPP_AND_AND; | |
1195 | else if (c == '=') | |
1196 | result->type = CPP_AND_EQ; | |
1197 | else | |
1198 | { | |
1199 | BACKUP (); | |
1200 | result->type = CPP_AND; | |
1201 | } | |
338fa5f7 | 1202 | break; |
1203 | ||
338fa5f7 | 1204 | case '|': |
c808d026 | 1205 | c = get_effective_char (pfile); |
1c124f85 | 1206 | if (c == '|') |
1207 | result->type = CPP_OR_OR; | |
1208 | else if (c == '=') | |
1209 | result->type = CPP_OR_EQ; | |
1210 | else | |
1211 | { | |
1212 | BACKUP (); | |
1213 | result->type = CPP_OR; | |
1214 | } | |
338fa5f7 | 1215 | break; |
0578f103 | 1216 |
338fa5f7 | 1217 | case ':': |
c808d026 | 1218 | c = get_effective_char (pfile); |
338fa5f7 | 1219 | if (c == ':' && CPP_OPTION (pfile, cplusplus)) |
1c124f85 | 1220 | result->type = CPP_SCOPE; |
338fa5f7 | 1221 | else if (c == '>' && CPP_OPTION (pfile, digraphs)) |
1222 | { | |
1223 | result->flags |= DIGRAPH; | |
1c124f85 | 1224 | result->type = CPP_CLOSE_SQUARE; |
1225 | } | |
1226 | else | |
1227 | { | |
1228 | BACKUP (); | |
1229 | result->type = CPP_COLON; | |
338fa5f7 | 1230 | } |
1231 | break; | |
0578f103 | 1232 |
1c124f85 | 1233 | case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break; |
1234 | case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break; | |
1235 | case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break; | |
1236 | case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break; | |
1237 | case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break; | |
1238 | ||
338fa5f7 | 1239 | case '~': result->type = CPP_COMPL; break; |
1240 | case ',': result->type = CPP_COMMA; break; | |
1241 | case '(': result->type = CPP_OPEN_PAREN; break; | |
1242 | case ')': result->type = CPP_CLOSE_PAREN; break; | |
1243 | case '[': result->type = CPP_OPEN_SQUARE; break; | |
1244 | case ']': result->type = CPP_CLOSE_SQUARE; break; | |
1245 | case '{': result->type = CPP_OPEN_BRACE; break; | |
1246 | case '}': result->type = CPP_CLOSE_BRACE; break; | |
1247 | case ';': result->type = CPP_SEMICOLON; break; | |
1248 | ||
9ee99ac6 | 1249 | /* @ is a punctuator in Objective C. */ |
1250 | case '@': result->type = CPP_ATSIGN; break; | |
338fa5f7 | 1251 |
78c551ad | 1252 | case '$': |
1253 | if (CPP_OPTION (pfile, dollars_in_ident)) | |
1254 | goto start_ident; | |
1255 | /* Fall through... */ | |
1256 | ||
338fa5f7 | 1257 | random_char: |
1258 | default: | |
1259 | result->type = CPP_OTHER; | |
33344a1c | 1260 | result->val.c = c; |
338fa5f7 | 1261 | break; |
1262 | } | |
fb5ab82c | 1263 |
1264 | return result; | |
338fa5f7 | 1265 | } |
1266 | ||
e484a1cc | 1267 | /* An upper bound on the number of bytes needed to spell TOKEN, |
79bd622b | 1268 | including preceding whitespace. Named operators (NAMED_OP set) have an operator spelling category but are spelled from their identifier node by cpp_spell_token, so they are sized by NODE_LEN like identifiers rather than as zero-length. */ |
1269 | unsigned int | |
1270 | cpp_token_len (token) | |
1271 | const cpp_token *token; | |
338fa5f7 | 1272 | { |
79bd622b | 1273 | unsigned int len; |
cfad5579 | 1274 |
79bd622b | 1275 | switch (TOKEN_SPELL (token)) |
f80e83a9 | 1276 | { |
c86dbc5b | 1277 | default: len = (token->flags & NAMED_OP) ? NODE_LEN (token->val.node) : 0; break; |
8d27e472 | 1278 | case SPELL_NUMBER: |
c86dbc5b | 1279 | case SPELL_STRING: len = token->val.str.len; break; |
1280 | case SPELL_IDENT: len = NODE_LEN (token->val.node); break; | |
f80e83a9 | 1281 | } |
8d27e472 | 1282 | /* 1 for whitespace, 4 for comment delimiters. */ |
79bd622b | 1283 | return len + 5; |
cfad5579 | 1284 | } |
1285 | ||
f80e83a9 | 1286 | /* Write the spelling of a token TOKEN to BUFFER. The buffer must |
c5ea33a8 | 1287 | already contain enough space to hold the token's spelling. |
1288 | Returns a pointer to the character after the last character | |
1289 | written; no terminating NUL is added. PFILE is used only to report internal errors (DL_ICE) for unknown string tokens and unspellable tokens. */ | |
79bd622b | 1290 | unsigned char * |
1291 | cpp_spell_token (pfile, token, buffer) | |
f80e83a9 | 1292 | cpp_reader *pfile; /* Would be nice to be rid of this... */ |
1293 | const cpp_token *token; | |
1294 | unsigned char *buffer; | |
1295 | { | |
7e842f95 | 1296 | switch (TOKEN_SPELL (token)) |
f80e83a9 | 1297 | { |
1298 | case SPELL_OPERATOR: | |
1299 | { | |
1300 | const unsigned char *spelling; | |
1301 | unsigned char c; | |
ab12a39c | 1302 |
f80e83a9 | 1303 | if (token->flags & DIGRAPH) |
ee6c4e4b | 1304 | spelling |
1305 | = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH]; | |
31674461 | 1306 | else if (token->flags & NAMED_OP) |
1307 | goto spell_ident; /* Named operators are spelled from their identifier node. */ | |
f80e83a9 | 1308 | else |
7e842f95 | 1309 | spelling = TOKEN_NAME (token); |
f80e83a9 | 1310 |
1311 | while ((c = *spelling++) != '\0') | |
1312 | *buffer++ = c; | |
1313 | } | |
1314 | break; | |
ab12a39c | 1315 |
8d27e472 | 1316 | case SPELL_CHAR: |
1317 | *buffer++ = token->val.c; | |
1318 | break; | |
1319 | ||
1320 | spell_ident: | |
f80e83a9 | 1321 | case SPELL_IDENT: |
c86dbc5b | 1322 | memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node)); |
1323 | buffer += NODE_LEN (token->val.node); | |
f80e83a9 | 1324 | break; |
ab12a39c | 1325 |
8d27e472 | 1326 | case SPELL_NUMBER: |
1327 | memcpy (buffer, token->val.str.text, token->val.str.len); | |
1328 | buffer += token->val.str.len; | |
1329 | break; | |
1330 | ||
f80e83a9 | 1331 | case SPELL_STRING: |
1332 | { | |
71aa9da4 | 1333 | int left, right, tag; /* Opening delimiter, closing delimiter, and optional 'L' prefix ('\0' for none). */ |
1334 | switch (token->type) | |
1335 | { | |
1336 | case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break; | |
1337 | case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break; | |
71aa9da4 | 1338 | case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break; |
1339 | case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break; | |
1340 | case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break; | |
8d27e472 | 1341 | default: |
73328dce | 1342 | cpp_error (pfile, DL_ICE, "unknown string token %s\n", |
1343 | TOKEN_NAME (token)); | |
8d27e472 | 1344 | return buffer; |
71aa9da4 | 1345 | } |
1346 | if (tag) *buffer++ = tag; | |
8d27e472 | 1347 | *buffer++ = left; |
76faa4c0 | 1348 | memcpy (buffer, token->val.str.text, token->val.str.len); |
1349 | buffer += token->val.str.len; | |
8d27e472 | 1350 | *buffer++ = right; |
f80e83a9 | 1351 | } |
1352 | break; | |
ab12a39c | 1353 |
f80e83a9 | 1354 | case SPELL_NONE: |
73328dce | 1355 | cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token)); |
f80e83a9 | 1356 | break; |
1357 | } | |
ab12a39c | 1358 |
f80e83a9 | 1359 | return buffer; |
1360 | } | |
ab12a39c | 1361 | |
e484a1cc | 1362 | /* Returns TOKEN spelt as a null-terminated string. The string is |
1363 | freed when the reader is destroyed. Useful for diagnostics. The buffer comes from _cpp_unaligned_alloc and is sized by cpp_token_len; the NUL is stored right after the spelling written by cpp_spell_token. */ | |
79bd622b | 1364 | unsigned char * |
1365 | cpp_token_as_text (pfile, token) | |
6060326b | 1366 | cpp_reader *pfile; |
f80e83a9 | 1367 | const cpp_token *token; |
6060326b | 1368 | { |
79bd622b | 1369 | unsigned int len = cpp_token_len (token); |
1fdf6039 | 1370 | unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end; |
6060326b | 1371 |
79bd622b | 1372 | end = cpp_spell_token (pfile, token, start); |
1373 | end[0] = '\0'; | |
6060326b | 1374 |
79bd622b | 1375 | return start; |
1376 | } | |
6060326b | 1377 | |
e484a1cc | 1378 | /* Used by C front ends, which really should move to using |
1379 | cpp_token_as_text. Returns the name entry of token_spellings for TYPE; TYPE is used as the index without any range check. */ | |
79bd622b | 1380 | const char * |
1381 | cpp_type2name (type) | |
1382 | enum cpp_ttype type; | |
1383 | { | |
1384 | return (const char *) token_spellings[type].name; | |
1385 | } | |
6060326b | 1386 | |
f9b5f742 | 1387 | /* Writes the spelling of token to FP, without any preceding space. |
1388 | Separated from cpp_spell_token for efficiency - to avoid stdio | |
1389 | double-buffering. Tokens with no spelling (SPELL_NONE) produce no output; an unexpected string token type is reported on stderr. */ | |
79bd622b | 1390 | void |
1391 | cpp_output_token (token, fp) | |
1392 | const cpp_token *token; | |
1393 | FILE *fp; | |
1394 | { | |
79bd622b | 1395 | switch (TOKEN_SPELL (token)) |
6060326b | 1396 | { |
79bd622b | 1397 | case SPELL_OPERATOR: |
1398 | { | |
1399 | const unsigned char *spelling; | |
28874558 | 1400 | int c; |
6060326b | 1401 |
79bd622b | 1402 | if (token->flags & DIGRAPH) |
ee6c4e4b | 1403 | spelling |
1404 | = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH]; | |
79bd622b | 1405 | else if (token->flags & NAMED_OP) |
1406 | goto spell_ident; | |
1407 | else | |
1408 | spelling = TOKEN_NAME (token); | |
f80e83a9 | 1409 |
28874558 | 1410 | c = *spelling; /* The first character is written unconditionally -- presumably operator spellings are never empty. */ |
1411 | do | |
1412 | putc (c, fp); | |
1413 | while ((c = *++spelling) != '\0'); | |
79bd622b | 1414 | } |
1415 | break; | |
f80e83a9 | 1416 |
8d27e472 | 1417 | case SPELL_CHAR: |
1418 | putc (token->val.c, fp); | |
1419 | break; | |
1420 | ||
79bd622b | 1421 | spell_ident: |
1422 | case SPELL_IDENT: | |
28874558 | 1423 | fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp); |
79bd622b | 1424 | break; |
f80e83a9 | 1425 |
8d27e472 | 1426 | case SPELL_NUMBER: |
1427 | fwrite (token->val.str.text, 1, token->val.str.len, fp); | |
1428 | break; | |
1429 | ||
79bd622b | 1430 | case SPELL_STRING: |
1431 | { | |
1432 | int left, right, tag; | |
1433 | switch (token->type) | |
1434 | { | |
1435 | case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break; | |
1436 | case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break; | |
79bd622b | 1437 | case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break; |
1438 | case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break; | |
1439 | case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break; | |
8d27e472 | 1440 | default: |
1441 | fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token)); | |
1442 | return; | |
79bd622b | 1443 | } |
1444 | if (tag) putc (tag, fp); | |
8d27e472 | 1445 | putc (left, fp); |
79bd622b | 1446 | fwrite (token->val.str.text, 1, token->val.str.len, fp); |
8d27e472 | 1447 | putc (right, fp); |
79bd622b | 1448 | } |
1449 | break; | |
6060326b | 1450 |
79bd622b | 1451 | case SPELL_NONE: |
1452 | /* An error, most probably. */ | |
1453 | break; | |
f80e83a9 | 1454 | } |
6060326b | 1455 | } |
1456 | ||
79bd622b | 1457 | /* Compare two tokens. Returns nonzero if A and B have the same type and flags and, depending on the spelling category, the same character, macro argument number, identifier node, or string contents; returns zero otherwise. */ |
1458 | int | |
1459 | _cpp_equiv_tokens (a, b) | |
1460 | const cpp_token *a, *b; | |
6060326b | 1461 | { |
79bd622b | 1462 | if (a->type == b->type && a->flags == b->flags) |
1463 | switch (TOKEN_SPELL (a)) | |
1464 | { | |
1465 | default: /* Keep compiler happy. */ | |
1466 | case SPELL_OPERATOR: | |
1467 | return 1; | |
1468 | case SPELL_CHAR: | |
33344a1c | 1469 | return a->val.c == b->val.c; /* Character. */ |
79bd622b | 1470 | case SPELL_NONE: |
588d632b | 1471 | return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no); |
79bd622b | 1472 | case SPELL_IDENT: |
1473 | return a->val.node == b->val.node; | |
8d27e472 | 1474 | case SPELL_NUMBER: |
79bd622b | 1475 | case SPELL_STRING: |
1476 | return (a->val.str.len == b->val.str.len | |
1477 | && !memcmp (a->val.str.text, b->val.str.text, | |
1478 | a->val.str.len)); | |
1479 | } | |
6060326b | 1480 |
f80e83a9 | 1481 | return 0; |
1482 | } | |
1483 | ||
79bd622b | 1484 | /* Returns nonzero if a space should be inserted to avoid an |
1485 | accidental token paste for output. For simplicity, it is | |
1486 | conservative, and occasionally advises a space where one is not | |
1487 | needed, e.g. "." and ".2". TOKEN1 is the token already output and TOKEN2 the one about to follow it; named operators are treated as CPP_NAME on both sides. */ | |
79bd622b | 1488 | int |
1489 | cpp_avoid_paste (pfile, token1, token2) | |
6060326b | 1490 | cpp_reader *pfile; |
79bd622b | 1491 | const cpp_token *token1, *token2; |
6060326b | 1492 | { |
79bd622b | 1493 | enum cpp_ttype a = token1->type, b = token2->type; |
1494 | cppchar_t c; | |
6060326b | 1495 |
79bd622b | 1496 | if (token1->flags & NAMED_OP) |
1497 | a = CPP_NAME; | |
1498 | if (token2->flags & NAMED_OP) | |
1499 | b = CPP_NAME; | |
6060326b | 1500 |
79bd622b | 1501 | c = EOF; /* C stays EOF unless TOKEN2 is a digraph or an operator, in which case it is the first character of TOKEN2's spelling. */ |
1502 | if (token2->flags & DIGRAPH) | |
ee6c4e4b | 1503 | c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0]; |
79bd622b | 1504 | else if (token_spellings[b].category == SPELL_OPERATOR) |
1505 | c = token_spellings[b].name[0]; | |
6060326b | 1506 |
79bd622b | 1507 | /* Quickly get everything that can paste with an '='. */ |
ee6c4e4b | 1508 | if ((int) a <= (int) CPP_LAST_EQ && c == '=') |
79bd622b | 1509 | return 1; |
6060326b | 1510 |
79bd622b | 1511 | switch (a) |
6060326b | 1512 | { |
79bd622b | 1513 | case CPP_GREATER: return c == '>' || c == '?'; |
1514 | case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':'; | |
1515 | case CPP_PLUS: return c == '+'; | |
1516 | case CPP_MINUS: return c == '-' || c == '>'; | |
1517 | case CPP_DIV: return c == '/' || c == '*'; /* Comments. */ | |
1518 | case CPP_MOD: return c == ':' || c == '>'; | |
1519 | case CPP_AND: return c == '&'; | |
1520 | case CPP_OR: return c == '|'; | |
1521 | case CPP_COLON: return c == ':' || c == '>'; | |
1522 | case CPP_DEREF: return c == '*'; | |
efdcc728 | 1523 | case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER; |
79bd622b | 1524 | case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */ |
1525 | case CPP_NAME: return ((b == CPP_NUMBER | |
1526 | && name_p (pfile, &token2->val.str)) | |
1527 | || b == CPP_NAME | |
1528 | || b == CPP_CHAR || b == CPP_STRING); /* L */ | |
1529 | case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME | |
1530 | || c == '.' || c == '+' || c == '-'); | |
1531 | case CPP_OTHER: return (CPP_OPTION (pfile, objc) | |
33344a1c | 1532 | && token1->val.c == '@' |
79bd622b | 1533 | && (b == CPP_NAME || b == CPP_STRING)); |
1534 | default: break; | |
6060326b | 1535 | } |
6060326b | 1536 |
deb356cf | 1537 | return 0; |
6060326b | 1538 | } |
1539 | ||
79bd622b | 1540 | /* Output all the remaining tokens on the current line, and a newline |
f9b5f742 | 1541 | character, to FP. Leading whitespace is removed. If there are |
1542 | macros, special token padding is not performed. After each token, a single space is written if the token that follows (including the terminating CPP_EOF) has PREV_WHITE set. */ | |
6060326b | 1543 | void |
79bd622b | 1544 | cpp_output_line (pfile, fp) |
6060326b | 1545 | cpp_reader *pfile; |
79bd622b | 1546 | FILE *fp; |
6060326b | 1547 | { |
f9b5f742 | 1548 | const cpp_token *token; |
7e842f95 | 1549 |
f9b5f742 | 1550 | token = cpp_get_token (pfile); |
1551 | while (token->type != CPP_EOF) | |
7e842f95 | 1552 | { |
f9b5f742 | 1553 | cpp_output_token (token, fp); |
1554 | token = cpp_get_token (pfile); | |
1555 | if (token->flags & PREV_WHITE) | |
1556 | putc (' ', fp); | |
7e842f95 | 1557 | } |
1558 | ||
79bd622b | 1559 | putc ('\n', fp); |
f80e83a9 | 1560 | } |
6060326b | 1561 | |
8330799c | 1562 | /* Returns the value of a hexadecimal digit. Calls abort () if C is not a hexadecimal digit according to hex_p. */ |
1563 | static unsigned int | |
1564 | hex_digit_value (c) | |
1565 | unsigned int c; | |
1566 | { | |
768169bd | 1567 | if (hex_p (c)) |
1568 | return hex_value (c); | |
1569 | else | |
1570 | abort (); | |
8330799c | 1571 | } |
1572 | ||
c8342759 | 1573 | /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate |
1574 | failure if cpplib is not parsing C++ or C99. Such failure is | |
1575 | silent, and no variables are updated. Otherwise returns 0, and | |
1576 | warns if -Wtraditional. | |
8330799c | 1577 |
1578 | [lex.charset]: The character designated by the universal character | |
1579 | name \UNNNNNNNN is that character whose character short name in | |
1580 | ISO/IEC 10646 is NNNNNNNN; the character designated by the | |
1581 | universal character name \uNNNN is that character whose character | |
1582 | short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value | |
1583 | for a universal character name is less than 0x20 or in the range | |
1584 | 0x7F-0x9F (inclusive), or if the universal character name | |
1585 | designates a character in the basic source character set, then the | |
1586 | program is ill-formed. | |
1587 | ||
1588 | We assume that wchar_t is Unicode, so we don't need to do any | |
c8342759 | 1589 | mapping. Is this ever wrong? |
8330799c | 1590 |
c8342759 | 1591 | PC points to the 'u' or 'U', *PSTR points to the byte after it, |
1592 | LIMIT is the end of the string or charconst. *PSTR is updated to | |
1593 | point after the UCS on return, and the UCS is written into *PC. */ | |
1594 | ||
1595 | static int | |
1596 | maybe_read_ucs (pfile, pstr, limit, pc) | |
8330799c | 1597 | cpp_reader *pfile; |
1598 | const unsigned char **pstr; | |
1599 | const unsigned char *limit; | |
c8342759 | 1600 | unsigned int *pc; |
8330799c | 1601 | { |
1602 | const unsigned char *p = *pstr; | |
c8342759 | 1603 | unsigned int code = 0; |
1604 | unsigned int c = *pc, length; | |
1605 | ||
1606 | /* Only attempt to interpret a UCS for C++ and C99. */ | |
1607 | if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))) | |
1608 | return 1; | |
8330799c | 1609 |
c8342759 | 1610 | if (CPP_WTRADITIONAL (pfile)) |
73328dce | 1611 | cpp_error (pfile, DL_WARNING, |
1612 | "the meaning of '\\%c' is different in traditional C", c); | |
8330799c | 1613 |
f73bab03 | 1614 | length = (c == 'u' ? 4: 8); /* Number of hex digits expected; counted down to zero as digits are consumed. */ |
1615 | ||
1616 | if ((size_t) (limit - p) < length) | |
1617 | { | |
73328dce | 1618 | cpp_error (pfile, DL_ERROR, "incomplete universal-character-name"); |
f73bab03 | 1619 | /* Skip to the end to avoid more diagnostics. */ |
1620 | p = limit; | |
1621 | } | |
1622 | else | |
1623 | { | |
1624 | for (; length; length--, p++) | |
8330799c | 1625 | { |
f73bab03 | 1626 | c = *p; |
1627 | if (ISXDIGIT (c)) | |
1628 | code = (code << 4) + hex_digit_value (c); | |
1629 | else | |
1630 | { | |
73328dce | 1631 | cpp_error (pfile, DL_ERROR, |
f73bab03 | 1632 | "non-hex digit '%c' in universal-character-name", c); |
1633 | /* We shouldn't skip in case there are multibyte chars. */ | |
1634 | break; | |
1635 | } | |
8330799c | 1636 | } |
8330799c | 1637 | } |
1638 | ||
1639 | #ifdef TARGET_EBCDIC | |
73328dce | 1640 | cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target"); |
8330799c | 1641 | code = 0x3f; /* EBCDIC invalid character */ |
1642 | #else | |
f73bab03 | 1643 | /* True extended characters are OK. */ |
1644 | if (code >= 0xa0 | |
1645 | && !(code & 0x80000000) | |
1646 | && !(code >= 0xD800 && code <= 0xDFFF)) | |
1647 | ; | |
1648 | /* The standard permits $, @ and ` to be specified as UCNs. We use | |
1649 | hex escapes so that this also works with EBCDIC hosts. */ | |
1650 | else if (code == 0x24 || code == 0x40 || code == 0x60) | |
1651 | ; | |
1652 | /* Don't give another error if one occurred above: LENGTH is zero only when every expected digit was read. */ | |
1653 | else if (length == 0) | |
73328dce | 1654 | cpp_error (pfile, DL_ERROR, "universal-character-name out of range"); |
8330799c | 1655 | #endif |
1656 | ||
1657 | *pstr = p; | |
c8342759 | 1658 | *pc = code; |
1659 | return 0; | |
8330799c | 1660 | } |
1661 | ||
1662 | /* Interpret an escape sequence, and return its value. PSTR points to | |
1663 | the input pointer, which is just after the backslash. LIMIT is how | |
c8342759 | 1664 | much text we have. MASK is a bitmask for the precision for the |
455730ef | 1665 | destination type (char or wchar_t). |
8330799c | 1666 | |
c8342759 | 1667 | Handles all relevant diagnostics. */ |
c8342759 | 1668 | unsigned int |
455730ef | 1669 | cpp_parse_escape (pfile, pstr, limit, mask) |
8330799c | 1670 | cpp_reader *pfile; |
1671 | const unsigned char **pstr; | |
1672 | const unsigned char *limit; | |
c8342759 | 1673 | unsigned HOST_WIDE_INT mask; |
8330799c | 1674 | { |
1675 | int unknown = 0; | |
1676 | const unsigned char *str = *pstr; | |
1677 | unsigned int c = *str++; | |
1678 | ||
1679 | switch (c) | |
1680 | { | |
1681 | case '\\': case '\'': case '"': case '?': break; | |
1682 | case 'b': c = TARGET_BS; break; | |
1683 | case 'f': c = TARGET_FF; break; | |
1684 | case 'n': c = TARGET_NEWLINE; break; | |
1685 | case 'r': c = TARGET_CR; break; | |
1686 | case 't': c = TARGET_TAB; break; | |
1687 | case 'v': c = TARGET_VT; break; | |
1688 | ||
1689 | case '(': case '{': case '[': case '%': | |
1690 | /* '\(', etc, are used at beginning of line to avoid confusing Emacs. | |
1691 | '\%' is used to prevent SCCS from getting confused. */ | |
1692 | unknown = CPP_PEDANTIC (pfile); | |
1693 | break; | |
1694 | ||
1695 | case 'a': | |
1696 | if (CPP_WTRADITIONAL (pfile)) | |
73328dce | 1697 | cpp_error (pfile, DL_WARNING, |
1698 | "the meaning of '\\a' is different in traditional C"); | |
0b3481a4 | 1699 | c = TARGET_BELL; |
8330799c | 1700 | break; |
1701 | ||
1702 | case 'e': case 'E': | |
1703 | if (CPP_PEDANTIC (pfile)) | |
73328dce | 1704 | cpp_error (pfile, DL_PEDWARN, |
1705 | "non-ISO-standard escape sequence, '\\%c'", c); | |
8330799c | 1706 | c = TARGET_ESC; |
1707 | break; | |
1708 | ||
8330799c | 1709 | case 'u': case 'U': |
c8342759 | 1710 | unknown = maybe_read_ucs (pfile, &str, limit, &c); |
8330799c | 1711 | break; |
1712 | ||
1713 | case 'x': | |
1714 | if (CPP_WTRADITIONAL (pfile)) | |
73328dce | 1715 | cpp_error (pfile, DL_WARNING, |
1716 | "the meaning of '\\x' is different in traditional C"); | |
8330799c | 1717 | |
8330799c | 1718 | { |
1719 | unsigned int i = 0, overflow = 0; | |
1720 | int digits_found = 0; | |
1721 | ||
1722 | while (str < limit) | |
1723 | { | |
1724 | c = *str; | |
1725 | if (! ISXDIGIT (c)) | |
1726 | break; | |
1727 | str++; | |
1728 | overflow |= i ^ (i << 4 >> 4); | |
1729 | i = (i << 4) + hex_digit_value (c); | |
1730 | digits_found = 1; | |
1731 | } | |
1732 | ||
1733 | if (!digits_found) | |
73328dce | 1734 | cpp_error (pfile, DL_ERROR, |
1735 | "\\x used with no following hex digits"); | |
8330799c | 1736 | |
1737 | if (overflow | (i != (i & mask))) | |
1738 | { | |
73328dce | 1739 | cpp_error (pfile, DL_PEDWARN, |
1740 | "hex escape sequence out of range"); | |
8330799c | 1741 | i &= mask; |
1742 | } | |
1743 | c = i; | |
1744 | } | |
1745 | break; | |
1746 | ||
1747 | case '0': case '1': case '2': case '3': | |
1748 | case '4': case '5': case '6': case '7': | |
1749 | { | |
1750 | unsigned int i = c - '0'; | |
1751 | int count = 0; | |
1752 | ||
1753 | while (str < limit && ++count < 3) | |
1754 | { | |
1755 | c = *str; | |
1756 | if (c < '0' || c > '7') | |
1757 | break; | |
1758 | str++; | |
1759 | i = (i << 3) + c - '0'; | |
1760 | } | |
1761 | ||
1762 | if (i != (i & mask)) | |
1763 | { | |
73328dce | 1764 | cpp_error (pfile, DL_PEDWARN, |
1765 | "octal escape sequence out of range"); | |
8330799c | 1766 | i &= mask; |
1767 | } | |
1768 | c = i; | |
1769 | } | |
1770 | break; | |
1771 | ||
1772 | default: | |
1773 | unknown = 1; | |
1774 | break; | |
1775 | } | |
1776 | ||
1777 | if (unknown) | |
1778 | { | |
1779 | if (ISGRAPH (c)) | |
73328dce | 1780 | cpp_error (pfile, DL_PEDWARN, "unknown escape sequence '\\%c'", c); |
8330799c | 1781 | else |
73328dce | 1782 | cpp_error (pfile, DL_PEDWARN, "unknown escape sequence: '\\%03o'", c); |
8330799c | 1783 | } |
1784 | ||
c8342759 | 1785 | if (c > mask) |
73328dce | 1786 | cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type"); |
c8342759 | 1787 | |
8330799c | 1788 | *pstr = str; |
1789 | return c; | |
1790 | } | |
1791 | ||
/* Fall back to the plain type sizes when no separate maximum char or
   wchar_t size has been defined.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1799 | ||
1800 | /* Interpret a (possibly wide) character constant in TOKEN. | |
455730ef | 1801 | WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN points |
1802 | to a variable that is filled in with the number of characters seen. */ | |
8330799c | 1803 | HOST_WIDE_INT |
455730ef | 1804 | cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen) |
8330799c | 1805 | cpp_reader *pfile; |
1806 | const cpp_token *token; | |
1807 | int warn_multi; | |
8330799c | 1808 | unsigned int *pchars_seen; |
1809 | { | |
1810 | const unsigned char *str = token->val.str.text; | |
1811 | const unsigned char *limit = str + token->val.str.len; | |
1812 | unsigned int chars_seen = 0; | |
1813 | unsigned int width, max_chars, c; | |
0d086e18 | 1814 | unsigned HOST_WIDE_INT mask; |
1815 | HOST_WIDE_INT result = 0; | |
b3a9482f | 1816 | bool unsigned_p; |
8330799c | 1817 | |
1818 | #ifdef MULTIBYTE_CHARS | |
1819 | (void) local_mbtowc (NULL, NULL, 0); | |
1820 | #endif | |
1821 | ||
1822 | /* Width in bits. */ | |
1823 | if (token->type == CPP_CHAR) | |
b3a9482f | 1824 | { |
1825 | width = MAX_CHAR_TYPE_SIZE; | |
1826 | unsigned_p = CPP_OPTION (pfile, signed_char) == 0; | |
1827 | } | |
8330799c | 1828 | else |
b3a9482f | 1829 | { |
1830 | width = MAX_WCHAR_TYPE_SIZE; | |
1831 | unsigned_p = WCHAR_UNSIGNED; | |
1832 | } | |
8330799c | 1833 | |
1834 | if (width < HOST_BITS_PER_WIDE_INT) | |
1835 | mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1; | |
1836 | else | |
1837 | mask = ~0; | |
1838 | max_chars = HOST_BITS_PER_WIDE_INT / width; | |
1839 | ||
1840 | while (str < limit) | |
1841 | { | |
1842 | #ifdef MULTIBYTE_CHARS | |
1843 | wchar_t wc; | |
1844 | int char_len; | |
1845 | ||
1846 | char_len = local_mbtowc (&wc, str, limit - str); | |
1847 | if (char_len == -1) | |
1848 | { | |
73328dce | 1849 | cpp_error (pfile, DL_WARNING, |
1850 | "ignoring invalid multibyte character"); | |
8330799c | 1851 | c = *str++; |
1852 | } | |
1853 | else | |
1854 | { | |
1855 | str += char_len; | |
1856 | c = wc; | |
1857 | } | |
1858 | #else | |
1859 | c = *str++; | |
1860 | #endif | |
1861 | ||
1862 | if (c == '\\') | |
455730ef | 1863 | c = cpp_parse_escape (pfile, &str, limit, mask); |
8330799c | 1864 | |
1865 | #ifdef MAP_CHARACTER | |
1866 | if (ISPRINT (c)) | |
1867 | c = MAP_CHARACTER (c); | |
1868 | #endif | |
1869 | ||
1870 | /* Merge character into result; ignore excess chars. */ | |
1871 | if (++chars_seen <= max_chars) | |
1872 | { | |
1873 | if (width < HOST_BITS_PER_WIDE_INT) | |
1874 | result = (result << width) | (c & mask); | |
1875 | else | |
1876 | result = c; | |
1877 | } | |
1878 | } | |
1879 | ||
1880 | if (chars_seen == 0) | |
73328dce | 1881 | cpp_error (pfile, DL_ERROR, "empty character constant"); |
8330799c | 1882 | else if (chars_seen > max_chars) |
1883 | { | |
1884 | chars_seen = max_chars; | |
73328dce | 1885 | cpp_error (pfile, DL_WARNING, "character constant too long"); |
8330799c | 1886 | } |
455730ef | 1887 | else if (chars_seen > 1 && warn_multi) |
73328dce | 1888 | cpp_error (pfile, DL_WARNING, "multi-character character constant"); |
8330799c | 1889 | |
b3a9482f | 1890 | /* If relevant type is signed, sign-extend the constant. */ |
1891 | if (chars_seen) | |
8330799c | 1892 | { |
1893 | unsigned int nbits = chars_seen * width; | |
8330799c | 1894 | |
43653c0a | 1895 | mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits); |
b3a9482f | 1896 | if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0) |
8330799c | 1897 | result &= mask; |
1898 | else | |
1899 | result |= ~mask; | |
1900 | } | |
1901 | ||
1902 | *pchars_seen = chars_seen; | |
1903 | return result; | |
1904 | } | |
1905 | ||
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest data area a new buffer is given.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be reused to satisfy a request for MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* MIN_EXTRA is parenthesized so that an argument containing a
   low-precedence operator does not absorb the rest of the sum.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
  ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1920 | ||
/* Never instantiated: this type exists only so that the offset of U,
   which follows a single char, can be measured.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* Offset of the double/pointer union within struct dummy, used as the
   alignment for buffer sizes.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to a multiple of ALIGN.  The bit-masking form is only
   correct when ALIGN is a power of two.  */
#define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
1933 | ||
1785b647 | 1934 | /* Create a new allocation buffer. Place the control block at the end |
1935 | of the buffer, so that buffer overflows will cause immediate chaos. */ | |
06c92cbc | 1936 | static _cpp_buff * |
1937 | new_buff (len) | |
4b31a107 | 1938 | size_t len; |
06c92cbc | 1939 | { |
1940 | _cpp_buff *result; | |
1fdf6039 | 1941 | unsigned char *base; |
06c92cbc | 1942 | |
084163dc | 1943 | if (len < MIN_BUFF_SIZE) |
1944 | len = MIN_BUFF_SIZE; | |
06c92cbc | 1945 | len = CPP_ALIGN (len, DEFAULT_ALIGNMENT); |
1946 | ||
1947 | base = xmalloc (len + sizeof (_cpp_buff)); | |
1948 | result = (_cpp_buff *) (base + len); | |
1949 | result->base = base; | |
1950 | result->cur = base; | |
1951 | result->limit = base + len; | |
1952 | result->next = NULL; | |
1953 | return result; | |
1954 | } | |
1955 | ||
1956 | /* Place a chain of unwanted allocation buffers on the free list. */ | |
1957 | void | |
1958 | _cpp_release_buff (pfile, buff) | |
1959 | cpp_reader *pfile; | |
1960 | _cpp_buff *buff; | |
1961 | { | |
1962 | _cpp_buff *end = buff; | |
1963 | ||
1964 | while (end->next) | |
1965 | end = end->next; | |
1966 | end->next = pfile->free_buffs; | |
1967 | pfile->free_buffs = buff; | |
1968 | } | |
1969 | ||
1970 | /* Return a free buffer of size at least MIN_SIZE. */ | |
1971 | _cpp_buff * | |
1972 | _cpp_get_buff (pfile, min_size) | |
1973 | cpp_reader *pfile; | |
4b31a107 | 1974 | size_t min_size; |
06c92cbc | 1975 | { |
1976 | _cpp_buff *result, **p; | |
1977 | ||
1978 | for (p = &pfile->free_buffs;; p = &(*p)->next) | |
1979 | { | |
4b31a107 | 1980 | size_t size; |
084163dc | 1981 | |
1982 | if (*p == NULL) | |
06c92cbc | 1983 | return new_buff (min_size); |
084163dc | 1984 | result = *p; |
1985 | size = result->limit - result->base; | |
1986 | /* Return a buffer that's big enough, but don't waste one that's | |
1987 | way too big. */ | |
4085c149 | 1988 | if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size)) |
06c92cbc | 1989 | break; |
1990 | } | |
1991 | ||
1992 | *p = result->next; | |
1993 | result->next = NULL; | |
1994 | result->cur = result->base; | |
1995 | return result; | |
1996 | } | |
1997 | ||
20dd417a | 1998 | /* Creates a new buffer with enough space to hold the uncommitted |
e6a5f963 | 1999 | remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies |
2000 | the excess bytes to the new buffer. Chains the new buffer after | |
2001 | BUFF, and returns the new buffer. */ | |
06c92cbc | 2002 | _cpp_buff * |
e6a5f963 | 2003 | _cpp_append_extend_buff (pfile, buff, min_extra) |
06c92cbc | 2004 | cpp_reader *pfile; |
2005 | _cpp_buff *buff; | |
4b31a107 | 2006 | size_t min_extra; |
06c92cbc | 2007 | { |
4b31a107 | 2008 | size_t size = EXTENDED_BUFF_SIZE (buff, min_extra); |
e6a5f963 | 2009 | _cpp_buff *new_buff = _cpp_get_buff (pfile, size); |
06c92cbc | 2010 | |
e6a5f963 | 2011 | buff->next = new_buff; |
2012 | memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff)); | |
2013 | return new_buff; | |
2014 | } | |
2015 | ||
20dd417a | 2016 | /* Creates a new buffer with enough space to hold the uncommitted |
e6a5f963 | 2017 | remaining bytes of the buffer pointed to by BUFF, and at least |
2018 | MIN_EXTRA more bytes. Copies the excess bytes to the new buffer. | |
2019 | Chains the new buffer before the buffer pointed to by BUFF, and | |
2020 | updates the pointer to point to the new buffer. */ | |
2021 | void | |
2022 | _cpp_extend_buff (pfile, pbuff, min_extra) | |
2023 | cpp_reader *pfile; | |
2024 | _cpp_buff **pbuff; | |
2025 | size_t min_extra; | |
2026 | { | |
2027 | _cpp_buff *new_buff, *old_buff = *pbuff; | |
2028 | size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra); | |
2029 | ||
2030 | new_buff = _cpp_get_buff (pfile, size); | |
2031 | memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff)); | |
2032 | new_buff->next = old_buff; | |
2033 | *pbuff = new_buff; | |
06c92cbc | 2034 | } |
2035 | ||
2036 | /* Free a chain of buffers starting at BUFF. */ | |
2037 | void | |
2038 | _cpp_free_buff (buff) | |
2039 | _cpp_buff *buff; | |
2040 | { | |
2041 | _cpp_buff *next; | |
2042 | ||
2043 | for (; buff; buff = next) | |
2044 | { | |
2045 | next = buff->next; | |
2046 | free (buff->base); | |
2047 | } | |
2048 | } | |
deb356cf | 2049 | |
1fdf6039 | 2050 | /* Allocate permanent, unaligned storage of length LEN. */ |
2051 | unsigned char * | |
2052 | _cpp_unaligned_alloc (pfile, len) | |
2053 | cpp_reader *pfile; | |
2054 | size_t len; | |
2055 | { | |
2056 | _cpp_buff *buff = pfile->u_buff; | |
2057 | unsigned char *result = buff->cur; | |
2058 | ||
2059 | if (len > (size_t) (buff->limit - result)) | |
2060 | { | |
2061 | buff = _cpp_get_buff (pfile, len); | |
2062 | buff->next = pfile->u_buff; | |
2063 | pfile->u_buff = buff; | |
2064 | result = buff->cur; | |
2065 | } | |
2066 | ||
2067 | buff->cur = result + len; | |
2068 | return result; | |
2069 | } | |
2070 | ||
1e0ef2fd | 2071 | /* Allocate permanent, unaligned storage of length LEN from a_buff. |
2072 | That buffer is used for growing allocations when saving macro | |
2073 | replacement lists in a #define, and when parsing an answer to an | |
2074 | assertion in #assert, #unassert or #if (and therefore possibly | |
2075 | whilst expanding macros). It therefore must not be used by any | |
2076 | code that they might call: specifically the lexer and the guts of | |
2077 | the macro expander. | |
2078 | ||
2079 | All existing other uses clearly fit this restriction: storing | |
2080 | registered pragmas during initialization. */ | |
79bd622b | 2081 | unsigned char * |
e6a5f963 | 2082 | _cpp_aligned_alloc (pfile, len) |
2083 | cpp_reader *pfile; | |
2084 | size_t len; | |
89b05ef6 | 2085 | { |
e6a5f963 | 2086 | _cpp_buff *buff = pfile->a_buff; |
2087 | unsigned char *result = buff->cur; | |
89b05ef6 | 2088 | |
e6a5f963 | 2089 | if (len > (size_t) (buff->limit - result)) |
89b05ef6 | 2090 | { |
e6a5f963 | 2091 | buff = _cpp_get_buff (pfile, len); |
2092 | buff->next = pfile->a_buff; | |
2093 | pfile->a_buff = buff; | |
2094 | result = buff->cur; | |
89b05ef6 | 2095 | } |
f80e83a9 | 2096 | |
e6a5f963 | 2097 | buff->cur = result + len; |
79bd622b | 2098 | return result; |
f80e83a9 | 2099 | } |