]> git.ipfire.org Git - thirdparty/gcc.git/blame - libcpp/lex.c
PR c++/61661
[thirdparty/gcc.git] / libcpp / lex.c
CommitLineData
0578f103 1/* CPP Library - lexical analysis.
806a3d45 2 Copyright (C) 2000-2014 Free Software Foundation, Inc.
0578f103 3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7
8This program is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
6bc9506f 10Free Software Foundation; either version 3, or (at your option) any
0578f103 11later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
6bc9506f 19along with this program; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
0578f103 21
22#include "config.h"
23#include "system.h"
0578f103 24#include "cpplib.h"
d856c8a6 25#include "internal.h"
0578f103 26
/* Classifies how a token type is spelled.  Each entry of the
   token_spellings table carries one of these values in its CATEGORY
   field.  SPELL_OPERATOR is assigned to every OP() entry of
   TTYPE_TABLE; the remaining values are selected by the second
   argument of each TK() entry.  */
79bd622b 27enum spell_type
241e762e 28{
79bd622b 29 SPELL_OPERATOR = 0,
79bd622b 30 SPELL_IDENT,
4970d4c2 31 SPELL_LITERAL,
79bd622b 32 SPELL_NONE
241e762e 33};
34
/* One row of the token_spellings table: the spelling category of a
   token type together with a name string for it.  CATEGORY is read
   through TOKEN_SPELL and NAME through TOKEN_NAME.  */
79bd622b 35struct token_spelling
241e762e 36{
79bd622b 37 enum spell_type category;
38 const unsigned char *name;
241e762e 39};
40
/* Spellings of the digraphs.  NOTE(review): the order presumably
   mirrors the order of the digraph-capable token types declared in
   cpplib.h -- confirm before reordering.  */
0ca849f9 41static const unsigned char *const digraph_spellings[] =
924bbf02 42{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
79bd622b 43
/* Expand TTYPE_TABLE into one token_spelling per token type.  An OP
   entry is given category SPELL_OPERATOR and the text S as its name; a
   TK entry takes its category from S (pasted onto SPELL_) and uses the
   stringified enumerator E as its name.  */
924bbf02 44#define OP(e, s) { SPELL_OPERATOR, UC s },
45#define TK(e, s) { SPELL_ ## s, UC #e },
0ca849f9 46static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
79bd622b 47#undef OP
48#undef TK
49
/* Look up the spelling category and the name of TOKEN, indexed by its
   type.  */
50#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
e2f9a79f 52
/* Forward declarations of the file-local helpers, so that they can be
   defined in any order in the rest of the file.  */
f7fdd7a1 53static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54static int skip_line_comment (cpp_reader *);
55static void skip_whitespace (cpp_reader *, cppchar_t);
f7fdd7a1 56static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
956c6108 58static void store_comment (cpp_reader *, cpp_token *);
f7fdd7a1 59static void create_literal (cpp_reader *, cpp_token *, const uchar *,
60 unsigned int, enum cpp_ttype);
61static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
62static int name_p (cpp_reader *, const cpp_string *);
f7fdd7a1 63static tokenrun *next_tokenrun (tokenrun *);
64
/* Buffer allocation helper.  */
f7fdd7a1 65static _cpp_buff *new_buff (size_t);
bce8e0c0 66
e920deaf 67
f80e83a9 68/* Utility routine:
2c63d6c8 69
76faa4c0 70 Compares, the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
f80e83a9 72int
f7fdd7a1 73cpp_ideq (const cpp_token *token, const char *string)
f80e83a9 74{
76faa4c0 75 if (token->type != CPP_NAME)
f80e83a9 76 return 0;
76faa4c0 77
2ee04baa 78 return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
bce8e0c0 79}
50fd6b48 80
a54e0bf8 81/* Record a note TYPE at byte POS into the current cleaned logical
82 line. */
1e0ef2fd 83static void
f7fdd7a1 84add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
338fa5f7 85{
a54e0bf8 86 if (buffer->notes_used == buffer->notes_cap)
87 {
88 buffer->notes_cap = buffer->notes_cap * 2 + 200;
720aca92 89 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
90 buffer->notes_cap);
a54e0bf8 91 }
338fa5f7 92
a54e0bf8 93 buffer->notes[buffer->notes_used].pos = pos;
94 buffer->notes[buffer->notes_used].type = type;
95 buffer->notes_used++;
338fa5f7 96}
97
2431e8ba 98\f
99/* Fast path to find line special characters using optimized character
100 scanning algorithms. Anything complicated falls back to the slow
101 path below. Since this loop is very hot it's worth doing these kinds
102 of optimizations.
103
104 One of the paths through the ifdefs should provide
105
106 const uchar *search_line_fast (const uchar *s, const uchar *end);
107
108 Between S and END, search for \n, \r, \\, ?. Return a pointer to
109 the found character.
110
111 Note that the last character of the buffer is *always* a newline,
112 as forced by _cpp_convert_input. This fact can be used to avoid
113 explicitly looking for the end of the buffer. */
114
/* Configure only gives us an ifdef-style test; supply a zero default so
   that WORDS_BIGENDIAN can be used in ordinary C expressions.  */
#ifndef WORDS_BIGENDIAN
# define WORDS_BIGENDIAN 0
#endif

/* We want the widest integer that fits into a register, and <stdint.h>
   has no name for that.  On most hosts unsigned long does the job, but
   the LLP64 model chosen by MS breaks that assumption; when building
   with GCC we can ask for the "real" word size directly.  */
#if !defined (__GNUC__)
typedef unsigned long word_type;
#else
typedef unsigned int word_type __attribute__((__mode__(__word__)));
#endif

/* The scanning code only copes with 4- or 8-byte words.  Any other
   size produces a negative array bound here and thus a compile-time
   error.  */
typedef char check_word_type_size
  [(sizeof (word_type) == 4 || sizeof (word_type) == 8) ? 1 : -1];
134
135/* Return X with the first N bytes forced to values that won't match one
136 of the interesting characters. Note that NUL is not interesting. */
137
138static inline word_type
139acc_char_mask_misalign (word_type val, unsigned int n)
140{
141 word_type mask = -1;
142 if (WORDS_BIGENDIAN)
143 mask >>= n * 8;
144 else
145 mask <<= n * 8;
146 return val & mask;
147}
148
149/* Return X replicated to all byte positions within WORD_TYPE. */
150
151static inline word_type
152acc_char_replicate (uchar x)
153{
154 word_type ret;
155
156 ret = (x << 24) | (x << 16) | (x << 8) | x;
157 if (sizeof(word_type) == 8)
158 ret = (ret << 16 << 16) | ret;
159 return ret;
160}
161
162/* Return non-zero if some byte of VAL is (probably) C. */
163
164static inline word_type
165acc_char_cmp (word_type val, word_type c)
166{
167#if defined(__GNUC__) && defined(__alpha__)
168 /* We can get exact results using a compare-bytes instruction.
169 Get (val == c) via (0 >= (val ^ c)). */
170 return __builtin_alpha_cmpbge (0, val ^ c);
171#else
172 word_type magic = 0x7efefefeU;
173 if (sizeof(word_type) == 8)
174 magic = (magic << 16 << 16) | 0xfefefefeU;
175 magic |= 1;
176
177 val ^= c;
178 return ((val + magic) ^ ~val) & ~magic;
179#endif
180}
181
182/* Given the result of acc_char_cmp is non-zero, return the index of
183 the found character. If this was a false positive, return -1. */
184
185static inline int
186acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
187 word_type val ATTRIBUTE_UNUSED)
188{
189#if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190 /* The cmpbge instruction sets *bits* of the result corresponding to
191 matches in the bytes with no false positives. */
192 return __builtin_ctzl (cmp);
193#else
194 unsigned int i;
195
196 /* ??? It would be nice to force unrolling here,
197 and have all of these constants folded. */
198 for (i = 0; i < sizeof(word_type); ++i)
199 {
200 uchar c;
201 if (WORDS_BIGENDIAN)
202 c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
203 else
204 c = (val >> i * 8) & 0xff;
205
206 if (c == '\n' || c == '\r' || c == '\\' || c == '?')
207 return i;
208 }
209
210 return -1;
211#endif
212}
213
214/* A version of the fast scanner using bit fiddling techniques.
215
216 For 32-bit words, one would normally perform 16 comparisons and
217 16 branches. With this algorithm one performs 24 arithmetic
218 operations and one branch. Whether this is faster with a 32-bit
219 word size is going to be somewhat system dependent.
220
221 For 64-bit words, we eliminate twice the number of comparisons
222 and branches without increasing the number of arithmetic operations.
223 It's almost certainly going to be a win with 64-bit word size. */
224
225static const uchar * search_line_acc_char (const uchar *, const uchar *)
226 ATTRIBUTE_UNUSED;
227
228static const uchar *
229search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
230{
231 const word_type repl_nl = acc_char_replicate ('\n');
232 const word_type repl_cr = acc_char_replicate ('\r');
233 const word_type repl_bs = acc_char_replicate ('\\');
234 const word_type repl_qm = acc_char_replicate ('?');
235
236 unsigned int misalign;
237 const word_type *p;
238 word_type val, t;
239
240 /* Align the buffer. Mask out any bytes from before the beginning. */
241 p = (word_type *)((uintptr_t)s & -sizeof(word_type));
242 val = *p;
243 misalign = (uintptr_t)s & (sizeof(word_type) - 1);
244 if (misalign)
245 val = acc_char_mask_misalign (val, misalign);
246
247 /* Main loop. */
248 while (1)
249 {
250 t = acc_char_cmp (val, repl_nl);
251 t |= acc_char_cmp (val, repl_cr);
252 t |= acc_char_cmp (val, repl_bs);
253 t |= acc_char_cmp (val, repl_qm);
254
255 if (__builtin_expect (t != 0, 0))
256 {
257 int i = acc_char_index (t, val);
258 if (i >= 0)
259 return (const uchar *)p + i;
260 }
261
262 val = *++p;
263 }
264}
265
3efb5d22 266/* Disable on Solaris 2/x86 until the following problem can be properly
8ab29ece 267 autoconfed:
268
8ab29ece 269 The Solaris 10+ assembler tags objects with the instruction set
270 extensions used, so SSE4.2 executables cannot run on machines that
271 don't support that extension. */
272
273#if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
2431e8ba 274
/* Replicated character data to be shared between implementations.
   Vector types cannot be defined compatibly outside of a context with
   vector support, so the rows are plain 16-byte aligned character
   arrays: one row each for '\n', '\r', '\\' and '?'.  */
#define REPL16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
static const char repl_chars[4][16] __attribute__((aligned(16))) = {
  REPL16 ('\n'),
  REPL16 ('\r'),
  REPL16 ('\\'),
  REPL16 ('?'),
};
#undef REPL16
289
290/* A version of the fast scanner using MMX vectorized byte compare insns.
291
292 This uses the PMOVMSKB instruction which was introduced with "MMX2",
cef70b8a 293 which was packaged into SSE1; it is also present in the AMD MMX
2431e8ba 294 extension. Mark the function as using "sse" so that we emit a real
295 "emms" instruction, rather than the 3dNOW "femms" instruction. */
296
297static const uchar *
298#ifndef __SSE__
299__attribute__((__target__("sse")))
300#endif
301search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
302{
303 typedef char v8qi __attribute__ ((__vector_size__ (8)));
304 typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
305
306 const v8qi repl_nl = *(const v8qi *)repl_chars[0];
307 const v8qi repl_cr = *(const v8qi *)repl_chars[1];
308 const v8qi repl_bs = *(const v8qi *)repl_chars[2];
309 const v8qi repl_qm = *(const v8qi *)repl_chars[3];
310
311 unsigned int misalign, found, mask;
312 const v8qi *p;
313 v8qi data, t, c;
314
315 /* Align the source pointer. While MMX doesn't generate unaligned data
316 faults, this allows us to safely scan to the end of the buffer without
317 reading beyond the end of the last page. */
318 misalign = (uintptr_t)s & 7;
319 p = (const v8qi *)((uintptr_t)s & -8);
320 data = *p;
321
322 /* Create a mask for the bytes that are valid within the first
323 16-byte block. The Idea here is that the AND with the mask
324 within the loop is "free", since we need some AND or TEST
325 insn in order to set the flags for the branch anyway. */
326 mask = -1u << misalign;
327
328 /* Main loop processing 8 bytes at a time. */
329 goto start;
330 do
331 {
332 data = *++p;
333 mask = -1;
334
335 start:
336 t = __builtin_ia32_pcmpeqb(data, repl_nl);
337 c = __builtin_ia32_pcmpeqb(data, repl_cr);
338 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
339 c = __builtin_ia32_pcmpeqb(data, repl_bs);
340 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
341 c = __builtin_ia32_pcmpeqb(data, repl_qm);
342 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
343 found = __builtin_ia32_pmovmskb (t);
344 found &= mask;
345 }
346 while (!found);
347
348 __builtin_ia32_emms ();
349
350 /* FOUND contains 1 in bits for which we matched a relevant
351 character. Conversion to the byte index is trivial. */
352 found = __builtin_ctz(found);
353 return (const uchar *)p + found;
354}
355
356/* A version of the fast scanner using SSE2 vectorized byte compare insns. */
357
358static const uchar *
359#ifndef __SSE2__
360__attribute__((__target__("sse2")))
361#endif
362search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
363{
364 typedef char v16qi __attribute__ ((__vector_size__ (16)));
365
366 const v16qi repl_nl = *(const v16qi *)repl_chars[0];
367 const v16qi repl_cr = *(const v16qi *)repl_chars[1];
368 const v16qi repl_bs = *(const v16qi *)repl_chars[2];
369 const v16qi repl_qm = *(const v16qi *)repl_chars[3];
370
371 unsigned int misalign, found, mask;
372 const v16qi *p;
373 v16qi data, t;
374
375 /* Align the source pointer. */
376 misalign = (uintptr_t)s & 15;
377 p = (const v16qi *)((uintptr_t)s & -16);
378 data = *p;
379
380 /* Create a mask for the bytes that are valid within the first
381 16-byte block. The Idea here is that the AND with the mask
382 within the loop is "free", since we need some AND or TEST
383 insn in order to set the flags for the branch anyway. */
384 mask = -1u << misalign;
385
386 /* Main loop processing 16 bytes at a time. */
387 goto start;
388 do
389 {
390 data = *++p;
391 mask = -1;
392
393 start:
394 t = __builtin_ia32_pcmpeqb128(data, repl_nl);
395 t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
396 t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
397 t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
398 found = __builtin_ia32_pmovmskb128 (t);
399 found &= mask;
400 }
401 while (!found);
402
403 /* FOUND contains 1 in bits for which we matched a relevant
404 character. Conversion to the byte index is trivial. */
405 found = __builtin_ctz(found);
406 return (const uchar *)p + found;
407}
408
#ifdef HAVE_SSE4
/* A version of the fast scanner using SSE 4.2 vectorized string insns.
   Unlike the other variants this one consults END, to decide whether an
   unaligned 16-byte read at S is safe.  */

static const uchar *
#ifndef __SSE4_2__
__attribute__((__target__("sse4.2")))
#endif
search_line_sse42 (const uchar *s, const uchar *end)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
  static const v16qi search = { '\n', '\r', '?', '\\' };

  const uintptr_t addr = (uintptr_t) s;
  uintptr_t index;

  /* Unaligned input gets one unaligned probe before the aligned loop.  */
  if (addr & 15)
    {
      v16qi head;

      /* With fewer than 16 bytes left in the buffer and fewer than 16
         bytes left on the page, a 16-byte read here might generate a
         spurious page fault.  Defer to the SSE2 implementation, which
         already handles alignment.  */
      if (__builtin_expect (end - s < 16, 0)
          && __builtin_expect ((addr & 0xfff) > 0xff0, 0))
        return search_line_sse2 (s, end);

      /* ??? The builtin doesn't understand that the PCMPESTRI read from
         memory need not be aligned.  */
      head = __builtin_ia32_loaddqu ((const char *) s);
      index = __builtin_ia32_pcmpestri128 (search, 4, head, 16, 0);

      if (__builtin_expect (index < 16, 0))
        return s + index;

      /* Advance the pointer to an aligned address.  A few bytes get
         re-scanned, but reading past the end of a page is no longer a
         concern, since a match is guaranteed.  */
      s = (const uchar *) ((addr + 16) & -16);
    }

  /* Main loop, processing 16 bytes at a time.  By doing the whole loop
     in inline assembly, we can make proper use of the flags set.  */
  __asm ( "sub $16, %1\n"
          " .balign 16\n"
          "0: add $16, %1\n"
          " %vpcmpestri $0, (%1), %2\n"
          " jnc 0b"
          : "=&c"(index), "+r"(s)
          : "x"(search), "a"(4), "d"(16));

  return s + index;
}

#else
/* Work around out-dated assemblers without sse4 support.  */
#define search_line_sse42 search_line_sse2
#endif
471
2431e8ba 472/* Check the CPU capabilities. */
473
474#include "../gcc/config/i386/cpuid.h"
475
476typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
477static search_line_fast_type search_line_fast;
478
b735cc56 479#define HAVE_init_vectorized_lexer 1
480static inline void
2431e8ba 481init_vectorized_lexer (void)
482{
483 unsigned dummy, ecx = 0, edx = 0;
484 search_line_fast_type impl = search_line_acc_char;
485 int minimum = 0;
486
487#if defined(__SSE4_2__)
488 minimum = 3;
489#elif defined(__SSE2__)
490 minimum = 2;
cef70b8a 491#elif defined(__SSE__)
2431e8ba 492 minimum = 1;
493#endif
494
495 if (minimum == 3)
496 impl = search_line_sse42;
497 else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
498 {
499 if (minimum == 3 || (ecx & bit_SSE4_2))
500 impl = search_line_sse42;
501 else if (minimum == 2 || (edx & bit_SSE2))
502 impl = search_line_sse2;
503 else if (minimum == 1 || (edx & bit_SSE))
504 impl = search_line_mmx;
505 }
506 else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
507 {
85303bd5 508 if (minimum == 1
509 || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
2431e8ba 510 impl = search_line_mmx;
511 }
512
513 search_line_fast = impl;
514}
515
70ebee13 516#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
2431e8ba 517
518/* A vection of the fast scanner using AltiVec vectorized byte compares. */
519/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
520 so we can't compile this function without -maltivec on the command line
521 (or implied by some other switch). */
522
523static const uchar *
524search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
525{
526 typedef __attribute__((altivec(vector))) unsigned char vc;
527
528 const vc repl_nl = {
529 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
530 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
531 };
532 const vc repl_cr = {
533 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
534 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
535 };
536 const vc repl_bs = {
537 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
538 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
539 };
540 const vc repl_qm = {
541 '?', '?', '?', '?', '?', '?', '?', '?',
542 '?', '?', '?', '?', '?', '?', '?', '?',
543 };
544 const vc ones = {
545 -1, -1, -1, -1, -1, -1, -1, -1,
546 -1, -1, -1, -1, -1, -1, -1, -1,
547 };
548 const vc zero = { 0 };
549
550 vc data, mask, t;
551
552 /* Altivec loads automatically mask addresses with -16. This lets us
553 issue the first load as early as possible. */
554 data = __builtin_vec_ld(0, (const vc *)s);
555
556 /* Discard bytes before the beginning of the buffer. Do this by
557 beginning with all ones and shifting in zeros according to the
558 mis-alignment. The LVSR instruction pulls the exact shift we
559 want from the address. */
eb992054 560#ifdef __BIG_ENDIAN__
2431e8ba 561 mask = __builtin_vec_lvsr(0, s);
562 mask = __builtin_vec_perm(zero, ones, mask);
eb992054 563#else
564 mask = __builtin_vec_lvsl(0, s);
565 mask = __builtin_vec_perm(ones, zero, mask);
566#endif
2431e8ba 567 data &= mask;
568
569 /* While altivec loads mask addresses, we still need to align S so
570 that the offset we compute at the end is correct. */
571 s = (const uchar *)((uintptr_t)s & -16);
572
573 /* Main loop processing 16 bytes at a time. */
574 goto start;
575 do
576 {
577 vc m_nl, m_cr, m_bs, m_qm;
578
579 s += 16;
580 data = __builtin_vec_ld(0, (const vc *)s);
581
582 start:
583 m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
584 m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
585 m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
586 m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
587 t = (m_nl | m_cr) | (m_bs | m_qm);
588
589 /* T now contains 0xff in bytes for which we matched one of the relevant
590 characters. We want to exit the loop if any byte in T is non-zero.
591 Below is the expansion of vec_any_ne(t, zero). */
592 }
593 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
594
595 {
596#define N (sizeof(vc) / sizeof(long))
597
2431e8ba 598 union {
599 vc v;
5fe44548 600 /* Statically assert that N is 2 or 4. */
601 unsigned long l[(N == 2 || N == 4) ? N : -1];
2431e8ba 602 } u;
603 unsigned long l, i = 0;
604
605 u.v = t;
606
607 /* Find the first word of T that is non-zero. */
608 switch (N)
609 {
610 case 4:
611 l = u.l[i++];
612 if (l != 0)
613 break;
614 s += sizeof(unsigned long);
615 l = u.l[i++];
616 if (l != 0)
617 break;
618 s += sizeof(unsigned long);
619 case 2:
620 l = u.l[i++];
621 if (l != 0)
622 break;
623 s += sizeof(unsigned long);
624 l = u.l[i];
625 }
626
627 /* L now contains 0xff in bytes for which we matched one of the
628 relevant characters. We can find the byte index by finding
629 its bit index and dividing by 8. */
eb992054 630#ifdef __BIG_ENDIAN__
2431e8ba 631 l = __builtin_clzl(l) >> 3;
eb992054 632#else
633 l = __builtin_ctzl(l) >> 3;
634#endif
2431e8ba 635 return s + l;
636
637#undef N
638 }
639}
640
c434932e 641#elif defined (__ARM_NEON__)
642#include "arm_neon.h"
643
644static const uchar *
645search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
646{
647 const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
648 const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
649 const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
650 const uint8x16_t repl_qm = vdupq_n_u8 ('?');
651 const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
652
653 unsigned int misalign, found, mask;
654 const uint8_t *p;
655 uint8x16_t data;
656
657 /* Align the source pointer. */
658 misalign = (uintptr_t)s & 15;
659 p = (const uint8_t *)((uintptr_t)s & -16);
660 data = vld1q_u8 (p);
661
662 /* Create a mask for the bytes that are valid within the first
663 16-byte block. The Idea here is that the AND with the mask
664 within the loop is "free", since we need some AND or TEST
665 insn in order to set the flags for the branch anyway. */
666 mask = (-1u << misalign) & 0xffff;
667
668 /* Main loop, processing 16 bytes at a time. */
669 goto start;
670
671 do
672 {
673 uint8x8_t l;
674 uint16x4_t m;
675 uint32x2_t n;
676 uint8x16_t t, u, v, w;
677
678 p += 16;
679 data = vld1q_u8 (p);
680 mask = 0xffff;
681
682 start:
683 t = vceqq_u8 (data, repl_nl);
684 u = vceqq_u8 (data, repl_cr);
685 v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
686 w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
687 t = vandq_u8 (vorrq_u8 (v, w), xmask);
688 l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
689 m = vpaddl_u8 (l);
690 n = vpaddl_u16 (m);
691
692 found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
693 vshr_n_u64 ((uint64x1_t) n, 24)), 0);
694 found &= mask;
695 }
696 while (!found);
697
698 /* FOUND contains 1 in bits for which we matched a relevant
699 character. Conversion to the byte index is trivial. */
700 found = __builtin_ctz (found);
701 return (const uchar *)p + found;
702}
703
2431e8ba 704#else
705
706/* We only have one accelerated alternative. Use a direct call so that
707 we encourage inlining. */
708
709#define search_line_fast search_line_acc_char
710
711#endif
712
/* One-time lexer initialization.  When a run-time selectable line
   scanner exists for this host (HAVE_init_vectorized_lexer is defined),
   pick it now; otherwise there is nothing to do.  */

void
_cpp_init_lexer (void)
{
#ifdef HAVE_init_vectorized_lexer
  init_vectorized_lexer ();
#endif
}
722
a54e0bf8 723/* Returns with a logical line that contains no escaped newlines or
724 trigraphs. This is a time-critical inner loop.

   The line starting at buffer->next_line is cleaned in place: S reads
   the raw text and D marks where cleaned text is written.  On return
   buffer->cur and buffer->line_base point at the start of the line,
   the cleaned line is terminated by '\n' at D, buffer->next_line
   points just past the raw line ending, and the buffer's line notes
   describe each escaped newline and trigraph that was found, followed
   by a sentinel note.  Trigraphs are only replaced when the trigraphs
   option is on.  When buffer->from_stage3 is set, only the line ending
   is located and normalized.  */
725void
f7fdd7a1 726_cpp_clean_line (cpp_reader *pfile)
0578f103 727{
a54e0bf8 728 cpp_buffer *buffer;
729 const uchar *s;
730 uchar c, *d, *p;
1e0ef2fd 731
a54e0bf8 732 buffer = pfile->buffer;
733 buffer->cur_note = buffer->notes_used = 0;
734 buffer->cur = buffer->line_base = buffer->next_line;
735 buffer->need_line = false;
2431e8ba 736 s = buffer->next_line;
1e0ef2fd 737
a54e0bf8 738 if (!buffer->from_stage3)
0578f103 739 {
/* Most recent backslash seen on the fast path, if any.  */
5008f5c5 740 const uchar *pbackslash = NULL;
741
2431e8ba 742 /* Fast path. This is the common case of an un-escaped line with
54d3be91 743 no trigraphs. The primary win here is by not writing any
744 data back to memory until we have to. */
2431e8ba 745 while (1)
54d3be91 746 {
2431e8ba 747 /* Perform an optimized search for \n, \r, \\, ?. */
748 s = search_line_fast (s, buffer->rlimit);
54d3be91 749
2431e8ba 750 c = *s;
751 if (c == '\\')
752 {
753 /* Record the location of the backslash and continue. */
754 pbackslash = s++;
54d3be91 755 }
2431e8ba 756 else if (__builtin_expect (c == '?', 0))
54d3be91 757 {
2431e8ba 758 if (__builtin_expect (s[1] == '?', false)
759 && _cpp_trigraph_map[s[2]])
54d3be91 760 {
2431e8ba 761 /* Have a trigraph. We may or may not have to convert
762 it. Add a line note regardless, for -Wtrigraphs. */
763 add_line_note (buffer, s, s[2]);
764 if (CPP_OPTION (pfile, trigraphs))
765 {
766 /* We do, and that means we have to switch to the
767 slow path. */
/* Overwrite the first '?' with the mapped character and leave S on
   the trigraph's last character; the slow path pre-increments S and so
   resumes right after the trigraph.  */
768 d = (uchar *) s;
769 *d = _cpp_trigraph_map[s[2]];
770 s += 2;
771 goto slow_path;
772 }
54d3be91 773 }
2431e8ba 774 /* Not a trigraph. Continue on fast-path. */
775 s++;
54d3be91 776 }
2431e8ba 777 else
778 break;
54d3be91 779 }
780
2431e8ba 781 /* This must be \r or \n. We're either done, or we'll be forced
782 to write back to the buffer and continue on the slow path. */
783 d = (uchar *) s;
784
785 if (__builtin_expect (s == buffer->rlimit, false))
786 goto done;
787
788 /* DOS line ending? */
789 if (__builtin_expect (c == '\r', false) && s[1] == '\n')
790 {
791 s++;
792 if (s == buffer->rlimit)
793 goto done;
794 }
795
796 if (__builtin_expect (pbackslash == NULL, true))
797 goto done;
798
799 /* Check for escaped newline. */
800 p = d;
801 while (is_nvspace (p[-1]))
802 p--;
803 if (p - 1 != pbackslash)
804 goto done;
805
806 /* Have an escaped newline; process it and proceed to
807 the slow path. */
808 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
/* The slow path pre-increments both S and D, so park D just before the
   backslash: the first character of the next physical line then
   overwrites the backslash.  */
809 d = p - 2;
/* Remember the splice point; the slow path's backwards whitespace scan
   stops here.  */
810 buffer->next_line = p - 1;
a54e0bf8 811
2431e8ba 812 slow_path:
813 while (1)
4b912310 814 {
a54e0bf8 815 c = *++s;
816 *++d = c;
817
818 if (c == '\n' || c == '\r')
819 {
2431e8ba 820 /* Handle DOS line endings. */
a54e0bf8 821 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
822 s++;
823 if (s == buffer->rlimit)
824 break;
825
826 /* Escaped? */
827 p = d;
828 while (p != buffer->next_line && is_nvspace (p[-1]))
829 p--;
830 if (p == buffer->next_line || p[-1] != '\\')
831 break;
832
aad4a87f 833 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
a54e0bf8 834 d = p - 2;
835 buffer->next_line = p - 1;
836 }
837 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
838 {
839 /* Add a note regardless, for the benefit of -Wtrigraphs. */
aad4a87f 840 add_line_note (buffer, d, s[2]);
a54e0bf8 841 if (CPP_OPTION (pfile, trigraphs))
842 {
843 *d = _cpp_trigraph_map[s[2]];
844 s += 2;
845 }
846 }
4b912310 847 }
0578f103 848 }
a54e0bf8 849 else
850 {
2431e8ba 851 while (*s != '\n' && *s != '\r')
a54e0bf8 852 s++;
a54e0bf8 853 d = (uchar *) s;
854
855 /* Handle DOS line endings. */
856 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
857 s++;
858 }
338fa5f7 859
54d3be91 860 done:
/* D is the end of the cleaned text and S the last character of the raw
   line ending.  Whatever the ending was, the cleaned line is terminated
   by a plain newline.  */
a54e0bf8 861 *d = '\n';
aad4a87f 862 /* A sentinel note that should never be processed. */
863 add_line_note (buffer, d + 1, '\n');
a54e0bf8 864 buffer->next_line = s + 1;
0578f103 865}
866
3078f2b2 867/* Return true if the trigraph indicated by NOTE should be warned
868 about in a comment. */
869static bool
f7fdd7a1 870warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
3078f2b2 871{
872 const uchar *p;
873
874 /* Within comments we don't warn about trigraphs, unless the
875 trigraph forms an escaped newline, as that may change
7ef5b942 876 behavior. */
3078f2b2 877 if (note->type != '/')
878 return false;
879
880 /* If -trigraphs, then this was an escaped newline iff the next note
881 is coincident. */
882 if (CPP_OPTION (pfile, trigraphs))
883 return note[1].pos == note->pos;
884
885 /* Otherwise, see if this forms an escaped newline. */
886 p = note->pos + 3;
887 while (is_nvspace (*p))
888 p++;
889
890 /* There might have been escaped newlines between the trigraph and the
891 newline we found. Hence the position test. */
892 return (*p == '\n' && p < note[1].pos);
893}
894
a54e0bf8 895/* Process the notes created by add_line_note as far as the current
896 location. */
897void
f7fdd7a1 898_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
0578f103 899{
c808d026 900 cpp_buffer *buffer = pfile->buffer;
901
a54e0bf8 902 for (;;)
f80e83a9 903 {
a54e0bf8 904 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
905 unsigned int col;
396ffa86 906
a54e0bf8 907 if (note->pos > buffer->cur)
908 break;
396ffa86 909
a54e0bf8 910 buffer->cur_note++;
911 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
435fb09b 912
aad4a87f 913 if (note->type == '\\' || note->type == ' ')
a54e0bf8 914 {
aad4a87f 915 if (note->type == ' ' && !in_comment)
dbddc569 916 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
a54e0bf8 917 "backslash and newline separated by space");
aad4a87f 918
a54e0bf8 919 if (buffer->next_line > buffer->rlimit)
1e0ef2fd 920 {
dbddc569 921 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
a54e0bf8 922 "backslash-newline at end of file");
923 /* Prevent "no newline at end of file" warning. */
924 buffer->next_line = buffer->rlimit;
1e0ef2fd 925 }
a54e0bf8 926
927 buffer->line_base = note->pos;
610625e3 928 CPP_INCREMENT_LINE (pfile, 0);
338fa5f7 929 }
aad4a87f 930 else if (_cpp_trigraph_map[note->type])
931 {
3078f2b2 932 if (CPP_OPTION (pfile, warn_trigraphs)
933 && (!in_comment || warn_in_comment (pfile, note)))
aad4a87f 934 {
935 if (CPP_OPTION (pfile, trigraphs))
3a79f5da 936 cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
937 pfile->line_table->highest_line, col,
938 "trigraph ??%c converted to %c",
939 note->type,
940 (int) _cpp_trigraph_map[note->type]);
aad4a87f 941 else
1542b1ef 942 {
3a79f5da 943 cpp_warning_with_line
944 (pfile, CPP_W_TRIGRAPHS,
945 pfile->line_table->highest_line, col,
1542b1ef 946 "trigraph ??%c ignored, use -trigraphs to enable",
947 note->type);
948 }
aad4a87f 949 }
950 }
3a45011c 951 else if (note->type == 0)
952 /* Already processed in lex_raw_string. */;
aad4a87f 953 else
954 abort ();
f80e83a9 955 }
0578f103 956}
957
338fa5f7 958/* Skip a C-style block comment. We find the end of the comment by
959 seeing if an asterisk is before every '/' we encounter. Returns
edaf8cb5 960 nonzero if comment terminated by EOF, zero otherwise.
961
962 Buffer->cur points to the initial asterisk of the comment. */
a54e0bf8 963bool
f7fdd7a1 964_cpp_skip_block_comment (cpp_reader *pfile)
0578f103 965{
f80e83a9 966 cpp_buffer *buffer = pfile->buffer;
54d3be91 967 const uchar *cur = buffer->cur;
968 uchar c;
338fa5f7 969
54d3be91 970 cur++;
971 if (*cur == '/')
972 cur++;
338fa5f7 973
a54e0bf8 974 for (;;)
975 {
338fa5f7 976 /* People like decorating comments with '*', so check for '/'
977 instead for efficiency. */
54d3be91 978 c = *cur++;
979
f80e83a9 980 if (c == '/')
0578f103 981 {
54d3be91 982 if (cur[-2] == '*')
338fa5f7 983 break;
f80e83a9 984
338fa5f7 985 /* Warn about potential nested comments, but not if the '/'
3fb1e43b 986 comes immediately before the true comment delimiter.
f80e83a9 987 Don't bother to get it right across escaped newlines. */
338fa5f7 988 if (CPP_OPTION (pfile, warn_comments)
54d3be91 989 && cur[0] == '*' && cur[1] != '/')
990 {
991 buffer->cur = cur;
3a79f5da 992 cpp_warning_with_line (pfile, CPP_W_COMMENTS,
993 pfile->line_table->highest_line,
994 CPP_BUF_COL (buffer),
995 "\"/*\" within comment");
54d3be91 996 }
0578f103 997 }
a54e0bf8 998 else if (c == '\n')
999 {
610625e3 1000 unsigned int cols;
54d3be91 1001 buffer->cur = cur - 1;
a54e0bf8 1002 _cpp_process_line_notes (pfile, true);
1003 if (buffer->next_line >= buffer->rlimit)
1004 return true;
1005 _cpp_clean_line (pfile);
610625e3 1006
1007 cols = buffer->next_line - buffer->line_base;
1008 CPP_INCREMENT_LINE (pfile, cols);
1009
54d3be91 1010 cur = buffer->cur;
a54e0bf8 1011 }
0578f103 1012 }
f80e83a9 1013
54d3be91 1014 buffer->cur = cur;
3078f2b2 1015 _cpp_process_line_notes (pfile, true);
a54e0bf8 1016 return false;
0578f103 1017}
1018
1c124f85 1019/* Skip a C++ line comment, leaving buffer->cur pointing to the
d10cfa8d 1020 terminating newline. Handles escaped newlines. Returns nonzero
1c124f85 1021 if a multiline comment. */
f80e83a9 1022static int
f7fdd7a1 1023skip_line_comment (cpp_reader *pfile)
0578f103 1024{
f669338a 1025 cpp_buffer *buffer = pfile->buffer;
4999c35b 1026 source_location orig_line = pfile->line_table->highest_line;
f80e83a9 1027
a54e0bf8 1028 while (*buffer->cur != '\n')
1029 buffer->cur++;
1c124f85 1030
a54e0bf8 1031 _cpp_process_line_notes (pfile, true);
dbddc569 1032 return orig_line != pfile->line_table->highest_line;
f80e83a9 1033}
0578f103 1034
a54e0bf8 1035/* Skips whitespace, saving the next non-whitespace character. */
b86584f6 1036static void
f7fdd7a1 1037skip_whitespace (cpp_reader *pfile, cppchar_t c)
f80e83a9 1038{
1039 cpp_buffer *buffer = pfile->buffer;
fe9eb18b 1040 bool saw_NUL = false;
0578f103 1041
338fa5f7 1042 do
f80e83a9 1043 {
78719282 1044 /* Horizontal space always OK. */
a54e0bf8 1045 if (c == ' ' || c == '\t')
338fa5f7 1046 ;
338fa5f7 1047 /* Just \f \v or \0 left. */
78719282 1048 else if (c == '\0')
fe9eb18b 1049 saw_NUL = true;
79bd622b 1050 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
dbddc569 1051 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
73328dce 1052 CPP_BUF_COL (buffer),
1053 "%s in preprocessing directive",
1054 c == '\f' ? "form feed" : "vertical tab");
338fa5f7 1055
338fa5f7 1056 c = *buffer->cur++;
0578f103 1057 }
2c0e001b 1058 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
338fa5f7 1059 while (is_nvspace (c));
1060
fe9eb18b 1061 if (saw_NUL)
d80d2074 1062 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
fe9eb18b 1063
1c124f85 1064 buffer->cur--;
f80e83a9 1065}
0578f103 1066
79bd622b 1067/* See if the characters of a number token are valid in a name (no
1068 '.', '+' or '-'). */
1069static int
f7fdd7a1 1070name_p (cpp_reader *pfile, const cpp_string *string)
79bd622b 1071{
1072 unsigned int i;
1073
1074 for (i = 0; i < string->len; i++)
1075 if (!is_idchar (string->text[i]))
1076 return 0;
1077
b1a9ff83 1078 return 1;
79bd622b 1079}
1080
bce47149 1081/* After parsing an identifier or other sequence, produce a warning about
1082 sequences not in NFC/NFKC. */
1083static void
1084warn_about_normalization (cpp_reader *pfile,
1085 const cpp_token *token,
1086 const struct normalize_state *s)
1087{
1088 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1089 && !pfile->state.skipping)
1090 {
1091 /* Make sure that the token is printed using UCNs, even
1092 if we'd otherwise happily print UTF-8. */
720aca92 1093 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
bce47149 1094 size_t sz;
1095
1096 sz = cpp_spell_token (pfile, token, buf, false) - buf;
1097 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
3a79f5da 1098 cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1099 "`%.*s' is not in NFKC", (int) sz, buf);
bce47149 1100 else
3a79f5da 1101 cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1102 "`%.*s' is not in NFC", (int) sz, buf);
15fc692a 1103 free (buf);
bce47149 1104 }
1105}
1106
5bb46c08 1107/* Returns TRUE if the sequence starting at buffer->cur is invalid in
2cbf1359 1108 an identifier. FIRST is TRUE if this starts an identifier. */
5bb46c08 1109static bool
bce47149 1110forms_identifier_p (cpp_reader *pfile, int first,
1111 struct normalize_state *state)
5bb46c08 1112{
2cbf1359 1113 cpp_buffer *buffer = pfile->buffer;
1114
1115 if (*buffer->cur == '$')
1116 {
1117 if (!CPP_OPTION (pfile, dollars_in_ident))
1118 return false;
1119
1120 buffer->cur++;
f0c2775b 1121 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
2cbf1359 1122 {
f0c2775b 1123 CPP_OPTION (pfile, warn_dollars) = 0;
d80d2074 1124 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
2cbf1359 1125 }
1126
1127 return true;
1128 }
5bb46c08 1129
2cbf1359 1130 /* Is this a syntactically valid UCN? */
865c4e44 1131 if (CPP_OPTION (pfile, extended_identifiers)
4e9d1e6d 1132 && *buffer->cur == '\\'
2cbf1359 1133 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
5bb46c08 1134 {
2cbf1359 1135 buffer->cur += 2;
bce47149 1136 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1137 state))
2cbf1359 1138 return true;
1139 buffer->cur -= 2;
5bb46c08 1140 }
5bb46c08 1141
2cbf1359 1142 return false;
5bb46c08 1143}
1144
038c21f1 1145/* Helper function to get the cpp_hashnode of the identifier BASE. */
1146static cpp_hashnode *
1147lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1148{
1149 cpp_hashnode *result;
1150 const uchar *cur;
1151 unsigned int len;
1152 unsigned int hash = HT_HASHSTEP (0, *base);
1153
1154 cur = base + 1;
1155 while (ISIDNUM (*cur))
1156 {
1157 hash = HT_HASHSTEP (hash, *cur);
1158 cur++;
1159 }
1160 len = cur - base;
1161 hash = HT_HASHFINISH (hash, len);
1162 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1163 base, len, hash, HT_ALLOC));
1164
1165 /* Rarely, identifiers require diagnostics when lexed. */
1166 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1167 && !pfile->state.skipping, 0))
1168 {
1169 /* It is allowed to poison the same identifier twice. */
1170 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1171 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1172 NODE_NAME (result));
1173
1174 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1175 replacement list of a variadic macro. */
1176 if (result == pfile->spec_nodes.n__VA_ARGS__
1177 && !pfile->state.va_args_ok)
1178 cpp_error (pfile, CPP_DL_PEDWARN,
1179 "__VA_ARGS__ can only appear in the expansion"
1180 " of a C99 variadic macro");
1181
1182 /* For -Wc++-compat, warn about use of C++ named operators. */
1183 if (result->flags & NODE_WARN_OPERATOR)
3a79f5da 1184 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1185 "identifier \"%s\" is a special operator name in C++",
1186 NODE_NAME (result));
038c21f1 1187 }
1188
1189 return result;
1190}
1191
1192/* Get the cpp_hashnode of an identifier specified by NAME in
1193 the current cpp_reader object. If none is found, NULL is returned. */
1194cpp_hashnode *
1195_cpp_lex_identifier (cpp_reader *pfile, const char *name)
1196{
1197 cpp_hashnode *result;
1198 result = lex_identifier_intern (pfile, (uchar *) name);
1199 return result;
1200}
1201
5bb46c08 1202/* Lex an identifier starting at BUFFER->CUR - 1. */
338fa5f7 1203static cpp_hashnode *
bce47149 1204lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1205 struct normalize_state *nst)
0578f103 1206{
79bd622b 1207 cpp_hashnode *result;
bb1fa6bb 1208 const uchar *cur;
3eb3f293 1209 unsigned int len;
1210 unsigned int hash = HT_HASHSTEP (0, *base);
66a5287e 1211
3eb3f293 1212 cur = pfile->buffer->cur;
bb1fa6bb 1213 if (! starts_ucn)
460f52aa 1214 {
1215 while (ISIDNUM (*cur))
1216 {
1217 hash = HT_HASHSTEP (hash, *cur);
1218 cur++;
1219 }
1220 NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
1221 }
bb1fa6bb 1222 pfile->buffer->cur = cur;
bce47149 1223 if (starts_ucn || forms_identifier_p (pfile, false, nst))
78a11351 1224 {
bb1fa6bb 1225 /* Slower version for identifiers containing UCNs (or $). */
1226 do {
1227 while (ISIDNUM (*pfile->buffer->cur))
bce47149 1228 {
460f52aa 1229 NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
bce47149 1230 pfile->buffer->cur++;
bce47149 1231 }
1232 } while (forms_identifier_p (pfile, false, nst));
bb1fa6bb 1233 result = _cpp_interpret_identifier (pfile, base,
1234 pfile->buffer->cur - base);
66a5287e 1235 }
bb1fa6bb 1236 else
1237 {
1238 len = cur - base;
1239 hash = HT_HASHFINISH (hash, len);
5bb46c08 1240
e297899b 1241 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1242 base, len, hash, HT_ALLOC));
bb1fa6bb 1243 }
66a5287e 1244
5bb46c08 1245 /* Rarely, identifiers require diagnostics when lexed. */
66a5287e 1246 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1247 && !pfile->state.skipping, 0))
1248 {
1249 /* It is allowed to poison the same identifier twice. */
1250 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
d80d2074 1251 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
66a5287e 1252 NODE_NAME (result));
1253
1254 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1255 replacement list of a variadic macro. */
1256 if (result == pfile->spec_nodes.n__VA_ARGS__
1257 && !pfile->state.va_args_ok)
d80d2074 1258 cpp_error (pfile, CPP_DL_PEDWARN,
f7fdd7a1 1259 "__VA_ARGS__ can only appear in the expansion"
1260 " of a C99 variadic macro");
2a6a6991 1261
1262 /* For -Wc++-compat, warn about use of C++ named operators. */
1263 if (result->flags & NODE_WARN_OPERATOR)
3a79f5da 1264 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1265 "identifier \"%s\" is a special operator name in C++",
1266 NODE_NAME (result));
66a5287e 1267 }
1268
1269 return result;
1270}
1271
5bb46c08 1272/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
0578f103 1273static void
bce47149 1274lex_number (cpp_reader *pfile, cpp_string *number,
1275 struct normalize_state *nst)
0578f103 1276{
b6d18b0a 1277 const uchar *cur;
5bb46c08 1278 const uchar *base;
1279 uchar *dest;
0578f103 1280
5bb46c08 1281 base = pfile->buffer->cur - 1;
1282 do
f80e83a9 1283 {
5bb46c08 1284 cur = pfile->buffer->cur;
338fa5f7 1285
5bb46c08 1286 /* N.B. ISIDNUM does not include $. */
4d6f7dd4 1287 while (ISIDNUM (*cur) || *cur == '.' || DIGIT_SEP (*cur)
1288 || VALID_SIGN (*cur, cur[-1]))
bce47149 1289 {
460f52aa 1290 NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
bce47149 1291 cur++;
bce47149 1292 }
0578f103 1293
78a11351 1294 pfile->buffer->cur = cur;
0578f103 1295 }
bce47149 1296 while (forms_identifier_p (pfile, false, nst));
79bd622b 1297
5bb46c08 1298 number->len = cur - base;
1299 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1300 memcpy (dest, base, number->len);
1301 dest[number->len] = '\0';
1302 number->text = dest;
79bd622b 1303}
1304
4970d4c2 1305/* Create a token of type TYPE with a literal spelling. */
1306static void
f7fdd7a1 1307create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1308 unsigned int len, enum cpp_ttype type)
4970d4c2 1309{
1310 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
1311
1312 memcpy (dest, base, len);
1313 dest[len] = '\0';
1314 token->type = type;
1315 token->val.str.len = len;
1316 token->val.str.text = dest;
1317}
1318
3a45011c 1319/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1320 sequence from *FIRST_BUFF_P to LAST_BUFF_P. */
1321
1322static void
1323bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1324 _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1325{
1326 _cpp_buff *first_buff = *first_buff_p;
1327 _cpp_buff *last_buff = *last_buff_p;
1328
1329 if (first_buff == NULL)
1330 first_buff = last_buff = _cpp_get_buff (pfile, len);
1331 else if (len > BUFF_ROOM (last_buff))
1332 {
1333 size_t room = BUFF_ROOM (last_buff);
1334 memcpy (BUFF_FRONT (last_buff), base, room);
1335 BUFF_FRONT (last_buff) += room;
1336 base += room;
1337 len -= room;
1338 last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1339 }
1340
1341 memcpy (BUFF_FRONT (last_buff), base, len);
1342 BUFF_FRONT (last_buff) += len;
1343
1344 *first_buff_p = first_buff;
1345 *last_buff_p = last_buff;
1346}
1347
4e8832f3 1348
1349/* Returns true if a macro has been defined.
1350 This might not work if compile with -save-temps,
1351 or preprocess separately from compilation. */
1352
1353static bool
1354is_macro(cpp_reader *pfile, const uchar *base)
1355{
1356 const uchar *cur = base;
1357 if (! ISIDST (*cur))
1358 return false;
1359 unsigned int hash = HT_HASHSTEP (0, *cur);
1360 ++cur;
1361 while (ISIDNUM (*cur))
1362 {
1363 hash = HT_HASHSTEP (hash, *cur);
1364 ++cur;
1365 }
1366 hash = HT_HASHFINISH (hash, cur - base);
1367
1368 cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1369 base, cur - base, hash, HT_NO_INSERT));
1370
1371 return !result ? false : (result->type == NT_MACRO);
1372}
1373
1374
538ba11a 1375/* Lexes a raw string. The stored string contains the spelling, including
3a45011c 1376 double quotes, delimiter string, '(' and ')', any leading
538ba11a 1377 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
1378 literal, or CPP_OTHER if it was not properly terminated.
1379
1380 The spelling is NUL-terminated, but it is not guaranteed that this
1381 is the first NUL since embedded NULs are preserved. */
1382
1383static void
1384lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1385 const uchar *cur)
1386{
ffb840b4 1387 uchar raw_prefix[17];
1388 uchar temp_buffer[18];
1389 const uchar *orig_base;
1390 unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
1391 enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
1392 raw_str_phase phase = RAW_STR_PREFIX;
538ba11a 1393 enum cpp_ttype type;
1394 size_t total_len = 0;
ffb840b4 1395 /* Index into temp_buffer during phases other than RAW_STR,
1396 during RAW_STR phase 17 to tell BUF_APPEND that nothing should
1397 be appended to temp_buffer. */
1398 size_t temp_buffer_len = 0;
538ba11a 1399 _cpp_buff *first_buff = NULL, *last_buff = NULL;
ffb840b4 1400 size_t raw_prefix_start;
3a45011c 1401 _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
538ba11a 1402
1403 type = (*base == 'L' ? CPP_WSTRING :
1404 *base == 'U' ? CPP_STRING32 :
1405 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1406 : CPP_STRING);
1407
3a45011c 1408#define BUF_APPEND(STR,LEN) \
1409 do { \
1410 bufring_append (pfile, (const uchar *)(STR), (LEN), \
1411 &first_buff, &last_buff); \
1412 total_len += (LEN); \
ffb840b4 1413 if (__builtin_expect (temp_buffer_len < 17, 0) \
1414 && (const uchar *)(STR) != base \
1415 && (LEN) <= 2) \
1416 { \
1417 memcpy (temp_buffer + temp_buffer_len, \
1418 (const uchar *)(STR), (LEN)); \
1419 temp_buffer_len += (LEN); \
1420 } \
3a45011c 1421 } while (0);
1422
ffb840b4 1423 orig_base = base;
1424 ++cur;
1425 raw_prefix_start = cur - base;
1426 for (;;)
1427 {
3a45011c 1428 cppchar_t c;
1429
1430 /* If we previously performed any trigraph or line splicing
ffb840b4 1431 transformations, undo them in between the opening and closing
1432 double quote. */
3a45011c 1433 while (note->pos < cur)
1434 ++note;
1435 for (; note->pos == cur; ++note)
1436 {
1437 switch (note->type)
1438 {
1439 case '\\':
1440 case ' ':
1441 /* Restore backslash followed by newline. */
1442 BUF_APPEND (base, cur - base);
1443 base = cur;
1444 BUF_APPEND ("\\", 1);
1445 after_backslash:
1446 if (note->type == ' ')
1447 {
1448 /* GNU backslash whitespace newline extension. FIXME
1449 could be any sequence of non-vertical space. When we
1450 can properly restore any such sequence, we should mark
1451 this note as handled so _cpp_process_line_notes
1452 doesn't warn. */
1453 BUF_APPEND (" ", 1);
1454 }
1455
1456 BUF_APPEND ("\n", 1);
1457 break;
1458
1459 case 0:
1460 /* Already handled. */
1461 break;
1462
1463 default:
1464 if (_cpp_trigraph_map[note->type])
1465 {
1466 /* Don't warn about this trigraph in
1467 _cpp_process_line_notes, since trigraphs show up as
1468 trigraphs in raw strings. */
56e2ce2d 1469 uchar type = note->type;
3a45011c 1470 note->type = 0;
1471
1472 if (!CPP_OPTION (pfile, trigraphs))
1473 /* If we didn't convert the trigraph in the first
1474 place, don't do anything now either. */
1475 break;
1476
1477 BUF_APPEND (base, cur - base);
1478 base = cur;
1479 BUF_APPEND ("??", 2);
1480
1481 /* ??/ followed by newline gets two line notes, one for
1482 the trigraph and one for the backslash/newline. */
1483 if (type == '/' && note[1].pos == cur)
1484 {
1485 if (note[1].type != '\\'
1486 && note[1].type != ' ')
1487 abort ();
1488 BUF_APPEND ("/", 1);
1489 ++note;
1490 goto after_backslash;
1491 }
3a45011c 1492 else
1493 {
1494 /* Skip the replacement character. */
1495 base = ++cur;
1496 BUF_APPEND (&type, 1);
ffb840b4 1497 c = type;
1498 goto check_c;
3a45011c 1499 }
1500 }
1501 else
1502 abort ();
1503 break;
1504 }
1505 }
1506 c = *cur++;
ffb840b4 1507 if (__builtin_expect (temp_buffer_len < 17, 0))
1508 temp_buffer[temp_buffer_len++] = c;
538ba11a 1509
ffb840b4 1510 check_c:
1511 if (phase == RAW_STR_PREFIX)
538ba11a 1512 {
ffb840b4 1513 while (raw_prefix_len < temp_buffer_len)
1514 {
1515 raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
1516 switch (raw_prefix[raw_prefix_len])
1517 {
1518 case ' ': case '(': case ')': case '\\': case '\t':
1519 case '\v': case '\f': case '\n': default:
1520 break;
1521 /* Basic source charset except the above chars. */
1522 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1523 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1524 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1525 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1526 case 'y': case 'z':
1527 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1528 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1529 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1530 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1531 case 'Y': case 'Z':
1532 case '0': case '1': case '2': case '3': case '4': case '5':
1533 case '6': case '7': case '8': case '9':
1534 case '_': case '{': case '}': case '#': case '[': case ']':
1535 case '<': case '>': case '%': case ':': case ';': case '.':
1536 case '?': case '*': case '+': case '-': case '/': case '^':
1537 case '&': case '|': case '~': case '!': case '=': case ',':
1538 case '"': case '\'':
1539 if (raw_prefix_len < 16)
1540 {
1541 raw_prefix_len++;
1542 continue;
1543 }
1544 break;
1545 }
1546
1547 if (raw_prefix[raw_prefix_len] != '(')
1548 {
1549 int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
1550 if (raw_prefix_len == 16)
1551 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1552 col, "raw string delimiter longer "
1553 "than 16 characters");
1554 else if (raw_prefix[raw_prefix_len] == '\n')
1555 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1556 col, "invalid new-line in raw "
1557 "string delimiter");
1558 else
1559 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1560 col, "invalid character '%c' in "
1561 "raw string delimiter",
1562 (int) raw_prefix[raw_prefix_len]);
1563 pfile->buffer->cur = orig_base + raw_prefix_start - 1;
1564 create_literal (pfile, token, orig_base,
1565 raw_prefix_start - 1, CPP_OTHER);
1566 if (first_buff)
1567 _cpp_release_buff (pfile, first_buff);
1568 return;
1569 }
1570 raw_prefix[raw_prefix_len] = '"';
1571 phase = RAW_STR;
1572 /* Nothing should be appended to temp_buffer during
1573 RAW_STR phase. */
1574 temp_buffer_len = 17;
1575 break;
1576 }
1577 continue;
1578 }
1579 else if (phase == RAW_STR_SUFFIX)
1580 {
1581 while (raw_suffix_len <= raw_prefix_len
1582 && raw_suffix_len < temp_buffer_len
1583 && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
1584 raw_suffix_len++;
1585 if (raw_suffix_len > raw_prefix_len)
1586 break;
1587 if (raw_suffix_len == temp_buffer_len)
1588 continue;
1589 phase = RAW_STR;
1590 /* Nothing should be appended to temp_buffer during
1591 RAW_STR phase. */
1592 temp_buffer_len = 17;
1593 }
1594 if (c == ')')
1595 {
1596 phase = RAW_STR_SUFFIX;
1597 raw_suffix_len = 0;
1598 temp_buffer_len = 0;
538ba11a 1599 }
1600 else if (c == '\n')
1601 {
1602 if (pfile->state.in_directive
c7691e08 1603 || (pfile->state.parsing_args
1604 && pfile->buffer->next_line >= pfile->buffer->rlimit))
538ba11a 1605 {
1606 cur--;
1607 type = CPP_OTHER;
1608 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1609 "unterminated raw string");
1610 break;
1611 }
1612
3a45011c 1613 BUF_APPEND (base, cur - base);
538ba11a 1614
1615 if (pfile->buffer->cur < pfile->buffer->rlimit)
1616 CPP_INCREMENT_LINE (pfile, 0);
1617 pfile->buffer->need_line = true;
1618
3a45011c 1619 pfile->buffer->cur = cur-1;
1620 _cpp_process_line_notes (pfile, false);
538ba11a 1621 if (!_cpp_get_fresh_line (pfile))
1622 {
1623 source_location src_loc = token->src_loc;
1624 token->type = CPP_EOF;
1625 /* Tell the compiler the line number of the EOF token. */
1626 token->src_loc = pfile->line_table->highest_line;
1627 token->flags = BOL;
1628 if (first_buff != NULL)
1629 _cpp_release_buff (pfile, first_buff);
1630 cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1631 "unterminated raw string");
1632 return;
1633 }
1634
1635 cur = base = pfile->buffer->cur;
3a45011c 1636 note = &pfile->buffer->notes[pfile->buffer->cur_note];
538ba11a 1637 }
538ba11a 1638 }
1639
244db24d 1640 if (CPP_OPTION (pfile, user_literals))
1641 {
4e8832f3 1642 /* If a string format macro, say from inttypes.h, is placed touching
1643 a string literal it could be parsed as a C++11 user-defined string
1644 literal thus breaking the program.
1645 Try to identify macros with is_macro. A warning is issued. */
1646 if (is_macro (pfile, cur))
76d340ac 1647 {
86c82b61 1648 /* Raise a warning, but do not consume subsequent tokens. */
8947e5dc 1649 if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
76d340ac 1650 cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1651 token->src_loc, 0,
1652 "invalid suffix on literal; C++11 requires "
4e8832f3 1653 "a space between literal and string macro");
76d340ac 1654 }
244db24d 1655 /* Grab user defined literal suffix. */
911b08c6 1656 else if (ISIDST (*cur))
244db24d 1657 {
1658 type = cpp_userdef_string_add_type (type);
1659 ++cur;
76d340ac 1660
1661 while (ISIDNUM (*cur))
1662 ++cur;
244db24d 1663 }
244db24d 1664 }
1665
538ba11a 1666 pfile->buffer->cur = cur;
1667 if (first_buff == NULL)
1668 create_literal (pfile, token, base, cur - base, type);
1669 else
1670 {
1671 uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1672
1673 token->type = type;
1674 token->val.str.len = total_len + (cur - base);
1675 token->val.str.text = dest;
1676 last_buff = first_buff;
1677 while (last_buff != NULL)
1678 {
1679 memcpy (dest, last_buff->base,
1680 BUFF_FRONT (last_buff) - last_buff->base);
1681 dest += BUFF_FRONT (last_buff) - last_buff->base;
1682 last_buff = last_buff->next;
1683 }
1684 _cpp_release_buff (pfile, first_buff);
1685 memcpy (dest, base, cur - base);
1686 dest[cur - base] = '\0';
1687 }
1688}
1689
5bb46c08 1690/* Lexes a string, character constant, or angle-bracketed header file
4970d4c2 1691 name. The stored string contains the spelling, including opening
538ba11a 1692 quote and any leading 'L', 'u', 'U' or 'u8' and optional
1693 'R' modifier. It returns the type of the literal, or CPP_OTHER
1694 if it was not properly terminated, or CPP_LESS for an unterminated
1695 header name which must be relexed as normal tokens.
4970d4c2 1696
1697 The spelling is NUL-terminated, but it is not guaranteed that this
1698 is the first NUL since embedded NULs are preserved. */
f80e83a9 1699static void
f7fdd7a1 1700lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
0578f103 1701{
4970d4c2 1702 bool saw_NUL = false;
1703 const uchar *cur;
5bb46c08 1704 cppchar_t terminator;
4970d4c2 1705 enum cpp_ttype type;
1706
1707 cur = base;
1708 terminator = *cur++;
538ba11a 1709 if (terminator == 'L' || terminator == 'U')
4970d4c2 1710 terminator = *cur++;
538ba11a 1711 else if (terminator == 'u')
1712 {
1713 terminator = *cur++;
1714 if (terminator == '8')
1715 terminator = *cur++;
1716 }
1717 if (terminator == 'R')
1718 {
1719 lex_raw_string (pfile, token, base, cur);
1720 return;
1721 }
1722 if (terminator == '"')
924bbf02 1723 type = (*base == 'L' ? CPP_WSTRING :
1724 *base == 'U' ? CPP_STRING32 :
538ba11a 1725 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1726 : CPP_STRING);
4970d4c2 1727 else if (terminator == '\'')
924bbf02 1728 type = (*base == 'L' ? CPP_WCHAR :
1729 *base == 'U' ? CPP_CHAR32 :
1730 *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
4970d4c2 1731 else
1732 terminator = '>', type = CPP_HEADER_NAME;
79bd622b 1733
338fa5f7 1734 for (;;)
0578f103 1735 {
4970d4c2 1736 cppchar_t c = *cur++;
4b0c16ee 1737
edaf8cb5 1738 /* In #include-style directives, terminators are not escapable. */
4970d4c2 1739 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1740 cur++;
1741 else if (c == terminator)
5bb46c08 1742 break;
4970d4c2 1743 else if (c == '\n')
338fa5f7 1744 {
4970d4c2 1745 cur--;
7811eab5 1746 /* Unmatched quotes always yield undefined behavior, but
1747 greedy lexing means that what appears to be an unterminated
1748 header name may actually be a legitimate sequence of tokens. */
1749 if (terminator == '>')
1750 {
1751 token->type = CPP_LESS;
1752 return;
1753 }
4970d4c2 1754 type = CPP_OTHER;
1755 break;
0578f103 1756 }
4970d4c2 1757 else if (c == '\0')
1758 saw_NUL = true;
0578f103 1759 }
1760
4970d4c2 1761 if (saw_NUL && !pfile->state.skipping)
d80d2074 1762 cpp_error (pfile, CPP_DL_WARNING,
1763 "null character(s) preserved in literal");
0578f103 1764
0b67f687 1765 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1766 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1767 (int) terminator);
1768
244db24d 1769 if (CPP_OPTION (pfile, user_literals))
1770 {
4e8832f3 1771 /* If a string format macro, say from inttypes.h, is placed touching
1772 a string literal it could be parsed as a C++11 user-defined string
1773 literal thus breaking the program.
1774 Try to identify macros with is_macro. A warning is issued. */
1775 if (is_macro (pfile, cur))
76d340ac 1776 {
86c82b61 1777 /* Raise a warning, but do not consume subsequent tokens. */
8947e5dc 1778 if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
76d340ac 1779 cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1780 token->src_loc, 0,
1781 "invalid suffix on literal; C++11 requires "
4e8832f3 1782 "a space between literal and string macro");
76d340ac 1783 }
244db24d 1784 /* Grab user defined literal suffix. */
911b08c6 1785 else if (ISIDST (*cur))
244db24d 1786 {
1787 type = cpp_userdef_char_add_type (type);
1788 type = cpp_userdef_string_add_type (type);
1789 ++cur;
76d340ac 1790
1791 while (ISIDNUM (*cur))
1792 ++cur;
244db24d 1793 }
244db24d 1794 }
1795
4970d4c2 1796 pfile->buffer->cur = cur;
1797 create_literal (pfile, token, base, cur - base, type);
338fa5f7 1798}
f80e83a9 1799
956c6108 1800/* Return the comment table. The client may not make any assumption
1801 about the ordering of the table. */
1802cpp_comment_table *
1803cpp_get_comments (cpp_reader *pfile)
1804{
1805 return &pfile->comments;
1806}
1807
1808/* Append a comment to the end of the comment table. */
1809static void
1810store_comment (cpp_reader *pfile, cpp_token *token)
1811{
1812 int len;
1813
1814 if (pfile->comments.allocated == 0)
1815 {
1816 pfile->comments.allocated = 256;
1817 pfile->comments.entries = (cpp_comment *) xmalloc
1818 (pfile->comments.allocated * sizeof (cpp_comment));
1819 }
1820
1821 if (pfile->comments.count == pfile->comments.allocated)
1822 {
1823 pfile->comments.allocated *= 2;
1824 pfile->comments.entries = (cpp_comment *) xrealloc
1825 (pfile->comments.entries,
1826 pfile->comments.allocated * sizeof (cpp_comment));
1827 }
1828
1829 len = token->val.str.len;
1830
1831 /* Copy comment. Note, token may not be NULL terminated. */
1832 pfile->comments.entries[pfile->comments.count].comment =
1833 (char *) xmalloc (sizeof (char) * (len + 1));
1834 memcpy (pfile->comments.entries[pfile->comments.count].comment,
1835 token->val.str.text, len);
1836 pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1837
1838 /* Set source location. */
1839 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1840
1841 /* Increment the count of entries in the comment table. */
1842 pfile->comments.count++;
1843}
1844
79bd622b 1845/* The stored comment includes the comment start and any terminator. */
2c63d6c8 1846static void
f7fdd7a1 1847save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1848 cppchar_t type)
2c63d6c8 1849{
f80e83a9 1850 unsigned char *buffer;
560ab0b2 1851 unsigned int len, clen, i;
b1a9ff83 1852
f0495c2c 1853 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1c124f85 1854
a543b315 1855 /* C++ comments probably (not definitely) have moved past a new
1856 line, which we don't want to save in the comment. */
1c124f85 1857 if (is_vspace (pfile->buffer->cur[-1]))
a543b315 1858 len--;
d3f7919d 1859
560ab0b2 1860 /* If we are currently in a directive or in argument parsing, then
1861 we need to store all C++ comments as C comments internally, and
1862 so we need to allocate a little extra space in that case.
d3f7919d 1863
1864 Note that the only time we encounter a directive here is
1865 when we are saving comments in a "#define". */
560ab0b2 1866 clen = ((pfile->state.in_directive || pfile->state.parsing_args)
1867 && type == '/') ? len + 2 : len;
d3f7919d 1868
1869 buffer = _cpp_unaligned_alloc (pfile, clen);
b1a9ff83 1870
f80e83a9 1871 token->type = CPP_COMMENT;
d3f7919d 1872 token->val.str.len = clen;
338fa5f7 1873 token->val.str.text = buffer;
0578f103 1874
f0495c2c 1875 buffer[0] = '/';
1876 memcpy (buffer + 1, from, len - 1);
d3f7919d 1877
a113df96 1878 /* Finish conversion to a C comment, if necessary. */
560ab0b2 1879 if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
d3f7919d 1880 {
1881 buffer[1] = '*';
1882 buffer[clen - 2] = '*';
1883 buffer[clen - 1] = '/';
560ab0b2 1884 /* As there can be in a C++ comments illegal sequences for C comments
1885 we need to filter them out. */
1886 for (i = 2; i < (clen - 2); i++)
1887 if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
1888 buffer[i] = '|';
d3f7919d 1889 }
956c6108 1890
1891 /* Finally store this comment for use by clients of libcpp. */
1892 store_comment (pfile, token);
338fa5f7 1893}
0578f103 1894
83dcbb5c 1895/* Allocate COUNT tokens for RUN. */
1896void
f7fdd7a1 1897_cpp_init_tokenrun (tokenrun *run, unsigned int count)
83dcbb5c 1898{
3b298764 1899 run->base = XNEWVEC (cpp_token, count);
83dcbb5c 1900 run->limit = run->base + count;
1901 run->next = NULL;
1902}
1903
1904/* Returns the next tokenrun, or creates one if there is none. */
1905static tokenrun *
f7fdd7a1 1906next_tokenrun (tokenrun *run)
83dcbb5c 1907{
1908 if (run->next == NULL)
1909 {
3b298764 1910 run->next = XNEW (tokenrun);
fb5ab82c 1911 run->next->prev = run;
83dcbb5c 1912 _cpp_init_tokenrun (run->next, 250);
1913 }
1914
1915 return run->next;
1916}
1917
a2eb22f0 1918/* Return the number of not yet processed token in a given
ce70f433 1919 context. */
1920int
a2eb22f0 1921_cpp_remaining_tokens_num_in_context (cpp_context *context)
ce70f433 1922{
ce70f433 1923 if (context->tokens_kind == TOKENS_KIND_DIRECT)
ac6130e2 1924 return (LAST (context).token - FIRST (context).token);
ce70f433 1925 else if (context->tokens_kind == TOKENS_KIND_INDIRECT
1926 || context->tokens_kind == TOKENS_KIND_EXTENDED)
ac6130e2 1927 return (LAST (context).ptoken - FIRST (context).ptoken);
ce70f433 1928 else
1929 abort ();
1930}
1931
a2eb22f0 1932/* Returns the token present at index INDEX in a given context. If
1933 INDEX is zero, the next token to be processed is returned. */
ce70f433 1934static const cpp_token*
a2eb22f0 1935_cpp_token_from_context_at (cpp_context *context, int index)
ce70f433 1936{
ce70f433 1937 if (context->tokens_kind == TOKENS_KIND_DIRECT)
1938 return &(FIRST (context).token[index]);
1939 else if (context->tokens_kind == TOKENS_KIND_INDIRECT
1940 || context->tokens_kind == TOKENS_KIND_EXTENDED)
1941 return FIRST (context).ptoken[index];
1942 else
1943 abort ();
1944}
1945
89768577 1946/* Look ahead in the input stream. */
1947const cpp_token *
1948cpp_peek_token (cpp_reader *pfile, int index)
1949{
1950 cpp_context *context = pfile->context;
1951 const cpp_token *peektok;
1952 int count;
1953
1954 /* First, scan through any pending cpp_context objects. */
1955 while (context->prev)
1956 {
a2eb22f0 1957 ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
89768577 1958
1959 if (index < (int) sz)
a2eb22f0 1960 return _cpp_token_from_context_at (context, index);
89768577 1961 index -= (int) sz;
1962 context = context->prev;
1963 }
1964
1965 /* We will have to read some new tokens after all (and do so
1966 without invalidating preceding tokens). */
1967 count = index;
1968 pfile->keep_tokens++;
1969
1970 do
1971 {
1972 peektok = _cpp_lex_token (pfile);
1973 if (peektok->type == CPP_EOF)
1974 return peektok;
1975 }
1976 while (index--);
1977
1978 _cpp_backup_tokens_direct (pfile, count + 1);
1979 pfile->keep_tokens--;
1980
1981 return peektok;
1982}
1983
f9b5f742 1984/* Allocate a single token that is invalidated at the same time as the
1985 rest of the tokens on the line. Has its line and col set to the
1986 same as the last lexed token, so that diagnostics appear in the
1987 right place.

   The token is carved out at PFILE->cur_token, which is then advanced
   past it.  Tokens that were already lexed and backed up (lookaheads)
   are first moved up by one slot so the new token does not overwrite
   them.  Only src_loc of the returned token is initialized here.  */
1988cpp_token *
f7fdd7a1 1989_cpp_temp_token (cpp_reader *pfile)
f9b5f742 1990{
1991 cpp_token *old, *result;
/* SZ: number of token slots between cur_token and the end of the
   current run.  LA: number of pending lookahead tokens.  */
89768577 1992 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
1993 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
f9b5f742 1994
/* OLD is the slot just before cur_token; its src_loc is copied into
   the new token at the end.  */
1995 old = pfile->cur_token - 1;
89768577 1996 /* Any pre-existing lookaheads must not be clobbered. */
1997 if (la)
1998 {
/* The lookaheads reach (or pass) the end of this run, so moving them
   up by one slot involves the following run.  */
1999 if (sz <= la)
2000 {
2001 tokenrun *next = next_tokenrun (pfile->cur_run);
2002
/* LA - SZ lookaheads already live at the start of the next run; make
   room for one more in front of them.  */
2003 if (sz < la)
2004 memmove (next->base + 1, next->base,
2005 (la - sz) * sizeof (cpp_token));
2006
/* The last slot of the current run moves into the first slot of the
   next run.  */
2007 next->base[0] = pfile->cur_run->limit[-1];
2008 }
2009
/* Shift the lookaheads that stay inside the current run.  memmove is
   used because source and destination overlap.  */
2010 if (sz > 1)
2011 memmove (pfile->cur_token + 1, pfile->cur_token,
2012 MIN (la, sz - 1) * sizeof (cpp_token));
2013 }
2014
/* No slot left in this run: the new token becomes the first token of
   the next run.  */
2015 if (!sz && pfile->cur_token == pfile->cur_run->limit)
f9b5f742 2016 {
2017 pfile->cur_run = next_tokenrun (pfile->cur_run);
2018 pfile->cur_token = pfile->cur_run->base;
2019 }
2020
2021 result = pfile->cur_token++;
610625e3 2022 result->src_loc = old->src_loc;
f9b5f742 2023 return result;
2024}
2025
10b4496a 2026/* Lex the next token and return a pointer to it (external interface).
2027 Takes care of issues like directive handling, token lookahead,
3fb1e43b 2028 multiple include optimization and skipping.

   Tokens are taken from the pending lookaheads when there are any,
   and from _cpp_lex_direct otherwise.  The loop repeats while tokens
   are being skipped outside a directive, and after a directive whose
   result is CPP_PADDING.  */
c00e481c 2029const cpp_token *
f7fdd7a1 2030_cpp_lex_token (cpp_reader *pfile)
83dcbb5c 2031{
fb5ab82c 2032 cpp_token *result;
83dcbb5c 2033
fb5ab82c 2034 for (;;)
83dcbb5c 2035 {
/* The current run is full: continue in the next one, which
   next_tokenrun allocates if it does not exist yet.  */
fb5ab82c 2036 if (pfile->cur_token == pfile->cur_run->limit)
83dcbb5c 2037 {
fb5ab82c 2038 pfile->cur_run = next_tokenrun (pfile->cur_run);
2039 pfile->cur_token = pfile->cur_run->base;
83dcbb5c 2040 }
e0ff7935 2041 /* We assume that the current token is somewhere in the current
2042 run. */
2043 if (pfile->cur_token < pfile->cur_run->base
2044 || pfile->cur_token >= pfile->cur_run->limit)
2045 abort ();
83dcbb5c 2046
/* A pending lookahead is a token that already sits at cur_token, so
   it is handed out again instead of lexing a new one.  */
fb5ab82c 2047 if (pfile->lookaheads)
10b4496a 2048 {
2049 pfile->lookaheads--;
2050 result = pfile->cur_token++;
2051 }
fb5ab82c 2052 else
10b4496a 2053 result = _cpp_lex_direct (pfile);
fb5ab82c 2054
/* BOL marks the first token on a line; only there can a directive
   start.  */
2055 if (result->flags & BOL)
83dcbb5c 2056 {
fb5ab82c 2057 /* Is this a directive. If _cpp_handle_directive returns
2058 false, it is an assembler #. */
2059 if (result->type == CPP_HASH
d6af0368 2060 /* 6.10.3 p 11: Directives in a list of macro arguments
2061 gives undefined behavior. This implementation
2062 handles the directive as normal. */
b75b98aa 2063 && pfile->state.parsing_args != 1)
d6d3c909 2064 {
b75b98aa 2065 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
d6d3c909 2066 {
/* A CPP_PADDING result means the directive produced no token for the
   caller, so go round again; anything else is returned in place of
   the '#'.  */
b75b98aa 2067 if (pfile->directive_result.type == CPP_PADDING)
2068 continue;
d6d3c909 2069 result = &pfile->directive_result;
d6d3c909 2070 }
2071 }
b75b98aa 2072 else if (pfile->state.in_deferred_pragma)
2073 result = &pfile->directive_result;
d6d3c909 2074
/* Notify the client of the new line, unless tokens are being
   skipped.  */
5621a364 2075 if (pfile->cb.line_change && !pfile->state.skipping)
f7fdd7a1 2076 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
83dcbb5c 2077 }
83dcbb5c 2078
fb5ab82c 2079 /* We don't skip tokens in directives. */
b75b98aa 2080 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
fb5ab82c 2081 break;
83dcbb5c 2082
fb5ab82c 2083 /* Outside a directive, invalidate controlling macros. At file
10b4496a 2084 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
7ef5b942 2085 get here and MI optimization works. */
83dcbb5c 2086 pfile->mi_valid = false;
fb5ab82c 2087
/* While skipping, only CPP_EOF is passed on to the caller.  */
2088 if (!pfile->state.skipping || result->type == CPP_EOF)
2089 break;
83dcbb5c 2090 }
2091
c00e481c 2092 return result;
83dcbb5c 2093}
2094
a54e0bf8 2095/* Returns true if a fresh line has been loaded. */
2096bool
f7fdd7a1 2097_cpp_get_fresh_line (cpp_reader *pfile)
0bb65704 2098{
6e04daf1 2099 int return_at_eof;
2100
a54e0bf8 2101 /* We can't get a new line until we leave the current directive. */
2102 if (pfile->state.in_directive)
2103 return false;
b1a9ff83 2104
a54e0bf8 2105 for (;;)
fb83e0d6 2106 {
a54e0bf8 2107 cpp_buffer *buffer = pfile->buffer;
fb83e0d6 2108
a54e0bf8 2109 if (!buffer->need_line)
2110 return true;
2111
2112 if (buffer->next_line < buffer->rlimit)
0bb65704 2113 {
a54e0bf8 2114 _cpp_clean_line (pfile);
2115 return true;
2116 }
0bb65704 2117
a54e0bf8 2118 /* First, get out of parsing arguments state. */
2119 if (pfile->state.parsing_args)
2120 return false;
2121
2122 /* End of buffer. Non-empty files should end in a newline. */
2123 if (buffer->buf != buffer->rlimit
2124 && buffer->next_line > buffer->rlimit
2125 && !buffer->from_stage3)
2126 {
0448520c 2127 /* Clip to buffer size. */
a54e0bf8 2128 buffer->next_line = buffer->rlimit;
a54e0bf8 2129 }
6e04daf1 2130
2131 return_at_eof = buffer->return_at_eof;
a54e0bf8 2132 _cpp_pop_buffer (pfile);
6e04daf1 2133 if (pfile->buffer == NULL || return_at_eof)
11b5269c 2134 return false;
a54e0bf8 2135 }
0bb65704 2136}
2137
/* Helper for _cpp_lex_direct.  Expects `buffer' and `result' to be in
   scope.  If the next input character is CHAR, consume it and set the
   token type to THEN_TYPE; otherwise leave the input alone and set
   the token type to ELSE_TYPE.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      if (*buffer->cur == CHAR)                 \
        {                                       \
          buffer->cur++;                        \
          result->type = THEN_TYPE;             \
        }                                       \
      else                                      \
        result->type = ELSE_TYPE;               \
    }                                           \
  while (0)
1c124f85 2146
10b4496a 2147/* Lex a token into pfile->cur_token, which is also incremented, to
2148 get diagnostics pointing to the correct location.
2149
2150 Does not handle issues such as token lookahead, multiple-include
4172d65e 2151 optimization, directives, skipping etc. This function is only
10b4496a 2152 suitable for use by _cpp_lex_token, and in special cases like
2153 lex_expansion_token which doesn't care for any of these issues.
2154
2155 When meeting a newline, returns CPP_EOF if parsing a directive,
2156 otherwise returns to the start of the token buffer if permissible.
2157 Returns a pointer to the lexed token. */
2158cpp_token *
f7fdd7a1 2159_cpp_lex_direct (cpp_reader *pfile)
0578f103 2160{
338fa5f7 2161 cppchar_t c;
230f0943 2162 cpp_buffer *buffer;
338fa5f7 2163 const unsigned char *comment_start;
10b4496a 2164 cpp_token *result = pfile->cur_token++;
0653b94e 2165
/* Re-entered after every newline: start the token over on the next
   line.  */
83dcbb5c 2166 fresh_line:
a54e0bf8 2167 result->flags = 0;
82166c5c 2168 buffer = pfile->buffer;
11b5269c 2169 if (buffer->need_line)
a54e0bf8 2170 {
/* The end of the line closes a deferred pragma.  */
b75b98aa 2171 if (pfile->state.in_deferred_pragma)
2172 {
2173 result->type = CPP_PRAGMA_EOL;
2174 pfile->state.in_deferred_pragma = false;
2175 if (!pfile->state.pragma_allow_expansion)
2176 pfile->state.prevent_expansion--;
2177 return result;
2178 }
a54e0bf8 2179 if (!_cpp_get_fresh_line (pfile))
2180 {
2181 result->type = CPP_EOF;
2908f819 2182 if (!pfile->state.in_directive)
2183 {
2184 /* Tell the compiler the line number of the EOF token. */
dbddc569 2185 result->src_loc = pfile->line_table->highest_line;
2908f819 2186 result->flags = BOL;
2187 }
a54e0bf8 2188 return result;
2189 }
/* Unless earlier tokens must be kept, restart at the beginning of the
   base token run for the new line.  */
2190 if (!pfile->keep_tokens)
2191 {
2192 pfile->cur_run = &pfile->base_run;
2193 result = pfile->base_run.base;
2194 pfile->cur_token = result + 1;
2195 }
2196 result->flags = BOL;
2197 if (pfile->state.parsing_args == 2)
2198 result->flags |= PREV_WHITE;
2199 }
/* _cpp_get_fresh_line may have popped a buffer, so reload it.  */
11b5269c 2200 buffer = pfile->buffer;
/* Re-entered after a comment that is not saved as a token.  */
83dcbb5c 2201 update_tokens_line:
dbddc569 2202 result->src_loc = pfile->line_table->highest_line;
f80e83a9 2203
/* Re-entered after horizontal whitespace.  */
83dcbb5c 2204 skipped_white:
a54e0bf8 2205 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
2206 && !pfile->overlaid_buffer)
2207 {
2208 _cpp_process_line_notes (pfile, false);
dbddc569 2209 result->src_loc = pfile->line_table->highest_line;
a54e0bf8 2210 }
1c124f85 2211 c = *buffer->cur++;
610625e3 2212
6ea2c7a3 2213 if (pfile->forced_token_location_p)
2214 result->src_loc = *pfile->forced_token_location_p;
2215 else
2216 result->src_loc = linemap_position_for_column (pfile->line_table,
2217 CPP_BUF_COLUMN (buffer, buffer->cur));
83dcbb5c 2218
/* Dispatch on the first character of the token; buffer->cur already
   points at the character after it.  */
338fa5f7 2219 switch (c)
0578f103 2220 {
435fb09b 2221 case ' ': case '\t': case '\f': case '\v': case '\0':
2222 result->flags |= PREV_WHITE;
a54e0bf8 2223 skip_whitespace (pfile, c);
2224 goto skipped_white;
338fa5f7 2225
a54e0bf8 2226 case '\n':
610625e3 2227 if (buffer->cur < buffer->rlimit)
2228 CPP_INCREMENT_LINE (pfile, 0);
a54e0bf8 2229 buffer->need_line = true;
2230 goto fresh_line;
732cb4c9 2231
338fa5f7 2232 case '0': case '1': case '2': case '3': case '4':
2233 case '5': case '6': case '7': case '8': case '9':
bce47149 2234 {
2235 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2236 result->type = CPP_NUMBER;
2237 lex_number (pfile, &result->val.str, &nst);
2238 warn_about_normalization (pfile, result, &nst);
2239 break;
2240 }
732cb4c9 2241
78c551ad 2242 case 'L':
924bbf02 2243 case 'u':
2244 case 'U':
538ba11a 2245 case 'R':
2246 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2247 wide strings or raw strings. */
6f6f3dd7 2248 if (c == 'L' || CPP_OPTION (pfile, rliterals)
2249 || (c != 'R' && CPP_OPTION (pfile, uliterals)))
5bb46c08 2250 {
538ba11a 2251 if ((*buffer->cur == '\'' && c != 'R')
2252 || *buffer->cur == '"'
2253 || (*buffer->cur == 'R'
2254 && c != 'R'
2255 && buffer->cur[1] == '"'
6f6f3dd7 2256 && CPP_OPTION (pfile, rliterals))
538ba11a 2257 || (*buffer->cur == '8'
2258 && c == 'u'
2259 && (buffer->cur[1] == '"'
6f6f3dd7 2260 || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
2261 && CPP_OPTION (pfile, rliterals)))))
924bbf02 2262 {
2263 lex_string (pfile, result, buffer->cur - 1);
2264 break;
2265 }
5bb46c08 2266 }
b1a9ff83 2267 /* Fall through. */
78c551ad 2268
338fa5f7 2269 case '_':
2270 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2271 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2272 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
924bbf02 2273 case 's': case 't': case 'v': case 'w': case 'x':
338fa5f7 2274 case 'y': case 'z':
2275 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
78c551ad 2276 case 'G': case 'H': case 'I': case 'J': case 'K':
538ba11a 2277 case 'M': case 'N': case 'O': case 'P': case 'Q':
924bbf02 2278 case 'S': case 'T': case 'V': case 'W': case 'X':
338fa5f7 2279 case 'Y': case 'Z':
2280 result->type = CPP_NAME;
bce47149 2281 {
2282 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2ee04baa 2283 result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
2284 &nst);
bce47149 2285 warn_about_normalization (pfile, result, &nst);
2286 }
338fa5f7 2287
338fa5f7 2288 /* Convert named operators to their proper types. */
2ee04baa 2289 if (result->val.node.node->flags & NODE_OPERATOR)
338fa5f7 2290 {
2291 result->flags |= NAMED_OP;
2ee04baa 2292 result->type = (enum cpp_ttype) result->val.node.node->directive_index;
338fa5f7 2293 }
2294 break;
2295
2296 case '\'':
2297 case '"':
4970d4c2 2298 lex_string (pfile, result, buffer->cur - 1);
338fa5f7 2299 break;
f80e83a9 2300
338fa5f7 2301 case '/':
f0495c2c 2302 /* A potential block or line comment. */
2303 comment_start = buffer->cur;
edaf8cb5 2304 c = *buffer->cur;
2305
f0495c2c 2306 if (c == '*')
2307 {
a54e0bf8 2308 if (_cpp_skip_block_comment (pfile))
d80d2074 2309 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
338fa5f7 2310 }
1c124f85 2311 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
610625e3 2312 || cpp_in_system_header (pfile)))
338fa5f7 2313 {
5db5d057 2314 /* Warn about comments only if pedantically GNUC89, and not
2315 in system headers. */
2316 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
66914e49 2317 && ! buffer->warned_cplusplus_comments)
f80e83a9 2318 {
d80d2074 2319 cpp_error (pfile, CPP_DL_PEDWARN,
ba059ac0 2320 "C++ style comments are not allowed in ISO C90");
d80d2074 2321 cpp_error (pfile, CPP_DL_PEDWARN,
73328dce 2322 "(this will be reported only once per input file)");
f0495c2c 2323 buffer->warned_cplusplus_comments = 1;
2324 }
338fa5f7 2325
e1caf668 2326 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
3a79f5da 2327 cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
f0495c2c 2328 }
1c124f85 2329 else if (c == '=')
2330 {
edaf8cb5 2331 buffer->cur++;
1c124f85 2332 result->type = CPP_DIV_EQ;
2333 break;
2334 }
2335 else
2336 {
1c124f85 2337 result->type = CPP_DIV;
2338 break;
2339 }
338fa5f7 2340
/* Only the two comment branches reach this point.  A comment that is
   not saved counts as whitespace before the next token.  */
f0495c2c 2341 if (!pfile->state.save_comments)
2342 {
2343 result->flags |= PREV_WHITE;
83dcbb5c 2344 goto update_tokens_line;
338fa5f7 2345 }
f0495c2c 2346
2347 /* Save the comment as a token in its own right. */
d3f7919d 2348 save_comment (pfile, result, comment_start, c);
fb5ab82c 2349 break;
338fa5f7 2350
2351 case '<':
2352 if (pfile->state.angled_headers)
2353 {
4970d4c2 2354 lex_string (pfile, result, buffer->cur - 1);
7811eab5 2355 if (result->type != CPP_LESS)
2356 break;
338fa5f7 2357 }
0578f103 2358
edaf8cb5 2359 result->type = CPP_LESS;
2360 if (*buffer->cur == '=')
2361 buffer->cur++, result->type = CPP_LESS_EQ;
2362 else if (*buffer->cur == '<')
338fa5f7 2363 {
edaf8cb5 2364 buffer->cur++;
2365 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
338fa5f7 2366 }
edaf8cb5 2367 else if (CPP_OPTION (pfile, digraphs))
1c124f85 2368 {
edaf8cb5 2369 if (*buffer->cur == ':')
2370 {
1aa79d39 2371 /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
2372 three characters are <:: and the subsequent character
2373 is neither : nor >, the < is treated as a preprocessor
2374 token by itself". */
2375 if (CPP_OPTION (pfile, cplusplus)
1638c736 2376 && CPP_OPTION (pfile, lang) != CLK_CXX98
2377 && CPP_OPTION (pfile, lang) != CLK_GNUCXX
1aa79d39 2378 && buffer->cur[1] == ':'
2379 && buffer->cur[2] != ':' && buffer->cur[2] != '>')
2380 break;
2381
edaf8cb5 2382 buffer->cur++;
2383 result->flags |= DIGRAPH;
2384 result->type = CPP_OPEN_SQUARE;
2385 }
2386 else if (*buffer->cur == '%')
2387 {
2388 buffer->cur++;
2389 result->flags |= DIGRAPH;
2390 result->type = CPP_OPEN_BRACE;
2391 }
1c124f85 2392 }
338fa5f7 2393 break;
2394
2395 case '>':
edaf8cb5 2396 result->type = CPP_GREATER;
2397 if (*buffer->cur == '=')
2398 buffer->cur++, result->type = CPP_GREATER_EQ;
2399 else if (*buffer->cur == '>')
338fa5f7 2400 {
edaf8cb5 2401 buffer->cur++;
2402 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
2403 }
338fa5f7 2404 break;
2405
f669338a 2406 case '%':
edaf8cb5 2407 result->type = CPP_MOD;
2408 if (*buffer->cur == '=')
2409 buffer->cur++, result->type = CPP_MOD_EQ;
2410 else if (CPP_OPTION (pfile, digraphs))
1c124f85 2411 {
edaf8cb5 2412 if (*buffer->cur == ':')
1c124f85 2413 {
edaf8cb5 2414 buffer->cur++;
2415 result->flags |= DIGRAPH;
2416 result->type = CPP_HASH;
2417 if (*buffer->cur == '%' && buffer->cur[1] == ':')
2ee04baa 2418 buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
edaf8cb5 2419 }
2420 else if (*buffer->cur == '>')
2421 {
2422 buffer->cur++;
2423 result->flags |= DIGRAPH;
2424 result->type = CPP_CLOSE_BRACE;
1c124f85 2425 }
1c124f85 2426 }
338fa5f7 2427 break;
2428
f669338a 2429 case '.':
1c124f85 2430 result->type = CPP_DOT;
edaf8cb5 2431 if (ISDIGIT (*buffer->cur))
1c124f85 2432 {
bce47149 2433 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1c124f85 2434 result->type = CPP_NUMBER;
bce47149 2435 lex_number (pfile, &result->val.str, &nst);
2436 warn_about_normalization (pfile, result, &nst);
1c124f85 2437 }
edaf8cb5 2438 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
2439 buffer->cur += 2, result->type = CPP_ELLIPSIS;
2440 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2441 buffer->cur++, result->type = CPP_DOT_STAR;
338fa5f7 2442 break;
0578f103 2443
338fa5f7 2444 case '+':
edaf8cb5 2445 result->type = CPP_PLUS;
2446 if (*buffer->cur == '+')
2447 buffer->cur++, result->type = CPP_PLUS_PLUS;
2448 else if (*buffer->cur == '=')
2449 buffer->cur++, result->type = CPP_PLUS_EQ;
338fa5f7 2450 break;
ac0749c7 2451
338fa5f7 2452 case '-':
edaf8cb5 2453 result->type = CPP_MINUS;
2454 if (*buffer->cur == '>')
338fa5f7 2455 {
edaf8cb5 2456 buffer->cur++;
1c124f85 2457 result->type = CPP_DEREF;
edaf8cb5 2458 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2459 buffer->cur++, result->type = CPP_DEREF_STAR;
1c124f85 2460 }
edaf8cb5 2461 else if (*buffer->cur == '-')
2462 buffer->cur++, result->type = CPP_MINUS_MINUS;
2463 else if (*buffer->cur == '=')
2464 buffer->cur++, result->type = CPP_MINUS_EQ;
338fa5f7 2465 break;
0578f103 2466
338fa5f7 2467 case '&':
edaf8cb5 2468 result->type = CPP_AND;
2469 if (*buffer->cur == '&')
2470 buffer->cur++, result->type = CPP_AND_AND;
2471 else if (*buffer->cur == '=')
2472 buffer->cur++, result->type = CPP_AND_EQ;
338fa5f7 2473 break;
b1a9ff83 2474
338fa5f7 2475 case '|':
edaf8cb5 2476 result->type = CPP_OR;
2477 if (*buffer->cur == '|')
2478 buffer->cur++, result->type = CPP_OR_OR;
2479 else if (*buffer->cur == '=')
2480 buffer->cur++, result->type = CPP_OR_EQ;
338fa5f7 2481 break;
0578f103 2482
338fa5f7 2483 case ':':
edaf8cb5 2484 result->type = CPP_COLON;
2485 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
2486 buffer->cur++, result->type = CPP_SCOPE;
2487 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
338fa5f7 2488 {
edaf8cb5 2489 buffer->cur++;
338fa5f7 2490 result->flags |= DIGRAPH;
1c124f85 2491 result->type = CPP_CLOSE_SQUARE;
2492 }
338fa5f7 2493 break;
0578f103 2494
1c124f85 2495 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
2496 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
2497 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
2498 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
2ee04baa 2499 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
1c124f85 2500
a54e0bf8 2501 case '?': result->type = CPP_QUERY; break;
338fa5f7 2502 case '~': result->type = CPP_COMPL; break;
2503 case ',': result->type = CPP_COMMA; break;
2504 case '(': result->type = CPP_OPEN_PAREN; break;
2505 case ')': result->type = CPP_CLOSE_PAREN; break;
2506 case '[': result->type = CPP_OPEN_SQUARE; break;
2507 case ']': result->type = CPP_CLOSE_SQUARE; break;
2508 case '{': result->type = CPP_OPEN_BRACE; break;
2509 case '}': result->type = CPP_CLOSE_BRACE; break;
2510 case ';': result->type = CPP_SEMICOLON; break;
2511
7fd957fe 2512 /* @ is a punctuator in Objective-C. */
9ee99ac6 2513 case '@': result->type = CPP_ATSIGN; break;
338fa5f7 2514
78c551ad 2515 case '$':
2cbf1359 2516 case '\\':
2517 {
/* Step back onto the '$' or '\\' so that forms_identifier_p can
   examine it.  */
2518 const uchar *base = --buffer->cur;
bce47149 2519 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
78c551ad 2520
bce47149 2521 if (forms_identifier_p (pfile, true, &nst))
2cbf1359 2522 {
2523 result->type = CPP_NAME;
2ee04baa 2524 result->val.node.node = lex_identifier (pfile, base, true, &nst);
bce47149 2525 warn_about_normalization (pfile, result, &nst);
2cbf1359 2526 break;
2527 }
/* Not the start of an identifier: consume the character again and
   treat it as a stray one.  */
2528 buffer->cur++;
bc205914 2529 }
/* Fall through. */
2cbf1359 2530
bc205914 2531 default:
4970d4c2 2532 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
2533 break;
338fa5f7 2534 }
fb5ab82c 2535
2536 return result;
338fa5f7 2537}
2538
b1280514 2539/* An upper bound on the number of bytes needed to spell TOKEN.
2540 Does not include preceding whitespace. */
79bd622b 2541unsigned int
f7fdd7a1 2542cpp_token_len (const cpp_token *token)
338fa5f7 2543{
79bd622b 2544 unsigned int len;
cfad5579 2545
79bd622b 2546 switch (TOKEN_SPELL (token))
f80e83a9 2547 {
cd740bd5 2548 default: len = 6; break;
4970d4c2 2549 case SPELL_LITERAL: len = token->val.str.len; break;
2ee04baa 2550 case SPELL_IDENT: len = NODE_LEN (token->val.node.node) * 10; break;
f80e83a9 2551 }
b1280514 2552
2553 return len;
cfad5579 2554}
2555
/* Decode the UTF-8 sequence starting at NAME and write it to BUFFER
   as a \U escape with eight lower-case hex digits.  Exactly 10 bytes
   are written to BUFFER, with no terminating NUL.  Returns the number
   of bytes of NAME that made up the sequence.  Aborts if one of the
   trailing bytes is not a UTF-8 continuation byte.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *p = name;
  unsigned long code_point;
  unsigned lead = *p;
  int seq_len = 0;
  int remaining;
  int shift;

  /* The number of leading one bits in the first byte is the length
     of the whole sequence.  */
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The payload of the first byte is whatever follows those bits.  */
  code_point = *p & (0x7F >> seq_len);

  /* Every further byte contributes its low six bits.  */
  for (remaining = seq_len - 1; remaining > 0; remaining--)
    {
      p++;
      code_point = (code_point << 6) | (*p & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*p & 0xC0) != 0x80)
        abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  buffer += 2;
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
2589
ba99525e 2590/* Given a token TYPE corresponding to a digraph, return a pointer to
2591 the spelling of the digraph. */
2592static const unsigned char *
2593cpp_digraph2name (enum cpp_ttype type)
2594{
2595 return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2596}
bb1fa6bb 2597
f80e83a9 2598/* Write the spelling of a token TOKEN to BUFFER. The buffer must
c5ea33a8 2599 already contain the enough space to hold the token's spelling.
f7fdd7a1 2600 Returns a pointer to the character after the last character written.
bb1fa6bb 2601 FORSTRING is true if this is to be the spelling after translation
2602 phase 1 (this is different for UCNs).
f7fdd7a1 2603 FIXME: Would be nice if we didn't need the PFILE argument. */
79bd622b 2604unsigned char *
f7fdd7a1 2605cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
bb1fa6bb 2606 unsigned char *buffer, bool forstring)
f80e83a9 2607{
7e842f95 2608 switch (TOKEN_SPELL (token))
f80e83a9 2609 {
2610 case SPELL_OPERATOR:
2611 {
2612 const unsigned char *spelling;
2613 unsigned char c;
ab12a39c 2614
f80e83a9 2615 if (token->flags & DIGRAPH)
ba99525e 2616 spelling = cpp_digraph2name (token->type);
31674461 2617 else if (token->flags & NAMED_OP)
2618 goto spell_ident;
f80e83a9 2619 else
7e842f95 2620 spelling = TOKEN_NAME (token);
b1a9ff83 2621
f80e83a9 2622 while ((c = *spelling++) != '\0')
2623 *buffer++ = c;
2624 }
2625 break;
ab12a39c 2626
8d27e472 2627 spell_ident:
f80e83a9 2628 case SPELL_IDENT:
bb1fa6bb 2629 if (forstring)
2630 {
2ee04baa 2631 memcpy (buffer, NODE_NAME (token->val.node.node),
2632 NODE_LEN (token->val.node.node));
2633 buffer += NODE_LEN (token->val.node.node);
bb1fa6bb 2634 }
2635 else
2636 {
2637 size_t i;
2ee04baa 2638 const unsigned char * name = NODE_NAME (token->val.node.node);
bb1fa6bb 2639
2ee04baa 2640 for (i = 0; i < NODE_LEN (token->val.node.node); i++)
bb1fa6bb 2641 if (name[i] & ~0x7F)
2642 {
2643 i += utf8_to_ucn (buffer, name + i) - 1;
2644 buffer += 10;
2645 }
2646 else
2ee04baa 2647 *buffer++ = NODE_NAME (token->val.node.node)[i];
bb1fa6bb 2648 }
f80e83a9 2649 break;
ab12a39c 2650
4970d4c2 2651 case SPELL_LITERAL:
8d27e472 2652 memcpy (buffer, token->val.str.text, token->val.str.len);
2653 buffer += token->val.str.len;
2654 break;
2655
f80e83a9 2656 case SPELL_NONE:
d80d2074 2657 cpp_error (pfile, CPP_DL_ICE,
2658 "unspellable token %s", TOKEN_NAME (token));
f80e83a9 2659 break;
2660 }
ab12a39c 2661
f80e83a9 2662 return buffer;
2663}
ab12a39c 2664
e484a1cc 2665/* Returns TOKEN spelt as a null-terminated string. The string is
2666 freed when the reader is destroyed. Useful for diagnostics. */
79bd622b 2667unsigned char *
f7fdd7a1 2668cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
b1280514 2669{
2670 unsigned int len = cpp_token_len (token) + 1;
1fdf6039 2671 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
6060326b 2672
bb1fa6bb 2673 end = cpp_spell_token (pfile, token, start, false);
79bd622b 2674 end[0] = '\0';
6060326b 2675
79bd622b 2676 return start;
2677}
6060326b 2678
ba99525e 2679/* Returns a pointer to a string which spells the token defined by
2680 TYPE and FLAGS. Used by C front ends, which really should move to
2681 using cpp_token_as_text. */
79bd622b 2682const char *
ba99525e 2683cpp_type2name (enum cpp_ttype type, unsigned char flags)
79bd622b 2684{
ba99525e 2685 if (flags & DIGRAPH)
2686 return (const char *) cpp_digraph2name (type);
2687 else if (flags & NAMED_OP)
2688 return cpp_named_operator2name (type);
2689
79bd622b 2690 return (const char *) token_spellings[type].name;
2691}
6060326b 2692
f9b5f742 2693/* Writes the spelling of token to FP, without any preceding space.
2694 Separated from cpp_spell_token for efficiency - to avoid stdio
2695 double-buffering. */
79bd622b 2696void
f7fdd7a1 2697cpp_output_token (const cpp_token *token, FILE *fp)
79bd622b 2698{
79bd622b 2699 switch (TOKEN_SPELL (token))
6060326b 2700 {
79bd622b 2701 case SPELL_OPERATOR:
2702 {
2703 const unsigned char *spelling;
28874558 2704 int c;
6060326b 2705
79bd622b 2706 if (token->flags & DIGRAPH)
ba99525e 2707 spelling = cpp_digraph2name (token->type);
79bd622b 2708 else if (token->flags & NAMED_OP)
2709 goto spell_ident;
2710 else
2711 spelling = TOKEN_NAME (token);
f80e83a9 2712
28874558 2713 c = *spelling;
2714 do
2715 putc (c, fp);
2716 while ((c = *++spelling) != '\0');
79bd622b 2717 }
2718 break;
f80e83a9 2719
79bd622b 2720 spell_ident:
2721 case SPELL_IDENT:
bb1fa6bb 2722 {
2723 size_t i;
2ee04baa 2724 const unsigned char * name = NODE_NAME (token->val.node.node);
bb1fa6bb 2725
2ee04baa 2726 for (i = 0; i < NODE_LEN (token->val.node.node); i++)
bb1fa6bb 2727 if (name[i] & ~0x7F)
2728 {
2729 unsigned char buffer[10];
2730 i += utf8_to_ucn (buffer, name + i) - 1;
2731 fwrite (buffer, 1, 10, fp);
2732 }
2733 else
2ee04baa 2734 fputc (NODE_NAME (token->val.node.node)[i], fp);
bb1fa6bb 2735 }
2736 break;
f80e83a9 2737
4970d4c2 2738 case SPELL_LITERAL:
8d27e472 2739 fwrite (token->val.str.text, 1, token->val.str.len, fp);
2740 break;
2741
79bd622b 2742 case SPELL_NONE:
2743 /* An error, most probably. */
2744 break;
f80e83a9 2745 }
6060326b 2746}
2747
79bd622b 2748/* Compare two tokens. */
2749int
f7fdd7a1 2750_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
6060326b 2751{
79bd622b 2752 if (a->type == b->type && a->flags == b->flags)
2753 switch (TOKEN_SPELL (a))
2754 {
2755 default: /* Keep compiler happy. */
2756 case SPELL_OPERATOR:
2ee04baa 2757 /* token_no is used to track where multiple consecutive ##
941f2388 2758 tokens were originally located. */
2ee04baa 2759 return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
79bd622b 2760 case SPELL_NONE:
2ee04baa 2761 return (a->type != CPP_MACRO_ARG
2762 || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
79bd622b 2763 case SPELL_IDENT:
2ee04baa 2764 return a->val.node.node == b->val.node.node;
4970d4c2 2765 case SPELL_LITERAL:
79bd622b 2766 return (a->val.str.len == b->val.str.len
2767 && !memcmp (a->val.str.text, b->val.str.text,
2768 a->val.str.len));
2769 }
6060326b 2770
f80e83a9 2771 return 0;
2772}
2773
79bd622b 2774/* Returns nonzero if a space should be inserted to avoid an
2775 accidental token paste for output. For simplicity, it is
2776 conservative, and occasionally advises a space where one is not
2777 needed, e.g. "." and ".2". */
79bd622b 2778int
f7fdd7a1 2779cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2780 const cpp_token *token2)
6060326b 2781{
79bd622b 2782 enum cpp_ttype a = token1->type, b = token2->type;
2783 cppchar_t c;
6060326b 2784
79bd622b 2785 if (token1->flags & NAMED_OP)
2786 a = CPP_NAME;
2787 if (token2->flags & NAMED_OP)
2788 b = CPP_NAME;
6060326b 2789
79bd622b 2790 c = EOF;
2791 if (token2->flags & DIGRAPH)
ee6c4e4b 2792 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
79bd622b 2793 else if (token_spellings[b].category == SPELL_OPERATOR)
2794 c = token_spellings[b].name[0];
6060326b 2795
79bd622b 2796 /* Quickly get everything that can paste with an '='. */
ee6c4e4b 2797 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
79bd622b 2798 return 1;
6060326b 2799
79bd622b 2800 switch (a)
6060326b 2801 {
e58c07f7 2802 case CPP_GREATER: return c == '>';
2803 case CPP_LESS: return c == '<' || c == '%' || c == ':';
79bd622b 2804 case CPP_PLUS: return c == '+';
2805 case CPP_MINUS: return c == '-' || c == '>';
2806 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
2807 case CPP_MOD: return c == ':' || c == '>';
2808 case CPP_AND: return c == '&';
2809 case CPP_OR: return c == '|';
2810 case CPP_COLON: return c == ':' || c == '>';
2811 case CPP_DEREF: return c == '*';
efdcc728 2812 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
79bd622b 2813 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
2814 case CPP_NAME: return ((b == CPP_NUMBER
2815 && name_p (pfile, &token2->val.str))
2816 || b == CPP_NAME
2817 || b == CPP_CHAR || b == CPP_STRING); /* L */
2818 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
2819 || c == '.' || c == '+' || c == '-');
2cbf1359 2820 /* UCNs */
bc205914 2821 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
2822 && b == CPP_NAME)
2cbf1359 2823 || (CPP_OPTION (pfile, objc)
bc205914 2824 && token1->val.str.text[0] == '@'
2cbf1359 2825 && (b == CPP_NAME || b == CPP_STRING)));
bd285415 2826 case CPP_STRING:
2827 case CPP_WSTRING:
2828 case CPP_UTF8STRING:
2829 case CPP_STRING16:
2830 case CPP_STRING32: return (CPP_OPTION (pfile, user_literals)
2831 && (b == CPP_NAME
2832 || (TOKEN_SPELL (token2) == SPELL_LITERAL
2833 && ISIDST (token2->val.str.text[0]))));
2834
79bd622b 2835 default: break;
6060326b 2836 }
6060326b 2837
deb356cf 2838 return 0;
6060326b 2839}
2840
79bd622b 2841/* Output all the remaining tokens on the current line, and a newline
f9b5f742 2842 character, to FP. Leading whitespace is removed. If there are
2843 macros, special token padding is not performed. */
6060326b 2844void
f7fdd7a1 2845cpp_output_line (cpp_reader *pfile, FILE *fp)
6060326b 2846{
f9b5f742 2847 const cpp_token *token;
7e842f95 2848
f9b5f742 2849 token = cpp_get_token (pfile);
2850 while (token->type != CPP_EOF)
7e842f95 2851 {
f9b5f742 2852 cpp_output_token (token, fp);
2853 token = cpp_get_token (pfile);
2854 if (token->flags & PREV_WHITE)
2855 putc (' ', fp);
7e842f95 2856 }
2857
79bd622b 2858 putc ('\n', fp);
f80e83a9 2859}
6060326b 2860
c0770282 2861/* Return a string representation of all the remaining tokens on the
2862 current line. The result is allocated using xmalloc and must be
2863 freed by the caller. */
2864unsigned char *
2865cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
2866{
2867 const cpp_token *token;
2868 unsigned int out = dir_name ? ustrlen (dir_name) : 0;
2869 unsigned int alloced = 120 + out;
2870 unsigned char *result = (unsigned char *) xmalloc (alloced);
2871
2872 /* If DIR_NAME is empty, there are no initial contents. */
2873 if (dir_name)
2874 {
2875 sprintf ((char *) result, "#%s ", dir_name);
2876 out += 2;
2877 }
2878
2879 token = cpp_get_token (pfile);
2880 while (token->type != CPP_EOF)
2881 {
2882 unsigned char *last;
2883 /* Include room for a possible space and the terminating nul. */
2884 unsigned int len = cpp_token_len (token) + 2;
2885
2886 if (out + len > alloced)
2887 {
2888 alloced *= 2;
2889 if (out + len > alloced)
2890 alloced = out + len;
2891 result = (unsigned char *) xrealloc (result, alloced);
2892 }
2893
2894 last = cpp_spell_token (pfile, token, &result[out], 0);
2895 out = last - result;
2896
2897 token = cpp_get_token (pfile);
2898 if (token->flags & PREV_WHITE)
2899 result[out++] = ' ';
2900 }
2901
2902 result[out] = '\0';
2903 return result;
2904}
2905
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload new_buff will allocate.

   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free-list buffer
   that _cpp_get_buff will hand out for a request of MIN_SIZE bytes.

   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA bytes plus twice the space between
   BUFF's cur and limit pointers.  Every macro argument is
   parenthesized so that compound expressions expand as intended.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
  ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2920
1785b647 2921/* Create a new allocation buffer. Place the control block at the end
2922 of the buffer, so that buffer overflows will cause immediate chaos. */
06c92cbc 2923static _cpp_buff *
f7fdd7a1 2924new_buff (size_t len)
06c92cbc 2925{
2926 _cpp_buff *result;
1fdf6039 2927 unsigned char *base;
06c92cbc 2928
084163dc 2929 if (len < MIN_BUFF_SIZE)
2930 len = MIN_BUFF_SIZE;
198b48a0 2931 len = CPP_ALIGN (len);
06c92cbc 2932
61ed1f10 2933#ifdef ENABLE_VALGRIND_CHECKING
2934 /* Valgrind warns about uses of interior pointers, so put _cpp_buff
2935 struct first. */
2936 size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
2937 base = XNEWVEC (unsigned char, len + slen);
2938 result = (_cpp_buff *) base;
2939 base += slen;
2940#else
720aca92 2941 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
06c92cbc 2942 result = (_cpp_buff *) (base + len);
61ed1f10 2943#endif
06c92cbc 2944 result->base = base;
2945 result->cur = base;
2946 result->limit = base + len;
2947 result->next = NULL;
2948 return result;
2949}
2950
2951/* Place a chain of unwanted allocation buffers on the free list. */
2952void
f7fdd7a1 2953_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
06c92cbc 2954{
2955 _cpp_buff *end = buff;
2956
2957 while (end->next)
2958 end = end->next;
2959 end->next = pfile->free_buffs;
2960 pfile->free_buffs = buff;
2961}
2962
2963/* Return a free buffer of size at least MIN_SIZE. */
2964_cpp_buff *
f7fdd7a1 2965_cpp_get_buff (cpp_reader *pfile, size_t min_size)
06c92cbc 2966{
2967 _cpp_buff *result, **p;
2968
2969 for (p = &pfile->free_buffs;; p = &(*p)->next)
2970 {
4b31a107 2971 size_t size;
084163dc 2972
2973 if (*p == NULL)
06c92cbc 2974 return new_buff (min_size);
084163dc 2975 result = *p;
2976 size = result->limit - result->base;
2977 /* Return a buffer that's big enough, but don't waste one that's
2978 way too big. */
4085c149 2979 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
06c92cbc 2980 break;
2981 }
2982
2983 *p = result->next;
2984 result->next = NULL;
2985 result->cur = result->base;
2986 return result;
2987}
2988
20dd417a 2989/* Creates a new buffer with enough space to hold the uncommitted
e6a5f963 2990 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2991 the excess bytes to the new buffer. Chains the new buffer after
2992 BUFF, and returns the new buffer. */
06c92cbc 2993_cpp_buff *
f7fdd7a1 2994_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
06c92cbc 2995{
4b31a107 2996 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
e6a5f963 2997 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
06c92cbc 2998
e6a5f963 2999 buff->next = new_buff;
3000 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
3001 return new_buff;
3002}
3003
20dd417a 3004/* Creates a new buffer with enough space to hold the uncommitted
e6a5f963 3005 remaining bytes of the buffer pointed to by BUFF, and at least
3006 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
3007 Chains the new buffer before the buffer pointed to by BUFF, and
3008 updates the pointer to point to the new buffer. */
3009void
f7fdd7a1 3010_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
e6a5f963 3011{
3012 _cpp_buff *new_buff, *old_buff = *pbuff;
3013 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
3014
3015 new_buff = _cpp_get_buff (pfile, size);
3016 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
3017 new_buff->next = old_buff;
3018 *pbuff = new_buff;
06c92cbc 3019}
3020
3021/* Free a chain of buffers starting at BUFF. */
3022void
f82b06e0 3023_cpp_free_buff (_cpp_buff *buff)
06c92cbc 3024{
3025 _cpp_buff *next;
3026
3027 for (; buff; buff = next)
3028 {
3029 next = buff->next;
61ed1f10 3030#ifdef ENABLE_VALGRIND_CHECKING
3031 free (buff);
3032#else
06c92cbc 3033 free (buff->base);
61ed1f10 3034#endif
06c92cbc 3035 }
3036}
deb356cf 3037
1fdf6039 3038/* Allocate permanent, unaligned storage of length LEN. */
3039unsigned char *
f7fdd7a1 3040_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1fdf6039 3041{
3042 _cpp_buff *buff = pfile->u_buff;
3043 unsigned char *result = buff->cur;
3044
3045 if (len > (size_t) (buff->limit - result))
3046 {
3047 buff = _cpp_get_buff (pfile, len);
3048 buff->next = pfile->u_buff;
3049 pfile->u_buff = buff;
3050 result = buff->cur;
3051 }
3052
3053 buff->cur = result + len;
3054 return result;
3055}
3056
1e0ef2fd 3057/* Allocate permanent, unaligned storage of length LEN from a_buff.
3058 That buffer is used for growing allocations when saving macro
3059 replacement lists in a #define, and when parsing an answer to an
3060 assertion in #assert, #unassert or #if (and therefore possibly
3061 whilst expanding macros). It therefore must not be used by any
3062 code that they might call: specifically the lexer and the guts of
3063 the macro expander.
3064
3065 All existing other uses clearly fit this restriction: storing
3066 registered pragmas during initialization. */
79bd622b 3067unsigned char *
f7fdd7a1 3068_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
89b05ef6 3069{
e6a5f963 3070 _cpp_buff *buff = pfile->a_buff;
3071 unsigned char *result = buff->cur;
89b05ef6 3072
e6a5f963 3073 if (len > (size_t) (buff->limit - result))
89b05ef6 3074 {
e6a5f963 3075 buff = _cpp_get_buff (pfile, len);
3076 buff->next = pfile->a_buff;
3077 pfile->a_buff = buff;
3078 result = buff->cur;
89b05ef6 3079 }
f80e83a9 3080
e6a5f963 3081 buff->cur = result + len;
79bd622b 3082 return result;
f80e83a9 3083}
c39ed964 3084
3085/* Say which field of TOK is in use. */
3086
3087enum cpp_token_fld_kind
da31536d 3088cpp_token_val_index (const cpp_token *tok)
c39ed964 3089{
3090 switch (TOKEN_SPELL (tok))
3091 {
3092 case SPELL_IDENT:
3093 return CPP_TOKEN_FLD_NODE;
3094 case SPELL_LITERAL:
3095 return CPP_TOKEN_FLD_STR;
941f2388 3096 case SPELL_OPERATOR:
3097 if (tok->type == CPP_PASTE)
2ee04baa 3098 return CPP_TOKEN_FLD_TOKEN_NO;
941f2388 3099 else
3100 return CPP_TOKEN_FLD_NONE;
c39ed964 3101 case SPELL_NONE:
3102 if (tok->type == CPP_MACRO_ARG)
3103 return CPP_TOKEN_FLD_ARG_NO;
3104 else if (tok->type == CPP_PADDING)
3105 return CPP_TOKEN_FLD_SOURCE;
d6d3c909 3106 else if (tok->type == CPP_PRAGMA)
b75b98aa 3107 return CPP_TOKEN_FLD_PRAGMA;
c39ed964 3108 /* else fall through */
3109 default:
3110 return CPP_TOKEN_FLD_NONE;
3111 }
3112}
6ea2c7a3 3113
3114/* All tokens lexed in R after calling this function will be forced to have
3115 their source_location the same as the location referenced by P, until
3116 cpp_stop_forcing_token_locations is called for R. */
3117
3118void
3119cpp_force_token_locations (cpp_reader *r, source_location *p)
3120{
3121 r->forced_token_location_p = p;
3122}
3123
3124/* Go back to assigning locations naturally for lexed tokens. */
3125
3126void
3127cpp_stop_forcing_token_locations (cpp_reader *r)
3128{
3129 r->forced_token_location_p = NULL;
3130}