1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000-2015 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
37 enum spell_type category
;
38 const unsigned char *name
;
41 static const unsigned char *const digraph_spellings
[] =
42 { UC
"%:", UC
"%:%:", UC
"<:", UC
":>", UC
"<%", UC
"%>" };
44 #define OP(e, s) { SPELL_OPERATOR, UC s },
45 #define TK(e, s) { SPELL_ ## s, UC #e },
46 static const struct token_spelling token_spellings
[N_TTYPES
] = { TTYPE_TABLE
};
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer
*, const uchar
*, unsigned int);
54 static int skip_line_comment (cpp_reader
*);
55 static void skip_whitespace (cpp_reader
*, cppchar_t
);
56 static void lex_string (cpp_reader
*, cpp_token
*, const uchar
*);
57 static void save_comment (cpp_reader
*, cpp_token
*, const uchar
*, cppchar_t
);
58 static void store_comment (cpp_reader
*, cpp_token
*);
59 static void create_literal (cpp_reader
*, cpp_token
*, const uchar
*,
60 unsigned int, enum cpp_ttype
);
61 static bool warn_in_comment (cpp_reader
*, _cpp_line_note
*);
62 static int name_p (cpp_reader
*, const cpp_string
*);
63 static tokenrun
*next_tokenrun (tokenrun
*);
65 static _cpp_buff
*new_buff (size_t);
70 Compares the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
73 cpp_ideq (const cpp_token
*token
, const char *string
)
75 if (token
->type
!= CPP_NAME
)
78 return !ustrcmp (NODE_NAME (token
->val
.node
.node
), (const uchar
*) string
);
81 /* Record a note TYPE at byte POS into the current cleaned logical
84 add_line_note (cpp_buffer
*buffer
, const uchar
*pos
, unsigned int type
)
86 if (buffer
->notes_used
== buffer
->notes_cap
)
88 buffer
->notes_cap
= buffer
->notes_cap
* 2 + 200;
89 buffer
->notes
= XRESIZEVEC (_cpp_line_note
, buffer
->notes
,
93 buffer
->notes
[buffer
->notes_used
].pos
= pos
;
94 buffer
->notes
[buffer
->notes_used
].type
= type
;
99 /* Fast path to find line special characters using optimized character
100 scanning algorithms. Anything complicated falls back to the slow
101 path below. Since this loop is very hot it's worth doing these kinds
104 One of the paths through the ifdefs should provide
106 const uchar *search_line_fast (const uchar *s, const uchar *end);
108 Between S and END, search for \n, \r, \\, ?. Return a pointer to
111 Note that the last character of the buffer is *always* a newline,
112 as forced by _cpp_convert_input. This fact can be used to avoid
113 explicitly looking for the end of the buffer. */
115 /* Configure gives us an ifdef test. */
116 #ifndef WORDS_BIGENDIAN
117 #define WORDS_BIGENDIAN 0
120 /* We'd like the largest integer that fits into a register. There's nothing
121 in <stdint.h> that gives us that. For most hosts this is unsigned long,
122 but MS decided on an LLP64 model. Thankfully when building with GCC we
123 can get the "real" word size. */
125 typedef unsigned int word_type
__attribute__((__mode__(__word__
)));
127 typedef unsigned long word_type
;
130 /* The code below is only expecting sizes 4 or 8.
131 Die at compile-time if this expectation is violated. */
132 typedef char check_word_type_size
133 [(sizeof(word_type
) == 8 || sizeof(word_type
) == 4) * 2 - 1];
135 /* Return VAL with the first N bytes forced to values that won't match one
136 of the interesting characters. Note that NUL is not interesting. */
138 static inline word_type
139 acc_char_mask_misalign (word_type val
, unsigned int n
)
149 /* Return X replicated to all byte positions within WORD_TYPE. */
151 static inline word_type
152 acc_char_replicate (uchar x
)
156 ret
= (x
<< 24) | (x
<< 16) | (x
<< 8) | x
;
157 if (sizeof(word_type
) == 8)
158 ret
= (ret
<< 16 << 16) | ret
;
162 /* Return non-zero if some byte of VAL is (probably) C. */
164 static inline word_type
165 acc_char_cmp (word_type val
, word_type c
)
167 #if defined(__GNUC__) && defined(__alpha__)
168 /* We can get exact results using a compare-bytes instruction.
169 Get (val == c) via (0 >= (val ^ c)). */
170 return __builtin_alpha_cmpbge (0, val
^ c
);
172 word_type magic
= 0x7efefefeU
;
173 if (sizeof(word_type
) == 8)
174 magic
= (magic
<< 16 << 16) | 0xfefefefeU
;
178 return ((val
+ magic
) ^ ~val
) & ~magic
;
182 /* Given the result of acc_char_cmp is non-zero, return the index of
183 the found character. If this was a false positive, return -1. */
186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED
,
187 word_type val ATTRIBUTE_UNUSED
)
189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190 /* The cmpbge instruction sets *bits* of the result corresponding to
191 matches in the bytes with no false positives. */
192 return __builtin_ctzl (cmp
);
196 /* ??? It would be nice to force unrolling here,
197 and have all of these constants folded. */
198 for (i
= 0; i
< sizeof(word_type
); ++i
)
202 c
= (val
>> (sizeof(word_type
) - i
- 1) * 8) & 0xff;
204 c
= (val
>> i
* 8) & 0xff;
206 if (c
== '\n' || c
== '\r' || c
== '\\' || c
== '?')
214 /* A version of the fast scanner using bit fiddling techniques.
216 For 32-bit words, one would normally perform 16 comparisons and
217 16 branches. With this algorithm one performs 24 arithmetic
218 operations and one branch. Whether this is faster with a 32-bit
219 word size is going to be somewhat system dependent.
221 For 64-bit words, we eliminate twice the number of comparisons
222 and branches without increasing the number of arithmetic operations.
223 It's almost certainly going to be a win with 64-bit word size. */
225 static const uchar
* search_line_acc_char (const uchar
*, const uchar
*)
229 search_line_acc_char (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
231 const word_type repl_nl
= acc_char_replicate ('\n');
232 const word_type repl_cr
= acc_char_replicate ('\r');
233 const word_type repl_bs
= acc_char_replicate ('\\');
234 const word_type repl_qm
= acc_char_replicate ('?');
236 unsigned int misalign
;
240 /* Align the buffer. Mask out any bytes from before the beginning. */
241 p
= (word_type
*)((uintptr_t)s
& -sizeof(word_type
));
243 misalign
= (uintptr_t)s
& (sizeof(word_type
) - 1);
245 val
= acc_char_mask_misalign (val
, misalign
);
250 t
= acc_char_cmp (val
, repl_nl
);
251 t
|= acc_char_cmp (val
, repl_cr
);
252 t
|= acc_char_cmp (val
, repl_bs
);
253 t
|= acc_char_cmp (val
, repl_qm
);
255 if (__builtin_expect (t
!= 0, 0))
257 int i
= acc_char_index (t
, val
);
259 return (const uchar
*)p
+ i
;
266 /* Disable on Solaris 2/x86 until the following problem can be properly
269 The Solaris 10+ assembler tags objects with the instruction set
270 extensions used, so SSE4.2 executables cannot run on machines that
271 don't support that extension. */
273 #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
275 /* Replicated character data to be shared between implementations.
276 Recall that outside of a context with vector support we can't
277 define compatible vector types, therefore these are all defined
278 in terms of raw characters. */
279 static const char repl_chars
[4][16] __attribute__((aligned(16))) = {
280 { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
281 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
282 { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
283 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
284 { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
285 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
286 { '?', '?', '?', '?', '?', '?', '?', '?',
287 '?', '?', '?', '?', '?', '?', '?', '?' },
290 /* A version of the fast scanner using MMX vectorized byte compare insns.
292 This uses the PMOVMSKB instruction which was introduced with "MMX2",
293 which was packaged into SSE1; it is also present in the AMD MMX
294 extension. Mark the function as using "sse" so that we emit a real
295 "emms" instruction, rather than the 3dNOW "femms" instruction. */
299 __attribute__((__target__("sse")))
301 search_line_mmx (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
303 typedef char v8qi
__attribute__ ((__vector_size__ (8)));
304 typedef int __m64
__attribute__ ((__vector_size__ (8), __may_alias__
));
306 const v8qi repl_nl
= *(const v8qi
*)repl_chars
[0];
307 const v8qi repl_cr
= *(const v8qi
*)repl_chars
[1];
308 const v8qi repl_bs
= *(const v8qi
*)repl_chars
[2];
309 const v8qi repl_qm
= *(const v8qi
*)repl_chars
[3];
311 unsigned int misalign
, found
, mask
;
315 /* Align the source pointer. While MMX doesn't generate unaligned data
316 faults, this allows us to safely scan to the end of the buffer without
317 reading beyond the end of the last page. */
318 misalign
= (uintptr_t)s
& 7;
319 p
= (const v8qi
*)((uintptr_t)s
& -8);
322 /* Create a mask for the bytes that are valid within the first
323 8-byte block. The idea here is that the AND with the mask
324 within the loop is "free", since we need some AND or TEST
325 insn in order to set the flags for the branch anyway. */
326 mask
= -1u << misalign
;
328 /* Main loop processing 8 bytes at a time. */
336 t
= __builtin_ia32_pcmpeqb(data
, repl_nl
);
337 c
= __builtin_ia32_pcmpeqb(data
, repl_cr
);
338 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
339 c
= __builtin_ia32_pcmpeqb(data
, repl_bs
);
340 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
341 c
= __builtin_ia32_pcmpeqb(data
, repl_qm
);
342 t
= (v8qi
) __builtin_ia32_por ((__m64
)t
, (__m64
)c
);
343 found
= __builtin_ia32_pmovmskb (t
);
348 __builtin_ia32_emms ();
350 /* FOUND contains 1 in bits for which we matched a relevant
351 character. Conversion to the byte index is trivial. */
352 found
= __builtin_ctz(found
);
353 return (const uchar
*)p
+ found
;
356 /* A version of the fast scanner using SSE2 vectorized byte compare insns. */
360 __attribute__((__target__("sse2")))
362 search_line_sse2 (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
364 typedef char v16qi
__attribute__ ((__vector_size__ (16)));
366 const v16qi repl_nl
= *(const v16qi
*)repl_chars
[0];
367 const v16qi repl_cr
= *(const v16qi
*)repl_chars
[1];
368 const v16qi repl_bs
= *(const v16qi
*)repl_chars
[2];
369 const v16qi repl_qm
= *(const v16qi
*)repl_chars
[3];
371 unsigned int misalign
, found
, mask
;
375 /* Align the source pointer. */
376 misalign
= (uintptr_t)s
& 15;
377 p
= (const v16qi
*)((uintptr_t)s
& -16);
380 /* Create a mask for the bytes that are valid within the first
381 16-byte block. The idea here is that the AND with the mask
382 within the loop is "free", since we need some AND or TEST
383 insn in order to set the flags for the branch anyway. */
384 mask
= -1u << misalign
;
386 /* Main loop processing 16 bytes at a time. */
394 t
= __builtin_ia32_pcmpeqb128(data
, repl_nl
);
395 t
|= __builtin_ia32_pcmpeqb128(data
, repl_cr
);
396 t
|= __builtin_ia32_pcmpeqb128(data
, repl_bs
);
397 t
|= __builtin_ia32_pcmpeqb128(data
, repl_qm
);
398 found
= __builtin_ia32_pmovmskb128 (t
);
403 /* FOUND contains 1 in bits for which we matched a relevant
404 character. Conversion to the byte index is trivial. */
405 found
= __builtin_ctz(found
);
406 return (const uchar
*)p
+ found
;
410 /* A version of the fast scanner using SSE 4.2 vectorized string insns. */
414 __attribute__((__target__("sse4.2")))
416 search_line_sse42 (const uchar
*s
, const uchar
*end
)
418 typedef char v16qi
__attribute__ ((__vector_size__ (16)));
419 static const v16qi search
= { '\n', '\r', '?', '\\' };
421 uintptr_t si
= (uintptr_t)s
;
424 /* Check for unaligned input. */
429 if (__builtin_expect (end
- s
< 16, 0)
430 && __builtin_expect ((si
& 0xfff) > 0xff0, 0))
432 /* There are fewer than 16 bytes left in the buffer, and fewer
433 than 16 bytes left on the page. Reading 16 bytes at this
434 point might generate a spurious page fault. Defer to the
435 SSE2 implementation, which already handles alignment. */
436 return search_line_sse2 (s
, end
);
439 /* ??? The builtin doesn't understand that the PCMPESTRI read from
440 memory need not be aligned. */
441 sv
= __builtin_ia32_loaddqu ((const char *) s
);
442 index
= __builtin_ia32_pcmpestri128 (search
, 4, sv
, 16, 0);
444 if (__builtin_expect (index
< 16, 0))
447 /* Advance the pointer to an aligned address. We will re-scan a
448 few bytes, but we no longer need to care about reading past the
449 end of a page, since we're guaranteed a match. */
450 s
= (const uchar
*)((si
+ 16) & -16);
453 /* Main loop, processing 16 bytes at a time. */
454 #ifdef __GCC_ASM_FLAG_OUTPUTS__
459 /* By using inline assembly instead of the builtin,
460 we can use the result, as well as the flags set. */
461 __asm ("%vpcmpestri\t$0, %2, %3"
462 : "=c"(index
), "=@ccc"(f
)
463 : "m"(*s
), "x"(search
), "a"(4), "d"(16));
471 /* By doing the whole loop in inline assembly,
472 we can make proper use of the flags set. */
473 __asm ( ".balign 16\n"
475 " %vpcmpestri\t$0, (%1), %2\n"
477 : "=&c"(index
), "+r"(s
)
478 : "x"(search
), "a"(4), "d"(16));
486 /* Work around out-dated assemblers without sse4 support. */
487 #define search_line_sse42 search_line_sse2
490 /* Check the CPU capabilities. */
492 #include "../gcc/config/i386/cpuid.h"
494 typedef const uchar
* (*search_line_fast_type
) (const uchar
*, const uchar
*);
495 static search_line_fast_type search_line_fast
;
497 #define HAVE_init_vectorized_lexer 1
499 init_vectorized_lexer (void)
501 unsigned dummy
, ecx
= 0, edx
= 0;
502 search_line_fast_type impl
= search_line_acc_char
;
505 #if defined(__SSE4_2__)
507 #elif defined(__SSE2__)
509 #elif defined(__SSE__)
514 impl
= search_line_sse42
;
515 else if (__get_cpuid (1, &dummy
, &dummy
, &ecx
, &edx
) || minimum
== 2)
517 if (minimum
== 3 || (ecx
& bit_SSE4_2
))
518 impl
= search_line_sse42
;
519 else if (minimum
== 2 || (edx
& bit_SSE2
))
520 impl
= search_line_sse2
;
521 else if (minimum
== 1 || (edx
& bit_SSE
))
522 impl
= search_line_mmx
;
524 else if (__get_cpuid (0x80000001, &dummy
, &dummy
, &dummy
, &edx
))
527 || (edx
& (bit_MMXEXT
| bit_CMOV
)) == (bit_MMXEXT
| bit_CMOV
))
528 impl
= search_line_mmx
;
531 search_line_fast
= impl
;
534 #elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
536 /* A version of the fast scanner using AltiVec vectorized byte compares
537 and VSX unaligned loads (when VSX is available). This is otherwise
538 the same as the pre-GCC 5 version. */
540 ATTRIBUTE_NO_SANITIZE_UNDEFINED
542 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
544 typedef __attribute__((altivec(vector
))) unsigned char vc
;
547 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
548 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
551 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
552 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
555 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
556 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
559 '?', '?', '?', '?', '?', '?', '?', '?',
560 '?', '?', '?', '?', '?', '?', '?', '?',
562 const vc zero
= { 0 };
566 /* Main loop processing 16 bytes at a time. */
569 vc m_nl
, m_cr
, m_bs
, m_qm
;
571 data
= *((const vc
*)s
);
574 m_nl
= (vc
) __builtin_vec_cmpeq(data
, repl_nl
);
575 m_cr
= (vc
) __builtin_vec_cmpeq(data
, repl_cr
);
576 m_bs
= (vc
) __builtin_vec_cmpeq(data
, repl_bs
);
577 m_qm
= (vc
) __builtin_vec_cmpeq(data
, repl_qm
);
578 t
= (m_nl
| m_cr
) | (m_bs
| m_qm
);
580 /* T now contains 0xff in bytes for which we matched one of the relevant
581 characters. We want to exit the loop if any byte in T is non-zero.
582 Below is the expansion of vec_any_ne(t, zero). */
584 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t
, zero
));
586 /* Restore s to point to the 16 bytes we just processed. */
590 #define N (sizeof(vc) / sizeof(long))
594 /* Statically assert that N is 2 or 4. */
595 unsigned long l
[(N
== 2 || N
== 4) ? N
: -1];
597 unsigned long l
, i
= 0;
601 /* Find the first word of T that is non-zero. */
608 s
+= sizeof(unsigned long);
612 s
+= sizeof(unsigned long);
617 s
+= sizeof(unsigned long);
621 /* L now contains 0xff in bytes for which we matched one of the
622 relevant characters. We can find the byte index by finding
623 its bit index and dividing by 8. */
624 #ifdef __BIG_ENDIAN__
625 l
= __builtin_clzl(l
) >> 3;
627 l
= __builtin_ctzl(l
) >> 3;
635 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
637 /* A version of the fast scanner using AltiVec vectorized byte compares.
638 This cannot be used for little endian because vec_lvsl/lvsr are
639 deprecated for little endian and the code won't work properly. */
640 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
641 so we can't compile this function without -maltivec on the command line
642 (or implied by some other switch). */
645 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
647 typedef __attribute__((altivec(vector
))) unsigned char vc
;
650 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
651 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
654 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
655 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
658 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
659 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
662 '?', '?', '?', '?', '?', '?', '?', '?',
663 '?', '?', '?', '?', '?', '?', '?', '?',
666 -1, -1, -1, -1, -1, -1, -1, -1,
667 -1, -1, -1, -1, -1, -1, -1, -1,
669 const vc zero
= { 0 };
673 /* Altivec loads automatically mask addresses with -16. This lets us
674 issue the first load as early as possible. */
675 data
= __builtin_vec_ld(0, (const vc
*)s
);
677 /* Discard bytes before the beginning of the buffer. Do this by
678 beginning with all ones and shifting in zeros according to the
679 mis-alignment. The LVSR instruction pulls the exact shift we
680 want from the address. */
681 mask
= __builtin_vec_lvsr(0, s
);
682 mask
= __builtin_vec_perm(zero
, ones
, mask
);
685 /* While altivec loads mask addresses, we still need to align S so
686 that the offset we compute at the end is correct. */
687 s
= (const uchar
*)((uintptr_t)s
& -16);
689 /* Main loop processing 16 bytes at a time. */
693 vc m_nl
, m_cr
, m_bs
, m_qm
;
696 data
= __builtin_vec_ld(0, (const vc
*)s
);
699 m_nl
= (vc
) __builtin_vec_cmpeq(data
, repl_nl
);
700 m_cr
= (vc
) __builtin_vec_cmpeq(data
, repl_cr
);
701 m_bs
= (vc
) __builtin_vec_cmpeq(data
, repl_bs
);
702 m_qm
= (vc
) __builtin_vec_cmpeq(data
, repl_qm
);
703 t
= (m_nl
| m_cr
) | (m_bs
| m_qm
);
705 /* T now contains 0xff in bytes for which we matched one of the relevant
706 characters. We want to exit the loop if any byte in T is non-zero.
707 Below is the expansion of vec_any_ne(t, zero). */
709 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t
, zero
));
712 #define N (sizeof(vc) / sizeof(long))
716 /* Statically assert that N is 2 or 4. */
717 unsigned long l
[(N
== 2 || N
== 4) ? N
: -1];
719 unsigned long l
, i
= 0;
723 /* Find the first word of T that is non-zero. */
730 s
+= sizeof(unsigned long);
734 s
+= sizeof(unsigned long);
739 s
+= sizeof(unsigned long);
743 /* L now contains 0xff in bytes for which we matched one of the
744 relevant characters. We can find the byte index by finding
745 its bit index and dividing by 8. */
746 l
= __builtin_clzl(l
) >> 3;
753 #elif defined (__ARM_NEON)
754 #include "arm_neon.h"
757 search_line_fast (const uchar
*s
, const uchar
*end ATTRIBUTE_UNUSED
)
759 const uint8x16_t repl_nl
= vdupq_n_u8 ('\n');
760 const uint8x16_t repl_cr
= vdupq_n_u8 ('\r');
761 const uint8x16_t repl_bs
= vdupq_n_u8 ('\\');
762 const uint8x16_t repl_qm
= vdupq_n_u8 ('?');
763 const uint8x16_t xmask
= (uint8x16_t
) vdupq_n_u64 (0x8040201008040201ULL
);
765 unsigned int misalign
, found
, mask
;
769 /* Align the source pointer. */
770 misalign
= (uintptr_t)s
& 15;
771 p
= (const uint8_t *)((uintptr_t)s
& -16);
774 /* Create a mask for the bytes that are valid within the first
775 16-byte block. The idea here is that the AND with the mask
776 within the loop is "free", since we need some AND or TEST
777 insn in order to set the flags for the branch anyway. */
778 mask
= (-1u << misalign
) & 0xffff;
780 /* Main loop, processing 16 bytes at a time. */
788 uint8x16_t t
, u
, v
, w
;
795 t
= vceqq_u8 (data
, repl_nl
);
796 u
= vceqq_u8 (data
, repl_cr
);
797 v
= vorrq_u8 (t
, vceqq_u8 (data
, repl_bs
));
798 w
= vorrq_u8 (u
, vceqq_u8 (data
, repl_qm
));
799 t
= vandq_u8 (vorrq_u8 (v
, w
), xmask
);
800 l
= vpadd_u8 (vget_low_u8 (t
), vget_high_u8 (t
));
804 found
= vget_lane_u32 ((uint32x2_t
) vorr_u64 ((uint64x1_t
) n
,
805 vshr_n_u64 ((uint64x1_t
) n
, 24)), 0);
810 /* FOUND contains 1 in bits for which we matched a relevant
811 character. Conversion to the byte index is trivial. */
812 found
= __builtin_ctz (found
);
813 return (const uchar
*)p
+ found
;
818 /* We only have one accelerated alternative. Use a direct call so that
819 we encourage inlining. */
821 #define search_line_fast search_line_acc_char
825 /* Initialize the lexer if needed. */
828 _cpp_init_lexer (void)
830 #ifdef HAVE_init_vectorized_lexer
831 init_vectorized_lexer ();
835 /* Returns with a logical line that contains no escaped newlines or
836 trigraphs. This is a time-critical inner loop. */
838 _cpp_clean_line (cpp_reader
*pfile
)
844 buffer
= pfile
->buffer
;
845 buffer
->cur_note
= buffer
->notes_used
= 0;
846 buffer
->cur
= buffer
->line_base
= buffer
->next_line
;
847 buffer
->need_line
= false;
848 s
= buffer
->next_line
;
850 if (!buffer
->from_stage3
)
852 const uchar
*pbackslash
= NULL
;
854 /* Fast path. This is the common case of an un-escaped line with
855 no trigraphs. The primary win here is by not writing any
856 data back to memory until we have to. */
859 /* Perform an optimized search for \n, \r, \\, ?. */
860 s
= search_line_fast (s
, buffer
->rlimit
);
865 /* Record the location of the backslash and continue. */
868 else if (__builtin_expect (c
== '?', 0))
870 if (__builtin_expect (s
[1] == '?', false)
871 && _cpp_trigraph_map
[s
[2]])
873 /* Have a trigraph. We may or may not have to convert
874 it. Add a line note regardless, for -Wtrigraphs. */
875 add_line_note (buffer
, s
, s
[2]);
876 if (CPP_OPTION (pfile
, trigraphs
))
878 /* We do, and that means we have to switch to the
881 *d
= _cpp_trigraph_map
[s
[2]];
886 /* Not a trigraph. Continue on fast-path. */
893 /* This must be \r or \n. We're either done, or we'll be forced
894 to write back to the buffer and continue on the slow path. */
897 if (__builtin_expect (s
== buffer
->rlimit
, false))
900 /* DOS line ending? */
901 if (__builtin_expect (c
== '\r', false) && s
[1] == '\n')
904 if (s
== buffer
->rlimit
)
908 if (__builtin_expect (pbackslash
== NULL
, true))
911 /* Check for escaped newline. */
913 while (is_nvspace (p
[-1]))
915 if (p
- 1 != pbackslash
)
918 /* Have an escaped newline; process it and proceed to
920 add_line_note (buffer
, p
- 1, p
!= d
? ' ' : '\\');
922 buffer
->next_line
= p
- 1;
930 if (c
== '\n' || c
== '\r')
932 /* Handle DOS line endings. */
933 if (c
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
935 if (s
== buffer
->rlimit
)
940 while (p
!= buffer
->next_line
&& is_nvspace (p
[-1]))
942 if (p
== buffer
->next_line
|| p
[-1] != '\\')
945 add_line_note (buffer
, p
- 1, p
!= d
? ' ': '\\');
947 buffer
->next_line
= p
- 1;
949 else if (c
== '?' && s
[1] == '?' && _cpp_trigraph_map
[s
[2]])
951 /* Add a note regardless, for the benefit of -Wtrigraphs. */
952 add_line_note (buffer
, d
, s
[2]);
953 if (CPP_OPTION (pfile
, trigraphs
))
955 *d
= _cpp_trigraph_map
[s
[2]];
963 while (*s
!= '\n' && *s
!= '\r')
967 /* Handle DOS line endings. */
968 if (*s
== '\r' && s
!= buffer
->rlimit
&& s
[1] == '\n')
974 /* A sentinel note that should never be processed. */
975 add_line_note (buffer
, d
+ 1, '\n');
976 buffer
->next_line
= s
+ 1;
979 /* Return true if the trigraph indicated by NOTE should be warned
980 about in a comment. */
982 warn_in_comment (cpp_reader
*pfile
, _cpp_line_note
*note
)
986 /* Within comments we don't warn about trigraphs, unless the
987 trigraph forms an escaped newline, as that may change
989 if (note
->type
!= '/')
992 /* If -trigraphs, then this was an escaped newline iff the next note
994 if (CPP_OPTION (pfile
, trigraphs
))
995 return note
[1].pos
== note
->pos
;
997 /* Otherwise, see if this forms an escaped newline. */
999 while (is_nvspace (*p
))
1002 /* There might have been escaped newlines between the trigraph and the
1003 newline we found. Hence the position test. */
1004 return (*p
== '\n' && p
< note
[1].pos
);
1007 /* Process the notes created by add_line_note as far as the current
1010 _cpp_process_line_notes (cpp_reader
*pfile
, int in_comment
)
1012 cpp_buffer
*buffer
= pfile
->buffer
;
1016 _cpp_line_note
*note
= &buffer
->notes
[buffer
->cur_note
];
1019 if (note
->pos
> buffer
->cur
)
1023 col
= CPP_BUF_COLUMN (buffer
, note
->pos
+ 1);
1025 if (note
->type
== '\\' || note
->type
== ' ')
1027 if (note
->type
== ' ' && !in_comment
)
1028 cpp_error_with_line (pfile
, CPP_DL_WARNING
, pfile
->line_table
->highest_line
, col
,
1029 "backslash and newline separated by space");
1031 if (buffer
->next_line
> buffer
->rlimit
)
1033 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
, col
,
1034 "backslash-newline at end of file");
1035 /* Prevent "no newline at end of file" warning. */
1036 buffer
->next_line
= buffer
->rlimit
;
1039 buffer
->line_base
= note
->pos
;
1040 CPP_INCREMENT_LINE (pfile
, 0);
1042 else if (_cpp_trigraph_map
[note
->type
])
1044 if (CPP_OPTION (pfile
, warn_trigraphs
)
1045 && (!in_comment
|| warn_in_comment (pfile
, note
)))
1047 if (CPP_OPTION (pfile
, trigraphs
))
1048 cpp_warning_with_line (pfile
, CPP_W_TRIGRAPHS
,
1049 pfile
->line_table
->highest_line
, col
,
1050 "trigraph ??%c converted to %c",
1052 (int) _cpp_trigraph_map
[note
->type
]);
1055 cpp_warning_with_line
1056 (pfile
, CPP_W_TRIGRAPHS
,
1057 pfile
->line_table
->highest_line
, col
,
1058 "trigraph ??%c ignored, use -trigraphs to enable",
1063 else if (note
->type
== 0)
1064 /* Already processed in lex_raw_string. */;
1070 /* Skip a C-style block comment. We find the end of the comment by
1071 seeing if an asterisk is before every '/' we encounter. Returns
1072 nonzero if comment terminated by EOF, zero otherwise.
1074 Buffer->cur points to the initial asterisk of the comment. */
1076 _cpp_skip_block_comment (cpp_reader
*pfile
)
1078 cpp_buffer
*buffer
= pfile
->buffer
;
1079 const uchar
*cur
= buffer
->cur
;
1088 /* People like decorating comments with '*', so check for '/'
1089 instead for efficiency. */
1097 /* Warn about potential nested comments, but not if the '/'
1098 comes immediately before the true comment delimiter.
1099 Don't bother to get it right across escaped newlines. */
1100 if (CPP_OPTION (pfile
, warn_comments
)
1101 && cur
[0] == '*' && cur
[1] != '/')
1104 cpp_warning_with_line (pfile
, CPP_W_COMMENTS
,
1105 pfile
->line_table
->highest_line
,
1106 CPP_BUF_COL (buffer
),
1107 "\"/*\" within comment");
1113 buffer
->cur
= cur
- 1;
1114 _cpp_process_line_notes (pfile
, true);
1115 if (buffer
->next_line
>= buffer
->rlimit
)
1117 _cpp_clean_line (pfile
);
1119 cols
= buffer
->next_line
- buffer
->line_base
;
1120 CPP_INCREMENT_LINE (pfile
, cols
);
1127 _cpp_process_line_notes (pfile
, true);
1131 /* Skip a C++ line comment, leaving buffer->cur pointing to the
1132 terminating newline. Handles escaped newlines. Returns nonzero
1133 if a multiline comment. */
1135 skip_line_comment (cpp_reader
*pfile
)
1137 cpp_buffer
*buffer
= pfile
->buffer
;
1138 source_location orig_line
= pfile
->line_table
->highest_line
;
1140 while (*buffer
->cur
!= '\n')
1143 _cpp_process_line_notes (pfile
, true);
1144 return orig_line
!= pfile
->line_table
->highest_line
;
1147 /* Skips whitespace, saving the next non-whitespace character. */
1149 skip_whitespace (cpp_reader
*pfile
, cppchar_t c
)
1151 cpp_buffer
*buffer
= pfile
->buffer
;
1152 bool saw_NUL
= false;
1156 /* Horizontal space always OK. */
1157 if (c
== ' ' || c
== '\t')
1159 /* Just \f \v or \0 left. */
1162 else if (pfile
->state
.in_directive
&& CPP_PEDANTIC (pfile
))
1163 cpp_error_with_line (pfile
, CPP_DL_PEDWARN
, pfile
->line_table
->highest_line
,
1164 CPP_BUF_COL (buffer
),
1165 "%s in preprocessing directive",
1166 c
== '\f' ? "form feed" : "vertical tab");
1170 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
1171 while (is_nvspace (c
));
1174 cpp_error (pfile
, CPP_DL_WARNING
, "null character(s) ignored");
1179 /* See if the characters of a number token are valid in a name (no
1180 '.', '+' or '-'). */
1182 name_p (cpp_reader
*pfile
, const cpp_string
*string
)
1186 for (i
= 0; i
< string
->len
; i
++)
1187 if (!is_idchar (string
->text
[i
]))
1193 /* After parsing an identifier or other sequence, produce a warning about
1194 sequences not in NFC/NFKC. */
1196 warn_about_normalization (cpp_reader
*pfile
,
1197 const cpp_token
*token
,
1198 const struct normalize_state
*s
)
1200 if (CPP_OPTION (pfile
, warn_normalize
) < NORMALIZE_STATE_RESULT (s
)
1201 && !pfile
->state
.skipping
)
1203 /* Make sure that the token is printed using UCNs, even
1204 if we'd otherwise happily print UTF-8. */
1205 unsigned char *buf
= XNEWVEC (unsigned char, cpp_token_len (token
));
1208 sz
= cpp_spell_token (pfile
, token
, buf
, false) - buf
;
1209 if (NORMALIZE_STATE_RESULT (s
) == normalized_C
)
1210 cpp_warning_with_line (pfile
, CPP_W_NORMALIZE
, token
->src_loc
, 0,
1211 "`%.*s' is not in NFKC", (int) sz
, buf
);
1213 cpp_warning_with_line (pfile
, CPP_W_NORMALIZE
, token
->src_loc
, 0,
1214 "`%.*s' is not in NFC", (int) sz
, buf
);
1219 /* Returns TRUE if the sequence starting at buffer->cur is valid in
1220 an identifier. FIRST is TRUE if this starts an identifier. */
1222 forms_identifier_p (cpp_reader
*pfile
, int first
,
1223 struct normalize_state
*state
)
1225 cpp_buffer
*buffer
= pfile
->buffer
;
1227 if (*buffer
->cur
== '$')
1229 if (!CPP_OPTION (pfile
, dollars_in_ident
))
1233 if (CPP_OPTION (pfile
, warn_dollars
) && !pfile
->state
.skipping
)
1235 CPP_OPTION (pfile
, warn_dollars
) = 0;
1236 cpp_error (pfile
, CPP_DL_PEDWARN
, "'$' in identifier or number");
1242 /* Is this a syntactically valid UCN? */
1243 if (CPP_OPTION (pfile
, extended_identifiers
)
1244 && *buffer
->cur
== '\\'
1245 && (buffer
->cur
[1] == 'u' || buffer
->cur
[1] == 'U'))
1248 if (_cpp_valid_ucn (pfile
, &buffer
->cur
, buffer
->rlimit
, 1 + !first
,
1257 /* Helper function to get the cpp_hashnode of the identifier BASE. */
1258 static cpp_hashnode
*
1259 lex_identifier_intern (cpp_reader
*pfile
, const uchar
*base
)
1261 cpp_hashnode
*result
;
1264 unsigned int hash
= HT_HASHSTEP (0, *base
);
1267 while (ISIDNUM (*cur
))
1269 hash
= HT_HASHSTEP (hash
, *cur
);
1273 hash
= HT_HASHFINISH (hash
, len
);
1274 result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1275 base
, len
, hash
, HT_ALLOC
));
1277 /* Rarely, identifiers require diagnostics when lexed. */
1278 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
1279 && !pfile
->state
.skipping
, 0))
1281 /* It is allowed to poison the same identifier twice. */
1282 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
1283 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
1284 NODE_NAME (result
));
1286 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1287 replacement list of a variadic macro. */
1288 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
1289 && !pfile
->state
.va_args_ok
)
1291 if (CPP_OPTION (pfile
, cplusplus
))
1292 cpp_error (pfile
, CPP_DL_PEDWARN
,
1293 "__VA_ARGS__ can only appear in the expansion"
1294 " of a C++11 variadic macro");
1296 cpp_error (pfile
, CPP_DL_PEDWARN
,
1297 "__VA_ARGS__ can only appear in the expansion"
1298 " of a C99 variadic macro");
1301 /* For -Wc++-compat, warn about use of C++ named operators. */
1302 if (result
->flags
& NODE_WARN_OPERATOR
)
1303 cpp_warning (pfile
, CPP_W_CXX_OPERATOR_NAMES
,
1304 "identifier \"%s\" is a special operator name in C++",
1305 NODE_NAME (result
));
1311 /* Get the cpp_hashnode of an identifier specified by NAME in
1312 the current cpp_reader object. If none is found, NULL is returned. */
1314 _cpp_lex_identifier (cpp_reader
*pfile
, const char *name
)
1316 cpp_hashnode
*result
;
1317 result
= lex_identifier_intern (pfile
, (uchar
*) name
);
1321 /* Lex an identifier starting at BUFFER->CUR - 1. */
1322 static cpp_hashnode
*
1323 lex_identifier (cpp_reader
*pfile
, const uchar
*base
, bool starts_ucn
,
1324 struct normalize_state
*nst
, cpp_hashnode
**spelling
)
1326 cpp_hashnode
*result
;
1329 unsigned int hash
= HT_HASHSTEP (0, *base
);
1331 cur
= pfile
->buffer
->cur
;
1334 while (ISIDNUM (*cur
))
1336 hash
= HT_HASHSTEP (hash
, *cur
);
1339 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *(cur
- 1));
1341 pfile
->buffer
->cur
= cur
;
1342 if (starts_ucn
|| forms_identifier_p (pfile
, false, nst
))
1344 /* Slower version for identifiers containing UCNs (or $). */
1346 while (ISIDNUM (*pfile
->buffer
->cur
))
1348 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *pfile
->buffer
->cur
);
1349 pfile
->buffer
->cur
++;
1351 } while (forms_identifier_p (pfile
, false, nst
));
1352 result
= _cpp_interpret_identifier (pfile
, base
,
1353 pfile
->buffer
->cur
- base
);
1354 *spelling
= cpp_lookup (pfile
, base
, pfile
->buffer
->cur
- base
);
1359 hash
= HT_HASHFINISH (hash
, len
);
1361 result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1362 base
, len
, hash
, HT_ALLOC
));
1366 /* Rarely, identifiers require diagnostics when lexed. */
1367 if (__builtin_expect ((result
->flags
& NODE_DIAGNOSTIC
)
1368 && !pfile
->state
.skipping
, 0))
1370 /* It is allowed to poison the same identifier twice. */
1371 if ((result
->flags
& NODE_POISONED
) && !pfile
->state
.poisoned_ok
)
1372 cpp_error (pfile
, CPP_DL_ERROR
, "attempt to use poisoned \"%s\"",
1373 NODE_NAME (result
));
1375 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1376 replacement list of a variadic macro. */
1377 if (result
== pfile
->spec_nodes
.n__VA_ARGS__
1378 && !pfile
->state
.va_args_ok
)
1380 if (CPP_OPTION (pfile
, cplusplus
))
1381 cpp_error (pfile
, CPP_DL_PEDWARN
,
1382 "__VA_ARGS__ can only appear in the expansion"
1383 " of a C++11 variadic macro");
1385 cpp_error (pfile
, CPP_DL_PEDWARN
,
1386 "__VA_ARGS__ can only appear in the expansion"
1387 " of a C99 variadic macro");
1390 /* For -Wc++-compat, warn about use of C++ named operators. */
1391 if (result
->flags
& NODE_WARN_OPERATOR
)
1392 cpp_warning (pfile
, CPP_W_CXX_OPERATOR_NAMES
,
1393 "identifier \"%s\" is a special operator name in C++",
1394 NODE_NAME (result
));
1400 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
1402 lex_number (cpp_reader
*pfile
, cpp_string
*number
,
1403 struct normalize_state
*nst
)
1409 base
= pfile
->buffer
->cur
- 1;
1412 cur
= pfile
->buffer
->cur
;
1414 /* N.B. ISIDNUM does not include $. */
1415 while (ISIDNUM (*cur
) || *cur
== '.' || DIGIT_SEP (*cur
)
1416 || VALID_SIGN (*cur
, cur
[-1]))
1418 NORMALIZE_STATE_UPDATE_IDNUM (nst
, *cur
);
1421 /* A number can't end with a digit separator. */
1422 while (cur
> pfile
->buffer
->cur
&& DIGIT_SEP (cur
[-1]))
1425 pfile
->buffer
->cur
= cur
;
1427 while (forms_identifier_p (pfile
, false, nst
));
1429 number
->len
= cur
- base
;
1430 dest
= _cpp_unaligned_alloc (pfile
, number
->len
+ 1);
1431 memcpy (dest
, base
, number
->len
);
1432 dest
[number
->len
] = '\0';
1433 number
->text
= dest
;
1436 /* Create a token of type TYPE with a literal spelling. */
1438 create_literal (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
1439 unsigned int len
, enum cpp_ttype type
)
1441 uchar
*dest
= _cpp_unaligned_alloc (pfile
, len
+ 1);
1443 memcpy (dest
, base
, len
);
1446 token
->val
.str
.len
= len
;
1447 token
->val
.str
.text
= dest
;
1450 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1451 sequence from *FIRST_BUFF_P to LAST_BUFF_P. */
1454 bufring_append (cpp_reader
*pfile
, const uchar
*base
, size_t len
,
1455 _cpp_buff
**first_buff_p
, _cpp_buff
**last_buff_p
)
1457 _cpp_buff
*first_buff
= *first_buff_p
;
1458 _cpp_buff
*last_buff
= *last_buff_p
;
1460 if (first_buff
== NULL
)
1461 first_buff
= last_buff
= _cpp_get_buff (pfile
, len
);
1462 else if (len
> BUFF_ROOM (last_buff
))
1464 size_t room
= BUFF_ROOM (last_buff
);
1465 memcpy (BUFF_FRONT (last_buff
), base
, room
);
1466 BUFF_FRONT (last_buff
) += room
;
1469 last_buff
= _cpp_append_extend_buff (pfile
, last_buff
, len
);
1472 memcpy (BUFF_FRONT (last_buff
), base
, len
);
1473 BUFF_FRONT (last_buff
) += len
;
1475 *first_buff_p
= first_buff
;
1476 *last_buff_p
= last_buff
;
1480 /* Returns true if a macro has been defined.
1481 This might not work if compile with -save-temps,
1482 or preprocess separately from compilation. */
1485 is_macro(cpp_reader
*pfile
, const uchar
*base
)
1487 const uchar
*cur
= base
;
1488 if (! ISIDST (*cur
))
1490 unsigned int hash
= HT_HASHSTEP (0, *cur
);
1492 while (ISIDNUM (*cur
))
1494 hash
= HT_HASHSTEP (hash
, *cur
);
1497 hash
= HT_HASHFINISH (hash
, cur
- base
);
1499 cpp_hashnode
*result
= CPP_HASHNODE (ht_lookup_with_hash (pfile
->hash_table
,
1500 base
, cur
- base
, hash
, HT_NO_INSERT
));
1502 return !result
? false : (result
->type
== NT_MACRO
);
1506 /* Lexes a raw string. The stored string contains the spelling, including
1507 double quotes, delimiter string, '(' and ')', any leading
1508 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
1509 literal, or CPP_OTHER if it was not properly terminated.
1511 The spelling is NUL-terminated, but it is not guaranteed that this
1512 is the first NUL since embedded NULs are preserved. */
1515 lex_raw_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
,
1518 uchar raw_prefix
[17];
1519 uchar temp_buffer
[18];
1520 const uchar
*orig_base
;
1521 unsigned int raw_prefix_len
= 0, raw_suffix_len
= 0;
1522 enum raw_str_phase
{ RAW_STR_PREFIX
, RAW_STR
, RAW_STR_SUFFIX
};
1523 raw_str_phase phase
= RAW_STR_PREFIX
;
1524 enum cpp_ttype type
;
1525 size_t total_len
= 0;
1526 /* Index into temp_buffer during phases other than RAW_STR,
1527 during RAW_STR phase 17 to tell BUF_APPEND that nothing should
1528 be appended to temp_buffer. */
1529 size_t temp_buffer_len
= 0;
1530 _cpp_buff
*first_buff
= NULL
, *last_buff
= NULL
;
1531 size_t raw_prefix_start
;
1532 _cpp_line_note
*note
= &pfile
->buffer
->notes
[pfile
->buffer
->cur_note
];
1534 type
= (*base
== 'L' ? CPP_WSTRING
:
1535 *base
== 'U' ? CPP_STRING32
:
1536 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8STRING
: CPP_STRING16
)
1539 #define BUF_APPEND(STR,LEN) \
1541 bufring_append (pfile, (const uchar *)(STR), (LEN), \
1542 &first_buff, &last_buff); \
1543 total_len += (LEN); \
1544 if (__builtin_expect (temp_buffer_len < 17, 0) \
1545 && (const uchar *)(STR) != base \
1548 memcpy (temp_buffer + temp_buffer_len, \
1549 (const uchar *)(STR), (LEN)); \
1550 temp_buffer_len += (LEN); \
1556 raw_prefix_start
= cur
- base
;
1561 /* If we previously performed any trigraph or line splicing
1562 transformations, undo them in between the opening and closing
1564 while (note
->pos
< cur
)
1566 for (; note
->pos
== cur
; ++note
)
1572 /* Restore backslash followed by newline. */
1573 BUF_APPEND (base
, cur
- base
);
1575 BUF_APPEND ("\\", 1);
1577 if (note
->type
== ' ')
1579 /* GNU backslash whitespace newline extension. FIXME
1580 could be any sequence of non-vertical space. When we
1581 can properly restore any such sequence, we should mark
1582 this note as handled so _cpp_process_line_notes
1584 BUF_APPEND (" ", 1);
1587 BUF_APPEND ("\n", 1);
1591 /* Already handled. */
1595 if (_cpp_trigraph_map
[note
->type
])
1597 /* Don't warn about this trigraph in
1598 _cpp_process_line_notes, since trigraphs show up as
1599 trigraphs in raw strings. */
1600 uchar type
= note
->type
;
1603 if (!CPP_OPTION (pfile
, trigraphs
))
1604 /* If we didn't convert the trigraph in the first
1605 place, don't do anything now either. */
1608 BUF_APPEND (base
, cur
- base
);
1610 BUF_APPEND ("??", 2);
1612 /* ??/ followed by newline gets two line notes, one for
1613 the trigraph and one for the backslash/newline. */
1614 if (type
== '/' && note
[1].pos
== cur
)
1616 if (note
[1].type
!= '\\'
1617 && note
[1].type
!= ' ')
1619 BUF_APPEND ("/", 1);
1621 goto after_backslash
;
1625 /* Skip the replacement character. */
1627 BUF_APPEND (&type
, 1);
1638 if (__builtin_expect (temp_buffer_len
< 17, 0))
1639 temp_buffer
[temp_buffer_len
++] = c
;
1642 if (phase
== RAW_STR_PREFIX
)
1644 while (raw_prefix_len
< temp_buffer_len
)
1646 raw_prefix
[raw_prefix_len
] = temp_buffer
[raw_prefix_len
];
1647 switch (raw_prefix
[raw_prefix_len
])
1649 case ' ': case '(': case ')': case '\\': case '\t':
1650 case '\v': case '\f': case '\n': default:
1652 /* Basic source charset except the above chars. */
1653 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1654 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1655 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1656 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1658 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1659 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1660 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1661 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1663 case '0': case '1': case '2': case '3': case '4': case '5':
1664 case '6': case '7': case '8': case '9':
1665 case '_': case '{': case '}': case '#': case '[': case ']':
1666 case '<': case '>': case '%': case ':': case ';': case '.':
1667 case '?': case '*': case '+': case '-': case '/': case '^':
1668 case '&': case '|': case '~': case '!': case '=': case ',':
1669 case '"': case '\'':
1670 if (raw_prefix_len
< 16)
1678 if (raw_prefix
[raw_prefix_len
] != '(')
1680 int col
= CPP_BUF_COLUMN (pfile
->buffer
, cur
) + 1;
1681 if (raw_prefix_len
== 16)
1682 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1683 col
, "raw string delimiter longer "
1684 "than 16 characters");
1685 else if (raw_prefix
[raw_prefix_len
] == '\n')
1686 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1687 col
, "invalid new-line in raw "
1688 "string delimiter");
1690 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
,
1691 col
, "invalid character '%c' in "
1692 "raw string delimiter",
1693 (int) raw_prefix
[raw_prefix_len
]);
1694 pfile
->buffer
->cur
= orig_base
+ raw_prefix_start
- 1;
1695 create_literal (pfile
, token
, orig_base
,
1696 raw_prefix_start
- 1, CPP_OTHER
);
1698 _cpp_release_buff (pfile
, first_buff
);
1701 raw_prefix
[raw_prefix_len
] = '"';
1703 /* Nothing should be appended to temp_buffer during
1705 temp_buffer_len
= 17;
1710 else if (phase
== RAW_STR_SUFFIX
)
1712 while (raw_suffix_len
<= raw_prefix_len
1713 && raw_suffix_len
< temp_buffer_len
1714 && temp_buffer
[raw_suffix_len
] == raw_prefix
[raw_suffix_len
])
1716 if (raw_suffix_len
> raw_prefix_len
)
1718 if (raw_suffix_len
== temp_buffer_len
)
1721 /* Nothing should be appended to temp_buffer during
1723 temp_buffer_len
= 17;
1727 phase
= RAW_STR_SUFFIX
;
1729 temp_buffer_len
= 0;
1733 if (pfile
->state
.in_directive
1734 || (pfile
->state
.parsing_args
1735 && pfile
->buffer
->next_line
>= pfile
->buffer
->rlimit
))
1739 cpp_error_with_line (pfile
, CPP_DL_ERROR
, token
->src_loc
, 0,
1740 "unterminated raw string");
1744 BUF_APPEND (base
, cur
- base
);
1746 if (pfile
->buffer
->cur
< pfile
->buffer
->rlimit
)
1747 CPP_INCREMENT_LINE (pfile
, 0);
1748 pfile
->buffer
->need_line
= true;
1750 pfile
->buffer
->cur
= cur
-1;
1751 _cpp_process_line_notes (pfile
, false);
1752 if (!_cpp_get_fresh_line (pfile
))
1754 source_location src_loc
= token
->src_loc
;
1755 token
->type
= CPP_EOF
;
1756 /* Tell the compiler the line number of the EOF token. */
1757 token
->src_loc
= pfile
->line_table
->highest_line
;
1759 if (first_buff
!= NULL
)
1760 _cpp_release_buff (pfile
, first_buff
);
1761 cpp_error_with_line (pfile
, CPP_DL_ERROR
, src_loc
, 0,
1762 "unterminated raw string");
1766 cur
= base
= pfile
->buffer
->cur
;
1767 note
= &pfile
->buffer
->notes
[pfile
->buffer
->cur_note
];
1771 if (CPP_OPTION (pfile
, user_literals
))
1773 /* If a string format macro, say from inttypes.h, is placed touching
1774 a string literal it could be parsed as a C++11 user-defined string
1775 literal thus breaking the program.
1776 Try to identify macros with is_macro. A warning is issued. */
1777 if (is_macro (pfile
, cur
))
1779 /* Raise a warning, but do not consume subsequent tokens. */
1780 if (CPP_OPTION (pfile
, warn_literal_suffix
) && !pfile
->state
.skipping
)
1781 cpp_warning_with_line (pfile
, CPP_W_LITERAL_SUFFIX
,
1783 "invalid suffix on literal; C++11 requires "
1784 "a space between literal and string macro");
1786 /* Grab user defined literal suffix. */
1787 else if (ISIDST (*cur
))
1789 type
= cpp_userdef_string_add_type (type
);
1792 while (ISIDNUM (*cur
))
1797 pfile
->buffer
->cur
= cur
;
1798 if (first_buff
== NULL
)
1799 create_literal (pfile
, token
, base
, cur
- base
, type
);
1802 uchar
*dest
= _cpp_unaligned_alloc (pfile
, total_len
+ (cur
- base
) + 1);
1805 token
->val
.str
.len
= total_len
+ (cur
- base
);
1806 token
->val
.str
.text
= dest
;
1807 last_buff
= first_buff
;
1808 while (last_buff
!= NULL
)
1810 memcpy (dest
, last_buff
->base
,
1811 BUFF_FRONT (last_buff
) - last_buff
->base
);
1812 dest
+= BUFF_FRONT (last_buff
) - last_buff
->base
;
1813 last_buff
= last_buff
->next
;
1815 _cpp_release_buff (pfile
, first_buff
);
1816 memcpy (dest
, base
, cur
- base
);
1817 dest
[cur
- base
] = '\0';
1821 /* Lexes a string, character constant, or angle-bracketed header file
1822 name. The stored string contains the spelling, including opening
1823 quote and any leading 'L', 'u', 'U' or 'u8' and optional
1824 'R' modifier. It returns the type of the literal, or CPP_OTHER
1825 if it was not properly terminated, or CPP_LESS for an unterminated
1826 header name which must be relexed as normal tokens.
1828 The spelling is NUL-terminated, but it is not guaranteed that this
1829 is the first NUL since embedded NULs are preserved. */
1831 lex_string (cpp_reader
*pfile
, cpp_token
*token
, const uchar
*base
)
1833 bool saw_NUL
= false;
1835 cppchar_t terminator
;
1836 enum cpp_ttype type
;
1839 terminator
= *cur
++;
1840 if (terminator
== 'L' || terminator
== 'U')
1841 terminator
= *cur
++;
1842 else if (terminator
== 'u')
1844 terminator
= *cur
++;
1845 if (terminator
== '8')
1846 terminator
= *cur
++;
1848 if (terminator
== 'R')
1850 lex_raw_string (pfile
, token
, base
, cur
);
1853 if (terminator
== '"')
1854 type
= (*base
== 'L' ? CPP_WSTRING
:
1855 *base
== 'U' ? CPP_STRING32
:
1856 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8STRING
: CPP_STRING16
)
1858 else if (terminator
== '\'')
1859 type
= (*base
== 'L' ? CPP_WCHAR
:
1860 *base
== 'U' ? CPP_CHAR32
:
1861 *base
== 'u' ? (base
[1] == '8' ? CPP_UTF8CHAR
: CPP_CHAR16
)
1864 terminator
= '>', type
= CPP_HEADER_NAME
;
1868 cppchar_t c
= *cur
++;
1870 /* In #include-style directives, terminators are not escapable. */
1871 if (c
== '\\' && !pfile
->state
.angled_headers
&& *cur
!= '\n')
1873 else if (c
== terminator
)
1878 /* Unmatched quotes always yield undefined behavior, but
1879 greedy lexing means that what appears to be an unterminated
1880 header name may actually be a legitimate sequence of tokens. */
1881 if (terminator
== '>')
1883 token
->type
= CPP_LESS
;
1893 if (saw_NUL
&& !pfile
->state
.skipping
)
1894 cpp_error (pfile
, CPP_DL_WARNING
,
1895 "null character(s) preserved in literal");
1897 if (type
== CPP_OTHER
&& CPP_OPTION (pfile
, lang
) != CLK_ASM
)
1898 cpp_error (pfile
, CPP_DL_PEDWARN
, "missing terminating %c character",
1901 if (CPP_OPTION (pfile
, user_literals
))
1903 /* If a string format macro, say from inttypes.h, is placed touching
1904 a string literal it could be parsed as a C++11 user-defined string
1905 literal thus breaking the program.
1906 Try to identify macros with is_macro. A warning is issued. */
1907 if (is_macro (pfile
, cur
))
1909 /* Raise a warning, but do not consume subsequent tokens. */
1910 if (CPP_OPTION (pfile
, warn_literal_suffix
) && !pfile
->state
.skipping
)
1911 cpp_warning_with_line (pfile
, CPP_W_LITERAL_SUFFIX
,
1913 "invalid suffix on literal; C++11 requires "
1914 "a space between literal and string macro");
1916 /* Grab user defined literal suffix. */
1917 else if (ISIDST (*cur
))
1919 type
= cpp_userdef_char_add_type (type
);
1920 type
= cpp_userdef_string_add_type (type
);
1923 while (ISIDNUM (*cur
))
1927 else if (CPP_OPTION (pfile
, cpp_warn_cxx11_compat
)
1928 && is_macro (pfile
, cur
)
1929 && !pfile
->state
.skipping
)
1930 cpp_warning_with_line (pfile
, CPP_W_CXX11_COMPAT
,
1931 token
->src_loc
, 0, "C++11 requires a space "
1932 "between string literal and macro");
1934 pfile
->buffer
->cur
= cur
;
1935 create_literal (pfile
, token
, base
, cur
- base
, type
);
1938 /* Return the comment table. The client may not make any assumption
1939 about the ordering of the table. */
1941 cpp_get_comments (cpp_reader
*pfile
)
1943 return &pfile
->comments
;
1946 /* Append a comment to the end of the comment table. */
1948 store_comment (cpp_reader
*pfile
, cpp_token
*token
)
1952 if (pfile
->comments
.allocated
== 0)
1954 pfile
->comments
.allocated
= 256;
1955 pfile
->comments
.entries
= (cpp_comment
*) xmalloc
1956 (pfile
->comments
.allocated
* sizeof (cpp_comment
));
1959 if (pfile
->comments
.count
== pfile
->comments
.allocated
)
1961 pfile
->comments
.allocated
*= 2;
1962 pfile
->comments
.entries
= (cpp_comment
*) xrealloc
1963 (pfile
->comments
.entries
,
1964 pfile
->comments
.allocated
* sizeof (cpp_comment
));
1967 len
= token
->val
.str
.len
;
1969 /* Copy comment. Note, token may not be NULL terminated. */
1970 pfile
->comments
.entries
[pfile
->comments
.count
].comment
=
1971 (char *) xmalloc (sizeof (char) * (len
+ 1));
1972 memcpy (pfile
->comments
.entries
[pfile
->comments
.count
].comment
,
1973 token
->val
.str
.text
, len
);
1974 pfile
->comments
.entries
[pfile
->comments
.count
].comment
[len
] = '\0';
1976 /* Set source location. */
1977 pfile
->comments
.entries
[pfile
->comments
.count
].sloc
= token
->src_loc
;
1979 /* Increment the count of entries in the comment table. */
1980 pfile
->comments
.count
++;
1983 /* The stored comment includes the comment start and any terminator. */
1985 save_comment (cpp_reader
*pfile
, cpp_token
*token
, const unsigned char *from
,
1988 unsigned char *buffer
;
1989 unsigned int len
, clen
, i
;
1991 len
= pfile
->buffer
->cur
- from
+ 1; /* + 1 for the initial '/'. */
1993 /* C++ comments probably (not definitely) have moved past a new
1994 line, which we don't want to save in the comment. */
1995 if (is_vspace (pfile
->buffer
->cur
[-1]))
1998 /* If we are currently in a directive or in argument parsing, then
1999 we need to store all C++ comments as C comments internally, and
2000 so we need to allocate a little extra space in that case.
2002 Note that the only time we encounter a directive here is
2003 when we are saving comments in a "#define". */
2004 clen
= ((pfile
->state
.in_directive
|| pfile
->state
.parsing_args
)
2005 && type
== '/') ? len
+ 2 : len
;
2007 buffer
= _cpp_unaligned_alloc (pfile
, clen
);
2009 token
->type
= CPP_COMMENT
;
2010 token
->val
.str
.len
= clen
;
2011 token
->val
.str
.text
= buffer
;
2014 memcpy (buffer
+ 1, from
, len
- 1);
2016 /* Finish conversion to a C comment, if necessary. */
2017 if ((pfile
->state
.in_directive
|| pfile
->state
.parsing_args
) && type
== '/')
2020 buffer
[clen
- 2] = '*';
2021 buffer
[clen
- 1] = '/';
2022 /* As there can be in a C++ comments illegal sequences for C comments
2023 we need to filter them out. */
2024 for (i
= 2; i
< (clen
- 2); i
++)
2025 if (buffer
[i
] == '/' && (buffer
[i
- 1] == '*' || buffer
[i
+ 1] == '*'))
2029 /* Finally store this comment for use by clients of libcpp. */
2030 store_comment (pfile
, token
);
2033 /* Allocate COUNT tokens for RUN. */
2035 _cpp_init_tokenrun (tokenrun
*run
, unsigned int count
)
2037 run
->base
= XNEWVEC (cpp_token
, count
);
2038 run
->limit
= run
->base
+ count
;
2042 /* Returns the next tokenrun, or creates one if there is none. */
2044 next_tokenrun (tokenrun
*run
)
2046 if (run
->next
== NULL
)
2048 run
->next
= XNEW (tokenrun
);
2049 run
->next
->prev
= run
;
2050 _cpp_init_tokenrun (run
->next
, 250);
2056 /* Return the number of not yet processed token in a given
2059 _cpp_remaining_tokens_num_in_context (cpp_context
*context
)
2061 if (context
->tokens_kind
== TOKENS_KIND_DIRECT
)
2062 return (LAST (context
).token
- FIRST (context
).token
);
2063 else if (context
->tokens_kind
== TOKENS_KIND_INDIRECT
2064 || context
->tokens_kind
== TOKENS_KIND_EXTENDED
)
2065 return (LAST (context
).ptoken
- FIRST (context
).ptoken
);
2070 /* Returns the token present at index INDEX in a given context. If
2071 INDEX is zero, the next token to be processed is returned. */
2072 static const cpp_token
*
2073 _cpp_token_from_context_at (cpp_context
*context
, int index
)
2075 if (context
->tokens_kind
== TOKENS_KIND_DIRECT
)
2076 return &(FIRST (context
).token
[index
]);
2077 else if (context
->tokens_kind
== TOKENS_KIND_INDIRECT
2078 || context
->tokens_kind
== TOKENS_KIND_EXTENDED
)
2079 return FIRST (context
).ptoken
[index
];
2084 /* Look ahead in the input stream. */
2086 cpp_peek_token (cpp_reader
*pfile
, int index
)
2088 cpp_context
*context
= pfile
->context
;
2089 const cpp_token
*peektok
;
2092 /* First, scan through any pending cpp_context objects. */
2093 while (context
->prev
)
2095 ptrdiff_t sz
= _cpp_remaining_tokens_num_in_context (context
);
2097 if (index
< (int) sz
)
2098 return _cpp_token_from_context_at (context
, index
);
2100 context
= context
->prev
;
2103 /* We will have to read some new tokens after all (and do so
2104 without invalidating preceding tokens). */
2106 pfile
->keep_tokens
++;
2108 /* For peeked tokens temporarily disable line_change reporting,
2109 until the tokens are parsed for real. */
2110 void (*line_change
) (cpp_reader
*, const cpp_token
*, int)
2111 = pfile
->cb
.line_change
;
2112 pfile
->cb
.line_change
= NULL
;
2116 peektok
= _cpp_lex_token (pfile
);
2117 if (peektok
->type
== CPP_EOF
)
2125 _cpp_backup_tokens_direct (pfile
, count
- index
);
2126 pfile
->keep_tokens
--;
2127 pfile
->cb
.line_change
= line_change
;
2132 /* Allocate a single token that is invalidated at the same time as the
2133 rest of the tokens on the line. Has its line and col set to the
2134 same as the last lexed token, so that diagnostics appear in the
2137 _cpp_temp_token (cpp_reader
*pfile
)
2139 cpp_token
*old
, *result
;
2140 ptrdiff_t sz
= pfile
->cur_run
->limit
- pfile
->cur_token
;
2141 ptrdiff_t la
= (ptrdiff_t) pfile
->lookaheads
;
2143 old
= pfile
->cur_token
- 1;
2144 /* Any pre-existing lookaheads must not be clobbered. */
2149 tokenrun
*next
= next_tokenrun (pfile
->cur_run
);
2152 memmove (next
->base
+ 1, next
->base
,
2153 (la
- sz
) * sizeof (cpp_token
));
2155 next
->base
[0] = pfile
->cur_run
->limit
[-1];
2159 memmove (pfile
->cur_token
+ 1, pfile
->cur_token
,
2160 MIN (la
, sz
- 1) * sizeof (cpp_token
));
2163 if (!sz
&& pfile
->cur_token
== pfile
->cur_run
->limit
)
2165 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
2166 pfile
->cur_token
= pfile
->cur_run
->base
;
2169 result
= pfile
->cur_token
++;
2170 result
->src_loc
= old
->src_loc
;
2174 /* Lex a token into RESULT (external interface). Takes care of issues
2175 like directive handling, token lookahead, multiple include
2176 optimization and skipping. */
2178 _cpp_lex_token (cpp_reader
*pfile
)
2184 if (pfile
->cur_token
== pfile
->cur_run
->limit
)
2186 pfile
->cur_run
= next_tokenrun (pfile
->cur_run
);
2187 pfile
->cur_token
= pfile
->cur_run
->base
;
2189 /* We assume that the current token is somewhere in the current
2191 if (pfile
->cur_token
< pfile
->cur_run
->base
2192 || pfile
->cur_token
>= pfile
->cur_run
->limit
)
2195 if (pfile
->lookaheads
)
2197 pfile
->lookaheads
--;
2198 result
= pfile
->cur_token
++;
2201 result
= _cpp_lex_direct (pfile
);
2203 if (result
->flags
& BOL
)
2205 /* Is this a directive. If _cpp_handle_directive returns
2206 false, it is an assembler #. */
2207 if (result
->type
== CPP_HASH
2208 /* 6.10.3 p 11: Directives in a list of macro arguments
2209 gives undefined behavior. This implementation
2210 handles the directive as normal. */
2211 && pfile
->state
.parsing_args
!= 1)
2213 if (_cpp_handle_directive (pfile
, result
->flags
& PREV_WHITE
))
2215 if (pfile
->directive_result
.type
== CPP_PADDING
)
2217 result
= &pfile
->directive_result
;
2220 else if (pfile
->state
.in_deferred_pragma
)
2221 result
= &pfile
->directive_result
;
2223 if (pfile
->cb
.line_change
&& !pfile
->state
.skipping
)
2224 pfile
->cb
.line_change (pfile
, result
, pfile
->state
.parsing_args
);
2227 /* We don't skip tokens in directives. */
2228 if (pfile
->state
.in_directive
|| pfile
->state
.in_deferred_pragma
)
2231 /* Outside a directive, invalidate controlling macros. At file
2232 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
2233 get here and MI optimization works. */
2234 pfile
->mi_valid
= false;
2236 if (!pfile
->state
.skipping
|| result
->type
== CPP_EOF
)
2243 /* Returns true if a fresh line has been loaded. */
2245 _cpp_get_fresh_line (cpp_reader
*pfile
)
2249 /* We can't get a new line until we leave the current directive. */
2250 if (pfile
->state
.in_directive
)
2255 cpp_buffer
*buffer
= pfile
->buffer
;
2257 if (!buffer
->need_line
)
2260 if (buffer
->next_line
< buffer
->rlimit
)
2262 _cpp_clean_line (pfile
);
2266 /* First, get out of parsing arguments state. */
2267 if (pfile
->state
.parsing_args
)
2270 /* End of buffer. Non-empty files should end in a newline. */
2271 if (buffer
->buf
!= buffer
->rlimit
2272 && buffer
->next_line
> buffer
->rlimit
2273 && !buffer
->from_stage3
)
2275 /* Clip to buffer size. */
2276 buffer
->next_line
= buffer
->rlimit
;
2279 return_at_eof
= buffer
->return_at_eof
;
2280 _cpp_pop_buffer (pfile
);
2281 if (pfile
->buffer
== NULL
|| return_at_eof
)
/* Set result->type to THEN_TYPE and consume one character if the next
   character in the buffer is CHAR; otherwise set it to ELSE_TYPE.
   Relies on variables named `result' and `buffer' being in scope at
   the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = (ELSE_TYPE);                       \
      if (*buffer->cur == (CHAR))                       \
        buffer->cur++, result->type = (THEN_TYPE);      \
    }                                                   \
  while (0)
2295 /* Lex a token into pfile->cur_token, which is also incremented, to
2296 get diagnostics pointing to the correct location.
2298 Does not handle issues such as token lookahead, multiple-include
2299 optimization, directives, skipping etc. This function is only
2300 suitable for use by _cpp_lex_token, and in special cases like
2301 lex_expansion_token which doesn't care for any of these issues.
2303 When meeting a newline, returns CPP_EOF if parsing a directive,
2304 otherwise returns to the start of the token buffer if permissible.
2305 Returns the location of the lexed token. */
2307 _cpp_lex_direct (cpp_reader
*pfile
)
2311 const unsigned char *comment_start
;
2312 cpp_token
*result
= pfile
->cur_token
++;
2316 buffer
= pfile
->buffer
;
2317 if (buffer
->need_line
)
2319 if (pfile
->state
.in_deferred_pragma
)
2321 result
->type
= CPP_PRAGMA_EOL
;
2322 pfile
->state
.in_deferred_pragma
= false;
2323 if (!pfile
->state
.pragma_allow_expansion
)
2324 pfile
->state
.prevent_expansion
--;
2327 if (!_cpp_get_fresh_line (pfile
))
2329 result
->type
= CPP_EOF
;
2330 if (!pfile
->state
.in_directive
)
2332 /* Tell the compiler the line number of the EOF token. */
2333 result
->src_loc
= pfile
->line_table
->highest_line
;
2334 result
->flags
= BOL
;
2338 if (!pfile
->keep_tokens
)
2340 pfile
->cur_run
= &pfile
->base_run
;
2341 result
= pfile
->base_run
.base
;
2342 pfile
->cur_token
= result
+ 1;
2344 result
->flags
= BOL
;
2345 if (pfile
->state
.parsing_args
== 2)
2346 result
->flags
|= PREV_WHITE
;
2348 buffer
= pfile
->buffer
;
2350 result
->src_loc
= pfile
->line_table
->highest_line
;
2353 if (buffer
->cur
>= buffer
->notes
[buffer
->cur_note
].pos
2354 && !pfile
->overlaid_buffer
)
2356 _cpp_process_line_notes (pfile
, false);
2357 result
->src_loc
= pfile
->line_table
->highest_line
;
2361 if (pfile
->forced_token_location_p
)
2362 result
->src_loc
= *pfile
->forced_token_location_p
;
2364 result
->src_loc
= linemap_position_for_column (pfile
->line_table
,
2365 CPP_BUF_COLUMN (buffer
, buffer
->cur
));
2369 case ' ': case '\t': case '\f': case '\v': case '\0':
2370 result
->flags
|= PREV_WHITE
;
2371 skip_whitespace (pfile
, c
);
2375 if (buffer
->cur
< buffer
->rlimit
)
2376 CPP_INCREMENT_LINE (pfile
, 0);
2377 buffer
->need_line
= true;
2380 case '0': case '1': case '2': case '3': case '4':
2381 case '5': case '6': case '7': case '8': case '9':
2383 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
2384 result
->type
= CPP_NUMBER
;
2385 lex_number (pfile
, &result
->val
.str
, &nst
);
2386 warn_about_normalization (pfile
, result
, &nst
);
2394 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2395 wide strings or raw strings. */
2396 if (c
== 'L' || CPP_OPTION (pfile
, rliterals
)
2397 || (c
!= 'R' && CPP_OPTION (pfile
, uliterals
)))
2399 if ((*buffer
->cur
== '\'' && c
!= 'R')
2400 || *buffer
->cur
== '"'
2401 || (*buffer
->cur
== 'R'
2403 && buffer
->cur
[1] == '"'
2404 && CPP_OPTION (pfile
, rliterals
))
2405 || (*buffer
->cur
== '8'
2407 && ((buffer
->cur
[1] == '"' || (buffer
->cur
[1] == '\''
2408 && CPP_OPTION (pfile
, utf8_char_literals
)))
2409 || (buffer
->cur
[1] == 'R' && buffer
->cur
[2] == '"'
2410 && CPP_OPTION (pfile
, rliterals
)))))
2412 lex_string (pfile
, result
, buffer
->cur
- 1);
2419 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2420 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2421 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2422 case 's': case 't': case 'v': case 'w': case 'x':
2424 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2425 case 'G': case 'H': case 'I': case 'J': case 'K':
2426 case 'M': case 'N': case 'O': case 'P': case 'Q':
2427 case 'S': case 'T': case 'V': case 'W': case 'X':
2429 result
->type
= CPP_NAME
;
2431 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
2432 result
->val
.node
.node
= lex_identifier (pfile
, buffer
->cur
- 1, false,
2434 &result
->val
.node
.spelling
);
2435 warn_about_normalization (pfile
, result
, &nst
);
2438 /* Convert named operators to their proper types. */
2439 if (result
->val
.node
.node
->flags
& NODE_OPERATOR
)
2441 result
->flags
|= NAMED_OP
;
2442 result
->type
= (enum cpp_ttype
) result
->val
.node
.node
->directive_index
;
2448 lex_string (pfile
, result
, buffer
->cur
- 1);
2452 /* A potential block or line comment. */
2453 comment_start
= buffer
->cur
;
2458 if (_cpp_skip_block_comment (pfile
))
2459 cpp_error (pfile
, CPP_DL_ERROR
, "unterminated comment");
2461 else if (c
== '/' && ! CPP_OPTION (pfile
, traditional
))
2463 /* Don't warn for system headers. */
2464 if (cpp_in_system_header (pfile
))
2466 /* Warn about comments if pedantically GNUC89, and not
2467 in system headers. */
2468 else if (CPP_OPTION (pfile
, lang
) == CLK_GNUC89
2469 && CPP_PEDANTIC (pfile
)
2470 && ! buffer
->warned_cplusplus_comments
)
2472 cpp_error (pfile
, CPP_DL_PEDWARN
,
2473 "C++ style comments are not allowed in ISO C90");
2474 cpp_error (pfile
, CPP_DL_PEDWARN
,
2475 "(this will be reported only once per input file)");
2476 buffer
->warned_cplusplus_comments
= 1;
2478 /* Or if specifically desired via -Wc90-c99-compat. */
2479 else if (CPP_OPTION (pfile
, cpp_warn_c90_c99_compat
) > 0
2480 && ! CPP_OPTION (pfile
, cplusplus
)
2481 && ! buffer
->warned_cplusplus_comments
)
2483 cpp_error (pfile
, CPP_DL_WARNING
,
2484 "C++ style comments are incompatible with C90");
2485 cpp_error (pfile
, CPP_DL_WARNING
,
2486 "(this will be reported only once per input file)");
2487 buffer
->warned_cplusplus_comments
= 1;
2489 /* In C89/C94, C++ style comments are forbidden. */
2490 else if ((CPP_OPTION (pfile
, lang
) == CLK_STDC89
2491 || CPP_OPTION (pfile
, lang
) == CLK_STDC94
))
2493 /* But don't be confused about valid code such as
2494 - // immediately followed by *,
2495 - // in a preprocessing directive,
2496 - // in an #if 0 block. */
2497 if (buffer
->cur
[1] == '*'
2498 || pfile
->state
.in_directive
2499 || pfile
->state
.skipping
)
2501 result
->type
= CPP_DIV
;
2504 else if (! buffer
->warned_cplusplus_comments
)
2506 cpp_error (pfile
, CPP_DL_ERROR
,
2507 "C++ style comments are not allowed in ISO C90");
2508 cpp_error (pfile
, CPP_DL_ERROR
,
2509 "(this will be reported only once per input "
2511 buffer
->warned_cplusplus_comments
= 1;
2514 if (skip_line_comment (pfile
) && CPP_OPTION (pfile
, warn_comments
))
2515 cpp_warning (pfile
, CPP_W_COMMENTS
, "multi-line comment");
2520 result
->type
= CPP_DIV_EQ
;
2525 result
->type
= CPP_DIV
;
2529 if (!pfile
->state
.save_comments
)
2531 result
->flags
|= PREV_WHITE
;
2532 goto update_tokens_line
;
2535 /* Save the comment as a token in its own right. */
2536 save_comment (pfile
, result
, comment_start
, c
);
2540 if (pfile
->state
.angled_headers
)
2542 lex_string (pfile
, result
, buffer
->cur
- 1);
2543 if (result
->type
!= CPP_LESS
)
2547 result
->type
= CPP_LESS
;
2548 if (*buffer
->cur
== '=')
2549 buffer
->cur
++, result
->type
= CPP_LESS_EQ
;
2550 else if (*buffer
->cur
== '<')
2553 IF_NEXT_IS ('=', CPP_LSHIFT_EQ
, CPP_LSHIFT
);
2555 else if (CPP_OPTION (pfile
, digraphs
))
2557 if (*buffer
->cur
== ':')
2559 /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
2560 three characters are <:: and the subsequent character
2561 is neither : nor >, the < is treated as a preprocessor
2562 token by itself". */
2563 if (CPP_OPTION (pfile
, cplusplus
)
2564 && CPP_OPTION (pfile
, lang
) != CLK_CXX98
2565 && CPP_OPTION (pfile
, lang
) != CLK_GNUCXX
2566 && buffer
->cur
[1] == ':'
2567 && buffer
->cur
[2] != ':' && buffer
->cur
[2] != '>')
2571 result
->flags
|= DIGRAPH
;
2572 result
->type
= CPP_OPEN_SQUARE
;
2574 else if (*buffer
->cur
== '%')
2577 result
->flags
|= DIGRAPH
;
2578 result
->type
= CPP_OPEN_BRACE
;
2584 result
->type
= CPP_GREATER
;
2585 if (*buffer
->cur
== '=')
2586 buffer
->cur
++, result
->type
= CPP_GREATER_EQ
;
2587 else if (*buffer
->cur
== '>')
2590 IF_NEXT_IS ('=', CPP_RSHIFT_EQ
, CPP_RSHIFT
);
2595 result
->type
= CPP_MOD
;
2596 if (*buffer
->cur
== '=')
2597 buffer
->cur
++, result
->type
= CPP_MOD_EQ
;
2598 else if (CPP_OPTION (pfile
, digraphs
))
2600 if (*buffer
->cur
== ':')
2603 result
->flags
|= DIGRAPH
;
2604 result
->type
= CPP_HASH
;
2605 if (*buffer
->cur
== '%' && buffer
->cur
[1] == ':')
2606 buffer
->cur
+= 2, result
->type
= CPP_PASTE
, result
->val
.token_no
= 0;
2608 else if (*buffer
->cur
== '>')
2611 result
->flags
|= DIGRAPH
;
2612 result
->type
= CPP_CLOSE_BRACE
;
2618 result
->type
= CPP_DOT
;
2619 if (ISDIGIT (*buffer
->cur
))
2621 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
2622 result
->type
= CPP_NUMBER
;
2623 lex_number (pfile
, &result
->val
.str
, &nst
);
2624 warn_about_normalization (pfile
, result
, &nst
);
2626 else if (*buffer
->cur
== '.' && buffer
->cur
[1] == '.')
2627 buffer
->cur
+= 2, result
->type
= CPP_ELLIPSIS
;
2628 else if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
2629 buffer
->cur
++, result
->type
= CPP_DOT_STAR
;
2633 result
->type
= CPP_PLUS
;
2634 if (*buffer
->cur
== '+')
2635 buffer
->cur
++, result
->type
= CPP_PLUS_PLUS
;
2636 else if (*buffer
->cur
== '=')
2637 buffer
->cur
++, result
->type
= CPP_PLUS_EQ
;
2641 result
->type
= CPP_MINUS
;
2642 if (*buffer
->cur
== '>')
2645 result
->type
= CPP_DEREF
;
2646 if (*buffer
->cur
== '*' && CPP_OPTION (pfile
, cplusplus
))
2647 buffer
->cur
++, result
->type
= CPP_DEREF_STAR
;
2649 else if (*buffer
->cur
== '-')
2650 buffer
->cur
++, result
->type
= CPP_MINUS_MINUS
;
2651 else if (*buffer
->cur
== '=')
2652 buffer
->cur
++, result
->type
= CPP_MINUS_EQ
;
2656 result
->type
= CPP_AND
;
2657 if (*buffer
->cur
== '&')
2658 buffer
->cur
++, result
->type
= CPP_AND_AND
;
2659 else if (*buffer
->cur
== '=')
2660 buffer
->cur
++, result
->type
= CPP_AND_EQ
;
2664 result
->type
= CPP_OR
;
2665 if (*buffer
->cur
== '|')
2666 buffer
->cur
++, result
->type
= CPP_OR_OR
;
2667 else if (*buffer
->cur
== '=')
2668 buffer
->cur
++, result
->type
= CPP_OR_EQ
;
2672 result
->type
= CPP_COLON
;
2673 if (*buffer
->cur
== ':' && CPP_OPTION (pfile
, cplusplus
))
2674 buffer
->cur
++, result
->type
= CPP_SCOPE
;
2675 else if (*buffer
->cur
== '>' && CPP_OPTION (pfile
, digraphs
))
2678 result
->flags
|= DIGRAPH
;
2679 result
->type
= CPP_CLOSE_SQUARE
;
2683 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ
, CPP_MULT
); break;
2684 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ
, CPP_EQ
); break;
2685 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ
, CPP_NOT
); break;
2686 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ
, CPP_XOR
); break;
2687 case '#': IF_NEXT_IS ('#', CPP_PASTE
, CPP_HASH
); result
->val
.token_no
= 0; break;
2689 case '?': result
->type
= CPP_QUERY
; break;
2690 case '~': result
->type
= CPP_COMPL
; break;
2691 case ',': result
->type
= CPP_COMMA
; break;
2692 case '(': result
->type
= CPP_OPEN_PAREN
; break;
2693 case ')': result
->type
= CPP_CLOSE_PAREN
; break;
2694 case '[': result
->type
= CPP_OPEN_SQUARE
; break;
2695 case ']': result
->type
= CPP_CLOSE_SQUARE
; break;
2696 case '{': result
->type
= CPP_OPEN_BRACE
; break;
2697 case '}': result
->type
= CPP_CLOSE_BRACE
; break;
2698 case ';': result
->type
= CPP_SEMICOLON
; break;
2700 /* @ is a punctuator in Objective-C. */
2701 case '@': result
->type
= CPP_ATSIGN
; break;
2706 const uchar
*base
= --buffer
->cur
;
2707 struct normalize_state nst
= INITIAL_NORMALIZE_STATE
;
2709 if (forms_identifier_p (pfile
, true, &nst
))
2711 result
->type
= CPP_NAME
;
2712 result
->val
.node
.node
= lex_identifier (pfile
, base
, true, &nst
,
2713 &result
->val
.node
.spelling
);
2714 warn_about_normalization (pfile
, result
, &nst
);
2721 create_literal (pfile
, result
, buffer
->cur
- 1, 1, CPP_OTHER
);
2728 /* An upper bound on the number of bytes needed to spell TOKEN.
2729 Does not include preceding whitespace. */
2731 cpp_token_len (const cpp_token
*token
)
2735 switch (TOKEN_SPELL (token
))
2737 default: len
= 6; break;
2738 case SPELL_LITERAL
: len
= token
->val
.str
.len
; break;
2739 case SPELL_IDENT
: len
= NODE_LEN (token
->val
.node
.node
) * 10; break;
/* Parse one UTF-8 sequence starting at NAME and place the equivalent
   \U escape (backslash, 'U', eight lowercase hex digits) in BUFFER.
   Return the number of bytes read out of NAME.  There are always
   10 bytes written to BUFFER, and no terminating NUL is added.
   Aborts if a continuation byte is ill-formed.  */
static int
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  int j;
  int ucn_len = 0;
  int ucn_len_c;
  unsigned t;
  unsigned long utf32;

  /* Compute the length of the UTF-8 sequence: one per leading 1 bit
     of the first byte.  */
  for (t = *name; t & 0x80; t <<= 1)
    ucn_len++;

  /* Payload bits of the lead byte, then six bits per continuation.  */
  utf32 = *name & (0x7F >> ucn_len);
  for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
    {
      utf32 = (utf32 << 6) | (*++name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
	abort ();
    }

  *buffer++ = '\\';
  *buffer++ = 'U';
  for (j = 7; j >= 0; j--)
    *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
  return ucn_len;
}
2779 /* Given a token TYPE corresponding to a digraph, return a pointer to
2780 the spelling of the digraph. */
2781 static const unsigned char *
2782 cpp_digraph2name (enum cpp_ttype type
)
2784 return digraph_spellings
[(int) type
- (int) CPP_FIRST_DIGRAPH
];
2787 /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
2788 The buffer must already contain the enough space to hold the
2789 token's spelling. Returns a pointer to the character after the
2790 last character written. */
2792 _cpp_spell_ident_ucns (unsigned char *buffer
, cpp_hashnode
*ident
)
2795 const unsigned char *name
= NODE_NAME (ident
);
2797 for (i
= 0; i
< NODE_LEN (ident
); i
++)
2798 if (name
[i
] & ~0x7F)
2800 i
+= utf8_to_ucn (buffer
, name
+ i
) - 1;
2804 *buffer
++ = name
[i
];
2809 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
2810 already contain the enough space to hold the token's spelling.
2811 Returns a pointer to the character after the last character written.
2812 FORSTRING is true if this is to be the spelling after translation
2813 phase 1 (with the original spelling of extended identifiers), false
2814 if extended identifiers should always be written using UCNs (there is
2815 no option for always writing them in the internal UTF-8 form).
2816 FIXME: Would be nice if we didn't need the PFILE argument. */
2818 cpp_spell_token (cpp_reader
*pfile
, const cpp_token
*token
,
2819 unsigned char *buffer
, bool forstring
)
2821 switch (TOKEN_SPELL (token
))
2823 case SPELL_OPERATOR
:
2825 const unsigned char *spelling
;
2828 if (token
->flags
& DIGRAPH
)
2829 spelling
= cpp_digraph2name (token
->type
);
2830 else if (token
->flags
& NAMED_OP
)
2833 spelling
= TOKEN_NAME (token
);
2835 while ((c
= *spelling
++) != '\0')
2844 memcpy (buffer
, NODE_NAME (token
->val
.node
.spelling
),
2845 NODE_LEN (token
->val
.node
.spelling
));
2846 buffer
+= NODE_LEN (token
->val
.node
.spelling
);
2849 buffer
= _cpp_spell_ident_ucns (buffer
, token
->val
.node
.node
);
2853 memcpy (buffer
, token
->val
.str
.text
, token
->val
.str
.len
);
2854 buffer
+= token
->val
.str
.len
;
2858 cpp_error (pfile
, CPP_DL_ICE
,
2859 "unspellable token %s", TOKEN_NAME (token
));
2866 /* Returns TOKEN spelt as a null-terminated string. The string is
2867 freed when the reader is destroyed. Useful for diagnostics. */
2869 cpp_token_as_text (cpp_reader
*pfile
, const cpp_token
*token
)
2871 unsigned int len
= cpp_token_len (token
) + 1;
2872 unsigned char *start
= _cpp_unaligned_alloc (pfile
, len
), *end
;
2874 end
= cpp_spell_token (pfile
, token
, start
, false);
2880 /* Returns a pointer to a string which spells the token defined by
2881 TYPE and FLAGS. Used by C front ends, which really should move to
2882 using cpp_token_as_text. */
2884 cpp_type2name (enum cpp_ttype type
, unsigned char flags
)
2886 if (flags
& DIGRAPH
)
2887 return (const char *) cpp_digraph2name (type
);
2888 else if (flags
& NAMED_OP
)
2889 return cpp_named_operator2name (type
);
2891 return (const char *) token_spellings
[type
].name
;
2894 /* Writes the spelling of token to FP, without any preceding space.
2895 Separated from cpp_spell_token for efficiency - to avoid stdio
2896 double-buffering. */
2898 cpp_output_token (const cpp_token
*token
, FILE *fp
)
2900 switch (TOKEN_SPELL (token
))
2902 case SPELL_OPERATOR
:
2904 const unsigned char *spelling
;
2907 if (token
->flags
& DIGRAPH
)
2908 spelling
= cpp_digraph2name (token
->type
);
2909 else if (token
->flags
& NAMED_OP
)
2912 spelling
= TOKEN_NAME (token
);
2917 while ((c
= *++spelling
) != '\0');
2925 const unsigned char * name
= NODE_NAME (token
->val
.node
.node
);
2927 for (i
= 0; i
< NODE_LEN (token
->val
.node
.node
); i
++)
2928 if (name
[i
] & ~0x7F)
2930 unsigned char buffer
[10];
2931 i
+= utf8_to_ucn (buffer
, name
+ i
) - 1;
2932 fwrite (buffer
, 1, 10, fp
);
2935 fputc (NODE_NAME (token
->val
.node
.node
)[i
], fp
);
2940 fwrite (token
->val
.str
.text
, 1, token
->val
.str
.len
, fp
);
2944 /* An error, most probably. */
2949 /* Compare two tokens. */
2951 _cpp_equiv_tokens (const cpp_token
*a
, const cpp_token
*b
)
2953 if (a
->type
== b
->type
&& a
->flags
== b
->flags
)
2954 switch (TOKEN_SPELL (a
))
2956 default: /* Keep compiler happy. */
2957 case SPELL_OPERATOR
:
2958 /* token_no is used to track where multiple consecutive ##
2959 tokens were originally located. */
2960 return (a
->type
!= CPP_PASTE
|| a
->val
.token_no
== b
->val
.token_no
);
2962 return (a
->type
!= CPP_MACRO_ARG
2963 || (a
->val
.macro_arg
.arg_no
== b
->val
.macro_arg
.arg_no
2964 && a
->val
.macro_arg
.spelling
== b
->val
.macro_arg
.spelling
));
2966 return (a
->val
.node
.node
== b
->val
.node
.node
2967 && a
->val
.node
.spelling
== b
->val
.node
.spelling
);
2969 return (a
->val
.str
.len
== b
->val
.str
.len
2970 && !memcmp (a
->val
.str
.text
, b
->val
.str
.text
,
2977 /* Returns nonzero if a space should be inserted to avoid an
2978 accidental token paste for output. For simplicity, it is
2979 conservative, and occasionally advises a space where one is not
2980 needed, e.g. "." and ".2". */
2982 cpp_avoid_paste (cpp_reader
*pfile
, const cpp_token
*token1
,
2983 const cpp_token
*token2
)
2985 enum cpp_ttype a
= token1
->type
, b
= token2
->type
;
2988 if (token1
->flags
& NAMED_OP
)
2990 if (token2
->flags
& NAMED_OP
)
2994 if (token2
->flags
& DIGRAPH
)
2995 c
= digraph_spellings
[(int) b
- (int) CPP_FIRST_DIGRAPH
][0];
2996 else if (token_spellings
[b
].category
== SPELL_OPERATOR
)
2997 c
= token_spellings
[b
].name
[0];
2999 /* Quickly get everything that can paste with an '='. */
3000 if ((int) a
<= (int) CPP_LAST_EQ
&& c
== '=')
3005 case CPP_GREATER
: return c
== '>';
3006 case CPP_LESS
: return c
== '<' || c
== '%' || c
== ':';
3007 case CPP_PLUS
: return c
== '+';
3008 case CPP_MINUS
: return c
== '-' || c
== '>';
3009 case CPP_DIV
: return c
== '/' || c
== '*'; /* Comments. */
3010 case CPP_MOD
: return c
== ':' || c
== '>';
3011 case CPP_AND
: return c
== '&';
3012 case CPP_OR
: return c
== '|';
3013 case CPP_COLON
: return c
== ':' || c
== '>';
3014 case CPP_DEREF
: return c
== '*';
3015 case CPP_DOT
: return c
== '.' || c
== '%' || b
== CPP_NUMBER
;
3016 case CPP_HASH
: return c
== '#' || c
== '%'; /* Digraph form. */
3017 case CPP_NAME
: return ((b
== CPP_NUMBER
3018 && name_p (pfile
, &token2
->val
.str
))
3020 || b
== CPP_CHAR
|| b
== CPP_STRING
); /* L */
3021 case CPP_NUMBER
: return (b
== CPP_NUMBER
|| b
== CPP_NAME
3022 || c
== '.' || c
== '+' || c
== '-');
3024 case CPP_OTHER
: return ((token1
->val
.str
.text
[0] == '\\'
3026 || (CPP_OPTION (pfile
, objc
)
3027 && token1
->val
.str
.text
[0] == '@'
3028 && (b
== CPP_NAME
|| b
== CPP_STRING
)));
3031 case CPP_UTF8STRING
:
3033 case CPP_STRING32
: return (CPP_OPTION (pfile
, user_literals
)
3035 || (TOKEN_SPELL (token2
) == SPELL_LITERAL
3036 && ISIDST (token2
->val
.str
.text
[0]))));
3044 /* Output all the remaining tokens on the current line, and a newline
3045 character, to FP. Leading whitespace is removed. If there are
3046 macros, special token padding is not performed. */
3048 cpp_output_line (cpp_reader
*pfile
, FILE *fp
)
3050 const cpp_token
*token
;
3052 token
= cpp_get_token (pfile
);
3053 while (token
->type
!= CPP_EOF
)
3055 cpp_output_token (token
, fp
);
3056 token
= cpp_get_token (pfile
);
3057 if (token
->flags
& PREV_WHITE
)
3064 /* Return a string representation of all the remaining tokens on the
3065 current line. The result is allocated using xmalloc and must be
3066 freed by the caller. */
3068 cpp_output_line_to_string (cpp_reader
*pfile
, const unsigned char *dir_name
)
3070 const cpp_token
*token
;
3071 unsigned int out
= dir_name
? ustrlen (dir_name
) : 0;
3072 unsigned int alloced
= 120 + out
;
3073 unsigned char *result
= (unsigned char *) xmalloc (alloced
);
3075 /* If DIR_NAME is empty, there are no initial contents. */
3078 sprintf ((char *) result
, "#%s ", dir_name
);
3082 token
= cpp_get_token (pfile
);
3083 while (token
->type
!= CPP_EOF
)
3085 unsigned char *last
;
3086 /* Include room for a possible space and the terminating nul. */
3087 unsigned int len
= cpp_token_len (token
) + 2;
3089 if (out
+ len
> alloced
)
3092 if (out
+ len
> alloced
)
3093 alloced
= out
+ len
;
3094 result
= (unsigned char *) xrealloc (result
, alloced
);
3097 last
= cpp_spell_token (pfile
, token
, &result
[out
], 0);
3098 out
= last
- result
;
3100 token
= cpp_get_token (pfile
);
3101 if (token
->flags
& PREV_WHITE
)
3102 result
[out
++] = ' ';
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* MIN_EXTRA is parenthesized so that an expression argument cannot
   regroup with the addition.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
3124 /* Create a new allocation buffer. Place the control block at the end
3125 of the buffer, so that buffer overflows will cause immediate chaos. */
3127 new_buff (size_t len
)
3130 unsigned char *base
;
3132 if (len
< MIN_BUFF_SIZE
)
3133 len
= MIN_BUFF_SIZE
;
3134 len
= CPP_ALIGN (len
);
3136 #ifdef ENABLE_VALGRIND_CHECKING
3137 /* Valgrind warns about uses of interior pointers, so put _cpp_buff
3139 size_t slen
= CPP_ALIGN2 (sizeof (_cpp_buff
), 2 * DEFAULT_ALIGNMENT
);
3140 base
= XNEWVEC (unsigned char, len
+ slen
);
3141 result
= (_cpp_buff
*) base
;
3144 base
= XNEWVEC (unsigned char, len
+ sizeof (_cpp_buff
));
3145 result
= (_cpp_buff
*) (base
+ len
);
3147 result
->base
= base
;
3149 result
->limit
= base
+ len
;
3150 result
->next
= NULL
;
3154 /* Place a chain of unwanted allocation buffers on the free list. */
3156 _cpp_release_buff (cpp_reader
*pfile
, _cpp_buff
*buff
)
3158 _cpp_buff
*end
= buff
;
3162 end
->next
= pfile
->free_buffs
;
3163 pfile
->free_buffs
= buff
;
3166 /* Return a free buffer of size at least MIN_SIZE. */
3168 _cpp_get_buff (cpp_reader
*pfile
, size_t min_size
)
3170 _cpp_buff
*result
, **p
;
3172 for (p
= &pfile
->free_buffs
;; p
= &(*p
)->next
)
3177 return new_buff (min_size
);
3179 size
= result
->limit
- result
->base
;
3180 /* Return a buffer that's big enough, but don't waste one that's
3182 if (size
>= min_size
&& size
<= BUFF_SIZE_UPPER_BOUND (min_size
))
3187 result
->next
= NULL
;
3188 result
->cur
= result
->base
;
3192 /* Creates a new buffer with enough space to hold the uncommitted
3193 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
3194 the excess bytes to the new buffer. Chains the new buffer after
3195 BUFF, and returns the new buffer. */
3197 _cpp_append_extend_buff (cpp_reader
*pfile
, _cpp_buff
*buff
, size_t min_extra
)
3199 size_t size
= EXTENDED_BUFF_SIZE (buff
, min_extra
);
3200 _cpp_buff
*new_buff
= _cpp_get_buff (pfile
, size
);
3202 buff
->next
= new_buff
;
3203 memcpy (new_buff
->base
, buff
->cur
, BUFF_ROOM (buff
));
3207 /* Creates a new buffer with enough space to hold the uncommitted
3208 remaining bytes of the buffer pointed to by BUFF, and at least
3209 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
3210 Chains the new buffer before the buffer pointed to by BUFF, and
3211 updates the pointer to point to the new buffer. */
3213 _cpp_extend_buff (cpp_reader
*pfile
, _cpp_buff
**pbuff
, size_t min_extra
)
3215 _cpp_buff
*new_buff
, *old_buff
= *pbuff
;
3216 size_t size
= EXTENDED_BUFF_SIZE (old_buff
, min_extra
);
3218 new_buff
= _cpp_get_buff (pfile
, size
);
3219 memcpy (new_buff
->base
, old_buff
->cur
, BUFF_ROOM (old_buff
));
3220 new_buff
->next
= old_buff
;
3224 /* Free a chain of buffers starting at BUFF. */
3226 _cpp_free_buff (_cpp_buff
*buff
)
3230 for (; buff
; buff
= next
)
3233 #ifdef ENABLE_VALGRIND_CHECKING
3241 /* Allocate permanent, unaligned storage of length LEN. */
3243 _cpp_unaligned_alloc (cpp_reader
*pfile
, size_t len
)
3245 _cpp_buff
*buff
= pfile
->u_buff
;
3246 unsigned char *result
= buff
->cur
;
3248 if (len
> (size_t) (buff
->limit
- result
))
3250 buff
= _cpp_get_buff (pfile
, len
);
3251 buff
->next
= pfile
->u_buff
;
3252 pfile
->u_buff
= buff
;
3256 buff
->cur
= result
+ len
;
3260 /* Allocate permanent, unaligned storage of length LEN from a_buff.
3261 That buffer is used for growing allocations when saving macro
3262 replacement lists in a #define, and when parsing an answer to an
3263 assertion in #assert, #unassert or #if (and therefore possibly
3264 whilst expanding macros). It therefore must not be used by any
3265 code that they might call: specifically the lexer and the guts of
3268 All existing other uses clearly fit this restriction: storing
3269 registered pragmas during initialization. */
3271 _cpp_aligned_alloc (cpp_reader
*pfile
, size_t len
)
3273 _cpp_buff
*buff
= pfile
->a_buff
;
3274 unsigned char *result
= buff
->cur
;
3276 if (len
> (size_t) (buff
->limit
- result
))
3278 buff
= _cpp_get_buff (pfile
, len
);
3279 buff
->next
= pfile
->a_buff
;
3280 pfile
->a_buff
= buff
;
3284 buff
->cur
= result
+ len
;
3288 /* Say which field of TOK is in use. */
3290 enum cpp_token_fld_kind
3291 cpp_token_val_index (const cpp_token
*tok
)
3293 switch (TOKEN_SPELL (tok
))
3296 return CPP_TOKEN_FLD_NODE
;
3298 return CPP_TOKEN_FLD_STR
;
3299 case SPELL_OPERATOR
:
3300 if (tok
->type
== CPP_PASTE
)
3301 return CPP_TOKEN_FLD_TOKEN_NO
;
3303 return CPP_TOKEN_FLD_NONE
;
3305 if (tok
->type
== CPP_MACRO_ARG
)
3306 return CPP_TOKEN_FLD_ARG_NO
;
3307 else if (tok
->type
== CPP_PADDING
)
3308 return CPP_TOKEN_FLD_SOURCE
;
3309 else if (tok
->type
== CPP_PRAGMA
)
3310 return CPP_TOKEN_FLD_PRAGMA
;
3311 /* else fall through */
3313 return CPP_TOKEN_FLD_NONE
;
3317 /* All tokens lexed in R after calling this function will be forced to have
3318 their source_location the same as the location referenced by P, until
3319 cpp_stop_forcing_token_locations is called for R. */
3322 cpp_force_token_locations (cpp_reader
*r
, source_location
*p
)
3324 r
->forced_token_location_p
= p
;
3327 /* Go back to assigning locations naturally for lexed tokens. */
3330 cpp_stop_forcing_token_locations (cpp_reader
*r
)
3332 r
->forced_token_location_p
= NULL
;