]> git.ipfire.org Git - thirdparty/gcc.git/blob - libcpp/lex.c
Implement N4197 - Adding u8 character literals
[thirdparty/gcc.git] / libcpp / lex.c
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000-2015 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
26
/* Spelling category recorded for every token type in the
   token_spellings table.  */
enum spell_type
{
  SPELL_OPERATOR,       /* Used by OP entries; NAME is the operator text.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
34
35 struct token_spelling
36 {
37 enum spell_type category;
38 const unsigned char *name;
39 };
40
41 static const unsigned char *const digraph_spellings[] =
42 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
43
44 #define OP(e, s) { SPELL_OPERATOR, UC s },
45 #define TK(e, s) { SPELL_ ## s, UC #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
49
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58 static void store_comment (cpp_reader *, cpp_token *);
59 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
60 unsigned int, enum cpp_ttype);
61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
62 static int name_p (cpp_reader *, const cpp_string *);
63 static tokenrun *next_tokenrun (tokenrun *);
64
65 static _cpp_buff *new_buff (size_t);
66
67
68 /* Utility routine:
69
70 Compares, the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
72 int
73 cpp_ideq (const cpp_token *token, const char *string)
74 {
75 if (token->type != CPP_NAME)
76 return 0;
77
78 return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
79 }
80
81 /* Record a note TYPE at byte POS into the current cleaned logical
82 line. */
83 static void
84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
85 {
86 if (buffer->notes_used == buffer->notes_cap)
87 {
88 buffer->notes_cap = buffer->notes_cap * 2 + 200;
89 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
90 buffer->notes_cap);
91 }
92
93 buffer->notes[buffer->notes_used].pos = pos;
94 buffer->notes[buffer->notes_used].type = type;
95 buffer->notes_used++;
96 }
97
98 \f
99 /* Fast path to find line special characters using optimized character
100 scanning algorithms. Anything complicated falls back to the slow
101 path below. Since this loop is very hot it's worth doing these kinds
102 of optimizations.
103
104 One of the paths through the ifdefs should provide
105
106 const uchar *search_line_fast (const uchar *s, const uchar *end);
107
108 Between S and END, search for \n, \r, \\, ?. Return a pointer to
109 the found character.
110
111 Note that the last character of the buffer is *always* a newline,
112 as forced by _cpp_convert_input. This fact can be used to avoid
113 explicitly looking for the end of the buffer. */
114
/* Configure only gives us an ifdef test; supply a zero default so the
   macro can be used in ordinary expressions.  */
#if !defined (WORDS_BIGENDIAN)
# define WORDS_BIGENDIAN 0
#endif

/* We want the widest integer that fits into a register, which nothing in
   <stdint.h> provides.  On most hosts that is unsigned long, but MS chose
   an LLP64 model.  When building with GCC we can ask for the "real" word
   size directly.  */
#if defined (__GNUC__)
typedef unsigned int word_type __attribute__((__mode__(__word__)));
#else
typedef unsigned long word_type;
#endif

/* The code below only copes with word sizes of 4 or 8 bytes.  Any other
   size yields a negative array bound and so fails to compile.  */
typedef char check_word_type_size
  [(sizeof (word_type) == 4 || sizeof (word_type) == 8) ? 1 : -1];
134
135 /* Return X with the first N bytes forced to values that won't match one
136 of the interesting characters. Note that NUL is not interesting. */
137
138 static inline word_type
139 acc_char_mask_misalign (word_type val, unsigned int n)
140 {
141 word_type mask = -1;
142 if (WORDS_BIGENDIAN)
143 mask >>= n * 8;
144 else
145 mask <<= n * 8;
146 return val & mask;
147 }
148
149 /* Return X replicated to all byte positions within WORD_TYPE. */
150
151 static inline word_type
152 acc_char_replicate (uchar x)
153 {
154 word_type ret;
155
156 ret = (x << 24) | (x << 16) | (x << 8) | x;
157 if (sizeof(word_type) == 8)
158 ret = (ret << 16 << 16) | ret;
159 return ret;
160 }
161
162 /* Return non-zero if some byte of VAL is (probably) C. */
163
164 static inline word_type
165 acc_char_cmp (word_type val, word_type c)
166 {
167 #if defined(__GNUC__) && defined(__alpha__)
168 /* We can get exact results using a compare-bytes instruction.
169 Get (val == c) via (0 >= (val ^ c)). */
170 return __builtin_alpha_cmpbge (0, val ^ c);
171 #else
172 word_type magic = 0x7efefefeU;
173 if (sizeof(word_type) == 8)
174 magic = (magic << 16 << 16) | 0xfefefefeU;
175 magic |= 1;
176
177 val ^= c;
178 return ((val + magic) ^ ~val) & ~magic;
179 #endif
180 }
181
182 /* Given the result of acc_char_cmp is non-zero, return the index of
183 the found character. If this was a false positive, return -1. */
184
185 static inline int
186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
187 word_type val ATTRIBUTE_UNUSED)
188 {
189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190 /* The cmpbge instruction sets *bits* of the result corresponding to
191 matches in the bytes with no false positives. */
192 return __builtin_ctzl (cmp);
193 #else
194 unsigned int i;
195
196 /* ??? It would be nice to force unrolling here,
197 and have all of these constants folded. */
198 for (i = 0; i < sizeof(word_type); ++i)
199 {
200 uchar c;
201 if (WORDS_BIGENDIAN)
202 c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
203 else
204 c = (val >> i * 8) & 0xff;
205
206 if (c == '\n' || c == '\r' || c == '\\' || c == '?')
207 return i;
208 }
209
210 return -1;
211 #endif
212 }
213
214 /* A version of the fast scanner using bit fiddling techniques.
215
216 For 32-bit words, one would normally perform 16 comparisons and
217 16 branches. With this algorithm one performs 24 arithmetic
218 operations and one branch. Whether this is faster with a 32-bit
219 word size is going to be somewhat system dependent.
220
221 For 64-bit words, we eliminate twice the number of comparisons
222 and branches without increasing the number of arithmetic operations.
223 It's almost certainly going to be a win with 64-bit word size. */
224
225 static const uchar * search_line_acc_char (const uchar *, const uchar *)
226 ATTRIBUTE_UNUSED;
227
228 static const uchar *
229 search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
230 {
231 const word_type repl_nl = acc_char_replicate ('\n');
232 const word_type repl_cr = acc_char_replicate ('\r');
233 const word_type repl_bs = acc_char_replicate ('\\');
234 const word_type repl_qm = acc_char_replicate ('?');
235
236 unsigned int misalign;
237 const word_type *p;
238 word_type val, t;
239
240 /* Align the buffer. Mask out any bytes from before the beginning. */
241 p = (word_type *)((uintptr_t)s & -sizeof(word_type));
242 val = *p;
243 misalign = (uintptr_t)s & (sizeof(word_type) - 1);
244 if (misalign)
245 val = acc_char_mask_misalign (val, misalign);
246
247 /* Main loop. */
248 while (1)
249 {
250 t = acc_char_cmp (val, repl_nl);
251 t |= acc_char_cmp (val, repl_cr);
252 t |= acc_char_cmp (val, repl_bs);
253 t |= acc_char_cmp (val, repl_qm);
254
255 if (__builtin_expect (t != 0, 0))
256 {
257 int i = acc_char_index (t, val);
258 if (i >= 0)
259 return (const uchar *)p + i;
260 }
261
262 val = *++p;
263 }
264 }
265
266 /* Disable on Solaris 2/x86 until the following problem can be properly
267 autoconfed:
268
269 The Solaris 10+ assembler tags objects with the instruction set
270 extensions used, so SSE4.2 executables cannot run on machines that
271 don't support that extension. */
272
273 #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
274
/* Replicated character data shared between the vector implementations.
   Outside of a context with vector support we cannot define compatible
   vector types, so the rows are plain characters: row 0 is '\n', row 1
   is '\r', row 2 is '\\' and row 3 is '?', each repeated 16 times.  */
#define REPL16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
static const char repl_chars[4][16] __attribute__((aligned(16))) = {
  REPL16 ('\n'),
  REPL16 ('\r'),
  REPL16 ('\\'),
  REPL16 ('?'),
};
#undef REPL16
289
290 /* A version of the fast scanner using MMX vectorized byte compare insns.
291
292 This uses the PMOVMSKB instruction which was introduced with "MMX2",
293 which was packaged into SSE1; it is also present in the AMD MMX
294 extension. Mark the function as using "sse" so that we emit a real
295 "emms" instruction, rather than the 3dNOW "femms" instruction. */
296
297 static const uchar *
298 #ifndef __SSE__
299 __attribute__((__target__("sse")))
300 #endif
301 search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
302 {
303 typedef char v8qi __attribute__ ((__vector_size__ (8)));
304 typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
305
306 const v8qi repl_nl = *(const v8qi *)repl_chars[0];
307 const v8qi repl_cr = *(const v8qi *)repl_chars[1];
308 const v8qi repl_bs = *(const v8qi *)repl_chars[2];
309 const v8qi repl_qm = *(const v8qi *)repl_chars[3];
310
311 unsigned int misalign, found, mask;
312 const v8qi *p;
313 v8qi data, t, c;
314
315 /* Align the source pointer. While MMX doesn't generate unaligned data
316 faults, this allows us to safely scan to the end of the buffer without
317 reading beyond the end of the last page. */
318 misalign = (uintptr_t)s & 7;
319 p = (const v8qi *)((uintptr_t)s & -8);
320 data = *p;
321
322 /* Create a mask for the bytes that are valid within the first
323 16-byte block. The Idea here is that the AND with the mask
324 within the loop is "free", since we need some AND or TEST
325 insn in order to set the flags for the branch anyway. */
326 mask = -1u << misalign;
327
328 /* Main loop processing 8 bytes at a time. */
329 goto start;
330 do
331 {
332 data = *++p;
333 mask = -1;
334
335 start:
336 t = __builtin_ia32_pcmpeqb(data, repl_nl);
337 c = __builtin_ia32_pcmpeqb(data, repl_cr);
338 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
339 c = __builtin_ia32_pcmpeqb(data, repl_bs);
340 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
341 c = __builtin_ia32_pcmpeqb(data, repl_qm);
342 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
343 found = __builtin_ia32_pmovmskb (t);
344 found &= mask;
345 }
346 while (!found);
347
348 __builtin_ia32_emms ();
349
350 /* FOUND contains 1 in bits for which we matched a relevant
351 character. Conversion to the byte index is trivial. */
352 found = __builtin_ctz(found);
353 return (const uchar *)p + found;
354 }
355
356 /* A version of the fast scanner using SSE2 vectorized byte compare insns. */
357
358 static const uchar *
359 #ifndef __SSE2__
360 __attribute__((__target__("sse2")))
361 #endif
362 search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
363 {
364 typedef char v16qi __attribute__ ((__vector_size__ (16)));
365
366 const v16qi repl_nl = *(const v16qi *)repl_chars[0];
367 const v16qi repl_cr = *(const v16qi *)repl_chars[1];
368 const v16qi repl_bs = *(const v16qi *)repl_chars[2];
369 const v16qi repl_qm = *(const v16qi *)repl_chars[3];
370
371 unsigned int misalign, found, mask;
372 const v16qi *p;
373 v16qi data, t;
374
375 /* Align the source pointer. */
376 misalign = (uintptr_t)s & 15;
377 p = (const v16qi *)((uintptr_t)s & -16);
378 data = *p;
379
380 /* Create a mask for the bytes that are valid within the first
381 16-byte block. The Idea here is that the AND with the mask
382 within the loop is "free", since we need some AND or TEST
383 insn in order to set the flags for the branch anyway. */
384 mask = -1u << misalign;
385
386 /* Main loop processing 16 bytes at a time. */
387 goto start;
388 do
389 {
390 data = *++p;
391 mask = -1;
392
393 start:
394 t = __builtin_ia32_pcmpeqb128(data, repl_nl);
395 t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
396 t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
397 t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
398 found = __builtin_ia32_pmovmskb128 (t);
399 found &= mask;
400 }
401 while (!found);
402
403 /* FOUND contains 1 in bits for which we matched a relevant
404 character. Conversion to the byte index is trivial. */
405 found = __builtin_ctz(found);
406 return (const uchar *)p + found;
407 }
408
#ifdef HAVE_SSE4
/* A version of the fast scanner using SSE 4.2 vectorized string insns.
   Returns a pointer to the first \n, \r, \\ or ? at or after S.  END is
   used only to decide whether an unaligned 16-byte read at S is safe.  */

static const uchar *
#ifndef __SSE4_2__
__attribute__((__target__("sse4.2")))
#endif
search_line_sse42 (const uchar *s, const uchar *end)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
  static const v16qi search = { '\n', '\r', '?', '\\' };

  const uintptr_t addr = (uintptr_t)s;
  uintptr_t index;

  /* Check for unaligned input.  */
  if (addr & 15)
    {
      v16qi head;

      /* With less than 16 bytes left in the buffer and less than
         16 bytes left on the page, reading 16 bytes here might generate
         a spurious page fault.  Defer to the SSE2 implementation, which
         already handles alignment.  */
      if (__builtin_expect (end - s < 16, 0)
          && __builtin_expect ((addr & 0xfff) > 0xff0, 0))
        return search_line_sse2 (s, end);

      /* ??? The builtin doesn't understand that the PCMPESTRI read from
         memory need not be aligned.  */
      head = __builtin_ia32_loaddqu ((const char *) s);
      index = __builtin_ia32_pcmpestri128 (search, 4, head, 16, 0);

      /* An index below 16 is a match within the unaligned block.  */
      if (__builtin_expect (index < 16, 0))
        return s + index;

      /* Advance the pointer to an aligned address.  We will re-scan a
         few bytes, but we no longer need care for reading past the
         end of a page, since we're guaranteed a match.  */
      s = (const uchar *)((addr + 16) & -16);
    }

  /* Main loop, processing 16 bytes at a time.  */
#ifdef __GCC_ASM_FLAG_OUTPUTS__
  for (;; s += 16)
    {
      char carry;

      /* By using inline assembly instead of the builtin,
         we can use the result, as well as the flags set.  */
      __asm ("%vpcmpestri\t$0, %2, %3"
             : "=c"(index), "=@ccc"(carry)
             : "m"(*s), "x"(search), "a"(4), "d"(16));

      /* The carry flag reports a match in this block.  */
      if (carry)
        break;
    }
#else
  /* The assembly loop adds 16 before every comparison, so start one
     block early.  */
  s -= 16;
  /* By doing the whole loop in inline assembly,
     we can make proper use of the flags set.  */
  __asm (      ".balign 16\n"
        "0:     add $16, %1\n"
        "       %vpcmpestri\t$0, (%1), %2\n"
        "       jnc 0b"
        : "=&c"(index), "+r"(s)
        : "x"(search), "a"(4), "d"(16));
#endif

  return s + index;
}

#else
/* Work around out-dated assemblers without sse4 support.  */
#define search_line_sse42 search_line_sse2
#endif
489
490 /* Check the CPU capabilities. */
491
492 #include "../gcc/config/i386/cpuid.h"
493
494 typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
495 static search_line_fast_type search_line_fast;
496
497 #define HAVE_init_vectorized_lexer 1
498 static inline void
499 init_vectorized_lexer (void)
500 {
501 unsigned dummy, ecx = 0, edx = 0;
502 search_line_fast_type impl = search_line_acc_char;
503 int minimum = 0;
504
505 #if defined(__SSE4_2__)
506 minimum = 3;
507 #elif defined(__SSE2__)
508 minimum = 2;
509 #elif defined(__SSE__)
510 minimum = 1;
511 #endif
512
513 if (minimum == 3)
514 impl = search_line_sse42;
515 else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
516 {
517 if (minimum == 3 || (ecx & bit_SSE4_2))
518 impl = search_line_sse42;
519 else if (minimum == 2 || (edx & bit_SSE2))
520 impl = search_line_sse2;
521 else if (minimum == 1 || (edx & bit_SSE))
522 impl = search_line_mmx;
523 }
524 else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
525 {
526 if (minimum == 1
527 || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
528 impl = search_line_mmx;
529 }
530
531 search_line_fast = impl;
532 }
533
534 #elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
535
536 /* A vection of the fast scanner using AltiVec vectorized byte compares
537 and VSX unaligned loads (when VSX is available). This is otherwise
538 the same as the pre-GCC 5 version. */
539
540 ATTRIBUTE_NO_SANITIZE_UNDEFINED
541 static const uchar *
542 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
543 {
544 typedef __attribute__((altivec(vector))) unsigned char vc;
545
546 const vc repl_nl = {
547 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
548 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
549 };
550 const vc repl_cr = {
551 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
552 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
553 };
554 const vc repl_bs = {
555 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
556 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
557 };
558 const vc repl_qm = {
559 '?', '?', '?', '?', '?', '?', '?', '?',
560 '?', '?', '?', '?', '?', '?', '?', '?',
561 };
562 const vc zero = { 0 };
563
564 vc data, t;
565
566 /* Main loop processing 16 bytes at a time. */
567 do
568 {
569 vc m_nl, m_cr, m_bs, m_qm;
570
571 data = *((const vc *)s);
572 s += 16;
573
574 m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
575 m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
576 m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
577 m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
578 t = (m_nl | m_cr) | (m_bs | m_qm);
579
580 /* T now contains 0xff in bytes for which we matched one of the relevant
581 characters. We want to exit the loop if any byte in T is non-zero.
582 Below is the expansion of vec_any_ne(t, zero). */
583 }
584 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
585
586 /* Restore s to to point to the 16 bytes we just processed. */
587 s -= 16;
588
589 {
590 #define N (sizeof(vc) / sizeof(long))
591
592 union {
593 vc v;
594 /* Statically assert that N is 2 or 4. */
595 unsigned long l[(N == 2 || N == 4) ? N : -1];
596 } u;
597 unsigned long l, i = 0;
598
599 u.v = t;
600
601 /* Find the first word of T that is non-zero. */
602 switch (N)
603 {
604 case 4:
605 l = u.l[i++];
606 if (l != 0)
607 break;
608 s += sizeof(unsigned long);
609 l = u.l[i++];
610 if (l != 0)
611 break;
612 s += sizeof(unsigned long);
613 case 2:
614 l = u.l[i++];
615 if (l != 0)
616 break;
617 s += sizeof(unsigned long);
618 l = u.l[i];
619 }
620
621 /* L now contains 0xff in bytes for which we matched one of the
622 relevant characters. We can find the byte index by finding
623 its bit index and dividing by 8. */
624 #ifdef __BIG_ENDIAN__
625 l = __builtin_clzl(l) >> 3;
626 #else
627 l = __builtin_ctzl(l) >> 3;
628 #endif
629 return s + l;
630
631 #undef N
632 }
633 }
634
635 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
636
637 /* A vection of the fast scanner using AltiVec vectorized byte compares.
638 This cannot be used for little endian because vec_lvsl/lvsr are
639 deprecated for little endian and the code won't work properly. */
640 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
641 so we can't compile this function without -maltivec on the command line
642 (or implied by some other switch). */
643
644 static const uchar *
645 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
646 {
647 typedef __attribute__((altivec(vector))) unsigned char vc;
648
649 const vc repl_nl = {
650 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
651 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
652 };
653 const vc repl_cr = {
654 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
655 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
656 };
657 const vc repl_bs = {
658 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
659 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
660 };
661 const vc repl_qm = {
662 '?', '?', '?', '?', '?', '?', '?', '?',
663 '?', '?', '?', '?', '?', '?', '?', '?',
664 };
665 const vc ones = {
666 -1, -1, -1, -1, -1, -1, -1, -1,
667 -1, -1, -1, -1, -1, -1, -1, -1,
668 };
669 const vc zero = { 0 };
670
671 vc data, mask, t;
672
673 /* Altivec loads automatically mask addresses with -16. This lets us
674 issue the first load as early as possible. */
675 data = __builtin_vec_ld(0, (const vc *)s);
676
677 /* Discard bytes before the beginning of the buffer. Do this by
678 beginning with all ones and shifting in zeros according to the
679 mis-alignment. The LVSR instruction pulls the exact shift we
680 want from the address. */
681 mask = __builtin_vec_lvsr(0, s);
682 mask = __builtin_vec_perm(zero, ones, mask);
683 data &= mask;
684
685 /* While altivec loads mask addresses, we still need to align S so
686 that the offset we compute at the end is correct. */
687 s = (const uchar *)((uintptr_t)s & -16);
688
689 /* Main loop processing 16 bytes at a time. */
690 goto start;
691 do
692 {
693 vc m_nl, m_cr, m_bs, m_qm;
694
695 s += 16;
696 data = __builtin_vec_ld(0, (const vc *)s);
697
698 start:
699 m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
700 m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
701 m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
702 m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
703 t = (m_nl | m_cr) | (m_bs | m_qm);
704
705 /* T now contains 0xff in bytes for which we matched one of the relevant
706 characters. We want to exit the loop if any byte in T is non-zero.
707 Below is the expansion of vec_any_ne(t, zero). */
708 }
709 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
710
711 {
712 #define N (sizeof(vc) / sizeof(long))
713
714 union {
715 vc v;
716 /* Statically assert that N is 2 or 4. */
717 unsigned long l[(N == 2 || N == 4) ? N : -1];
718 } u;
719 unsigned long l, i = 0;
720
721 u.v = t;
722
723 /* Find the first word of T that is non-zero. */
724 switch (N)
725 {
726 case 4:
727 l = u.l[i++];
728 if (l != 0)
729 break;
730 s += sizeof(unsigned long);
731 l = u.l[i++];
732 if (l != 0)
733 break;
734 s += sizeof(unsigned long);
735 case 2:
736 l = u.l[i++];
737 if (l != 0)
738 break;
739 s += sizeof(unsigned long);
740 l = u.l[i];
741 }
742
743 /* L now contains 0xff in bytes for which we matched one of the
744 relevant characters. We can find the byte index by finding
745 its bit index and dividing by 8. */
746 l = __builtin_clzl(l) >> 3;
747 return s + l;
748
749 #undef N
750 }
751 }
752
753 #elif defined (__ARM_NEON)
754 #include "arm_neon.h"
755
756 static const uchar *
757 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
758 {
759 const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
760 const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
761 const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
762 const uint8x16_t repl_qm = vdupq_n_u8 ('?');
763 const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
764
765 unsigned int misalign, found, mask;
766 const uint8_t *p;
767 uint8x16_t data;
768
769 /* Align the source pointer. */
770 misalign = (uintptr_t)s & 15;
771 p = (const uint8_t *)((uintptr_t)s & -16);
772 data = vld1q_u8 (p);
773
774 /* Create a mask for the bytes that are valid within the first
775 16-byte block. The Idea here is that the AND with the mask
776 within the loop is "free", since we need some AND or TEST
777 insn in order to set the flags for the branch anyway. */
778 mask = (-1u << misalign) & 0xffff;
779
780 /* Main loop, processing 16 bytes at a time. */
781 goto start;
782
783 do
784 {
785 uint8x8_t l;
786 uint16x4_t m;
787 uint32x2_t n;
788 uint8x16_t t, u, v, w;
789
790 p += 16;
791 data = vld1q_u8 (p);
792 mask = 0xffff;
793
794 start:
795 t = vceqq_u8 (data, repl_nl);
796 u = vceqq_u8 (data, repl_cr);
797 v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
798 w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
799 t = vandq_u8 (vorrq_u8 (v, w), xmask);
800 l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
801 m = vpaddl_u8 (l);
802 n = vpaddl_u16 (m);
803
804 found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
805 vshr_n_u64 ((uint64x1_t) n, 24)), 0);
806 found &= mask;
807 }
808 while (!found);
809
810 /* FOUND contains 1 in bits for which we matched a relevant
811 character. Conversion to the byte index is trivial. */
812 found = __builtin_ctz (found);
813 return (const uchar *)p + found;
814 }
815
816 #else
817
/* Only one accelerated alternative is available here.  Name it directly
   rather than through a function pointer, so that inlining is
   encouraged.  */

#define search_line_fast search_line_acc_char
822
823 #endif
824
/* Initialize the lexer if needed.  Where a vectorized line scanner is
   selected at run time (HAVE_init_vectorized_lexer), make that selection
   now; otherwise there is nothing to do.  */

void
_cpp_init_lexer (void)
{
#if defined (HAVE_init_vectorized_lexer)
  init_vectorized_lexer ();
#endif
}
834
835 /* Returns with a logical line that contains no escaped newlines or
836 trigraphs. This is a time-critical inner loop. */
837 void
838 _cpp_clean_line (cpp_reader *pfile)
839 {
840 cpp_buffer *buffer;
841 const uchar *s;
842 uchar c, *d, *p;
843
844 buffer = pfile->buffer;
845 buffer->cur_note = buffer->notes_used = 0;
846 buffer->cur = buffer->line_base = buffer->next_line;
847 buffer->need_line = false;
848 s = buffer->next_line;
849
850 if (!buffer->from_stage3)
851 {
852 const uchar *pbackslash = NULL;
853
854 /* Fast path. This is the common case of an un-escaped line with
855 no trigraphs. The primary win here is by not writing any
856 data back to memory until we have to. */
857 while (1)
858 {
859 /* Perform an optimized search for \n, \r, \\, ?. */
860 s = search_line_fast (s, buffer->rlimit);
861
862 c = *s;
863 if (c == '\\')
864 {
865 /* Record the location of the backslash and continue. */
866 pbackslash = s++;
867 }
868 else if (__builtin_expect (c == '?', 0))
869 {
870 if (__builtin_expect (s[1] == '?', false)
871 && _cpp_trigraph_map[s[2]])
872 {
873 /* Have a trigraph. We may or may not have to convert
874 it. Add a line note regardless, for -Wtrigraphs. */
875 add_line_note (buffer, s, s[2]);
876 if (CPP_OPTION (pfile, trigraphs))
877 {
878 /* We do, and that means we have to switch to the
879 slow path. */
880 d = (uchar *) s;
881 *d = _cpp_trigraph_map[s[2]];
882 s += 2;
883 goto slow_path;
884 }
885 }
886 /* Not a trigraph. Continue on fast-path. */
887 s++;
888 }
889 else
890 break;
891 }
892
893 /* This must be \r or \n. We're either done, or we'll be forced
894 to write back to the buffer and continue on the slow path. */
895 d = (uchar *) s;
896
897 if (__builtin_expect (s == buffer->rlimit, false))
898 goto done;
899
900 /* DOS line ending? */
901 if (__builtin_expect (c == '\r', false) && s[1] == '\n')
902 {
903 s++;
904 if (s == buffer->rlimit)
905 goto done;
906 }
907
908 if (__builtin_expect (pbackslash == NULL, true))
909 goto done;
910
911 /* Check for escaped newline. */
912 p = d;
913 while (is_nvspace (p[-1]))
914 p--;
915 if (p - 1 != pbackslash)
916 goto done;
917
918 /* Have an escaped newline; process it and proceed to
919 the slow path. */
920 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
921 d = p - 2;
922 buffer->next_line = p - 1;
923
924 slow_path:
925 while (1)
926 {
927 c = *++s;
928 *++d = c;
929
930 if (c == '\n' || c == '\r')
931 {
932 /* Handle DOS line endings. */
933 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
934 s++;
935 if (s == buffer->rlimit)
936 break;
937
938 /* Escaped? */
939 p = d;
940 while (p != buffer->next_line && is_nvspace (p[-1]))
941 p--;
942 if (p == buffer->next_line || p[-1] != '\\')
943 break;
944
945 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
946 d = p - 2;
947 buffer->next_line = p - 1;
948 }
949 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
950 {
951 /* Add a note regardless, for the benefit of -Wtrigraphs. */
952 add_line_note (buffer, d, s[2]);
953 if (CPP_OPTION (pfile, trigraphs))
954 {
955 *d = _cpp_trigraph_map[s[2]];
956 s += 2;
957 }
958 }
959 }
960 }
961 else
962 {
963 while (*s != '\n' && *s != '\r')
964 s++;
965 d = (uchar *) s;
966
967 /* Handle DOS line endings. */
968 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
969 s++;
970 }
971
972 done:
973 *d = '\n';
974 /* A sentinel note that should never be processed. */
975 add_line_note (buffer, d + 1, '\n');
976 buffer->next_line = s + 1;
977 }
978
979 /* Return true if the trigraph indicated by NOTE should be warned
980 about in a comment. */
981 static bool
982 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
983 {
984 const uchar *p;
985
986 /* Within comments we don't warn about trigraphs, unless the
987 trigraph forms an escaped newline, as that may change
988 behavior. */
989 if (note->type != '/')
990 return false;
991
992 /* If -trigraphs, then this was an escaped newline iff the next note
993 is coincident. */
994 if (CPP_OPTION (pfile, trigraphs))
995 return note[1].pos == note->pos;
996
997 /* Otherwise, see if this forms an escaped newline. */
998 p = note->pos + 3;
999 while (is_nvspace (*p))
1000 p++;
1001
1002 /* There might have been escaped newlines between the trigraph and the
1003 newline we found. Hence the position test. */
1004 return (*p == '\n' && p < note[1].pos);
1005 }
1006
1007 /* Process the notes created by add_line_note as far as the current
1008 location. */
1009 void
1010 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
1011 {
1012 cpp_buffer *buffer = pfile->buffer;
1013
1014 for (;;)
1015 {
1016 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
1017 unsigned int col;
1018
1019 if (note->pos > buffer->cur)
1020 break;
1021
1022 buffer->cur_note++;
1023 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
1024
1025 if (note->type == '\\' || note->type == ' ')
1026 {
1027 if (note->type == ' ' && !in_comment)
1028 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1029 "backslash and newline separated by space");
1030
1031 if (buffer->next_line > buffer->rlimit)
1032 {
1033 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
1034 "backslash-newline at end of file");
1035 /* Prevent "no newline at end of file" warning. */
1036 buffer->next_line = buffer->rlimit;
1037 }
1038
1039 buffer->line_base = note->pos;
1040 CPP_INCREMENT_LINE (pfile, 0);
1041 }
1042 else if (_cpp_trigraph_map[note->type])
1043 {
1044 if (CPP_OPTION (pfile, warn_trigraphs)
1045 && (!in_comment || warn_in_comment (pfile, note)))
1046 {
1047 if (CPP_OPTION (pfile, trigraphs))
1048 cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1049 pfile->line_table->highest_line, col,
1050 "trigraph ??%c converted to %c",
1051 note->type,
1052 (int) _cpp_trigraph_map[note->type]);
1053 else
1054 {
1055 cpp_warning_with_line
1056 (pfile, CPP_W_TRIGRAPHS,
1057 pfile->line_table->highest_line, col,
1058 "trigraph ??%c ignored, use -trigraphs to enable",
1059 note->type);
1060 }
1061 }
1062 }
1063 else if (note->type == 0)
1064 /* Already processed in lex_raw_string. */;
1065 else
1066 abort ();
1067 }
1068 }
1069
1070 /* Skip a C-style block comment. We find the end of the comment by
1071 seeing if an asterisk is before every '/' we encounter. Returns
1072 nonzero if comment terminated by EOF, zero otherwise.
1073
1074 Buffer->cur points to the initial asterisk of the comment. */
1075 bool
1076 _cpp_skip_block_comment (cpp_reader *pfile)
1077 {
1078 cpp_buffer *buffer = pfile->buffer;
1079 const uchar *cur = buffer->cur;
1080 uchar c;
1081
1082 cur++;
1083 if (*cur == '/')
1084 cur++;
1085
1086 for (;;)
1087 {
1088 /* People like decorating comments with '*', so check for '/'
1089 instead for efficiency. */
1090 c = *cur++;
1091
1092 if (c == '/')
1093 {
1094 if (cur[-2] == '*')
1095 break;
1096
1097 /* Warn about potential nested comments, but not if the '/'
1098 comes immediately before the true comment delimiter.
1099 Don't bother to get it right across escaped newlines. */
1100 if (CPP_OPTION (pfile, warn_comments)
1101 && cur[0] == '*' && cur[1] != '/')
1102 {
1103 buffer->cur = cur;
1104 cpp_warning_with_line (pfile, CPP_W_COMMENTS,
1105 pfile->line_table->highest_line,
1106 CPP_BUF_COL (buffer),
1107 "\"/*\" within comment");
1108 }
1109 }
1110 else if (c == '\n')
1111 {
1112 unsigned int cols;
1113 buffer->cur = cur - 1;
1114 _cpp_process_line_notes (pfile, true);
1115 if (buffer->next_line >= buffer->rlimit)
1116 return true;
1117 _cpp_clean_line (pfile);
1118
1119 cols = buffer->next_line - buffer->line_base;
1120 CPP_INCREMENT_LINE (pfile, cols);
1121
1122 cur = buffer->cur;
1123 }
1124 }
1125
1126 buffer->cur = cur;
1127 _cpp_process_line_notes (pfile, true);
1128 return false;
1129 }
1130
1131 /* Skip a C++ line comment, leaving buffer->cur pointing to the
1132 terminating newline. Handles escaped newlines. Returns nonzero
1133 if a multiline comment. */
1134 static int
1135 skip_line_comment (cpp_reader *pfile)
1136 {
1137 cpp_buffer *buffer = pfile->buffer;
1138 source_location orig_line = pfile->line_table->highest_line;
1139
1140 while (*buffer->cur != '\n')
1141 buffer->cur++;
1142
1143 _cpp_process_line_notes (pfile, true);
1144 return orig_line != pfile->line_table->highest_line;
1145 }
1146
1147 /* Skips whitespace, saving the next non-whitespace character. */
1148 static void
1149 skip_whitespace (cpp_reader *pfile, cppchar_t c)
1150 {
1151 cpp_buffer *buffer = pfile->buffer;
1152 bool saw_NUL = false;
1153
1154 do
1155 {
1156 /* Horizontal space always OK. */
1157 if (c == ' ' || c == '\t')
1158 ;
1159 /* Just \f \v or \0 left. */
1160 else if (c == '\0')
1161 saw_NUL = true;
1162 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
1163 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
1164 CPP_BUF_COL (buffer),
1165 "%s in preprocessing directive",
1166 c == '\f' ? "form feed" : "vertical tab");
1167
1168 c = *buffer->cur++;
1169 }
1170 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
1171 while (is_nvspace (c));
1172
1173 if (saw_NUL)
1174 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
1175
1176 buffer->cur--;
1177 }
1178
1179 /* See if the characters of a number token are valid in a name (no
1180 '.', '+' or '-'). */
1181 static int
1182 name_p (cpp_reader *pfile, const cpp_string *string)
1183 {
1184 unsigned int i;
1185
1186 for (i = 0; i < string->len; i++)
1187 if (!is_idchar (string->text[i]))
1188 return 0;
1189
1190 return 1;
1191 }
1192
1193 /* After parsing an identifier or other sequence, produce a warning about
1194 sequences not in NFC/NFKC. */
1195 static void
1196 warn_about_normalization (cpp_reader *pfile,
1197 const cpp_token *token,
1198 const struct normalize_state *s)
1199 {
1200 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1201 && !pfile->state.skipping)
1202 {
1203 /* Make sure that the token is printed using UCNs, even
1204 if we'd otherwise happily print UTF-8. */
1205 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
1206 size_t sz;
1207
1208 sz = cpp_spell_token (pfile, token, buf, false) - buf;
1209 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
1210 cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1211 "`%.*s' is not in NFKC", (int) sz, buf);
1212 else
1213 cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1214 "`%.*s' is not in NFC", (int) sz, buf);
1215 free (buf);
1216 }
1217 }
1218
1219 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
1220 an identifier. FIRST is TRUE if this starts an identifier. */
1221 static bool
1222 forms_identifier_p (cpp_reader *pfile, int first,
1223 struct normalize_state *state)
1224 {
1225 cpp_buffer *buffer = pfile->buffer;
1226
1227 if (*buffer->cur == '$')
1228 {
1229 if (!CPP_OPTION (pfile, dollars_in_ident))
1230 return false;
1231
1232 buffer->cur++;
1233 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1234 {
1235 CPP_OPTION (pfile, warn_dollars) = 0;
1236 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1237 }
1238
1239 return true;
1240 }
1241
1242 /* Is this a syntactically valid UCN? */
1243 if (CPP_OPTION (pfile, extended_identifiers)
1244 && *buffer->cur == '\\'
1245 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1246 {
1247 buffer->cur += 2;
1248 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1249 state))
1250 return true;
1251 buffer->cur -= 2;
1252 }
1253
1254 return false;
1255 }
1256
1257 /* Helper function to get the cpp_hashnode of the identifier BASE. */
1258 static cpp_hashnode *
1259 lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1260 {
1261 cpp_hashnode *result;
1262 const uchar *cur;
1263 unsigned int len;
1264 unsigned int hash = HT_HASHSTEP (0, *base);
1265
1266 cur = base + 1;
1267 while (ISIDNUM (*cur))
1268 {
1269 hash = HT_HASHSTEP (hash, *cur);
1270 cur++;
1271 }
1272 len = cur - base;
1273 hash = HT_HASHFINISH (hash, len);
1274 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1275 base, len, hash, HT_ALLOC));
1276
1277 /* Rarely, identifiers require diagnostics when lexed. */
1278 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1279 && !pfile->state.skipping, 0))
1280 {
1281 /* It is allowed to poison the same identifier twice. */
1282 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1283 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1284 NODE_NAME (result));
1285
1286 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1287 replacement list of a variadic macro. */
1288 if (result == pfile->spec_nodes.n__VA_ARGS__
1289 && !pfile->state.va_args_ok)
1290 {
1291 if (CPP_OPTION (pfile, cplusplus))
1292 cpp_error (pfile, CPP_DL_PEDWARN,
1293 "__VA_ARGS__ can only appear in the expansion"
1294 " of a C++11 variadic macro");
1295 else
1296 cpp_error (pfile, CPP_DL_PEDWARN,
1297 "__VA_ARGS__ can only appear in the expansion"
1298 " of a C99 variadic macro");
1299 }
1300
1301 /* For -Wc++-compat, warn about use of C++ named operators. */
1302 if (result->flags & NODE_WARN_OPERATOR)
1303 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1304 "identifier \"%s\" is a special operator name in C++",
1305 NODE_NAME (result));
1306 }
1307
1308 return result;
1309 }
1310
1311 /* Get the cpp_hashnode of an identifier specified by NAME in
1312 the current cpp_reader object. If none is found, NULL is returned. */
1313 cpp_hashnode *
1314 _cpp_lex_identifier (cpp_reader *pfile, const char *name)
1315 {
1316 cpp_hashnode *result;
1317 result = lex_identifier_intern (pfile, (uchar *) name);
1318 return result;
1319 }
1320
1321 /* Lex an identifier starting at BUFFER->CUR - 1. */
1322 static cpp_hashnode *
1323 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1324 struct normalize_state *nst, cpp_hashnode **spelling)
1325 {
1326 cpp_hashnode *result;
1327 const uchar *cur;
1328 unsigned int len;
1329 unsigned int hash = HT_HASHSTEP (0, *base);
1330
1331 cur = pfile->buffer->cur;
1332 if (! starts_ucn)
1333 {
1334 while (ISIDNUM (*cur))
1335 {
1336 hash = HT_HASHSTEP (hash, *cur);
1337 cur++;
1338 }
1339 NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
1340 }
1341 pfile->buffer->cur = cur;
1342 if (starts_ucn || forms_identifier_p (pfile, false, nst))
1343 {
1344 /* Slower version for identifiers containing UCNs (or $). */
1345 do {
1346 while (ISIDNUM (*pfile->buffer->cur))
1347 {
1348 NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
1349 pfile->buffer->cur++;
1350 }
1351 } while (forms_identifier_p (pfile, false, nst));
1352 result = _cpp_interpret_identifier (pfile, base,
1353 pfile->buffer->cur - base);
1354 *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
1355 }
1356 else
1357 {
1358 len = cur - base;
1359 hash = HT_HASHFINISH (hash, len);
1360
1361 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1362 base, len, hash, HT_ALLOC));
1363 *spelling = result;
1364 }
1365
1366 /* Rarely, identifiers require diagnostics when lexed. */
1367 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1368 && !pfile->state.skipping, 0))
1369 {
1370 /* It is allowed to poison the same identifier twice. */
1371 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1372 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1373 NODE_NAME (result));
1374
1375 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1376 replacement list of a variadic macro. */
1377 if (result == pfile->spec_nodes.n__VA_ARGS__
1378 && !pfile->state.va_args_ok)
1379 {
1380 if (CPP_OPTION (pfile, cplusplus))
1381 cpp_error (pfile, CPP_DL_PEDWARN,
1382 "__VA_ARGS__ can only appear in the expansion"
1383 " of a C++11 variadic macro");
1384 else
1385 cpp_error (pfile, CPP_DL_PEDWARN,
1386 "__VA_ARGS__ can only appear in the expansion"
1387 " of a C99 variadic macro");
1388 }
1389
1390 /* For -Wc++-compat, warn about use of C++ named operators. */
1391 if (result->flags & NODE_WARN_OPERATOR)
1392 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1393 "identifier \"%s\" is a special operator name in C++",
1394 NODE_NAME (result));
1395 }
1396
1397 return result;
1398 }
1399
1400 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
1401 static void
1402 lex_number (cpp_reader *pfile, cpp_string *number,
1403 struct normalize_state *nst)
1404 {
1405 const uchar *cur;
1406 const uchar *base;
1407 uchar *dest;
1408
1409 base = pfile->buffer->cur - 1;
1410 do
1411 {
1412 cur = pfile->buffer->cur;
1413
1414 /* N.B. ISIDNUM does not include $. */
1415 while (ISIDNUM (*cur) || *cur == '.' || DIGIT_SEP (*cur)
1416 || VALID_SIGN (*cur, cur[-1]))
1417 {
1418 NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
1419 cur++;
1420 }
1421 /* A number can't end with a digit separator. */
1422 while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
1423 --cur;
1424
1425 pfile->buffer->cur = cur;
1426 }
1427 while (forms_identifier_p (pfile, false, nst));
1428
1429 number->len = cur - base;
1430 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1431 memcpy (dest, base, number->len);
1432 dest[number->len] = '\0';
1433 number->text = dest;
1434 }
1435
1436 /* Create a token of type TYPE with a literal spelling. */
1437 static void
1438 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1439 unsigned int len, enum cpp_ttype type)
1440 {
1441 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
1442
1443 memcpy (dest, base, len);
1444 dest[len] = '\0';
1445 token->type = type;
1446 token->val.str.len = len;
1447 token->val.str.text = dest;
1448 }
1449
1450 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1451 sequence from *FIRST_BUFF_P to LAST_BUFF_P. */
1452
1453 static void
1454 bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1455 _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1456 {
1457 _cpp_buff *first_buff = *first_buff_p;
1458 _cpp_buff *last_buff = *last_buff_p;
1459
1460 if (first_buff == NULL)
1461 first_buff = last_buff = _cpp_get_buff (pfile, len);
1462 else if (len > BUFF_ROOM (last_buff))
1463 {
1464 size_t room = BUFF_ROOM (last_buff);
1465 memcpy (BUFF_FRONT (last_buff), base, room);
1466 BUFF_FRONT (last_buff) += room;
1467 base += room;
1468 len -= room;
1469 last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1470 }
1471
1472 memcpy (BUFF_FRONT (last_buff), base, len);
1473 BUFF_FRONT (last_buff) += len;
1474
1475 *first_buff_p = first_buff;
1476 *last_buff_p = last_buff;
1477 }
1478
1479
1480 /* Returns true if a macro has been defined.
1481 This might not work if compile with -save-temps,
1482 or preprocess separately from compilation. */
1483
1484 static bool
1485 is_macro(cpp_reader *pfile, const uchar *base)
1486 {
1487 const uchar *cur = base;
1488 if (! ISIDST (*cur))
1489 return false;
1490 unsigned int hash = HT_HASHSTEP (0, *cur);
1491 ++cur;
1492 while (ISIDNUM (*cur))
1493 {
1494 hash = HT_HASHSTEP (hash, *cur);
1495 ++cur;
1496 }
1497 hash = HT_HASHFINISH (hash, cur - base);
1498
1499 cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1500 base, cur - base, hash, HT_NO_INSERT));
1501
1502 return !result ? false : (result->type == NT_MACRO);
1503 }
1504
1505
1506 /* Lexes a raw string. The stored string contains the spelling, including
1507 double quotes, delimiter string, '(' and ')', any leading
1508 'L', 'u', 'U' or 'u8' and 'R' modifier. Nothing is returned: the type of
1509 the literal is stored in TOKEN->type. CPP_OTHER is stored instead if the
   delimiter is malformed or a newline is hit where the string cannot
   continue, and CPP_EOF if no further input line can be fetched before
   the closing delimiter is found.

   BASE points at the first character of the literal, prefix included.
   CUR is stepped over unconditionally before scanning starts, so it
   presumably points at the opening double quote (TODO: confirm against
   the callers).
1510
1511 The spelling is NUL-terminated, but it is not guaranteed that this
1512 is the first NUL since embedded NULs are preserved. */
1513
1514 static void
1515 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1516 const uchar *cur)
1517 {
/* raw_prefix collects the delimiter (at most 16 characters) plus the
   character that ended it; temp_buffer holds the characters currently
   being matched against the delimiter. */
1518 uchar raw_prefix[17];
1519 uchar temp_buffer[18];
1520 const uchar *orig_base;
1521 unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
1522 enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
1523 raw_str_phase phase = RAW_STR_PREFIX;
1524 enum cpp_ttype type;
/* Number of bytes already moved into the first_buff..last_buff chain. */
1525 size_t total_len = 0;
1526 /* Index into temp_buffer during phases other than RAW_STR,
1527 during RAW_STR phase 17 to tell BUF_APPEND that nothing should
1528 be appended to temp_buffer. */
1529 size_t temp_buffer_len = 0;
1530 _cpp_buff *first_buff = NULL, *last_buff = NULL;
1531 size_t raw_prefix_start;
1532 _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
1533
/* The encoding prefix at BASE selects the string token type. */
1534 type = (*base == 'L' ? CPP_WSTRING :
1535 *base == 'U' ? CPP_STRING32 :
1536 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1537 : CPP_STRING);
1538
/* BUF_APPEND adds LEN bytes at STR to the buffer chain and counts them in
   total_len. When temp_buffer is accepting input (temp_buffer_len < 17)
   and the bytes are restored text of at most 2 bytes rather than a copy
   from BASE, they are also appended to temp_buffer so that they take part
   in delimiter matching. */
1539 #define BUF_APPEND(STR,LEN) \
1540 do { \
1541 bufring_append (pfile, (const uchar *)(STR), (LEN), \
1542 &first_buff, &last_buff); \
1543 total_len += (LEN); \
1544 if (__builtin_expect (temp_buffer_len < 17, 0) \
1545 && (const uchar *)(STR) != base \
1546 && (LEN) <= 2) \
1547 { \
1548 memcpy (temp_buffer + temp_buffer_len, \
1549 (const uchar *)(STR), (LEN)); \
1550 temp_buffer_len += (LEN); \
1551 } \
1552 } while (0);
1553
/* Remember where the literal began, step over the character at CUR and
   record the offset of the first delimiter character; the error path
   below uses this offset to rewind. */
1554 orig_base = base;
1555 ++cur;
1556 raw_prefix_start = cur - base;
1557 for (;;)
1558 {
1559 cppchar_t c;
1560
1561 /* If we previously performed any trigraph or line splicing
1562 transformations, undo them in between the opening and closing
1563 double quote. */
1564 while (note->pos < cur)
1565 ++note;
1566 for (; note->pos == cur; ++note)
1567 {
1568 switch (note->type)
1569 {
1570 case '\\':
1571 case ' ':
1572 /* Restore backslash followed by newline. */
1573 BUF_APPEND (base, cur - base);
1574 base = cur;
1575 BUF_APPEND ("\\", 1);
/* Also entered by goto from the "??/" trigraph case below, after "??/"
   has been appended in place of the backslash. */
1576 after_backslash:
1577 if (note->type == ' ')
1578 {
1579 /* GNU backslash whitespace newline extension. FIXME
1580 could be any sequence of non-vertical space. When we
1581 can properly restore any such sequence, we should mark
1582 this note as handled so _cpp_process_line_notes
1583 doesn't warn. */
1584 BUF_APPEND (" ", 1);
1585 }
1586
1587 BUF_APPEND ("\n", 1);
1588 break;
1589
1590 case 0:
1591 /* Already handled. */
1592 break;
1593
1594 default:
1595 if (_cpp_trigraph_map[note->type])
1596 {
1597 /* Don't warn about this trigraph in
1598 _cpp_process_line_notes, since trigraphs show up as
1599 trigraphs in raw strings. */
1600 uchar type = note->type;
1601 note->type = 0;
1602
1603 if (!CPP_OPTION (pfile, trigraphs))
1604 /* If we didn't convert the trigraph in the first
1605 place, don't do anything now either. */
1606 break;
1607
1608 BUF_APPEND (base, cur - base);
1609 base = cur;
1610 BUF_APPEND ("??", 2);
1611
1612 /* ??/ followed by newline gets two line notes, one for
1613 the trigraph and one for the backslash/newline. */
1614 if (type == '/' && note[1].pos == cur)
1615 {
1616 if (note[1].type != '\\'
1617 && note[1].type != ' ')
1618 abort ();
1619 BUF_APPEND ("/", 1);
1620 ++note;
1621 goto after_backslash;
1622 }
1623 else
1624 {
1625 /* Skip the replacement character. */
1626 base = ++cur;
1627 BUF_APPEND (&type, 1);
1628 c = type;
1629 goto check_c;
1630 }
1631 }
1632 else
1633 abort ();
1634 break;
1635 }
1636 }
/* Fetch the next character; while temp_buffer is accepting input, record
   it there for delimiter matching. */
1637 c = *cur++;
1638 if (__builtin_expect (temp_buffer_len < 17, 0))
1639 temp_buffer[temp_buffer_len++] = c;
1640
1641 check_c:
1642 if (phase == RAW_STR_PREFIX)
1643 {
/* Move newly buffered characters into raw_prefix until one that cannot
   be part of the delimiter is seen or the 16 character limit is hit. */
1644 while (raw_prefix_len < temp_buffer_len)
1645 {
1646 raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
1647 switch (raw_prefix[raw_prefix_len])
1648 {
/* Note the default label here: any character not listed in the cases
   further down ends the delimiter, exactly like these ones. */
1649 case ' ': case '(': case ')': case '\\': case '\t':
1650 case '\v': case '\f': case '\n': default:
1651 break;
1652 /* Basic source charset except the above chars. */
1653 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1654 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1655 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1656 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1657 case 'y': case 'z':
1658 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1659 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1660 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1661 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1662 case 'Y': case 'Z':
1663 case '0': case '1': case '2': case '3': case '4': case '5':
1664 case '6': case '7': case '8': case '9':
1665 case '_': case '{': case '}': case '#': case '[': case ']':
1666 case '<': case '>': case '%': case ':': case ';': case '.':
1667 case '?': case '*': case '+': case '-': case '/': case '^':
1668 case '&': case '|': case '~': case '!': case '=': case ',':
1669 case '"': case '\'':
1670 if (raw_prefix_len < 16)
1671 {
1672 raw_prefix_len++;
1673 continue;
1674 }
1675 break;
1676 }
1677
/* The delimiter has ended. Unless it ended with '(', diagnose the
   problem, rewind buffer->cur to just before the character that CUR was
   stepped over on entry, and hand back the text before it as a
   CPP_OTHER token. */
1678 if (raw_prefix[raw_prefix_len] != '(')
1679 {
1680 int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
1681 if (raw_prefix_len == 16)
1682 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1683 col, "raw string delimiter longer "
1684 "than 16 characters");
1685 else if (raw_prefix[raw_prefix_len] == '\n')
1686 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1687 col, "invalid new-line in raw "
1688 "string delimiter");
1689 else
1690 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1691 col, "invalid character '%c' in "
1692 "raw string delimiter",
1693 (int) raw_prefix[raw_prefix_len]);
1694 pfile->buffer->cur = orig_base + raw_prefix_start - 1;
1695 create_literal (pfile, token, orig_base,
1696 raw_prefix_start - 1, CPP_OTHER);
1697 if (first_buff)
1698 _cpp_release_buff (pfile, first_buff);
1699 return;
1700 }
/* Replace the '(' by '"', so that raw_prefix now spells what must follow
   a ')' to close the string: the delimiter and a double quote. */
1701 raw_prefix[raw_prefix_len] = '"';
1702 phase = RAW_STR;
1703 /* Nothing should be appended to temp_buffer during
1704 RAW_STR phase. */
1705 temp_buffer_len = 17;
1706 break;
1707 }
1708 continue;
1709 }
1710 else if (phase == RAW_STR_SUFFIX)
1711 {
/* Compare what has been buffered since the ')' with the delimiter and
   closing quote. A complete match ends the string; a partial match that
   has used up everything buffered so far waits for more input; a
   mismatch drops back to the RAW_STR phase. */
1712 while (raw_suffix_len <= raw_prefix_len
1713 && raw_suffix_len < temp_buffer_len
1714 && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
1715 raw_suffix_len++;
1716 if (raw_suffix_len > raw_prefix_len)
1717 break;
1718 if (raw_suffix_len == temp_buffer_len)
1719 continue;
1720 phase = RAW_STR;
1721 /* Nothing should be appended to temp_buffer during
1722 RAW_STR phase. */
1723 temp_buffer_len = 17;
1724 }
/* Every ')' might begin the closing sequence, so start matching afresh. */
1725 if (c == ')')
1726 {
1727 phase = RAW_STR_SUFFIX;
1728 raw_suffix_len = 0;
1729 temp_buffer_len = 0;
1730 }
1731 else if (c == '\n')
1732 {
/* The string cannot continue past this newline inside a directive, nor
   while parsing macro arguments once next_line has reached rlimit. */
1733 if (pfile->state.in_directive
1734 || (pfile->state.parsing_args
1735 && pfile->buffer->next_line >= pfile->buffer->rlimit))
1736 {
1737 cur--;
1738 type = CPP_OTHER;
1739 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1740 "unterminated raw string");
1741 break;
1742 }
1743
/* Save the text lexed so far, since fetching a fresh line may replace
   the buffer contents, then carry on with the next line. */
1744 BUF_APPEND (base, cur - base);
1745
1746 if (pfile->buffer->cur < pfile->buffer->rlimit)
1747 CPP_INCREMENT_LINE (pfile, 0);
1748 pfile->buffer->need_line = true;
1749
1750 pfile->buffer->cur = cur-1;
1751 _cpp_process_line_notes (pfile, false);
1752 if (!_cpp_get_fresh_line (pfile))
1753 {
1754 source_location src_loc = token->src_loc;
1755 token->type = CPP_EOF;
1756 /* Tell the compiler the line number of the EOF token. */
1757 token->src_loc = pfile->line_table->highest_line;
1758 token->flags = BOL;
1759 if (first_buff != NULL)
1760 _cpp_release_buff (pfile, first_buff);
1761 cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1762 "unterminated raw string");
1763 return;
1764 }
1765
1766 cur = base = pfile->buffer->cur;
1767 note = &pfile->buffer->notes[pfile->buffer->cur_note];
1768 }
1769 }
1770
1771 if (CPP_OPTION (pfile, user_literals))
1772 {
1773 /* If a string format macro, say from inttypes.h, is placed touching
1774 a string literal it could be parsed as a C++11 user-defined string
1775 literal thus breaking the program.
1776 Try to identify macros with is_macro. A warning is issued. */
1777 if (is_macro (pfile, cur))
1778 {
1779 /* Raise a warning, but do not consume subsequent tokens. */
1780 if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1781 cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1782 token->src_loc, 0,
1783 "invalid suffix on literal; C++11 requires "
1784 "a space between literal and string macro");
1785 }
1786 /* Grab user defined literal suffix. */
1787 else if (ISIDST (*cur))
1788 {
1789 type = cpp_userdef_string_add_type (type);
1790 ++cur;
1791
1792 while (ISIDNUM (*cur))
1793 ++cur;
1794 }
1795 }
1796
/* Build the token spelling. If nothing was diverted to the buffer chain
   the text from BASE to CUR is the whole literal; otherwise concatenate
   the chained buffers and the tail still pending between BASE and CUR. */
1797 pfile->buffer->cur = cur;
1798 if (first_buff == NULL)
1799 create_literal (pfile, token, base, cur - base, type);
1800 else
1801 {
1802 uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1803
1804 token->type = type;
1805 token->val.str.len = total_len + (cur - base);
1806 token->val.str.text = dest;
1807 last_buff = first_buff;
1808 while (last_buff != NULL)
1809 {
1810 memcpy (dest, last_buff->base,
1811 BUFF_FRONT (last_buff) - last_buff->base);
1812 dest += BUFF_FRONT (last_buff) - last_buff->base;
1813 last_buff = last_buff->next;
1814 }
1815 _cpp_release_buff (pfile, first_buff);
1816 memcpy (dest, base, cur - base);
1817 dest[cur - base] = '\0';
1818 }
1819 }
1820
1821 /* Lexes a string, character constant, or angle-bracketed header file
1822 name. The stored string contains the spelling, including opening
1823 quote and any leading 'L', 'u', 'U' or 'u8' and optional
1824 'R' modifier. It returns the type of the literal, or CPP_OTHER
1825 if it was not properly terminated, or CPP_LESS for an unterminated
1826 header name which must be relexed as normal tokens.
1827
1828 The spelling is NUL-terminated, but it is not guaranteed that this
1829 is the first NUL since embedded NULs are preserved. */
1830 static void
1831 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
1832 {
1833 bool saw_NUL = false;
1834 const uchar *cur;
1835 cppchar_t terminator;
1836 enum cpp_ttype type;
1837
1838 cur = base;
1839 terminator = *cur++;
1840 if (terminator == 'L' || terminator == 'U')
1841 terminator = *cur++;
1842 else if (terminator == 'u')
1843 {
1844 terminator = *cur++;
1845 if (terminator == '8')
1846 terminator = *cur++;
1847 }
1848 if (terminator == 'R')
1849 {
1850 lex_raw_string (pfile, token, base, cur);
1851 return;
1852 }
1853 if (terminator == '"')
1854 type = (*base == 'L' ? CPP_WSTRING :
1855 *base == 'U' ? CPP_STRING32 :
1856 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1857 : CPP_STRING);
1858 else if (terminator == '\'')
1859 type = (*base == 'L' ? CPP_WCHAR :
1860 *base == 'U' ? CPP_CHAR32 :
1861 *base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
1862 : CPP_CHAR);
1863 else
1864 terminator = '>', type = CPP_HEADER_NAME;
1865
1866 for (;;)
1867 {
1868 cppchar_t c = *cur++;
1869
1870 /* In #include-style directives, terminators are not escapable. */
1871 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1872 cur++;
1873 else if (c == terminator)
1874 break;
1875 else if (c == '\n')
1876 {
1877 cur--;
1878 /* Unmatched quotes always yield undefined behavior, but
1879 greedy lexing means that what appears to be an unterminated
1880 header name may actually be a legitimate sequence of tokens. */
1881 if (terminator == '>')
1882 {
1883 token->type = CPP_LESS;
1884 return;
1885 }
1886 type = CPP_OTHER;
1887 break;
1888 }
1889 else if (c == '\0')
1890 saw_NUL = true;
1891 }
1892
1893 if (saw_NUL && !pfile->state.skipping)
1894 cpp_error (pfile, CPP_DL_WARNING,
1895 "null character(s) preserved in literal");
1896
1897 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1898 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1899 (int) terminator);
1900
1901 if (CPP_OPTION (pfile, user_literals))
1902 {
1903 /* If a string format macro, say from inttypes.h, is placed touching
1904 a string literal it could be parsed as a C++11 user-defined string
1905 literal thus breaking the program.
1906 Try to identify macros with is_macro. A warning is issued. */
1907 if (is_macro (pfile, cur))
1908 {
1909 /* Raise a warning, but do not consume subsequent tokens. */
1910 if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1911 cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1912 token->src_loc, 0,
1913 "invalid suffix on literal; C++11 requires "
1914 "a space between literal and string macro");
1915 }
1916 /* Grab user defined literal suffix. */
1917 else if (ISIDST (*cur))
1918 {
1919 type = cpp_userdef_char_add_type (type);
1920 type = cpp_userdef_string_add_type (type);
1921 ++cur;
1922
1923 while (ISIDNUM (*cur))
1924 ++cur;
1925 }
1926 }
1927 else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
1928 && is_macro (pfile, cur)
1929 && !pfile->state.skipping)
1930 cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
1931 token->src_loc, 0, "C++11 requires a space "
1932 "between string literal and macro");
1933
1934 pfile->buffer->cur = cur;
1935 create_literal (pfile, token, base, cur - base, type);
1936 }
1937
1938 /* Return the comment table. The client may not make any assumption
1939 about the ordering of the table. */
1940 cpp_comment_table *
1941 cpp_get_comments (cpp_reader *pfile)
1942 {
1943 return &pfile->comments;
1944 }
1945
1946 /* Append a comment to the end of the comment table. */
1947 static void
1948 store_comment (cpp_reader *pfile, cpp_token *token)
1949 {
1950 int len;
1951
1952 if (pfile->comments.allocated == 0)
1953 {
1954 pfile->comments.allocated = 256;
1955 pfile->comments.entries = (cpp_comment *) xmalloc
1956 (pfile->comments.allocated * sizeof (cpp_comment));
1957 }
1958
1959 if (pfile->comments.count == pfile->comments.allocated)
1960 {
1961 pfile->comments.allocated *= 2;
1962 pfile->comments.entries = (cpp_comment *) xrealloc
1963 (pfile->comments.entries,
1964 pfile->comments.allocated * sizeof (cpp_comment));
1965 }
1966
1967 len = token->val.str.len;
1968
1969 /* Copy comment. Note, token may not be NULL terminated. */
1970 pfile->comments.entries[pfile->comments.count].comment =
1971 (char *) xmalloc (sizeof (char) * (len + 1));
1972 memcpy (pfile->comments.entries[pfile->comments.count].comment,
1973 token->val.str.text, len);
1974 pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1975
1976 /* Set source location. */
1977 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1978
1979 /* Increment the count of entries in the comment table. */
1980 pfile->comments.count++;
1981 }
1982
1983 /* The stored comment includes the comment start and any terminator. */
1984 static void
1985 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1986 cppchar_t type)
1987 {
1988 unsigned char *buffer;
1989 unsigned int len, clen, i;
1990
1991 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
1992
1993 /* C++ comments probably (not definitely) have moved past a new
1994 line, which we don't want to save in the comment. */
1995 if (is_vspace (pfile->buffer->cur[-1]))
1996 len--;
1997
1998 /* If we are currently in a directive or in argument parsing, then
1999 we need to store all C++ comments as C comments internally, and
2000 so we need to allocate a little extra space in that case.
2001
2002 Note that the only time we encounter a directive here is
2003 when we are saving comments in a "#define". */
2004 clen = ((pfile->state.in_directive || pfile->state.parsing_args)
2005 && type == '/') ? len + 2 : len;
2006
2007 buffer = _cpp_unaligned_alloc (pfile, clen);
2008
2009 token->type = CPP_COMMENT;
2010 token->val.str.len = clen;
2011 token->val.str.text = buffer;
2012
2013 buffer[0] = '/';
2014 memcpy (buffer + 1, from, len - 1);
2015
2016 /* Finish conversion to a C comment, if necessary. */
2017 if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
2018 {
2019 buffer[1] = '*';
2020 buffer[clen - 2] = '*';
2021 buffer[clen - 1] = '/';
2022 /* As there can be in a C++ comments illegal sequences for C comments
2023 we need to filter them out. */
2024 for (i = 2; i < (clen - 2); i++)
2025 if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
2026 buffer[i] = '|';
2027 }
2028
2029 /* Finally store this comment for use by clients of libcpp. */
2030 store_comment (pfile, token);
2031 }
2032
2033 /* Allocate COUNT tokens for RUN. */
2034 void
2035 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
2036 {
2037 run->base = XNEWVEC (cpp_token, count);
2038 run->limit = run->base + count;
2039 run->next = NULL;
2040 }
2041
2042 /* Returns the next tokenrun, or creates one if there is none. */
2043 static tokenrun *
2044 next_tokenrun (tokenrun *run)
2045 {
2046 if (run->next == NULL)
2047 {
2048 run->next = XNEW (tokenrun);
2049 run->next->prev = run;
2050 _cpp_init_tokenrun (run->next, 250);
2051 }
2052
2053 return run->next;
2054 }
2055
2056 /* Return the number of not yet processed token in a given
2057 context. */
2058 int
2059 _cpp_remaining_tokens_num_in_context (cpp_context *context)
2060 {
2061 if (context->tokens_kind == TOKENS_KIND_DIRECT)
2062 return (LAST (context).token - FIRST (context).token);
2063 else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2064 || context->tokens_kind == TOKENS_KIND_EXTENDED)
2065 return (LAST (context).ptoken - FIRST (context).ptoken);
2066 else
2067 abort ();
2068 }
2069
2070 /* Returns the token present at index INDEX in a given context. If
2071 INDEX is zero, the next token to be processed is returned. */
2072 static const cpp_token*
2073 _cpp_token_from_context_at (cpp_context *context, int index)
2074 {
2075 if (context->tokens_kind == TOKENS_KIND_DIRECT)
2076 return &(FIRST (context).token[index]);
2077 else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2078 || context->tokens_kind == TOKENS_KIND_EXTENDED)
2079 return FIRST (context).ptoken[index];
2080 else
2081 abort ();
2082 }
2083
2084 /* Look ahead in the input stream. */
2085 const cpp_token *
2086 cpp_peek_token (cpp_reader *pfile, int index)
2087 {
2088 cpp_context *context = pfile->context;
2089 const cpp_token *peektok;
2090 int count;
2091
2092 /* First, scan through any pending cpp_context objects. */
2093 while (context->prev)
2094 {
2095 ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
2096
2097 if (index < (int) sz)
2098 return _cpp_token_from_context_at (context, index);
2099 index -= (int) sz;
2100 context = context->prev;
2101 }
2102
2103 /* We will have to read some new tokens after all (and do so
2104 without invalidating preceding tokens). */
2105 count = index;
2106 pfile->keep_tokens++;
2107
2108 /* For peeked tokens temporarily disable line_change reporting,
2109 until the tokens are parsed for real. */
2110 void (*line_change) (cpp_reader *, const cpp_token *, int)
2111 = pfile->cb.line_change;
2112 pfile->cb.line_change = NULL;
2113
2114 do
2115 {
2116 peektok = _cpp_lex_token (pfile);
2117 if (peektok->type == CPP_EOF)
2118 {
2119 index--;
2120 break;
2121 }
2122 }
2123 while (index--);
2124
2125 _cpp_backup_tokens_direct (pfile, count - index);
2126 pfile->keep_tokens--;
2127 pfile->cb.line_change = line_change;
2128
2129 return peektok;
2130 }
2131
2132 /* Allocate a single token that is invalidated at the same time as the
2133 rest of the tokens on the line. Has its line and col set to the
2134 same as the last lexed token, so that diagnostics appear in the
2135 right place. */
2136 cpp_token *
2137 _cpp_temp_token (cpp_reader *pfile)
2138 {
2139 cpp_token *old, *result;
2140 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
2141 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
2142
2143 old = pfile->cur_token - 1;
2144 /* Any pre-existing lookaheads must not be clobbered. */
2145 if (la)
2146 {
2147 if (sz <= la)
2148 {
2149 tokenrun *next = next_tokenrun (pfile->cur_run);
2150
2151 if (sz < la)
2152 memmove (next->base + 1, next->base,
2153 (la - sz) * sizeof (cpp_token));
2154
2155 next->base[0] = pfile->cur_run->limit[-1];
2156 }
2157
2158 if (sz > 1)
2159 memmove (pfile->cur_token + 1, pfile->cur_token,
2160 MIN (la, sz - 1) * sizeof (cpp_token));
2161 }
2162
2163 if (!sz && pfile->cur_token == pfile->cur_run->limit)
2164 {
2165 pfile->cur_run = next_tokenrun (pfile->cur_run);
2166 pfile->cur_token = pfile->cur_run->base;
2167 }
2168
2169 result = pfile->cur_token++;
2170 result->src_loc = old->src_loc;
2171 return result;
2172 }
2173
2174 /* Lex a token into RESULT (external interface). Takes care of issues
2175 like directive handling, token lookahead, multiple include
2176 optimization and skipping. */
2177 const cpp_token *
2178 _cpp_lex_token (cpp_reader *pfile)
2179 {
2180 cpp_token *result;
2181
2182 for (;;)
2183 {
2184 if (pfile->cur_token == pfile->cur_run->limit)
2185 {
2186 pfile->cur_run = next_tokenrun (pfile->cur_run);
2187 pfile->cur_token = pfile->cur_run->base;
2188 }
2189 /* We assume that the current token is somewhere in the current
2190 run. */
2191 if (pfile->cur_token < pfile->cur_run->base
2192 || pfile->cur_token >= pfile->cur_run->limit)
2193 abort ();
2194
2195 if (pfile->lookaheads)
2196 {
2197 pfile->lookaheads--;
2198 result = pfile->cur_token++;
2199 }
2200 else
2201 result = _cpp_lex_direct (pfile);
2202
2203 if (result->flags & BOL)
2204 {
2205 /* Is this a directive. If _cpp_handle_directive returns
2206 false, it is an assembler #. */
2207 if (result->type == CPP_HASH
2208 /* 6.10.3 p 11: Directives in a list of macro arguments
2209 gives undefined behavior. This implementation
2210 handles the directive as normal. */
2211 && pfile->state.parsing_args != 1)
2212 {
2213 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
2214 {
2215 if (pfile->directive_result.type == CPP_PADDING)
2216 continue;
2217 result = &pfile->directive_result;
2218 }
2219 }
2220 else if (pfile->state.in_deferred_pragma)
2221 result = &pfile->directive_result;
2222
2223 if (pfile->cb.line_change && !pfile->state.skipping)
2224 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
2225 }
2226
2227 /* We don't skip tokens in directives. */
2228 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
2229 break;
2230
2231 /* Outside a directive, invalidate controlling macros. At file
2232 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
2233 get here and MI optimization works. */
2234 pfile->mi_valid = false;
2235
2236 if (!pfile->state.skipping || result->type == CPP_EOF)
2237 break;
2238 }
2239
2240 return result;
2241 }
2242
2243 /* Returns true if a fresh line has been loaded. */
2244 bool
2245 _cpp_get_fresh_line (cpp_reader *pfile)
2246 {
2247 int return_at_eof;
2248
2249 /* We can't get a new line until we leave the current directive. */
2250 if (pfile->state.in_directive)
2251 return false;
2252
2253 for (;;)
2254 {
2255 cpp_buffer *buffer = pfile->buffer;
2256
2257 if (!buffer->need_line)
2258 return true;
2259
2260 if (buffer->next_line < buffer->rlimit)
2261 {
2262 _cpp_clean_line (pfile);
2263 return true;
2264 }
2265
2266 /* First, get out of parsing arguments state. */
2267 if (pfile->state.parsing_args)
2268 return false;
2269
2270 /* End of buffer. Non-empty files should end in a newline. */
2271 if (buffer->buf != buffer->rlimit
2272 && buffer->next_line > buffer->rlimit
2273 && !buffer->from_stage3)
2274 {
2275 /* Clip to buffer size. */
2276 buffer->next_line = buffer->rlimit;
2277 }
2278
2279 return_at_eof = buffer->return_at_eof;
2280 _cpp_pop_buffer (pfile);
2281 if (pfile->buffer == NULL || return_at_eof)
2282 return false;
2283 }
2284 }
2285
/* Helper for _cpp_lex_direct-style code: relies on variables named
   `buffer' and `result' being in scope at the point of use.  Sets
   result->type to ELSE_TYPE; if the next unread character of the
   buffer equals CHAR, that character is consumed and result->type
   becomes THEN_TYPE instead.  All arguments are parenthesized so
   that compound expressions can be passed safely.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      result->type = (ELSE_TYPE);               \
      if (*buffer->cur == (CHAR))               \
        {                                       \
          buffer->cur++;                        \
          result->type = (THEN_TYPE);           \
        }                                       \
    }                                           \
  while (0)
2294
2295 /* Lex a token into pfile->cur_token, which is also incremented, to
2296 get diagnostics pointing to the correct location.
2297
2298 Does not handle issues such as token lookahead, multiple-include
2299 optimization, directives, skipping etc. This function is only
2300 suitable for use by _cpp_lex_token, and in special cases like
2301 lex_expansion_token which doesn't care for any of these issues.
2302
2303 When meeting a newline, returns CPP_EOF if parsing a directive,
2304 otherwise returns to the start of the token buffer if permissible.
2305 Returns the location of the lexed token. */
2306 cpp_token *
2307 _cpp_lex_direct (cpp_reader *pfile)
2308 {
2309 cppchar_t c;
2310 cpp_buffer *buffer;
2311 const unsigned char *comment_start;
/* Claim the next token slot.  It may be replaced further down when
   the base run is reused at the start of a line.  */
2312 cpp_token *result = pfile->cur_token++;
2313
/* Entry point for (re)starting at the beginning of a line; the flags
   of the token being built are reset.  */
2314 fresh_line:
2315 result->flags = 0;
2316 buffer = pfile->buffer;
2317 if (buffer->need_line)
2318 {
/* A deferred pragma ends with its line: report CPP_PRAGMA_EOL and
   leave the deferred-pragma state.  */
2319 if (pfile->state.in_deferred_pragma)
2320 {
2321 result->type = CPP_PRAGMA_EOL;
2322 pfile->state.in_deferred_pragma = false;
2323 if (!pfile->state.pragma_allow_expansion)
2324 pfile->state.prevent_expansion--;
2325 return result;
2326 }
/* No further line could be loaded: the token is CPP_EOF.  */
2327 if (!_cpp_get_fresh_line (pfile))
2328 {
2329 result->type = CPP_EOF;
2330 if (!pfile->state.in_directive)
2331 {
2332 /* Tell the compiler the line number of the EOF token. */
2333 result->src_loc = pfile->line_table->highest_line;
2334 result->flags = BOL;
2335 }
2336 return result;
2337 }
/* When keep_tokens is zero, rewind to the first slot of the base run
   and build the token there instead.  */
2338 if (!pfile->keep_tokens)
2339 {
2340 pfile->cur_run = &pfile->base_run;
2341 result = pfile->base_run.base;
2342 pfile->cur_token = result + 1;
2343 }
/* The token is the first one on its line.  While parsing_args is 2
   it is additionally marked as preceded by whitespace.  */
2344 result->flags = BOL;
2345 if (pfile->state.parsing_args == 2)
2346 result->flags |= PREV_WHITE;
2347 }
/* _cpp_get_fresh_line may have popped buffers, so reload.  */
2348 buffer = pfile->buffer;
2349 update_tokens_line:
2350 result->src_loc = pfile->line_table->highest_line;
2351
/* Re-entered after whitespace has been skipped.  */
2352 skipped_white:
2353 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
2354 && !pfile->overlaid_buffer)
2355 {
2356 _cpp_process_line_notes (pfile, false);
2357 result->src_loc = pfile->line_table->highest_line;
2358 }
/* Fetch the character that decides the token kind; buffer->cur now
   points just past it.  */
2359 c = *buffer->cur++;
2360
/* A forced location, when set, overrides the computed one.  */
2361 if (pfile->forced_token_location_p)
2362 result->src_loc = *pfile->forced_token_location_p;
2363 else
2364 result->src_loc = linemap_position_for_column (pfile->line_table,
2365 CPP_BUF_COLUMN (buffer, buffer->cur));
2366
2367 switch (c)
2368 {
2369 case ' ': case '\t': case '\f': case '\v': case '\0':
2370 result->flags |= PREV_WHITE;
2371 skip_whitespace (pfile, c);
2372 goto skipped_white;
2373
/* End of line: flag that a new line is needed and start over at
   fresh_line, which fetches it (or produces CPP_EOF).  */
2374 case '\n':
2375 if (buffer->cur < buffer->rlimit)
2376 CPP_INCREMENT_LINE (pfile, 0);
2377 buffer->need_line = true;
2378 goto fresh_line;
2379
2380 case '0': case '1': case '2': case '3': case '4':
2381 case '5': case '6': case '7': case '8': case '9':
2382 {
2383 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2384 result->type = CPP_NUMBER;
2385 lex_number (pfile, &result->val.str, &nst);
2386 warn_about_normalization (pfile, result, &nst);
2387 break;
2388 }
2389
2390 case 'L':
2391 case 'u':
2392 case 'U':
2393 case 'R':
2394 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2395 wide strings or raw strings. */
2396 if (c == 'L' || CPP_OPTION (pfile, rliterals)
2397 || (c != 'R' && CPP_OPTION (pfile, uliterals)))
2398 {
/* Accepted continuations: a quote character directly after the
   prefix ('R' takes only a double quote), an 'R' plus double quote
   when raw literals are enabled, or, after 'u', an '8' followed by
   a double quote, by a single quote (only with utf8_char_literals),
   or by 'R' plus double quote (only with rliterals).  */
2399 if ((*buffer->cur == '\'' && c != 'R')
2400 || *buffer->cur == '"'
2401 || (*buffer->cur == 'R'
2402 && c != 'R'
2403 && buffer->cur[1] == '"'
2404 && CPP_OPTION (pfile, rliterals))
2405 || (*buffer->cur == '8'
2406 && c == 'u'
2407 && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\''
2408 && CPP_OPTION (pfile, utf8_char_literals)))
2409 || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
2410 && CPP_OPTION (pfile, rliterals)))))
2411 {
2412 lex_string (pfile, result, buffer->cur - 1);
2413 break;
2414 }
2415 }
2416 /* Fall through. */
2417
2418 case '_':
2419 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2420 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2421 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2422 case 's': case 't': case 'v': case 'w': case 'x':
2423 case 'y': case 'z':
2424 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2425 case 'G': case 'H': case 'I': case 'J': case 'K':
2426 case 'M': case 'N': case 'O': case 'P': case 'Q':
2427 case 'S': case 'T': case 'V': case 'W': case 'X':
2428 case 'Y': case 'Z':
2429 result->type = CPP_NAME;
2430 {
2431 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2432 result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
2433 &nst,
2434 &result->val.node.spelling);
2435 warn_about_normalization (pfile, result, &nst);
2436 }
2437
2438 /* Convert named operators to their proper types. */
2439 if (result->val.node.node->flags & NODE_OPERATOR)
2440 {
2441 result->flags |= NAMED_OP;
2442 result->type = (enum cpp_ttype) result->val.node.node->directive_index;
2443 }
2444 break;
2445
2446 case '\'':
2447 case '"':
2448 lex_string (pfile, result, buffer->cur - 1);
2449 break;
2450
2451 case '/':
2452 /* A potential block or line comment. */
2453 comment_start = buffer->cur;
2454 c = *buffer->cur;
2455
2456 if (c == '*')
2457 {
2458 if (_cpp_skip_block_comment (pfile))
2459 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
2460 }
2461 else if (c == '/' && ! CPP_OPTION (pfile, traditional))
2462 {
2463 /* Don't warn for system headers. */
2464 if (cpp_in_system_header (pfile))
2465 ;
2466 /* Warn about comments if pedantically GNUC89, and not
2467 in system headers. */
2468 else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
2469 && CPP_PEDANTIC (pfile)
2470 && ! buffer->warned_cplusplus_comments)
2471 {
2472 cpp_error (pfile, CPP_DL_PEDWARN,
2473 "C++ style comments are not allowed in ISO C90");
2474 cpp_error (pfile, CPP_DL_PEDWARN,
2475 "(this will be reported only once per input file)");
2476 buffer->warned_cplusplus_comments = 1;
2477 }
2478 /* Or if specifically desired via -Wc90-c99-compat. */
2479 else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
2480 && ! CPP_OPTION (pfile, cplusplus)
2481 && ! buffer->warned_cplusplus_comments)
2482 {
2483 cpp_error (pfile, CPP_DL_WARNING,
2484 "C++ style comments are incompatible with C90");
2485 cpp_error (pfile, CPP_DL_WARNING,
2486 "(this will be reported only once per input file)");
2487 buffer->warned_cplusplus_comments = 1;
2488 }
2489 /* In C89/C94, C++ style comments are forbidden. */
2490 else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
2491 || CPP_OPTION (pfile, lang) == CLK_STDC94))
2492 {
2493 /* But don't be confused about valid code such as
2494 - // immediately followed by *,
2495 - // in a preprocessing directive,
2496 - // in an #if 0 block. */
2497 if (buffer->cur[1] == '*'
2498 || pfile->state.in_directive
2499 || pfile->state.skipping)
2500 {
2501 result->type = CPP_DIV;
2502 break;
2503 }
2504 else if (! buffer->warned_cplusplus_comments)
2505 {
2506 cpp_error (pfile, CPP_DL_ERROR,
2507 "C++ style comments are not allowed in ISO C90");
2508 cpp_error (pfile, CPP_DL_ERROR,
2509 "(this will be reported only once per input "
2510 "file)");
2511 buffer->warned_cplusplus_comments = 1;
2512 }
2513 }
2514 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
2515 cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
2516 }
2517 else if (c == '=')
2518 {
2519 buffer->cur++;
2520 result->type = CPP_DIV_EQ;
2521 break;
2522 }
2523 else
2524 {
2525 result->type = CPP_DIV;
2526 break;
2527 }
2528
/* Only reached after a comment has been skipped.  Unless comments
   are being saved, note the whitespace on the token and lex again
   from update_tokens_line.  */
2529 if (!pfile->state.save_comments)
2530 {
2531 result->flags |= PREV_WHITE;
2532 goto update_tokens_line;
2533 }
2534
2535 /* Save the comment as a token in its own right. */
2536 save_comment (pfile, result, comment_start, c);
2537 break;
2538
2539 case '<':
/* With angled_headers set, lex_string gets the first go; only when
   it leaves the type as CPP_LESS (presumably: no header-name was
   found - confirm in lex_string) is '<' handled as an operator.  */
2540 if (pfile->state.angled_headers)
2541 {
2542 lex_string (pfile, result, buffer->cur - 1);
2543 if (result->type != CPP_LESS)
2544 break;
2545 }
2546
2547 result->type = CPP_LESS;
2548 if (*buffer->cur == '=')
2549 buffer->cur++, result->type = CPP_LESS_EQ;
2550 else if (*buffer->cur == '<')
2551 {
2552 buffer->cur++;
2553 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
2554 }
2555 else if (CPP_OPTION (pfile, digraphs))
2556 {
2557 if (*buffer->cur == ':')
2558 {
2559 /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
2560 three characters are <:: and the subsequent character
2561 is neither : nor >, the < is treated as a preprocessor
2562 token by itself". */
2563 if (CPP_OPTION (pfile, cplusplus)
2564 && CPP_OPTION (pfile, lang) != CLK_CXX98
2565 && CPP_OPTION (pfile, lang) != CLK_GNUCXX
2566 && buffer->cur[1] == ':'
2567 && buffer->cur[2] != ':' && buffer->cur[2] != '>')
2568 break;
2569
2570 buffer->cur++;
2571 result->flags |= DIGRAPH;
2572 result->type = CPP_OPEN_SQUARE;
2573 }
2574 else if (*buffer->cur == '%')
2575 {
2576 buffer->cur++;
2577 result->flags |= DIGRAPH;
2578 result->type = CPP_OPEN_BRACE;
2579 }
2580 }
2581 break;
2582
2583 case '>':
2584 result->type = CPP_GREATER;
2585 if (*buffer->cur == '=')
2586 buffer->cur++, result->type = CPP_GREATER_EQ;
2587 else if (*buffer->cur == '>')
2588 {
2589 buffer->cur++;
2590 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
2591 }
2592 break;
2593
2594 case '%':
2595 result->type = CPP_MOD;
2596 if (*buffer->cur == '=')
2597 buffer->cur++, result->type = CPP_MOD_EQ;
2598 else if (CPP_OPTION (pfile, digraphs))
2599 {
2600 if (*buffer->cur == ':')
2601 {
2602 buffer->cur++;
2603 result->flags |= DIGRAPH;
2604 result->type = CPP_HASH;
2605 if (*buffer->cur == '%' && buffer->cur[1] == ':')
2606 buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
2607 }
2608 else if (*buffer->cur == '>')
2609 {
2610 buffer->cur++;
2611 result->flags |= DIGRAPH;
2612 result->type = CPP_CLOSE_BRACE;
2613 }
2614 }
2615 break;
2616
2617 case '.':
2618 result->type = CPP_DOT;
2619 if (ISDIGIT (*buffer->cur))
2620 {
2621 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2622 result->type = CPP_NUMBER;
2623 lex_number (pfile, &result->val.str, &nst);
2624 warn_about_normalization (pfile, result, &nst);
2625 }
2626 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
2627 buffer->cur += 2, result->type = CPP_ELLIPSIS;
2628 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2629 buffer->cur++, result->type = CPP_DOT_STAR;
2630 break;
2631
2632 case '+':
2633 result->type = CPP_PLUS;
2634 if (*buffer->cur == '+')
2635 buffer->cur++, result->type = CPP_PLUS_PLUS;
2636 else if (*buffer->cur == '=')
2637 buffer->cur++, result->type = CPP_PLUS_EQ;
2638 break;
2639
2640 case '-':
2641 result->type = CPP_MINUS;
2642 if (*buffer->cur == '>')
2643 {
2644 buffer->cur++;
2645 result->type = CPP_DEREF;
2646 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2647 buffer->cur++, result->type = CPP_DEREF_STAR;
2648 }
2649 else if (*buffer->cur == '-')
2650 buffer->cur++, result->type = CPP_MINUS_MINUS;
2651 else if (*buffer->cur == '=')
2652 buffer->cur++, result->type = CPP_MINUS_EQ;
2653 break;
2654
2655 case '&':
2656 result->type = CPP_AND;
2657 if (*buffer->cur == '&')
2658 buffer->cur++, result->type = CPP_AND_AND;
2659 else if (*buffer->cur == '=')
2660 buffer->cur++, result->type = CPP_AND_EQ;
2661 break;
2662
2663 case '|':
2664 result->type = CPP_OR;
2665 if (*buffer->cur == '|')
2666 buffer->cur++, result->type = CPP_OR_OR;
2667 else if (*buffer->cur == '=')
2668 buffer->cur++, result->type = CPP_OR_EQ;
2669 break;
2670
2671 case ':':
2672 result->type = CPP_COLON;
2673 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
2674 buffer->cur++, result->type = CPP_SCOPE;
2675 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
2676 {
2677 buffer->cur++;
2678 result->flags |= DIGRAPH;
2679 result->type = CPP_CLOSE_SQUARE;
2680 }
2681 break;
2682
2683 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
2684 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
2685 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
2686 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
2687 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
2688
2689 case '?': result->type = CPP_QUERY; break;
2690 case '~': result->type = CPP_COMPL; break;
2691 case ',': result->type = CPP_COMMA; break;
2692 case '(': result->type = CPP_OPEN_PAREN; break;
2693 case ')': result->type = CPP_CLOSE_PAREN; break;
2694 case '[': result->type = CPP_OPEN_SQUARE; break;
2695 case ']': result->type = CPP_CLOSE_SQUARE; break;
2696 case '{': result->type = CPP_OPEN_BRACE; break;
2697 case '}': result->type = CPP_CLOSE_BRACE; break;
2698 case ';': result->type = CPP_SEMICOLON; break;
2699
2700 /* @ is a punctuator in Objective-C. */
2701 case '@': result->type = CPP_ATSIGN; break;
2702
2703 case '$':
2704 case '\\':
2705 {
/* Step back so that forms_identifier_p examines this character
   itself.  */
2706 const uchar *base = --buffer->cur;
2707 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2708
2709 if (forms_identifier_p (pfile, true, &nst))
2710 {
2711 result->type = CPP_NAME;
2712 result->val.node.node = lex_identifier (pfile, base, true, &nst,
2713 &result->val.node.spelling);
2714 warn_about_normalization (pfile, result, &nst);
2715 break;
2716 }
/* Not the start of an identifier: consume the character again and
   fall through so that it becomes a one-character CPP_OTHER.  */
2717 buffer->cur++;
2718 }
2719
2720 default:
2721 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
2722 break;
2723 }
2724
2725 return result;
2726 }
2727
2728 /* An upper bound on the number of bytes needed to spell TOKEN.
2729 Does not include preceding whitespace. */
2730 unsigned int
2731 cpp_token_len (const cpp_token *token)
2732 {
2733 unsigned int len;
2734
2735 switch (TOKEN_SPELL (token))
2736 {
2737 default: len = 6; break;
2738 case SPELL_LITERAL: len = token->val.str.len; break;
2739 case SPELL_IDENT: len = NODE_LEN (token->val.node.node) * 10; break;
2740 }
2741
2742 return len;
2743 }
2744
/* Decode the UTF-8 sequence starting at NAME and store the
   corresponding escape, a backslash and 'U' followed by eight
   lower-case hexadecimal digits, in BUFFER.  Exactly 10 bytes are
   written and no terminating NUL is added.  Returns the number of
   bytes of NAME that made up the sequence, as given by the leading
   one bits of its first byte.  Aborts if a continuation byte is not
   of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *p = name;
  unsigned char *out = buffer;
  unsigned lead = *p;
  unsigned long code_point;
  int seq_len = 0;
  int remaining;
  int shift;

  /* The number of leading one bits in the first byte is the length
     of the whole sequence.  */
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The payload of the first byte is what follows the length bits.  */
  code_point = *p & (0x7F >> seq_len);

  /* Every continuation byte contributes its low six bits.  */
  for (remaining = seq_len - 1; remaining > 0; remaining--)
    {
      p++;

      /* Ill-formed UTF-8.  */
      if ((*p & ~0x3F) != 0x80)
        abort ();

      code_point = (code_point << 6) | (*p & 0x3F);
    }

  *out++ = '\\';
  *out++ = 'U';
  for (shift = 28; shift >= 0; shift -= 4)
    *out++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
2778
2779 /* Given a token TYPE corresponding to a digraph, return a pointer to
2780 the spelling of the digraph. */
2781 static const unsigned char *
2782 cpp_digraph2name (enum cpp_ttype type)
2783 {
2784 return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2785 }
2786
2787 /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
2788 The buffer must already contain the enough space to hold the
2789 token's spelling. Returns a pointer to the character after the
2790 last character written. */
2791 unsigned char *
2792 _cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
2793 {
2794 size_t i;
2795 const unsigned char *name = NODE_NAME (ident);
2796
2797 for (i = 0; i < NODE_LEN (ident); i++)
2798 if (name[i] & ~0x7F)
2799 {
2800 i += utf8_to_ucn (buffer, name + i) - 1;
2801 buffer += 10;
2802 }
2803 else
2804 *buffer++ = name[i];
2805
2806 return buffer;
2807 }
2808
2809 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
2810 already contain the enough space to hold the token's spelling.
2811 Returns a pointer to the character after the last character written.
2812 FORSTRING is true if this is to be the spelling after translation
2813 phase 1 (with the original spelling of extended identifiers), false
2814 if extended identifiers should always be written using UCNs (there is
2815 no option for always writing them in the internal UTF-8 form).
2816 FIXME: Would be nice if we didn't need the PFILE argument. */
2817 unsigned char *
2818 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
2819 unsigned char *buffer, bool forstring)
2820 {
2821 switch (TOKEN_SPELL (token))
2822 {
2823 case SPELL_OPERATOR:
2824 {
2825 const unsigned char *spelling;
2826 unsigned char c;
2827
2828 if (token->flags & DIGRAPH)
2829 spelling = cpp_digraph2name (token->type);
2830 else if (token->flags & NAMED_OP)
2831 goto spell_ident;
2832 else
2833 spelling = TOKEN_NAME (token);
2834
2835 while ((c = *spelling++) != '\0')
2836 *buffer++ = c;
2837 }
2838 break;
2839
2840 spell_ident:
2841 case SPELL_IDENT:
2842 if (forstring)
2843 {
2844 memcpy (buffer, NODE_NAME (token->val.node.spelling),
2845 NODE_LEN (token->val.node.spelling));
2846 buffer += NODE_LEN (token->val.node.spelling);
2847 }
2848 else
2849 buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
2850 break;
2851
2852 case SPELL_LITERAL:
2853 memcpy (buffer, token->val.str.text, token->val.str.len);
2854 buffer += token->val.str.len;
2855 break;
2856
2857 case SPELL_NONE:
2858 cpp_error (pfile, CPP_DL_ICE,
2859 "unspellable token %s", TOKEN_NAME (token));
2860 break;
2861 }
2862
2863 return buffer;
2864 }
2865
2866 /* Returns TOKEN spelt as a null-terminated string. The string is
2867 freed when the reader is destroyed. Useful for diagnostics. */
2868 unsigned char *
2869 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
2870 {
2871 unsigned int len = cpp_token_len (token) + 1;
2872 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
2873
2874 end = cpp_spell_token (pfile, token, start, false);
2875 end[0] = '\0';
2876
2877 return start;
2878 }
2879
2880 /* Returns a pointer to a string which spells the token defined by
2881 TYPE and FLAGS. Used by C front ends, which really should move to
2882 using cpp_token_as_text. */
2883 const char *
2884 cpp_type2name (enum cpp_ttype type, unsigned char flags)
2885 {
2886 if (flags & DIGRAPH)
2887 return (const char *) cpp_digraph2name (type);
2888 else if (flags & NAMED_OP)
2889 return cpp_named_operator2name (type);
2890
2891 return (const char *) token_spellings[type].name;
2892 }
2893
2894 /* Writes the spelling of token to FP, without any preceding space.
2895 Separated from cpp_spell_token for efficiency - to avoid stdio
2896 double-buffering. */
2897 void
2898 cpp_output_token (const cpp_token *token, FILE *fp)
2899 {
2900 switch (TOKEN_SPELL (token))
2901 {
2902 case SPELL_OPERATOR:
2903 {
2904 const unsigned char *spelling;
2905 int c;
2906
2907 if (token->flags & DIGRAPH)
2908 spelling = cpp_digraph2name (token->type);
2909 else if (token->flags & NAMED_OP)
2910 goto spell_ident;
2911 else
2912 spelling = TOKEN_NAME (token);
2913
2914 c = *spelling;
2915 do
2916 putc (c, fp);
2917 while ((c = *++spelling) != '\0');
2918 }
2919 break;
2920
2921 spell_ident:
2922 case SPELL_IDENT:
2923 {
2924 size_t i;
2925 const unsigned char * name = NODE_NAME (token->val.node.node);
2926
2927 for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2928 if (name[i] & ~0x7F)
2929 {
2930 unsigned char buffer[10];
2931 i += utf8_to_ucn (buffer, name + i) - 1;
2932 fwrite (buffer, 1, 10, fp);
2933 }
2934 else
2935 fputc (NODE_NAME (token->val.node.node)[i], fp);
2936 }
2937 break;
2938
2939 case SPELL_LITERAL:
2940 fwrite (token->val.str.text, 1, token->val.str.len, fp);
2941 break;
2942
2943 case SPELL_NONE:
2944 /* An error, most probably. */
2945 break;
2946 }
2947 }
2948
2949 /* Compare two tokens. */
2950 int
2951 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
2952 {
2953 if (a->type == b->type && a->flags == b->flags)
2954 switch (TOKEN_SPELL (a))
2955 {
2956 default: /* Keep compiler happy. */
2957 case SPELL_OPERATOR:
2958 /* token_no is used to track where multiple consecutive ##
2959 tokens were originally located. */
2960 return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
2961 case SPELL_NONE:
2962 return (a->type != CPP_MACRO_ARG
2963 || (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
2964 && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
2965 case SPELL_IDENT:
2966 return (a->val.node.node == b->val.node.node
2967 && a->val.node.spelling == b->val.node.spelling);
2968 case SPELL_LITERAL:
2969 return (a->val.str.len == b->val.str.len
2970 && !memcmp (a->val.str.text, b->val.str.text,
2971 a->val.str.len));
2972 }
2973
2974 return 0;
2975 }
2976
2977 /* Returns nonzero if a space should be inserted to avoid an
2978 accidental token paste for output. For simplicity, it is
2979 conservative, and occasionally advises a space where one is not
2980 needed, e.g. "." and ".2". */
2981 int
2982 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2983 const cpp_token *token2)
2984 {
2985 enum cpp_ttype a = token1->type, b = token2->type;
2986 cppchar_t c;
2987
2988 if (token1->flags & NAMED_OP)
2989 a = CPP_NAME;
2990 if (token2->flags & NAMED_OP)
2991 b = CPP_NAME;
2992
2993 c = EOF;
2994 if (token2->flags & DIGRAPH)
2995 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
2996 else if (token_spellings[b].category == SPELL_OPERATOR)
2997 c = token_spellings[b].name[0];
2998
2999 /* Quickly get everything that can paste with an '='. */
3000 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
3001 return 1;
3002
3003 switch (a)
3004 {
3005 case CPP_GREATER: return c == '>';
3006 case CPP_LESS: return c == '<' || c == '%' || c == ':';
3007 case CPP_PLUS: return c == '+';
3008 case CPP_MINUS: return c == '-' || c == '>';
3009 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
3010 case CPP_MOD: return c == ':' || c == '>';
3011 case CPP_AND: return c == '&';
3012 case CPP_OR: return c == '|';
3013 case CPP_COLON: return c == ':' || c == '>';
3014 case CPP_DEREF: return c == '*';
3015 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
3016 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
3017 case CPP_NAME: return ((b == CPP_NUMBER
3018 && name_p (pfile, &token2->val.str))
3019 || b == CPP_NAME
3020 || b == CPP_CHAR || b == CPP_STRING); /* L */
3021 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
3022 || c == '.' || c == '+' || c == '-');
3023 /* UCNs */
3024 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
3025 && b == CPP_NAME)
3026 || (CPP_OPTION (pfile, objc)
3027 && token1->val.str.text[0] == '@'
3028 && (b == CPP_NAME || b == CPP_STRING)));
3029 case CPP_STRING:
3030 case CPP_WSTRING:
3031 case CPP_UTF8STRING:
3032 case CPP_STRING16:
3033 case CPP_STRING32: return (CPP_OPTION (pfile, user_literals)
3034 && (b == CPP_NAME
3035 || (TOKEN_SPELL (token2) == SPELL_LITERAL
3036 && ISIDST (token2->val.str.text[0]))));
3037
3038 default: break;
3039 }
3040
3041 return 0;
3042 }
3043
3044 /* Output all the remaining tokens on the current line, and a newline
3045 character, to FP. Leading whitespace is removed. If there are
3046 macros, special token padding is not performed. */
3047 void
3048 cpp_output_line (cpp_reader *pfile, FILE *fp)
3049 {
3050 const cpp_token *token;
3051
3052 token = cpp_get_token (pfile);
3053 while (token->type != CPP_EOF)
3054 {
3055 cpp_output_token (token, fp);
3056 token = cpp_get_token (pfile);
3057 if (token->flags & PREV_WHITE)
3058 putc (' ', fp);
3059 }
3060
3061 putc ('\n', fp);
3062 }
3063
3064 /* Return a string representation of all the remaining tokens on the
3065 current line. The result is allocated using xmalloc and must be
3066 freed by the caller. */
3067 unsigned char *
3068 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
3069 {
3070 const cpp_token *token;
3071 unsigned int out = dir_name ? ustrlen (dir_name) : 0;
3072 unsigned int alloced = 120 + out;
3073 unsigned char *result = (unsigned char *) xmalloc (alloced);
3074
3075 /* If DIR_NAME is empty, there are no initial contents. */
3076 if (dir_name)
3077 {
3078 sprintf ((char *) result, "#%s ", dir_name);
3079 out += 2;
3080 }
3081
3082 token = cpp_get_token (pfile);
3083 while (token->type != CPP_EOF)
3084 {
3085 unsigned char *last;
3086 /* Include room for a possible space and the terminating nul. */
3087 unsigned int len = cpp_token_len (token) + 2;
3088
3089 if (out + len > alloced)
3090 {
3091 alloced *= 2;
3092 if (out + len > alloced)
3093 alloced = out + len;
3094 result = (unsigned char *) xrealloc (result, alloced);
3095 }
3096
3097 last = cpp_spell_token (pfile, token, &result[out], 0);
3098 out = last - result;
3099
3100 token = cpp_get_token (pfile);
3101 if (token->flags & PREV_WHITE)
3102 result[out++] = ' ';
3103 }
3104
3105 result[out] = '\0';
3106 return result;
3107 }
3108
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest size used for a new buffer.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest size a free buffer
   may have to be handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that compound expressions can
   be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
3123
3124 /* Create a new allocation buffer. Place the control block at the end
3125 of the buffer, so that buffer overflows will cause immediate chaos. */
3126 static _cpp_buff *
3127 new_buff (size_t len)
3128 {
3129 _cpp_buff *result;
3130 unsigned char *base;
3131
3132 if (len < MIN_BUFF_SIZE)
3133 len = MIN_BUFF_SIZE;
3134 len = CPP_ALIGN (len);
3135
3136 #ifdef ENABLE_VALGRIND_CHECKING
3137 /* Valgrind warns about uses of interior pointers, so put _cpp_buff
3138 struct first. */
3139 size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
3140 base = XNEWVEC (unsigned char, len + slen);
3141 result = (_cpp_buff *) base;
3142 base += slen;
3143 #else
3144 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
3145 result = (_cpp_buff *) (base + len);
3146 #endif
3147 result->base = base;
3148 result->cur = base;
3149 result->limit = base + len;
3150 result->next = NULL;
3151 return result;
3152 }
3153
3154 /* Place a chain of unwanted allocation buffers on the free list. */
3155 void
3156 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
3157 {
3158 _cpp_buff *end = buff;
3159
3160 while (end->next)
3161 end = end->next;
3162 end->next = pfile->free_buffs;
3163 pfile->free_buffs = buff;
3164 }
3165
3166 /* Return a free buffer of size at least MIN_SIZE. */
3167 _cpp_buff *
3168 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
3169 {
3170 _cpp_buff *result, **p;
3171
3172 for (p = &pfile->free_buffs;; p = &(*p)->next)
3173 {
3174 size_t size;
3175
3176 if (*p == NULL)
3177 return new_buff (min_size);
3178 result = *p;
3179 size = result->limit - result->base;
3180 /* Return a buffer that's big enough, but don't waste one that's
3181 way too big. */
3182 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
3183 break;
3184 }
3185
3186 *p = result->next;
3187 result->next = NULL;
3188 result->cur = result->base;
3189 return result;
3190 }
3191
3192 /* Creates a new buffer with enough space to hold the uncommitted
3193 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
3194 the excess bytes to the new buffer. Chains the new buffer after
3195 BUFF, and returns the new buffer. */
3196 _cpp_buff *
3197 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
3198 {
3199 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
3200 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
3201
3202 buff->next = new_buff;
3203 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
3204 return new_buff;
3205 }
3206
3207 /* Creates a new buffer with enough space to hold the uncommitted
3208 remaining bytes of the buffer pointed to by BUFF, and at least
3209 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
3210 Chains the new buffer before the buffer pointed to by BUFF, and
3211 updates the pointer to point to the new buffer. */
3212 void
3213 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
3214 {
3215 _cpp_buff *new_buff, *old_buff = *pbuff;
3216 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
3217
3218 new_buff = _cpp_get_buff (pfile, size);
3219 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
3220 new_buff->next = old_buff;
3221 *pbuff = new_buff;
3222 }
3223
3224 /* Free a chain of buffers starting at BUFF. */
3225 void
3226 _cpp_free_buff (_cpp_buff *buff)
3227 {
3228 _cpp_buff *next;
3229
3230 for (; buff; buff = next)
3231 {
3232 next = buff->next;
3233 #ifdef ENABLE_VALGRIND_CHECKING
3234 free (buff);
3235 #else
3236 free (buff->base);
3237 #endif
3238 }
3239 }
3240
3241 /* Allocate permanent, unaligned storage of length LEN. */
3242 unsigned char *
3243 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
3244 {
3245 _cpp_buff *buff = pfile->u_buff;
3246 unsigned char *result = buff->cur;
3247
3248 if (len > (size_t) (buff->limit - result))
3249 {
3250 buff = _cpp_get_buff (pfile, len);
3251 buff->next = pfile->u_buff;
3252 pfile->u_buff = buff;
3253 result = buff->cur;
3254 }
3255
3256 buff->cur = result + len;
3257 return result;
3258 }
3259
3260 /* Allocate permanent, unaligned storage of length LEN from a_buff.
3261 That buffer is used for growing allocations when saving macro
3262 replacement lists in a #define, and when parsing an answer to an
3263 assertion in #assert, #unassert or #if (and therefore possibly
3264 whilst expanding macros). It therefore must not be used by any
3265 code that they might call: specifically the lexer and the guts of
3266 the macro expander.
3267
3268 All existing other uses clearly fit this restriction: storing
3269 registered pragmas during initialization. */
3270 unsigned char *
3271 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3272 {
3273 _cpp_buff *buff = pfile->a_buff;
3274 unsigned char *result = buff->cur;
3275
3276 if (len > (size_t) (buff->limit - result))
3277 {
3278 buff = _cpp_get_buff (pfile, len);
3279 buff->next = pfile->a_buff;
3280 pfile->a_buff = buff;
3281 result = buff->cur;
3282 }
3283
3284 buff->cur = result + len;
3285 return result;
3286 }
3287
3288 /* Say which field of TOK is in use. */
3289
3290 enum cpp_token_fld_kind
3291 cpp_token_val_index (const cpp_token *tok)
3292 {
3293 switch (TOKEN_SPELL (tok))
3294 {
3295 case SPELL_IDENT:
3296 return CPP_TOKEN_FLD_NODE;
3297 case SPELL_LITERAL:
3298 return CPP_TOKEN_FLD_STR;
3299 case SPELL_OPERATOR:
3300 if (tok->type == CPP_PASTE)
3301 return CPP_TOKEN_FLD_TOKEN_NO;
3302 else
3303 return CPP_TOKEN_FLD_NONE;
3304 case SPELL_NONE:
3305 if (tok->type == CPP_MACRO_ARG)
3306 return CPP_TOKEN_FLD_ARG_NO;
3307 else if (tok->type == CPP_PADDING)
3308 return CPP_TOKEN_FLD_SOURCE;
3309 else if (tok->type == CPP_PRAGMA)
3310 return CPP_TOKEN_FLD_PRAGMA;
3311 /* else fall through */
3312 default:
3313 return CPP_TOKEN_FLD_NONE;
3314 }
3315 }
3316
3317 /* All tokens lexed in R after calling this function will be forced to have
3318 their source_location the same as the location referenced by P, until
3319 cpp_stop_forcing_token_locations is called for R. */
3320
3321 void
3322 cpp_force_token_locations (cpp_reader *r, source_location *p)
3323 {
3324 r->forced_token_location_p = p;
3325 }
3326
3327 /* Go back to assigning locations naturally for lexed tokens. */
3328
3329 void
3330 cpp_stop_forcing_token_locations (cpp_reader *r)
3331 {
3332 r->forced_token_location_p = NULL;
3333 }