]> git.ipfire.org Git - thirdparty/gcc.git/blame - libcpp/lex.c
re PR c++/57869 ([C++11] Casting a object pointer to a function pointer should not...
[thirdparty/gcc.git] / libcpp / lex.c
CommitLineData
/* CPP Library - lexical analysis.
   Copyright (C) 2000-2013 Free Software Foundation, Inc.
   Contributed by Per Bothner, 1994-95.
   Based on CCCP program by Paul Rubin, June 1986
   Adapted to ANSI C, Richard Stallman, Jan 1987
   Broken out to separate file, Zack Weinberg, Mar 2000

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
45b966db
ZW
21
22#include "config.h"
23#include "system.h"
45b966db 24#include "cpplib.h"
4f4e53dd 25#include "internal.h"
45b966db 26
/* Spelling category of a token type, stored in the CATEGORY field of
   each token_spellings entry.  Entries generated by OP() are always
   SPELL_OPERATOR; entries generated by TK() name their category
   explicitly.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
34
93c80368 35struct token_spelling
f9a0e96c 36{
93c80368
NB
37 enum spell_type category;
38 const unsigned char *name;
f9a0e96c
ZW
39};
40
8206c799 41static const unsigned char *const digraph_spellings[] =
b6baa67d 42{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
93c80368 43
b6baa67d
KVH
44#define OP(e, s) { SPELL_OPERATOR, UC s },
45#define TK(e, s) { SPELL_ ## s, UC #e },
8206c799 46static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
93c80368
NB
47#undef OP
48#undef TK
49
50#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
f2d5f0cc 52
6cf87ca4
ZW
53static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54static int skip_line_comment (cpp_reader *);
55static void skip_whitespace (cpp_reader *, cppchar_t);
6cf87ca4
ZW
56static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
631d0d36 58static void store_comment (cpp_reader *, cpp_token *);
6cf87ca4
ZW
59static void create_literal (cpp_reader *, cpp_token *, const uchar *,
60 unsigned int, enum cpp_ttype);
61static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
62static int name_p (cpp_reader *, const cpp_string *);
6cf87ca4
ZW
63static tokenrun *next_tokenrun (tokenrun *);
64
6cf87ca4 65static _cpp_buff *new_buff (size_t);
15dad1d9 66
9d10c9a9 67
041c3194 68/* Utility routine:
9e62c811 69
bfb9dc7f
ZW
70 Compares, the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
041c3194 72int
6cf87ca4 73cpp_ideq (const cpp_token *token, const char *string)
041c3194 74{
bfb9dc7f 75 if (token->type != CPP_NAME)
041c3194 76 return 0;
bfb9dc7f 77
9a0c6187 78 return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
15dad1d9 79}
1368ee70 80
26aea073
NB
81/* Record a note TYPE at byte POS into the current cleaned logical
82 line. */
87062813 83static void
6cf87ca4 84add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
0d9f234d 85{
26aea073
NB
86 if (buffer->notes_used == buffer->notes_cap)
87 {
88 buffer->notes_cap = buffer->notes_cap * 2 + 200;
c3f829c1
GDR
89 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
90 buffer->notes_cap);
26aea073 91 }
0d9f234d 92
26aea073
NB
93 buffer->notes[buffer->notes_used].pos = pos;
94 buffer->notes[buffer->notes_used].type = type;
95 buffer->notes_used++;
0d9f234d
NB
96}
97
/* Fast path to find line special characters using optimized character
   scanning algorithms.  Anything complicated falls back to the slow
   path below.  Since this loop is very hot it's worth doing these kinds
   of optimizations.

   One of the paths through the ifdefs should provide

     const uchar *search_line_fast (const uchar *s, const uchar *end);

   Between S and END, search for \n, \r, \\, ?.  Return a pointer to
   the found character.

   Note that the last character of the buffer is *always* a newline,
   as forced by _cpp_convert_input.  This fact can be used to avoid
   explicitly looking for the end of the buffer.  */

/* Configure gives us an ifdef test.  */
#ifndef WORDS_BIGENDIAN
#define WORDS_BIGENDIAN 0
#endif

/* We'd like the largest integer that fits into a register.  There's nothing
   in <stdint.h> that gives us that.  For most hosts this is unsigned long,
   but MS decided on an LLP64 model.  Thankfully when building with GCC we
   can get the "real" word size.  */
#ifdef __GNUC__
typedef unsigned int word_type __attribute__((__mode__(__word__)));
#else
typedef unsigned long word_type;
#endif

/* The code below is only expecting sizes 4 or 8.
   Die at compile-time if this expectation is violated: the array
   size evaluates to -1 for any other size.  */
typedef char check_word_type_size
  [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
134
135/* Return X with the first N bytes forced to values that won't match one
136 of the interesting characters. Note that NUL is not interesting. */
137
138static inline word_type
139acc_char_mask_misalign (word_type val, unsigned int n)
140{
141 word_type mask = -1;
142 if (WORDS_BIGENDIAN)
143 mask >>= n * 8;
144 else
145 mask <<= n * 8;
146 return val & mask;
147}
148
149/* Return X replicated to all byte positions within WORD_TYPE. */
150
151static inline word_type
152acc_char_replicate (uchar x)
153{
154 word_type ret;
155
156 ret = (x << 24) | (x << 16) | (x << 8) | x;
157 if (sizeof(word_type) == 8)
158 ret = (ret << 16 << 16) | ret;
159 return ret;
160}
161
162/* Return non-zero if some byte of VAL is (probably) C. */
163
164static inline word_type
165acc_char_cmp (word_type val, word_type c)
166{
167#if defined(__GNUC__) && defined(__alpha__)
168 /* We can get exact results using a compare-bytes instruction.
169 Get (val == c) via (0 >= (val ^ c)). */
170 return __builtin_alpha_cmpbge (0, val ^ c);
171#else
172 word_type magic = 0x7efefefeU;
173 if (sizeof(word_type) == 8)
174 magic = (magic << 16 << 16) | 0xfefefefeU;
175 magic |= 1;
176
177 val ^= c;
178 return ((val + magic) ^ ~val) & ~magic;
179#endif
180}
181
182/* Given the result of acc_char_cmp is non-zero, return the index of
183 the found character. If this was a false positive, return -1. */
184
185static inline int
186acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
187 word_type val ATTRIBUTE_UNUSED)
188{
189#if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190 /* The cmpbge instruction sets *bits* of the result corresponding to
191 matches in the bytes with no false positives. */
192 return __builtin_ctzl (cmp);
193#else
194 unsigned int i;
195
196 /* ??? It would be nice to force unrolling here,
197 and have all of these constants folded. */
198 for (i = 0; i < sizeof(word_type); ++i)
199 {
200 uchar c;
201 if (WORDS_BIGENDIAN)
202 c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
203 else
204 c = (val >> i * 8) & 0xff;
205
206 if (c == '\n' || c == '\r' || c == '\\' || c == '?')
207 return i;
208 }
209
210 return -1;
211#endif
212}
213
214/* A version of the fast scanner using bit fiddling techniques.
215
216 For 32-bit words, one would normally perform 16 comparisons and
217 16 branches. With this algorithm one performs 24 arithmetic
218 operations and one branch. Whether this is faster with a 32-bit
219 word size is going to be somewhat system dependent.
220
221 For 64-bit words, we eliminate twice the number of comparisons
222 and branches without increasing the number of arithmetic operations.
223 It's almost certainly going to be a win with 64-bit word size. */
224
225static const uchar * search_line_acc_char (const uchar *, const uchar *)
226 ATTRIBUTE_UNUSED;
227
228static const uchar *
229search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
230{
231 const word_type repl_nl = acc_char_replicate ('\n');
232 const word_type repl_cr = acc_char_replicate ('\r');
233 const word_type repl_bs = acc_char_replicate ('\\');
234 const word_type repl_qm = acc_char_replicate ('?');
235
236 unsigned int misalign;
237 const word_type *p;
238 word_type val, t;
239
240 /* Align the buffer. Mask out any bytes from before the beginning. */
241 p = (word_type *)((uintptr_t)s & -sizeof(word_type));
242 val = *p;
243 misalign = (uintptr_t)s & (sizeof(word_type) - 1);
244 if (misalign)
245 val = acc_char_mask_misalign (val, misalign);
246
247 /* Main loop. */
248 while (1)
249 {
250 t = acc_char_cmp (val, repl_nl);
251 t |= acc_char_cmp (val, repl_cr);
252 t |= acc_char_cmp (val, repl_bs);
253 t |= acc_char_cmp (val, repl_qm);
254
255 if (__builtin_expect (t != 0, 0))
256 {
257 int i = acc_char_index (t, val);
258 if (i >= 0)
259 return (const uchar *)p + i;
260 }
261
262 val = *++p;
263 }
264}
265
789d73cb
RO
266/* Disable on Solaris 2/x86 until the following problems can be properly
267 autoconfed:
268
789d73cb
RO
269 The Solaris 9 assembler cannot assemble SSE4.2 insns.
270 Before Solaris 9 Update 6, SSE insns cannot be executed.
271 The Solaris 10+ assembler tags objects with the instruction set
272 extensions used, so SSE4.2 executables cannot run on machines that
273 don't support that extension. */
274
275#if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
246a2fcb
RH
276
277/* Replicated character data to be shared between implementations.
278 Recall that outside of a context with vector support we can't
279 define compatible vector types, therefore these are all defined
280 in terms of raw characters. */
281static const char repl_chars[4][16] __attribute__((aligned(16))) = {
282 { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
283 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
284 { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
285 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
286 { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
287 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
288 { '?', '?', '?', '?', '?', '?', '?', '?',
289 '?', '?', '?', '?', '?', '?', '?', '?' },
290};
291
292/* A version of the fast scanner using MMX vectorized byte compare insns.
293
294 This uses the PMOVMSKB instruction which was introduced with "MMX2",
ef230b38 295 which was packaged into SSE1; it is also present in the AMD MMX
246a2fcb
RH
296 extension. Mark the function as using "sse" so that we emit a real
297 "emms" instruction, rather than the 3dNOW "femms" instruction. */
298
299static const uchar *
300#ifndef __SSE__
301__attribute__((__target__("sse")))
302#endif
303search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
304{
305 typedef char v8qi __attribute__ ((__vector_size__ (8)));
306 typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
307
308 const v8qi repl_nl = *(const v8qi *)repl_chars[0];
309 const v8qi repl_cr = *(const v8qi *)repl_chars[1];
310 const v8qi repl_bs = *(const v8qi *)repl_chars[2];
311 const v8qi repl_qm = *(const v8qi *)repl_chars[3];
312
313 unsigned int misalign, found, mask;
314 const v8qi *p;
315 v8qi data, t, c;
316
317 /* Align the source pointer. While MMX doesn't generate unaligned data
318 faults, this allows us to safely scan to the end of the buffer without
319 reading beyond the end of the last page. */
320 misalign = (uintptr_t)s & 7;
321 p = (const v8qi *)((uintptr_t)s & -8);
322 data = *p;
323
324 /* Create a mask for the bytes that are valid within the first
325 16-byte block. The Idea here is that the AND with the mask
326 within the loop is "free", since we need some AND or TEST
327 insn in order to set the flags for the branch anyway. */
328 mask = -1u << misalign;
329
330 /* Main loop processing 8 bytes at a time. */
331 goto start;
332 do
333 {
334 data = *++p;
335 mask = -1;
336
337 start:
338 t = __builtin_ia32_pcmpeqb(data, repl_nl);
339 c = __builtin_ia32_pcmpeqb(data, repl_cr);
340 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
341 c = __builtin_ia32_pcmpeqb(data, repl_bs);
342 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
343 c = __builtin_ia32_pcmpeqb(data, repl_qm);
344 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
345 found = __builtin_ia32_pmovmskb (t);
346 found &= mask;
347 }
348 while (!found);
349
350 __builtin_ia32_emms ();
351
352 /* FOUND contains 1 in bits for which we matched a relevant
353 character. Conversion to the byte index is trivial. */
354 found = __builtin_ctz(found);
355 return (const uchar *)p + found;
356}
357
358/* A version of the fast scanner using SSE2 vectorized byte compare insns. */
359
360static const uchar *
361#ifndef __SSE2__
362__attribute__((__target__("sse2")))
363#endif
364search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
365{
366 typedef char v16qi __attribute__ ((__vector_size__ (16)));
367
368 const v16qi repl_nl = *(const v16qi *)repl_chars[0];
369 const v16qi repl_cr = *(const v16qi *)repl_chars[1];
370 const v16qi repl_bs = *(const v16qi *)repl_chars[2];
371 const v16qi repl_qm = *(const v16qi *)repl_chars[3];
372
373 unsigned int misalign, found, mask;
374 const v16qi *p;
375 v16qi data, t;
376
377 /* Align the source pointer. */
378 misalign = (uintptr_t)s & 15;
379 p = (const v16qi *)((uintptr_t)s & -16);
380 data = *p;
381
382 /* Create a mask for the bytes that are valid within the first
383 16-byte block. The Idea here is that the AND with the mask
384 within the loop is "free", since we need some AND or TEST
385 insn in order to set the flags for the branch anyway. */
386 mask = -1u << misalign;
387
388 /* Main loop processing 16 bytes at a time. */
389 goto start;
390 do
391 {
392 data = *++p;
393 mask = -1;
394
395 start:
396 t = __builtin_ia32_pcmpeqb128(data, repl_nl);
397 t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
398 t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
399 t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
400 found = __builtin_ia32_pmovmskb128 (t);
401 found &= mask;
402 }
403 while (!found);
404
405 /* FOUND contains 1 in bits for which we matched a relevant
406 character. Conversion to the byte index is trivial. */
407 found = __builtin_ctz(found);
408 return (const uchar *)p + found;
409}
410
#ifdef HAVE_SSE4
/* A version of the fast scanner using SSE 4.2 vectorized string insns.

   Returns a pointer to the first \n, \r, \\ or ? at or after S.  END
   is used only to decide whether an unaligned 16-byte read at S is
   safe.  */

static const uchar *
#ifndef __SSE4_2__
__attribute__((__target__("sse4.2")))
#endif
search_line_sse42 (const uchar *s, const uchar *end)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
  static const v16qi search = { '\n', '\r', '?', '\\' };

  uintptr_t si = (uintptr_t)s;
  uintptr_t index;

  /* Check for unaligned input.  */
  if (si & 15)
    {
      v16qi sv;

      if (__builtin_expect (end - s < 16, 0)
          && __builtin_expect ((si & 0xfff) > 0xff0, 0))
        {
          /* There are less than 16 bytes left in the buffer, and less
             than 16 bytes left on the page.  Reading 16 bytes at this
             point might generate a spurious page fault.  Defer to the
             SSE2 implementation, which already handles alignment.  */
          return search_line_sse2 (s, end);
        }

      /* ??? The builtin doesn't understand that the PCMPESTRI read from
         memory need not be aligned.  */
      sv = __builtin_ia32_loaddqu ((const char *) s);
      index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);

      if (__builtin_expect (index < 16, 0))
        goto found;

      /* Advance the pointer to an aligned address.  We will re-scan a
         few bytes, but we no longer need care for reading past the
         end of a page, since we're guaranteed a match.  */
      s = (const uchar *)((si + 16) & -16);
    }

  /* Main loop, processing 16 bytes at a time.  By doing the whole loop
     in inline assembly, we can make proper use of the flags set.  */
  __asm (      "sub $16, %1\n"
        " .balign 16\n"
        "0: add $16, %1\n"
        " %vpcmpestri $0, (%1), %2\n"
        " jnc 0b"
        : "=&c"(index), "+r"(s)
        : "x"(search), "a"(4), "d"(16));

 found:
  return s + index;
}

#else
/* Work around out-dated assemblers without sse4 support.  */
#define search_line_sse42 search_line_sse2
#endif
473
246a2fcb
RH
474/* Check the CPU capabilities. */
475
476#include "../gcc/config/i386/cpuid.h"
477
478typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
479static search_line_fast_type search_line_fast;
480
b0c084b7
JJ
481#define HAVE_init_vectorized_lexer 1
482static inline void
246a2fcb
RH
483init_vectorized_lexer (void)
484{
485 unsigned dummy, ecx = 0, edx = 0;
486 search_line_fast_type impl = search_line_acc_char;
487 int minimum = 0;
488
489#if defined(__SSE4_2__)
490 minimum = 3;
491#elif defined(__SSE2__)
492 minimum = 2;
ef230b38 493#elif defined(__SSE__)
246a2fcb
RH
494 minimum = 1;
495#endif
496
497 if (minimum == 3)
498 impl = search_line_sse42;
499 else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
500 {
501 if (minimum == 3 || (ecx & bit_SSE4_2))
502 impl = search_line_sse42;
503 else if (minimum == 2 || (edx & bit_SSE2))
504 impl = search_line_sse2;
505 else if (minimum == 1 || (edx & bit_SSE))
506 impl = search_line_mmx;
507 }
508 else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
509 {
5e70c0b5
UB
510 if (minimum == 1
511 || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
246a2fcb
RH
512 impl = search_line_mmx;
513 }
514
515 search_line_fast = impl;
516}
517
01956319 518#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
246a2fcb
RH
519
520/* A vection of the fast scanner using AltiVec vectorized byte compares. */
521/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
522 so we can't compile this function without -maltivec on the command line
523 (or implied by some other switch). */
524
525static const uchar *
526search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
527{
528 typedef __attribute__((altivec(vector))) unsigned char vc;
529
530 const vc repl_nl = {
531 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
532 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
533 };
534 const vc repl_cr = {
535 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
536 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
537 };
538 const vc repl_bs = {
539 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
540 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
541 };
542 const vc repl_qm = {
543 '?', '?', '?', '?', '?', '?', '?', '?',
544 '?', '?', '?', '?', '?', '?', '?', '?',
545 };
546 const vc ones = {
547 -1, -1, -1, -1, -1, -1, -1, -1,
548 -1, -1, -1, -1, -1, -1, -1, -1,
549 };
550 const vc zero = { 0 };
551
552 vc data, mask, t;
553
554 /* Altivec loads automatically mask addresses with -16. This lets us
555 issue the first load as early as possible. */
556 data = __builtin_vec_ld(0, (const vc *)s);
557
558 /* Discard bytes before the beginning of the buffer. Do this by
559 beginning with all ones and shifting in zeros according to the
560 mis-alignment. The LVSR instruction pulls the exact shift we
561 want from the address. */
562 mask = __builtin_vec_lvsr(0, s);
563 mask = __builtin_vec_perm(zero, ones, mask);
564 data &= mask;
565
566 /* While altivec loads mask addresses, we still need to align S so
567 that the offset we compute at the end is correct. */
568 s = (const uchar *)((uintptr_t)s & -16);
569
570 /* Main loop processing 16 bytes at a time. */
571 goto start;
572 do
573 {
574 vc m_nl, m_cr, m_bs, m_qm;
575
576 s += 16;
577 data = __builtin_vec_ld(0, (const vc *)s);
578
579 start:
580 m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
581 m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
582 m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
583 m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
584 t = (m_nl | m_cr) | (m_bs | m_qm);
585
586 /* T now contains 0xff in bytes for which we matched one of the relevant
587 characters. We want to exit the loop if any byte in T is non-zero.
588 Below is the expansion of vec_any_ne(t, zero). */
589 }
590 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
591
592 {
593#define N (sizeof(vc) / sizeof(long))
594
246a2fcb
RH
595 union {
596 vc v;
53a103d3
DS
597 /* Statically assert that N is 2 or 4. */
598 unsigned long l[(N == 2 || N == 4) ? N : -1];
246a2fcb
RH
599 } u;
600 unsigned long l, i = 0;
601
602 u.v = t;
603
604 /* Find the first word of T that is non-zero. */
605 switch (N)
606 {
607 case 4:
608 l = u.l[i++];
609 if (l != 0)
610 break;
611 s += sizeof(unsigned long);
612 l = u.l[i++];
613 if (l != 0)
614 break;
615 s += sizeof(unsigned long);
616 case 2:
617 l = u.l[i++];
618 if (l != 0)
619 break;
620 s += sizeof(unsigned long);
621 l = u.l[i];
622 }
623
624 /* L now contains 0xff in bytes for which we matched one of the
625 relevant characters. We can find the byte index by finding
626 its bit index and dividing by 8. */
627 l = __builtin_clzl(l) >> 3;
628 return s + l;
629
630#undef N
631 }
632}
633
e75b54a2
RE
634#elif defined (__ARM_NEON__)
635#include "arm_neon.h"
636
637static const uchar *
638search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
639{
640 const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
641 const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
642 const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
643 const uint8x16_t repl_qm = vdupq_n_u8 ('?');
644 const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
645
646 unsigned int misalign, found, mask;
647 const uint8_t *p;
648 uint8x16_t data;
649
650 /* Align the source pointer. */
651 misalign = (uintptr_t)s & 15;
652 p = (const uint8_t *)((uintptr_t)s & -16);
653 data = vld1q_u8 (p);
654
655 /* Create a mask for the bytes that are valid within the first
656 16-byte block. The Idea here is that the AND with the mask
657 within the loop is "free", since we need some AND or TEST
658 insn in order to set the flags for the branch anyway. */
659 mask = (-1u << misalign) & 0xffff;
660
661 /* Main loop, processing 16 bytes at a time. */
662 goto start;
663
664 do
665 {
666 uint8x8_t l;
667 uint16x4_t m;
668 uint32x2_t n;
669 uint8x16_t t, u, v, w;
670
671 p += 16;
672 data = vld1q_u8 (p);
673 mask = 0xffff;
674
675 start:
676 t = vceqq_u8 (data, repl_nl);
677 u = vceqq_u8 (data, repl_cr);
678 v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
679 w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
680 t = vandq_u8 (vorrq_u8 (v, w), xmask);
681 l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
682 m = vpaddl_u8 (l);
683 n = vpaddl_u16 (m);
684
685 found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
686 vshr_n_u64 ((uint64x1_t) n, 24)), 0);
687 found &= mask;
688 }
689 while (!found);
690
691 /* FOUND contains 1 in bits for which we matched a relevant
692 character. Conversion to the byte index is trivial. */
693 found = __builtin_ctz (found);
694 return (const uchar *)p + found;
695}
696
246a2fcb
RH
697#else
698
699/* We only have one accellerated alternative. Use a direct call so that
700 we encourage inlining. */
701
702#define search_line_fast search_line_acc_char
703
704#endif
705
/* Initialize the lexer if needed.  When a vectorized scanner
   selection routine was compiled in (HAVE_init_vectorized_lexer),
   run it; otherwise this is a no-op.  */

void
_cpp_init_lexer (void)
{
#ifdef HAVE_init_vectorized_lexer
  init_vectorized_lexer ();
#endif
}
715
26aea073
NB
716/* Returns with a logical line that contains no escaped newlines or
717 trigraphs. This is a time-critical inner loop. */
718void
6cf87ca4 719_cpp_clean_line (cpp_reader *pfile)
45b966db 720{
26aea073
NB
721 cpp_buffer *buffer;
722 const uchar *s;
723 uchar c, *d, *p;
87062813 724
26aea073
NB
725 buffer = pfile->buffer;
726 buffer->cur_note = buffer->notes_used = 0;
727 buffer->cur = buffer->line_base = buffer->next_line;
728 buffer->need_line = false;
246a2fcb 729 s = buffer->next_line;
87062813 730
26aea073 731 if (!buffer->from_stage3)
45b966db 732 {
7af45bd4
ILT
733 const uchar *pbackslash = NULL;
734
246a2fcb 735 /* Fast path. This is the common case of an un-escaped line with
d08dcf87
ZW
736 no trigraphs. The primary win here is by not writing any
737 data back to memory until we have to. */
246a2fcb 738 while (1)
d08dcf87 739 {
246a2fcb
RH
740 /* Perform an optimized search for \n, \r, \\, ?. */
741 s = search_line_fast (s, buffer->rlimit);
d08dcf87 742
246a2fcb
RH
743 c = *s;
744 if (c == '\\')
745 {
746 /* Record the location of the backslash and continue. */
747 pbackslash = s++;
d08dcf87 748 }
246a2fcb 749 else if (__builtin_expect (c == '?', 0))
d08dcf87 750 {
246a2fcb
RH
751 if (__builtin_expect (s[1] == '?', false)
752 && _cpp_trigraph_map[s[2]])
d08dcf87 753 {
246a2fcb
RH
754 /* Have a trigraph. We may or may not have to convert
755 it. Add a line note regardless, for -Wtrigraphs. */
756 add_line_note (buffer, s, s[2]);
757 if (CPP_OPTION (pfile, trigraphs))
758 {
759 /* We do, and that means we have to switch to the
760 slow path. */
761 d = (uchar *) s;
762 *d = _cpp_trigraph_map[s[2]];
763 s += 2;
764 goto slow_path;
765 }
d08dcf87 766 }
246a2fcb
RH
767 /* Not a trigraph. Continue on fast-path. */
768 s++;
d08dcf87 769 }
246a2fcb
RH
770 else
771 break;
d08dcf87
ZW
772 }
773
246a2fcb
RH
774 /* This must be \r or \n. We're either done, or we'll be forced
775 to write back to the buffer and continue on the slow path. */
776 d = (uchar *) s;
777
778 if (__builtin_expect (s == buffer->rlimit, false))
779 goto done;
780
781 /* DOS line ending? */
782 if (__builtin_expect (c == '\r', false) && s[1] == '\n')
783 {
784 s++;
785 if (s == buffer->rlimit)
786 goto done;
787 }
788
789 if (__builtin_expect (pbackslash == NULL, true))
790 goto done;
791
792 /* Check for escaped newline. */
793 p = d;
794 while (is_nvspace (p[-1]))
795 p--;
796 if (p - 1 != pbackslash)
797 goto done;
798
799 /* Have an escaped newline; process it and proceed to
800 the slow path. */
801 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
802 d = p - 2;
803 buffer->next_line = p - 1;
26aea073 804
246a2fcb
RH
805 slow_path:
806 while (1)
4a5b68a2 807 {
26aea073
NB
808 c = *++s;
809 *++d = c;
810
811 if (c == '\n' || c == '\r')
812 {
246a2fcb 813 /* Handle DOS line endings. */
26aea073
NB
814 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
815 s++;
816 if (s == buffer->rlimit)
817 break;
818
819 /* Escaped? */
820 p = d;
821 while (p != buffer->next_line && is_nvspace (p[-1]))
822 p--;
823 if (p == buffer->next_line || p[-1] != '\\')
824 break;
825
41c32c98 826 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
26aea073
NB
827 d = p - 2;
828 buffer->next_line = p - 1;
829 }
830 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
831 {
832 /* Add a note regardless, for the benefit of -Wtrigraphs. */
41c32c98 833 add_line_note (buffer, d, s[2]);
26aea073
NB
834 if (CPP_OPTION (pfile, trigraphs))
835 {
836 *d = _cpp_trigraph_map[s[2]];
837 s += 2;
838 }
839 }
4a5b68a2 840 }
45b966db 841 }
26aea073
NB
842 else
843 {
246a2fcb 844 while (*s != '\n' && *s != '\r')
26aea073 845 s++;
26aea073
NB
846 d = (uchar *) s;
847
848 /* Handle DOS line endings. */
849 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
850 s++;
851 }
0d9f234d 852
d08dcf87 853 done:
26aea073 854 *d = '\n';
41c32c98
NB
855 /* A sentinel note that should never be processed. */
856 add_line_note (buffer, d + 1, '\n');
26aea073 857 buffer->next_line = s + 1;
45b966db
ZW
858}
859
a8eb6044
NB
860/* Return true if the trigraph indicated by NOTE should be warned
861 about in a comment. */
862static bool
6cf87ca4 863warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
a8eb6044
NB
864{
865 const uchar *p;
866
867 /* Within comments we don't warn about trigraphs, unless the
868 trigraph forms an escaped newline, as that may change
6356f892 869 behavior. */
a8eb6044
NB
870 if (note->type != '/')
871 return false;
872
873 /* If -trigraphs, then this was an escaped newline iff the next note
874 is coincident. */
875 if (CPP_OPTION (pfile, trigraphs))
876 return note[1].pos == note->pos;
877
878 /* Otherwise, see if this forms an escaped newline. */
879 p = note->pos + 3;
880 while (is_nvspace (*p))
881 p++;
882
883 /* There might have been escaped newlines between the trigraph and the
884 newline we found. Hence the position test. */
885 return (*p == '\n' && p < note[1].pos);
886}
887
26aea073
NB
888/* Process the notes created by add_line_note as far as the current
889 location. */
890void
6cf87ca4 891_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
45b966db 892{
29401c30
NB
893 cpp_buffer *buffer = pfile->buffer;
894
26aea073 895 for (;;)
041c3194 896 {
26aea073
NB
897 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
898 unsigned int col;
a5c3cccd 899
26aea073
NB
900 if (note->pos > buffer->cur)
901 break;
a5c3cccd 902
26aea073
NB
903 buffer->cur_note++;
904 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
4d6baafa 905
41c32c98 906 if (note->type == '\\' || note->type == ' ')
26aea073 907 {
41c32c98 908 if (note->type == ' ' && !in_comment)
500bee0a 909 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
26aea073 910 "backslash and newline separated by space");
41c32c98 911
26aea073 912 if (buffer->next_line > buffer->rlimit)
87062813 913 {
500bee0a 914 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
26aea073
NB
915 "backslash-newline at end of file");
916 /* Prevent "no newline at end of file" warning. */
917 buffer->next_line = buffer->rlimit;
87062813 918 }
26aea073
NB
919
920 buffer->line_base = note->pos;
12f9df4e 921 CPP_INCREMENT_LINE (pfile, 0);
0d9f234d 922 }
41c32c98
NB
923 else if (_cpp_trigraph_map[note->type])
924 {
a8eb6044
NB
925 if (CPP_OPTION (pfile, warn_trigraphs)
926 && (!in_comment || warn_in_comment (pfile, note)))
41c32c98
NB
927 {
928 if (CPP_OPTION (pfile, trigraphs))
87cf0651
SB
929 cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
930 pfile->line_table->highest_line, col,
931 "trigraph ??%c converted to %c",
932 note->type,
933 (int) _cpp_trigraph_map[note->type]);
41c32c98 934 else
905bd7b5 935 {
87cf0651
SB
936 cpp_warning_with_line
937 (pfile, CPP_W_TRIGRAPHS,
938 pfile->line_table->highest_line, col,
905bd7b5
GK
939 "trigraph ??%c ignored, use -trigraphs to enable",
940 note->type);
941 }
41c32c98
NB
942 }
943 }
00a81b8b
JM
944 else if (note->type == 0)
945 /* Already processed in lex_raw_string. */;
41c32c98
NB
946 else
947 abort ();
041c3194 948 }
45b966db
ZW
949}
950
0d9f234d
NB
951/* Skip a C-style block comment. We find the end of the comment by
952 seeing if an asterisk is before every '/' we encounter. Returns
6f572ac2
NB
953 nonzero if comment terminated by EOF, zero otherwise.
954
955 Buffer->cur points to the initial asterisk of the comment. */
26aea073 956bool
6cf87ca4 957_cpp_skip_block_comment (cpp_reader *pfile)
45b966db 958{
041c3194 959 cpp_buffer *buffer = pfile->buffer;
d08dcf87
ZW
960 const uchar *cur = buffer->cur;
961 uchar c;
0d9f234d 962
d08dcf87
ZW
963 cur++;
964 if (*cur == '/')
965 cur++;
0d9f234d 966
26aea073
NB
967 for (;;)
968 {
0d9f234d
NB
969 /* People like decorating comments with '*', so check for '/'
970 instead for efficiency. */
d08dcf87
ZW
971 c = *cur++;
972
041c3194 973 if (c == '/')
45b966db 974 {
d08dcf87 975 if (cur[-2] == '*')
0d9f234d 976 break;
041c3194 977
0d9f234d 978 /* Warn about potential nested comments, but not if the '/'
a1f300c0 979 comes immediately before the true comment delimiter.
041c3194 980 Don't bother to get it right across escaped newlines. */
0d9f234d 981 if (CPP_OPTION (pfile, warn_comments)
d08dcf87
ZW
982 && cur[0] == '*' && cur[1] != '/')
983 {
984 buffer->cur = cur;
87cf0651
SB
985 cpp_warning_with_line (pfile, CPP_W_COMMENTS,
986 pfile->line_table->highest_line,
987 CPP_BUF_COL (buffer),
988 "\"/*\" within comment");
d08dcf87 989 }
45b966db 990 }
26aea073
NB
991 else if (c == '\n')
992 {
12f9df4e 993 unsigned int cols;
d08dcf87 994 buffer->cur = cur - 1;
26aea073
NB
995 _cpp_process_line_notes (pfile, true);
996 if (buffer->next_line >= buffer->rlimit)
997 return true;
998 _cpp_clean_line (pfile);
12f9df4e
PB
999
1000 cols = buffer->next_line - buffer->line_base;
1001 CPP_INCREMENT_LINE (pfile, cols);
1002
d08dcf87 1003 cur = buffer->cur;
26aea073 1004 }
45b966db 1005 }
041c3194 1006
d08dcf87 1007 buffer->cur = cur;
a8eb6044 1008 _cpp_process_line_notes (pfile, true);
26aea073 1009 return false;
45b966db
ZW
1010}
1011
480709cc 1012/* Skip a C++ line comment, leaving buffer->cur pointing to the
da7d8304 1013 terminating newline. Handles escaped newlines. Returns nonzero
480709cc 1014 if a multiline comment. */
041c3194 1015static int
6cf87ca4 1016skip_line_comment (cpp_reader *pfile)
45b966db 1017{
cbcff6df 1018 cpp_buffer *buffer = pfile->buffer;
1bb64668 1019 source_location orig_line = pfile->line_table->highest_line;
041c3194 1020
26aea073
NB
1021 while (*buffer->cur != '\n')
1022 buffer->cur++;
480709cc 1023
26aea073 1024 _cpp_process_line_notes (pfile, true);
500bee0a 1025 return orig_line != pfile->line_table->highest_line;
041c3194 1026}
45b966db 1027
26aea073 1028/* Skips whitespace, saving the next non-whitespace character. */
52fadca8 1029static void
6cf87ca4 1030skip_whitespace (cpp_reader *pfile, cppchar_t c)
041c3194
ZW
1031{
1032 cpp_buffer *buffer = pfile->buffer;
f7d151fb 1033 bool saw_NUL = false;
45b966db 1034
0d9f234d 1035 do
041c3194 1036 {
91fcd158 1037 /* Horizontal space always OK. */
26aea073 1038 if (c == ' ' || c == '\t')
0d9f234d 1039 ;
0d9f234d 1040 /* Just \f \v or \0 left. */
91fcd158 1041 else if (c == '\0')
f7d151fb 1042 saw_NUL = true;
93c80368 1043 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
500bee0a 1044 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
ebef4e8c
NB
1045 CPP_BUF_COL (buffer),
1046 "%s in preprocessing directive",
1047 c == '\f' ? "form feed" : "vertical tab");
0d9f234d 1048
0d9f234d 1049 c = *buffer->cur++;
45b966db 1050 }
ec5c56db 1051 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
1052 while (is_nvspace (c));
1053
f7d151fb 1054 if (saw_NUL)
0527bc4e 1055 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
f7d151fb 1056
480709cc 1057 buffer->cur--;
041c3194 1058}
45b966db 1059
93c80368
NB
1060/* See if the characters of a number token are valid in a name (no
1061 '.', '+' or '-'). */
1062static int
6cf87ca4 1063name_p (cpp_reader *pfile, const cpp_string *string)
93c80368
NB
1064{
1065 unsigned int i;
1066
1067 for (i = 0; i < string->len; i++)
1068 if (!is_idchar (string->text[i]))
1069 return 0;
1070
df383483 1071 return 1;
93c80368
NB
1072}
1073
50668cf6
GK
1074/* After parsing an identifier or other sequence, produce a warning about
1075 sequences not in NFC/NFKC. */
1076static void
1077warn_about_normalization (cpp_reader *pfile,
1078 const cpp_token *token,
1079 const struct normalize_state *s)
1080{
1081 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1082 && !pfile->state.skipping)
1083 {
1084 /* Make sure that the token is printed using UCNs, even
1085 if we'd otherwise happily print UTF-8. */
c3f829c1 1086 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
50668cf6
GK
1087 size_t sz;
1088
1089 sz = cpp_spell_token (pfile, token, buf, false) - buf;
1090 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
87cf0651
SB
1091 cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1092 "`%.*s' is not in NFKC", (int) sz, buf);
50668cf6 1093 else
87cf0651
SB
1094 cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1095 "`%.*s' is not in NFC", (int) sz, buf);
55e7f907 1096 free (buf);
50668cf6
GK
1097 }
1098}
1099
bced6edf 1100/* Returns TRUE if the sequence starting at buffer->cur is invalid in
1613e52b 1101 an identifier. FIRST is TRUE if this starts an identifier. */
bced6edf 1102static bool
50668cf6
GK
1103forms_identifier_p (cpp_reader *pfile, int first,
1104 struct normalize_state *state)
bced6edf 1105{
1613e52b
NB
1106 cpp_buffer *buffer = pfile->buffer;
1107
1108 if (*buffer->cur == '$')
1109 {
1110 if (!CPP_OPTION (pfile, dollars_in_ident))
1111 return false;
1112
1113 buffer->cur++;
78b8811a 1114 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1613e52b 1115 {
78b8811a 1116 CPP_OPTION (pfile, warn_dollars) = 0;
0527bc4e 1117 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1613e52b
NB
1118 }
1119
1120 return true;
1121 }
bced6edf 1122
1613e52b 1123 /* Is this a syntactically valid UCN? */
af15a2fe 1124 if (CPP_OPTION (pfile, extended_identifiers)
6baba9bb 1125 && *buffer->cur == '\\'
1613e52b 1126 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
bced6edf 1127 {
1613e52b 1128 buffer->cur += 2;
50668cf6
GK
1129 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1130 state))
1613e52b
NB
1131 return true;
1132 buffer->cur -= 2;
bced6edf 1133 }
bced6edf 1134
1613e52b 1135 return false;
bced6edf
NB
1136}
1137
17e7cb85
KT
1138/* Helper function to get the cpp_hashnode of the identifier BASE. */
1139static cpp_hashnode *
1140lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1141{
1142 cpp_hashnode *result;
1143 const uchar *cur;
1144 unsigned int len;
1145 unsigned int hash = HT_HASHSTEP (0, *base);
1146
1147 cur = base + 1;
1148 while (ISIDNUM (*cur))
1149 {
1150 hash = HT_HASHSTEP (hash, *cur);
1151 cur++;
1152 }
1153 len = cur - base;
1154 hash = HT_HASHFINISH (hash, len);
1155 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1156 base, len, hash, HT_ALLOC));
1157
1158 /* Rarely, identifiers require diagnostics when lexed. */
1159 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1160 && !pfile->state.skipping, 0))
1161 {
1162 /* It is allowed to poison the same identifier twice. */
1163 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1164 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1165 NODE_NAME (result));
1166
1167 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1168 replacement list of a variadic macro. */
1169 if (result == pfile->spec_nodes.n__VA_ARGS__
1170 && !pfile->state.va_args_ok)
1171 cpp_error (pfile, CPP_DL_PEDWARN,
1172 "__VA_ARGS__ can only appear in the expansion"
1173 " of a C99 variadic macro");
1174
1175 /* For -Wc++-compat, warn about use of C++ named operators. */
1176 if (result->flags & NODE_WARN_OPERATOR)
87cf0651
SB
1177 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1178 "identifier \"%s\" is a special operator name in C++",
1179 NODE_NAME (result));
17e7cb85
KT
1180 }
1181
1182 return result;
1183}
1184
1185/* Get the cpp_hashnode of an identifier specified by NAME in
1186 the current cpp_reader object. If none is found, NULL is returned. */
1187cpp_hashnode *
1188_cpp_lex_identifier (cpp_reader *pfile, const char *name)
1189{
1190 cpp_hashnode *result;
1191 result = lex_identifier_intern (pfile, (uchar *) name);
1192 return result;
1193}
1194
bced6edf 1195/* Lex an identifier starting at BUFFER->CUR - 1. */
0d9f234d 1196static cpp_hashnode *
50668cf6
GK
1197lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1198 struct normalize_state *nst)
45b966db 1199{
93c80368 1200 cpp_hashnode *result;
47e20491 1201 const uchar *cur;
c6e83800
ZW
1202 unsigned int len;
1203 unsigned int hash = HT_HASHSTEP (0, *base);
2c3fcba6 1204
c6e83800 1205 cur = pfile->buffer->cur;
47e20491
GK
1206 if (! starts_ucn)
1207 while (ISIDNUM (*cur))
1208 {
1209 hash = HT_HASHSTEP (hash, *cur);
1210 cur++;
1211 }
1212 pfile->buffer->cur = cur;
50668cf6 1213 if (starts_ucn || forms_identifier_p (pfile, false, nst))
10cf9bde 1214 {
47e20491
GK
1215 /* Slower version for identifiers containing UCNs (or $). */
1216 do {
1217 while (ISIDNUM (*pfile->buffer->cur))
50668cf6
GK
1218 {
1219 pfile->buffer->cur++;
1220 NORMALIZE_STATE_UPDATE_IDNUM (nst);
1221 }
1222 } while (forms_identifier_p (pfile, false, nst));
47e20491
GK
1223 result = _cpp_interpret_identifier (pfile, base,
1224 pfile->buffer->cur - base);
2c3fcba6 1225 }
47e20491
GK
1226 else
1227 {
1228 len = cur - base;
1229 hash = HT_HASHFINISH (hash, len);
bced6edf 1230
2bf41bf0
TT
1231 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1232 base, len, hash, HT_ALLOC));
47e20491 1233 }
2c3fcba6 1234
bced6edf 1235 /* Rarely, identifiers require diagnostics when lexed. */
2c3fcba6
ZW
1236 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1237 && !pfile->state.skipping, 0))
1238 {
1239 /* It is allowed to poison the same identifier twice. */
1240 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
0527bc4e 1241 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
2c3fcba6
ZW
1242 NODE_NAME (result));
1243
1244 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1245 replacement list of a variadic macro. */
1246 if (result == pfile->spec_nodes.n__VA_ARGS__
1247 && !pfile->state.va_args_ok)
0527bc4e 1248 cpp_error (pfile, CPP_DL_PEDWARN,
6cf87ca4
ZW
1249 "__VA_ARGS__ can only appear in the expansion"
1250 " of a C99 variadic macro");
3d8b2a98
ILT
1251
1252 /* For -Wc++-compat, warn about use of C++ named operators. */
1253 if (result->flags & NODE_WARN_OPERATOR)
87cf0651
SB
1254 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1255 "identifier \"%s\" is a special operator name in C++",
1256 NODE_NAME (result));
2c3fcba6
ZW
1257 }
1258
1259 return result;
1260}
1261
bced6edf 1262/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
45b966db 1263static void
50668cf6
GK
1264lex_number (cpp_reader *pfile, cpp_string *number,
1265 struct normalize_state *nst)
45b966db 1266{
562a5c27 1267 const uchar *cur;
bced6edf
NB
1268 const uchar *base;
1269 uchar *dest;
45b966db 1270
bced6edf
NB
1271 base = pfile->buffer->cur - 1;
1272 do
041c3194 1273 {
bced6edf 1274 cur = pfile->buffer->cur;
0d9f234d 1275
bced6edf
NB
1276 /* N.B. ISIDNUM does not include $. */
1277 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
50668cf6
GK
1278 {
1279 cur++;
1280 NORMALIZE_STATE_UPDATE_IDNUM (nst);
1281 }
45b966db 1282
10cf9bde 1283 pfile->buffer->cur = cur;
45b966db 1284 }
50668cf6 1285 while (forms_identifier_p (pfile, false, nst));
93c80368 1286
bced6edf
NB
1287 number->len = cur - base;
1288 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1289 memcpy (dest, base, number->len);
1290 dest[number->len] = '\0';
1291 number->text = dest;
93c80368
NB
1292}
1293
6338b358
NB
1294/* Create a token of type TYPE with a literal spelling. */
1295static void
6cf87ca4
ZW
1296create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1297 unsigned int len, enum cpp_ttype type)
6338b358
NB
1298{
1299 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
1300
1301 memcpy (dest, base, len);
1302 dest[len] = '\0';
1303 token->type = type;
1304 token->val.str.len = len;
1305 token->val.str.text = dest;
1306}
1307
00a81b8b
JM
1308/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1309 sequence from *FIRST_BUFF_P to LAST_BUFF_P. */
1310
1311static void
1312bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1313 _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1314{
1315 _cpp_buff *first_buff = *first_buff_p;
1316 _cpp_buff *last_buff = *last_buff_p;
1317
1318 if (first_buff == NULL)
1319 first_buff = last_buff = _cpp_get_buff (pfile, len);
1320 else if (len > BUFF_ROOM (last_buff))
1321 {
1322 size_t room = BUFF_ROOM (last_buff);
1323 memcpy (BUFF_FRONT (last_buff), base, room);
1324 BUFF_FRONT (last_buff) += room;
1325 base += room;
1326 len -= room;
1327 last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1328 }
1329
1330 memcpy (BUFF_FRONT (last_buff), base, len);
1331 BUFF_FRONT (last_buff) += len;
1332
1333 *first_buff_p = first_buff;
1334 *last_buff_p = last_buff;
1335}
1336
c865f923
ESR
1337
1338/* Returns true if a macro has been defined.
1339 This might not work if compile with -save-temps,
1340 or preprocess separately from compilation. */
1341
1342static bool
1343is_macro(cpp_reader *pfile, const uchar *base)
1344{
1345 const uchar *cur = base;
1346 if (! ISIDST (*cur))
1347 return false;
1348 unsigned int hash = HT_HASHSTEP (0, *cur);
1349 ++cur;
1350 while (ISIDNUM (*cur))
1351 {
1352 hash = HT_HASHSTEP (hash, *cur);
1353 ++cur;
1354 }
1355 hash = HT_HASHFINISH (hash, cur - base);
1356
1357 cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1358 base, cur - base, hash, HT_NO_INSERT));
1359
1360 return !result ? false : (result->type == NT_MACRO);
1361}
1362
1363
2c6e3f55 1364/* Lexes a raw string. The stored string contains the spelling, including
00a81b8b 1365 double quotes, delimiter string, '(' and ')', any leading
2c6e3f55
JJ
1366 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
1367 literal, or CPP_OTHER if it was not properly terminated.
1368
1369 The spelling is NUL-terminated, but it is not guaranteed that this
1370 is the first NUL since embedded NULs are preserved. */
1371
1372static void
1373lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1374 const uchar *cur)
1375{
2c6e3f55
JJ
1376 const uchar *raw_prefix;
1377 unsigned int raw_prefix_len = 0;
1378 enum cpp_ttype type;
1379 size_t total_len = 0;
1380 _cpp_buff *first_buff = NULL, *last_buff = NULL;
00a81b8b 1381 _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
2c6e3f55
JJ
1382
1383 type = (*base == 'L' ? CPP_WSTRING :
1384 *base == 'U' ? CPP_STRING32 :
1385 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1386 : CPP_STRING);
1387
1388 raw_prefix = cur + 1;
1389 while (raw_prefix_len < 16)
1390 {
1391 switch (raw_prefix[raw_prefix_len])
1392 {
52150625 1393 case ' ': case '(': case ')': case '\\': case '\t':
2c6e3f55
JJ
1394 case '\v': case '\f': case '\n': default:
1395 break;
1396 /* Basic source charset except the above chars. */
1397 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1398 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1399 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1400 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1401 case 'y': case 'z':
1402 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1403 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1404 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1405 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1406 case 'Y': case 'Z':
1407 case '0': case '1': case '2': case '3': case '4': case '5':
1408 case '6': case '7': case '8': case '9':
52150625 1409 case '_': case '{': case '}': case '#': case '[': case ']':
2c6e3f55
JJ
1410 case '<': case '>': case '%': case ':': case ';': case '.':
1411 case '?': case '*': case '+': case '-': case '/': case '^':
1412 case '&': case '|': case '~': case '!': case '=': case ',':
52150625 1413 case '"': case '\'':
2c6e3f55
JJ
1414 raw_prefix_len++;
1415 continue;
1416 }
1417 break;
1418 }
1419
52150625 1420 if (raw_prefix[raw_prefix_len] != '(')
2c6e3f55
JJ
1421 {
1422 int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
1423 + 1;
1424 if (raw_prefix_len == 16)
1425 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
1426 "raw string delimiter longer than 16 characters");
1427 else
1428 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
1429 "invalid character '%c' in raw string delimiter",
1430 (int) raw_prefix[raw_prefix_len]);
1431 pfile->buffer->cur = raw_prefix - 1;
1432 create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
1433 return;
1434 }
1435
1436 cur = raw_prefix + raw_prefix_len + 1;
1437 for (;;)
1438 {
00a81b8b
JM
1439#define BUF_APPEND(STR,LEN) \
1440 do { \
1441 bufring_append (pfile, (const uchar *)(STR), (LEN), \
1442 &first_buff, &last_buff); \
1443 total_len += (LEN); \
1444 } while (0);
1445
1446 cppchar_t c;
1447
1448 /* If we previously performed any trigraph or line splicing
1449 transformations, undo them within the body of the raw string. */
1450 while (note->pos < cur)
1451 ++note;
1452 for (; note->pos == cur; ++note)
1453 {
1454 switch (note->type)
1455 {
1456 case '\\':
1457 case ' ':
1458 /* Restore backslash followed by newline. */
1459 BUF_APPEND (base, cur - base);
1460 base = cur;
1461 BUF_APPEND ("\\", 1);
1462 after_backslash:
1463 if (note->type == ' ')
1464 {
1465 /* GNU backslash whitespace newline extension. FIXME
1466 could be any sequence of non-vertical space. When we
1467 can properly restore any such sequence, we should mark
1468 this note as handled so _cpp_process_line_notes
1469 doesn't warn. */
1470 BUF_APPEND (" ", 1);
1471 }
1472
1473 BUF_APPEND ("\n", 1);
1474 break;
1475
1476 case 0:
1477 /* Already handled. */
1478 break;
1479
1480 default:
1481 if (_cpp_trigraph_map[note->type])
1482 {
1483 /* Don't warn about this trigraph in
1484 _cpp_process_line_notes, since trigraphs show up as
1485 trigraphs in raw strings. */
d947ada0 1486 uchar type = note->type;
00a81b8b
JM
1487 note->type = 0;
1488
1489 if (!CPP_OPTION (pfile, trigraphs))
1490 /* If we didn't convert the trigraph in the first
1491 place, don't do anything now either. */
1492 break;
1493
1494 BUF_APPEND (base, cur - base);
1495 base = cur;
1496 BUF_APPEND ("??", 2);
1497
1498 /* ??/ followed by newline gets two line notes, one for
1499 the trigraph and one for the backslash/newline. */
1500 if (type == '/' && note[1].pos == cur)
1501 {
1502 if (note[1].type != '\\'
1503 && note[1].type != ' ')
1504 abort ();
1505 BUF_APPEND ("/", 1);
1506 ++note;
1507 goto after_backslash;
1508 }
1509 /* The ) from ??) could be part of the suffix. */
1510 else if (type == ')'
1511 && strncmp ((const char *) cur+1,
1512 (const char *) raw_prefix,
1513 raw_prefix_len) == 0
1514 && cur[raw_prefix_len+1] == '"')
1515 {
6cfae070
JJ
1516 BUF_APPEND (")", 1);
1517 base++;
1518 cur += raw_prefix_len + 2;
00a81b8b
JM
1519 goto break_outer_loop;
1520 }
1521 else
1522 {
1523 /* Skip the replacement character. */
1524 base = ++cur;
1525 BUF_APPEND (&type, 1);
1526 }
1527 }
1528 else
1529 abort ();
1530 break;
1531 }
1532 }
1533 c = *cur++;
2c6e3f55 1534
52150625 1535 if (c == ')'
2c6e3f55
JJ
1536 && strncmp ((const char *) cur, (const char *) raw_prefix,
1537 raw_prefix_len) == 0
1538 && cur[raw_prefix_len] == '"')
1539 {
1540 cur += raw_prefix_len + 1;
1541 break;
1542 }
1543 else if (c == '\n')
1544 {
1545 if (pfile->state.in_directive
1546 || pfile->state.parsing_args
1547 || pfile->state.in_deferred_pragma)
1548 {
1549 cur--;
1550 type = CPP_OTHER;
1551 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1552 "unterminated raw string");
1553 break;
1554 }
1555
00a81b8b 1556 BUF_APPEND (base, cur - base);
2c6e3f55
JJ
1557
1558 if (pfile->buffer->cur < pfile->buffer->rlimit)
1559 CPP_INCREMENT_LINE (pfile, 0);
1560 pfile->buffer->need_line = true;
1561
00a81b8b
JM
1562 pfile->buffer->cur = cur-1;
1563 _cpp_process_line_notes (pfile, false);
2c6e3f55
JJ
1564 if (!_cpp_get_fresh_line (pfile))
1565 {
1566 source_location src_loc = token->src_loc;
1567 token->type = CPP_EOF;
1568 /* Tell the compiler the line number of the EOF token. */
1569 token->src_loc = pfile->line_table->highest_line;
1570 token->flags = BOL;
1571 if (first_buff != NULL)
1572 _cpp_release_buff (pfile, first_buff);
1573 cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1574 "unterminated raw string");
1575 return;
1576 }
1577
1578 cur = base = pfile->buffer->cur;
00a81b8b 1579 note = &pfile->buffer->notes[pfile->buffer->cur_note];
2c6e3f55 1580 }
2c6e3f55 1581 }
00a81b8b 1582 break_outer_loop:
2c6e3f55 1583
3ce4f9e4
ESR
1584 if (CPP_OPTION (pfile, user_literals))
1585 {
c865f923
ESR
1586 /* If a string format macro, say from inttypes.h, is placed touching
1587 a string literal it could be parsed as a C++11 user-defined string
1588 literal thus breaking the program.
1589 Try to identify macros with is_macro. A warning is issued. */
1590 if (is_macro (pfile, cur))
7f5f5f98 1591 {
112448b4 1592 /* Raise a warning, but do not consume subsequent tokens. */
7f5f5f98
OW
1593 if (CPP_OPTION (pfile, warn_literal_suffix))
1594 cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1595 token->src_loc, 0,
1596 "invalid suffix on literal; C++11 requires "
c865f923 1597 "a space between literal and string macro");
7f5f5f98 1598 }
3ce4f9e4 1599 /* Grab user defined literal suffix. */
561f7fc7 1600 else if (ISIDST (*cur))
3ce4f9e4
ESR
1601 {
1602 type = cpp_userdef_string_add_type (type);
1603 ++cur;
7f5f5f98
OW
1604
1605 while (ISIDNUM (*cur))
1606 ++cur;
3ce4f9e4 1607 }
3ce4f9e4
ESR
1608 }
1609
2c6e3f55
JJ
1610 pfile->buffer->cur = cur;
1611 if (first_buff == NULL)
1612 create_literal (pfile, token, base, cur - base, type);
1613 else
1614 {
1615 uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1616
1617 token->type = type;
1618 token->val.str.len = total_len + (cur - base);
1619 token->val.str.text = dest;
1620 last_buff = first_buff;
1621 while (last_buff != NULL)
1622 {
1623 memcpy (dest, last_buff->base,
1624 BUFF_FRONT (last_buff) - last_buff->base);
1625 dest += BUFF_FRONT (last_buff) - last_buff->base;
1626 last_buff = last_buff->next;
1627 }
1628 _cpp_release_buff (pfile, first_buff);
1629 memcpy (dest, base, cur - base);
1630 dest[cur - base] = '\0';
1631 }
1632}
1633
bced6edf 1634/* Lexes a string, character constant, or angle-bracketed header file
6338b358 1635 name. The stored string contains the spelling, including opening
2c6e3f55
JJ
1636 quote and any leading 'L', 'u', 'U' or 'u8' and optional
1637 'R' modifier. It returns the type of the literal, or CPP_OTHER
1638 if it was not properly terminated, or CPP_LESS for an unterminated
1639 header name which must be relexed as normal tokens.
6338b358
NB
1640
1641 The spelling is NUL-terminated, but it is not guaranteed that this
1642 is the first NUL since embedded NULs are preserved. */
041c3194 1643static void
6cf87ca4 1644lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
45b966db 1645{
6338b358
NB
1646 bool saw_NUL = false;
1647 const uchar *cur;
bced6edf 1648 cppchar_t terminator;
6338b358
NB
1649 enum cpp_ttype type;
1650
1651 cur = base;
1652 terminator = *cur++;
2c6e3f55 1653 if (terminator == 'L' || terminator == 'U')
6338b358 1654 terminator = *cur++;
2c6e3f55
JJ
1655 else if (terminator == 'u')
1656 {
1657 terminator = *cur++;
1658 if (terminator == '8')
1659 terminator = *cur++;
1660 }
1661 if (terminator == 'R')
1662 {
1663 lex_raw_string (pfile, token, base, cur);
1664 return;
1665 }
1666 if (terminator == '"')
b6baa67d
KVH
1667 type = (*base == 'L' ? CPP_WSTRING :
1668 *base == 'U' ? CPP_STRING32 :
2c6e3f55
JJ
1669 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1670 : CPP_STRING);
6338b358 1671 else if (terminator == '\'')
b6baa67d
KVH
1672 type = (*base == 'L' ? CPP_WCHAR :
1673 *base == 'U' ? CPP_CHAR32 :
1674 *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
6338b358
NB
1675 else
1676 terminator = '>', type = CPP_HEADER_NAME;
93c80368 1677
0d9f234d 1678 for (;;)
45b966db 1679 {
6338b358 1680 cppchar_t c = *cur++;
7868b4a2 1681
6f572ac2 1682 /* In #include-style directives, terminators are not escapable. */
6338b358
NB
1683 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1684 cur++;
1685 else if (c == terminator)
bced6edf 1686 break;
6338b358 1687 else if (c == '\n')
0d9f234d 1688 {
6338b358 1689 cur--;
4bb09c26
JM
1690 /* Unmatched quotes always yield undefined behavior, but
1691 greedy lexing means that what appears to be an unterminated
1692 header name may actually be a legitimate sequence of tokens. */
1693 if (terminator == '>')
1694 {
1695 token->type = CPP_LESS;
1696 return;
1697 }
6338b358
NB
1698 type = CPP_OTHER;
1699 break;
45b966db 1700 }
6338b358
NB
1701 else if (c == '\0')
1702 saw_NUL = true;
45b966db
ZW
1703 }
1704
6338b358 1705 if (saw_NUL && !pfile->state.skipping)
0527bc4e
JDA
1706 cpp_error (pfile, CPP_DL_WARNING,
1707 "null character(s) preserved in literal");
45b966db 1708
c663e301
JM
1709 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1710 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1711 (int) terminator);
1712
3ce4f9e4
ESR
1713 if (CPP_OPTION (pfile, user_literals))
1714 {
c865f923
ESR
1715 /* If a string format macro, say from inttypes.h, is placed touching
1716 a string literal it could be parsed as a C++11 user-defined string
1717 literal thus breaking the program.
1718 Try to identify macros with is_macro. A warning is issued. */
1719 if (is_macro (pfile, cur))
7f5f5f98 1720 {
112448b4 1721 /* Raise a warning, but do not consume subsequent tokens. */
7f5f5f98
OW
1722 if (CPP_OPTION (pfile, warn_literal_suffix))
1723 cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1724 token->src_loc, 0,
1725 "invalid suffix on literal; C++11 requires "
c865f923 1726 "a space between literal and string macro");
7f5f5f98 1727 }
3ce4f9e4 1728 /* Grab user defined literal suffix. */
561f7fc7 1729 else if (ISIDST (*cur))
3ce4f9e4
ESR
1730 {
1731 type = cpp_userdef_char_add_type (type);
1732 type = cpp_userdef_string_add_type (type);
1733 ++cur;
7f5f5f98
OW
1734
1735 while (ISIDNUM (*cur))
1736 ++cur;
3ce4f9e4 1737 }
3ce4f9e4
ESR
1738 }
1739
6338b358
NB
1740 pfile->buffer->cur = cur;
1741 create_literal (pfile, token, base, cur - base, type);
0d9f234d 1742}
041c3194 1743
631d0d36
MG
1744/* Return the comment table. The client may not make any assumption
1745 about the ordering of the table. */
1746cpp_comment_table *
1747cpp_get_comments (cpp_reader *pfile)
1748{
1749 return &pfile->comments;
1750}
1751
1752/* Append a comment to the end of the comment table. */
1753static void
1754store_comment (cpp_reader *pfile, cpp_token *token)
1755{
1756 int len;
1757
1758 if (pfile->comments.allocated == 0)
1759 {
1760 pfile->comments.allocated = 256;
1761 pfile->comments.entries = (cpp_comment *) xmalloc
1762 (pfile->comments.allocated * sizeof (cpp_comment));
1763 }
1764
1765 if (pfile->comments.count == pfile->comments.allocated)
1766 {
1767 pfile->comments.allocated *= 2;
1768 pfile->comments.entries = (cpp_comment *) xrealloc
1769 (pfile->comments.entries,
1770 pfile->comments.allocated * sizeof (cpp_comment));
1771 }
1772
1773 len = token->val.str.len;
1774
1775 /* Copy comment. Note, token may not be NULL terminated. */
1776 pfile->comments.entries[pfile->comments.count].comment =
1777 (char *) xmalloc (sizeof (char) * (len + 1));
1778 memcpy (pfile->comments.entries[pfile->comments.count].comment,
1779 token->val.str.text, len);
1780 pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1781
1782 /* Set source location. */
1783 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1784
1785 /* Increment the count of entries in the comment table. */
1786 pfile->comments.count++;
1787}
1788
93c80368 1789/* The stored comment includes the comment start and any terminator. */
9e62c811 1790static void
6cf87ca4
ZW
1791save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1792 cppchar_t type)
9e62c811 1793{
041c3194 1794 unsigned char *buffer;
651a20b5 1795 unsigned int len, clen, i;
df383483 1796
1c6d33ef 1797 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
480709cc 1798
3542203b
NB
1799 /* C++ comments probably (not definitely) have moved past a new
1800 line, which we don't want to save in the comment. */
480709cc 1801 if (is_vspace (pfile->buffer->cur[-1]))
3542203b 1802 len--;
477cdac7 1803
651a20b5
KT
1804 /* If we are currently in a directive or in argument parsing, then
1805 we need to store all C++ comments as C comments internally, and
1806 so we need to allocate a little extra space in that case.
477cdac7
JT
1807
1808 Note that the only time we encounter a directive here is
1809 when we are saving comments in a "#define". */
651a20b5
KT
1810 clen = ((pfile->state.in_directive || pfile->state.parsing_args)
1811 && type == '/') ? len + 2 : len;
477cdac7
JT
1812
1813 buffer = _cpp_unaligned_alloc (pfile, clen);
df383483 1814
041c3194 1815 token->type = CPP_COMMENT;
477cdac7 1816 token->val.str.len = clen;
0d9f234d 1817 token->val.str.text = buffer;
45b966db 1818
1c6d33ef
NB
1819 buffer[0] = '/';
1820 memcpy (buffer + 1, from, len - 1);
477cdac7 1821
1eeeb6a4 1822 /* Finish conversion to a C comment, if necessary. */
651a20b5 1823 if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
477cdac7
JT
1824 {
1825 buffer[1] = '*';
1826 buffer[clen - 2] = '*';
1827 buffer[clen - 1] = '/';
651a20b5
KT
1828 /* As there can be in a C++ comments illegal sequences for C comments
1829 we need to filter them out. */
1830 for (i = 2; i < (clen - 2); i++)
1831 if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
1832 buffer[i] = '|';
477cdac7 1833 }
631d0d36
MG
1834
1835 /* Finally store this comment for use by clients of libcpp. */
1836 store_comment (pfile, token);
0d9f234d 1837}
45b966db 1838
5fddcffc
NB
1839/* Allocate COUNT tokens for RUN. */
1840void
6cf87ca4 1841_cpp_init_tokenrun (tokenrun *run, unsigned int count)
5fddcffc 1842{
72bb2c39 1843 run->base = XNEWVEC (cpp_token, count);
5fddcffc
NB
1844 run->limit = run->base + count;
1845 run->next = NULL;
1846}
1847
1848/* Returns the next tokenrun, or creates one if there is none. */
1849static tokenrun *
6cf87ca4 1850next_tokenrun (tokenrun *run)
5fddcffc
NB
1851{
1852 if (run->next == NULL)
1853 {
72bb2c39 1854 run->next = XNEW (tokenrun);
bdcbe496 1855 run->next->prev = run;
5fddcffc
NB
1856 _cpp_init_tokenrun (run->next, 250);
1857 }
1858
1859 return run->next;
1860}
1861
ad2305ad 1862/* Return the number of not yet processed token in a given
92582b75
TT
1863 context. */
1864int
ad2305ad 1865_cpp_remaining_tokens_num_in_context (cpp_context *context)
92582b75 1866{
92582b75 1867 if (context->tokens_kind == TOKENS_KIND_DIRECT)
cbbcf655 1868 return (LAST (context).token - FIRST (context).token);
92582b75
TT
1869 else if (context->tokens_kind == TOKENS_KIND_INDIRECT
1870 || context->tokens_kind == TOKENS_KIND_EXTENDED)
cbbcf655 1871 return (LAST (context).ptoken - FIRST (context).ptoken);
92582b75
TT
1872 else
1873 abort ();
1874}
1875
ad2305ad
DS
1876/* Returns the token present at index INDEX in a given context. If
1877 INDEX is zero, the next token to be processed is returned. */
92582b75 1878static const cpp_token*
ad2305ad 1879_cpp_token_from_context_at (cpp_context *context, int index)
92582b75 1880{
92582b75
TT
1881 if (context->tokens_kind == TOKENS_KIND_DIRECT)
1882 return &(FIRST (context).token[index]);
1883 else if (context->tokens_kind == TOKENS_KIND_INDIRECT
1884 || context->tokens_kind == TOKENS_KIND_EXTENDED)
1885 return FIRST (context).ptoken[index];
1886 else
1887 abort ();
1888}
1889
5950c3c9
BE
1890/* Look ahead in the input stream. */
1891const cpp_token *
1892cpp_peek_token (cpp_reader *pfile, int index)
1893{
1894 cpp_context *context = pfile->context;
1895 const cpp_token *peektok;
1896 int count;
1897
1898 /* First, scan through any pending cpp_context objects. */
1899 while (context->prev)
1900 {
ad2305ad 1901 ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
5950c3c9
BE
1902
1903 if (index < (int) sz)
ad2305ad 1904 return _cpp_token_from_context_at (context, index);
5950c3c9
BE
1905 index -= (int) sz;
1906 context = context->prev;
1907 }
1908
1909 /* We will have to read some new tokens after all (and do so
1910 without invalidating preceding tokens). */
1911 count = index;
1912 pfile->keep_tokens++;
1913
1914 do
1915 {
1916 peektok = _cpp_lex_token (pfile);
1917 if (peektok->type == CPP_EOF)
1918 return peektok;
1919 }
1920 while (index--);
1921
1922 _cpp_backup_tokens_direct (pfile, count + 1);
1923 pfile->keep_tokens--;
1924
1925 return peektok;
1926}
1927
4ed5bcfb
NB
1928/* Allocate a single token that is invalidated at the same time as the
1929 rest of the tokens on the line. Has its line and col set to the
1930 same as the last lexed token, so that diagnostics appear in the
1931 right place. */
1932cpp_token *
6cf87ca4 1933_cpp_temp_token (cpp_reader *pfile)
4ed5bcfb
NB
1934{
1935 cpp_token *old, *result;
5950c3c9
BE
1936 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
1937 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
4ed5bcfb
NB
1938
1939 old = pfile->cur_token - 1;
5950c3c9
BE
1940 /* Any pre-existing lookaheads must not be clobbered. */
1941 if (la)
1942 {
1943 if (sz <= la)
1944 {
1945 tokenrun *next = next_tokenrun (pfile->cur_run);
1946
1947 if (sz < la)
1948 memmove (next->base + 1, next->base,
1949 (la - sz) * sizeof (cpp_token));
1950
1951 next->base[0] = pfile->cur_run->limit[-1];
1952 }
1953
1954 if (sz > 1)
1955 memmove (pfile->cur_token + 1, pfile->cur_token,
1956 MIN (la, sz - 1) * sizeof (cpp_token));
1957 }
1958
1959 if (!sz && pfile->cur_token == pfile->cur_run->limit)
4ed5bcfb
NB
1960 {
1961 pfile->cur_run = next_tokenrun (pfile->cur_run);
1962 pfile->cur_token = pfile->cur_run->base;
1963 }
1964
1965 result = pfile->cur_token++;
12f9df4e 1966 result->src_loc = old->src_loc;
4ed5bcfb
NB
1967 return result;
1968}
1969
14baae01
NB
1970/* Lex a token into RESULT (external interface). Takes care of issues
1971 like directive handling, token lookahead, multiple include
a1f300c0 1972 optimization and skipping. */
345894b4 1973const cpp_token *
6cf87ca4 1974_cpp_lex_token (cpp_reader *pfile)
5fddcffc 1975{
bdcbe496 1976 cpp_token *result;
5fddcffc 1977
bdcbe496 1978 for (;;)
5fddcffc 1979 {
bdcbe496 1980 if (pfile->cur_token == pfile->cur_run->limit)
5fddcffc 1981 {
bdcbe496
NB
1982 pfile->cur_run = next_tokenrun (pfile->cur_run);
1983 pfile->cur_token = pfile->cur_run->base;
5fddcffc 1984 }
ee380365
TT
1985 /* We assume that the current token is somewhere in the current
1986 run. */
1987 if (pfile->cur_token < pfile->cur_run->base
1988 || pfile->cur_token >= pfile->cur_run->limit)
1989 abort ();
5fddcffc 1990
bdcbe496 1991 if (pfile->lookaheads)
14baae01
NB
1992 {
1993 pfile->lookaheads--;
1994 result = pfile->cur_token++;
1995 }
bdcbe496 1996 else
14baae01 1997 result = _cpp_lex_direct (pfile);
bdcbe496
NB
1998
1999 if (result->flags & BOL)
5fddcffc 2000 {
bdcbe496
NB
2001 /* Is this a directive. If _cpp_handle_directive returns
2002 false, it is an assembler #. */
2003 if (result->type == CPP_HASH
e808ec9c
NB
2004 /* 6.10.3 p 11: Directives in a list of macro arguments
2005 gives undefined behavior. This implementation
2006 handles the directive as normal. */
bc4071dd 2007 && pfile->state.parsing_args != 1)
21b11495 2008 {
bc4071dd 2009 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
21b11495 2010 {
bc4071dd
RH
2011 if (pfile->directive_result.type == CPP_PADDING)
2012 continue;
21b11495 2013 result = &pfile->directive_result;
21b11495
ZW
2014 }
2015 }
bc4071dd
RH
2016 else if (pfile->state.in_deferred_pragma)
2017 result = &pfile->directive_result;
21b11495 2018
97293897 2019 if (pfile->cb.line_change && !pfile->state.skipping)
6cf87ca4 2020 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
5fddcffc 2021 }
5fddcffc 2022
bdcbe496 2023 /* We don't skip tokens in directives. */
bc4071dd 2024 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
bdcbe496 2025 break;
5fddcffc 2026
bdcbe496 2027 /* Outside a directive, invalidate controlling macros. At file
14baae01 2028 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
6356f892 2029 get here and MI optimization works. */
5fddcffc 2030 pfile->mi_valid = false;
bdcbe496
NB
2031
2032 if (!pfile->state.skipping || result->type == CPP_EOF)
2033 break;
5fddcffc
NB
2034 }
2035
345894b4 2036 return result;
5fddcffc
NB
2037}
2038
26aea073
NB
2039/* Returns true if a fresh line has been loaded. */
2040bool
6cf87ca4 2041_cpp_get_fresh_line (cpp_reader *pfile)
004cb263 2042{
22234f56
PB
2043 int return_at_eof;
2044
26aea073
NB
2045 /* We can't get a new line until we leave the current directive. */
2046 if (pfile->state.in_directive)
2047 return false;
df383483 2048
26aea073 2049 for (;;)
1a76916c 2050 {
26aea073 2051 cpp_buffer *buffer = pfile->buffer;
1a76916c 2052
26aea073
NB
2053 if (!buffer->need_line)
2054 return true;
2055
2056 if (buffer->next_line < buffer->rlimit)
004cb263 2057 {
26aea073
NB
2058 _cpp_clean_line (pfile);
2059 return true;
2060 }
004cb263 2061
26aea073
NB
2062 /* First, get out of parsing arguments state. */
2063 if (pfile->state.parsing_args)
2064 return false;
2065
2066 /* End of buffer. Non-empty files should end in a newline. */
2067 if (buffer->buf != buffer->rlimit
2068 && buffer->next_line > buffer->rlimit
2069 && !buffer->from_stage3)
2070 {
ed0e74e0 2071 /* Clip to buffer size. */
26aea073 2072 buffer->next_line = buffer->rlimit;
26aea073 2073 }
22234f56
PB
2074
2075 return_at_eof = buffer->return_at_eof;
26aea073 2076 _cpp_pop_buffer (pfile);
22234f56 2077 if (pfile->buffer == NULL || return_at_eof)
a506c55c 2078 return false;
26aea073 2079 }
004cb263
NB
2080}
2081
6f572ac2
NB
/* Classify a one- or two-character operator.  result->type is set to
   ELSE_TYPE; if the next unread character at buffer->cur equals CHAR,
   that character is consumed and the type becomes THEN_TYPE instead.
   Relies on variables named `result' and `buffer' being in scope at
   the point of use.  The arguments are parenthesized so that
   arbitrary expressions may be passed safely.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      result->type = (ELSE_TYPE);               \
      if (*buffer->cur == (CHAR))               \
        {                                       \
          buffer->cur++;                        \
          result->type = (THEN_TYPE);           \
        }                                       \
    }                                           \
  while (0)
480709cc 2090
14baae01
NB
2091/* Lex a token into pfile->cur_token, which is also incremented, to
2092 get diagnostics pointing to the correct location.
2093
2094 Does not handle issues such as token lookahead, multiple-include
f1ba665b 2095 optimization, directives, skipping etc. This function is only
14baae01
NB
2096 suitable for use by _cpp_lex_token, and in special cases like
2097 lex_expansion_token which doesn't care for any of these issues.
2098
2099 When meeting a newline, returns CPP_EOF if parsing a directive,
2100 otherwise returns to the start of the token buffer if permissible.
2101 Returns the location of the lexed token. */
/* Implementation notes, supplementing the description above: the
   token slot is taken from pfile->cur_token, which is advanced before
   any lexing happens.  The labels fresh_line, update_tokens_line and
   skipped_white are re-entry points used after a newline, after a
   comment that is not saved, and after skipped whitespace
   respectively.  */
2102cpp_token *
6cf87ca4 2103_cpp_lex_direct (cpp_reader *pfile)
45b966db 2104{
0d9f234d 2105 cppchar_t c;
adb84b42 2106 cpp_buffer *buffer;
0d9f234d 2107 const unsigned char *comment_start;
14baae01 2108 cpp_token *result = pfile->cur_token++;
9ec7291f 2109
/* A new logical line starts here: flags are reset and, if the buffer
   needs a line, one is fetched before lexing continues.  */
5fddcffc 2110 fresh_line:
26aea073 2111 result->flags = 0;
2be570f9 2112 buffer = pfile->buffer;
a506c55c 2113 if (buffer->need_line)
26aea073 2114 {
bc4071dd
RH
2115 if (pfile->state.in_deferred_pragma)
2116 {
2117 result->type = CPP_PRAGMA_EOL;
2118 pfile->state.in_deferred_pragma = false;
2119 if (!pfile->state.pragma_allow_expansion)
2120 pfile->state.prevent_expansion--;
2121 return result;
2122 }
26aea073
NB
2123 if (!_cpp_get_fresh_line (pfile))
2124 {
2125 result->type = CPP_EOF;
9ff7868d
NB
2126 if (!pfile->state.in_directive)
2127 {
2128 /* Tell the compiler the line number of the EOF token. */
500bee0a 2129 result->src_loc = pfile->line_table->highest_line;
9ff7868d
NB
2130 result->flags = BOL;
2131 }
26aea073
NB
2132 return result;
2133 }
/* Unless tokens have to be kept, restart token storage at the
   beginning of base_run and lex into its first slot.  */
2134 if (!pfile->keep_tokens)
2135 {
2136 pfile->cur_run = &pfile->base_run;
2137 result = pfile->base_run.base;
2138 pfile->cur_token = result + 1;
2139 }
2140 result->flags = BOL;
2141 if (pfile->state.parsing_args == 2)
2142 result->flags |= PREV_WHITE;
2143 }
a506c55c 2144 buffer = pfile->buffer;
5fddcffc 2145 update_tokens_line:
500bee0a 2146 result->src_loc = pfile->line_table->highest_line;
041c3194 2147
5fddcffc 2148 skipped_white:
26aea073
NB
2149 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
2150 && !pfile->overlaid_buffer)
2151 {
2152 _cpp_process_line_notes (pfile, false);
500bee0a 2153 result->src_loc = pfile->line_table->highest_line;
26aea073 2154 }
480709cc 2155 c = *buffer->cur++;
12f9df4e 2156
e3dfef44
GC
2157 if (pfile->forced_token_location_p)
2158 result->src_loc = *pfile->forced_token_location_p;
2159 else
2160 result->src_loc = linemap_position_for_column (pfile->line_table,
2161 CPP_BUF_COLUMN (buffer, buffer->cur));
5fddcffc 2162
0d9f234d 2163 switch (c)
45b966db 2164 {
4d6baafa
NB
2165 case ' ': case '\t': case '\f': case '\v': case '\0':
2166 result->flags |= PREV_WHITE;
26aea073
NB
2167 skip_whitespace (pfile, c);
2168 goto skipped_white;
0d9f234d 2169
/* End of line: mark the buffer as needing a new line and start over
   at fresh_line.  */
26aea073 2170 case '\n':
12f9df4e
PB
2171 if (buffer->cur < buffer->rlimit)
2172 CPP_INCREMENT_LINE (pfile, 0);
26aea073
NB
2173 buffer->need_line = true;
2174 goto fresh_line;
46d07497 2175
0d9f234d
NB
2176 case '0': case '1': case '2': case '3': case '4':
2177 case '5': case '6': case '7': case '8': case '9':
50668cf6
GK
2178 {
2179 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2180 result->type = CPP_NUMBER;
2181 lex_number (pfile, &result->val.str, &nst);
2182 warn_about_normalization (pfile, result, &nst);
2183 break;
2184 }
46d07497 2185
0abc6a6a 2186 case 'L':
b6baa67d
KVH
2187 case 'u':
2188 case 'U':
2c6e3f55
JJ
2189 case 'R':
2190 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2191 wide strings or raw strings. */
a48e3dd1
JM
2192 if (c == 'L' || CPP_OPTION (pfile, rliterals)
2193 || (c != 'R' && CPP_OPTION (pfile, uliterals)))
bced6edf 2194 {
2c6e3f55
JJ
2195 if ((*buffer->cur == '\'' && c != 'R')
2196 || *buffer->cur == '"'
2197 || (*buffer->cur == 'R'
2198 && c != 'R'
2199 && buffer->cur[1] == '"'
a48e3dd1 2200 && CPP_OPTION (pfile, rliterals))
2c6e3f55
JJ
2201 || (*buffer->cur == '8'
2202 && c == 'u'
2203 && (buffer->cur[1] == '"'
a48e3dd1
JM
2204 || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
2205 && CPP_OPTION (pfile, rliterals)))))
b6baa67d
KVH
2206 {
2207 lex_string (pfile, result, buffer->cur - 1);
2208 break;
2209 }
bced6edf 2210 }
df383483 2211 /* Fall through. */
0abc6a6a 2212
0d9f234d
NB
2213 case '_':
2214 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2215 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2216 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
b6baa67d 2217 case 's': case 't': case 'v': case 'w': case 'x':
0d9f234d
NB
2218 case 'y': case 'z':
2219 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
0abc6a6a 2220 case 'G': case 'H': case 'I': case 'J': case 'K':
2c6e3f55 2221 case 'M': case 'N': case 'O': case 'P': case 'Q':
b6baa67d 2222 case 'S': case 'T': case 'V': case 'W': case 'X':
0d9f234d
NB
2223 case 'Y': case 'Z':
2224 result->type = CPP_NAME;
50668cf6
GK
2225 {
2226 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
9a0c6187
JM
2227 result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
2228 &nst);
50668cf6
GK
2229 warn_about_normalization (pfile, result, &nst);
2230 }
0d9f234d 2231
0d9f234d 2232 /* Convert named operators to their proper types. */
9a0c6187 2233 if (result->val.node.node->flags & NODE_OPERATOR)
0d9f234d
NB
2234 {
2235 result->flags |= NAMED_OP;
9a0c6187 2236 result->type = (enum cpp_ttype) result->val.node.node->directive_index;
0d9f234d
NB
2237 }
2238 break;
2239
2240 case '\'':
2241 case '"':
6338b358 2242 lex_string (pfile, result, buffer->cur - 1);
0d9f234d 2243 break;
041c3194 2244
0d9f234d 2245 case '/':
1c6d33ef
NB
2246 /* A potential block or line comment. */
2247 comment_start = buffer->cur;
6f572ac2
NB
2248 c = *buffer->cur;
2249
1c6d33ef
NB
2250 if (c == '*')
2251 {
26aea073 2252 if (_cpp_skip_block_comment (pfile))
0527bc4e 2253 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
0d9f234d 2254 }
480709cc 2255 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
12f9df4e 2256 || cpp_in_system_header (pfile)))
0d9f234d 2257 {
bdb05a7b
NB
2258 /* Warn about comments only if pedantically GNUC89, and not
2259 in system headers. */
2260 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 2261 && ! buffer->warned_cplusplus_comments)
041c3194 2262 {
0527bc4e 2263 cpp_error (pfile, CPP_DL_PEDWARN,
56508306 2264 "C++ style comments are not allowed in ISO C90");
0527bc4e 2265 cpp_error (pfile, CPP_DL_PEDWARN,
ebef4e8c 2266 "(this will be reported only once per input file)");
1c6d33ef
NB
2267 buffer->warned_cplusplus_comments = 1;
2268 }
0d9f234d 2269
01ef6563 2270 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
87cf0651 2271 cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
1c6d33ef 2272 }
480709cc
NB
2273 else if (c == '=')
2274 {
6f572ac2 2275 buffer->cur++;
480709cc
NB
2276 result->type = CPP_DIV_EQ;
2277 break;
2278 }
2279 else
2280 {
480709cc
NB
2281 result->type = CPP_DIV;
2282 break;
2283 }
0d9f234d 2284
/* Only the two comment branches above reach this point.  When
   comments are not being saved, the comment merely counts as
   whitespace in front of the next token.  */
1c6d33ef
NB
2285 if (!pfile->state.save_comments)
2286 {
2287 result->flags |= PREV_WHITE;
5fddcffc 2288 goto update_tokens_line;
0d9f234d 2289 }
1c6d33ef
NB
2290
2291 /* Save the comment as a token in its own right. */
477cdac7 2292 save_comment (pfile, result, comment_start, c);
bdcbe496 2293 break;
0d9f234d
NB
2294
2295 case '<':
2296 if (pfile->state.angled_headers)
2297 {
6338b358 2298 lex_string (pfile, result, buffer->cur - 1);
4bb09c26
JM
2299 if (result->type != CPP_LESS)
2300 break;
0d9f234d 2301 }
45b966db 2302
6f572ac2
NB
2303 result->type = CPP_LESS;
2304 if (*buffer->cur == '=')
2305 buffer->cur++, result->type = CPP_LESS_EQ;
2306 else if (*buffer->cur == '<')
0d9f234d 2307 {
6f572ac2
NB
2308 buffer->cur++;
2309 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
0d9f234d 2310 }
6f572ac2 2311 else if (CPP_OPTION (pfile, digraphs))
480709cc 2312 {
6f572ac2
NB
2313 if (*buffer->cur == ':')
2314 {
1582c677
PC
2315 /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
2316 three characters are <:: and the subsequent character
2317 is neither : nor >, the < is treated as a preprocessor
2318 token by itself". */
2319 if (CPP_OPTION (pfile, cplusplus)
61949153
PC
2320 && CPP_OPTION (pfile, lang) != CLK_CXX98
2321 && CPP_OPTION (pfile, lang) != CLK_GNUCXX
1582c677
PC
2322 && buffer->cur[1] == ':'
2323 && buffer->cur[2] != ':' && buffer->cur[2] != '>')
2324 break;
2325
6f572ac2
NB
2326 buffer->cur++;
2327 result->flags |= DIGRAPH;
2328 result->type = CPP_OPEN_SQUARE;
2329 }
2330 else if (*buffer->cur == '%')
2331 {
2332 buffer->cur++;
2333 result->flags |= DIGRAPH;
2334 result->type = CPP_OPEN_BRACE;
2335 }
480709cc 2336 }
0d9f234d
NB
2337 break;
2338
2339 case '>':
6f572ac2
NB
2340 result->type = CPP_GREATER;
2341 if (*buffer->cur == '=')
2342 buffer->cur++, result->type = CPP_GREATER_EQ;
2343 else if (*buffer->cur == '>')
0d9f234d 2344 {
6f572ac2
NB
2345 buffer->cur++;
2346 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
2347 }
0d9f234d
NB
2348 break;
2349
cbcff6df 2350 case '%':
6f572ac2
NB
2351 result->type = CPP_MOD;
2352 if (*buffer->cur == '=')
2353 buffer->cur++, result->type = CPP_MOD_EQ;
2354 else if (CPP_OPTION (pfile, digraphs))
480709cc 2355 {
6f572ac2 2356 if (*buffer->cur == ':')
480709cc 2357 {
6f572ac2
NB
2358 buffer->cur++;
2359 result->flags |= DIGRAPH;
2360 result->type = CPP_HASH;
2361 if (*buffer->cur == '%' && buffer->cur[1] == ':')
9a0c6187 2362 buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
6f572ac2
NB
2363 }
2364 else if (*buffer->cur == '>')
2365 {
2366 buffer->cur++;
2367 result->flags |= DIGRAPH;
2368 result->type = CPP_CLOSE_BRACE;
480709cc 2369 }
480709cc 2370 }
0d9f234d
NB
2371 break;
2372
cbcff6df 2373 case '.':
480709cc 2374 result->type = CPP_DOT;
6f572ac2 2375 if (ISDIGIT (*buffer->cur))
480709cc 2376 {
50668cf6 2377 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
480709cc 2378 result->type = CPP_NUMBER;
50668cf6
GK
2379 lex_number (pfile, &result->val.str, &nst);
2380 warn_about_normalization (pfile, result, &nst);
480709cc 2381 }
6f572ac2
NB
2382 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
2383 buffer->cur += 2, result->type = CPP_ELLIPSIS;
2384 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2385 buffer->cur++, result->type = CPP_DOT_STAR;
0d9f234d 2386 break;
45b966db 2387
0d9f234d 2388 case '+':
6f572ac2
NB
2389 result->type = CPP_PLUS;
2390 if (*buffer->cur == '+')
2391 buffer->cur++, result->type = CPP_PLUS_PLUS;
2392 else if (*buffer->cur == '=')
2393 buffer->cur++, result->type = CPP_PLUS_EQ;
0d9f234d 2394 break;
04e3ec78 2395
0d9f234d 2396 case '-':
6f572ac2
NB
2397 result->type = CPP_MINUS;
2398 if (*buffer->cur == '>')
0d9f234d 2399 {
6f572ac2 2400 buffer->cur++;
480709cc 2401 result->type = CPP_DEREF;
6f572ac2
NB
2402 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2403 buffer->cur++, result->type = CPP_DEREF_STAR;
480709cc 2404 }
6f572ac2
NB
2405 else if (*buffer->cur == '-')
2406 buffer->cur++, result->type = CPP_MINUS_MINUS;
2407 else if (*buffer->cur == '=')
2408 buffer->cur++, result->type = CPP_MINUS_EQ;
0d9f234d 2409 break;
45b966db 2410
0d9f234d 2411 case '&':
6f572ac2
NB
2412 result->type = CPP_AND;
2413 if (*buffer->cur == '&')
2414 buffer->cur++, result->type = CPP_AND_AND;
2415 else if (*buffer->cur == '=')
2416 buffer->cur++, result->type = CPP_AND_EQ;
0d9f234d 2417 break;
df383483 2418
0d9f234d 2419 case '|':
6f572ac2
NB
2420 result->type = CPP_OR;
2421 if (*buffer->cur == '|')
2422 buffer->cur++, result->type = CPP_OR_OR;
2423 else if (*buffer->cur == '=')
2424 buffer->cur++, result->type = CPP_OR_EQ;
0d9f234d 2425 break;
45b966db 2426
0d9f234d 2427 case ':':
6f572ac2
NB
2428 result->type = CPP_COLON;
2429 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
2430 buffer->cur++, result->type = CPP_SCOPE;
2431 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
0d9f234d 2432 {
6f572ac2 2433 buffer->cur++;
0d9f234d 2434 result->flags |= DIGRAPH;
480709cc
NB
2435 result->type = CPP_CLOSE_SQUARE;
2436 }
0d9f234d 2437 break;
45b966db 2438
480709cc
NB
2439 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
2440 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
2441 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
2442 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
9a0c6187 2443 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
480709cc 2444
26aea073 2445 case '?': result->type = CPP_QUERY; break;
0d9f234d
NB
2446 case '~': result->type = CPP_COMPL; break;
2447 case ',': result->type = CPP_COMMA; break;
2448 case '(': result->type = CPP_OPEN_PAREN; break;
2449 case ')': result->type = CPP_CLOSE_PAREN; break;
2450 case '[': result->type = CPP_OPEN_SQUARE; break;
2451 case ']': result->type = CPP_CLOSE_SQUARE; break;
2452 case '{': result->type = CPP_OPEN_BRACE; break;
2453 case '}': result->type = CPP_CLOSE_BRACE; break;
2454 case ';': result->type = CPP_SEMICOLON; break;
2455
40f03658 2456 /* @ is a punctuator in Objective-C. */
cc937581 2457 case '@': result->type = CPP_ATSIGN; break;
0d9f234d 2458
0abc6a6a 2459 case '$':
1613e52b
NB
2460 case '\\':
2461 {
2462 const uchar *base = --buffer->cur;
50668cf6 2463 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
0abc6a6a 2464
50668cf6 2465 if (forms_identifier_p (pfile, true, &nst))
1613e52b
NB
2466 {
2467 result->type = CPP_NAME;
9a0c6187 2468 result->val.node.node = lex_identifier (pfile, base, true, &nst);
50668cf6 2469 warn_about_normalization (pfile, result, &nst);
1613e52b
NB
2470 break;
2471 }
/* Not the start of an identifier: consume the character again and
   fall through to the default case below.  */
2472 buffer->cur++;
1067694a 2473 }
1613e52b 2474
1067694a 2475 default:
6338b358
NB
2476 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
2477 break;
0d9f234d 2478 }
bdcbe496
NB
2479
2480 return result;
0d9f234d
NB
2481}
2482
59325650
NB
2483/* An upper bound on the number of bytes needed to spell TOKEN.
2484 Does not include preceding whitespace. */
93c80368 2485unsigned int
6cf87ca4 2486cpp_token_len (const cpp_token *token)
0d9f234d 2487{
93c80368 2488 unsigned int len;
6d2c2047 2489
93c80368 2490 switch (TOKEN_SPELL (token))
041c3194 2491 {
cc955282 2492 default: len = 6; break;
6338b358 2493 case SPELL_LITERAL: len = token->val.str.len; break;
9a0c6187 2494 case SPELL_IDENT: len = NODE_LEN (token->val.node.node) * 10; break;
041c3194 2495 }
59325650
NB
2496
2497 return len;
6d2c2047
ZW
2498}
2499
47e20491
GK
/* Decode the UTF-8 sequence starting at NAME and store the equivalent
   UCN, spelled \UXXXXXXXX with lower-case hex digits, at BUFFER.
   Exactly 10 bytes are written to BUFFER; no terminating NUL is
   added.  Returns the number of bytes of NAME that made up the
   sequence.  Aborts if a continuation byte is malformed.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned lead = *name;
  int seq_len = 0;
  int k;

  /* The count of leading one bits in the first byte is the length of
     the whole sequence.  */
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The remaining low bits of the first byte start the code point;
     each continuation byte contributes six more bits.  */
  code_point = *name & (0x7F >> seq_len);
  for (k = 1; k < seq_len; k++)
    {
      const unsigned char next = name[k];

      code_point = (code_point << 6) | (next & 0x3F);

      /* Ill-formed UTF-8: a continuation byte must look like 10xxxxxx.  */
      if ((next & 0xC0) != 0x80)
        abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  for (k = 0; k < 8; k++)
    buffer[2 + k] = hex_digits[(code_point >> (28 - 4 * k)) & 0xF];

  return seq_len;
}
2533
cfc93532
MLI
2534/* Given a token TYPE corresponding to a digraph, return a pointer to
2535 the spelling of the digraph. */
2536static const unsigned char *
2537cpp_digraph2name (enum cpp_ttype type)
2538{
2539 return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2540}
47e20491 2541
041c3194 2542/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885 2543 already contain the enough space to hold the token's spelling.
6cf87ca4 2544 Returns a pointer to the character after the last character written.
47e20491
GK
2545 FORSTRING is true if this is to be the spelling after translation
2546 phase 1 (this is different for UCNs).
6cf87ca4 2547 FIXME: Would be nice if we didn't need the PFILE argument. */
93c80368 2548unsigned char *
6cf87ca4 2549cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
47e20491 2550 unsigned char *buffer, bool forstring)
041c3194 2551{
96be6998 2552 switch (TOKEN_SPELL (token))
041c3194
ZW
2553 {
2554 case SPELL_OPERATOR:
2555 {
2556 const unsigned char *spelling;
2557 unsigned char c;
d6d5f795 2558
041c3194 2559 if (token->flags & DIGRAPH)
cfc93532 2560 spelling = cpp_digraph2name (token->type);
92936ecf
ZW
2561 else if (token->flags & NAMED_OP)
2562 goto spell_ident;
041c3194 2563 else
96be6998 2564 spelling = TOKEN_NAME (token);
df383483 2565
041c3194
ZW
2566 while ((c = *spelling++) != '\0')
2567 *buffer++ = c;
2568 }
2569 break;
d6d5f795 2570
47ad4138 2571 spell_ident:
041c3194 2572 case SPELL_IDENT:
47e20491
GK
2573 if (forstring)
2574 {
9a0c6187
JM
2575 memcpy (buffer, NODE_NAME (token->val.node.node),
2576 NODE_LEN (token->val.node.node));
2577 buffer += NODE_LEN (token->val.node.node);
47e20491
GK
2578 }
2579 else
2580 {
2581 size_t i;
9a0c6187 2582 const unsigned char * name = NODE_NAME (token->val.node.node);
47e20491 2583
9a0c6187 2584 for (i = 0; i < NODE_LEN (token->val.node.node); i++)
47e20491
GK
2585 if (name[i] & ~0x7F)
2586 {
2587 i += utf8_to_ucn (buffer, name + i) - 1;
2588 buffer += 10;
2589 }
2590 else
9a0c6187 2591 *buffer++ = NODE_NAME (token->val.node.node)[i];
47e20491 2592 }
041c3194 2593 break;
d6d5f795 2594
6338b358 2595 case SPELL_LITERAL:
47ad4138
ZW
2596 memcpy (buffer, token->val.str.text, token->val.str.len);
2597 buffer += token->val.str.len;
2598 break;
2599
041c3194 2600 case SPELL_NONE:
0527bc4e
JDA
2601 cpp_error (pfile, CPP_DL_ICE,
2602 "unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
2603 break;
2604 }
d6d5f795 2605
041c3194
ZW
2606 return buffer;
2607}
d6d5f795 2608
5d8ebbd8
NB
2609/* Returns TOKEN spelt as a null-terminated string. The string is
2610 freed when the reader is destroyed. Useful for diagnostics. */
93c80368 2611unsigned char *
6cf87ca4 2612cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
59325650
NB
2613{
2614 unsigned int len = cpp_token_len (token) + 1;
ece54d54 2615 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
c5a04734 2616
47e20491 2617 end = cpp_spell_token (pfile, token, start, false);
93c80368 2618 end[0] = '\0';
c5a04734 2619
93c80368
NB
2620 return start;
2621}
c5a04734 2622
cfc93532
MLI
2623/* Returns a pointer to a string which spells the token defined by
2624 TYPE and FLAGS. Used by C front ends, which really should move to
2625 using cpp_token_as_text. */
93c80368 2626const char *
cfc93532 2627cpp_type2name (enum cpp_ttype type, unsigned char flags)
93c80368 2628{
cfc93532
MLI
2629 if (flags & DIGRAPH)
2630 return (const char *) cpp_digraph2name (type);
2631 else if (flags & NAMED_OP)
2632 return cpp_named_operator2name (type);
2633
93c80368
NB
2634 return (const char *) token_spellings[type].name;
2635}
c5a04734 2636
4ed5bcfb
NB
2637/* Writes the spelling of token to FP, without any preceding space.
2638 Separated from cpp_spell_token for efficiency - to avoid stdio
2639 double-buffering. */
93c80368 2640void
6cf87ca4 2641cpp_output_token (const cpp_token *token, FILE *fp)
93c80368 2642{
93c80368 2643 switch (TOKEN_SPELL (token))
c5a04734 2644 {
93c80368
NB
2645 case SPELL_OPERATOR:
2646 {
2647 const unsigned char *spelling;
3b681e9d 2648 int c;
c5a04734 2649
93c80368 2650 if (token->flags & DIGRAPH)
cfc93532 2651 spelling = cpp_digraph2name (token->type);
93c80368
NB
2652 else if (token->flags & NAMED_OP)
2653 goto spell_ident;
2654 else
2655 spelling = TOKEN_NAME (token);
041c3194 2656
3b681e9d
ZW
2657 c = *spelling;
2658 do
2659 putc (c, fp);
2660 while ((c = *++spelling) != '\0');
93c80368
NB
2661 }
2662 break;
041c3194 2663
93c80368
NB
2664 spell_ident:
2665 case SPELL_IDENT:
47e20491
GK
2666 {
2667 size_t i;
9a0c6187 2668 const unsigned char * name = NODE_NAME (token->val.node.node);
47e20491 2669
9a0c6187 2670 for (i = 0; i < NODE_LEN (token->val.node.node); i++)
47e20491
GK
2671 if (name[i] & ~0x7F)
2672 {
2673 unsigned char buffer[10];
2674 i += utf8_to_ucn (buffer, name + i) - 1;
2675 fwrite (buffer, 1, 10, fp);
2676 }
2677 else
9a0c6187 2678 fputc (NODE_NAME (token->val.node.node)[i], fp);
47e20491
GK
2679 }
2680 break;
041c3194 2681
6338b358 2682 case SPELL_LITERAL:
47ad4138
ZW
2683 fwrite (token->val.str.text, 1, token->val.str.len, fp);
2684 break;
2685
93c80368
NB
2686 case SPELL_NONE:
2687 /* An error, most probably. */
2688 break;
041c3194 2689 }
c5a04734
ZW
2690}
2691
93c80368
NB
2692/* Compare two tokens. */
2693int
6cf87ca4 2694_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
c5a04734 2695{
93c80368
NB
2696 if (a->type == b->type && a->flags == b->flags)
2697 switch (TOKEN_SPELL (a))
2698 {
2699 default: /* Keep compiler happy. */
2700 case SPELL_OPERATOR:
9a0c6187 2701 /* token_no is used to track where multiple consecutive ##
aa508502 2702 tokens were originally located. */
9a0c6187 2703 return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
93c80368 2704 case SPELL_NONE:
9a0c6187
JM
2705 return (a->type != CPP_MACRO_ARG
2706 || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
93c80368 2707 case SPELL_IDENT:
9a0c6187 2708 return a->val.node.node == b->val.node.node;
6338b358 2709 case SPELL_LITERAL:
93c80368
NB
2710 return (a->val.str.len == b->val.str.len
2711 && !memcmp (a->val.str.text, b->val.str.text,
2712 a->val.str.len));
2713 }
c5a04734 2714
041c3194
ZW
2715 return 0;
2716}
2717
93c80368
NB
2718/* Returns nonzero if a space should be inserted to avoid an
2719 accidental token paste for output. For simplicity, it is
2720 conservative, and occasionally advises a space where one is not
2721 needed, e.g. "." and ".2". */
93c80368 2722int
6cf87ca4
ZW
2723cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2724 const cpp_token *token2)
c5a04734 2725{
93c80368
NB
2726 enum cpp_ttype a = token1->type, b = token2->type;
2727 cppchar_t c;
c5a04734 2728
93c80368
NB
2729 if (token1->flags & NAMED_OP)
2730 a = CPP_NAME;
2731 if (token2->flags & NAMED_OP)
2732 b = CPP_NAME;
c5a04734 2733
93c80368
NB
2734 c = EOF;
2735 if (token2->flags & DIGRAPH)
37b8524c 2736 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
2737 else if (token_spellings[b].category == SPELL_OPERATOR)
2738 c = token_spellings[b].name[0];
c5a04734 2739
93c80368 2740 /* Quickly get everything that can paste with an '='. */
37b8524c 2741 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 2742 return 1;
c5a04734 2743
93c80368 2744 switch (a)
c5a04734 2745 {
b52dbbf8
SE
2746 case CPP_GREATER: return c == '>';
2747 case CPP_LESS: return c == '<' || c == '%' || c == ':';
93c80368
NB
2748 case CPP_PLUS: return c == '+';
2749 case CPP_MINUS: return c == '-' || c == '>';
2750 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
2751 case CPP_MOD: return c == ':' || c == '>';
2752 case CPP_AND: return c == '&';
2753 case CPP_OR: return c == '|';
2754 case CPP_COLON: return c == ':' || c == '>';
2755 case CPP_DEREF: return c == '*';
26ec42ee 2756 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
2757 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
2758 case CPP_NAME: return ((b == CPP_NUMBER
2759 && name_p (pfile, &token2->val.str))
2760 || b == CPP_NAME
2761 || b == CPP_CHAR || b == CPP_STRING); /* L */
2762 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
2763 || c == '.' || c == '+' || c == '-');
1613e52b 2764 /* UCNs */
1067694a
NB
2765 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
2766 && b == CPP_NAME)
1613e52b 2767 || (CPP_OPTION (pfile, objc)
1067694a 2768 && token1->val.str.text[0] == '@'
1613e52b 2769 && (b == CPP_NAME || b == CPP_STRING)));
87e356ba
JJ
2770 case CPP_STRING:
2771 case CPP_WSTRING:
2772 case CPP_UTF8STRING:
2773 case CPP_STRING16:
2774 case CPP_STRING32: return (CPP_OPTION (pfile, user_literals)
2775 && (b == CPP_NAME
2776 || (TOKEN_SPELL (token2) == SPELL_LITERAL
2777 && ISIDST (token2->val.str.text[0]))));
2778
93c80368 2779 default: break;
c5a04734 2780 }
c5a04734 2781
417f3e3a 2782 return 0;
c5a04734
ZW
2783}
2784
93c80368 2785/* Output all the remaining tokens on the current line, and a newline
4ed5bcfb
NB
2786 character, to FP. Leading whitespace is removed. If there are
2787 macros, special token padding is not performed. */
c5a04734 2788void
6cf87ca4 2789cpp_output_line (cpp_reader *pfile, FILE *fp)
c5a04734 2790{
4ed5bcfb 2791 const cpp_token *token;
96be6998 2792
4ed5bcfb
NB
2793 token = cpp_get_token (pfile);
2794 while (token->type != CPP_EOF)
96be6998 2795 {
4ed5bcfb
NB
2796 cpp_output_token (token, fp);
2797 token = cpp_get_token (pfile);
2798 if (token->flags & PREV_WHITE)
2799 putc (' ', fp);
96be6998
ZW
2800 }
2801
93c80368 2802 putc ('\n', fp);
041c3194 2803}
c5a04734 2804
5d6342eb
TT
2805/* Return a string representation of all the remaining tokens on the
2806 current line. The result is allocated using xmalloc and must be
2807 freed by the caller. */
2808unsigned char *
2809cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
2810{
2811 const cpp_token *token;
2812 unsigned int out = dir_name ? ustrlen (dir_name) : 0;
2813 unsigned int alloced = 120 + out;
2814 unsigned char *result = (unsigned char *) xmalloc (alloced);
2815
2816 /* If DIR_NAME is empty, there are no initial contents. */
2817 if (dir_name)
2818 {
2819 sprintf ((char *) result, "#%s ", dir_name);
2820 out += 2;
2821 }
2822
2823 token = cpp_get_token (pfile);
2824 while (token->type != CPP_EOF)
2825 {
2826 unsigned char *last;
2827 /* Include room for a possible space and the terminating nul. */
2828 unsigned int len = cpp_token_len (token) + 2;
2829
2830 if (out + len > alloced)
2831 {
2832 alloced *= 2;
2833 if (out + len > alloced)
2834 alloced = out + len;
2835 result = (unsigned char *) xrealloc (result, alloced);
2836 }
2837
2838 last = cpp_spell_token (pfile, token, &result[out], 0);
2839 out = last - result;
2840
2841 token = cpp_get_token (pfile);
2842 if (token->flags & PREV_WHITE)
2843 result[out++] = ' ';
2844 }
2845
2846 result[out] = '\0';
2847 return result;
2848}
2849
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when BUFF
   is extended: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro parameter is parenthesized so that an argument containing a
   low-precedence operator cannot change the meaning of the
   expansion.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2864
c9e7a609
NB
2865/* Create a new allocation buffer. Place the control block at the end
2866 of the buffer, so that buffer overflows will cause immediate chaos. */
b8af0ca5 2867static _cpp_buff *
6cf87ca4 2868new_buff (size_t len)
b8af0ca5
NB
2869{
2870 _cpp_buff *result;
ece54d54 2871 unsigned char *base;
b8af0ca5 2872
1e013d2e
NB
2873 if (len < MIN_BUFF_SIZE)
2874 len = MIN_BUFF_SIZE;
c70f6ed3 2875 len = CPP_ALIGN (len);
b8af0ca5 2876
1a80db97
JJ
2877#ifdef ENABLE_VALGRIND_CHECKING
2878 /* Valgrind warns about uses of interior pointers, so put _cpp_buff
2879 struct first. */
2880 size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
2881 base = XNEWVEC (unsigned char, len + slen);
2882 result = (_cpp_buff *) base;
2883 base += slen;
2884#else
c3f829c1 2885 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
b8af0ca5 2886 result = (_cpp_buff *) (base + len);
1a80db97 2887#endif
b8af0ca5
NB
2888 result->base = base;
2889 result->cur = base;
2890 result->limit = base + len;
2891 result->next = NULL;
2892 return result;
2893}
2894
2895/* Place a chain of unwanted allocation buffers on the free list. */
2896void
6cf87ca4 2897_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
b8af0ca5
NB
2898{
2899 _cpp_buff *end = buff;
2900
2901 while (end->next)
2902 end = end->next;
2903 end->next = pfile->free_buffs;
2904 pfile->free_buffs = buff;
2905}
2906
2907/* Return a free buffer of size at least MIN_SIZE. */
2908_cpp_buff *
6cf87ca4 2909_cpp_get_buff (cpp_reader *pfile, size_t min_size)
b8af0ca5
NB
2910{
2911 _cpp_buff *result, **p;
2912
2913 for (p = &pfile->free_buffs;; p = &(*p)->next)
2914 {
6142088c 2915 size_t size;
1e013d2e
NB
2916
2917 if (*p == NULL)
b8af0ca5 2918 return new_buff (min_size);
1e013d2e
NB
2919 result = *p;
2920 size = result->limit - result->base;
2921 /* Return a buffer that's big enough, but don't waste one that's
2922 way too big. */
34f5271d 2923 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
b8af0ca5
NB
2924 break;
2925 }
2926
2927 *p = result->next;
2928 result->next = NULL;
2929 result->cur = result->base;
2930 return result;
2931}
2932
4fe9b91c 2933/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
2934 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2935 the excess bytes to the new buffer. Chains the new buffer after
2936 BUFF, and returns the new buffer. */
b8af0ca5 2937_cpp_buff *
6cf87ca4 2938_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
b8af0ca5 2939{
6142088c 2940 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
8c3b2693 2941 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
b8af0ca5 2942
8c3b2693
NB
2943 buff->next = new_buff;
2944 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2945 return new_buff;
2946}
2947
4fe9b91c 2948/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
2949 remaining bytes of the buffer pointed to by BUFF, and at least
2950 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2951 Chains the new buffer before the buffer pointed to by BUFF, and
2952 updates the pointer to point to the new buffer. */
2953void
6cf87ca4 2954_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
8c3b2693
NB
2955{
2956 _cpp_buff *new_buff, *old_buff = *pbuff;
2957 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2958
2959 new_buff = _cpp_get_buff (pfile, size);
2960 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2961 new_buff->next = old_buff;
2962 *pbuff = new_buff;
b8af0ca5
NB
2963}
2964
2965/* Free a chain of buffers starting at BUFF. */
2966void
5671bf27 2967_cpp_free_buff (_cpp_buff *buff)
b8af0ca5
NB
2968{
2969 _cpp_buff *next;
2970
2971 for (; buff; buff = next)
2972 {
2973 next = buff->next;
1a80db97
JJ
2974#ifdef ENABLE_VALGRIND_CHECKING
2975 free (buff);
2976#else
b8af0ca5 2977 free (buff->base);
1a80db97 2978#endif
b8af0ca5
NB
2979 }
2980}
417f3e3a 2981
ece54d54
NB
2982/* Allocate permanent, unaligned storage of length LEN. */
2983unsigned char *
6cf87ca4 2984_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
ece54d54
NB
2985{
2986 _cpp_buff *buff = pfile->u_buff;
2987 unsigned char *result = buff->cur;
2988
2989 if (len > (size_t) (buff->limit - result))
2990 {
2991 buff = _cpp_get_buff (pfile, len);
2992 buff->next = pfile->u_buff;
2993 pfile->u_buff = buff;
2994 result = buff->cur;
2995 }
2996
2997 buff->cur = result + len;
2998 return result;
2999}
3000
87062813
NB
3001/* Allocate permanent, unaligned storage of length LEN from a_buff.
3002 That buffer is used for growing allocations when saving macro
3003 replacement lists in a #define, and when parsing an answer to an
3004 assertion in #assert, #unassert or #if (and therefore possibly
3005 whilst expanding macros). It therefore must not be used by any
3006 code that they might call: specifically the lexer and the guts of
3007 the macro expander.
3008
3009 All existing other uses clearly fit this restriction: storing
3010 registered pragmas during initialization. */
93c80368 3011unsigned char *
6cf87ca4 3012_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3fef5b2b 3013{
8c3b2693
NB
3014 _cpp_buff *buff = pfile->a_buff;
3015 unsigned char *result = buff->cur;
3fef5b2b 3016
8c3b2693 3017 if (len > (size_t) (buff->limit - result))
3fef5b2b 3018 {
8c3b2693
NB
3019 buff = _cpp_get_buff (pfile, len);
3020 buff->next = pfile->a_buff;
3021 pfile->a_buff = buff;
3022 result = buff->cur;
3fef5b2b 3023 }
041c3194 3024
8c3b2693 3025 buff->cur = result + len;
93c80368 3026 return result;
041c3194 3027}
d8044160
GK
3028
3029/* Say which field of TOK is in use. */
3030
3031enum cpp_token_fld_kind
3032cpp_token_val_index (cpp_token *tok)
3033{
3034 switch (TOKEN_SPELL (tok))
3035 {
3036 case SPELL_IDENT:
3037 return CPP_TOKEN_FLD_NODE;
3038 case SPELL_LITERAL:
3039 return CPP_TOKEN_FLD_STR;
aa508502
JM
3040 case SPELL_OPERATOR:
3041 if (tok->type == CPP_PASTE)
9a0c6187 3042 return CPP_TOKEN_FLD_TOKEN_NO;
aa508502
JM
3043 else
3044 return CPP_TOKEN_FLD_NONE;
d8044160
GK
3045 case SPELL_NONE:
3046 if (tok->type == CPP_MACRO_ARG)
3047 return CPP_TOKEN_FLD_ARG_NO;
3048 else if (tok->type == CPP_PADDING)
3049 return CPP_TOKEN_FLD_SOURCE;
21b11495 3050 else if (tok->type == CPP_PRAGMA)
bc4071dd 3051 return CPP_TOKEN_FLD_PRAGMA;
d8044160
GK
3052 /* else fall through */
3053 default:
3054 return CPP_TOKEN_FLD_NONE;
3055 }
3056}
e3dfef44
GC
3057
3058/* All tokens lexed in R after calling this function will be forced to have
3059 their source_location the same as the location referenced by P, until
3060 cpp_stop_forcing_token_locations is called for R. */
3061
3062void
3063cpp_force_token_locations (cpp_reader *r, source_location *p)
3064{
3065 r->forced_token_location_p = p;
3066}
3067
3068/* Go back to assigning locations naturally for lexed tokens. */
3069
3070void
3071cpp_stop_forcing_token_locations (cpp_reader *r)
3072{
3073 r->forced_token_location_p = NULL;
3074}