]> git.ipfire.org Git - thirdparty/gcc.git/blame - libcpp/lex.c
Daily bump.
[thirdparty/gcc.git] / libcpp / lex.c
CommitLineData
45b966db 1/* CPP Library - lexical analysis.
500f3ed9 2 Copyright (C) 2000-2013 Free Software Foundation, Inc.
45b966db
ZW
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7
8This program is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
748086b7 10Free Software Foundation; either version 3, or (at your option) any
45b966db
ZW
11later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
748086b7
JJ
19along with this program; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
45b966db
ZW
21
22#include "config.h"
23#include "system.h"
45b966db 24#include "cpplib.h"
4f4e53dd 25#include "internal.h"
45b966db 26
/* Spelling category of a token type.  The TTYPE_TABLE expansion that
   builds token_spellings assigns one of these to every token type:
   OP entries always get SPELL_OPERATOR, TK entries name their
   category explicitly.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
34
93c80368 35struct token_spelling
f9a0e96c 36{
93c80368
NB
37 enum spell_type category;
38 const unsigned char *name;
f9a0e96c
ZW
39};
40
8206c799 41static const unsigned char *const digraph_spellings[] =
b6baa67d 42{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
93c80368 43
b6baa67d
KVH
44#define OP(e, s) { SPELL_OPERATOR, UC s },
45#define TK(e, s) { SPELL_ ## s, UC #e },
8206c799 46static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
93c80368
NB
47#undef OP
48#undef TK
49
50#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
f2d5f0cc 52
6cf87ca4
ZW
53static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54static int skip_line_comment (cpp_reader *);
55static void skip_whitespace (cpp_reader *, cppchar_t);
6cf87ca4
ZW
56static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
631d0d36 58static void store_comment (cpp_reader *, cpp_token *);
6cf87ca4
ZW
59static void create_literal (cpp_reader *, cpp_token *, const uchar *,
60 unsigned int, enum cpp_ttype);
61static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
62static int name_p (cpp_reader *, const cpp_string *);
6cf87ca4
ZW
63static tokenrun *next_tokenrun (tokenrun *);
64
6cf87ca4 65static _cpp_buff *new_buff (size_t);
15dad1d9 66
9d10c9a9 67
041c3194 68/* Utility routine:
9e62c811 69
bfb9dc7f
ZW
70 Compares, the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
041c3194 72int
6cf87ca4 73cpp_ideq (const cpp_token *token, const char *string)
041c3194 74{
bfb9dc7f 75 if (token->type != CPP_NAME)
041c3194 76 return 0;
bfb9dc7f 77
9a0c6187 78 return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
15dad1d9 79}
1368ee70 80
26aea073
NB
81/* Record a note TYPE at byte POS into the current cleaned logical
82 line. */
87062813 83static void
6cf87ca4 84add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
0d9f234d 85{
26aea073
NB
86 if (buffer->notes_used == buffer->notes_cap)
87 {
88 buffer->notes_cap = buffer->notes_cap * 2 + 200;
c3f829c1
GDR
89 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
90 buffer->notes_cap);
26aea073 91 }
0d9f234d 92
26aea073
NB
93 buffer->notes[buffer->notes_used].pos = pos;
94 buffer->notes[buffer->notes_used].type = type;
95 buffer->notes_used++;
0d9f234d
NB
96}
97
246a2fcb
RH
98\f
/* Fast path to find line special characters using optimized character
   scanning algorithms.  Anything complicated falls back to the slow
   path below.  Since this loop is very hot it's worth doing these kinds
   of optimizations.

   One of the paths through the ifdefs should provide

     const uchar *search_line_fast (const uchar *s, const uchar *end);

   Between S and END, search for \n, \r, \\, ?.  Return a pointer to
   the found character.

   Note that the last character of the buffer is *always* a newline,
   as forced by _cpp_convert_input.  This fact can be used to avoid
   explicitly looking for the end of the buffer.  */
114
/* Configure gives us an ifdef test.  Turn it into a macro that is
   always defined, so that it can be used in ordinary C conditions.  */
#ifndef WORDS_BIGENDIAN
#define WORDS_BIGENDIAN 0
#endif
119
/* We'd like the largest integer that fits into a register.  There's nothing
   in <stdint.h> that gives us that.  For most hosts this is unsigned long,
   but MS decided on an LLP64 model.  Thankfully when building with GCC we
   can get the "real" word size.  */
#ifdef __GNUC__
typedef unsigned int word_type __attribute__((__mode__(__word__)));
#else
typedef unsigned long word_type;
#endif

/* The code below is only expecting sizes 4 or 8.
   Die at compile-time if this expectation is violated: the array
   bound evaluates to 1 when the size is acceptable and to -1, which
   is not a valid bound, otherwise.  */
typedef char check_word_type_size
  [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
134
135/* Return X with the first N bytes forced to values that won't match one
136 of the interesting characters. Note that NUL is not interesting. */
137
138static inline word_type
139acc_char_mask_misalign (word_type val, unsigned int n)
140{
141 word_type mask = -1;
142 if (WORDS_BIGENDIAN)
143 mask >>= n * 8;
144 else
145 mask <<= n * 8;
146 return val & mask;
147}
148
149/* Return X replicated to all byte positions within WORD_TYPE. */
150
151static inline word_type
152acc_char_replicate (uchar x)
153{
154 word_type ret;
155
156 ret = (x << 24) | (x << 16) | (x << 8) | x;
157 if (sizeof(word_type) == 8)
158 ret = (ret << 16 << 16) | ret;
159 return ret;
160}
161
162/* Return non-zero if some byte of VAL is (probably) C. */
163
164static inline word_type
165acc_char_cmp (word_type val, word_type c)
166{
167#if defined(__GNUC__) && defined(__alpha__)
168 /* We can get exact results using a compare-bytes instruction.
169 Get (val == c) via (0 >= (val ^ c)). */
170 return __builtin_alpha_cmpbge (0, val ^ c);
171#else
172 word_type magic = 0x7efefefeU;
173 if (sizeof(word_type) == 8)
174 magic = (magic << 16 << 16) | 0xfefefefeU;
175 magic |= 1;
176
177 val ^= c;
178 return ((val + magic) ^ ~val) & ~magic;
179#endif
180}
181
182/* Given the result of acc_char_cmp is non-zero, return the index of
183 the found character. If this was a false positive, return -1. */
184
185static inline int
186acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
187 word_type val ATTRIBUTE_UNUSED)
188{
189#if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190 /* The cmpbge instruction sets *bits* of the result corresponding to
191 matches in the bytes with no false positives. */
192 return __builtin_ctzl (cmp);
193#else
194 unsigned int i;
195
196 /* ??? It would be nice to force unrolling here,
197 and have all of these constants folded. */
198 for (i = 0; i < sizeof(word_type); ++i)
199 {
200 uchar c;
201 if (WORDS_BIGENDIAN)
202 c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
203 else
204 c = (val >> i * 8) & 0xff;
205
206 if (c == '\n' || c == '\r' || c == '\\' || c == '?')
207 return i;
208 }
209
210 return -1;
211#endif
212}
213
214/* A version of the fast scanner using bit fiddling techniques.
215
216 For 32-bit words, one would normally perform 16 comparisons and
217 16 branches. With this algorithm one performs 24 arithmetic
218 operations and one branch. Whether this is faster with a 32-bit
219 word size is going to be somewhat system dependent.
220
221 For 64-bit words, we eliminate twice the number of comparisons
222 and branches without increasing the number of arithmetic operations.
223 It's almost certainly going to be a win with 64-bit word size. */
224
225static const uchar * search_line_acc_char (const uchar *, const uchar *)
226 ATTRIBUTE_UNUSED;
227
228static const uchar *
229search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
230{
231 const word_type repl_nl = acc_char_replicate ('\n');
232 const word_type repl_cr = acc_char_replicate ('\r');
233 const word_type repl_bs = acc_char_replicate ('\\');
234 const word_type repl_qm = acc_char_replicate ('?');
235
236 unsigned int misalign;
237 const word_type *p;
238 word_type val, t;
239
240 /* Align the buffer. Mask out any bytes from before the beginning. */
241 p = (word_type *)((uintptr_t)s & -sizeof(word_type));
242 val = *p;
243 misalign = (uintptr_t)s & (sizeof(word_type) - 1);
244 if (misalign)
245 val = acc_char_mask_misalign (val, misalign);
246
247 /* Main loop. */
248 while (1)
249 {
250 t = acc_char_cmp (val, repl_nl);
251 t |= acc_char_cmp (val, repl_cr);
252 t |= acc_char_cmp (val, repl_bs);
253 t |= acc_char_cmp (val, repl_qm);
254
255 if (__builtin_expect (t != 0, 0))
256 {
257 int i = acc_char_index (t, val);
258 if (i >= 0)
259 return (const uchar *)p + i;
260 }
261
262 val = *++p;
263 }
264}
265
/* Disable on Solaris 2/x86 until the following problems can be properly
   autoconfed:

   The Solaris 9 assembler cannot assemble SSE4.2 insns.
   Before Solaris 9 Update 6, SSE insns cannot be executed.
   The Solaris 10+ assembler tags objects with the instruction set
   extensions used, so SSE4.2 executables cannot run on machines that
   don't support that extension.  */
274
275#if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
246a2fcb
RH
276
/* Replicated character data to be shared between implementations.
   Recall that outside of a context with vector support we can't
   define compatible vector types, therefore these are all defined
   in terms of raw characters.  Row 0 holds \n, row 1 \r, row 2 \\ and
   row 3 ?; the rows are 16-byte aligned so that they can be loaded as
   whole vectors.  */
static const char repl_chars[4][16] __attribute__((aligned(16))) = {
  { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
  { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
  { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
  { '?', '?', '?', '?', '?', '?', '?', '?',
    '?', '?', '?', '?', '?', '?', '?', '?' },
};
291
292/* A version of the fast scanner using MMX vectorized byte compare insns.
293
294 This uses the PMOVMSKB instruction which was introduced with "MMX2",
ef230b38 295 which was packaged into SSE1; it is also present in the AMD MMX
246a2fcb
RH
296 extension. Mark the function as using "sse" so that we emit a real
297 "emms" instruction, rather than the 3dNOW "femms" instruction. */
298
299static const uchar *
300#ifndef __SSE__
301__attribute__((__target__("sse")))
302#endif
303search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
304{
305 typedef char v8qi __attribute__ ((__vector_size__ (8)));
306 typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
307
308 const v8qi repl_nl = *(const v8qi *)repl_chars[0];
309 const v8qi repl_cr = *(const v8qi *)repl_chars[1];
310 const v8qi repl_bs = *(const v8qi *)repl_chars[2];
311 const v8qi repl_qm = *(const v8qi *)repl_chars[3];
312
313 unsigned int misalign, found, mask;
314 const v8qi *p;
315 v8qi data, t, c;
316
317 /* Align the source pointer. While MMX doesn't generate unaligned data
318 faults, this allows us to safely scan to the end of the buffer without
319 reading beyond the end of the last page. */
320 misalign = (uintptr_t)s & 7;
321 p = (const v8qi *)((uintptr_t)s & -8);
322 data = *p;
323
324 /* Create a mask for the bytes that are valid within the first
325 16-byte block. The Idea here is that the AND with the mask
326 within the loop is "free", since we need some AND or TEST
327 insn in order to set the flags for the branch anyway. */
328 mask = -1u << misalign;
329
330 /* Main loop processing 8 bytes at a time. */
331 goto start;
332 do
333 {
334 data = *++p;
335 mask = -1;
336
337 start:
338 t = __builtin_ia32_pcmpeqb(data, repl_nl);
339 c = __builtin_ia32_pcmpeqb(data, repl_cr);
340 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
341 c = __builtin_ia32_pcmpeqb(data, repl_bs);
342 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
343 c = __builtin_ia32_pcmpeqb(data, repl_qm);
344 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
345 found = __builtin_ia32_pmovmskb (t);
346 found &= mask;
347 }
348 while (!found);
349
350 __builtin_ia32_emms ();
351
352 /* FOUND contains 1 in bits for which we matched a relevant
353 character. Conversion to the byte index is trivial. */
354 found = __builtin_ctz(found);
355 return (const uchar *)p + found;
356}
357
358/* A version of the fast scanner using SSE2 vectorized byte compare insns. */
359
360static const uchar *
361#ifndef __SSE2__
362__attribute__((__target__("sse2")))
363#endif
364search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
365{
366 typedef char v16qi __attribute__ ((__vector_size__ (16)));
367
368 const v16qi repl_nl = *(const v16qi *)repl_chars[0];
369 const v16qi repl_cr = *(const v16qi *)repl_chars[1];
370 const v16qi repl_bs = *(const v16qi *)repl_chars[2];
371 const v16qi repl_qm = *(const v16qi *)repl_chars[3];
372
373 unsigned int misalign, found, mask;
374 const v16qi *p;
375 v16qi data, t;
376
377 /* Align the source pointer. */
378 misalign = (uintptr_t)s & 15;
379 p = (const v16qi *)((uintptr_t)s & -16);
380 data = *p;
381
382 /* Create a mask for the bytes that are valid within the first
383 16-byte block. The Idea here is that the AND with the mask
384 within the loop is "free", since we need some AND or TEST
385 insn in order to set the flags for the branch anyway. */
386 mask = -1u << misalign;
387
388 /* Main loop processing 16 bytes at a time. */
389 goto start;
390 do
391 {
392 data = *++p;
393 mask = -1;
394
395 start:
396 t = __builtin_ia32_pcmpeqb128(data, repl_nl);
397 t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
398 t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
399 t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
400 found = __builtin_ia32_pmovmskb128 (t);
401 found &= mask;
402 }
403 while (!found);
404
405 /* FOUND contains 1 in bits for which we matched a relevant
406 character. Conversion to the byte index is trivial. */
407 found = __builtin_ctz(found);
408 return (const uchar *)p + found;
409}
410
#ifdef HAVE_SSE4
/* A version of the fast scanner using SSE 4.2 vectorized string insns.

   Returns a pointer to the first \n, \r, ? or \\ at or after S.  END
   is only used to decide whether an unaligned 16-byte read at S is
   safe.  */

static const uchar *
#ifndef __SSE4_2__
__attribute__((__target__("sse4.2")))
#endif
search_line_sse42 (const uchar *s, const uchar *end)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
  static const v16qi search = { '\n', '\r', '?', '\\' };

  uintptr_t si = (uintptr_t)s;
  uintptr_t index;

  /* Check for unaligned input.  */
  if (si & 15)
    {
      v16qi sv;

      if (__builtin_expect (end - s < 16, 0)
          && __builtin_expect ((si & 0xfff) > 0xff0, 0))
        {
          /* There are less than 16 bytes left in the buffer, and less
             than 16 bytes left on the page.  Reading 16 bytes at this
             point might generate a spurious page fault.  Defer to the
             SSE2 implementation, which already handles alignment.  */
          return search_line_sse2 (s, end);
        }

      /* ??? The builtin doesn't understand that the PCMPESTRI read from
         memory need not be aligned.  */
      sv = __builtin_ia32_loaddqu ((const char *) s);
      index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);

      if (__builtin_expect (index < 16, 0))
        goto found;

      /* Advance the pointer to an aligned address.  We will re-scan a
         few bytes, but we no longer need care for reading past the
         end of a page, since we're guaranteed a match.  */
      s = (const uchar *)((si + 16) & -16);
    }

  /* Main loop, processing 16 bytes at a time.  By doing the whole loop
     in inline assembly, we can make proper use of the flags set.  The
     loop repeats while the carry flag is clear and leaves the match
     index in INDEX and the block address in S.  */
  __asm ( "sub $16, %1\n"
          " .balign 16\n"
          "0: add $16, %1\n"
          " %vpcmpestri $0, (%1), %2\n"
          " jnc 0b"
          : "=&c"(index), "+r"(s)
          : "x"(search), "a"(4), "d"(16));

 found:
  return s + index;
}

#else
/* Work around out-dated assemblers without sse4 support.  */
#define search_line_sse42 search_line_sse2
#endif
473
246a2fcb
RH
474/* Check the CPU capabilities. */
475
476#include "../gcc/config/i386/cpuid.h"
477
478typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
479static search_line_fast_type search_line_fast;
480
b0c084b7
JJ
481#define HAVE_init_vectorized_lexer 1
482static inline void
246a2fcb
RH
483init_vectorized_lexer (void)
484{
485 unsigned dummy, ecx = 0, edx = 0;
486 search_line_fast_type impl = search_line_acc_char;
487 int minimum = 0;
488
489#if defined(__SSE4_2__)
490 minimum = 3;
491#elif defined(__SSE2__)
492 minimum = 2;
ef230b38 493#elif defined(__SSE__)
246a2fcb
RH
494 minimum = 1;
495#endif
496
497 if (minimum == 3)
498 impl = search_line_sse42;
499 else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
500 {
501 if (minimum == 3 || (ecx & bit_SSE4_2))
502 impl = search_line_sse42;
503 else if (minimum == 2 || (edx & bit_SSE2))
504 impl = search_line_sse2;
505 else if (minimum == 1 || (edx & bit_SSE))
506 impl = search_line_mmx;
507 }
508 else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
509 {
5e70c0b5
UB
510 if (minimum == 1
511 || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
246a2fcb
RH
512 impl = search_line_mmx;
513 }
514
515 search_line_fast = impl;
516}
517
01956319 518#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
246a2fcb
RH
519
520/* A vection of the fast scanner using AltiVec vectorized byte compares. */
521/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
522 so we can't compile this function without -maltivec on the command line
523 (or implied by some other switch). */
524
525static const uchar *
526search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
527{
528 typedef __attribute__((altivec(vector))) unsigned char vc;
529
530 const vc repl_nl = {
531 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
532 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
533 };
534 const vc repl_cr = {
535 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
536 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
537 };
538 const vc repl_bs = {
539 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
540 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
541 };
542 const vc repl_qm = {
543 '?', '?', '?', '?', '?', '?', '?', '?',
544 '?', '?', '?', '?', '?', '?', '?', '?',
545 };
546 const vc ones = {
547 -1, -1, -1, -1, -1, -1, -1, -1,
548 -1, -1, -1, -1, -1, -1, -1, -1,
549 };
550 const vc zero = { 0 };
551
552 vc data, mask, t;
553
554 /* Altivec loads automatically mask addresses with -16. This lets us
555 issue the first load as early as possible. */
556 data = __builtin_vec_ld(0, (const vc *)s);
557
558 /* Discard bytes before the beginning of the buffer. Do this by
559 beginning with all ones and shifting in zeros according to the
560 mis-alignment. The LVSR instruction pulls the exact shift we
561 want from the address. */
562 mask = __builtin_vec_lvsr(0, s);
563 mask = __builtin_vec_perm(zero, ones, mask);
564 data &= mask;
565
566 /* While altivec loads mask addresses, we still need to align S so
567 that the offset we compute at the end is correct. */
568 s = (const uchar *)((uintptr_t)s & -16);
569
570 /* Main loop processing 16 bytes at a time. */
571 goto start;
572 do
573 {
574 vc m_nl, m_cr, m_bs, m_qm;
575
576 s += 16;
577 data = __builtin_vec_ld(0, (const vc *)s);
578
579 start:
580 m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
581 m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
582 m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
583 m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
584 t = (m_nl | m_cr) | (m_bs | m_qm);
585
586 /* T now contains 0xff in bytes for which we matched one of the relevant
587 characters. We want to exit the loop if any byte in T is non-zero.
588 Below is the expansion of vec_any_ne(t, zero). */
589 }
590 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
591
592 {
593#define N (sizeof(vc) / sizeof(long))
594
246a2fcb
RH
595 union {
596 vc v;
53a103d3
DS
597 /* Statically assert that N is 2 or 4. */
598 unsigned long l[(N == 2 || N == 4) ? N : -1];
246a2fcb
RH
599 } u;
600 unsigned long l, i = 0;
601
602 u.v = t;
603
604 /* Find the first word of T that is non-zero. */
605 switch (N)
606 {
607 case 4:
608 l = u.l[i++];
609 if (l != 0)
610 break;
611 s += sizeof(unsigned long);
612 l = u.l[i++];
613 if (l != 0)
614 break;
615 s += sizeof(unsigned long);
616 case 2:
617 l = u.l[i++];
618 if (l != 0)
619 break;
620 s += sizeof(unsigned long);
621 l = u.l[i];
622 }
623
624 /* L now contains 0xff in bytes for which we matched one of the
625 relevant characters. We can find the byte index by finding
626 its bit index and dividing by 8. */
627 l = __builtin_clzl(l) >> 3;
628 return s + l;
629
630#undef N
631 }
632}
633
e75b54a2
RE
634#elif defined (__ARM_NEON__)
635#include "arm_neon.h"
636
637static const uchar *
638search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
639{
640 const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
641 const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
642 const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
643 const uint8x16_t repl_qm = vdupq_n_u8 ('?');
644 const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
645
646 unsigned int misalign, found, mask;
647 const uint8_t *p;
648 uint8x16_t data;
649
650 /* Align the source pointer. */
651 misalign = (uintptr_t)s & 15;
652 p = (const uint8_t *)((uintptr_t)s & -16);
653 data = vld1q_u8 (p);
654
655 /* Create a mask for the bytes that are valid within the first
656 16-byte block. The Idea here is that the AND with the mask
657 within the loop is "free", since we need some AND or TEST
658 insn in order to set the flags for the branch anyway. */
659 mask = (-1u << misalign) & 0xffff;
660
661 /* Main loop, processing 16 bytes at a time. */
662 goto start;
663
664 do
665 {
666 uint8x8_t l;
667 uint16x4_t m;
668 uint32x2_t n;
669 uint8x16_t t, u, v, w;
670
671 p += 16;
672 data = vld1q_u8 (p);
673 mask = 0xffff;
674
675 start:
676 t = vceqq_u8 (data, repl_nl);
677 u = vceqq_u8 (data, repl_cr);
678 v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
679 w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
680 t = vandq_u8 (vorrq_u8 (v, w), xmask);
681 l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
682 m = vpaddl_u8 (l);
683 n = vpaddl_u16 (m);
684
685 found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
686 vshr_n_u64 ((uint64x1_t) n, 24)), 0);
687 found &= mask;
688 }
689 while (!found);
690
691 /* FOUND contains 1 in bits for which we matched a relevant
692 character. Conversion to the byte index is trivial. */
693 found = __builtin_ctz (found);
694 return (const uchar *)p + found;
695}
696
246a2fcb
RH
697#else
698
/* We only have one accelerated alternative.  Use a direct call so that
   we encourage inlining.  */

#define search_line_fast search_line_acc_char
703
704#endif
705
/* Initialize the lexer if needed.  This only has work to do on hosts
   that define HAVE_init_vectorized_lexer, where it runs
   init_vectorized_lexer.  */

void
_cpp_init_lexer (void)
{
#ifdef HAVE_init_vectorized_lexer
  init_vectorized_lexer ();
#endif
}
715
26aea073
NB
716/* Returns with a logical line that contains no escaped newlines or
717 trigraphs. This is a time-critical inner loop. */
718void
6cf87ca4 719_cpp_clean_line (cpp_reader *pfile)
45b966db 720{
26aea073
NB
721 cpp_buffer *buffer;
722 const uchar *s;
723 uchar c, *d, *p;
87062813 724
26aea073
NB
725 buffer = pfile->buffer;
726 buffer->cur_note = buffer->notes_used = 0;
727 buffer->cur = buffer->line_base = buffer->next_line;
728 buffer->need_line = false;
246a2fcb 729 s = buffer->next_line;
87062813 730
26aea073 731 if (!buffer->from_stage3)
45b966db 732 {
7af45bd4
ILT
733 const uchar *pbackslash = NULL;
734
246a2fcb 735 /* Fast path. This is the common case of an un-escaped line with
d08dcf87
ZW
736 no trigraphs. The primary win here is by not writing any
737 data back to memory until we have to. */
246a2fcb 738 while (1)
d08dcf87 739 {
246a2fcb
RH
740 /* Perform an optimized search for \n, \r, \\, ?. */
741 s = search_line_fast (s, buffer->rlimit);
d08dcf87 742
246a2fcb
RH
743 c = *s;
744 if (c == '\\')
745 {
746 /* Record the location of the backslash and continue. */
747 pbackslash = s++;
d08dcf87 748 }
246a2fcb 749 else if (__builtin_expect (c == '?', 0))
d08dcf87 750 {
246a2fcb
RH
751 if (__builtin_expect (s[1] == '?', false)
752 && _cpp_trigraph_map[s[2]])
d08dcf87 753 {
246a2fcb
RH
754 /* Have a trigraph. We may or may not have to convert
755 it. Add a line note regardless, for -Wtrigraphs. */
756 add_line_note (buffer, s, s[2]);
757 if (CPP_OPTION (pfile, trigraphs))
758 {
759 /* We do, and that means we have to switch to the
760 slow path. */
761 d = (uchar *) s;
762 *d = _cpp_trigraph_map[s[2]];
763 s += 2;
764 goto slow_path;
765 }
d08dcf87 766 }
246a2fcb
RH
767 /* Not a trigraph. Continue on fast-path. */
768 s++;
d08dcf87 769 }
246a2fcb
RH
770 else
771 break;
d08dcf87
ZW
772 }
773
246a2fcb
RH
774 /* This must be \r or \n. We're either done, or we'll be forced
775 to write back to the buffer and continue on the slow path. */
776 d = (uchar *) s;
777
778 if (__builtin_expect (s == buffer->rlimit, false))
779 goto done;
780
781 /* DOS line ending? */
782 if (__builtin_expect (c == '\r', false) && s[1] == '\n')
783 {
784 s++;
785 if (s == buffer->rlimit)
786 goto done;
787 }
788
789 if (__builtin_expect (pbackslash == NULL, true))
790 goto done;
791
792 /* Check for escaped newline. */
793 p = d;
794 while (is_nvspace (p[-1]))
795 p--;
796 if (p - 1 != pbackslash)
797 goto done;
798
799 /* Have an escaped newline; process it and proceed to
800 the slow path. */
801 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
802 d = p - 2;
803 buffer->next_line = p - 1;
26aea073 804
246a2fcb
RH
805 slow_path:
806 while (1)
4a5b68a2 807 {
26aea073
NB
808 c = *++s;
809 *++d = c;
810
811 if (c == '\n' || c == '\r')
812 {
246a2fcb 813 /* Handle DOS line endings. */
26aea073
NB
814 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
815 s++;
816 if (s == buffer->rlimit)
817 break;
818
819 /* Escaped? */
820 p = d;
821 while (p != buffer->next_line && is_nvspace (p[-1]))
822 p--;
823 if (p == buffer->next_line || p[-1] != '\\')
824 break;
825
41c32c98 826 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
26aea073
NB
827 d = p - 2;
828 buffer->next_line = p - 1;
829 }
830 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
831 {
832 /* Add a note regardless, for the benefit of -Wtrigraphs. */
41c32c98 833 add_line_note (buffer, d, s[2]);
26aea073
NB
834 if (CPP_OPTION (pfile, trigraphs))
835 {
836 *d = _cpp_trigraph_map[s[2]];
837 s += 2;
838 }
839 }
4a5b68a2 840 }
45b966db 841 }
26aea073
NB
842 else
843 {
246a2fcb 844 while (*s != '\n' && *s != '\r')
26aea073 845 s++;
26aea073
NB
846 d = (uchar *) s;
847
848 /* Handle DOS line endings. */
849 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
850 s++;
851 }
0d9f234d 852
d08dcf87 853 done:
26aea073 854 *d = '\n';
41c32c98
NB
855 /* A sentinel note that should never be processed. */
856 add_line_note (buffer, d + 1, '\n');
26aea073 857 buffer->next_line = s + 1;
45b966db
ZW
858}
859
a8eb6044
NB
860/* Return true if the trigraph indicated by NOTE should be warned
861 about in a comment. */
862static bool
6cf87ca4 863warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
a8eb6044
NB
864{
865 const uchar *p;
866
867 /* Within comments we don't warn about trigraphs, unless the
868 trigraph forms an escaped newline, as that may change
6356f892 869 behavior. */
a8eb6044
NB
870 if (note->type != '/')
871 return false;
872
873 /* If -trigraphs, then this was an escaped newline iff the next note
874 is coincident. */
875 if (CPP_OPTION (pfile, trigraphs))
876 return note[1].pos == note->pos;
877
878 /* Otherwise, see if this forms an escaped newline. */
879 p = note->pos + 3;
880 while (is_nvspace (*p))
881 p++;
882
883 /* There might have been escaped newlines between the trigraph and the
884 newline we found. Hence the position test. */
885 return (*p == '\n' && p < note[1].pos);
886}
887
26aea073
NB
888/* Process the notes created by add_line_note as far as the current
889 location. */
890void
6cf87ca4 891_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
45b966db 892{
29401c30
NB
893 cpp_buffer *buffer = pfile->buffer;
894
26aea073 895 for (;;)
041c3194 896 {
26aea073
NB
897 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
898 unsigned int col;
a5c3cccd 899
26aea073
NB
900 if (note->pos > buffer->cur)
901 break;
a5c3cccd 902
26aea073
NB
903 buffer->cur_note++;
904 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
4d6baafa 905
41c32c98 906 if (note->type == '\\' || note->type == ' ')
26aea073 907 {
41c32c98 908 if (note->type == ' ' && !in_comment)
500bee0a 909 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
26aea073 910 "backslash and newline separated by space");
41c32c98 911
26aea073 912 if (buffer->next_line > buffer->rlimit)
87062813 913 {
500bee0a 914 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
26aea073
NB
915 "backslash-newline at end of file");
916 /* Prevent "no newline at end of file" warning. */
917 buffer->next_line = buffer->rlimit;
87062813 918 }
26aea073
NB
919
920 buffer->line_base = note->pos;
12f9df4e 921 CPP_INCREMENT_LINE (pfile, 0);
0d9f234d 922 }
41c32c98
NB
923 else if (_cpp_trigraph_map[note->type])
924 {
a8eb6044
NB
925 if (CPP_OPTION (pfile, warn_trigraphs)
926 && (!in_comment || warn_in_comment (pfile, note)))
41c32c98
NB
927 {
928 if (CPP_OPTION (pfile, trigraphs))
87cf0651
SB
929 cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
930 pfile->line_table->highest_line, col,
931 "trigraph ??%c converted to %c",
932 note->type,
933 (int) _cpp_trigraph_map[note->type]);
41c32c98 934 else
905bd7b5 935 {
87cf0651
SB
936 cpp_warning_with_line
937 (pfile, CPP_W_TRIGRAPHS,
938 pfile->line_table->highest_line, col,
905bd7b5
GK
939 "trigraph ??%c ignored, use -trigraphs to enable",
940 note->type);
941 }
41c32c98
NB
942 }
943 }
00a81b8b
JM
944 else if (note->type == 0)
945 /* Already processed in lex_raw_string. */;
41c32c98
NB
946 else
947 abort ();
041c3194 948 }
45b966db
ZW
949}
950
0d9f234d
NB
951/* Skip a C-style block comment. We find the end of the comment by
952 seeing if an asterisk is before every '/' we encounter. Returns
6f572ac2
NB
953 nonzero if comment terminated by EOF, zero otherwise.
954
955 Buffer->cur points to the initial asterisk of the comment. */
26aea073 956bool
6cf87ca4 957_cpp_skip_block_comment (cpp_reader *pfile)
45b966db 958{
041c3194 959 cpp_buffer *buffer = pfile->buffer;
d08dcf87
ZW
960 const uchar *cur = buffer->cur;
961 uchar c;
0d9f234d 962
d08dcf87
ZW
963 cur++;
964 if (*cur == '/')
965 cur++;
0d9f234d 966
26aea073
NB
967 for (;;)
968 {
0d9f234d
NB
969 /* People like decorating comments with '*', so check for '/'
970 instead for efficiency. */
d08dcf87
ZW
971 c = *cur++;
972
041c3194 973 if (c == '/')
45b966db 974 {
d08dcf87 975 if (cur[-2] == '*')
0d9f234d 976 break;
041c3194 977
0d9f234d 978 /* Warn about potential nested comments, but not if the '/'
a1f300c0 979 comes immediately before the true comment delimiter.
041c3194 980 Don't bother to get it right across escaped newlines. */
0d9f234d 981 if (CPP_OPTION (pfile, warn_comments)
d08dcf87
ZW
982 && cur[0] == '*' && cur[1] != '/')
983 {
984 buffer->cur = cur;
87cf0651
SB
985 cpp_warning_with_line (pfile, CPP_W_COMMENTS,
986 pfile->line_table->highest_line,
987 CPP_BUF_COL (buffer),
988 "\"/*\" within comment");
d08dcf87 989 }
45b966db 990 }
26aea073
NB
991 else if (c == '\n')
992 {
12f9df4e 993 unsigned int cols;
d08dcf87 994 buffer->cur = cur - 1;
26aea073
NB
995 _cpp_process_line_notes (pfile, true);
996 if (buffer->next_line >= buffer->rlimit)
997 return true;
998 _cpp_clean_line (pfile);
12f9df4e
PB
999
1000 cols = buffer->next_line - buffer->line_base;
1001 CPP_INCREMENT_LINE (pfile, cols);
1002
d08dcf87 1003 cur = buffer->cur;
26aea073 1004 }
45b966db 1005 }
041c3194 1006
d08dcf87 1007 buffer->cur = cur;
a8eb6044 1008 _cpp_process_line_notes (pfile, true);
26aea073 1009 return false;
45b966db
ZW
1010}
1011
480709cc 1012/* Skip a C++ line comment, leaving buffer->cur pointing to the
da7d8304 1013 terminating newline. Handles escaped newlines. Returns nonzero
480709cc 1014 if a multiline comment. */
041c3194 1015static int
6cf87ca4 1016skip_line_comment (cpp_reader *pfile)
45b966db 1017{
cbcff6df 1018 cpp_buffer *buffer = pfile->buffer;
1bb64668 1019 source_location orig_line = pfile->line_table->highest_line;
041c3194 1020
26aea073
NB
1021 while (*buffer->cur != '\n')
1022 buffer->cur++;
480709cc 1023
26aea073 1024 _cpp_process_line_notes (pfile, true);
500bee0a 1025 return orig_line != pfile->line_table->highest_line;
041c3194 1026}
45b966db 1027
26aea073 1028/* Skips whitespace, saving the next non-whitespace character. */
52fadca8 1029static void
6cf87ca4 1030skip_whitespace (cpp_reader *pfile, cppchar_t c)
041c3194
ZW
1031{
1032 cpp_buffer *buffer = pfile->buffer;
f7d151fb 1033 bool saw_NUL = false;
45b966db 1034
0d9f234d 1035 do
041c3194 1036 {
91fcd158 1037 /* Horizontal space always OK. */
26aea073 1038 if (c == ' ' || c == '\t')
0d9f234d 1039 ;
0d9f234d 1040 /* Just \f \v or \0 left. */
91fcd158 1041 else if (c == '\0')
f7d151fb 1042 saw_NUL = true;
93c80368 1043 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
500bee0a 1044 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
ebef4e8c
NB
1045 CPP_BUF_COL (buffer),
1046 "%s in preprocessing directive",
1047 c == '\f' ? "form feed" : "vertical tab");
0d9f234d 1048
0d9f234d 1049 c = *buffer->cur++;
45b966db 1050 }
ec5c56db 1051 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
0d9f234d
NB
1052 while (is_nvspace (c));
1053
f7d151fb 1054 if (saw_NUL)
0527bc4e 1055 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
f7d151fb 1056
480709cc 1057 buffer->cur--;
041c3194 1058}
45b966db 1059
93c80368
NB
1060/* See if the characters of a number token are valid in a name (no
1061 '.', '+' or '-'). */
1062static int
6cf87ca4 1063name_p (cpp_reader *pfile, const cpp_string *string)
93c80368
NB
1064{
1065 unsigned int i;
1066
1067 for (i = 0; i < string->len; i++)
1068 if (!is_idchar (string->text[i]))
1069 return 0;
1070
df383483 1071 return 1;
93c80368
NB
1072}
1073
50668cf6
GK
1074/* After parsing an identifier or other sequence, produce a warning about
1075 sequences not in NFC/NFKC. */
1076static void
1077warn_about_normalization (cpp_reader *pfile,
1078 const cpp_token *token,
1079 const struct normalize_state *s)
1080{
1081 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1082 && !pfile->state.skipping)
1083 {
1084 /* Make sure that the token is printed using UCNs, even
1085 if we'd otherwise happily print UTF-8. */
c3f829c1 1086 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
50668cf6
GK
1087 size_t sz;
1088
1089 sz = cpp_spell_token (pfile, token, buf, false) - buf;
1090 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
87cf0651
SB
1091 cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1092 "`%.*s' is not in NFKC", (int) sz, buf);
50668cf6 1093 else
87cf0651
SB
1094 cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1095 "`%.*s' is not in NFC", (int) sz, buf);
55e7f907 1096 free (buf);
50668cf6
GK
1097 }
1098}
1099
bced6edf 1100/* Returns TRUE if the sequence starting at buffer->cur is invalid in
1613e52b 1101 an identifier. FIRST is TRUE if this starts an identifier. */
bced6edf 1102static bool
50668cf6
GK
1103forms_identifier_p (cpp_reader *pfile, int first,
1104 struct normalize_state *state)
bced6edf 1105{
1613e52b
NB
1106 cpp_buffer *buffer = pfile->buffer;
1107
1108 if (*buffer->cur == '$')
1109 {
1110 if (!CPP_OPTION (pfile, dollars_in_ident))
1111 return false;
1112
1113 buffer->cur++;
78b8811a 1114 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1613e52b 1115 {
78b8811a 1116 CPP_OPTION (pfile, warn_dollars) = 0;
0527bc4e 1117 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1613e52b
NB
1118 }
1119
1120 return true;
1121 }
bced6edf 1122
1613e52b 1123 /* Is this a syntactically valid UCN? */
af15a2fe 1124 if (CPP_OPTION (pfile, extended_identifiers)
6baba9bb 1125 && *buffer->cur == '\\'
1613e52b 1126 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
bced6edf 1127 {
1613e52b 1128 buffer->cur += 2;
50668cf6
GK
1129 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1130 state))
1613e52b
NB
1131 return true;
1132 buffer->cur -= 2;
bced6edf 1133 }
bced6edf 1134
1613e52b 1135 return false;
bced6edf
NB
1136}
1137
17e7cb85
KT
1138/* Helper function to get the cpp_hashnode of the identifier BASE. */
1139static cpp_hashnode *
1140lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1141{
1142 cpp_hashnode *result;
1143 const uchar *cur;
1144 unsigned int len;
1145 unsigned int hash = HT_HASHSTEP (0, *base);
1146
1147 cur = base + 1;
1148 while (ISIDNUM (*cur))
1149 {
1150 hash = HT_HASHSTEP (hash, *cur);
1151 cur++;
1152 }
1153 len = cur - base;
1154 hash = HT_HASHFINISH (hash, len);
1155 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1156 base, len, hash, HT_ALLOC));
1157
1158 /* Rarely, identifiers require diagnostics when lexed. */
1159 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1160 && !pfile->state.skipping, 0))
1161 {
1162 /* It is allowed to poison the same identifier twice. */
1163 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1164 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1165 NODE_NAME (result));
1166
1167 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1168 replacement list of a variadic macro. */
1169 if (result == pfile->spec_nodes.n__VA_ARGS__
1170 && !pfile->state.va_args_ok)
1171 cpp_error (pfile, CPP_DL_PEDWARN,
1172 "__VA_ARGS__ can only appear in the expansion"
1173 " of a C99 variadic macro");
1174
1175 /* For -Wc++-compat, warn about use of C++ named operators. */
1176 if (result->flags & NODE_WARN_OPERATOR)
87cf0651
SB
1177 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1178 "identifier \"%s\" is a special operator name in C++",
1179 NODE_NAME (result));
17e7cb85
KT
1180 }
1181
1182 return result;
1183}
1184
1185/* Get the cpp_hashnode of an identifier specified by NAME in
1186 the current cpp_reader object. If none is found, NULL is returned. */
1187cpp_hashnode *
1188_cpp_lex_identifier (cpp_reader *pfile, const char *name)
1189{
1190 cpp_hashnode *result;
1191 result = lex_identifier_intern (pfile, (uchar *) name);
1192 return result;
1193}
1194
bced6edf 1195/* Lex an identifier starting at BUFFER->CUR - 1. */
0d9f234d 1196static cpp_hashnode *
50668cf6
GK
1197lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1198 struct normalize_state *nst)
45b966db 1199{
93c80368 1200 cpp_hashnode *result;
47e20491 1201 const uchar *cur;
c6e83800
ZW
1202 unsigned int len;
1203 unsigned int hash = HT_HASHSTEP (0, *base);
2c3fcba6 1204
c6e83800 1205 cur = pfile->buffer->cur;
47e20491
GK
1206 if (! starts_ucn)
1207 while (ISIDNUM (*cur))
1208 {
1209 hash = HT_HASHSTEP (hash, *cur);
1210 cur++;
1211 }
1212 pfile->buffer->cur = cur;
50668cf6 1213 if (starts_ucn || forms_identifier_p (pfile, false, nst))
10cf9bde 1214 {
47e20491
GK
1215 /* Slower version for identifiers containing UCNs (or $). */
1216 do {
1217 while (ISIDNUM (*pfile->buffer->cur))
50668cf6
GK
1218 {
1219 pfile->buffer->cur++;
1220 NORMALIZE_STATE_UPDATE_IDNUM (nst);
1221 }
1222 } while (forms_identifier_p (pfile, false, nst));
47e20491
GK
1223 result = _cpp_interpret_identifier (pfile, base,
1224 pfile->buffer->cur - base);
2c3fcba6 1225 }
47e20491
GK
1226 else
1227 {
1228 len = cur - base;
1229 hash = HT_HASHFINISH (hash, len);
bced6edf 1230
2bf41bf0
TT
1231 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1232 base, len, hash, HT_ALLOC));
47e20491 1233 }
2c3fcba6 1234
bced6edf 1235 /* Rarely, identifiers require diagnostics when lexed. */
2c3fcba6
ZW
1236 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1237 && !pfile->state.skipping, 0))
1238 {
1239 /* It is allowed to poison the same identifier twice. */
1240 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
0527bc4e 1241 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
2c3fcba6
ZW
1242 NODE_NAME (result));
1243
1244 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1245 replacement list of a variadic macro. */
1246 if (result == pfile->spec_nodes.n__VA_ARGS__
1247 && !pfile->state.va_args_ok)
0527bc4e 1248 cpp_error (pfile, CPP_DL_PEDWARN,
6cf87ca4
ZW
1249 "__VA_ARGS__ can only appear in the expansion"
1250 " of a C99 variadic macro");
3d8b2a98
ILT
1251
1252 /* For -Wc++-compat, warn about use of C++ named operators. */
1253 if (result->flags & NODE_WARN_OPERATOR)
87cf0651
SB
1254 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1255 "identifier \"%s\" is a special operator name in C++",
1256 NODE_NAME (result));
2c3fcba6
ZW
1257 }
1258
1259 return result;
1260}
1261
bced6edf 1262/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
45b966db 1263static void
50668cf6
GK
1264lex_number (cpp_reader *pfile, cpp_string *number,
1265 struct normalize_state *nst)
45b966db 1266{
562a5c27 1267 const uchar *cur;
bced6edf
NB
1268 const uchar *base;
1269 uchar *dest;
45b966db 1270
bced6edf
NB
1271 base = pfile->buffer->cur - 1;
1272 do
041c3194 1273 {
bced6edf 1274 cur = pfile->buffer->cur;
0d9f234d 1275
bced6edf
NB
1276 /* N.B. ISIDNUM does not include $. */
1277 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
50668cf6
GK
1278 {
1279 cur++;
1280 NORMALIZE_STATE_UPDATE_IDNUM (nst);
1281 }
45b966db 1282
10cf9bde 1283 pfile->buffer->cur = cur;
45b966db 1284 }
50668cf6 1285 while (forms_identifier_p (pfile, false, nst));
93c80368 1286
bced6edf
NB
1287 number->len = cur - base;
1288 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1289 memcpy (dest, base, number->len);
1290 dest[number->len] = '\0';
1291 number->text = dest;
93c80368
NB
1292}
1293
6338b358
NB
1294/* Create a token of type TYPE with a literal spelling. */
1295static void
6cf87ca4
ZW
1296create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1297 unsigned int len, enum cpp_ttype type)
6338b358
NB
1298{
1299 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
1300
1301 memcpy (dest, base, len);
1302 dest[len] = '\0';
1303 token->type = type;
1304 token->val.str.len = len;
1305 token->val.str.text = dest;
1306}
1307
00a81b8b
JM
1308/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1309 sequence from *FIRST_BUFF_P to LAST_BUFF_P. */
1310
1311static void
1312bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1313 _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1314{
1315 _cpp_buff *first_buff = *first_buff_p;
1316 _cpp_buff *last_buff = *last_buff_p;
1317
1318 if (first_buff == NULL)
1319 first_buff = last_buff = _cpp_get_buff (pfile, len);
1320 else if (len > BUFF_ROOM (last_buff))
1321 {
1322 size_t room = BUFF_ROOM (last_buff);
1323 memcpy (BUFF_FRONT (last_buff), base, room);
1324 BUFF_FRONT (last_buff) += room;
1325 base += room;
1326 len -= room;
1327 last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1328 }
1329
1330 memcpy (BUFF_FRONT (last_buff), base, len);
1331 BUFF_FRONT (last_buff) += len;
1332
1333 *first_buff_p = first_buff;
1334 *last_buff_p = last_buff;
1335}
1336
2c6e3f55 1337/* Lexes a raw string. The stored string contains the spelling, including
00a81b8b 1338 double quotes, delimiter string, '(' and ')', any leading
2c6e3f55
JJ
1339 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
1340 literal, or CPP_OTHER if it was not properly terminated.
1341
1342 The spelling is NUL-terminated, but it is not guaranteed that this
1343 is the first NUL since embedded NULs are preserved. */
1344
1345static void
1346lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1347 const uchar *cur)
1348{
2c6e3f55
JJ
1349 const uchar *raw_prefix;
1350 unsigned int raw_prefix_len = 0;
1351 enum cpp_ttype type;
1352 size_t total_len = 0;
1353 _cpp_buff *first_buff = NULL, *last_buff = NULL;
00a81b8b 1354 _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
2c6e3f55
JJ
1355
1356 type = (*base == 'L' ? CPP_WSTRING :
1357 *base == 'U' ? CPP_STRING32 :
1358 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1359 : CPP_STRING);
1360
1361 raw_prefix = cur + 1;
1362 while (raw_prefix_len < 16)
1363 {
1364 switch (raw_prefix[raw_prefix_len])
1365 {
52150625 1366 case ' ': case '(': case ')': case '\\': case '\t':
2c6e3f55
JJ
1367 case '\v': case '\f': case '\n': default:
1368 break;
1369 /* Basic source charset except the above chars. */
1370 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1371 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1372 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1373 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1374 case 'y': case 'z':
1375 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1376 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1377 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1378 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1379 case 'Y': case 'Z':
1380 case '0': case '1': case '2': case '3': case '4': case '5':
1381 case '6': case '7': case '8': case '9':
52150625 1382 case '_': case '{': case '}': case '#': case '[': case ']':
2c6e3f55
JJ
1383 case '<': case '>': case '%': case ':': case ';': case '.':
1384 case '?': case '*': case '+': case '-': case '/': case '^':
1385 case '&': case '|': case '~': case '!': case '=': case ',':
52150625 1386 case '"': case '\'':
2c6e3f55
JJ
1387 raw_prefix_len++;
1388 continue;
1389 }
1390 break;
1391 }
1392
52150625 1393 if (raw_prefix[raw_prefix_len] != '(')
2c6e3f55
JJ
1394 {
1395 int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
1396 + 1;
1397 if (raw_prefix_len == 16)
1398 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
1399 "raw string delimiter longer than 16 characters");
1400 else
1401 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
1402 "invalid character '%c' in raw string delimiter",
1403 (int) raw_prefix[raw_prefix_len]);
1404 pfile->buffer->cur = raw_prefix - 1;
1405 create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
1406 return;
1407 }
1408
1409 cur = raw_prefix + raw_prefix_len + 1;
1410 for (;;)
1411 {
00a81b8b
JM
1412#define BUF_APPEND(STR,LEN) \
1413 do { \
1414 bufring_append (pfile, (const uchar *)(STR), (LEN), \
1415 &first_buff, &last_buff); \
1416 total_len += (LEN); \
1417 } while (0);
1418
1419 cppchar_t c;
1420
1421 /* If we previously performed any trigraph or line splicing
1422 transformations, undo them within the body of the raw string. */
1423 while (note->pos < cur)
1424 ++note;
1425 for (; note->pos == cur; ++note)
1426 {
1427 switch (note->type)
1428 {
1429 case '\\':
1430 case ' ':
1431 /* Restore backslash followed by newline. */
1432 BUF_APPEND (base, cur - base);
1433 base = cur;
1434 BUF_APPEND ("\\", 1);
1435 after_backslash:
1436 if (note->type == ' ')
1437 {
1438 /* GNU backslash whitespace newline extension. FIXME
1439 could be any sequence of non-vertical space. When we
1440 can properly restore any such sequence, we should mark
1441 this note as handled so _cpp_process_line_notes
1442 doesn't warn. */
1443 BUF_APPEND (" ", 1);
1444 }
1445
1446 BUF_APPEND ("\n", 1);
1447 break;
1448
1449 case 0:
1450 /* Already handled. */
1451 break;
1452
1453 default:
1454 if (_cpp_trigraph_map[note->type])
1455 {
1456 /* Don't warn about this trigraph in
1457 _cpp_process_line_notes, since trigraphs show up as
1458 trigraphs in raw strings. */
d947ada0 1459 uchar type = note->type;
00a81b8b
JM
1460 note->type = 0;
1461
1462 if (!CPP_OPTION (pfile, trigraphs))
1463 /* If we didn't convert the trigraph in the first
1464 place, don't do anything now either. */
1465 break;
1466
1467 BUF_APPEND (base, cur - base);
1468 base = cur;
1469 BUF_APPEND ("??", 2);
1470
1471 /* ??/ followed by newline gets two line notes, one for
1472 the trigraph and one for the backslash/newline. */
1473 if (type == '/' && note[1].pos == cur)
1474 {
1475 if (note[1].type != '\\'
1476 && note[1].type != ' ')
1477 abort ();
1478 BUF_APPEND ("/", 1);
1479 ++note;
1480 goto after_backslash;
1481 }
1482 /* The ) from ??) could be part of the suffix. */
1483 else if (type == ')'
1484 && strncmp ((const char *) cur+1,
1485 (const char *) raw_prefix,
1486 raw_prefix_len) == 0
1487 && cur[raw_prefix_len+1] == '"')
1488 {
6cfae070
JJ
1489 BUF_APPEND (")", 1);
1490 base++;
1491 cur += raw_prefix_len + 2;
00a81b8b
JM
1492 goto break_outer_loop;
1493 }
1494 else
1495 {
1496 /* Skip the replacement character. */
1497 base = ++cur;
1498 BUF_APPEND (&type, 1);
1499 }
1500 }
1501 else
1502 abort ();
1503 break;
1504 }
1505 }
1506 c = *cur++;
2c6e3f55 1507
52150625 1508 if (c == ')'
2c6e3f55
JJ
1509 && strncmp ((const char *) cur, (const char *) raw_prefix,
1510 raw_prefix_len) == 0
1511 && cur[raw_prefix_len] == '"')
1512 {
1513 cur += raw_prefix_len + 1;
1514 break;
1515 }
1516 else if (c == '\n')
1517 {
1518 if (pfile->state.in_directive
1519 || pfile->state.parsing_args
1520 || pfile->state.in_deferred_pragma)
1521 {
1522 cur--;
1523 type = CPP_OTHER;
1524 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1525 "unterminated raw string");
1526 break;
1527 }
1528
00a81b8b 1529 BUF_APPEND (base, cur - base);
2c6e3f55
JJ
1530
1531 if (pfile->buffer->cur < pfile->buffer->rlimit)
1532 CPP_INCREMENT_LINE (pfile, 0);
1533 pfile->buffer->need_line = true;
1534
00a81b8b
JM
1535 pfile->buffer->cur = cur-1;
1536 _cpp_process_line_notes (pfile, false);
2c6e3f55
JJ
1537 if (!_cpp_get_fresh_line (pfile))
1538 {
1539 source_location src_loc = token->src_loc;
1540 token->type = CPP_EOF;
1541 /* Tell the compiler the line number of the EOF token. */
1542 token->src_loc = pfile->line_table->highest_line;
1543 token->flags = BOL;
1544 if (first_buff != NULL)
1545 _cpp_release_buff (pfile, first_buff);
1546 cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1547 "unterminated raw string");
1548 return;
1549 }
1550
1551 cur = base = pfile->buffer->cur;
00a81b8b 1552 note = &pfile->buffer->notes[pfile->buffer->cur_note];
2c6e3f55 1553 }
2c6e3f55 1554 }
00a81b8b 1555 break_outer_loop:
2c6e3f55 1556
3ce4f9e4
ESR
1557 if (CPP_OPTION (pfile, user_literals))
1558 {
7f5f5f98
OW
1559 /* According to C++11 [lex.ext]p10, a ud-suffix not starting with an
1560 underscore is ill-formed. Since this breaks programs using macros
1561 from inttypes.h, we generate a warning and treat the ud-suffix as a
1562 separate preprocessing token. This approach is under discussion by
1563 the standards committee, and has been adopted as a conforming
1564 extension by other front ends such as clang. */
1565 if (ISALPHA (*cur))
1566 {
112448b4 1567 /* Raise a warning, but do not consume subsequent tokens. */
7f5f5f98
OW
1568 if (CPP_OPTION (pfile, warn_literal_suffix))
1569 cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1570 token->src_loc, 0,
1571 "invalid suffix on literal; C++11 requires "
1572 "a space between literal and identifier");
1573 }
3ce4f9e4 1574 /* Grab user defined literal suffix. */
7f5f5f98 1575 else if (*cur == '_')
3ce4f9e4
ESR
1576 {
1577 type = cpp_userdef_string_add_type (type);
1578 ++cur;
7f5f5f98
OW
1579
1580 while (ISIDNUM (*cur))
1581 ++cur;
3ce4f9e4 1582 }
3ce4f9e4
ESR
1583 }
1584
2c6e3f55
JJ
1585 pfile->buffer->cur = cur;
1586 if (first_buff == NULL)
1587 create_literal (pfile, token, base, cur - base, type);
1588 else
1589 {
1590 uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1591
1592 token->type = type;
1593 token->val.str.len = total_len + (cur - base);
1594 token->val.str.text = dest;
1595 last_buff = first_buff;
1596 while (last_buff != NULL)
1597 {
1598 memcpy (dest, last_buff->base,
1599 BUFF_FRONT (last_buff) - last_buff->base);
1600 dest += BUFF_FRONT (last_buff) - last_buff->base;
1601 last_buff = last_buff->next;
1602 }
1603 _cpp_release_buff (pfile, first_buff);
1604 memcpy (dest, base, cur - base);
1605 dest[cur - base] = '\0';
1606 }
1607}
1608
bced6edf 1609/* Lexes a string, character constant, or angle-bracketed header file
6338b358 1610 name. The stored string contains the spelling, including opening
2c6e3f55
JJ
1611 quote and any leading 'L', 'u', 'U' or 'u8' and optional
1612 'R' modifier. It returns the type of the literal, or CPP_OTHER
1613 if it was not properly terminated, or CPP_LESS for an unterminated
1614 header name which must be relexed as normal tokens.
6338b358
NB
1615
1616 The spelling is NUL-terminated, but it is not guaranteed that this
1617 is the first NUL since embedded NULs are preserved. */
041c3194 1618static void
6cf87ca4 1619lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
45b966db 1620{
6338b358
NB
1621 bool saw_NUL = false;
1622 const uchar *cur;
bced6edf 1623 cppchar_t terminator;
6338b358
NB
1624 enum cpp_ttype type;
1625
1626 cur = base;
1627 terminator = *cur++;
2c6e3f55 1628 if (terminator == 'L' || terminator == 'U')
6338b358 1629 terminator = *cur++;
2c6e3f55
JJ
1630 else if (terminator == 'u')
1631 {
1632 terminator = *cur++;
1633 if (terminator == '8')
1634 terminator = *cur++;
1635 }
1636 if (terminator == 'R')
1637 {
1638 lex_raw_string (pfile, token, base, cur);
1639 return;
1640 }
1641 if (terminator == '"')
b6baa67d
KVH
1642 type = (*base == 'L' ? CPP_WSTRING :
1643 *base == 'U' ? CPP_STRING32 :
2c6e3f55
JJ
1644 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1645 : CPP_STRING);
6338b358 1646 else if (terminator == '\'')
b6baa67d
KVH
1647 type = (*base == 'L' ? CPP_WCHAR :
1648 *base == 'U' ? CPP_CHAR32 :
1649 *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
6338b358
NB
1650 else
1651 terminator = '>', type = CPP_HEADER_NAME;
93c80368 1652
0d9f234d 1653 for (;;)
45b966db 1654 {
6338b358 1655 cppchar_t c = *cur++;
7868b4a2 1656
6f572ac2 1657 /* In #include-style directives, terminators are not escapable. */
6338b358
NB
1658 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1659 cur++;
1660 else if (c == terminator)
bced6edf 1661 break;
6338b358 1662 else if (c == '\n')
0d9f234d 1663 {
6338b358 1664 cur--;
4bb09c26
JM
1665 /* Unmatched quotes always yield undefined behavior, but
1666 greedy lexing means that what appears to be an unterminated
1667 header name may actually be a legitimate sequence of tokens. */
1668 if (terminator == '>')
1669 {
1670 token->type = CPP_LESS;
1671 return;
1672 }
6338b358
NB
1673 type = CPP_OTHER;
1674 break;
45b966db 1675 }
6338b358
NB
1676 else if (c == '\0')
1677 saw_NUL = true;
45b966db
ZW
1678 }
1679
6338b358 1680 if (saw_NUL && !pfile->state.skipping)
0527bc4e
JDA
1681 cpp_error (pfile, CPP_DL_WARNING,
1682 "null character(s) preserved in literal");
45b966db 1683
c663e301
JM
1684 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1685 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1686 (int) terminator);
1687
3ce4f9e4
ESR
1688 if (CPP_OPTION (pfile, user_literals))
1689 {
7f5f5f98
OW
1690 /* According to C++11 [lex.ext]p10, a ud-suffix not starting with an
1691 underscore is ill-formed. Since this breaks programs using macros
1692 from inttypes.h, we generate a warning and treat the ud-suffix as a
1693 separate preprocessing token. This approach is under discussion by
1694 the standards committee, and has been adopted as a conforming
1695 extension by other front ends such as clang. */
1696 if (ISALPHA (*cur))
1697 {
112448b4 1698 /* Raise a warning, but do not consume subsequent tokens. */
7f5f5f98
OW
1699 if (CPP_OPTION (pfile, warn_literal_suffix))
1700 cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1701 token->src_loc, 0,
1702 "invalid suffix on literal; C++11 requires "
1703 "a space between literal and identifier");
1704 }
3ce4f9e4 1705 /* Grab user defined literal suffix. */
7f5f5f98 1706 else if (*cur == '_')
3ce4f9e4
ESR
1707 {
1708 type = cpp_userdef_char_add_type (type);
1709 type = cpp_userdef_string_add_type (type);
1710 ++cur;
7f5f5f98
OW
1711
1712 while (ISIDNUM (*cur))
1713 ++cur;
3ce4f9e4 1714 }
3ce4f9e4
ESR
1715 }
1716
6338b358
NB
1717 pfile->buffer->cur = cur;
1718 create_literal (pfile, token, base, cur - base, type);
0d9f234d 1719}
041c3194 1720
631d0d36
MG
1721/* Return the comment table. The client may not make any assumption
1722 about the ordering of the table. */
1723cpp_comment_table *
1724cpp_get_comments (cpp_reader *pfile)
1725{
1726 return &pfile->comments;
1727}
1728
1729/* Append a comment to the end of the comment table. */
1730static void
1731store_comment (cpp_reader *pfile, cpp_token *token)
1732{
1733 int len;
1734
1735 if (pfile->comments.allocated == 0)
1736 {
1737 pfile->comments.allocated = 256;
1738 pfile->comments.entries = (cpp_comment *) xmalloc
1739 (pfile->comments.allocated * sizeof (cpp_comment));
1740 }
1741
1742 if (pfile->comments.count == pfile->comments.allocated)
1743 {
1744 pfile->comments.allocated *= 2;
1745 pfile->comments.entries = (cpp_comment *) xrealloc
1746 (pfile->comments.entries,
1747 pfile->comments.allocated * sizeof (cpp_comment));
1748 }
1749
1750 len = token->val.str.len;
1751
1752 /* Copy comment. Note, token may not be NULL terminated. */
1753 pfile->comments.entries[pfile->comments.count].comment =
1754 (char *) xmalloc (sizeof (char) * (len + 1));
1755 memcpy (pfile->comments.entries[pfile->comments.count].comment,
1756 token->val.str.text, len);
1757 pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1758
1759 /* Set source location. */
1760 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1761
1762 /* Increment the count of entries in the comment table. */
1763 pfile->comments.count++;
1764}
1765
93c80368 1766/* The stored comment includes the comment start and any terminator. */
9e62c811 1767static void
6cf87ca4
ZW
1768save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1769 cppchar_t type)
9e62c811 1770{
041c3194 1771 unsigned char *buffer;
651a20b5 1772 unsigned int len, clen, i;
df383483 1773
1c6d33ef 1774 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
480709cc 1775
3542203b
NB
1776 /* C++ comments probably (not definitely) have moved past a new
1777 line, which we don't want to save in the comment. */
480709cc 1778 if (is_vspace (pfile->buffer->cur[-1]))
3542203b 1779 len--;
477cdac7 1780
651a20b5
KT
1781 /* If we are currently in a directive or in argument parsing, then
1782 we need to store all C++ comments as C comments internally, and
1783 so we need to allocate a little extra space in that case.
477cdac7
JT
1784
1785 Note that the only time we encounter a directive here is
1786 when we are saving comments in a "#define". */
651a20b5
KT
1787 clen = ((pfile->state.in_directive || pfile->state.parsing_args)
1788 && type == '/') ? len + 2 : len;
477cdac7
JT
1789
1790 buffer = _cpp_unaligned_alloc (pfile, clen);
df383483 1791
041c3194 1792 token->type = CPP_COMMENT;
477cdac7 1793 token->val.str.len = clen;
0d9f234d 1794 token->val.str.text = buffer;
45b966db 1795
1c6d33ef
NB
1796 buffer[0] = '/';
1797 memcpy (buffer + 1, from, len - 1);
477cdac7 1798
1eeeb6a4 1799 /* Finish conversion to a C comment, if necessary. */
651a20b5 1800 if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
477cdac7
JT
1801 {
1802 buffer[1] = '*';
1803 buffer[clen - 2] = '*';
1804 buffer[clen - 1] = '/';
651a20b5
KT
1805 /* As there can be in a C++ comments illegal sequences for C comments
1806 we need to filter them out. */
1807 for (i = 2; i < (clen - 2); i++)
1808 if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
1809 buffer[i] = '|';
477cdac7 1810 }
631d0d36
MG
1811
1812 /* Finally store this comment for use by clients of libcpp. */
1813 store_comment (pfile, token);
0d9f234d 1814}
45b966db 1815
5fddcffc
NB
1816/* Allocate COUNT tokens for RUN. */
1817void
6cf87ca4 1818_cpp_init_tokenrun (tokenrun *run, unsigned int count)
5fddcffc 1819{
72bb2c39 1820 run->base = XNEWVEC (cpp_token, count);
5fddcffc
NB
1821 run->limit = run->base + count;
1822 run->next = NULL;
1823}
1824
1825/* Returns the next tokenrun, or creates one if there is none. */
1826static tokenrun *
6cf87ca4 1827next_tokenrun (tokenrun *run)
5fddcffc
NB
1828{
1829 if (run->next == NULL)
1830 {
72bb2c39 1831 run->next = XNEW (tokenrun);
bdcbe496 1832 run->next->prev = run;
5fddcffc
NB
1833 _cpp_init_tokenrun (run->next, 250);
1834 }
1835
1836 return run->next;
1837}
1838
ad2305ad 1839/* Return the number of not yet processed token in a given
92582b75
TT
1840 context. */
1841int
ad2305ad 1842_cpp_remaining_tokens_num_in_context (cpp_context *context)
92582b75 1843{
92582b75 1844 if (context->tokens_kind == TOKENS_KIND_DIRECT)
cbbcf655 1845 return (LAST (context).token - FIRST (context).token);
92582b75
TT
1846 else if (context->tokens_kind == TOKENS_KIND_INDIRECT
1847 || context->tokens_kind == TOKENS_KIND_EXTENDED)
cbbcf655 1848 return (LAST (context).ptoken - FIRST (context).ptoken);
92582b75
TT
1849 else
1850 abort ();
1851}
1852
ad2305ad
DS
1853/* Returns the token present at index INDEX in a given context. If
1854 INDEX is zero, the next token to be processed is returned. */
92582b75 1855static const cpp_token*
ad2305ad 1856_cpp_token_from_context_at (cpp_context *context, int index)
92582b75 1857{
92582b75
TT
1858 if (context->tokens_kind == TOKENS_KIND_DIRECT)
1859 return &(FIRST (context).token[index]);
1860 else if (context->tokens_kind == TOKENS_KIND_INDIRECT
1861 || context->tokens_kind == TOKENS_KIND_EXTENDED)
1862 return FIRST (context).ptoken[index];
1863 else
1864 abort ();
1865}
1866
5950c3c9
BE
1867/* Look ahead in the input stream. */
1868const cpp_token *
1869cpp_peek_token (cpp_reader *pfile, int index)
1870{
1871 cpp_context *context = pfile->context;
1872 const cpp_token *peektok;
1873 int count;
1874
1875 /* First, scan through any pending cpp_context objects. */
1876 while (context->prev)
1877 {
ad2305ad 1878 ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
5950c3c9
BE
1879
1880 if (index < (int) sz)
ad2305ad 1881 return _cpp_token_from_context_at (context, index);
5950c3c9
BE
1882 index -= (int) sz;
1883 context = context->prev;
1884 }
1885
1886 /* We will have to read some new tokens after all (and do so
1887 without invalidating preceding tokens). */
1888 count = index;
1889 pfile->keep_tokens++;
1890
1891 do
1892 {
1893 peektok = _cpp_lex_token (pfile);
1894 if (peektok->type == CPP_EOF)
1895 return peektok;
1896 }
1897 while (index--);
1898
1899 _cpp_backup_tokens_direct (pfile, count + 1);
1900 pfile->keep_tokens--;
1901
1902 return peektok;
1903}
1904
4ed5bcfb
NB
1905/* Allocate a single token that is invalidated at the same time as the
1906 rest of the tokens on the line. Has its line and col set to the
1907 same as the last lexed token, so that diagnostics appear in the
1908 right place. */
1909cpp_token *
6cf87ca4 1910_cpp_temp_token (cpp_reader *pfile)
4ed5bcfb
NB
1911{
1912 cpp_token *old, *result;
5950c3c9
BE
1913 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
1914 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
4ed5bcfb
NB
1915
1916 old = pfile->cur_token - 1;
5950c3c9
BE
1917 /* Any pre-existing lookaheads must not be clobbered. */
1918 if (la)
1919 {
1920 if (sz <= la)
1921 {
1922 tokenrun *next = next_tokenrun (pfile->cur_run);
1923
1924 if (sz < la)
1925 memmove (next->base + 1, next->base,
1926 (la - sz) * sizeof (cpp_token));
1927
1928 next->base[0] = pfile->cur_run->limit[-1];
1929 }
1930
1931 if (sz > 1)
1932 memmove (pfile->cur_token + 1, pfile->cur_token,
1933 MIN (la, sz - 1) * sizeof (cpp_token));
1934 }
1935
1936 if (!sz && pfile->cur_token == pfile->cur_run->limit)
4ed5bcfb
NB
1937 {
1938 pfile->cur_run = next_tokenrun (pfile->cur_run);
1939 pfile->cur_token = pfile->cur_run->base;
1940 }
1941
1942 result = pfile->cur_token++;
12f9df4e 1943 result->src_loc = old->src_loc;
4ed5bcfb
NB
1944 return result;
1945}
1946
14baae01
NB
1947/* Lex a token into RESULT (external interface). Takes care of issues
1948 like directive handling, token lookahead, multiple include
a1f300c0 1949 optimization and skipping. */
345894b4 1950const cpp_token *
6cf87ca4 1951_cpp_lex_token (cpp_reader *pfile)
5fddcffc 1952{
bdcbe496 1953 cpp_token *result;
5fddcffc 1954
bdcbe496 1955 for (;;)
5fddcffc 1956 {
bdcbe496 1957 if (pfile->cur_token == pfile->cur_run->limit)
5fddcffc 1958 {
bdcbe496
NB
1959 pfile->cur_run = next_tokenrun (pfile->cur_run);
1960 pfile->cur_token = pfile->cur_run->base;
5fddcffc 1961 }
ee380365
TT
1962 /* We assume that the current token is somewhere in the current
1963 run. */
1964 if (pfile->cur_token < pfile->cur_run->base
1965 || pfile->cur_token >= pfile->cur_run->limit)
1966 abort ();
5fddcffc 1967
bdcbe496 1968 if (pfile->lookaheads)
14baae01
NB
1969 {
1970 pfile->lookaheads--;
1971 result = pfile->cur_token++;
1972 }
bdcbe496 1973 else
14baae01 1974 result = _cpp_lex_direct (pfile);
bdcbe496
NB
1975
1976 if (result->flags & BOL)
5fddcffc 1977 {
bdcbe496
NB
1978 /* Is this a directive. If _cpp_handle_directive returns
1979 false, it is an assembler #. */
1980 if (result->type == CPP_HASH
e808ec9c
NB
1981 /* 6.10.3 p 11: Directives in a list of macro arguments
1982 gives undefined behavior. This implementation
1983 handles the directive as normal. */
bc4071dd 1984 && pfile->state.parsing_args != 1)
21b11495 1985 {
bc4071dd 1986 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
21b11495 1987 {
bc4071dd
RH
1988 if (pfile->directive_result.type == CPP_PADDING)
1989 continue;
21b11495 1990 result = &pfile->directive_result;
21b11495
ZW
1991 }
1992 }
bc4071dd
RH
1993 else if (pfile->state.in_deferred_pragma)
1994 result = &pfile->directive_result;
21b11495 1995
97293897 1996 if (pfile->cb.line_change && !pfile->state.skipping)
6cf87ca4 1997 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
5fddcffc 1998 }
5fddcffc 1999
bdcbe496 2000 /* We don't skip tokens in directives. */
bc4071dd 2001 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
bdcbe496 2002 break;
5fddcffc 2003
bdcbe496 2004 /* Outside a directive, invalidate controlling macros. At file
14baae01 2005 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
6356f892 2006 get here and MI optimization works. */
5fddcffc 2007 pfile->mi_valid = false;
bdcbe496
NB
2008
2009 if (!pfile->state.skipping || result->type == CPP_EOF)
2010 break;
5fddcffc
NB
2011 }
2012
345894b4 2013 return result;
5fddcffc
NB
2014}
2015
26aea073
NB
2016/* Returns true if a fresh line has been loaded. */
2017bool
6cf87ca4 2018_cpp_get_fresh_line (cpp_reader *pfile)
004cb263 2019{
22234f56
PB
2020 int return_at_eof;
2021
26aea073
NB
2022 /* We can't get a new line until we leave the current directive. */
2023 if (pfile->state.in_directive)
2024 return false;
df383483 2025
26aea073 2026 for (;;)
1a76916c 2027 {
26aea073 2028 cpp_buffer *buffer = pfile->buffer;
1a76916c 2029
26aea073
NB
2030 if (!buffer->need_line)
2031 return true;
2032
2033 if (buffer->next_line < buffer->rlimit)
004cb263 2034 {
26aea073
NB
2035 _cpp_clean_line (pfile);
2036 return true;
2037 }
004cb263 2038
26aea073
NB
2039 /* First, get out of parsing arguments state. */
2040 if (pfile->state.parsing_args)
2041 return false;
2042
2043 /* End of buffer. Non-empty files should end in a newline. */
2044 if (buffer->buf != buffer->rlimit
2045 && buffer->next_line > buffer->rlimit
2046 && !buffer->from_stage3)
2047 {
ed0e74e0 2048 /* Clip to buffer size. */
26aea073 2049 buffer->next_line = buffer->rlimit;
26aea073 2050 }
22234f56
PB
2051
2052 return_at_eof = buffer->return_at_eof;
26aea073 2053 _cpp_pop_buffer (pfile);
22234f56 2054 if (pfile->buffer == NULL || return_at_eof)
a506c55c 2055 return false;
26aea073 2056 }
004cb263
NB
2057}
2058
6f572ac2
NB
/* If the next input character is CHAR, consume it and set the type
   of the token being lexed to THEN_TYPE; otherwise consume nothing
   and set the type to ELSE_TYPE.  Relies on variables named `buffer'
   and `result' being in scope at the point of use.  The arguments
   are parenthesized so that expression arguments bind as intended.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)	\
  do						\
    {						\
      if (*buffer->cur == (CHAR))		\
	{					\
	  buffer->cur++;			\
	  result->type = (THEN_TYPE);		\
	}					\
      else					\
	result->type = (ELSE_TYPE);		\
    }						\
  while (0)
480709cc 2067
14baae01
NB
2068/* Lex a token into pfile->cur_token, which is also incremented, to
2069 get diagnostics pointing to the correct location.
2070
2071 Does not handle issues such as token lookahead, multiple-include
f1ba665b 2072 optimization, directives, skipping etc. This function is only
14baae01
NB
2073 suitable for use by _cpp_lex_token, and in special cases like
2074 lex_expansion_token which doesn't care for any of these issues.
2075
2076 When meeting a newline, returns CPP_EOF if parsing a directive,
2077 otherwise returns to the start of the token buffer if permissible.
2078 Returns the location of the lexed token. */
2079cpp_token *
6cf87ca4 2080_cpp_lex_direct (cpp_reader *pfile)
45b966db 2081{
0d9f234d 2082 cppchar_t c;
adb84b42 2083 cpp_buffer *buffer;
0d9f234d 2084 const unsigned char *comment_start;
14baae01 2085 cpp_token *result = pfile->cur_token++;
9ec7291f 2086
5fddcffc 2087 fresh_line:
26aea073 2088 result->flags = 0;
2be570f9 2089 buffer = pfile->buffer;
a506c55c 2090 if (buffer->need_line)
26aea073 2091 {
bc4071dd
RH
2092 if (pfile->state.in_deferred_pragma)
2093 {
2094 result->type = CPP_PRAGMA_EOL;
2095 pfile->state.in_deferred_pragma = false;
2096 if (!pfile->state.pragma_allow_expansion)
2097 pfile->state.prevent_expansion--;
2098 return result;
2099 }
26aea073
NB
2100 if (!_cpp_get_fresh_line (pfile))
2101 {
2102 result->type = CPP_EOF;
9ff7868d
NB
2103 if (!pfile->state.in_directive)
2104 {
2105 /* Tell the compiler the line number of the EOF token. */
500bee0a 2106 result->src_loc = pfile->line_table->highest_line;
9ff7868d
NB
2107 result->flags = BOL;
2108 }
26aea073
NB
2109 return result;
2110 }
2111 if (!pfile->keep_tokens)
2112 {
2113 pfile->cur_run = &pfile->base_run;
2114 result = pfile->base_run.base;
2115 pfile->cur_token = result + 1;
2116 }
2117 result->flags = BOL;
2118 if (pfile->state.parsing_args == 2)
2119 result->flags |= PREV_WHITE;
2120 }
a506c55c 2121 buffer = pfile->buffer;
5fddcffc 2122 update_tokens_line:
500bee0a 2123 result->src_loc = pfile->line_table->highest_line;
041c3194 2124
5fddcffc 2125 skipped_white:
26aea073
NB
2126 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
2127 && !pfile->overlaid_buffer)
2128 {
2129 _cpp_process_line_notes (pfile, false);
500bee0a 2130 result->src_loc = pfile->line_table->highest_line;
26aea073 2131 }
480709cc 2132 c = *buffer->cur++;
12f9df4e 2133
e3dfef44
GC
2134 if (pfile->forced_token_location_p)
2135 result->src_loc = *pfile->forced_token_location_p;
2136 else
2137 result->src_loc = linemap_position_for_column (pfile->line_table,
2138 CPP_BUF_COLUMN (buffer, buffer->cur));
5fddcffc 2139
0d9f234d 2140 switch (c)
45b966db 2141 {
4d6baafa
NB
2142 case ' ': case '\t': case '\f': case '\v': case '\0':
2143 result->flags |= PREV_WHITE;
26aea073
NB
2144 skip_whitespace (pfile, c);
2145 goto skipped_white;
0d9f234d 2146
26aea073 2147 case '\n':
12f9df4e
PB
2148 if (buffer->cur < buffer->rlimit)
2149 CPP_INCREMENT_LINE (pfile, 0);
26aea073
NB
2150 buffer->need_line = true;
2151 goto fresh_line;
46d07497 2152
0d9f234d
NB
2153 case '0': case '1': case '2': case '3': case '4':
2154 case '5': case '6': case '7': case '8': case '9':
50668cf6
GK
2155 {
2156 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2157 result->type = CPP_NUMBER;
2158 lex_number (pfile, &result->val.str, &nst);
2159 warn_about_normalization (pfile, result, &nst);
2160 break;
2161 }
46d07497 2162
0abc6a6a 2163 case 'L':
b6baa67d
KVH
2164 case 'u':
2165 case 'U':
2c6e3f55
JJ
2166 case 'R':
2167 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2168 wide strings or raw strings. */
a48e3dd1
JM
2169 if (c == 'L' || CPP_OPTION (pfile, rliterals)
2170 || (c != 'R' && CPP_OPTION (pfile, uliterals)))
bced6edf 2171 {
2c6e3f55
JJ
2172 if ((*buffer->cur == '\'' && c != 'R')
2173 || *buffer->cur == '"'
2174 || (*buffer->cur == 'R'
2175 && c != 'R'
2176 && buffer->cur[1] == '"'
a48e3dd1 2177 && CPP_OPTION (pfile, rliterals))
2c6e3f55
JJ
2178 || (*buffer->cur == '8'
2179 && c == 'u'
2180 && (buffer->cur[1] == '"'
a48e3dd1
JM
2181 || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
2182 && CPP_OPTION (pfile, rliterals)))))
b6baa67d
KVH
2183 {
2184 lex_string (pfile, result, buffer->cur - 1);
2185 break;
2186 }
bced6edf 2187 }
df383483 2188 /* Fall through. */
0abc6a6a 2189
0d9f234d
NB
2190 case '_':
2191 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2192 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2193 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
b6baa67d 2194 case 's': case 't': case 'v': case 'w': case 'x':
0d9f234d
NB
2195 case 'y': case 'z':
2196 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
0abc6a6a 2197 case 'G': case 'H': case 'I': case 'J': case 'K':
2c6e3f55 2198 case 'M': case 'N': case 'O': case 'P': case 'Q':
b6baa67d 2199 case 'S': case 'T': case 'V': case 'W': case 'X':
0d9f234d
NB
2200 case 'Y': case 'Z':
2201 result->type = CPP_NAME;
50668cf6
GK
2202 {
2203 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
9a0c6187
JM
2204 result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
2205 &nst);
50668cf6
GK
2206 warn_about_normalization (pfile, result, &nst);
2207 }
0d9f234d 2208
0d9f234d 2209 /* Convert named operators to their proper types. */
9a0c6187 2210 if (result->val.node.node->flags & NODE_OPERATOR)
0d9f234d
NB
2211 {
2212 result->flags |= NAMED_OP;
9a0c6187 2213 result->type = (enum cpp_ttype) result->val.node.node->directive_index;
0d9f234d
NB
2214 }
2215 break;
2216
2217 case '\'':
2218 case '"':
6338b358 2219 lex_string (pfile, result, buffer->cur - 1);
0d9f234d 2220 break;
041c3194 2221
0d9f234d 2222 case '/':
1c6d33ef
NB
2223 /* A potential block or line comment. */
2224 comment_start = buffer->cur;
6f572ac2
NB
2225 c = *buffer->cur;
2226
1c6d33ef
NB
2227 if (c == '*')
2228 {
26aea073 2229 if (_cpp_skip_block_comment (pfile))
0527bc4e 2230 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
0d9f234d 2231 }
480709cc 2232 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
12f9df4e 2233 || cpp_in_system_header (pfile)))
0d9f234d 2234 {
bdb05a7b
NB
2235 /* Warn about comments only if pedantically GNUC89, and not
2236 in system headers. */
2237 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
a94c1199 2238 && ! buffer->warned_cplusplus_comments)
041c3194 2239 {
0527bc4e 2240 cpp_error (pfile, CPP_DL_PEDWARN,
56508306 2241 "C++ style comments are not allowed in ISO C90");
0527bc4e 2242 cpp_error (pfile, CPP_DL_PEDWARN,
ebef4e8c 2243 "(this will be reported only once per input file)");
1c6d33ef
NB
2244 buffer->warned_cplusplus_comments = 1;
2245 }
0d9f234d 2246
01ef6563 2247 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
87cf0651 2248 cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
1c6d33ef 2249 }
480709cc
NB
2250 else if (c == '=')
2251 {
6f572ac2 2252 buffer->cur++;
480709cc
NB
2253 result->type = CPP_DIV_EQ;
2254 break;
2255 }
2256 else
2257 {
480709cc
NB
2258 result->type = CPP_DIV;
2259 break;
2260 }
0d9f234d 2261
1c6d33ef
NB
2262 if (!pfile->state.save_comments)
2263 {
2264 result->flags |= PREV_WHITE;
5fddcffc 2265 goto update_tokens_line;
0d9f234d 2266 }
1c6d33ef
NB
2267
2268 /* Save the comment as a token in its own right. */
477cdac7 2269 save_comment (pfile, result, comment_start, c);
bdcbe496 2270 break;
0d9f234d
NB
2271
2272 case '<':
2273 if (pfile->state.angled_headers)
2274 {
6338b358 2275 lex_string (pfile, result, buffer->cur - 1);
4bb09c26
JM
2276 if (result->type != CPP_LESS)
2277 break;
0d9f234d 2278 }
45b966db 2279
6f572ac2
NB
2280 result->type = CPP_LESS;
2281 if (*buffer->cur == '=')
2282 buffer->cur++, result->type = CPP_LESS_EQ;
2283 else if (*buffer->cur == '<')
0d9f234d 2284 {
6f572ac2
NB
2285 buffer->cur++;
2286 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
0d9f234d 2287 }
6f572ac2 2288 else if (CPP_OPTION (pfile, digraphs))
480709cc 2289 {
6f572ac2
NB
2290 if (*buffer->cur == ':')
2291 {
1582c677
PC
2292 /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
2293 three characters are <:: and the subsequent character
2294 is neither : nor >, the < is treated as a preprocessor
2295 token by itself". */
2296 if (CPP_OPTION (pfile, cplusplus)
2297 && (CPP_OPTION (pfile, lang) == CLK_CXX11
2298 || CPP_OPTION (pfile, lang) == CLK_GNUCXX11)
2299 && buffer->cur[1] == ':'
2300 && buffer->cur[2] != ':' && buffer->cur[2] != '>')
2301 break;
2302
6f572ac2
NB
2303 buffer->cur++;
2304 result->flags |= DIGRAPH;
2305 result->type = CPP_OPEN_SQUARE;
2306 }
2307 else if (*buffer->cur == '%')
2308 {
2309 buffer->cur++;
2310 result->flags |= DIGRAPH;
2311 result->type = CPP_OPEN_BRACE;
2312 }
480709cc 2313 }
0d9f234d
NB
2314 break;
2315
2316 case '>':
6f572ac2
NB
2317 result->type = CPP_GREATER;
2318 if (*buffer->cur == '=')
2319 buffer->cur++, result->type = CPP_GREATER_EQ;
2320 else if (*buffer->cur == '>')
0d9f234d 2321 {
6f572ac2
NB
2322 buffer->cur++;
2323 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
2324 }
0d9f234d
NB
2325 break;
2326
cbcff6df 2327 case '%':
6f572ac2
NB
2328 result->type = CPP_MOD;
2329 if (*buffer->cur == '=')
2330 buffer->cur++, result->type = CPP_MOD_EQ;
2331 else if (CPP_OPTION (pfile, digraphs))
480709cc 2332 {
6f572ac2 2333 if (*buffer->cur == ':')
480709cc 2334 {
6f572ac2
NB
2335 buffer->cur++;
2336 result->flags |= DIGRAPH;
2337 result->type = CPP_HASH;
2338 if (*buffer->cur == '%' && buffer->cur[1] == ':')
9a0c6187 2339 buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
6f572ac2
NB
2340 }
2341 else if (*buffer->cur == '>')
2342 {
2343 buffer->cur++;
2344 result->flags |= DIGRAPH;
2345 result->type = CPP_CLOSE_BRACE;
480709cc 2346 }
480709cc 2347 }
0d9f234d
NB
2348 break;
2349
cbcff6df 2350 case '.':
480709cc 2351 result->type = CPP_DOT;
6f572ac2 2352 if (ISDIGIT (*buffer->cur))
480709cc 2353 {
50668cf6 2354 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
480709cc 2355 result->type = CPP_NUMBER;
50668cf6
GK
2356 lex_number (pfile, &result->val.str, &nst);
2357 warn_about_normalization (pfile, result, &nst);
480709cc 2358 }
6f572ac2
NB
2359 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
2360 buffer->cur += 2, result->type = CPP_ELLIPSIS;
2361 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2362 buffer->cur++, result->type = CPP_DOT_STAR;
0d9f234d 2363 break;
45b966db 2364
0d9f234d 2365 case '+':
6f572ac2
NB
2366 result->type = CPP_PLUS;
2367 if (*buffer->cur == '+')
2368 buffer->cur++, result->type = CPP_PLUS_PLUS;
2369 else if (*buffer->cur == '=')
2370 buffer->cur++, result->type = CPP_PLUS_EQ;
0d9f234d 2371 break;
04e3ec78 2372
0d9f234d 2373 case '-':
6f572ac2
NB
2374 result->type = CPP_MINUS;
2375 if (*buffer->cur == '>')
0d9f234d 2376 {
6f572ac2 2377 buffer->cur++;
480709cc 2378 result->type = CPP_DEREF;
6f572ac2
NB
2379 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2380 buffer->cur++, result->type = CPP_DEREF_STAR;
480709cc 2381 }
6f572ac2
NB
2382 else if (*buffer->cur == '-')
2383 buffer->cur++, result->type = CPP_MINUS_MINUS;
2384 else if (*buffer->cur == '=')
2385 buffer->cur++, result->type = CPP_MINUS_EQ;
0d9f234d 2386 break;
45b966db 2387
0d9f234d 2388 case '&':
6f572ac2
NB
2389 result->type = CPP_AND;
2390 if (*buffer->cur == '&')
2391 buffer->cur++, result->type = CPP_AND_AND;
2392 else if (*buffer->cur == '=')
2393 buffer->cur++, result->type = CPP_AND_EQ;
0d9f234d 2394 break;
df383483 2395
0d9f234d 2396 case '|':
6f572ac2
NB
2397 result->type = CPP_OR;
2398 if (*buffer->cur == '|')
2399 buffer->cur++, result->type = CPP_OR_OR;
2400 else if (*buffer->cur == '=')
2401 buffer->cur++, result->type = CPP_OR_EQ;
0d9f234d 2402 break;
45b966db 2403
0d9f234d 2404 case ':':
6f572ac2
NB
2405 result->type = CPP_COLON;
2406 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
2407 buffer->cur++, result->type = CPP_SCOPE;
2408 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
0d9f234d 2409 {
6f572ac2 2410 buffer->cur++;
0d9f234d 2411 result->flags |= DIGRAPH;
480709cc
NB
2412 result->type = CPP_CLOSE_SQUARE;
2413 }
0d9f234d 2414 break;
45b966db 2415
480709cc
NB
2416 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
2417 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
2418 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
2419 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
9a0c6187 2420 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
480709cc 2421
26aea073 2422 case '?': result->type = CPP_QUERY; break;
0d9f234d
NB
2423 case '~': result->type = CPP_COMPL; break;
2424 case ',': result->type = CPP_COMMA; break;
2425 case '(': result->type = CPP_OPEN_PAREN; break;
2426 case ')': result->type = CPP_CLOSE_PAREN; break;
2427 case '[': result->type = CPP_OPEN_SQUARE; break;
2428 case ']': result->type = CPP_CLOSE_SQUARE; break;
2429 case '{': result->type = CPP_OPEN_BRACE; break;
2430 case '}': result->type = CPP_CLOSE_BRACE; break;
2431 case ';': result->type = CPP_SEMICOLON; break;
2432
40f03658 2433 /* @ is a punctuator in Objective-C. */
cc937581 2434 case '@': result->type = CPP_ATSIGN; break;
0d9f234d 2435
0abc6a6a 2436 case '$':
1613e52b
NB
2437 case '\\':
2438 {
2439 const uchar *base = --buffer->cur;
50668cf6 2440 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
0abc6a6a 2441
50668cf6 2442 if (forms_identifier_p (pfile, true, &nst))
1613e52b
NB
2443 {
2444 result->type = CPP_NAME;
9a0c6187 2445 result->val.node.node = lex_identifier (pfile, base, true, &nst);
50668cf6 2446 warn_about_normalization (pfile, result, &nst);
1613e52b
NB
2447 break;
2448 }
2449 buffer->cur++;
1067694a 2450 }
1613e52b 2451
1067694a 2452 default:
6338b358
NB
2453 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
2454 break;
0d9f234d 2455 }
bdcbe496
NB
2456
2457 return result;
0d9f234d
NB
2458}
2459
59325650
NB
2460/* An upper bound on the number of bytes needed to spell TOKEN.
2461 Does not include preceding whitespace. */
93c80368 2462unsigned int
6cf87ca4 2463cpp_token_len (const cpp_token *token)
0d9f234d 2464{
93c80368 2465 unsigned int len;
6d2c2047 2466
93c80368 2467 switch (TOKEN_SPELL (token))
041c3194 2468 {
cc955282 2469 default: len = 6; break;
6338b358 2470 case SPELL_LITERAL: len = token->val.str.len; break;
9a0c6187 2471 case SPELL_IDENT: len = NODE_LEN (token->val.node.node) * 10; break;
041c3194 2472 }
59325650
NB
2473
2474 return len;
6d2c2047
ZW
2475}
2476
47e20491
GK
/* Parse one UTF-8 encoded character starting at NAME and place the
   corresponding \UXXXXXXXX escape in BUFFER.  Exactly 10 bytes are
   always written to BUFFER, and no terminating nul is added.

   Returns the number of bytes read out of NAME: 1 for a plain ASCII
   byte, otherwise the length of the multi-byte sequence.

   Aborts on ill-formed UTF-8: a continuation byte or a 0xFE/0xFF byte
   in lead position, or a lead byte that is not followed by the
   required number of continuation bytes.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  size_t seq_len = 0;
  size_t k;
  int shift;
  unsigned int lead;
  unsigned long utf32;

  /* The number of leading one bits in the first byte is the length
     of the UTF-8 sequence.  */
  for (lead = *name; lead & 0x80; lead <<= 1)
    seq_len++;

  if (seq_len == 0)
    {
      /* A plain ASCII byte stands for itself and occupies one byte.  */
      utf32 = *name;
      seq_len = 1;
    }
  else
    {
      /* A lone continuation byte (10xxxxxx) cannot start a sequence,
         and no sequence is longer than six bytes.  */
      if (seq_len == 1 || seq_len > 6)
        abort ();

      /* Keep the payload bits that follow the length marker.  */
      utf32 = *name & (0x7F >> seq_len);
    }

  for (k = 1; k < seq_len; k++)
    {
      unsigned char cont = name[k];

      /* Ill-formed UTF-8: every trailing byte must be 10xxxxxx.  */
      if ((cont & 0xC0) != 0x80)
        abort ();

      utf32 = (utf32 << 6) | (cont & 0x3F);
    }

  *buffer++ = '\\';
  *buffer++ = 'U';
  /* Eight hex digits, most significant nibble first.  */
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(utf32 >> shift) & 0xF];

  return seq_len;
}
2510
cfc93532
MLI
2511/* Given a token TYPE corresponding to a digraph, return a pointer to
2512 the spelling of the digraph. */
2513static const unsigned char *
2514cpp_digraph2name (enum cpp_ttype type)
2515{
2516 return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2517}
47e20491 2518
041c3194 2519/* Write the spelling of a token TOKEN to BUFFER. The buffer must
cf00a885 2520 already contain the enough space to hold the token's spelling.
6cf87ca4 2521 Returns a pointer to the character after the last character written.
47e20491
GK
2522 FORSTRING is true if this is to be the spelling after translation
2523 phase 1 (this is different for UCNs).
6cf87ca4 2524 FIXME: Would be nice if we didn't need the PFILE argument. */
93c80368 2525unsigned char *
6cf87ca4 2526cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
47e20491 2527 unsigned char *buffer, bool forstring)
041c3194 2528{
96be6998 2529 switch (TOKEN_SPELL (token))
041c3194
ZW
2530 {
2531 case SPELL_OPERATOR:
2532 {
2533 const unsigned char *spelling;
2534 unsigned char c;
d6d5f795 2535
041c3194 2536 if (token->flags & DIGRAPH)
cfc93532 2537 spelling = cpp_digraph2name (token->type);
92936ecf
ZW
2538 else if (token->flags & NAMED_OP)
2539 goto spell_ident;
041c3194 2540 else
96be6998 2541 spelling = TOKEN_NAME (token);
df383483 2542
041c3194
ZW
2543 while ((c = *spelling++) != '\0')
2544 *buffer++ = c;
2545 }
2546 break;
d6d5f795 2547
47ad4138 2548 spell_ident:
041c3194 2549 case SPELL_IDENT:
47e20491
GK
2550 if (forstring)
2551 {
9a0c6187
JM
2552 memcpy (buffer, NODE_NAME (token->val.node.node),
2553 NODE_LEN (token->val.node.node));
2554 buffer += NODE_LEN (token->val.node.node);
47e20491
GK
2555 }
2556 else
2557 {
2558 size_t i;
9a0c6187 2559 const unsigned char * name = NODE_NAME (token->val.node.node);
47e20491 2560
9a0c6187 2561 for (i = 0; i < NODE_LEN (token->val.node.node); i++)
47e20491
GK
2562 if (name[i] & ~0x7F)
2563 {
2564 i += utf8_to_ucn (buffer, name + i) - 1;
2565 buffer += 10;
2566 }
2567 else
9a0c6187 2568 *buffer++ = NODE_NAME (token->val.node.node)[i];
47e20491 2569 }
041c3194 2570 break;
d6d5f795 2571
6338b358 2572 case SPELL_LITERAL:
47ad4138
ZW
2573 memcpy (buffer, token->val.str.text, token->val.str.len);
2574 buffer += token->val.str.len;
2575 break;
2576
041c3194 2577 case SPELL_NONE:
0527bc4e
JDA
2578 cpp_error (pfile, CPP_DL_ICE,
2579 "unspellable token %s", TOKEN_NAME (token));
041c3194
ZW
2580 break;
2581 }
d6d5f795 2582
041c3194
ZW
2583 return buffer;
2584}
d6d5f795 2585
5d8ebbd8
NB
2586/* Returns TOKEN spelt as a null-terminated string. The string is
2587 freed when the reader is destroyed. Useful for diagnostics. */
93c80368 2588unsigned char *
6cf87ca4 2589cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
59325650
NB
2590{
2591 unsigned int len = cpp_token_len (token) + 1;
ece54d54 2592 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
c5a04734 2593
47e20491 2594 end = cpp_spell_token (pfile, token, start, false);
93c80368 2595 end[0] = '\0';
c5a04734 2596
93c80368
NB
2597 return start;
2598}
c5a04734 2599
cfc93532
MLI
2600/* Returns a pointer to a string which spells the token defined by
2601 TYPE and FLAGS. Used by C front ends, which really should move to
2602 using cpp_token_as_text. */
93c80368 2603const char *
cfc93532 2604cpp_type2name (enum cpp_ttype type, unsigned char flags)
93c80368 2605{
cfc93532
MLI
2606 if (flags & DIGRAPH)
2607 return (const char *) cpp_digraph2name (type);
2608 else if (flags & NAMED_OP)
2609 return cpp_named_operator2name (type);
2610
93c80368
NB
2611 return (const char *) token_spellings[type].name;
2612}
c5a04734 2613
4ed5bcfb
NB
2614/* Writes the spelling of token to FP, without any preceding space.
2615 Separated from cpp_spell_token for efficiency - to avoid stdio
2616 double-buffering. */
93c80368 2617void
6cf87ca4 2618cpp_output_token (const cpp_token *token, FILE *fp)
93c80368 2619{
93c80368 2620 switch (TOKEN_SPELL (token))
c5a04734 2621 {
93c80368
NB
2622 case SPELL_OPERATOR:
2623 {
2624 const unsigned char *spelling;
3b681e9d 2625 int c;
c5a04734 2626
93c80368 2627 if (token->flags & DIGRAPH)
cfc93532 2628 spelling = cpp_digraph2name (token->type);
93c80368
NB
2629 else if (token->flags & NAMED_OP)
2630 goto spell_ident;
2631 else
2632 spelling = TOKEN_NAME (token);
041c3194 2633
3b681e9d
ZW
2634 c = *spelling;
2635 do
2636 putc (c, fp);
2637 while ((c = *++spelling) != '\0');
93c80368
NB
2638 }
2639 break;
041c3194 2640
93c80368
NB
2641 spell_ident:
2642 case SPELL_IDENT:
47e20491
GK
2643 {
2644 size_t i;
9a0c6187 2645 const unsigned char * name = NODE_NAME (token->val.node.node);
47e20491 2646
9a0c6187 2647 for (i = 0; i < NODE_LEN (token->val.node.node); i++)
47e20491
GK
2648 if (name[i] & ~0x7F)
2649 {
2650 unsigned char buffer[10];
2651 i += utf8_to_ucn (buffer, name + i) - 1;
2652 fwrite (buffer, 1, 10, fp);
2653 }
2654 else
9a0c6187 2655 fputc (NODE_NAME (token->val.node.node)[i], fp);
47e20491
GK
2656 }
2657 break;
041c3194 2658
6338b358 2659 case SPELL_LITERAL:
47ad4138
ZW
2660 fwrite (token->val.str.text, 1, token->val.str.len, fp);
2661 break;
2662
93c80368
NB
2663 case SPELL_NONE:
2664 /* An error, most probably. */
2665 break;
041c3194 2666 }
c5a04734
ZW
2667}
2668
93c80368
NB
2669/* Compare two tokens. */
2670int
6cf87ca4 2671_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
c5a04734 2672{
93c80368
NB
2673 if (a->type == b->type && a->flags == b->flags)
2674 switch (TOKEN_SPELL (a))
2675 {
2676 default: /* Keep compiler happy. */
2677 case SPELL_OPERATOR:
9a0c6187 2678 /* token_no is used to track where multiple consecutive ##
aa508502 2679 tokens were originally located. */
9a0c6187 2680 return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
93c80368 2681 case SPELL_NONE:
9a0c6187
JM
2682 return (a->type != CPP_MACRO_ARG
2683 || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
93c80368 2684 case SPELL_IDENT:
9a0c6187 2685 return a->val.node.node == b->val.node.node;
6338b358 2686 case SPELL_LITERAL:
93c80368
NB
2687 return (a->val.str.len == b->val.str.len
2688 && !memcmp (a->val.str.text, b->val.str.text,
2689 a->val.str.len));
2690 }
c5a04734 2691
041c3194
ZW
2692 return 0;
2693}
2694
93c80368
NB
2695/* Returns nonzero if a space should be inserted to avoid an
2696 accidental token paste for output. For simplicity, it is
2697 conservative, and occasionally advises a space where one is not
2698 needed, e.g. "." and ".2". */
93c80368 2699int
6cf87ca4
ZW
2700cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2701 const cpp_token *token2)
c5a04734 2702{
93c80368
NB
2703 enum cpp_ttype a = token1->type, b = token2->type;
2704 cppchar_t c;
c5a04734 2705
93c80368
NB
2706 if (token1->flags & NAMED_OP)
2707 a = CPP_NAME;
2708 if (token2->flags & NAMED_OP)
2709 b = CPP_NAME;
c5a04734 2710
93c80368
NB
2711 c = EOF;
2712 if (token2->flags & DIGRAPH)
37b8524c 2713 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
93c80368
NB
2714 else if (token_spellings[b].category == SPELL_OPERATOR)
2715 c = token_spellings[b].name[0];
c5a04734 2716
93c80368 2717 /* Quickly get everything that can paste with an '='. */
37b8524c 2718 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
93c80368 2719 return 1;
c5a04734 2720
93c80368 2721 switch (a)
c5a04734 2722 {
b52dbbf8
SE
2723 case CPP_GREATER: return c == '>';
2724 case CPP_LESS: return c == '<' || c == '%' || c == ':';
93c80368
NB
2725 case CPP_PLUS: return c == '+';
2726 case CPP_MINUS: return c == '-' || c == '>';
2727 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
2728 case CPP_MOD: return c == ':' || c == '>';
2729 case CPP_AND: return c == '&';
2730 case CPP_OR: return c == '|';
2731 case CPP_COLON: return c == ':' || c == '>';
2732 case CPP_DEREF: return c == '*';
26ec42ee 2733 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
93c80368
NB
2734 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
2735 case CPP_NAME: return ((b == CPP_NUMBER
2736 && name_p (pfile, &token2->val.str))
2737 || b == CPP_NAME
2738 || b == CPP_CHAR || b == CPP_STRING); /* L */
2739 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
2740 || c == '.' || c == '+' || c == '-');
1613e52b 2741 /* UCNs */
1067694a
NB
2742 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
2743 && b == CPP_NAME)
1613e52b 2744 || (CPP_OPTION (pfile, objc)
1067694a 2745 && token1->val.str.text[0] == '@'
1613e52b 2746 && (b == CPP_NAME || b == CPP_STRING)));
93c80368 2747 default: break;
c5a04734 2748 }
c5a04734 2749
417f3e3a 2750 return 0;
c5a04734
ZW
2751}
2752
93c80368 2753/* Output all the remaining tokens on the current line, and a newline
4ed5bcfb
NB
2754 character, to FP. Leading whitespace is removed. If there are
2755 macros, special token padding is not performed. */
c5a04734 2756void
6cf87ca4 2757cpp_output_line (cpp_reader *pfile, FILE *fp)
c5a04734 2758{
4ed5bcfb 2759 const cpp_token *token;
96be6998 2760
4ed5bcfb
NB
2761 token = cpp_get_token (pfile);
2762 while (token->type != CPP_EOF)
96be6998 2763 {
4ed5bcfb
NB
2764 cpp_output_token (token, fp);
2765 token = cpp_get_token (pfile);
2766 if (token->flags & PREV_WHITE)
2767 putc (' ', fp);
96be6998
ZW
2768 }
2769
93c80368 2770 putc ('\n', fp);
041c3194 2771}
c5a04734 2772
5d6342eb
TT
2773/* Return a string representation of all the remaining tokens on the
2774 current line. The result is allocated using xmalloc and must be
2775 freed by the caller. */
2776unsigned char *
2777cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
2778{
2779 const cpp_token *token;
2780 unsigned int out = dir_name ? ustrlen (dir_name) : 0;
2781 unsigned int alloced = 120 + out;
2782 unsigned char *result = (unsigned char *) xmalloc (alloced);
2783
2784 /* If DIR_NAME is empty, there are no initial contents. */
2785 if (dir_name)
2786 {
2787 sprintf ((char *) result, "#%s ", dir_name);
2788 out += 2;
2789 }
2790
2791 token = cpp_get_token (pfile);
2792 while (token->type != CPP_EOF)
2793 {
2794 unsigned char *last;
2795 /* Include room for a possible space and the terminating nul. */
2796 unsigned int len = cpp_token_len (token) + 2;
2797
2798 if (out + len > alloced)
2799 {
2800 alloced *= 2;
2801 if (out + len > alloced)
2802 alloced = out + len;
2803 result = (unsigned char *) xrealloc (result, alloced);
2804 }
2805
2806 last = cpp_spell_token (pfile, token, &result[out], 0);
2807 out = last - result;
2808
2809 token = cpp_get_token (pfile);
2810 if (token->flags & PREV_WHITE)
2811 result[out++] = ' ';
2812 }
2813
2814 result[out] = '\0';
2815 return result;
2816}
2817
1e013d2e
NB
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that is
   still handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the number of bytes between
   BUFF's cur and limit pointers.  All macro arguments are
   parenthesized so that expression arguments bind as intended.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2832
c9e7a609
NB
2833/* Create a new allocation buffer. Place the control block at the end
2834 of the buffer, so that buffer overflows will cause immediate chaos. */
b8af0ca5 2835static _cpp_buff *
6cf87ca4 2836new_buff (size_t len)
b8af0ca5
NB
2837{
2838 _cpp_buff *result;
ece54d54 2839 unsigned char *base;
b8af0ca5 2840
1e013d2e
NB
2841 if (len < MIN_BUFF_SIZE)
2842 len = MIN_BUFF_SIZE;
c70f6ed3 2843 len = CPP_ALIGN (len);
b8af0ca5 2844
c3f829c1 2845 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
b8af0ca5
NB
2846 result = (_cpp_buff *) (base + len);
2847 result->base = base;
2848 result->cur = base;
2849 result->limit = base + len;
2850 result->next = NULL;
2851 return result;
2852}
2853
2854/* Place a chain of unwanted allocation buffers on the free list. */
2855void
6cf87ca4 2856_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
b8af0ca5
NB
2857{
2858 _cpp_buff *end = buff;
2859
2860 while (end->next)
2861 end = end->next;
2862 end->next = pfile->free_buffs;
2863 pfile->free_buffs = buff;
2864}
2865
2866/* Return a free buffer of size at least MIN_SIZE. */
2867_cpp_buff *
6cf87ca4 2868_cpp_get_buff (cpp_reader *pfile, size_t min_size)
b8af0ca5
NB
2869{
2870 _cpp_buff *result, **p;
2871
2872 for (p = &pfile->free_buffs;; p = &(*p)->next)
2873 {
6142088c 2874 size_t size;
1e013d2e
NB
2875
2876 if (*p == NULL)
b8af0ca5 2877 return new_buff (min_size);
1e013d2e
NB
2878 result = *p;
2879 size = result->limit - result->base;
2880 /* Return a buffer that's big enough, but don't waste one that's
2881 way too big. */
34f5271d 2882 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
b8af0ca5
NB
2883 break;
2884 }
2885
2886 *p = result->next;
2887 result->next = NULL;
2888 result->cur = result->base;
2889 return result;
2890}
2891
4fe9b91c 2892/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
2893 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2894 the excess bytes to the new buffer. Chains the new buffer after
2895 BUFF, and returns the new buffer. */
b8af0ca5 2896_cpp_buff *
6cf87ca4 2897_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
b8af0ca5 2898{
6142088c 2899 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
8c3b2693 2900 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
b8af0ca5 2901
8c3b2693
NB
2902 buff->next = new_buff;
2903 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2904 return new_buff;
2905}
2906
4fe9b91c 2907/* Creates a new buffer with enough space to hold the uncommitted
8c3b2693
NB
2908 remaining bytes of the buffer pointed to by BUFF, and at least
2909 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2910 Chains the new buffer before the buffer pointed to by BUFF, and
2911 updates the pointer to point to the new buffer. */
2912void
6cf87ca4 2913_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
8c3b2693
NB
2914{
2915 _cpp_buff *new_buff, *old_buff = *pbuff;
2916 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2917
2918 new_buff = _cpp_get_buff (pfile, size);
2919 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2920 new_buff->next = old_buff;
2921 *pbuff = new_buff;
b8af0ca5
NB
2922}
2923
2924/* Free a chain of buffers starting at BUFF. */
2925void
5671bf27 2926_cpp_free_buff (_cpp_buff *buff)
b8af0ca5
NB
2927{
2928 _cpp_buff *next;
2929
2930 for (; buff; buff = next)
2931 {
2932 next = buff->next;
2933 free (buff->base);
2934 }
2935}
417f3e3a 2936
ece54d54
NB
2937/* Allocate permanent, unaligned storage of length LEN. */
2938unsigned char *
6cf87ca4 2939_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
ece54d54
NB
2940{
2941 _cpp_buff *buff = pfile->u_buff;
2942 unsigned char *result = buff->cur;
2943
2944 if (len > (size_t) (buff->limit - result))
2945 {
2946 buff = _cpp_get_buff (pfile, len);
2947 buff->next = pfile->u_buff;
2948 pfile->u_buff = buff;
2949 result = buff->cur;
2950 }
2951
2952 buff->cur = result + len;
2953 return result;
2954}
2955
87062813
NB
2956/* Allocate permanent, unaligned storage of length LEN from a_buff.
2957 That buffer is used for growing allocations when saving macro
2958 replacement lists in a #define, and when parsing an answer to an
2959 assertion in #assert, #unassert or #if (and therefore possibly
2960 whilst expanding macros). It therefore must not be used by any
2961 code that they might call: specifically the lexer and the guts of
2962 the macro expander.
2963
2964 All existing other uses clearly fit this restriction: storing
2965 registered pragmas during initialization. */
93c80368 2966unsigned char *
6cf87ca4 2967_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3fef5b2b 2968{
8c3b2693
NB
2969 _cpp_buff *buff = pfile->a_buff;
2970 unsigned char *result = buff->cur;
3fef5b2b 2971
8c3b2693 2972 if (len > (size_t) (buff->limit - result))
3fef5b2b 2973 {
8c3b2693
NB
2974 buff = _cpp_get_buff (pfile, len);
2975 buff->next = pfile->a_buff;
2976 pfile->a_buff = buff;
2977 result = buff->cur;
3fef5b2b 2978 }
041c3194 2979
8c3b2693 2980 buff->cur = result + len;
93c80368 2981 return result;
041c3194 2982}
d8044160
GK
2983
2984/* Say which field of TOK is in use. */
2985
2986enum cpp_token_fld_kind
2987cpp_token_val_index (cpp_token *tok)
2988{
2989 switch (TOKEN_SPELL (tok))
2990 {
2991 case SPELL_IDENT:
2992 return CPP_TOKEN_FLD_NODE;
2993 case SPELL_LITERAL:
2994 return CPP_TOKEN_FLD_STR;
aa508502
JM
2995 case SPELL_OPERATOR:
2996 if (tok->type == CPP_PASTE)
9a0c6187 2997 return CPP_TOKEN_FLD_TOKEN_NO;
aa508502
JM
2998 else
2999 return CPP_TOKEN_FLD_NONE;
d8044160
GK
3000 case SPELL_NONE:
3001 if (tok->type == CPP_MACRO_ARG)
3002 return CPP_TOKEN_FLD_ARG_NO;
3003 else if (tok->type == CPP_PADDING)
3004 return CPP_TOKEN_FLD_SOURCE;
21b11495 3005 else if (tok->type == CPP_PRAGMA)
bc4071dd 3006 return CPP_TOKEN_FLD_PRAGMA;
d8044160
GK
3007 /* else fall through */
3008 default:
3009 return CPP_TOKEN_FLD_NONE;
3010 }
3011}
e3dfef44
GC
3012
3013/* All tokens lexed in R after calling this function will be forced to have
3014 their source_location the same as the location referenced by P, until
3015 cpp_stop_forcing_token_locations is called for R. */
3016
3017void
3018cpp_force_token_locations (cpp_reader *r, source_location *p)
3019{
3020 r->forced_token_location_p = p;
3021}
3022
3023/* Go back to assigning locations naturally for lexed tokens. */
3024
3025void
3026cpp_stop_forcing_token_locations (cpp_reader *r)
3027{
3028 r->forced_token_location_p = NULL;
3029}