]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/input.c
tree-pass.h (make_pass_materialize_all_clones): Declare.
[thirdparty/gcc.git] / gcc / input.c
CommitLineData
447924ef 1/* Data and functions related to line maps and input files.
818ab71a 2 Copyright (C) 2004-2016 Free Software Foundation, Inc.
447924ef
JM
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "intl.h"
ba4ad400 24#include "diagnostic-core.h"
d9b950dd 25#include "selftest.h"
741d3be5 26#include "cpplib.h"
7ecc3eb9 27
a7d79e5c
DM
28#ifndef HAVE_ICONV
29#define HAVE_ICONV 0
30#endif
31
7ecc3eb9
DS
32/* This is a cache used by get_next_line to store the content of a
33 file to be searched for file lines. */
34struct fcache
35{
36 /* These are information used to store a line boundary. */
37 struct line_info
38 {
39 /* The line number. It starts from 1. */
40 size_t line_num;
41
42 /* The position (byte count) of the beginning of the line,
43 relative to the file data pointer. This starts at zero. */
44 size_t start_pos;
45
46 /* The position (byte count) of the last byte of the line. This
47 normally points to the '\n' character, or to one byte after the
48 last byte of the file, if the file doesn't contain a '\n'
49 character. */
50 size_t end_pos;
51
52 line_info (size_t l, size_t s, size_t e)
53 : line_num (l), start_pos (s), end_pos (e)
54 {}
55
56 line_info ()
57 :line_num (0), start_pos (0), end_pos (0)
58 {}
59 };
60
61 /* The number of time this file has been accessed. This is used
62 to designate which file cache to evict from the cache
63 array. */
64 unsigned use_count;
65
66 const char *file_path;
67
68 FILE *fp;
69
70 /* This points to the content of the file that we've read so
71 far. */
72 char *data;
73
74 /* The size of the DATA array above.*/
75 size_t size;
76
77 /* The number of bytes read from the underlying file so far. This
78 must be less (or equal) than SIZE above. */
79 size_t nb_read;
80
81 /* The index of the beginning of the current line. */
82 size_t line_start_idx;
83
84 /* The number of the previous line read. This starts at 1. Zero
85 means we've read no line so far. */
86 size_t line_num;
87
88 /* This is the total number of lines of the current file. At the
89 moment, we try to get this information from the line map
90 subsystem. Note that this is just a hint. When using the C++
91 front-end, this hint is correct because the input file is then
92 completely tokenized before parsing starts; so the line map knows
93 the number of lines before compilation really starts. For e.g,
94 the C front-end, it can happen that we start emitting diagnostics
95 before the line map has seen the end of the file. */
96 size_t total_lines;
97
98 /* This is a record of the beginning and end of the lines we've seen
99 while reading the file. This is useful to avoid walking the data
100 from the beginning when we are asked to read a line that is
101 before LINE_START_IDX above. Note that the maximum size of this
102 record is fcache_line_record_size, so that the memory consumption
103 doesn't explode. We thus scale total_lines down to
104 fcache_line_record_size. */
105 vec<line_info, va_heap> line_record;
106
107 fcache ();
108 ~fcache ();
109};
447924ef
JM
110
/* Current position in real source file.  */

location_t input_location = UNKNOWN_LOCATION;

/* The set of line maps consulted by the location-resolving functions
   of this file.  */
struct line_maps *line_table;

/* Table of per-file caches.  It is allocated on first use by
   diagnostic_file_cache_init and released by
   diagnostic_file_cache_fini.  */
static fcache *fcache_tab;
/* Number of entries of FCACHE_TAB.  */
static const size_t fcache_tab_size = 16;
/* Size, in bytes, given to a cache's data buffer the first time it
   is grown (see maybe_grow).  */
static const size_t fcache_buffer_size = 4 * 1024;
/* Upper bound on the number of entries of a cache's line_record.  */
static const size_t fcache_line_record_size = 100;
121
84756fd4
DS
122/* Expand the source location LOC into a human readable location. If
123 LOC resolves to a builtin location, the file name of the readable
7eb918cc
DS
124 location is set to the string "<built-in>". If EXPANSION_POINT_P is
125 TRUE and LOC is virtual, then it is resolved to the expansion
126 point of the involved macro. Otherwise, it is resolved to the
c4ca1a09
DS
127 spelling location of the token.
128
129 When resolving to the spelling location of the token, if the
130 resulting location is for a built-in location (that is, it has no
131 associated line/column) in the context of a macro expansion, the
132 returned location is the first one (while unwinding the macro
133 location towards its expansion point) that is in real source
134 code. */
7eb918cc
DS
135
136static expanded_location
137expand_location_1 (source_location loc,
138 bool expansion_point_p)
447924ef
JM
139{
140 expanded_location xloc;
0e50b624 141 const line_map_ordinary *map;
c4ca1a09 142 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
5368224f
DC
143 tree block = NULL;
144
145 if (IS_ADHOC_LOC (loc))
146 {
147 block = LOCATION_BLOCK (loc);
148 loc = LOCATION_LOCUS (loc);
149 }
c4ca1a09
DS
150
151 memset (&xloc, 0, sizeof (xloc));
84756fd4 152
c4ca1a09
DS
153 if (loc >= RESERVED_LOCATION_COUNT)
154 {
155 if (!expansion_point_p)
156 {
157 /* We want to resolve LOC to its spelling location.
158
159 But if that spelling location is a reserved location that
160 appears in the context of a macro expansion (like for a
161 location for a built-in token), let's consider the first
162 location (toward the expansion point) that is not reserved;
163 that is, the first location that is in real source code. */
164 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
0e50b624 165 loc, NULL);
c4ca1a09
DS
166 lrk = LRK_SPELLING_LOCATION;
167 }
168 loc = linemap_resolve_location (line_table, loc,
169 lrk, &map);
170 xloc = linemap_expand_location (line_table, map, loc);
171 }
84756fd4 172
5368224f 173 xloc.data = block;
447924ef 174 if (loc <= BUILTINS_LOCATION)
84756fd4
DS
175 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
176
447924ef
JM
177 return xloc;
178}
64a1a422 179
7ecc3eb9
DS
180/* Initialize the set of cache used for files accessed by caret
181 diagnostic. */
182
183static void
184diagnostic_file_cache_init (void)
185{
186 if (fcache_tab == NULL)
187 fcache_tab = new fcache[fcache_tab_size];
188}
189
592f32fa 190/* Free the resources used by the set of cache used for files accessed
7ecc3eb9
DS
191 by caret diagnostic. */
192
193void
194diagnostic_file_cache_fini (void)
195{
196 if (fcache_tab)
197 {
198 delete [] (fcache_tab);
199 fcache_tab = NULL;
200 }
201}
202
203/* Return the total lines number that have been read so far by the
204 line map (in the preprocessor) so far. For languages like C++ that
205 entirely preprocess the input file before starting to parse, this
206 equals the actual number of lines of the file. */
207
208static size_t
209total_lines_num (const char *file_path)
210{
211 size_t r = 0;
212 source_location l = 0;
213 if (linemap_get_file_highest_location (line_table, file_path, &l))
214 {
215 gcc_assert (l >= RESERVED_LOCATION_COUNT);
216 expanded_location xloc = expand_location (l);
217 r = xloc.line;
218 }
219 return r;
220}
221
222/* Lookup the cache used for the content of a given file accessed by
223 caret diagnostic. Return the found cached file, or NULL if no
224 cached file was found. */
225
226static fcache*
227lookup_file_in_cache_tab (const char *file_path)
228{
229 if (file_path == NULL)
230 return NULL;
231
232 diagnostic_file_cache_init ();
233
234 /* This will contain the found cached file. */
235 fcache *r = NULL;
236 for (unsigned i = 0; i < fcache_tab_size; ++i)
237 {
238 fcache *c = &fcache_tab[i];
239 if (c->file_path && !strcmp (c->file_path, file_path))
240 {
241 ++c->use_count;
242 r = c;
243 }
244 }
245
246 if (r)
247 ++r->use_count;
248
249 return r;
250}
251
252/* Return the file cache that has been less used, recently, or the
253 first empty one. If HIGHEST_USE_COUNT is non-null,
254 *HIGHEST_USE_COUNT is set to the highest use count of the entries
255 in the cache table. */
256
257static fcache*
258evicted_cache_tab_entry (unsigned *highest_use_count)
259{
260 diagnostic_file_cache_init ();
261
262 fcache *to_evict = &fcache_tab[0];
263 unsigned huc = to_evict->use_count;
264 for (unsigned i = 1; i < fcache_tab_size; ++i)
265 {
266 fcache *c = &fcache_tab[i];
267 bool c_is_empty = (c->file_path == NULL);
268
269 if (c->use_count < to_evict->use_count
270 || (to_evict->file_path && c_is_empty))
271 /* We evict C because it's either an entry with a lower use
272 count or one that is empty. */
273 to_evict = c;
274
275 if (huc < c->use_count)
276 huc = c->use_count;
277
278 if (c_is_empty)
279 /* We've reached the end of the cache; subsequent elements are
280 all empty. */
281 break;
282 }
283
284 if (highest_use_count)
285 *highest_use_count = huc;
286
287 return to_evict;
288}
289
290/* Create the cache used for the content of a given file to be
291 accessed by caret diagnostic. This cache is added to an array of
292 cache and can be retrieved by lookup_file_in_cache_tab. This
293 function returns the created cache. Note that only the last
294 fcache_tab_size files are cached. */
295
296static fcache*
297add_file_to_cache_tab (const char *file_path)
298{
299
300 FILE *fp = fopen (file_path, "r");
317363b4
DS
301 if (fp == NULL)
302 return NULL;
7ecc3eb9
DS
303
304 unsigned highest_use_count = 0;
305 fcache *r = evicted_cache_tab_entry (&highest_use_count);
306 r->file_path = file_path;
307 if (r->fp)
308 fclose (r->fp);
309 r->fp = fp;
310 r->nb_read = 0;
311 r->line_start_idx = 0;
312 r->line_num = 0;
313 r->line_record.truncate (0);
314 /* Ensure that this cache entry doesn't get evicted next time
315 add_file_to_cache_tab is called. */
316 r->use_count = ++highest_use_count;
317 r->total_lines = total_lines_num (file_path);
318
319 return r;
320}
321
322/* Lookup the cache used for the content of a given file accessed by
323 caret diagnostic. If no cached file was found, create a new cache
324 for this file, add it to the array of cached file and return
325 it. */
326
327static fcache*
328lookup_or_add_file_to_cache_tab (const char *file_path)
329{
330 fcache *r = lookup_file_in_cache_tab (file_path);
331 if (r == NULL)
332 r = add_file_to_cache_tab (file_path);
333 return r;
334}
335
336/* Default constructor for a cache of file used by caret
337 diagnostic. */
338
339fcache::fcache ()
340: use_count (0), file_path (NULL), fp (NULL), data (0),
341 size (0), nb_read (0), line_start_idx (0), line_num (0),
342 total_lines (0)
343{
344 line_record.create (0);
345}
346
347/* Destructor for a cache of file used by caret diagnostic. */
348
349fcache::~fcache ()
350{
351 if (fp)
352 {
353 fclose (fp);
354 fp = NULL;
355 }
356 if (data)
357 {
358 XDELETEVEC (data);
359 data = 0;
360 }
361 line_record.release ();
362}
363
364/* Returns TRUE iff the cache would need to be filled with data coming
365 from the file. That is, either the cache is empty or full or the
366 current line is empty. Note that if the cache is full, it would
367 need to be extended and filled again. */
368
369static bool
370needs_read (fcache *c)
371{
372 return (c->nb_read == 0
373 || c->nb_read == c->size
374 || (c->line_start_idx >= c->nb_read - 1));
375}
376
377/* Return TRUE iff the cache is full and thus needs to be
378 extended. */
379
380static bool
381needs_grow (fcache *c)
382{
383 return c->nb_read == c->size;
384}
385
386/* Grow the cache if it needs to be extended. */
387
388static void
389maybe_grow (fcache *c)
9fec0042 390{
7ecc3eb9
DS
391 if (!needs_grow (c))
392 return;
393
394 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
395 c->data = XRESIZEVEC (char, c->data, size + 1);
396 c->size = size;
397}
9fec0042 398
7ecc3eb9
DS
399/* Read more data into the cache. Extends the cache if need be.
400 Returns TRUE iff new data could be read. */
401
402static bool
403read_data (fcache *c)
404{
405 if (feof (c->fp) || ferror (c->fp))
406 return false;
407
408 maybe_grow (c);
409
410 char * from = c->data + c->nb_read;
411 size_t to_read = c->size - c->nb_read;
412 size_t nb_read = fread (from, 1, to_read, c->fp);
413
414 if (ferror (c->fp))
415 return false;
416
417 c->nb_read += nb_read;
418 return !!nb_read;
419}
420
421/* Read new data iff the cache needs to be filled with more data
422 coming from the file FP. Return TRUE iff the cache was filled with
423 mode data. */
424
425static bool
426maybe_read_data (fcache *c)
427{
428 if (!needs_read (c))
429 return false;
430 return read_data (c);
431}
432
433/* Read a new line from file FP, using C as a cache for the data
434 coming from the file. Upon successful completion, *LINE is set to
435 the beginning of the line found. Space for that line has been
436 allocated in the cache thus *LINE has the same life time as C.
437 *LINE_LEN is set to the length of the line. Note that the line
438 does not contain any terminal delimiter. This function returns
439 true if some data was read or process from the cache, false
440 otherwise. Note that subsequent calls to get_next_line return the
441 next lines of the file and might overwrite the content of
442 *LINE. */
443
444static bool
445get_next_line (fcache *c, char **line, ssize_t *line_len)
446{
447 /* Fill the cache with data to process. */
448 maybe_read_data (c);
449
450 size_t remaining_size = c->nb_read - c->line_start_idx;
451 if (remaining_size == 0)
452 /* There is no more data to process. */
453 return false;
454
455 char *line_start = c->data + c->line_start_idx;
456
457 char *next_line_start = NULL;
458 size_t len = 0;
459 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
460 if (line_end == NULL)
9fec0042 461 {
7ecc3eb9
DS
462 /* We haven't found the end-of-line delimiter in the cache.
463 Fill the cache with more data from the file and look for the
464 '\n'. */
465 while (maybe_read_data (c))
466 {
467 line_start = c->data + c->line_start_idx;
468 remaining_size = c->nb_read - c->line_start_idx;
469 line_end = (char *) memchr (line_start, '\n', remaining_size);
470 if (line_end != NULL)
471 {
472 next_line_start = line_end + 1;
473 break;
474 }
475 }
476 if (line_end == NULL)
477 /* We've loadded all the file into the cache and still no
478 '\n'. Let's say the line ends up at one byte passed the
479 end of the file. This is to stay consistent with the case
480 of when the line ends up with a '\n' and line_end points to
481 that terminal '\n'. That consistency is useful below in
482 the len calculation. */
483 line_end = c->data + c->nb_read ;
9fec0042 484 }
7ecc3eb9
DS
485 else
486 next_line_start = line_end + 1;
487
488 if (ferror (c->fp))
489 return -1;
490
491 /* At this point, we've found the end of the of line. It either
492 points to the '\n' or to one byte after the last byte of the
493 file. */
494 gcc_assert (line_end != NULL);
9fec0042 495
7ecc3eb9
DS
496 len = line_end - line_start;
497
498 if (c->line_start_idx < c->nb_read)
499 *line = line_start;
500
501 ++c->line_num;
502
503 /* Before we update our line record, make sure the hint about the
504 total number of lines of the file is correct. If it's not, then
505 we give up recording line boundaries from now on. */
506 bool update_line_record = true;
507 if (c->line_num > c->total_lines)
508 update_line_record = false;
509
510 /* Now update our line record so that re-reading lines from the
511 before c->line_start_idx is faster. */
512 if (update_line_record
513 && c->line_record.length () < fcache_line_record_size)
514 {
515 /* If the file lines fits in the line record, we just record all
516 its lines ...*/
517 if (c->total_lines <= fcache_line_record_size
518 && c->line_num > c->line_record.length ())
519 c->line_record.safe_push (fcache::line_info (c->line_num,
520 c->line_start_idx,
521 line_end - c->data));
522 else if (c->total_lines > fcache_line_record_size)
523 {
524 /* ... otherwise, we just scale total_lines down to
525 (fcache_line_record_size lines. */
526 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
527 if (c->line_record.length () == 0
528 || n >= c->line_record.length ())
529 c->line_record.safe_push (fcache::line_info (c->line_num,
530 c->line_start_idx,
531 line_end - c->data));
532 }
533 }
534
535 /* Update c->line_start_idx so that it points to the next line to be
536 read. */
537 if (next_line_start)
538 c->line_start_idx = next_line_start - c->data;
539 else
540 /* We didn't find any terminal '\n'. Let's consider that the end
541 of line is the end of the data in the cache. The next
542 invocation of get_next_line will either read more data from the
543 underlying file or return false early because we've reached the
544 end of the file. */
545 c->line_start_idx = c->nb_read;
546
547 *line_len = len;
548
549 return true;
550}
551
552/* Reads the next line from FILE into *LINE. If *LINE is too small
553 (or NULL) it is allocated (or extended) to have enough space to
554 containe the line. *LINE_LENGTH must contain the size of the
555 initial*LINE buffer. It's then updated by this function to the
556 actual length of the returned line. Note that the returned line
557 can contain several zero bytes. Also note that the returned string
558 is allocated in static storage that is going to be re-used by
559 subsequent invocations of read_line. */
560
561static bool
562read_next_line (fcache *cache, char ** line, ssize_t *line_len)
563{
564 char *l = NULL;
565 ssize_t len = 0;
566
567 if (!get_next_line (cache, &l, &len))
568 return false;
569
570 if (*line == NULL)
571 *line = XNEWVEC (char, len);
572 else
573 if (*line_len < len)
574 *line = XRESIZEVEC (char, *line, len);
575
576 memcpy (*line, l, len);
577 *line_len = len;
578
579 return true;
580}
581
582/* Consume the next bytes coming from the cache (or from its
583 underlying file if there are remaining unread bytes in the file)
584 until we reach the next end-of-line (or end-of-file). There is no
585 copying from the cache involved. Return TRUE upon successful
586 completion. */
587
588static bool
589goto_next_line (fcache *cache)
590{
591 char *l;
592 ssize_t len;
593
594 return get_next_line (cache, &l, &len);
595}
596
597/* Read an arbitrary line number LINE_NUM from the file cached in C.
598 The line is copied into *LINE. *LINE_LEN must have been set to the
599 length of *LINE. If *LINE is too small (or NULL) it's extended (or
600 allocated) and *LINE_LEN is adjusted accordingly. *LINE ends up
601 with a terminal zero byte and can contain additional zero bytes.
602 This function returns bool if a line was read. */
603
604static bool
605read_line_num (fcache *c, size_t line_num,
606 char ** line, ssize_t *line_len)
607{
608 gcc_assert (line_num > 0);
609
610 if (line_num <= c->line_num)
9789a912 611 {
7ecc3eb9
DS
612 /* We've been asked to read lines that are before c->line_num.
613 So lets use our line record (if it's not empty) to try to
614 avoid re-reading the file from the beginning again. */
7f4d640c 615
7ecc3eb9 616 if (c->line_record.is_empty ())
9fec0042 617 {
7ecc3eb9
DS
618 c->line_start_idx = 0;
619 c->line_num = 0;
620 }
621 else
622 {
623 fcache::line_info *i = NULL;
624 if (c->total_lines <= fcache_line_record_size)
625 {
626 /* In languages where the input file is not totally
627 preprocessed up front, the c->total_lines hint
628 can be smaller than the number of lines of the
629 file. In that case, only the first
630 c->total_lines have been recorded.
631
632 Otherwise, the first c->total_lines we've read have
633 their start/end recorded here. */
634 i = (line_num <= c->total_lines)
635 ? &c->line_record[line_num - 1]
636 : &c->line_record[c->total_lines - 1];
637 gcc_assert (i->line_num <= line_num);
638 }
639 else
640 {
641 /* So the file had more lines than our line record
642 size. Thus the number of lines we've recorded has
643 been scaled down to fcache_line_reacord_size. Let's
644 pick the start/end of the recorded line that is
645 closest to line_num. */
646 size_t n = (line_num <= c->total_lines)
647 ? line_num * fcache_line_record_size / c->total_lines
648 : c ->line_record.length () - 1;
649 if (n < c->line_record.length ())
650 {
651 i = &c->line_record[n];
652 gcc_assert (i->line_num <= line_num);
653 }
654 }
655
656 if (i && i->line_num == line_num)
657 {
658 /* We have the start/end of the line. Let's just copy
659 it again and we are done. */
660 ssize_t len = i->end_pos - i->start_pos + 1;
661 if (*line_len < len)
662 *line = XRESIZEVEC (char, *line, len);
663 memmove (*line, c->data + i->start_pos, len);
664 (*line)[len - 1] = '\0';
665 *line_len = --len;
666 return true;
667 }
668
669 if (i)
670 {
671 c->line_start_idx = i->start_pos;
672 c->line_num = i->line_num - 1;
673 }
674 else
675 {
676 c->line_start_idx = 0;
677 c->line_num = 0;
678 }
9fec0042 679 }
9fec0042 680 }
7ecc3eb9
DS
681
682 /* Let's walk from line c->line_num up to line_num - 1, without
683 copying any line. */
684 while (c->line_num < line_num - 1)
685 if (!goto_next_line (c))
686 return false;
687
688 /* The line we want is the next one. Let's read and copy it back to
689 the caller. */
690 return read_next_line (c, line, line_len);
9fec0042
MLI
691}
692
31bdd08a 693/* Return the physical source line that corresponds to FILE_PATH/LINE in a
9fec0042 694 buffer that is statically allocated. The newline is replaced by
7ecc3eb9
DS
695 the null character. Note that the line can contain several null
696 characters, so LINE_LEN, if non-null, points to the actual length
697 of the line. */
9fec0042
MLI
698
699const char *
31bdd08a 700location_get_source_line (const char *file_path, int line,
7ecc3eb9 701 int *line_len)
9fec0042 702{
7ecc3eb9
DS
703 static char *buffer;
704 static ssize_t len;
705
31bdd08a 706 if (line == 0)
367c8286
DS
707 return NULL;
708
31bdd08a 709 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
367c8286
DS
710 if (c == NULL)
711 return NULL;
712
31bdd08a 713 bool read = read_line_num (c, line, &buffer, &len);
9fec0042 714
7ecc3eb9
DS
715 if (read && line_len)
716 *line_len = len;
9fec0042 717
7ecc3eb9 718 return read ? buffer : NULL;
9fec0042
MLI
719}
720
c468587a
DS
721/* Test if the location originates from the spelling location of a
722 builtin-tokens. That is, return TRUE if LOC is a (possibly
723 virtual) location of a built-in token that appears in the expansion
724 list of a macro. Please note that this function also works on
725 tokens that result from built-in tokens. For instance, the
726 function would return true if passed a token "4" that is the result
727 of the expansion of the built-in __LINE__ macro. */
728bool
729is_location_from_builtin_token (source_location loc)
730{
0e50b624 731 const line_map_ordinary *map = NULL;
c468587a
DS
732 loc = linemap_resolve_location (line_table, loc,
733 LRK_SPELLING_LOCATION, &map);
734 return loc == BUILTINS_LOCATION;
735}
736
7eb918cc
DS
737/* Expand the source location LOC into a human readable location. If
738 LOC is virtual, it resolves to the expansion point of the involved
739 macro. If LOC resolves to a builtin location, the file name of the
740 readable location is set to the string "<built-in>". */
741
742expanded_location
743expand_location (source_location loc)
744{
745 return expand_location_1 (loc, /*expansion_point_p=*/true);
746}
747
748/* Expand the source location LOC into a human readable location. If
749 LOC is virtual, it resolves to the expansion location of the
750 relevant macro. If LOC resolves to a builtin location, the file
751 name of the readable location is set to the string
752 "<built-in>". */
753
754expanded_location
755expand_location_to_spelling_point (source_location loc)
756{
e1f0c178 757 return expand_location_1 (loc, /*expansion_point_p=*/false);
7eb918cc
DS
758}
759
8a645150
DM
760/* The rich_location class within libcpp requires a way to expand
761 source_location instances, and relies on the client code
762 providing a symbol named
763 linemap_client_expand_location_to_spelling_point
764 to do this.
765
766 This is the implementation for libcommon.a (all host binaries),
767 which simply calls into expand_location_to_spelling_point. */
768
769expanded_location
770linemap_client_expand_location_to_spelling_point (source_location loc)
771{
772 return expand_location_to_spelling_point (loc);
773}
774
775
e1f0c178
MLI
776/* If LOCATION is in a system header and if it is a virtual location for
777 a token coming from the expansion of a macro, unwind it to the
778 location of the expansion point of the macro. Otherwise, just return
70dc395a
DS
779 LOCATION.
780
781 This is used for instance when we want to emit diagnostics about a
e1f0c178
MLI
782 token that may be located in a macro that is itself defined in a
783 system header, for example, for the NULL macro. In such a case, if
784 LOCATION were passed directly to diagnostic functions such as
785 warning_at, the diagnostic would be suppressed (unless
786 -Wsystem-headers). */
70dc395a
DS
787
788source_location
789expansion_point_location_if_in_system_header (source_location location)
790{
791 if (in_system_header_at (location))
792 location = linemap_resolve_location (line_table, location,
793 LRK_MACRO_EXPANSION_POINT,
794 NULL);
795 return location;
796}
7eb918cc 797
79ce98bc
MP
798/* If LOCATION is a virtual location for a token coming from the expansion
799 of a macro, unwind to the location of the expansion point of the macro. */
800
801source_location
802expansion_point_location (source_location location)
803{
804 return linemap_resolve_location (line_table, location,
805 LRK_MACRO_EXPANSION_POINT, NULL);
806}
807
a01fc549
DM
808/* Given location LOC, strip away any packed range information
809 or ad-hoc information. */
810
811location_t
812get_pure_location (location_t loc)
813{
814 if (IS_ADHOC_LOC (loc))
815 loc
816 = line_table->location_adhoc_data_map.data[loc & MAX_SOURCE_LOCATION].locus;
817
818 if (loc >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
819 return loc;
820
821 if (loc < RESERVED_LOCATION_COUNT)
822 return loc;
823
824 const line_map *map = linemap_lookup (line_table, loc);
825 const line_map_ordinary *ordmap = linemap_check_ordinary (map);
826
827 return loc & ~((1 << ordmap->m_range_bits) - 1);
828}
829
830/* Construct a location with caret at CARET, ranging from START to
831 finish e.g.
832
833 11111111112
834 12345678901234567890
835 522
836 523 return foo + bar;
837 ~~~~^~~~~
838 524
839
840 The location's caret is at the "+", line 523 column 15, but starts
841 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
842 of "bar" at column 19. */
843
844location_t
845make_location (location_t caret, location_t start, location_t finish)
846{
847 location_t pure_loc = get_pure_location (caret);
848 source_range src_range;
849 src_range.m_start = start;
850 src_range.m_finish = finish;
851 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
852 pure_loc,
853 src_range,
854 NULL);
855 return combined_loc;
856}
857
64a1a422
TT
/* One kibi (2^10) and one mebi (2^20), the units used when printing
   the statistics below.  */
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)

/* Scale the number X for display.  The result is:
   - X itself, if X is strictly lower than 10 K (in base 2)
   - X divided by 1024, if X is >= 10 K but strictly lower than 10 M
     (in base 2)
   - X divided by 1024 * 1024, if X is >= 10 M (in base 2)
   The result is cast to unsigned long.  */
#define SCALE(x) ((unsigned long) ((x) < 10 * ONE_K \
                                   ? (x) \
                                   : ((x) < 10 * ONE_M \
                                      ? (x) / ONE_K \
                                      : (x) / ONE_M)))

/* For a given integer, yield the unit matching SCALE:
   - the character 'k', if the number is >= 10 K (in base 2) but
     strictly lower than 10 M (in base 2)
   - the character 'M', if the number is >= 10 M (in base 2)
   - the character ' ', if the number is strictly lower than 10 K.  */
#define STAT_LABEL(x) ((x) < 10 * ONE_K ? ' ' : ((x) < 10 * ONE_M ? 'k' : 'M'))

/* Expand to the two arguments needed to display an integer amount as
   a multiple of 1K or 1M (in base 2): the scaled amount, followed by
   the matching unit (either k, M, or ' ').  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
882
883/* Dump statistics to stderr about the memory usage of the line_table
884 set of line maps. This also displays some statistics about macro
885 expansion. */
886
887void
888dump_line_table_statistics (void)
889{
890 struct linemap_stats s;
d17687f6 891 long total_used_map_size,
64a1a422
TT
892 macro_maps_size,
893 total_allocated_map_size;
894
895 memset (&s, 0, sizeof (s));
896
897 linemap_get_statistics (line_table, &s);
898
899 macro_maps_size = s.macro_maps_used_size
900 + s.macro_maps_locations_size;
901
902 total_allocated_map_size = s.ordinary_maps_allocated_size
903 + s.macro_maps_allocated_size
904 + s.macro_maps_locations_size;
905
906 total_used_map_size = s.ordinary_maps_used_size
907 + s.macro_maps_used_size
908 + s.macro_maps_locations_size;
909
d17687f6 910 fprintf (stderr, "Number of expanded macros: %5ld\n",
64a1a422
TT
911 s.num_expanded_macros);
912 if (s.num_expanded_macros != 0)
d17687f6 913 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
64a1a422
TT
914 s.num_macro_tokens / s.num_expanded_macros);
915 fprintf (stderr,
916 "\nLine Table allocations during the "
917 "compilation process\n");
d17687f6 918 fprintf (stderr, "Number of ordinary maps used: %5ld%c\n",
64a1a422
TT
919 SCALE (s.num_ordinary_maps_used),
920 STAT_LABEL (s.num_ordinary_maps_used));
d17687f6 921 fprintf (stderr, "Ordinary map used size: %5ld%c\n",
64a1a422
TT
922 SCALE (s.ordinary_maps_used_size),
923 STAT_LABEL (s.ordinary_maps_used_size));
d17687f6 924 fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n",
64a1a422
TT
925 SCALE (s.num_ordinary_maps_allocated),
926 STAT_LABEL (s.num_ordinary_maps_allocated));
d17687f6 927 fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n",
64a1a422
TT
928 SCALE (s.ordinary_maps_allocated_size),
929 STAT_LABEL (s.ordinary_maps_allocated_size));
d17687f6 930 fprintf (stderr, "Number of macro maps used: %5ld%c\n",
64a1a422
TT
931 SCALE (s.num_macro_maps_used),
932 STAT_LABEL (s.num_macro_maps_used));
d17687f6 933 fprintf (stderr, "Macro maps used size: %5ld%c\n",
64a1a422
TT
934 SCALE (s.macro_maps_used_size),
935 STAT_LABEL (s.macro_maps_used_size));
d17687f6 936 fprintf (stderr, "Macro maps locations size: %5ld%c\n",
64a1a422
TT
937 SCALE (s.macro_maps_locations_size),
938 STAT_LABEL (s.macro_maps_locations_size));
d17687f6 939 fprintf (stderr, "Macro maps size: %5ld%c\n",
64a1a422
TT
940 SCALE (macro_maps_size),
941 STAT_LABEL (macro_maps_size));
d17687f6 942 fprintf (stderr, "Duplicated maps locations size: %5ld%c\n",
64a1a422
TT
943 SCALE (s.duplicated_macro_maps_locations_size),
944 STAT_LABEL (s.duplicated_macro_maps_locations_size));
d17687f6 945 fprintf (stderr, "Total allocated maps size: %5ld%c\n",
64a1a422
TT
946 SCALE (total_allocated_map_size),
947 STAT_LABEL (total_allocated_map_size));
d17687f6 948 fprintf (stderr, "Total used maps size: %5ld%c\n",
64a1a422
TT
949 SCALE (total_used_map_size),
950 STAT_LABEL (total_used_map_size));
ee015909
DM
951 fprintf (stderr, "Ad-hoc table size: %5ld%c\n",
952 SCALE (s.adhoc_table_size),
953 STAT_LABEL (s.adhoc_table_size));
954 fprintf (stderr, "Ad-hoc table entries used: %5ld\n",
955 s.adhoc_table_entries_used);
ebedc9a3
DM
956 fprintf (stderr, "optimized_ranges: %i\n",
957 line_table->num_optimized_ranges);
958 fprintf (stderr, "unoptimized_ranges: %i\n",
959 line_table->num_unoptimized_ranges);
ee015909 960
64a1a422
TT
961 fprintf (stderr, "\n");
962}
ba4ad400
DM
963
964/* Get location one beyond the final location in ordinary map IDX. */
965
966static source_location
967get_end_location (struct line_maps *set, unsigned int idx)
968{
969 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
970 return set->highest_location;
971
972 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
973 return MAP_START_LOCATION (next_map);
974}
975
/* Helper function for write_digit_row.
   Write the decimal units digit of DIGIT (i.e. DIGIT modulo 10) to
   STREAM as a single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
983
984/* Helper function for dump_location_info.
985 Write a row of numbers to STREAM, numbering a source line,
986 giving the units, tens, hundreds etc of the column number. */
987
988static void
989write_digit_row (FILE *stream, int indent,
ebedc9a3 990 const line_map_ordinary *map,
ba4ad400
DM
991 source_location loc, int max_col, int divisor)
992{
993 fprintf (stream, "%*c", indent, ' ');
994 fprintf (stream, "|");
995 for (int column = 1; column < max_col; column++)
996 {
ebedc9a3 997 source_location column_loc = loc + (column << map->m_range_bits);
ba4ad400
DM
998 write_digit (stream, column_loc / divisor);
999 }
1000 fprintf (stream, "\n");
1001}
1002
1003/* Write a half-closed (START) / half-open (END) interval of
1004 source_location to STREAM. */
1005
1006static void
1007dump_location_range (FILE *stream,
1008 source_location start, source_location end)
1009{
1010 fprintf (stream,
1011 " source_location interval: %u <= loc < %u\n",
1012 start, end);
1013}
1014
1015/* Write a labelled description of a half-closed (START) / half-open (END)
1016 interval of source_location to STREAM. */
1017
1018static void
1019dump_labelled_location_range (FILE *stream,
1020 const char *name,
1021 source_location start, source_location end)
1022{
1023 fprintf (stream, "%s\n", name);
1024 dump_location_range (stream, start, end);
1025 fprintf (stream, "\n");
1026}
1027
1028/* Write a visualization of the locations in the line_table to STREAM. */
1029
1030void
1031dump_location_info (FILE *stream)
1032{
1033 /* Visualize the reserved locations. */
1034 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1035 0, RESERVED_LOCATION_COUNT);
1036
1037 /* Visualize the ordinary line_map instances, rendering the sources. */
1038 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1039 {
1040 source_location end_location = get_end_location (line_table, idx);
1041 /* half-closed: doesn't include this one. */
1042
0e50b624
DM
1043 const line_map_ordinary *map
1044 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
ba4ad400
DM
1045 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1046 dump_location_range (stream,
1047 MAP_START_LOCATION (map), end_location);
1048 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1049 fprintf (stream, " starting at line: %i\n",
1050 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
ebedc9a3
DM
1051 fprintf (stream, " column and range bits: %i\n",
1052 map->m_column_and_range_bits);
ba4ad400 1053 fprintf (stream, " column bits: %i\n",
ebedc9a3
DM
1054 map->m_column_and_range_bits - map->m_range_bits);
1055 fprintf (stream, " range bits: %i\n",
1056 map->m_range_bits);
ba4ad400
DM
1057
1058 /* Render the span of source lines that this "map" covers. */
1059 for (source_location loc = MAP_START_LOCATION (map);
1060 loc < end_location;
ebedc9a3 1061 loc += (1 << map->m_range_bits) )
ba4ad400 1062 {
ebedc9a3
DM
1063 gcc_assert (pure_location_p (line_table, loc) );
1064
ba4ad400
DM
1065 expanded_location exploc
1066 = linemap_expand_location (line_table, map, loc);
1067
1068 if (0 == exploc.column)
1069 {
1070 /* Beginning of a new source line: draw the line. */
1071
1072 int line_size;
31bdd08a
DM
1073 const char *line_text = location_get_source_line (exploc.file,
1074 exploc.line,
1075 &line_size);
ba4ad400
DM
1076 if (!line_text)
1077 break;
1078 fprintf (stream,
1079 "%s:%3i|loc:%5i|%.*s\n",
1080 exploc.file, exploc.line,
1081 loc,
1082 line_size, line_text);
1083
1084 /* "loc" is at column 0, which means "the whole line".
1085 Render the locations *within* the line, by underlining
1086 it, showing the source_location numeric values
1087 at each column. */
ebedc9a3 1088 int max_col = (1 << map->m_column_and_range_bits) - 1;
ba4ad400
DM
1089 if (max_col > line_size)
1090 max_col = line_size + 1;
1091
1092 int indent = 14 + strlen (exploc.file);
1093
1094 /* Thousands. */
1095 if (end_location > 999)
ebedc9a3 1096 write_digit_row (stream, indent, map, loc, max_col, 1000);
ba4ad400
DM
1097
1098 /* Hundreds. */
1099 if (end_location > 99)
ebedc9a3 1100 write_digit_row (stream, indent, map, loc, max_col, 100);
ba4ad400
DM
1101
1102 /* Tens. */
ebedc9a3 1103 write_digit_row (stream, indent, map, loc, max_col, 10);
ba4ad400
DM
1104
1105 /* Units. */
ebedc9a3 1106 write_digit_row (stream, indent, map, loc, max_col, 1);
ba4ad400
DM
1107 }
1108 }
1109 fprintf (stream, "\n");
1110 }
1111
1112 /* Visualize unallocated values. */
1113 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1114 line_table->highest_location,
1115 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1116
1117 /* Visualize the macro line_map instances, rendering the sources. */
1118 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1119 {
1120 /* Each macro map that is allocated owns source_location values
1121 that are *lower* that the one before them.
1122 Hence it's meaningful to view them either in order of ascending
1123 source locations, or in order of ascending macro map index. */
1124 const bool ascending_source_locations = true;
1125 unsigned int idx = (ascending_source_locations
1126 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1127 : i);
0e50b624 1128 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
ba4ad400
DM
1129 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1130 idx,
1131 linemap_map_get_macro_name (map),
1132 MACRO_MAP_NUM_MACRO_TOKENS (map));
1133 dump_location_range (stream,
1134 map->start_location,
1135 (map->start_location
1136 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1137 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1138 "expansion point is location %i",
1139 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1140 fprintf (stream, " map->start_location: %u\n",
1141 map->start_location);
1142
1143 fprintf (stream, " macro_locations:\n");
1144 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1145 {
1146 source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1147 source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1148
1149 /* linemap_add_macro_token encodes token numbers in an expansion
1150 by putting them after MAP_START_LOCATION. */
1151
1152 /* I'm typically seeing 4 uninitialized entries at the end of
1153 0xafafafaf.
1154 This appears to be due to macro.c:replace_args
1155 adding 2 extra args for padding tokens; presumably there may
1156 be a leading and/or trailing padding token injected,
1157 each for 2 more location slots.
1158 This would explain there being up to 4 source_locations slots
1159 that may be uninitialized. */
1160
1161 fprintf (stream, " %u: %u, %u\n",
1162 i,
1163 x,
1164 y);
1165 if (x == y)
1166 {
1167 if (x < MAP_START_LOCATION (map))
1168 inform (x, "token %u has x-location == y-location == %u", i, x);
1169 else
1170 fprintf (stream,
1171 "x-location == y-location == %u encodes token # %u\n",
1172 x, x - MAP_START_LOCATION (map));
1173 }
1174 else
1175 {
1176 inform (x, "token %u has x-location == %u", i, x);
1177 inform (x, "token %u has y-location == %u", i, y);
1178 }
1179 }
1180 fprintf (stream, "\n");
1181 }
1182
1183 /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1184 macro map, presumably due to an off-by-one error somewhere
1185 between the logic in linemap_enter_macro and
1186 LINEMAPS_MACRO_LOWEST_LOCATION. */
1187 dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1188 MAX_SOURCE_LOCATION,
1189 MAX_SOURCE_LOCATION + 1);
1190
1191 /* Visualize ad-hoc values. */
1192 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1193 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1194}
d9b950dd 1195
88fa5555
DM
1196/* string_concat's constructor. */
1197
1198string_concat::string_concat (int num, location_t *locs)
1199 : m_num (num)
1200{
1201 m_locs = ggc_vec_alloc <location_t> (num);
1202 for (int i = 0; i < num; i++)
1203 m_locs[i] = locs[i];
1204}
1205
1206/* string_concat_db's constructor. */
1207
1208string_concat_db::string_concat_db ()
1209{
1210 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1211}
1212
1213/* Record that a string concatenation occurred, covering NUM
1214 string literal tokens. LOCS is an array of size NUM, containing the
1215 locations of the tokens. A copy of LOCS is taken. */
1216
1217void
1218string_concat_db::record_string_concatenation (int num, location_t *locs)
1219{
1220 gcc_assert (num > 1);
1221 gcc_assert (locs);
1222
1223 location_t key_loc = get_key_loc (locs[0]);
1224
1225 string_concat *concat
1226 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1227 m_table->put (key_loc, concat);
1228}
1229
1230/* Determine if LOC was the location of the the initial token of a
1231 concatenation of string literal tokens.
1232 If so, *OUT_NUM is written to with the number of tokens, and
1233 *OUT_LOCS with the location of an array of locations of the
1234 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1235 storage owned by the string_concat_db.
1236 Otherwise, return false. */
1237
1238bool
1239string_concat_db::get_string_concatenation (location_t loc,
1240 int *out_num,
1241 location_t **out_locs)
1242{
1243 gcc_assert (out_num);
1244 gcc_assert (out_locs);
1245
1246 location_t key_loc = get_key_loc (loc);
1247
1248 string_concat **concat = m_table->get (key_loc);
1249 if (!concat)
1250 return false;
1251
1252 *out_num = (*concat)->m_num;
1253 *out_locs =(*concat)->m_locs;
1254 return true;
1255}
1256
1257/* Internal function. Canonicalize LOC into a form suitable for
1258 use as a key within the database, stripping away macro expansion,
1259 ad-hoc information, and range information, using the location of
1260 the start of LOC within an ordinary linemap. */
1261
1262location_t
1263string_concat_db::get_key_loc (location_t loc)
1264{
1265 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1266 NULL);
1267
1268 loc = get_range_from_loc (line_table, loc).m_start;
1269
1270 return loc;
1271}
1272
1273/* Helper class for use within get_substring_ranges_for_loc.
1274 An vec of cpp_string with responsibility for releasing all of the
1275 str->text for each str in the vector. */
1276
1277class auto_cpp_string_vec : public auto_vec <cpp_string>
1278{
1279 public:
1280 auto_cpp_string_vec (int alloc)
1281 : auto_vec <cpp_string> (alloc) {}
1282
1283 ~auto_cpp_string_vec ()
1284 {
1285 /* Clean up the copies within this vec. */
1286 int i;
1287 cpp_string *str;
1288 FOR_EACH_VEC_ELT (*this, i, str)
1289 free (const_cast <unsigned char *> (str->text));
1290 }
1291};
1292
1293/* Attempt to populate RANGES with source location information on the
1294 individual characters within the string literal found at STRLOC.
1295 If CONCATS is non-NULL, then any string literals that the token at
1296 STRLOC was concatenated with are also added to RANGES.
1297
1298 Return NULL if successful, or an error message if any errors occurred (in
1299 which case RANGES may be only partially populated and should not
1300 be used).
1301
1302 This is implemented by re-parsing the relevant source line(s). */
1303
1304static const char *
1305get_substring_ranges_for_loc (cpp_reader *pfile,
1306 string_concat_db *concats,
1307 location_t strloc,
1308 enum cpp_ttype type,
1309 cpp_substring_ranges &ranges)
1310{
1311 gcc_assert (pfile);
1312
1313 if (strloc == UNKNOWN_LOCATION)
1314 return "unknown location";
1315
1316 /* If string concatenation has occurred at STRLOC, get the locations
1317 of all of the literal tokens making up the compound string.
1318 Otherwise, just use STRLOC. */
1319 int num_locs = 1;
1320 location_t *strlocs = &strloc;
1321 if (concats)
1322 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1323
1324 auto_cpp_string_vec strs (num_locs);
1325 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1326 for (int i = 0; i < num_locs; i++)
1327 {
1328 /* Get range of strloc. We will use it to locate the start and finish
1329 of the literal token within the line. */
1330 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1331
1332 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1333 /* If the string is within a macro expansion, we can't get at the
1334 end location. */
1335 return "macro expansion";
1336
1337 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1338 /* If so, we can't reliably determine where the token started within
1339 its line. */
1340 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1341
1342 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1343 /* If so, we can't reliably determine where the token finished within
1344 its line. */
1345 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1346
1347 expanded_location start
1348 = expand_location_to_spelling_point (src_range.m_start);
1349 expanded_location finish
1350 = expand_location_to_spelling_point (src_range.m_finish);
1351 if (start.file != finish.file)
1352 return "range endpoints are in different files";
1353 if (start.line != finish.line)
1354 return "range endpoints are on different lines";
1355 if (start.column > finish.column)
1356 return "range endpoints are reversed";
1357
1358 int line_width;
1359 const char *line = location_get_source_line (start.file, start.line,
1360 &line_width);
1361 if (line == NULL)
1362 return "unable to read source line";
1363
1364 /* Determine the location of the literal (including quotes
1365 and leading prefix chars, such as the 'u' in a u""
1366 token). */
1367 const char *literal = line + start.column - 1;
1368 int literal_length = finish.column - start.column + 1;
1369
1370 gcc_assert (line_width >= (start.column - 1 + literal_length));
1371 cpp_string from;
1372 from.len = literal_length;
1373 /* Make a copy of the literal, to avoid having to rely on
1374 the lifetime of the copy of the line within the cache.
1375 This will be released by the auto_cpp_string_vec dtor. */
1376 from.text = XDUPVEC (unsigned char, literal, literal_length);
1377 strs.safe_push (from);
1378
1379 /* For very long lines, a new linemap could have started
1380 halfway through the token.
1381 Ensure that the loc_reader uses the linemap of the
1382 *end* of the token for its start location. */
1383 const line_map_ordinary *final_ord_map;
1384 linemap_resolve_location (line_table, src_range.m_finish,
1385 LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1386 location_t start_loc
1387 = linemap_position_for_line_and_column (line_table, final_ord_map,
1388 start.line, start.column);
1389
1390 cpp_string_location_reader loc_reader (start_loc, line_table);
1391 loc_readers.safe_push (loc_reader);
1392 }
1393
1394 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1395 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1396 loc_readers.address (),
1397 num_locs, &ranges, type);
1398 if (err)
1399 return err;
1400
1401 /* Success: "ranges" should now contain information on the string. */
1402 return NULL;
1403}
1404
65e736c0
DM
1405/* Attempt to populate *OUT_LOC with source location information on the
1406 given characters within the string literal found at STRLOC.
1407 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1408 character set.
1409
1410 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1411 and string literal "012345\n789"
1412 *OUT_LOC is written to with:
1413 "012345\n789"
1414 ~^~~~~
1415
88fa5555
DM
1416 If CONCATS is non-NULL, then any string literals that the token at
1417 STRLOC was concatenated with are also considered.
1418
1419 This is implemented by re-parsing the relevant source line(s).
1420
1421 Return NULL if successful, or an error message if any errors occurred.
1422 Error messages are intended for GCC developers (to help debugging) rather
1423 than for end-users. */
1424
1425const char *
65e736c0
DM
1426get_source_location_for_substring (cpp_reader *pfile,
1427 string_concat_db *concats,
1428 location_t strloc,
1429 enum cpp_ttype type,
1430 int caret_idx, int start_idx, int end_idx,
1431 source_location *out_loc)
1432{
1433 gcc_checking_assert (caret_idx >= 0);
88fa5555
DM
1434 gcc_checking_assert (start_idx >= 0);
1435 gcc_checking_assert (end_idx >= 0);
65e736c0 1436 gcc_assert (out_loc);
88fa5555
DM
1437
1438 cpp_substring_ranges ranges;
1439 const char *err
1440 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1441 if (err)
1442 return err;
1443
65e736c0
DM
1444 if (caret_idx >= ranges.get_num_ranges ())
1445 return "caret_idx out of range";
88fa5555
DM
1446 if (start_idx >= ranges.get_num_ranges ())
1447 return "start_idx out of range";
1448 if (end_idx >= ranges.get_num_ranges ())
1449 return "end_idx out of range";
1450
65e736c0
DM
1451 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1452 ranges.get_range (start_idx).m_start,
1453 ranges.get_range (end_idx).m_finish);
1454 return NULL;
1455}
1456
0e06d2b3
DM
1457#if CHECKING_P
1458
1459namespace selftest {
1460
1461/* Selftests of location handling. */
1462
65e736c0
DM
1463/* Attempt to populate *OUT_RANGE with source location information on the
1464 given character within the string literal found at STRLOC.
1465 CHAR_IDX refers to an offset within the execution character set.
1466 If CONCATS is non-NULL, then any string literals that the token at
1467 STRLOC was concatenated with are also considered.
1468
1469 This is implemented by re-parsing the relevant source line(s).
1470
1471 Return NULL if successful, or an error message if any errors occurred.
1472 Error messages are intended for GCC developers (to help debugging) rather
1473 than for end-users. */
1474
1475static const char *
1476get_source_range_for_char (cpp_reader *pfile,
1477 string_concat_db *concats,
1478 location_t strloc,
1479 enum cpp_ttype type,
1480 int char_idx,
1481 source_range *out_range)
1482{
1483 gcc_checking_assert (char_idx >= 0);
1484 gcc_assert (out_range);
1485
1486 cpp_substring_ranges ranges;
1487 const char *err
1488 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1489 if (err)
1490 return err;
1491
1492 if (char_idx >= ranges.get_num_ranges ())
1493 return "char_idx out of range";
1494
1495 *out_range = ranges.get_range (char_idx);
88fa5555
DM
1496 return NULL;
1497}
1498
65e736c0 1499/* As get_source_range_for_char, but write to *OUT the number
88fa5555
DM
1500 of ranges that are available. */
1501
0e06d2b3 1502static const char *
88fa5555
DM
1503get_num_source_ranges_for_substring (cpp_reader *pfile,
1504 string_concat_db *concats,
1505 location_t strloc,
1506 enum cpp_ttype type,
1507 int *out)
1508{
1509 gcc_assert (out);
1510
1511 cpp_substring_ranges ranges;
1512 const char *err
1513 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1514
1515 if (err)
1516 return err;
1517
1518 *out = ranges.get_num_ranges ();
1519 return NULL;
1520}
1521
d9b950dd
DM
1522/* Selftests of location handling. */
1523
741d3be5
DM
1524/* Helper function for verifying location data: when location_t
1525 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1526 as having column 0. */
1527
1528static bool
1529should_have_column_data_p (location_t loc)
1530{
1531 if (IS_ADHOC_LOC (loc))
1532 loc = get_location_from_adhoc_loc (line_table, loc);
1533 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1534 return false;
1535 return true;
1536}
1537
1538/* Selftest for should_have_column_data_p. */
1539
1540static void
1541test_should_have_column_data_p ()
1542{
1543 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1544 ASSERT_TRUE
1545 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1546 ASSERT_FALSE
1547 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1548}
1549
d9b950dd
DM
1550/* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1551 on LOC. */
1552
1553static void
1554assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1555 location_t loc)
1556{
1557 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1558 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
741d3be5
DM
1559 /* If location_t values are sufficiently high, then column numbers
1560 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1561 When close to the threshold, column numbers *may* be present: if
1562 the final linemap before the threshold contains a line that straddles
1563 the threshold, locations in that line have column information. */
1564 if (should_have_column_data_p (loc))
1565 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1566}
1567
/* Various selftests in this file involve constructing a line table
   and one or more line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for source_location/location_t beyond which line-map.c
     changes behavior (disabling of the range-packing optimization,
     disabling of column-tracking).  We can exercise these by starting
     the line_table at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */

struct line_table_case
{
  /* Describe a case using DEFAULT_RANGE_BITS for the line table's
     default_range_bits, with locations starting at BASE_LOCATION.  */
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* Value to give line_table->default_range_bits.  */
  int m_default_range_bits;

  /* Starting value for the line table's locations; zero means
     "leave the freshly-initialized values alone".  */
  int m_base_location;
};
1594
1595/* A class for overriding the global "line_table" within a selftest,
1596 restoring its value afterwards. */
1597
1598class temp_line_table
1599{
1600 public:
1601 temp_line_table (const line_table_case &);
1602 ~temp_line_table ();
1603
1604 private:
1605 line_maps *m_old_line_table;
1606};
1607
1608/* Constructor. Store the old value of line_table, and create a new
1609 one, using the sitation described in CASE_. */
1610
1611temp_line_table::temp_line_table (const line_table_case &case_)
1612 : m_old_line_table (line_table)
1613{
1614 line_table = ggc_alloc<line_maps> ();
1615 linemap_init (line_table, BUILTINS_LOCATION);
1616 line_table->reallocator = m_old_line_table->reallocator;
1617 line_table->round_alloc_size = m_old_line_table->round_alloc_size;
1618 line_table->default_range_bits = case_.m_default_range_bits;
1619 if (case_.m_base_location)
1620 {
1621 line_table->highest_location = case_.m_base_location;
1622 line_table->highest_line = case_.m_base_location;
1623 }
1624}
1625
1626/* Destructor. Restore the old value of line_table. */
1627
1628temp_line_table::~temp_line_table ()
1629{
1630 line_table = m_old_line_table;
d9b950dd
DM
1631}
1632
1633/* Verify basic operation of ordinary linemaps. */
1634
1635static void
741d3be5 1636test_accessing_ordinary_linemaps (const line_table_case &case_)
d9b950dd 1637{
741d3be5
DM
1638 temp_line_table tmp_lt (case_);
1639
d9b950dd
DM
1640 /* Build a simple linemap describing some locations. */
1641 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1642
1643 linemap_line_start (line_table, 1, 100);
1644 location_t loc_a = linemap_position_for_column (line_table, 1);
1645 location_t loc_b = linemap_position_for_column (line_table, 23);
1646
1647 linemap_line_start (line_table, 2, 100);
1648 location_t loc_c = linemap_position_for_column (line_table, 1);
1649 location_t loc_d = linemap_position_for_column (line_table, 17);
1650
1651 /* Example of a very long line. */
1652 linemap_line_start (line_table, 3, 2000);
1653 location_t loc_e = linemap_position_for_column (line_table, 700);
1654
1655 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1656
1657 /* Multiple files. */
1658 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1659 linemap_line_start (line_table, 1, 200);
1660 location_t loc_f = linemap_position_for_column (line_table, 150);
1661 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1662
1663 /* Verify that we can recover the location info. */
1664 assert_loceq ("foo.c", 1, 1, loc_a);
1665 assert_loceq ("foo.c", 1, 23, loc_b);
1666 assert_loceq ("foo.c", 2, 1, loc_c);
1667 assert_loceq ("foo.c", 2, 17, loc_d);
1668 assert_loceq ("foo.c", 3, 700, loc_e);
1669 assert_loceq ("bar.c", 1, 150, loc_f);
1670
1671 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
a01fc549
DM
1672 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1673
1674 /* Verify using make_location to build a range, and extracting data
1675 back from it. */
1676 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1677 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1678 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1679 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1680 ASSERT_EQ (loc_b, src_range.m_start);
1681 ASSERT_EQ (loc_d, src_range.m_finish);
d9b950dd
DM
1682}
1683
1684/* Verify various properties of UNKNOWN_LOCATION. */
1685
1686static void
1687test_unknown_location ()
1688{
1689 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1690 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1691 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1692}
1693
1694/* Verify various properties of BUILTINS_LOCATION. */
1695
1696static void
1697test_builtins ()
1698{
10d2fc23 1699 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
d9b950dd
DM
1700 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1701}
1702
1703/* Verify reading of input files (e.g. for caret-based diagnostics). */
1704
1705static void
1706test_reading_source_line ()
1707{
85ecd05c 1708 /* Create a tempfile and write some text to it. */
741d3be5
DM
1709 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1710 "01234567890123456789\n"
1711 "This is the test text\n"
1712 "This is the 3rd line\n");
85ecd05c
DM
1713
1714 /* Read back a specific line from the tempfile. */
d9b950dd 1715 int line_size;
741d3be5
DM
1716 const char *source_line = location_get_source_line (tmp.get_filename (),
1717 2, &line_size);
d9b950dd 1718 ASSERT_TRUE (source_line != NULL);
85ecd05c
DM
1719 ASSERT_EQ (21, line_size);
1720 if (!strncmp ("This is the test text",
1721 source_line, line_size))
09765e3a 1722 ::selftest::pass (SELFTEST_LOCATION,
d9b950dd
DM
1723 "source_line matched expected value");
1724 else
09765e3a 1725 ::selftest::fail (SELFTEST_LOCATION,
d9b950dd 1726 "source_line did not match expected value");
85ecd05c 1727
d9b950dd
DM
1728}
1729
741d3be5
DM
1730/* Tests of lexing. */
1731
/* Assert that the text obtained from cpp_token_as_text for token TOK
   of PARSER is the same string as EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)            \
  SELFTEST_BEGIN_STMT                                                  \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));   \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);          \
  SELFTEST_END_STMT
1740
1741/* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1742 and ranges from EXP_START_COL to EXP_FINISH_COL.
1743 Use LOC as the effective location of the selftest. */
1744
1745static void
1746assert_token_loc_eq (const location &loc,
1747 const cpp_token *tok,
1748 const char *exp_filename, int exp_linenum,
1749 int exp_start_col, int exp_finish_col)
1750{
1751 location_t tok_loc = tok->src_loc;
1752 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1753 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1754
1755 /* If location_t values are sufficiently high, then column numbers
1756 will be unavailable. */
1757 if (!should_have_column_data_p (tok_loc))
1758 return;
1759
1760 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1761 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1762 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1763 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1764}
1765
/* Check TOK->src_loc via assert_token_loc_eq, passing
   SELFTEST_LOCATION (i.e. the site of the macro's use) as the
   effective location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,            \
                            EXP_START_COL, EXP_FINISH_COL)             \
  assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME),       \
                       (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1773
1774/* Test of lexing a file using libcpp, verifying tokens and their
1775 location information. */
1776
1777static void
1778test_lexer (const line_table_case &case_)
1779{
1780 /* Create a tempfile and write some text to it. */
1781 const char *content =
1782 /*00000000011111111112222222222333333.3333444444444.455555555556
1783 12345678901234567890123456789012345.6789012345678.901234567890. */
1784 ("test_name /* c-style comment */\n"
1785 " \"test literal\"\n"
1786 " // test c++-style comment\n"
1787 " 42\n");
1788 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1789
1790 temp_line_table tmp_lt (case_);
1791
1792 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1793
1794 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1795 ASSERT_NE (fname, NULL);
1796
1797 /* Verify that we get the expected tokens back, with the correct
1798 location information. */
1799
1800 location_t loc;
1801 const cpp_token *tok;
1802 tok = cpp_get_token_with_location (parser, &loc);
1803 ASSERT_NE (tok, NULL);
1804 ASSERT_EQ (tok->type, CPP_NAME);
1805 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1806 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1807
1808 tok = cpp_get_token_with_location (parser, &loc);
1809 ASSERT_NE (tok, NULL);
1810 ASSERT_EQ (tok->type, CPP_STRING);
1811 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1812 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1813
1814 tok = cpp_get_token_with_location (parser, &loc);
1815 ASSERT_NE (tok, NULL);
1816 ASSERT_EQ (tok->type, CPP_NUMBER);
1817 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
1818 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
1819
1820 tok = cpp_get_token_with_location (parser, &loc);
1821 ASSERT_NE (tok, NULL);
1822 ASSERT_EQ (tok->type, CPP_EOF);
1823
1824 cpp_finish (parser, NULL);
1825 cpp_destroy (parser);
1826}
1827
88fa5555
DM
/* Forward decls.  */

struct lexer_test;
class lexer_test_options;

/* Interface for customizing a lexer_test.
   The lexer_test constructor calls the "apply" vfunc on the options
   instance it is given (if any).  */

class lexer_test_options
{
 public:
  /* Hook for adjusting the test that is being constructed.  */
  virtual void apply (lexer_test &) = 0;
};
1841
1842/* A struct for writing lexer tests. */
1843
1844struct lexer_test
1845{
1846 lexer_test (const line_table_case &case_, const char *content,
1847 lexer_test_options *options);
1848 ~lexer_test ();
1849
1850 const cpp_token *get_token ();
1851
1852 temp_source_file m_tempfile;
1853 temp_line_table m_tmp_lt;
1854 cpp_reader *m_parser;
1855 string_concat_db m_concats;
1856};
1857
1858/* Use an EBCDIC encoding for the execution charset, specifically
1859 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
1860
1861 This exercises iconv integration within libcpp.
1862 Not every build of iconv supports the given charset,
1863 so we need to flag this error and handle it gracefully. */
1864
1865class ebcdic_execution_charset : public lexer_test_options
1866{
1867 public:
1868 ebcdic_execution_charset () : m_num_iconv_errors (0)
1869 {
1870 gcc_assert (s_singleton == NULL);
1871 s_singleton = this;
1872 }
1873 ~ebcdic_execution_charset ()
1874 {
1875 gcc_assert (s_singleton == this);
1876 s_singleton = NULL;
1877 }
1878
1879 void apply (lexer_test &test) FINAL OVERRIDE
1880 {
1881 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
1882 cpp_opts->narrow_charset = "IBM1047";
1883
1884 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
1885 callbacks->error = on_error;
1886 }
1887
1888 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
1889 int level ATTRIBUTE_UNUSED,
1890 int reason ATTRIBUTE_UNUSED,
1891 rich_location *richloc ATTRIBUTE_UNUSED,
1892 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
1893 ATTRIBUTE_FPTR_PRINTF(5,0)
1894 {
1895 gcc_assert (s_singleton);
1896 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
1897 when the local iconv build doesn't support the conversion. */
1898 if (strstr (msgid, "not supported by iconv"))
1899 {
1900 s_singleton->m_num_iconv_errors++;
1901 return true;
1902 }
1903
1904 /* Otherwise, we have an unexpected error. */
1905 abort ();
1906 }
1907
1908 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
1909
1910 private:
1911 static ebcdic_execution_charset *s_singleton;
1912 int m_num_iconv_errors;
1913};
1914
1915ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
1916
1917/* Constructor. Override line_table with a new instance based on CASE_,
1918 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
1919 start parsing the tempfile. */
1920
1921lexer_test::lexer_test (const line_table_case &case_, const char *content,
1922 lexer_test_options *options) :
1923 /* Create a tempfile and write the text to it. */
1924 m_tempfile (SELFTEST_LOCATION, ".c", content),
1925 m_tmp_lt (case_),
1926 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
1927 m_concats ()
1928{
1929 if (options)
1930 options->apply (*this);
1931
1932 cpp_init_iconv (m_parser);
1933
1934 /* Parse the file. */
1935 const char *fname = cpp_read_main_file (m_parser,
1936 m_tempfile.get_filename ());
1937 ASSERT_NE (fname, NULL);
1938}
1939
1940/* Destructor. Verify that the next token in m_parser is EOF. */
1941
1942lexer_test::~lexer_test ()
1943{
1944 location_t loc;
1945 const cpp_token *tok;
1946
1947 tok = cpp_get_token_with_location (m_parser, &loc);
1948 ASSERT_NE (tok, NULL);
1949 ASSERT_EQ (tok->type, CPP_EOF);
1950
1951 cpp_finish (m_parser, NULL);
1952 cpp_destroy (m_parser);
1953}
1954
1955/* Get the next token from m_parser. */
1956
1957const cpp_token *
1958lexer_test::get_token ()
1959{
1960 location_t loc;
1961 const cpp_token *tok;
1962
1963 tok = cpp_get_token_with_location (m_parser, &loc);
1964 ASSERT_NE (tok, NULL);
1965 return tok;
1966}
1967
1968/* Verify that locations within string literals are correctly handled. */
1969
1970/* Verify get_source_range_for_substring for token(s) at STRLOC,
1971 using the string concatenation database for TEST.
1972
1973 Assert that the character at index IDX is on EXPECTED_LINE,
1974 and that it begins at column EXPECTED_START_COL and ends at
1975 EXPECTED_FINISH_COL (unless the locations are beyond
1976 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
1977 columns). */
1978
1979static void
1980assert_char_at_range (const location &loc,
1981 lexer_test& test,
1982 location_t strloc, enum cpp_ttype type, int idx,
1983 int expected_line, int expected_start_col,
1984 int expected_finish_col)
1985{
1986 cpp_reader *pfile = test.m_parser;
1987 string_concat_db *concats = &test.m_concats;
1988
1989 source_range actual_range;
1990 const char *err
65e736c0
DM
1991 = get_source_range_for_char (pfile, concats, strloc, type, idx,
1992 &actual_range);
88fa5555
DM
1993 if (should_have_column_data_p (strloc))
1994 ASSERT_EQ_AT (loc, NULL, err);
1995 else
1996 {
1997 ASSERT_STREQ_AT (loc,
1998 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
1999 err);
2000 return;
2001 }
2002
2003 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2004 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2005 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2006 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2007
2008 if (should_have_column_data_p (actual_range.m_start))
2009 {
2010 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2011 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2012 }
2013 if (should_have_column_data_p (actual_range.m_finish))
2014 {
2015 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2016 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2017 }
2018}
2019
2020/* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2021 the effective location of any errors. */
2022
2023#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2024 EXPECTED_START_COL, EXPECTED_FINISH_COL) \
2025 assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2026 (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2027 (EXPECTED_FINISH_COL))
2028
2029/* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2030 using the string concatenation database for TEST.
2031
2032 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2033
2034static void
2035assert_num_substring_ranges (const location &loc,
2036 lexer_test& test,
2037 location_t strloc,
2038 enum cpp_ttype type,
2039 int expected_num_ranges)
2040{
2041 cpp_reader *pfile = test.m_parser;
2042 string_concat_db *concats = &test.m_concats;
2043
0e06d2b3 2044 int actual_num_ranges = -1;
88fa5555
DM
2045 const char *err
2046 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2047 &actual_num_ranges);
2048 if (should_have_column_data_p (strloc))
2049 ASSERT_EQ_AT (loc, NULL, err);
2050 else
2051 {
2052 ASSERT_STREQ_AT (loc,
2053 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2054 err);
2055 return;
2056 }
2057 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2058}
2059
2060/* Macro for calling assert_num_substring_ranges, supplying
2061 SELFTEST_LOCATION for the effective location of any errors. */
2062
2063#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2064 EXPECTED_NUM_RANGES) \
2065 assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2066 (TYPE), (EXPECTED_NUM_RANGES))
2067
2068
2069/* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2070 returns an error (using the string concatenation database for TEST). */
2071
2072static void
2073assert_has_no_substring_ranges (const location &loc,
2074 lexer_test& test,
2075 location_t strloc,
2076 enum cpp_ttype type,
2077 const char *expected_err)
2078{
2079 cpp_reader *pfile = test.m_parser;
2080 string_concat_db *concats = &test.m_concats;
2081 cpp_substring_ranges ranges;
2082 const char *actual_err
2083 = get_substring_ranges_for_loc (pfile, concats, strloc,
2084 type, ranges);
2085 if (should_have_column_data_p (strloc))
2086 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2087 else
2088 ASSERT_STREQ_AT (loc,
2089 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2090 actual_err);
2091}
2092
2093#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
2094 assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2095 (STRLOC), (TYPE), (ERR))
2096
2097/* Lex a simple string literal. Verify the substring location data, before
2098 and after running cpp_interpret_string on it. */
2099
2100static void
2101test_lexer_string_locations_simple (const line_table_case &case_)
2102{
2103 /* Digits 0-9 (with 0 at column 10), the simple way.
2104 ....................000000000.11111111112.2222222223333333333
2105 ....................123456789.01234567890.1234567890123456789
2106 We add a trailing comment to ensure that we correctly locate
2107 the end of the string literal token. */
2108 const char *content = " \"0123456789\" /* not a string */\n";
2109 lexer_test test (case_, content, NULL);
2110
2111 /* Verify that we get the expected token back, with the correct
2112 location information. */
2113 const cpp_token *tok = test.get_token ();
2114 ASSERT_EQ (tok->type, CPP_STRING);
2115 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2116 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2117
2118 /* At this point in lexing, the quote characters are treated as part of
2119 the string (they are stripped off by cpp_interpret_string). */
2120
2121 ASSERT_EQ (tok->val.str.len, 12);
2122
2123 /* Verify that cpp_interpret_string works. */
2124 cpp_string dst_string;
2125 const enum cpp_ttype type = CPP_STRING;
2126 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2127 &dst_string, type);
2128 ASSERT_TRUE (result);
2129 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2130 free (const_cast <unsigned char *> (dst_string.text));
2131
2132 /* Verify ranges of individual characters. This no longer includes the
2133 quotes. */
2134 for (int i = 0; i <= 9; i++)
2135 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2136 10 + i, 10 + i);
2137
2138 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10);
2139}
2140
2141/* As test_lexer_string_locations_simple, but use an EBCDIC execution
2142 encoding. */
2143
2144static void
2145test_lexer_string_locations_ebcdic (const line_table_case &case_)
2146{
2147 /* EBCDIC support requires iconv. */
2148 if (!HAVE_ICONV)
2149 return;
2150
2151 /* Digits 0-9 (with 0 at column 10), the simple way.
2152 ....................000000000.11111111112.2222222223333333333
2153 ....................123456789.01234567890.1234567890123456789
2154 We add a trailing comment to ensure that we correctly locate
2155 the end of the string literal token. */
2156 const char *content = " \"0123456789\" /* not a string */\n";
2157 ebcdic_execution_charset use_ebcdic;
2158 lexer_test test (case_, content, &use_ebcdic);
2159
2160 /* Verify that we get the expected token back, with the correct
2161 location information. */
2162 const cpp_token *tok = test.get_token ();
2163 ASSERT_EQ (tok->type, CPP_STRING);
2164 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2165 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2166
2167 /* At this point in lexing, the quote characters are treated as part of
2168 the string (they are stripped off by cpp_interpret_string). */
2169
2170 ASSERT_EQ (tok->val.str.len, 12);
2171
2172 /* The remainder of the test requires an iconv implementation that
2173 can convert from UTF-8 to the EBCDIC encoding requested above. */
2174 if (use_ebcdic.iconv_errors_occurred_p ())
2175 return;
2176
2177 /* Verify that cpp_interpret_string works. */
2178 cpp_string dst_string;
2179 const enum cpp_ttype type = CPP_STRING;
2180 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2181 &dst_string, type);
2182 ASSERT_TRUE (result);
2183 /* We should now have EBCDIC-encoded text, specifically
2184 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2185 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2186 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2187 (const char *)dst_string.text);
2188 free (const_cast <unsigned char *> (dst_string.text));
2189
2190 /* Verify that we don't attempt to record substring location information
2191 for such cases. */
2192 ASSERT_HAS_NO_SUBSTRING_RANGES
2193 (test, tok->src_loc, type,
2194 "execution character set != source character set");
2195}
2196
2197/* Lex a string literal containing a hex-escaped character.
2198 Verify the substring location data, before and after running
2199 cpp_interpret_string on it. */
2200
2201static void
2202test_lexer_string_locations_hex (const line_table_case &case_)
2203{
2204 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2205 and with a space in place of digit 6, to terminate the escaped
2206 hex code.
2207 ....................000000000.111111.11112222.
2208 ....................123456789.012345.67890123. */
2209 const char *content = " \"01234\\x35 789\"\n";
2210 lexer_test test (case_, content, NULL);
2211
2212 /* Verify that we get the expected token back, with the correct
2213 location information. */
2214 const cpp_token *tok = test.get_token ();
2215 ASSERT_EQ (tok->type, CPP_STRING);
2216 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2217 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2218
2219 /* At this point in lexing, the quote characters are treated as part of
2220 the string (they are stripped off by cpp_interpret_string). */
2221 ASSERT_EQ (tok->val.str.len, 15);
2222
2223 /* Verify that cpp_interpret_string works. */
2224 cpp_string dst_string;
2225 const enum cpp_ttype type = CPP_STRING;
2226 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2227 &dst_string, type);
2228 ASSERT_TRUE (result);
2229 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2230 free (const_cast <unsigned char *> (dst_string.text));
2231
2232 /* Verify ranges of individual characters. This no longer includes the
2233 quotes. */
2234 for (int i = 0; i <= 4; i++)
2235 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2236 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2237 for (int i = 6; i <= 9; i++)
2238 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2239
2240 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10);
2241}
2242
2243/* Lex a string literal containing an octal-escaped character.
2244 Verify the substring location data after running cpp_interpret_string
2245 on it. */
2246
2247static void
2248test_lexer_string_locations_oct (const line_table_case &case_)
2249{
2250 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2251 and with a space in place of digit 6, to terminate the escaped
2252 octal code.
2253 ....................000000000.111111.11112222.2222223333333333444
2254 ....................123456789.012345.67890123.4567890123456789012 */
2255 const char *content = " \"01234\\065 789\" /* not a string */\n";
2256 lexer_test test (case_, content, NULL);
2257
2258 /* Verify that we get the expected token back, with the correct
2259 location information. */
2260 const cpp_token *tok = test.get_token ();
2261 ASSERT_EQ (tok->type, CPP_STRING);
2262 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2263
2264 /* Verify that cpp_interpret_string works. */
2265 cpp_string dst_string;
2266 const enum cpp_ttype type = CPP_STRING;
2267 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2268 &dst_string, type);
2269 ASSERT_TRUE (result);
2270 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2271 free (const_cast <unsigned char *> (dst_string.text));
2272
2273 /* Verify ranges of individual characters. This no longer includes the
2274 quotes. */
2275 for (int i = 0; i < 5; i++)
2276 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2277 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2278 for (int i = 6; i <= 9; i++)
2279 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2280
2281 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10);
2282}
2283
2284/* Test of string literal containing letter escapes. */
2285
2286static void
2287test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2288{
2289 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2290 .....................000000000.1.11111.1.1.11222.22222223333333
2291 .....................123456789.0.12345.6.7.89012.34567890123456. */
2292 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2293 lexer_test test (case_, content, NULL);
2294
2295 /* Verify that we get the expected tokens back. */
2296 const cpp_token *tok = test.get_token ();
2297 ASSERT_EQ (tok->type, CPP_STRING);
2298 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2299
2300 /* Verify ranges of individual characters. */
2301 /* "\t". */
2302 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2303 0, 1, 10, 11);
2304 /* "foo". */
2305 for (int i = 1; i <= 3; i++)
2306 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2307 i, 1, 11 + i, 11 + i);
2308 /* "\\" and "\n". */
2309 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2310 4, 1, 15, 16);
2311 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2312 5, 1, 17, 18);
2313
2314 /* "bar". */
2315 for (int i = 6; i <= 8; i++)
2316 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2317 i, 1, 13 + i, 13 + i);
2318
2319 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 9);
2320}
2321
2322/* Another test of a string literal containing a letter escape.
2323 Based on string seen in
2324 printf ("%-%\n");
2325 in gcc.dg/format/c90-printf-1.c. */
2326
2327static void
2328test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2329{
2330 /* .....................000000000.1111.11.1111.22222222223.
2331 .....................123456789.0123.45.6789.01234567890. */
2332 const char *content = (" \"%-%\\n\" /* non-str */\n");
2333 lexer_test test (case_, content, NULL);
2334
2335 /* Verify that we get the expected tokens back. */
2336 const cpp_token *tok = test.get_token ();
2337 ASSERT_EQ (tok->type, CPP_STRING);
2338 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2339
2340 /* Verify ranges of individual characters. */
2341 /* "%-%". */
2342 for (int i = 0; i < 3; i++)
2343 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2344 i, 1, 10 + i, 10 + i);
2345 /* "\n". */
2346 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2347 3, 1, 13, 14);
2348
2349 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 4);
2350}
2351
2352/* Lex a string literal containing UCN 4 characters.
2353 Verify the substring location data after running cpp_interpret_string
2354 on it. */
2355
2356static void
2357test_lexer_string_locations_ucn4 (const line_table_case &case_)
2358{
2359 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2360 as UCN 4.
2361 ....................000000000.111111.111122.222222223.33333333344444
2362 ....................123456789.012345.678901.234567890.12345678901234 */
2363 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2364 lexer_test test (case_, content, NULL);
2365
2366 /* Verify that we get the expected token back, with the correct
2367 location information. */
2368 const cpp_token *tok = test.get_token ();
2369 ASSERT_EQ (tok->type, CPP_STRING);
2370 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2371
2372 /* Verify that cpp_interpret_string works.
2373 The string should be encoded in the execution character
2374 set. Assuming that that is UTF-8, we should have the following:
2375 ----------- ---- ----- ------- ----------------
2376 Byte offset Byte Octal Unicode Source Column(s)
2377 ----------- ---- ----- ------- ----------------
2378 0 0x30 '0' 10
2379 1 0x31 '1' 11
2380 2 0x32 '2' 12
2381 3 0x33 '3' 13
2382 4 0x34 '4' 14
2383 5 0xE2 \342 U+2174 15-20
2384 6 0x85 \205 (cont) 15-20
2385 7 0xB4 \264 (cont) 15-20
2386 8 0xE2 \342 U+2175 21-26
2387 9 0x85 \205 (cont) 21-26
2388 10 0xB5 \265 (cont) 21-26
2389 11 0x37 '7' 27
2390 12 0x38 '8' 28
2391 13 0x39 '9' 29
2392 ----------- ---- ----- ------- ---------------. */
2393
2394 cpp_string dst_string;
2395 const enum cpp_ttype type = CPP_STRING;
2396 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2397 &dst_string, type);
2398 ASSERT_TRUE (result);
2399 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2400 (const char *)dst_string.text);
2401 free (const_cast <unsigned char *> (dst_string.text));
2402
2403 /* Verify ranges of individual characters. This no longer includes the
2404 quotes.
2405 '01234'. */
2406 for (int i = 0; i <= 4; i++)
2407 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2408 /* U+2174. */
2409 for (int i = 5; i <= 7; i++)
2410 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2411 /* U+2175. */
2412 for (int i = 8; i <= 10; i++)
2413 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2414 /* '789'. */
2415 for (int i = 11; i <= 13; i++)
2416 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2417
2418 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 14);
2419}
2420
2421/* Lex a string literal containing UCN 8 characters.
2422 Verify the substring location data after running cpp_interpret_string
2423 on it. */
2424
2425static void
2426test_lexer_string_locations_ucn8 (const line_table_case &case_)
2427{
2428 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2429 ....................000000000.111111.1111222222.2222333333333.344444
2430 ....................123456789.012345.6789012345.6789012345678.901234 */
2431 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2432 lexer_test test (case_, content, NULL);
2433
2434 /* Verify that we get the expected token back, with the correct
2435 location information. */
2436 const cpp_token *tok = test.get_token ();
2437 ASSERT_EQ (tok->type, CPP_STRING);
2438 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2439 "\"01234\\U00002174\\U00002175789\"");
2440
2441 /* Verify that cpp_interpret_string works.
2442 The UTF-8 encoding of the string is identical to that from
2443 the ucn4 testcase above; the only difference is the column
2444 locations. */
2445 cpp_string dst_string;
2446 const enum cpp_ttype type = CPP_STRING;
2447 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2448 &dst_string, type);
2449 ASSERT_TRUE (result);
2450 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2451 (const char *)dst_string.text);
2452 free (const_cast <unsigned char *> (dst_string.text));
2453
2454 /* Verify ranges of individual characters. This no longer includes the
2455 quotes.
2456 '01234'. */
2457 for (int i = 0; i <= 4; i++)
2458 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2459 /* U+2174. */
2460 for (int i = 5; i <= 7; i++)
2461 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2462 /* U+2175. */
2463 for (int i = 8; i <= 10; i++)
2464 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2465 /* '789' at columns 35-37 */
2466 for (int i = 11; i <= 13; i++)
2467 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2468
2469 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 14);
2470}
2471
/* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit quantity
   (most significant byte first) and return it in host endianness.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t value = 0;

  /* Fold the bytes in, most significant first.  */
  for (int i = 0; i < 4; i++)
    value = (value << 8) | bytes[i];

  return value;
}
2483
2484/* Lex a wide string literal and verify that attempts to read substring
2485 location data from it fail gracefully. */
2486
2487static void
2488test_lexer_string_locations_wide_string (const line_table_case &case_)
2489{
2490 /* Digits 0-9.
2491 ....................000000000.11111111112.22222222233333
2492 ....................123456789.01234567890.12345678901234 */
2493 const char *content = " L\"0123456789\" /* non-str */\n";
2494 lexer_test test (case_, content, NULL);
2495
2496 /* Verify that we get the expected token back, with the correct
2497 location information. */
2498 const cpp_token *tok = test.get_token ();
2499 ASSERT_EQ (tok->type, CPP_WSTRING);
2500 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2501
2502 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2503 cpp_string dst_string;
2504 const enum cpp_ttype type = CPP_WSTRING;
2505 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2506 &dst_string, type);
2507 ASSERT_TRUE (result);
2508 /* The cpp_reader defaults to big-endian with
2509 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2510 now be encoded as UTF-32BE. */
2511 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2512 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2513 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2514 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2515 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2516 free (const_cast <unsigned char *> (dst_string.text));
2517
2518 /* We don't yet support generating substring location information
2519 for L"" strings. */
2520 ASSERT_HAS_NO_SUBSTRING_RANGES
2521 (test, tok->src_loc, type,
2522 "execution character set != source character set");
2523}
2524
/* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit quantity
   (most significant byte first) and return it in host endianness.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high_byte = bytes[0];
  const unsigned int low_byte = bytes[1];

  return (uint16_t) ((high_byte << 8) | low_byte);
}
2533
2534/* Lex a u"" string literal and verify that attempts to read substring
2535 location data from it fail gracefully. */
2536
2537static void
2538test_lexer_string_locations_string16 (const line_table_case &case_)
2539{
2540 /* Digits 0-9.
2541 ....................000000000.11111111112.22222222233333
2542 ....................123456789.01234567890.12345678901234 */
2543 const char *content = " u\"0123456789\" /* non-str */\n";
2544 lexer_test test (case_, content, NULL);
2545
2546 /* Verify that we get the expected token back, with the correct
2547 location information. */
2548 const cpp_token *tok = test.get_token ();
2549 ASSERT_EQ (tok->type, CPP_STRING16);
2550 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2551
2552 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2553 cpp_string dst_string;
2554 const enum cpp_ttype type = CPP_STRING16;
2555 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2556 &dst_string, type);
2557 ASSERT_TRUE (result);
2558
2559 /* The cpp_reader defaults to big-endian, so dst_string should
2560 now be encoded as UTF-16BE. */
2561 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2562 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2563 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2564 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2565 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2566 free (const_cast <unsigned char *> (dst_string.text));
2567
2568 /* We don't yet support generating substring location information
2569 for L"" strings. */
2570 ASSERT_HAS_NO_SUBSTRING_RANGES
2571 (test, tok->src_loc, type,
2572 "execution character set != source character set");
2573}
2574
2575/* Lex a U"" string literal and verify that attempts to read substring
2576 location data from it fail gracefully. */
2577
2578static void
2579test_lexer_string_locations_string32 (const line_table_case &case_)
2580{
2581 /* Digits 0-9.
2582 ....................000000000.11111111112.22222222233333
2583 ....................123456789.01234567890.12345678901234 */
2584 const char *content = " U\"0123456789\" /* non-str */\n";
2585 lexer_test test (case_, content, NULL);
2586
2587 /* Verify that we get the expected token back, with the correct
2588 location information. */
2589 const cpp_token *tok = test.get_token ();
2590 ASSERT_EQ (tok->type, CPP_STRING32);
2591 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2592
2593 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2594 cpp_string dst_string;
2595 const enum cpp_ttype type = CPP_STRING32;
2596 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2597 &dst_string, type);
2598 ASSERT_TRUE (result);
2599
2600 /* The cpp_reader defaults to big-endian, so dst_string should
2601 now be encoded as UTF-32BE. */
2602 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2603 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2604 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2605 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2606 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2607 free (const_cast <unsigned char *> (dst_string.text));
2608
2609 /* We don't yet support generating substring location information
2610 for L"" strings. */
2611 ASSERT_HAS_NO_SUBSTRING_RANGES
2612 (test, tok->src_loc, type,
2613 "execution character set != source character set");
2614}
2615
2616/* Lex a u8-string literal.
2617 Verify the substring location data after running cpp_interpret_string
2618 on it. */
2619
2620static void
2621test_lexer_string_locations_u8 (const line_table_case &case_)
2622{
2623 /* Digits 0-9.
2624 ....................000000000.11111111112.22222222233333
2625 ....................123456789.01234567890.12345678901234 */
2626 const char *content = " u8\"0123456789\" /* non-str */\n";
2627 lexer_test test (case_, content, NULL);
2628
2629 /* Verify that we get the expected token back, with the correct
2630 location information. */
2631 const cpp_token *tok = test.get_token ();
2632 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2633 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2634
2635 /* Verify that cpp_interpret_string works. */
2636 cpp_string dst_string;
2637 const enum cpp_ttype type = CPP_STRING;
2638 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2639 &dst_string, type);
2640 ASSERT_TRUE (result);
2641 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2642 free (const_cast <unsigned char *> (dst_string.text));
2643
2644 /* Verify ranges of individual characters. This no longer includes the
2645 quotes. */
2646 for (int i = 0; i <= 9; i++)
2647 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2648}
2649
2650/* Lex a string literal containing UTF-8 source characters.
2651 Verify the substring location data after running cpp_interpret_string
2652 on it. */
2653
2654static void
2655test_lexer_string_locations_utf8_source (const line_table_case &case_)
2656{
2657 /* This string literal is written out to the source file as UTF-8,
2658 and is of the form "before mojibake after", where "mojibake"
2659 is written as the following four unicode code points:
2660 U+6587 CJK UNIFIED IDEOGRAPH-6587
2661 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2662 U+5316 CJK UNIFIED IDEOGRAPH-5316
2663 U+3051 HIRAGANA LETTER KE.
2664 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2665 "before" and "after" are 1 byte per unicode character.
2666
2667 The numbering shown are "columns", which are *byte* numbers within
2668 the line, rather than unicode character numbers.
2669
2670 .................... 000000000.1111111.
2671 .................... 123456789.0123456. */
2672 const char *content = (" \"before "
2673 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2674 UTF-8: 0xE6 0x96 0x87
2675 C octal escaped UTF-8: \346\226\207
2676 "column" numbers: 17-19. */
2677 "\346\226\207"
2678
2679 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2680 UTF-8: 0xE5 0xAD 0x97
2681 C octal escaped UTF-8: \345\255\227
2682 "column" numbers: 20-22. */
2683 "\345\255\227"
2684
2685 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2686 UTF-8: 0xE5 0x8C 0x96
2687 C octal escaped UTF-8: \345\214\226
2688 "column" numbers: 23-25. */
2689 "\345\214\226"
2690
2691 /* U+3051 HIRAGANA LETTER KE
2692 UTF-8: 0xE3 0x81 0x91
2693 C octal escaped UTF-8: \343\201\221
2694 "column" numbers: 26-28. */
2695 "\343\201\221"
2696
2697 /* column numbers 29 onwards
2698 2333333.33334444444444
2699 9012345.67890123456789. */
2700 " after\" /* non-str */\n");
2701 lexer_test test (case_, content, NULL);
2702
2703 /* Verify that we get the expected token back, with the correct
2704 location information. */
2705 const cpp_token *tok = test.get_token ();
2706 ASSERT_EQ (tok->type, CPP_STRING);
2707 ASSERT_TOKEN_AS_TEXT_EQ
2708 (test.m_parser, tok,
2709 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2710
2711 /* Verify that cpp_interpret_string works. */
2712 cpp_string dst_string;
2713 const enum cpp_ttype type = CPP_STRING;
2714 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2715 &dst_string, type);
2716 ASSERT_TRUE (result);
2717 ASSERT_STREQ
2718 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2719 (const char *)dst_string.text);
2720 free (const_cast <unsigned char *> (dst_string.text));
2721
2722 /* Verify ranges of individual characters. This no longer includes the
2723 quotes.
2724 Assuming that both source and execution encodings are UTF-8, we have
2725 a run of 25 octets in each. */
2726 for (int i = 0; i < 25; i++)
2727 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2728
2729 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 25);
2730}
2731
2732/* Test of string literal concatenation. */
2733
2734static void
2735test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
2736{
2737 /* Digits 0-9.
2738 .....................000000000.111111.11112222222222
2739 .....................123456789.012345.67890123456789. */
2740 const char *content = (" \"01234\" /* non-str */\n"
2741 " \"56789\" /* non-str */\n");
2742 lexer_test test (case_, content, NULL);
2743
2744 location_t input_locs[2];
2745
2746 /* Verify that we get the expected tokens back. */
2747 auto_vec <cpp_string> input_strings;
2748 const cpp_token *tok_a = test.get_token ();
2749 ASSERT_EQ (tok_a->type, CPP_STRING);
2750 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
2751 input_strings.safe_push (tok_a->val.str);
2752 input_locs[0] = tok_a->src_loc;
2753
2754 const cpp_token *tok_b = test.get_token ();
2755 ASSERT_EQ (tok_b->type, CPP_STRING);
2756 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
2757 input_strings.safe_push (tok_b->val.str);
2758 input_locs[1] = tok_b->src_loc;
2759
2760 /* Verify that cpp_interpret_string works. */
2761 cpp_string dst_string;
2762 const enum cpp_ttype type = CPP_STRING;
2763 bool result = cpp_interpret_string (test.m_parser,
2764 input_strings.address (), 2,
2765 &dst_string, type);
2766 ASSERT_TRUE (result);
2767 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2768 free (const_cast <unsigned char *> (dst_string.text));
2769
2770 /* Simulate c-lex.c's lex_string in order to record concatenation. */
2771 test.m_concats.record_string_concatenation (2, input_locs);
2772
2773 location_t initial_loc = input_locs[0];
2774
2775 for (int i = 0; i <= 4; i++)
2776 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
2777 for (int i = 5; i <= 9; i++)
2778 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
2779
2780 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10);
2781}
2782
2783/* Another test of string literal concatenation. */
2784
2785static void
2786test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
2787{
2788 /* Digits 0-9.
2789 .....................000000000.111.11111112222222
2790 .....................123456789.012.34567890123456. */
2791 const char *content = (" \"01\" /* non-str */\n"
2792 " \"23\" /* non-str */\n"
2793 " \"45\" /* non-str */\n"
2794 " \"67\" /* non-str */\n"
2795 " \"89\" /* non-str */\n");
2796 lexer_test test (case_, content, NULL);
2797
2798 auto_vec <cpp_string> input_strings;
2799 location_t input_locs[5];
2800
2801 /* Verify that we get the expected tokens back. */
2802 for (int i = 0; i < 5; i++)
2803 {
2804 const cpp_token *tok = test.get_token ();
2805 ASSERT_EQ (tok->type, CPP_STRING);
2806 input_strings.safe_push (tok->val.str);
2807 input_locs[i] = tok->src_loc;
2808 }
2809
2810 /* Verify that cpp_interpret_string works. */
2811 cpp_string dst_string;
2812 const enum cpp_ttype type = CPP_STRING;
2813 bool result = cpp_interpret_string (test.m_parser,
2814 input_strings.address (), 5,
2815 &dst_string, type);
2816 ASSERT_TRUE (result);
2817 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2818 free (const_cast <unsigned char *> (dst_string.text));
2819
2820 /* Simulate c-lex.c's lex_string in order to record concatenation. */
2821 test.m_concats.record_string_concatenation (5, input_locs);
2822
2823 location_t initial_loc = input_locs[0];
2824
2825 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
2826 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
2827 and expect get_source_range_for_substring to fail.
2828 However, for a string concatenation test, we can have a case
2829 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
2830 but subsequent strings can be after it.
2831 Attempting to detect this within assert_char_at_range
2832 would overcomplicate the logic for the common test cases, so
2833 we detect it here. */
2834 if (should_have_column_data_p (input_locs[0])
2835 && !should_have_column_data_p (input_locs[4]))
2836 {
2837 /* Verify that get_source_range_for_substring gracefully rejects
2838 this case. */
2839 source_range actual_range;
2840 const char *err
65e736c0
DM
2841 = get_source_range_for_char (test.m_parser, &test.m_concats,
2842 initial_loc, type, 0, &actual_range);
88fa5555
DM
2843 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
2844 return;
2845 }
2846
2847 for (int i = 0; i < 5; i++)
2848 for (int j = 0; j < 2; j++)
2849 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
2850 i + 1, 10 + j, 10 + j);
2851
2852 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10);
2853}
2854
2855/* Another test of string literal concatenation, this time combined with
2856 various kinds of escaped characters. */
2857
2858static void
2859test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
2860{
2861 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
2862 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
2863 const char *content
2864 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
2865 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
2866 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
2867 lexer_test test (case_, content, NULL);
2868
2869 auto_vec <cpp_string> input_strings;
2870 location_t input_locs[4];
2871
2872 /* Verify that we get the expected tokens back. */
2873 for (int i = 0; i < 4; i++)
2874 {
2875 const cpp_token *tok = test.get_token ();
2876 ASSERT_EQ (tok->type, CPP_STRING);
2877 input_strings.safe_push (tok->val.str);
2878 input_locs[i] = tok->src_loc;
2879 }
2880
2881 /* Verify that cpp_interpret_string works. */
2882 cpp_string dst_string;
2883 const enum cpp_ttype type = CPP_STRING;
2884 bool result = cpp_interpret_string (test.m_parser,
2885 input_strings.address (), 4,
2886 &dst_string, type);
2887 ASSERT_TRUE (result);
2888 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2889 free (const_cast <unsigned char *> (dst_string.text));
2890
2891 /* Simulate c-lex.c's lex_string in order to record concatenation. */
2892 test.m_concats.record_string_concatenation (4, input_locs);
2893
2894 location_t initial_loc = input_locs[0];
2895
2896 for (int i = 0; i <= 4; i++)
2897 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
2898 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
2899 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
2900 for (int i = 7; i <= 9; i++)
2901 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
2902
2903 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10);
2904}
2905
2906/* Test of string literal in a macro. */
2907
2908static void
2909test_lexer_string_locations_macro (const line_table_case &case_)
2910{
2911 /* Digits 0-9.
2912 .....................0000000001111111111.22222222223.
2913 .....................1234567890123456789.01234567890. */
2914 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
2915 " MACRO");
2916 lexer_test test (case_, content, NULL);
2917
2918 /* Verify that we get the expected tokens back. */
2919 const cpp_token *tok = test.get_token ();
2920 ASSERT_EQ (tok->type, CPP_PADDING);
2921
2922 tok = test.get_token ();
2923 ASSERT_EQ (tok->type, CPP_STRING);
2924 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2925
2926 /* Verify ranges of individual characters. We ought to
2927 see columns within the macro definition. */
2928 for (int i = 0; i <= 9; i++)
2929 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2930 i, 1, 20 + i, 20 + i);
2931
2932 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2933
2934 tok = test.get_token ();
2935 ASSERT_EQ (tok->type, CPP_PADDING);
2936}
2937
2938/* Test of stringification of a macro argument. */
2939
2940static void
2941test_lexer_string_locations_stringified_macro_argument
2942 (const line_table_case &case_)
2943{
2944 /* .....................000000000111111111122222222223.
2945 .....................123456789012345678901234567890. */
2946 const char *content = ("#define MACRO(X) #X /* non-str */\n"
2947 "MACRO(foo)\n");
2948 lexer_test test (case_, content, NULL);
2949
2950 /* Verify that we get the expected token back. */
2951 const cpp_token *tok = test.get_token ();
2952 ASSERT_EQ (tok->type, CPP_PADDING);
2953
2954 tok = test.get_token ();
2955 ASSERT_EQ (tok->type, CPP_STRING);
2956 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
2957
2958 /* We don't support getting the location of a stringified macro
2959 argument. Verify that it fails gracefully. */
2960 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
2961 "cpp_interpret_string_1 failed");
2962
2963 tok = test.get_token ();
2964 ASSERT_EQ (tok->type, CPP_PADDING);
2965
2966 tok = test.get_token ();
2967 ASSERT_EQ (tok->type, CPP_PADDING);
2968}
2969
2970/* Ensure that we are fail gracefully if something attempts to pass
2971 in a location that isn't a string literal token. Seen on this code:
2972
2973 const char a[] = " %d ";
2974 __builtin_printf (a, 0.5);
2975 ^
2976
2977 when c-format.c erroneously used the indicated one-character
2978 location as the format string location, leading to a read past the
2979 end of a string buffer in cpp_interpret_string_1. */
2980
2981static void
2982test_lexer_string_locations_non_string (const line_table_case &case_)
2983{
2984 /* .....................000000000111111111122222222223.
2985 .....................123456789012345678901234567890. */
2986 const char *content = (" a\n");
2987 lexer_test test (case_, content, NULL);
2988
2989 /* Verify that we get the expected token back. */
2990 const cpp_token *tok = test.get_token ();
2991 ASSERT_EQ (tok->type, CPP_NAME);
2992 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
2993
2994 /* At this point, libcpp is attempting to interpret the name as a
2995 string literal, despite it not starting with a quote. We don't detect
2996 that, but we should at least fail gracefully. */
2997 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
2998 "cpp_interpret_string_1 failed");
2999}
3000
3001/* Ensure that we can read substring information for a token which
3002 starts in one linemap and ends in another . Adapted from
3003 gcc.dg/cpp/pr69985.c. */
3004
3005static void
3006test_lexer_string_locations_long_line (const line_table_case &case_)
3007{
3008 /* .....................000000.000111111111
3009 .....................123456.789012346789. */
3010 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3011 " \"0123456789012345678901234567890123456789"
3012 "0123456789012345678901234567890123456789"
3013 "0123456789012345678901234567890123456789"
3014 "0123456789\"\n");
3015
3016 lexer_test test (case_, content, NULL);
3017
3018 /* Verify that we get the expected token back. */
3019 const cpp_token *tok = test.get_token ();
3020 ASSERT_EQ (tok->type, CPP_STRING);
3021
3022 if (!should_have_column_data_p (line_table->highest_location))
3023 return;
3024
3025 /* Verify ranges of individual characters. */
3026 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 130);
3027 for (int i = 0; i < 130; i++)
3028 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3029 i, 2, 7 + i, 7 + i);
3030}
3031
3032/* Test of lexing char constants. */
3033
3034static void
3035test_lexer_char_constants (const line_table_case &case_)
3036{
3037 /* Various char constants.
3038 .....................0000000001111111111.22222222223.
3039 .....................1234567890123456789.01234567890. */
3040 const char *content = (" 'a'\n"
3041 " u'a'\n"
3042 " U'a'\n"
3043 " L'a'\n"
3044 " 'abc'\n");
3045 lexer_test test (case_, content, NULL);
3046
3047 /* Verify that we get the expected tokens back. */
3048 /* 'a'. */
3049 const cpp_token *tok = test.get_token ();
3050 ASSERT_EQ (tok->type, CPP_CHAR);
3051 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3052
3053 unsigned int chars_seen;
3054 int unsignedp;
3055 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3056 &chars_seen, &unsignedp);
3057 ASSERT_EQ (cc, 'a');
3058 ASSERT_EQ (chars_seen, 1);
3059
3060 /* u'a'. */
3061 tok = test.get_token ();
3062 ASSERT_EQ (tok->type, CPP_CHAR16);
3063 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3064
3065 /* U'a'. */
3066 tok = test.get_token ();
3067 ASSERT_EQ (tok->type, CPP_CHAR32);
3068 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3069
3070 /* L'a'. */
3071 tok = test.get_token ();
3072 ASSERT_EQ (tok->type, CPP_WCHAR);
3073 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3074
3075 /* 'abc' (c-char-sequence). */
3076 tok = test.get_token ();
3077 ASSERT_EQ (tok->type, CPP_CHAR);
3078 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3079}
741d3be5
DM
3080/* A table of interesting location_t values, giving one axis of our test
3081 matrix. */
3082
3083static const location_t boundary_locations[] = {
3084 /* Zero means "don't override the default values for a new line_table". */
3085 0,
3086
3087 /* An arbitrary non-zero value that isn't close to one of
3088 the boundary values below. */
3089 0x10000,
3090
3091 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3092 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3093 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3094 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3095 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3096 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3097
3098 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3099 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3100 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3101 LINE_MAP_MAX_LOCATION_WITH_COLS,
3102 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3103 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3104};
3105
d9b950dd
DM
3106/* Run all of the selftests within this file. */
3107
3108void
3109input_c_tests ()
3110{
741d3be5 3111 test_should_have_column_data_p ();
d9b950dd
DM
3112 test_unknown_location ();
3113 test_builtins ();
741d3be5
DM
3114
3115 /* As noted above in the description of struct line_table_case,
3116 we want to explore a test matrix of interesting line_table
3117 situations, running various selftests for each case within the
3118 matrix. */
3119
3120 /* Run all tests with:
3121 (a) line_table->default_range_bits == 0, and
3122 (b) line_table->default_range_bits == 5. */
3123 int num_cases_tested = 0;
3124 for (int default_range_bits = 0; default_range_bits <= 5;
3125 default_range_bits += 5)
3126 {
3127 /* ...and use each of the "interesting" location values as
3128 the starting location within line_table. */
3129 const int num_boundary_locations
3130 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3131 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3132 {
3133 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3134
3135 /* Run all tests for the given case within the test matrix. */
3136 test_accessing_ordinary_linemaps (c);
3137 test_lexer (c);
88fa5555
DM
3138 test_lexer_string_locations_simple (c);
3139 test_lexer_string_locations_ebcdic (c);
3140 test_lexer_string_locations_hex (c);
3141 test_lexer_string_locations_oct (c);
3142 test_lexer_string_locations_letter_escape_1 (c);
3143 test_lexer_string_locations_letter_escape_2 (c);
3144 test_lexer_string_locations_ucn4 (c);
3145 test_lexer_string_locations_ucn8 (c);
3146 test_lexer_string_locations_wide_string (c);
3147 test_lexer_string_locations_string16 (c);
3148 test_lexer_string_locations_string32 (c);
3149 test_lexer_string_locations_u8 (c);
3150 test_lexer_string_locations_utf8_source (c);
3151 test_lexer_string_locations_concatenation_1 (c);
3152 test_lexer_string_locations_concatenation_2 (c);
3153 test_lexer_string_locations_concatenation_3 (c);
3154 test_lexer_string_locations_macro (c);
3155 test_lexer_string_locations_stringified_macro_argument (c);
3156 test_lexer_string_locations_non_string (c);
3157 test_lexer_string_locations_long_line (c);
3158 test_lexer_char_constants (c);
741d3be5
DM
3159
3160 num_cases_tested++;
3161 }
3162 }
3163
3164 /* Verify that we fully covered the test matrix. */
3165 ASSERT_EQ (num_cases_tested, 2 * 12);
3166
d9b950dd
DM
3167 test_reading_source_line ();
3168}
3169
3170} // namespace selftest
3171
3172#endif /* CHECKING_P */