]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/input.c
Fix ICE in lto_symtab_merge_symbols_1 (PR lto/88004).
[thirdparty/gcc.git] / gcc / input.c
CommitLineData
447924ef 1/* Data and functions related to line maps and input files.
85ec4feb 2 Copyright (C) 2004-2018 Free Software Foundation, Inc.
447924ef
JM
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "intl.h"
ba4ad400 24#include "diagnostic-core.h"
d9b950dd 25#include "selftest.h"
741d3be5 26#include "cpplib.h"
7ecc3eb9 27
a7d79e5c
DM
28#ifndef HAVE_ICONV
29#define HAVE_ICONV 0
30#endif
31
7ecc3eb9
DS
32/* This is a cache used by get_next_line to store the content of a
33 file to be searched for file lines. */
34struct fcache
35{
36 /* These are information used to store a line boundary. */
37 struct line_info
38 {
39 /* The line number. It starts from 1. */
40 size_t line_num;
41
42 /* The position (byte count) of the beginning of the line,
43 relative to the file data pointer. This starts at zero. */
44 size_t start_pos;
45
46 /* The position (byte count) of the last byte of the line. This
47 normally points to the '\n' character, or to one byte after the
48 last byte of the file, if the file doesn't contain a '\n'
49 character. */
50 size_t end_pos;
51
52 line_info (size_t l, size_t s, size_t e)
53 : line_num (l), start_pos (s), end_pos (e)
54 {}
55
56 line_info ()
57 :line_num (0), start_pos (0), end_pos (0)
58 {}
59 };
60
61 /* The number of time this file has been accessed. This is used
62 to designate which file cache to evict from the cache
63 array. */
64 unsigned use_count;
65
f5ea989d
DM
66 /* The file_path is the key for identifying a particular file in
67 the cache.
68 For libcpp-using code, the underlying buffer for this field is
69 owned by the corresponding _cpp_file within the cpp_reader. */
7ecc3eb9
DS
70 const char *file_path;
71
72 FILE *fp;
73
74 /* This points to the content of the file that we've read so
75 far. */
76 char *data;
77
78 /* The size of the DATA array above.*/
79 size_t size;
80
81 /* The number of bytes read from the underlying file so far. This
82 must be less (or equal) than SIZE above. */
83 size_t nb_read;
84
85 /* The index of the beginning of the current line. */
86 size_t line_start_idx;
87
88 /* The number of the previous line read. This starts at 1. Zero
89 means we've read no line so far. */
90 size_t line_num;
91
92 /* This is the total number of lines of the current file. At the
93 moment, we try to get this information from the line map
94 subsystem. Note that this is just a hint. When using the C++
95 front-end, this hint is correct because the input file is then
96 completely tokenized before parsing starts; so the line map knows
97 the number of lines before compilation really starts. For e.g,
98 the C front-end, it can happen that we start emitting diagnostics
99 before the line map has seen the end of the file. */
100 size_t total_lines;
101
c65236d6
DM
102 /* Could this file be missing a trailing newline on its final line?
103 Initially true (to cope with empty files), set to true/false
104 as each line is read. */
105 bool missing_trailing_newline;
106
7ecc3eb9
DS
107 /* This is a record of the beginning and end of the lines we've seen
108 while reading the file. This is useful to avoid walking the data
109 from the beginning when we are asked to read a line that is
110 before LINE_START_IDX above. Note that the maximum size of this
111 record is fcache_line_record_size, so that the memory consumption
112 doesn't explode. We thus scale total_lines down to
113 fcache_line_record_size. */
114 vec<line_info, va_heap> line_record;
115
116 fcache ();
117 ~fcache ();
118};
447924ef
JM
119
120/* Current position in real source file. */
121
3edf64aa 122location_t input_location = UNKNOWN_LOCATION;
447924ef
JM
123
124struct line_maps *line_table;
125
f87e22c5
DM
126/* A stashed copy of "line_table" for use by selftest::line_table_test.
127 This needs to be a global so that it can be a GC root, and thus
128 prevent the stashed copy from being garbage-collected if the GC runs
129 during a line_table_test. */
130
131struct line_maps *saved_line_table;
132
7ecc3eb9
DS
133static fcache *fcache_tab;
134static const size_t fcache_tab_size = 16;
135static const size_t fcache_buffer_size = 4 * 1024;
136static const size_t fcache_line_record_size = 100;
137
84756fd4
DS
138/* Expand the source location LOC into a human readable location. If
139 LOC resolves to a builtin location, the file name of the readable
7eb918cc
DS
140 location is set to the string "<built-in>". If EXPANSION_POINT_P is
141 TRUE and LOC is virtual, then it is resolved to the expansion
142 point of the involved macro. Otherwise, it is resolved to the
c4ca1a09
DS
143 spelling location of the token.
144
145 When resolving to the spelling location of the token, if the
146 resulting location is for a built-in location (that is, it has no
147 associated line/column) in the context of a macro expansion, the
148 returned location is the first one (while unwinding the macro
149 location towards its expansion point) that is in real source
c471c6ed
DM
150 code.
151
152 ASPECT controls which part of the location to use. */
7eb918cc
DS
153
154static expanded_location
620e594b 155expand_location_1 (location_t loc,
c471c6ed
DM
156 bool expansion_point_p,
157 enum location_aspect aspect)
447924ef
JM
158{
159 expanded_location xloc;
0e50b624 160 const line_map_ordinary *map;
c4ca1a09 161 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
5368224f
DC
162 tree block = NULL;
163
164 if (IS_ADHOC_LOC (loc))
165 {
166 block = LOCATION_BLOCK (loc);
167 loc = LOCATION_LOCUS (loc);
168 }
c4ca1a09
DS
169
170 memset (&xloc, 0, sizeof (xloc));
84756fd4 171
c4ca1a09
DS
172 if (loc >= RESERVED_LOCATION_COUNT)
173 {
174 if (!expansion_point_p)
175 {
176 /* We want to resolve LOC to its spelling location.
177
178 But if that spelling location is a reserved location that
179 appears in the context of a macro expansion (like for a
180 location for a built-in token), let's consider the first
181 location (toward the expansion point) that is not reserved;
182 that is, the first location that is in real source code. */
183 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
0e50b624 184 loc, NULL);
c4ca1a09
DS
185 lrk = LRK_SPELLING_LOCATION;
186 }
c471c6ed
DM
187 loc = linemap_resolve_location (line_table, loc, lrk, &map);
188
189 /* loc is now either in an ordinary map, or is a reserved location.
190 If it is a compound location, the caret is in a spelling location,
191 but the start/finish might still be a virtual location.
192 Depending of what the caller asked for, we may need to recurse
193 one level in order to resolve any virtual locations in the
194 end-points. */
195 switch (aspect)
196 {
197 default:
198 gcc_unreachable ();
199 /* Fall through. */
200 case LOCATION_ASPECT_CARET:
201 break;
202 case LOCATION_ASPECT_START:
203 {
620e594b 204 location_t start = get_start (loc);
c471c6ed
DM
205 if (start != loc)
206 return expand_location_1 (start, expansion_point_p, aspect);
207 }
208 break;
209 case LOCATION_ASPECT_FINISH:
210 {
620e594b 211 location_t finish = get_finish (loc);
c471c6ed
DM
212 if (finish != loc)
213 return expand_location_1 (finish, expansion_point_p, aspect);
214 }
215 break;
216 }
c4ca1a09
DS
217 xloc = linemap_expand_location (line_table, map, loc);
218 }
84756fd4 219
5368224f 220 xloc.data = block;
447924ef 221 if (loc <= BUILTINS_LOCATION)
84756fd4
DS
222 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
223
447924ef
JM
224 return xloc;
225}
64a1a422 226
7ecc3eb9
DS
227/* Initialize the set of cache used for files accessed by caret
228 diagnostic. */
229
230static void
231diagnostic_file_cache_init (void)
232{
233 if (fcache_tab == NULL)
234 fcache_tab = new fcache[fcache_tab_size];
235}
236
592f32fa 237/* Free the resources used by the set of cache used for files accessed
7ecc3eb9
DS
238 by caret diagnostic. */
239
240void
241diagnostic_file_cache_fini (void)
242{
243 if (fcache_tab)
244 {
245 delete [] (fcache_tab);
246 fcache_tab = NULL;
247 }
248}
249
250/* Return the total lines number that have been read so far by the
251 line map (in the preprocessor) so far. For languages like C++ that
252 entirely preprocess the input file before starting to parse, this
253 equals the actual number of lines of the file. */
254
255static size_t
256total_lines_num (const char *file_path)
257{
258 size_t r = 0;
620e594b 259 location_t l = 0;
7ecc3eb9
DS
260 if (linemap_get_file_highest_location (line_table, file_path, &l))
261 {
262 gcc_assert (l >= RESERVED_LOCATION_COUNT);
263 expanded_location xloc = expand_location (l);
264 r = xloc.line;
265 }
266 return r;
267}
268
269/* Lookup the cache used for the content of a given file accessed by
270 caret diagnostic. Return the found cached file, or NULL if no
271 cached file was found. */
272
273static fcache*
274lookup_file_in_cache_tab (const char *file_path)
275{
276 if (file_path == NULL)
277 return NULL;
278
279 diagnostic_file_cache_init ();
280
281 /* This will contain the found cached file. */
282 fcache *r = NULL;
283 for (unsigned i = 0; i < fcache_tab_size; ++i)
284 {
285 fcache *c = &fcache_tab[i];
286 if (c->file_path && !strcmp (c->file_path, file_path))
287 {
288 ++c->use_count;
289 r = c;
290 }
291 }
292
293 if (r)
294 ++r->use_count;
295
296 return r;
297}
298
f89b03b6
DM
299/* Purge any mention of FILENAME from the cache of files used for
300 printing source code. For use in selftests when working
301 with tempfiles. */
302
303void
304diagnostics_file_cache_forcibly_evict_file (const char *file_path)
305{
306 gcc_assert (file_path);
307
308 fcache *r = lookup_file_in_cache_tab (file_path);
309 if (!r)
310 /* Not found. */
311 return;
312
313 r->file_path = NULL;
314 if (r->fp)
315 fclose (r->fp);
316 r->fp = NULL;
317 r->nb_read = 0;
318 r->line_start_idx = 0;
319 r->line_num = 0;
320 r->line_record.truncate (0);
321 r->use_count = 0;
322 r->total_lines = 0;
c65236d6 323 r->missing_trailing_newline = true;
f89b03b6
DM
324}
325
7ecc3eb9
DS
326/* Return the file cache that has been less used, recently, or the
327 first empty one. If HIGHEST_USE_COUNT is non-null,
328 *HIGHEST_USE_COUNT is set to the highest use count of the entries
329 in the cache table. */
330
331static fcache*
332evicted_cache_tab_entry (unsigned *highest_use_count)
333{
334 diagnostic_file_cache_init ();
335
336 fcache *to_evict = &fcache_tab[0];
337 unsigned huc = to_evict->use_count;
338 for (unsigned i = 1; i < fcache_tab_size; ++i)
339 {
340 fcache *c = &fcache_tab[i];
341 bool c_is_empty = (c->file_path == NULL);
342
343 if (c->use_count < to_evict->use_count
344 || (to_evict->file_path && c_is_empty))
345 /* We evict C because it's either an entry with a lower use
346 count or one that is empty. */
347 to_evict = c;
348
349 if (huc < c->use_count)
350 huc = c->use_count;
351
352 if (c_is_empty)
353 /* We've reached the end of the cache; subsequent elements are
354 all empty. */
355 break;
356 }
357
358 if (highest_use_count)
359 *highest_use_count = huc;
360
361 return to_evict;
362}
363
364/* Create the cache used for the content of a given file to be
365 accessed by caret diagnostic. This cache is added to an array of
366 cache and can be retrieved by lookup_file_in_cache_tab. This
367 function returns the created cache. Note that only the last
368 fcache_tab_size files are cached. */
369
370static fcache*
371add_file_to_cache_tab (const char *file_path)
372{
373
374 FILE *fp = fopen (file_path, "r");
317363b4
DS
375 if (fp == NULL)
376 return NULL;
7ecc3eb9
DS
377
378 unsigned highest_use_count = 0;
379 fcache *r = evicted_cache_tab_entry (&highest_use_count);
380 r->file_path = file_path;
381 if (r->fp)
382 fclose (r->fp);
383 r->fp = fp;
384 r->nb_read = 0;
385 r->line_start_idx = 0;
386 r->line_num = 0;
387 r->line_record.truncate (0);
388 /* Ensure that this cache entry doesn't get evicted next time
389 add_file_to_cache_tab is called. */
390 r->use_count = ++highest_use_count;
391 r->total_lines = total_lines_num (file_path);
c65236d6 392 r->missing_trailing_newline = true;
7ecc3eb9
DS
393
394 return r;
395}
396
397/* Lookup the cache used for the content of a given file accessed by
398 caret diagnostic. If no cached file was found, create a new cache
399 for this file, add it to the array of cached file and return
400 it. */
401
402static fcache*
403lookup_or_add_file_to_cache_tab (const char *file_path)
404{
405 fcache *r = lookup_file_in_cache_tab (file_path);
406 if (r == NULL)
407 r = add_file_to_cache_tab (file_path);
408 return r;
409}
410
411/* Default constructor for a cache of file used by caret
412 diagnostic. */
413
414fcache::fcache ()
415: use_count (0), file_path (NULL), fp (NULL), data (0),
416 size (0), nb_read (0), line_start_idx (0), line_num (0),
c65236d6 417 total_lines (0), missing_trailing_newline (true)
7ecc3eb9
DS
418{
419 line_record.create (0);
420}
421
422/* Destructor for a cache of file used by caret diagnostic. */
423
424fcache::~fcache ()
425{
426 if (fp)
427 {
428 fclose (fp);
429 fp = NULL;
430 }
431 if (data)
432 {
433 XDELETEVEC (data);
434 data = 0;
435 }
436 line_record.release ();
437}
438
439/* Returns TRUE iff the cache would need to be filled with data coming
440 from the file. That is, either the cache is empty or full or the
441 current line is empty. Note that if the cache is full, it would
442 need to be extended and filled again. */
443
444static bool
445needs_read (fcache *c)
446{
447 return (c->nb_read == 0
448 || c->nb_read == c->size
449 || (c->line_start_idx >= c->nb_read - 1));
450}
451
452/* Return TRUE iff the cache is full and thus needs to be
453 extended. */
454
455static bool
456needs_grow (fcache *c)
457{
458 return c->nb_read == c->size;
459}
460
461/* Grow the cache if it needs to be extended. */
462
463static void
464maybe_grow (fcache *c)
9fec0042 465{
7ecc3eb9
DS
466 if (!needs_grow (c))
467 return;
468
469 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
1adae327 470 c->data = XRESIZEVEC (char, c->data, size);
7ecc3eb9
DS
471 c->size = size;
472}
9fec0042 473
7ecc3eb9
DS
474/* Read more data into the cache. Extends the cache if need be.
475 Returns TRUE iff new data could be read. */
476
477static bool
478read_data (fcache *c)
479{
480 if (feof (c->fp) || ferror (c->fp))
481 return false;
482
483 maybe_grow (c);
484
485 char * from = c->data + c->nb_read;
486 size_t to_read = c->size - c->nb_read;
487 size_t nb_read = fread (from, 1, to_read, c->fp);
488
489 if (ferror (c->fp))
490 return false;
491
492 c->nb_read += nb_read;
493 return !!nb_read;
494}
495
496/* Read new data iff the cache needs to be filled with more data
497 coming from the file FP. Return TRUE iff the cache was filled with
498 mode data. */
499
500static bool
501maybe_read_data (fcache *c)
502{
503 if (!needs_read (c))
504 return false;
505 return read_data (c);
506}
507
508/* Read a new line from file FP, using C as a cache for the data
509 coming from the file. Upon successful completion, *LINE is set to
1adae327
BE
510 the beginning of the line found. *LINE points directly in the
511 line cache and is only valid until the next call of get_next_line.
7ecc3eb9
DS
512 *LINE_LEN is set to the length of the line. Note that the line
513 does not contain any terminal delimiter. This function returns
514 true if some data was read or process from the cache, false
1adae327
BE
515 otherwise. Note that subsequent calls to get_next_line might
516 make the content of *LINE invalid. */
7ecc3eb9
DS
517
518static bool
519get_next_line (fcache *c, char **line, ssize_t *line_len)
520{
521 /* Fill the cache with data to process. */
522 maybe_read_data (c);
523
524 size_t remaining_size = c->nb_read - c->line_start_idx;
525 if (remaining_size == 0)
526 /* There is no more data to process. */
527 return false;
528
529 char *line_start = c->data + c->line_start_idx;
530
531 char *next_line_start = NULL;
532 size_t len = 0;
533 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
534 if (line_end == NULL)
9fec0042 535 {
7ecc3eb9
DS
536 /* We haven't found the end-of-line delimiter in the cache.
537 Fill the cache with more data from the file and look for the
538 '\n'. */
539 while (maybe_read_data (c))
540 {
541 line_start = c->data + c->line_start_idx;
542 remaining_size = c->nb_read - c->line_start_idx;
543 line_end = (char *) memchr (line_start, '\n', remaining_size);
544 if (line_end != NULL)
545 {
546 next_line_start = line_end + 1;
547 break;
548 }
549 }
550 if (line_end == NULL)
c65236d6
DM
551 {
552 /* We've loadded all the file into the cache and still no
553 '\n'. Let's say the line ends up at one byte passed the
554 end of the file. This is to stay consistent with the case
555 of when the line ends up with a '\n' and line_end points to
556 that terminal '\n'. That consistency is useful below in
557 the len calculation. */
558 line_end = c->data + c->nb_read ;
559 c->missing_trailing_newline = true;
560 }
561 else
562 c->missing_trailing_newline = false;
9fec0042 563 }
7ecc3eb9 564 else
c65236d6
DM
565 {
566 next_line_start = line_end + 1;
567 c->missing_trailing_newline = false;
568 }
7ecc3eb9
DS
569
570 if (ferror (c->fp))
1adae327 571 return false;
7ecc3eb9
DS
572
573 /* At this point, we've found the end of the of line. It either
574 points to the '\n' or to one byte after the last byte of the
575 file. */
576 gcc_assert (line_end != NULL);
9fec0042 577
7ecc3eb9
DS
578 len = line_end - line_start;
579
580 if (c->line_start_idx < c->nb_read)
581 *line = line_start;
582
583 ++c->line_num;
584
585 /* Before we update our line record, make sure the hint about the
586 total number of lines of the file is correct. If it's not, then
587 we give up recording line boundaries from now on. */
588 bool update_line_record = true;
589 if (c->line_num > c->total_lines)
590 update_line_record = false;
591
592 /* Now update our line record so that re-reading lines from the
593 before c->line_start_idx is faster. */
594 if (update_line_record
595 && c->line_record.length () < fcache_line_record_size)
596 {
597 /* If the file lines fits in the line record, we just record all
598 its lines ...*/
599 if (c->total_lines <= fcache_line_record_size
600 && c->line_num > c->line_record.length ())
601 c->line_record.safe_push (fcache::line_info (c->line_num,
602 c->line_start_idx,
603 line_end - c->data));
604 else if (c->total_lines > fcache_line_record_size)
605 {
606 /* ... otherwise, we just scale total_lines down to
607 (fcache_line_record_size lines. */
608 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
609 if (c->line_record.length () == 0
610 || n >= c->line_record.length ())
611 c->line_record.safe_push (fcache::line_info (c->line_num,
612 c->line_start_idx,
613 line_end - c->data));
614 }
615 }
616
617 /* Update c->line_start_idx so that it points to the next line to be
618 read. */
619 if (next_line_start)
620 c->line_start_idx = next_line_start - c->data;
621 else
622 /* We didn't find any terminal '\n'. Let's consider that the end
623 of line is the end of the data in the cache. The next
624 invocation of get_next_line will either read more data from the
625 underlying file or return false early because we've reached the
626 end of the file. */
627 c->line_start_idx = c->nb_read;
628
629 *line_len = len;
630
631 return true;
632}
633
7ecc3eb9
DS
634/* Consume the next bytes coming from the cache (or from its
635 underlying file if there are remaining unread bytes in the file)
636 until we reach the next end-of-line (or end-of-file). There is no
637 copying from the cache involved. Return TRUE upon successful
638 completion. */
639
640static bool
641goto_next_line (fcache *cache)
642{
643 char *l;
644 ssize_t len;
645
646 return get_next_line (cache, &l, &len);
647}
648
649/* Read an arbitrary line number LINE_NUM from the file cached in C.
1adae327
BE
650 If the line was read successfully, *LINE points to the beginning
651 of the line in the file cache and *LINE_LEN is the length of the
652 line. *LINE is not nul-terminated, but may contain zero bytes.
653 *LINE is only valid until the next call of read_line_num.
7ecc3eb9
DS
654 This function returns bool if a line was read. */
655
656static bool
657read_line_num (fcache *c, size_t line_num,
1adae327 658 char **line, ssize_t *line_len)
7ecc3eb9
DS
659{
660 gcc_assert (line_num > 0);
661
662 if (line_num <= c->line_num)
9789a912 663 {
7ecc3eb9
DS
664 /* We've been asked to read lines that are before c->line_num.
665 So lets use our line record (if it's not empty) to try to
666 avoid re-reading the file from the beginning again. */
7f4d640c 667
7ecc3eb9 668 if (c->line_record.is_empty ())
9fec0042 669 {
7ecc3eb9
DS
670 c->line_start_idx = 0;
671 c->line_num = 0;
672 }
673 else
674 {
675 fcache::line_info *i = NULL;
676 if (c->total_lines <= fcache_line_record_size)
677 {
678 /* In languages where the input file is not totally
679 preprocessed up front, the c->total_lines hint
680 can be smaller than the number of lines of the
681 file. In that case, only the first
682 c->total_lines have been recorded.
683
684 Otherwise, the first c->total_lines we've read have
685 their start/end recorded here. */
686 i = (line_num <= c->total_lines)
687 ? &c->line_record[line_num - 1]
688 : &c->line_record[c->total_lines - 1];
689 gcc_assert (i->line_num <= line_num);
690 }
691 else
692 {
693 /* So the file had more lines than our line record
694 size. Thus the number of lines we've recorded has
695 been scaled down to fcache_line_reacord_size. Let's
696 pick the start/end of the recorded line that is
697 closest to line_num. */
698 size_t n = (line_num <= c->total_lines)
699 ? line_num * fcache_line_record_size / c->total_lines
700 : c ->line_record.length () - 1;
701 if (n < c->line_record.length ())
702 {
703 i = &c->line_record[n];
704 gcc_assert (i->line_num <= line_num);
705 }
706 }
707
708 if (i && i->line_num == line_num)
709 {
1adae327
BE
710 /* We have the start/end of the line. */
711 *line = c->data + i->start_pos;
712 *line_len = i->end_pos - i->start_pos;
7ecc3eb9
DS
713 return true;
714 }
715
716 if (i)
717 {
718 c->line_start_idx = i->start_pos;
719 c->line_num = i->line_num - 1;
720 }
721 else
722 {
723 c->line_start_idx = 0;
724 c->line_num = 0;
725 }
9fec0042 726 }
9fec0042 727 }
7ecc3eb9
DS
728
729 /* Let's walk from line c->line_num up to line_num - 1, without
730 copying any line. */
731 while (c->line_num < line_num - 1)
732 if (!goto_next_line (c))
733 return false;
734
735 /* The line we want is the next one. Let's read and copy it back to
736 the caller. */
1adae327 737 return get_next_line (c, line, line_len);
9fec0042
MLI
738}
739
1adae327
BE
740/* Return the physical source line that corresponds to FILE_PATH/LINE.
741 The line is not nul-terminated. The returned pointer is only
742 valid until the next call of location_get_source_line.
743 Note that the line can contain several null characters,
7761dfbe
DM
744 so the returned value's length has the actual length of the line.
745 If the function fails, a NULL char_span is returned. */
9fec0042 746
7761dfbe
DM
747char_span
748location_get_source_line (const char *file_path, int line)
9fec0042 749{
ac2a97db 750 char *buffer = NULL;
1adae327 751 ssize_t len;
7ecc3eb9 752
31bdd08a 753 if (line == 0)
7761dfbe 754 return char_span (NULL, 0);
367c8286 755
31bdd08a 756 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
367c8286 757 if (c == NULL)
7761dfbe 758 return char_span (NULL, 0);
367c8286 759
31bdd08a 760 bool read = read_line_num (c, line, &buffer, &len);
7761dfbe
DM
761 if (!read)
762 return char_span (NULL, 0);
9fec0042 763
7761dfbe 764 return char_span (buffer, len);
9fec0042
MLI
765}
766
c65236d6
DM
767/* Determine if FILE_PATH missing a trailing newline on its final line.
768 Only valid to call once all of the file has been loaded, by
769 requesting a line number beyond the end of the file. */
770
771bool
772location_missing_trailing_newline (const char *file_path)
773{
774 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
775 if (c == NULL)
776 return false;
777
778 return c->missing_trailing_newline;
779}
780
c468587a
DS
781/* Test if the location originates from the spelling location of a
782 builtin-tokens. That is, return TRUE if LOC is a (possibly
783 virtual) location of a built-in token that appears in the expansion
784 list of a macro. Please note that this function also works on
785 tokens that result from built-in tokens. For instance, the
786 function would return true if passed a token "4" that is the result
787 of the expansion of the built-in __LINE__ macro. */
788bool
620e594b 789is_location_from_builtin_token (location_t loc)
c468587a 790{
0e50b624 791 const line_map_ordinary *map = NULL;
c468587a
DS
792 loc = linemap_resolve_location (line_table, loc,
793 LRK_SPELLING_LOCATION, &map);
794 return loc == BUILTINS_LOCATION;
795}
796
7eb918cc
DS
797/* Expand the source location LOC into a human readable location. If
798 LOC is virtual, it resolves to the expansion point of the involved
799 macro. If LOC resolves to a builtin location, the file name of the
800 readable location is set to the string "<built-in>". */
801
802expanded_location
620e594b 803expand_location (location_t loc)
7eb918cc 804{
c471c6ed
DM
805 return expand_location_1 (loc, /*expansion_point_p=*/true,
806 LOCATION_ASPECT_CARET);
7eb918cc
DS
807}
808
809/* Expand the source location LOC into a human readable location. If
810 LOC is virtual, it resolves to the expansion location of the
811 relevant macro. If LOC resolves to a builtin location, the file
812 name of the readable location is set to the string
813 "<built-in>". */
814
815expanded_location
620e594b 816expand_location_to_spelling_point (location_t loc,
0d48e877 817 enum location_aspect aspect)
7eb918cc 818{
0d48e877 819 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
7eb918cc
DS
820}
821
8a645150 822/* The rich_location class within libcpp requires a way to expand
620e594b 823 location_t instances, and relies on the client code
8a645150
DM
824 providing a symbol named
825 linemap_client_expand_location_to_spelling_point
826 to do this.
827
828 This is the implementation for libcommon.a (all host binaries),
c471c6ed 829 which simply calls into expand_location_1. */
8a645150
DM
830
831expanded_location
620e594b 832linemap_client_expand_location_to_spelling_point (location_t loc,
c471c6ed 833 enum location_aspect aspect)
8a645150 834{
c471c6ed 835 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
8a645150
DM
836}
837
838
e1f0c178
MLI
839/* If LOCATION is in a system header and if it is a virtual location for
840 a token coming from the expansion of a macro, unwind it to the
841 location of the expansion point of the macro. Otherwise, just return
70dc395a
DS
842 LOCATION.
843
844 This is used for instance when we want to emit diagnostics about a
e1f0c178
MLI
845 token that may be located in a macro that is itself defined in a
846 system header, for example, for the NULL macro. In such a case, if
847 LOCATION were passed directly to diagnostic functions such as
848 warning_at, the diagnostic would be suppressed (unless
849 -Wsystem-headers). */
70dc395a 850
620e594b
DM
851location_t
852expansion_point_location_if_in_system_header (location_t location)
70dc395a
DS
853{
854 if (in_system_header_at (location))
855 location = linemap_resolve_location (line_table, location,
856 LRK_MACRO_EXPANSION_POINT,
857 NULL);
858 return location;
859}
7eb918cc 860
79ce98bc
MP
861/* If LOCATION is a virtual location for a token coming from the expansion
862 of a macro, unwind to the location of the expansion point of the macro. */
863
620e594b
DM
864location_t
865expansion_point_location (location_t location)
79ce98bc
MP
866{
867 return linemap_resolve_location (line_table, location,
868 LRK_MACRO_EXPANSION_POINT, NULL);
869}
870
a01fc549
DM
871/* Construct a location with caret at CARET, ranging from START to
872 finish e.g.
873
874 11111111112
875 12345678901234567890
876 522
877 523 return foo + bar;
878 ~~~~^~~~~
879 524
880
881 The location's caret is at the "+", line 523 column 15, but starts
882 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
883 of "bar" at column 19. */
884
885location_t
886make_location (location_t caret, location_t start, location_t finish)
887{
888 location_t pure_loc = get_pure_location (caret);
889 source_range src_range;
9144eabb
DM
890 src_range.m_start = get_start (start);
891 src_range.m_finish = get_finish (finish);
a01fc549
DM
892 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
893 pure_loc,
894 src_range,
895 NULL);
896 return combined_loc;
897}
898
a32c8316
MP
899/* Same as above, but taking a source range rather than two locations. */
900
901location_t
902make_location (location_t caret, source_range src_range)
903{
904 location_t pure_loc = get_pure_location (caret);
905 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
906}
907
64a1a422
TT
908/* Dump statistics to stderr about the memory usage of the line_table
909 set of line maps. This also displays some statistics about macro
910 expansion. */
911
912void
913dump_line_table_statistics (void)
914{
915 struct linemap_stats s;
d17687f6 916 long total_used_map_size,
64a1a422
TT
917 macro_maps_size,
918 total_allocated_map_size;
919
920 memset (&s, 0, sizeof (s));
921
922 linemap_get_statistics (line_table, &s);
923
924 macro_maps_size = s.macro_maps_used_size
925 + s.macro_maps_locations_size;
926
927 total_allocated_map_size = s.ordinary_maps_allocated_size
928 + s.macro_maps_allocated_size
929 + s.macro_maps_locations_size;
930
931 total_used_map_size = s.ordinary_maps_used_size
932 + s.macro_maps_used_size
933 + s.macro_maps_locations_size;
934
d17687f6 935 fprintf (stderr, "Number of expanded macros: %5ld\n",
64a1a422
TT
936 s.num_expanded_macros);
937 if (s.num_expanded_macros != 0)
d17687f6 938 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
64a1a422
TT
939 s.num_macro_tokens / s.num_expanded_macros);
940 fprintf (stderr,
941 "\nLine Table allocations during the "
40ce7fa6 942 "compilation process\n");
d17687f6 943 fprintf (stderr, "Number of ordinary maps used: %5ld%c\n",
40ce7fa6 944 SIZE_AMOUNT (s.num_ordinary_maps_used));
d17687f6 945 fprintf (stderr, "Ordinary map used size: %5ld%c\n",
40ce7fa6 946 SIZE_AMOUNT (s.ordinary_maps_used_size));
d17687f6 947 fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n",
40ce7fa6 948 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
d17687f6 949 fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n",
40ce7fa6 950 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
d17687f6 951 fprintf (stderr, "Number of macro maps used: %5ld%c\n",
40ce7fa6 952 SIZE_AMOUNT (s.num_macro_maps_used));
d17687f6 953 fprintf (stderr, "Macro maps used size: %5ld%c\n",
40ce7fa6 954 SIZE_AMOUNT (s.macro_maps_used_size));
d17687f6 955 fprintf (stderr, "Macro maps locations size: %5ld%c\n",
40ce7fa6 956 SIZE_AMOUNT (s.macro_maps_locations_size));
d17687f6 957 fprintf (stderr, "Macro maps size: %5ld%c\n",
40ce7fa6 958 SIZE_AMOUNT (macro_maps_size));
d17687f6 959 fprintf (stderr, "Duplicated maps locations size: %5ld%c\n",
40ce7fa6 960 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
d17687f6 961 fprintf (stderr, "Total allocated maps size: %5ld%c\n",
40ce7fa6 962 SIZE_AMOUNT (total_allocated_map_size));
d17687f6 963 fprintf (stderr, "Total used maps size: %5ld%c\n",
40ce7fa6 964 SIZE_AMOUNT (total_used_map_size));
ee015909 965 fprintf (stderr, "Ad-hoc table size: %5ld%c\n",
40ce7fa6
ML
966 SIZE_AMOUNT (s.adhoc_table_size));
967 fprintf (stderr, "Ad-hoc table entries used: %5ld%c\n",
968 SIZE_AMOUNT (s.adhoc_table_entries_used));
969 fprintf (stderr, "optimized_ranges: %5xu%c\n",
970 SIZE_AMOUNT (line_table->num_optimized_ranges));
971 fprintf (stderr, "unoptimized_ranges: %5xu%c\n",
972 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
ee015909 973
64a1a422
TT
974 fprintf (stderr, "\n");
975}
ba4ad400
DM
976
977/* Get location one beyond the final location in ordinary map IDX. */
978
620e594b 979static location_t
ba4ad400
DM
980get_end_location (struct line_maps *set, unsigned int idx)
981{
982 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
983 return set->highest_location;
984
985 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
986 return MAP_START_LOCATION (next_map);
987}
988
/* Helper function for write_digit_row.
   Write to STREAM the single character obtained by adding the remainder
   of DIGIT divided by 10 to '0'.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
996
997/* Helper function for dump_location_info.
998 Write a row of numbers to STREAM, numbering a source line,
999 giving the units, tens, hundreds etc of the column number. */
1000
1001static void
1002write_digit_row (FILE *stream, int indent,
ebedc9a3 1003 const line_map_ordinary *map,
620e594b 1004 location_t loc, int max_col, int divisor)
ba4ad400
DM
1005{
1006 fprintf (stream, "%*c", indent, ' ');
1007 fprintf (stream, "|");
1008 for (int column = 1; column < max_col; column++)
1009 {
620e594b 1010 location_t column_loc = loc + (column << map->m_range_bits);
ba4ad400
DM
1011 write_digit (stream, column_loc / divisor);
1012 }
1013 fprintf (stream, "\n");
1014}
1015
1016/* Write a half-closed (START) / half-open (END) interval of
620e594b 1017 location_t to STREAM. */
ba4ad400
DM
1018
1019static void
1020dump_location_range (FILE *stream,
620e594b 1021 location_t start, location_t end)
ba4ad400
DM
1022{
1023 fprintf (stream,
620e594b 1024 " location_t interval: %u <= loc < %u\n",
ba4ad400
DM
1025 start, end);
1026}
1027
1028/* Write a labelled description of a half-closed (START) / half-open (END)
620e594b 1029 interval of location_t to STREAM. */
ba4ad400
DM
1030
1031static void
1032dump_labelled_location_range (FILE *stream,
1033 const char *name,
620e594b 1034 location_t start, location_t end)
ba4ad400
DM
1035{
1036 fprintf (stream, "%s\n", name);
1037 dump_location_range (stream, start, end);
1038 fprintf (stream, "\n");
1039}
1040
1041/* Write a visualization of the locations in the line_table to STREAM. */
1042
1043void
1044dump_location_info (FILE *stream)
1045{
1046 /* Visualize the reserved locations. */
1047 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1048 0, RESERVED_LOCATION_COUNT);
1049
1050 /* Visualize the ordinary line_map instances, rendering the sources. */
1051 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1052 {
620e594b 1053 location_t end_location = get_end_location (line_table, idx);
ba4ad400
DM
1054 /* half-closed: doesn't include this one. */
1055
0e50b624
DM
1056 const line_map_ordinary *map
1057 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
ba4ad400
DM
1058 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1059 dump_location_range (stream,
1060 MAP_START_LOCATION (map), end_location);
1061 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1062 fprintf (stream, " starting at line: %i\n",
1063 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
ebedc9a3
DM
1064 fprintf (stream, " column and range bits: %i\n",
1065 map->m_column_and_range_bits);
ba4ad400 1066 fprintf (stream, " column bits: %i\n",
ebedc9a3
DM
1067 map->m_column_and_range_bits - map->m_range_bits);
1068 fprintf (stream, " range bits: %i\n",
1069 map->m_range_bits);
ba4ad400
DM
1070
1071 /* Render the span of source lines that this "map" covers. */
620e594b 1072 for (location_t loc = MAP_START_LOCATION (map);
ba4ad400 1073 loc < end_location;
ebedc9a3 1074 loc += (1 << map->m_range_bits) )
ba4ad400 1075 {
ebedc9a3
DM
1076 gcc_assert (pure_location_p (line_table, loc) );
1077
ba4ad400
DM
1078 expanded_location exploc
1079 = linemap_expand_location (line_table, map, loc);
1080
01512446 1081 if (exploc.column == 0)
ba4ad400
DM
1082 {
1083 /* Beginning of a new source line: draw the line. */
1084
7761dfbe
DM
1085 char_span line_text = location_get_source_line (exploc.file,
1086 exploc.line);
ba4ad400
DM
1087 if (!line_text)
1088 break;
1089 fprintf (stream,
1090 "%s:%3i|loc:%5i|%.*s\n",
1091 exploc.file, exploc.line,
1092 loc,
7761dfbe 1093 (int)line_text.length (), line_text.get_buffer ());
ba4ad400
DM
1094
1095 /* "loc" is at column 0, which means "the whole line".
1096 Render the locations *within* the line, by underlining
620e594b 1097 it, showing the location_t numeric values
ba4ad400 1098 at each column. */
7761dfbe
DM
1099 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1100 if (max_col > line_text.length ())
1101 max_col = line_text.length () + 1;
ba4ad400
DM
1102
1103 int indent = 14 + strlen (exploc.file);
1104
1105 /* Thousands. */
1106 if (end_location > 999)
ebedc9a3 1107 write_digit_row (stream, indent, map, loc, max_col, 1000);
ba4ad400
DM
1108
1109 /* Hundreds. */
1110 if (end_location > 99)
ebedc9a3 1111 write_digit_row (stream, indent, map, loc, max_col, 100);
ba4ad400
DM
1112
1113 /* Tens. */
ebedc9a3 1114 write_digit_row (stream, indent, map, loc, max_col, 10);
ba4ad400
DM
1115
1116 /* Units. */
ebedc9a3 1117 write_digit_row (stream, indent, map, loc, max_col, 1);
ba4ad400
DM
1118 }
1119 }
1120 fprintf (stream, "\n");
1121 }
1122
1123 /* Visualize unallocated values. */
1124 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1125 line_table->highest_location,
1126 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1127
1128 /* Visualize the macro line_map instances, rendering the sources. */
1129 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1130 {
620e594b 1131 /* Each macro map that is allocated owns location_t values
ba4ad400
DM
1132 that are *lower* that the one before them.
1133 Hence it's meaningful to view them either in order of ascending
1134 source locations, or in order of ascending macro map index. */
620e594b
DM
1135 const bool ascending_location_ts = true;
1136 unsigned int idx = (ascending_location_ts
ba4ad400
DM
1137 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1138 : i);
0e50b624 1139 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
ba4ad400
DM
1140 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1141 idx,
1142 linemap_map_get_macro_name (map),
1143 MACRO_MAP_NUM_MACRO_TOKENS (map));
1144 dump_location_range (stream,
1145 map->start_location,
1146 (map->start_location
1147 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1148 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1149 "expansion point is location %i",
1150 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1151 fprintf (stream, " map->start_location: %u\n",
1152 map->start_location);
1153
1154 fprintf (stream, " macro_locations:\n");
1155 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1156 {
620e594b
DM
1157 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1158 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
ba4ad400
DM
1159
1160 /* linemap_add_macro_token encodes token numbers in an expansion
1161 by putting them after MAP_START_LOCATION. */
1162
1163 /* I'm typically seeing 4 uninitialized entries at the end of
1164 0xafafafaf.
1165 This appears to be due to macro.c:replace_args
1166 adding 2 extra args for padding tokens; presumably there may
1167 be a leading and/or trailing padding token injected,
1168 each for 2 more location slots.
620e594b 1169 This would explain there being up to 4 location_ts slots
ba4ad400
DM
1170 that may be uninitialized. */
1171
1172 fprintf (stream, " %u: %u, %u\n",
1173 i,
1174 x,
1175 y);
1176 if (x == y)
1177 {
1178 if (x < MAP_START_LOCATION (map))
1179 inform (x, "token %u has x-location == y-location == %u", i, x);
1180 else
1181 fprintf (stream,
1182 "x-location == y-location == %u encodes token # %u\n",
1183 x, x - MAP_START_LOCATION (map));
1184 }
1185 else
1186 {
1187 inform (x, "token %u has x-location == %u", i, x);
1188 inform (x, "token %u has y-location == %u", i, y);
1189 }
1190 }
1191 fprintf (stream, "\n");
1192 }
1193
620e594b 1194 /* It appears that MAX_LOCATION_T itself is never assigned to a
ba4ad400
DM
1195 macro map, presumably due to an off-by-one error somewhere
1196 between the logic in linemap_enter_macro and
1197 LINEMAPS_MACRO_LOWEST_LOCATION. */
620e594b
DM
1198 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1199 MAX_LOCATION_T,
1200 MAX_LOCATION_T + 1);
ba4ad400
DM
1201
1202 /* Visualize ad-hoc values. */
1203 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
620e594b 1204 MAX_LOCATION_T + 1, UINT_MAX);
ba4ad400 1205}
d9b950dd 1206
88fa5555
DM
1207/* string_concat's constructor. */
1208
1209string_concat::string_concat (int num, location_t *locs)
1210 : m_num (num)
1211{
1212 m_locs = ggc_vec_alloc <location_t> (num);
1213 for (int i = 0; i < num; i++)
1214 m_locs[i] = locs[i];
1215}
1216
1217/* string_concat_db's constructor. */
1218
1219string_concat_db::string_concat_db ()
1220{
1221 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1222}
1223
1224/* Record that a string concatenation occurred, covering NUM
1225 string literal tokens. LOCS is an array of size NUM, containing the
1226 locations of the tokens. A copy of LOCS is taken. */
1227
1228void
1229string_concat_db::record_string_concatenation (int num, location_t *locs)
1230{
1231 gcc_assert (num > 1);
1232 gcc_assert (locs);
1233
1234 location_t key_loc = get_key_loc (locs[0]);
1235
1236 string_concat *concat
1237 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1238 m_table->put (key_loc, concat);
1239}
1240
1241/* Determine if LOC was the location of the the initial token of a
1242 concatenation of string literal tokens.
1243 If so, *OUT_NUM is written to with the number of tokens, and
1244 *OUT_LOCS with the location of an array of locations of the
1245 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1246 storage owned by the string_concat_db.
1247 Otherwise, return false. */
1248
1249bool
1250string_concat_db::get_string_concatenation (location_t loc,
1251 int *out_num,
1252 location_t **out_locs)
1253{
1254 gcc_assert (out_num);
1255 gcc_assert (out_locs);
1256
1257 location_t key_loc = get_key_loc (loc);
1258
1259 string_concat **concat = m_table->get (key_loc);
1260 if (!concat)
1261 return false;
1262
1263 *out_num = (*concat)->m_num;
1264 *out_locs =(*concat)->m_locs;
1265 return true;
1266}
1267
1268/* Internal function. Canonicalize LOC into a form suitable for
1269 use as a key within the database, stripping away macro expansion,
1270 ad-hoc information, and range information, using the location of
1271 the start of LOC within an ordinary linemap. */
1272
1273location_t
1274string_concat_db::get_key_loc (location_t loc)
1275{
1276 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1277 NULL);
1278
1279 loc = get_range_from_loc (line_table, loc).m_start;
1280
1281 return loc;
1282}
1283
1284/* Helper class for use within get_substring_ranges_for_loc.
1285 An vec of cpp_string with responsibility for releasing all of the
1286 str->text for each str in the vector. */
1287
1288class auto_cpp_string_vec : public auto_vec <cpp_string>
1289{
1290 public:
1291 auto_cpp_string_vec (int alloc)
1292 : auto_vec <cpp_string> (alloc) {}
1293
1294 ~auto_cpp_string_vec ()
1295 {
1296 /* Clean up the copies within this vec. */
1297 int i;
1298 cpp_string *str;
1299 FOR_EACH_VEC_ELT (*this, i, str)
1300 free (const_cast <unsigned char *> (str->text));
1301 }
1302};
1303
1304/* Attempt to populate RANGES with source location information on the
1305 individual characters within the string literal found at STRLOC.
1306 If CONCATS is non-NULL, then any string literals that the token at
1307 STRLOC was concatenated with are also added to RANGES.
1308
1309 Return NULL if successful, or an error message if any errors occurred (in
1310 which case RANGES may be only partially populated and should not
1311 be used).
1312
1313 This is implemented by re-parsing the relevant source line(s). */
1314
1315static const char *
1316get_substring_ranges_for_loc (cpp_reader *pfile,
1317 string_concat_db *concats,
1318 location_t strloc,
1319 enum cpp_ttype type,
1320 cpp_substring_ranges &ranges)
1321{
1322 gcc_assert (pfile);
1323
1324 if (strloc == UNKNOWN_LOCATION)
1325 return "unknown location";
1326
67b5d0b2
DM
1327 /* Reparsing the strings requires accurate location information.
1328 If -ftrack-macro-expansion has been overridden from its default
1329 of 2, then we might have a location of a macro expansion point,
1330 rather than the location of the literal itself.
1331 Avoid this by requiring that we have full macro expansion tracking
1332 for substring locations to be available. */
1333 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1334 return "track_macro_expansion != 2";
1335
94f597df
DM
1336 /* If #line or # 44 "file"-style directives are present, then there's
1337 no guarantee that the line numbers we have can be used to locate
1338 the strings. For example, we might have a .i file with # directives
1339 pointing back to lines within a .c file, but the .c file might
1340 have been edited since the .i file was created.
1341 In such a case, the safest course is to disable on-demand substring
1342 locations. */
1343 if (line_table->seen_line_directive)
1344 return "seen line directive";
1345
88fa5555
DM
1346 /* If string concatenation has occurred at STRLOC, get the locations
1347 of all of the literal tokens making up the compound string.
1348 Otherwise, just use STRLOC. */
1349 int num_locs = 1;
1350 location_t *strlocs = &strloc;
1351 if (concats)
1352 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1353
1354 auto_cpp_string_vec strs (num_locs);
1355 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1356 for (int i = 0; i < num_locs; i++)
1357 {
1358 /* Get range of strloc. We will use it to locate the start and finish
1359 of the literal token within the line. */
1360 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1361
1362 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
0d48e877
DM
1363 {
1364 /* If the string token was within a macro expansion, then we can
1365 cope with it for the simple case where we have a single token.
1366 Otherwise, bail out. */
1367 if (src_range.m_start != src_range.m_finish)
1368 return "macro expansion";
1369 }
1370 else
1371 {
1372 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1373 /* If so, we can't reliably determine where the token started within
1374 its line. */
1375 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1376
1377 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1378 /* If so, we can't reliably determine where the token finished
1379 within its line. */
1380 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1381 }
88fa5555
DM
1382
1383 expanded_location start
0d48e877
DM
1384 = expand_location_to_spelling_point (src_range.m_start,
1385 LOCATION_ASPECT_START);
88fa5555 1386 expanded_location finish
0d48e877
DM
1387 = expand_location_to_spelling_point (src_range.m_finish,
1388 LOCATION_ASPECT_FINISH);
88fa5555
DM
1389 if (start.file != finish.file)
1390 return "range endpoints are in different files";
1391 if (start.line != finish.line)
1392 return "range endpoints are on different lines";
1393 if (start.column > finish.column)
1394 return "range endpoints are reversed";
1395
7761dfbe
DM
1396 char_span line = location_get_source_line (start.file, start.line);
1397 if (!line)
88fa5555
DM
1398 return "unable to read source line";
1399
1400 /* Determine the location of the literal (including quotes
1401 and leading prefix chars, such as the 'u' in a u""
1402 token). */
7761dfbe 1403 size_t literal_length = finish.column - start.column + 1;
88fa5555 1404
7cfa044d 1405 /* Ensure that we don't crash if we got the wrong location. */
7761dfbe 1406 if (line.length () < (start.column - 1 + literal_length))
7cfa044d
DM
1407 return "line is not wide enough";
1408
7761dfbe
DM
1409 char_span literal = line.subspan (start.column - 1, literal_length);
1410
88fa5555
DM
1411 cpp_string from;
1412 from.len = literal_length;
1413 /* Make a copy of the literal, to avoid having to rely on
1414 the lifetime of the copy of the line within the cache.
1415 This will be released by the auto_cpp_string_vec dtor. */
7761dfbe 1416 from.text = (unsigned char *)literal.xstrdup ();
88fa5555
DM
1417 strs.safe_push (from);
1418
1419 /* For very long lines, a new linemap could have started
1420 halfway through the token.
1421 Ensure that the loc_reader uses the linemap of the
1422 *end* of the token for its start location. */
05d57d65
DM
1423 const line_map_ordinary *start_ord_map;
1424 linemap_resolve_location (line_table, src_range.m_start,
1425 LRK_SPELLING_LOCATION, &start_ord_map);
88fa5555
DM
1426 const line_map_ordinary *final_ord_map;
1427 linemap_resolve_location (line_table, src_range.m_finish,
05d57d65 1428 LRK_SPELLING_LOCATION, &final_ord_map);
3d0a5393
DM
1429 if (start_ord_map == NULL || final_ord_map == NULL)
1430 return "failed to get ordinary maps";
05d57d65
DM
1431 /* Bulletproofing. We ought to only have different ordinary maps
1432 for start vs finish due to line-length jumps. */
1433 if (start_ord_map != final_ord_map
1434 && start_ord_map->to_file != final_ord_map->to_file)
1435 return "start and finish are spelled in different ordinary maps";
88fa5555
DM
1436 location_t start_loc
1437 = linemap_position_for_line_and_column (line_table, final_ord_map,
1438 start.line, start.column);
1439
1440 cpp_string_location_reader loc_reader (start_loc, line_table);
1441 loc_readers.safe_push (loc_reader);
1442 }
1443
1444 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1445 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1446 loc_readers.address (),
1447 num_locs, &ranges, type);
1448 if (err)
1449 return err;
1450
1451 /* Success: "ranges" should now contain information on the string. */
1452 return NULL;
1453}
1454
65e736c0
DM
1455/* Attempt to populate *OUT_LOC with source location information on the
1456 given characters within the string literal found at STRLOC.
1457 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1458 character set.
1459
1460 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1461 and string literal "012345\n789"
1462 *OUT_LOC is written to with:
1463 "012345\n789"
1464 ~^~~~~
1465
88fa5555
DM
1466 If CONCATS is non-NULL, then any string literals that the token at
1467 STRLOC was concatenated with are also considered.
1468
1469 This is implemented by re-parsing the relevant source line(s).
1470
1471 Return NULL if successful, or an error message if any errors occurred.
1472 Error messages are intended for GCC developers (to help debugging) rather
1473 than for end-users. */
1474
1475const char *
620e594b
DM
1476get_location_within_string (cpp_reader *pfile,
1477 string_concat_db *concats,
1478 location_t strloc,
1479 enum cpp_ttype type,
1480 int caret_idx, int start_idx, int end_idx,
1481 location_t *out_loc)
65e736c0
DM
1482{
1483 gcc_checking_assert (caret_idx >= 0);
88fa5555
DM
1484 gcc_checking_assert (start_idx >= 0);
1485 gcc_checking_assert (end_idx >= 0);
65e736c0 1486 gcc_assert (out_loc);
88fa5555
DM
1487
1488 cpp_substring_ranges ranges;
1489 const char *err
1490 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1491 if (err)
1492 return err;
1493
65e736c0
DM
1494 if (caret_idx >= ranges.get_num_ranges ())
1495 return "caret_idx out of range";
88fa5555
DM
1496 if (start_idx >= ranges.get_num_ranges ())
1497 return "start_idx out of range";
1498 if (end_idx >= ranges.get_num_ranges ())
1499 return "end_idx out of range";
1500
65e736c0
DM
1501 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1502 ranges.get_range (start_idx).m_start,
1503 ranges.get_range (end_idx).m_finish);
1504 return NULL;
1505}
1506
0e06d2b3
DM
1507#if CHECKING_P
1508
1509namespace selftest {
1510
1511/* Selftests of location handling. */
1512
65e736c0
DM
1513/* Attempt to populate *OUT_RANGE with source location information on the
1514 given character within the string literal found at STRLOC.
1515 CHAR_IDX refers to an offset within the execution character set.
1516 If CONCATS is non-NULL, then any string literals that the token at
1517 STRLOC was concatenated with are also considered.
1518
1519 This is implemented by re-parsing the relevant source line(s).
1520
1521 Return NULL if successful, or an error message if any errors occurred.
1522 Error messages are intended for GCC developers (to help debugging) rather
1523 than for end-users. */
1524
1525static const char *
1526get_source_range_for_char (cpp_reader *pfile,
1527 string_concat_db *concats,
1528 location_t strloc,
1529 enum cpp_ttype type,
1530 int char_idx,
1531 source_range *out_range)
1532{
1533 gcc_checking_assert (char_idx >= 0);
1534 gcc_assert (out_range);
1535
1536 cpp_substring_ranges ranges;
1537 const char *err
1538 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1539 if (err)
1540 return err;
1541
1542 if (char_idx >= ranges.get_num_ranges ())
1543 return "char_idx out of range";
1544
1545 *out_range = ranges.get_range (char_idx);
88fa5555
DM
1546 return NULL;
1547}
1548
65e736c0 1549/* As get_source_range_for_char, but write to *OUT the number
88fa5555
DM
1550 of ranges that are available. */
1551
0e06d2b3 1552static const char *
88fa5555
DM
1553get_num_source_ranges_for_substring (cpp_reader *pfile,
1554 string_concat_db *concats,
1555 location_t strloc,
1556 enum cpp_ttype type,
1557 int *out)
1558{
1559 gcc_assert (out);
1560
1561 cpp_substring_ranges ranges;
1562 const char *err
1563 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1564
1565 if (err)
1566 return err;
1567
1568 *out = ranges.get_num_ranges ();
1569 return NULL;
1570}
1571
d9b950dd
DM
1572/* Selftests of location handling. */
1573
082284da
DM
/* Verify that compare() on linenum_type handles comparisons over the full
   range of the type.  */

static void
test_linenum_comparisons ()
{
  /* The two values exercised: 0 and 0xffffffff.  */
  linenum_type min_line (0);
  linenum_type max_line (0xffffffff);

  /* A value compared with itself gives zero.  */
  ASSERT_EQ (0, compare (min_line, min_line));
  ASSERT_EQ (0, compare (max_line, max_line));

  /* The sign of the result gives the ordering, whichever way round the
     operands are passed.  */
  ASSERT_GT (compare (max_line, min_line), 0);
  ASSERT_LT (compare (min_line, max_line), 0);
}
1588
741d3be5
DM
1589/* Helper function for verifying location data: when location_t
1590 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1591 as having column 0. */
1592
1593static bool
1594should_have_column_data_p (location_t loc)
1595{
1596 if (IS_ADHOC_LOC (loc))
1597 loc = get_location_from_adhoc_loc (line_table, loc);
1598 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1599 return false;
1600 return true;
1601}
1602
/* Selftest for should_have_column_data_p.
   Check a low location, and the locations on either side of the
   LINE_MAP_MAX_LOCATION_WITH_COLS threshold.  */

static void
test_should_have_column_data_p ()
{
  ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
  /* The threshold itself still has column data...  */
  ASSERT_TRUE
    (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
  /* ...but the location immediately above it does not.  */
  ASSERT_FALSE
    (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
}
1614
d9b950dd
DM
1615/* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1616 on LOC. */
1617
1618static void
1619assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1620 location_t loc)
1621{
1622 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1623 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
741d3be5
DM
1624 /* If location_t values are sufficiently high, then column numbers
1625 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1626 When close to the threshold, column numbers *may* be present: if
1627 the final linemap before the threshold contains a line that straddles
1628 the threshold, locations in that line have column information. */
1629 if (should_have_column_data_p (loc))
1630 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1631}
1632
f87e22c5
DM
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for location_t beyond which line-map.c changes
     behavior (disabling of the range-packing optimization, disabling
     of column-tracking).  We can exercise these by starting the line_table
     at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */

struct line_table_case
{
  /* The value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which to start the line_table's
     highest_location and highest_line.  */
  int m_base_location;

  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }
};
1659
f87e22c5
DM
1660/* Constructor. Store the old value of line_table, and create a new
1661 one, using sane defaults. */
741d3be5 1662
f87e22c5 1663line_table_test::line_table_test ()
741d3be5 1664{
f87e22c5
DM
1665 gcc_assert (saved_line_table == NULL);
1666 saved_line_table = line_table;
1667 line_table = ggc_alloc<line_maps> ();
1668 linemap_init (line_table, BUILTINS_LOCATION);
1669 gcc_assert (saved_line_table->reallocator);
1670 line_table->reallocator = saved_line_table->reallocator;
1671 gcc_assert (saved_line_table->round_alloc_size);
1672 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1673 line_table->default_range_bits = 0;
1674}
741d3be5
DM
1675
1676/* Constructor. Store the old value of line_table, and create a new
1677 one, using the sitation described in CASE_. */
1678
f87e22c5 1679line_table_test::line_table_test (const line_table_case &case_)
741d3be5 1680{
f87e22c5
DM
1681 gcc_assert (saved_line_table == NULL);
1682 saved_line_table = line_table;
741d3be5
DM
1683 line_table = ggc_alloc<line_maps> ();
1684 linemap_init (line_table, BUILTINS_LOCATION);
f87e22c5
DM
1685 gcc_assert (saved_line_table->reallocator);
1686 line_table->reallocator = saved_line_table->reallocator;
1687 gcc_assert (saved_line_table->round_alloc_size);
1688 line_table->round_alloc_size = saved_line_table->round_alloc_size;
741d3be5
DM
1689 line_table->default_range_bits = case_.m_default_range_bits;
1690 if (case_.m_base_location)
1691 {
1692 line_table->highest_location = case_.m_base_location;
1693 line_table->highest_line = case_.m_base_location;
1694 }
1695}
1696
1697/* Destructor. Restore the old value of line_table. */
1698
f87e22c5 1699line_table_test::~line_table_test ()
741d3be5 1700{
f87e22c5
DM
1701 gcc_assert (saved_line_table != NULL);
1702 line_table = saved_line_table;
1703 saved_line_table = NULL;
d9b950dd
DM
1704}
1705
/* Verify basic operation of ordinary linemaps.

   Using a temporary line_table set up according to CASE_, build
   locations within two files ("foo.c" and "bar.c"), covering short
   lines, a very long line, and a line with columns that are too wide to
   be tracked.  Then verify that the file, line and column can be
   recovered from each location, and that a range built with
   make_location can be taken apart again.  */

static void
test_accessing_ordinary_linemaps (const line_table_case &case_)
{
  /* Swap in a fresh line_table for the duration of this function.  */
  line_table_test ltt (case_);

  /* Build a simple linemap describing some locations. */
  linemap_add (line_table, LC_ENTER, false, "foo.c", 0);

  linemap_line_start (line_table, 1, 100);
  location_t loc_a = linemap_position_for_column (line_table, 1);
  location_t loc_b = linemap_position_for_column (line_table, 23);

  linemap_line_start (line_table, 2, 100);
  location_t loc_c = linemap_position_for_column (line_table, 1);
  location_t loc_d = linemap_position_for_column (line_table, 17);

  /* Example of a very long line.  */
  linemap_line_start (line_table, 3, 2000);
  location_t loc_e = linemap_position_for_column (line_table, 700);

  /* Transitioning back to a short line.  */
  linemap_line_start (line_table, 4, 0);
  location_t loc_back_to_short = linemap_position_for_column (line_table, 100);

  if (should_have_column_data_p (loc_back_to_short))
    {
      /* Verify that we switched to short lines in the linemap.
         The difference checked below is the number of column bits.  */
      line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
      ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
    }

  /* Example of a line that will eventually be seen to be longer
     than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
     below that.  */
  linemap_line_start (line_table, 5, 2000);

  location_t loc_start_of_very_long_line
    = linemap_position_for_column (line_table, 2000);
  location_t loc_too_wide
    = linemap_position_for_column (line_table, 4097);
  location_t loc_too_wide_2
    = linemap_position_for_column (line_table, 4098);

  /* ...and back to a sane line length.  */
  linemap_line_start (line_table, 6, 100);
  location_t loc_sane_again = linemap_position_for_column (line_table, 10);

  linemap_add (line_table, LC_LEAVE, false, NULL, 0);

  /* Multiple files.  */
  linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
  linemap_line_start (line_table, 1, 200);
  location_t loc_f = linemap_position_for_column (line_table, 150);
  linemap_add (line_table, LC_LEAVE, false, NULL, 0);

  /* Verify that we can recover the location info.  */
  assert_loceq ("foo.c", 1, 1, loc_a);
  assert_loceq ("foo.c", 1, 23, loc_b);
  assert_loceq ("foo.c", 2, 1, loc_c);
  assert_loceq ("foo.c", 2, 17, loc_d);
  assert_loceq ("foo.c", 3, 700, loc_e);
  assert_loceq ("foo.c", 4, 100, loc_back_to_short);

  /* In the very wide line, the initial location should be fully tracked.  */
  assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
  /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
     be disabled.  */
  assert_loceq ("foo.c", 5, 0, loc_too_wide);
  assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
  /*...and column-tracking should be re-enabled for subsequent lines.  */
  assert_loceq ("foo.c", 6, 10, loc_sane_again);

  assert_loceq ("bar.c", 1, 150, loc_f);

  /* A location built directly from a line and column is neither a
     builtin location nor one carrying range information.  */
  ASSERT_FALSE (is_location_from_builtin_token (loc_a));
  ASSERT_TRUE (pure_location_p (line_table, loc_a));

  /* Verify using make_location to build a range, and extracting data
     back from it: caret at loc_c, starting at loc_b, finishing at
     loc_d.  */
  location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
  ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
  ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
  source_range src_range = get_range_from_loc (line_table, range_c_b_d);
  ASSERT_EQ (loc_b, src_range.m_start);
  ASSERT_EQ (loc_d, src_range.m_finish);
}
1794
/* Verify various properties of UNKNOWN_LOCATION: it should have no
   file, and both its line and its column should be zero.  */

static void
test_unknown_location ()
{
  ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
  ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
  ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
}
1804
/* Verify various properties of BUILTINS_LOCATION: its file should be
   the (translated) string "<built-in>", its line and column should be
   zero, and is_location_from_builtin_token should hold for it.  */

static void
test_builtins ()
{
  assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
  ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
}
1813
9144eabb 1814/* Regression test for make_location.
cfa435e1
DM
1815 Ensure that we use pure locations for the start/finish of the range,
1816 rather than storing a packed or ad-hoc range as the start/finish. */
9144eabb
DM
1817
1818static void
1819test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1820{
1821 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1822 with C++ frontend.
1823 ....................0000000001111111111222.
1824 ....................1234567890123456789012. */
1825 const char *content = " r += !aaa == bbb;\n";
1826 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1827 line_table_test ltt (case_);
1828 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1829
1830 const location_t c11 = linemap_position_for_column (line_table, 11);
1831 const location_t c12 = linemap_position_for_column (line_table, 12);
1832 const location_t c13 = linemap_position_for_column (line_table, 13);
1833 const location_t c14 = linemap_position_for_column (line_table, 14);
1834 const location_t c21 = linemap_position_for_column (line_table, 21);
1835
1836 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1837 return;
1838
1839 /* Use column 13 for the caret location, arbitrarily, to verify that we
1840 handle start != caret. */
1841 const location_t aaa = make_location (c13, c12, c14);
1842 ASSERT_EQ (c13, get_pure_location (aaa));
1843 ASSERT_EQ (c12, get_start (aaa));
1844 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1845 ASSERT_EQ (c14, get_finish (aaa));
1846 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1847
1848 /* Make a location using a location with a range as the start-point. */
1849 const location_t not_aaa = make_location (c11, aaa, c14);
1850 ASSERT_EQ (c11, get_pure_location (not_aaa));
1851 /* It should use the start location of the range, not store the range
1852 itself. */
1853 ASSERT_EQ (c12, get_start (not_aaa));
1854 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1855 ASSERT_EQ (c14, get_finish (not_aaa));
1856 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1857
1858 /* Similarly, make a location with a range as the end-point. */
1859 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1860 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1861 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1862 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1863 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1864 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1865 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1866 /* It should use the finish location of the range, not store the range
1867 itself. */
1868 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1869 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1870 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1871 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1872 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1873}
1874
d9b950dd
DM
1875/* Verify reading of input files (e.g. for caret-based diagnostics). */
1876
1877static void
1878test_reading_source_line ()
1879{
85ecd05c 1880 /* Create a tempfile and write some text to it. */
741d3be5
DM
1881 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1882 "01234567890123456789\n"
1883 "This is the test text\n"
1adae327 1884 "This is the 3rd line");
85ecd05c
DM
1885
1886 /* Read back a specific line from the tempfile. */
7761dfbe
DM
1887 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
1888 ASSERT_TRUE (source_line);
1889 ASSERT_TRUE (source_line.get_buffer () != NULL);
1890 ASSERT_EQ (20, source_line.length ());
1adae327 1891 ASSERT_TRUE (!strncmp ("This is the 3rd line",
7761dfbe 1892 source_line.get_buffer (), source_line.length ()));
1adae327 1893
7761dfbe
DM
1894 source_line = location_get_source_line (tmp.get_filename (), 2);
1895 ASSERT_TRUE (source_line);
1896 ASSERT_TRUE (source_line.get_buffer () != NULL);
1897 ASSERT_EQ (21, source_line.length ());
1adae327 1898 ASSERT_TRUE (!strncmp ("This is the test text",
7761dfbe 1899 source_line.get_buffer (), source_line.length ()));
85ecd05c 1900
7761dfbe
DM
1901 source_line = location_get_source_line (tmp.get_filename (), 4);
1902 ASSERT_FALSE (source_line);
1903 ASSERT_TRUE (source_line.get_buffer () == NULL);
d9b950dd
DM
1904}
1905
741d3be5
DM
1906/* Tests of lexing. */
1907
/* Spell token TOK via cpp_token_as_text (using PARSER) and assert that
   the resulting text is string-equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT) \
  SELFTEST_BEGIN_STMT \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK)); \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt); \
  SELFTEST_END_STMT
1916
1917/* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1918 and ranges from EXP_START_COL to EXP_FINISH_COL.
1919 Use LOC as the effective location of the selftest. */
1920
1921static void
1922assert_token_loc_eq (const location &loc,
1923 const cpp_token *tok,
1924 const char *exp_filename, int exp_linenum,
1925 int exp_start_col, int exp_finish_col)
1926{
1927 location_t tok_loc = tok->src_loc;
1928 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1929 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1930
1931 /* If location_t values are sufficiently high, then column numbers
1932 will be unavailable. */
1933 if (!should_have_column_data_p (tok_loc))
1934 return;
1935
1936 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1937 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1938 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1939 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1940}
1941
/* Convenience wrapper around assert_token_loc_eq for checking
   TOK->src_loc, passing SELFTEST_LOCATION so that failures are reported
   at the point of use of the macro.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
                            EXP_START_COL, EXP_FINISH_COL) \
  assert_token_loc_eq (SELFTEST_LOCATION, \
                       (TOK), (EXP_FILENAME), (EXP_LINENUM), \
                       (EXP_START_COL), (EXP_FINISH_COL))
1949
1950/* Test of lexing a file using libcpp, verifying tokens and their
1951 location information. */
1952
1953static void
1954test_lexer (const line_table_case &case_)
1955{
1956 /* Create a tempfile and write some text to it. */
1957 const char *content =
1958 /*00000000011111111112222222222333333.3333444444444.455555555556
1959 12345678901234567890123456789012345.6789012345678.901234567890. */
1960 ("test_name /* c-style comment */\n"
1961 " \"test literal\"\n"
1962 " // test c++-style comment\n"
1963 " 42\n");
1964 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1965
f87e22c5 1966 line_table_test ltt (case_);
741d3be5
DM
1967
1968 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1969
1970 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1971 ASSERT_NE (fname, NULL);
1972
1973 /* Verify that we get the expected tokens back, with the correct
1974 location information. */
1975
1976 location_t loc;
1977 const cpp_token *tok;
1978 tok = cpp_get_token_with_location (parser, &loc);
1979 ASSERT_NE (tok, NULL);
1980 ASSERT_EQ (tok->type, CPP_NAME);
1981 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1982 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1983
1984 tok = cpp_get_token_with_location (parser, &loc);
1985 ASSERT_NE (tok, NULL);
1986 ASSERT_EQ (tok->type, CPP_STRING);
1987 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1988 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1989
1990 tok = cpp_get_token_with_location (parser, &loc);
1991 ASSERT_NE (tok, NULL);
1992 ASSERT_EQ (tok->type, CPP_NUMBER);
1993 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
1994 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
1995
1996 tok = cpp_get_token_with_location (parser, &loc);
1997 ASSERT_NE (tok, NULL);
1998 ASSERT_EQ (tok->type, CPP_EOF);
1999
2000 cpp_finish (parser, NULL);
2001 cpp_destroy (parser);
2002}
2003
88fa5555
DM
2004/* Forward decls. */
2005
2006struct lexer_test;
2007class lexer_test_options;
2008
2009/* A class for specifying options of a lexer_test.
2010 The "apply" vfunc is called during the lexer_test constructor. */
2011
2012class lexer_test_options
2013{
2014 public:
2015 virtual void apply (lexer_test &) = 0;
2016};
2017
f5ea989d
DM
2018/* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2019 in its dtor.
2020
2021 This is needed by struct lexer_test to ensure that the cleanup of the
2022 cpp_reader happens *after* the cleanup of the temp_source_file. */
2023
2024class cpp_reader_ptr
2025{
2026 public:
2027 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2028
2029 ~cpp_reader_ptr ()
2030 {
2031 cpp_finish (m_ptr, NULL);
2032 cpp_destroy (m_ptr);
2033 }
2034
2035 operator cpp_reader * () const { return m_ptr; }
2036
2037 private:
2038 cpp_reader *m_ptr;
2039};
2040
88fa5555
DM
2041/* A struct for writing lexer tests. */
2042
2043struct lexer_test
2044{
2045 lexer_test (const line_table_case &case_, const char *content,
2046 lexer_test_options *options);
2047 ~lexer_test ();
2048
2049 const cpp_token *get_token ();
2050
f5ea989d
DM
2051 /* The ordering of these fields matters.
2052 The line_table_test must be first, since the cpp_reader_ptr
2053 uses it.
2054 The cpp_reader must be cleaned up *after* the temp_source_file
2055 since the filenames in input.c's input cache are owned by the
2056 cpp_reader; in particular, when ~temp_source_file evicts the
2057 filename the filenames must still be alive. */
f87e22c5 2058 line_table_test m_ltt;
f5ea989d
DM
2059 cpp_reader_ptr m_parser;
2060 temp_source_file m_tempfile;
88fa5555 2061 string_concat_db m_concats;
a3998c2f 2062 bool m_implicitly_expect_EOF;
88fa5555
DM
2063};
2064
2065/* Use an EBCDIC encoding for the execution charset, specifically
2066 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2067
2068 This exercises iconv integration within libcpp.
2069 Not every build of iconv supports the given charset,
2070 so we need to flag this error and handle it gracefully. */
2071
2072class ebcdic_execution_charset : public lexer_test_options
2073{
2074 public:
2075 ebcdic_execution_charset () : m_num_iconv_errors (0)
2076 {
2077 gcc_assert (s_singleton == NULL);
2078 s_singleton = this;
2079 }
2080 ~ebcdic_execution_charset ()
2081 {
2082 gcc_assert (s_singleton == this);
2083 s_singleton = NULL;
2084 }
2085
2086 void apply (lexer_test &test) FINAL OVERRIDE
2087 {
2088 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2089 cpp_opts->narrow_charset = "IBM1047";
2090
2091 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
c24300ba 2092 callbacks->diagnostic = on_diagnostic;
88fa5555
DM
2093 }
2094
c24300ba
DM
2095 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2096 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2097 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2098 rich_location *richloc ATTRIBUTE_UNUSED,
2099 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
88fa5555
DM
2100 ATTRIBUTE_FPTR_PRINTF(5,0)
2101 {
2102 gcc_assert (s_singleton);
a7085816
JJ
2103 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2104 const char *msg = "conversion from %s to %s not supported by iconv";
2105#ifdef ENABLE_NLS
2106 msg = dgettext ("cpplib", msg);
2107#endif
88fa5555
DM
2108 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2109 when the local iconv build doesn't support the conversion. */
a7085816 2110 if (strcmp (msgid, msg) == 0)
88fa5555
DM
2111 {
2112 s_singleton->m_num_iconv_errors++;
2113 return true;
2114 }
2115
2116 /* Otherwise, we have an unexpected error. */
2117 abort ();
2118 }
2119
2120 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2121
2122 private:
2123 static ebcdic_execution_charset *s_singleton;
2124 int m_num_iconv_errors;
2125};
2126
2127ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2128
c24300ba 2129/* A lexer_test_options subclass that records a list of diagnostic
a3998c2f
DM
2130 messages emitted by the lexer. */
2131
c24300ba 2132class lexer_diagnostic_sink : public lexer_test_options
a3998c2f
DM
2133{
2134 public:
c24300ba 2135 lexer_diagnostic_sink ()
a3998c2f
DM
2136 {
2137 gcc_assert (s_singleton == NULL);
2138 s_singleton = this;
2139 }
c24300ba 2140 ~lexer_diagnostic_sink ()
a3998c2f
DM
2141 {
2142 gcc_assert (s_singleton == this);
2143 s_singleton = NULL;
2144
2145 int i;
2146 char *str;
c24300ba 2147 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
a3998c2f
DM
2148 free (str);
2149 }
2150
2151 void apply (lexer_test &test) FINAL OVERRIDE
2152 {
2153 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
c24300ba 2154 callbacks->diagnostic = on_diagnostic;
a3998c2f
DM
2155 }
2156
c24300ba
DM
2157 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2158 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2159 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2160 rich_location *richloc ATTRIBUTE_UNUSED,
2161 const char *msgid, va_list *ap)
a3998c2f
DM
2162 ATTRIBUTE_FPTR_PRINTF(5,0)
2163 {
2164 char *msg = xvasprintf (msgid, *ap);
c24300ba 2165 s_singleton->m_diagnostics.safe_push (msg);
a3998c2f
DM
2166 return true;
2167 }
2168
c24300ba 2169 auto_vec<char *> m_diagnostics;
a3998c2f
DM
2170
2171 private:
c24300ba 2172 static lexer_diagnostic_sink *s_singleton;
a3998c2f
DM
2173};
2174
c24300ba 2175lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
a3998c2f 2176
88fa5555
DM
2177/* Constructor. Override line_table with a new instance based on CASE_,
2178 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2179 start parsing the tempfile. */
2180
2181lexer_test::lexer_test (const line_table_case &case_, const char *content,
f5ea989d
DM
2182 lexer_test_options *options)
2183: m_ltt (case_),
2184 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
88fa5555
DM
2185 /* Create a tempfile and write the text to it. */
2186 m_tempfile (SELFTEST_LOCATION, ".c", content),
a3998c2f
DM
2187 m_concats (),
2188 m_implicitly_expect_EOF (true)
88fa5555
DM
2189{
2190 if (options)
2191 options->apply (*this);
2192
2193 cpp_init_iconv (m_parser);
2194
2195 /* Parse the file. */
2196 const char *fname = cpp_read_main_file (m_parser,
2197 m_tempfile.get_filename ());
2198 ASSERT_NE (fname, NULL);
2199}
2200
a3998c2f 2201/* Destructor. By default, verify that the next token in m_parser is EOF. */
88fa5555
DM
2202
2203lexer_test::~lexer_test ()
2204{
2205 location_t loc;
2206 const cpp_token *tok;
2207
a3998c2f
DM
2208 if (m_implicitly_expect_EOF)
2209 {
2210 tok = cpp_get_token_with_location (m_parser, &loc);
2211 ASSERT_NE (tok, NULL);
2212 ASSERT_EQ (tok->type, CPP_EOF);
2213 }
88fa5555
DM
2214}
2215
2216/* Get the next token from m_parser. */
2217
2218const cpp_token *
2219lexer_test::get_token ()
2220{
2221 location_t loc;
2222 const cpp_token *tok;
2223
2224 tok = cpp_get_token_with_location (m_parser, &loc);
2225 ASSERT_NE (tok, NULL);
2226 return tok;
2227}
2228
2229/* Verify that locations within string literals are correctly handled. */
2230
2231/* Verify get_source_range_for_substring for token(s) at STRLOC,
2232 using the string concatenation database for TEST.
2233
2234 Assert that the character at index IDX is on EXPECTED_LINE,
2235 and that it begins at column EXPECTED_START_COL and ends at
2236 EXPECTED_FINISH_COL (unless the locations are beyond
2237 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2238 columns). */
2239
2240static void
2241assert_char_at_range (const location &loc,
2242 lexer_test& test,
2243 location_t strloc, enum cpp_ttype type, int idx,
2244 int expected_line, int expected_start_col,
2245 int expected_finish_col)
2246{
2247 cpp_reader *pfile = test.m_parser;
2248 string_concat_db *concats = &test.m_concats;
2249
a954833d 2250 source_range actual_range = source_range();
88fa5555 2251 const char *err
65e736c0
DM
2252 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2253 &actual_range);
88fa5555
DM
2254 if (should_have_column_data_p (strloc))
2255 ASSERT_EQ_AT (loc, NULL, err);
2256 else
2257 {
2258 ASSERT_STREQ_AT (loc,
2259 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2260 err);
2261 return;
2262 }
2263
2264 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2265 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2266 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2267 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2268
2269 if (should_have_column_data_p (actual_range.m_start))
2270 {
2271 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2272 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2273 }
2274 if (should_have_column_data_p (actual_range.m_finish))
2275 {
2276 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2277 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2278 }
2279}
2280
/* Convenience wrapper around assert_char_at_range, passing
   SELFTEST_LOCATION so that failures are reported at the point of use
   of the macro.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
                             EXPECTED_START_COL, EXPECTED_FINISH_COL) \
  assert_char_at_range (SELFTEST_LOCATION, \
                        (LEXER_TEST), (STRLOC), (TYPE), (IDX), \
                        (EXPECTED_LINE), (EXPECTED_START_COL), \
                        (EXPECTED_FINISH_COL))
2289
2290/* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2291 using the string concatenation database for TEST.
2292
2293 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2294
2295static void
2296assert_num_substring_ranges (const location &loc,
2297 lexer_test& test,
2298 location_t strloc,
2299 enum cpp_ttype type,
2300 int expected_num_ranges)
2301{
2302 cpp_reader *pfile = test.m_parser;
2303 string_concat_db *concats = &test.m_concats;
2304
0e06d2b3 2305 int actual_num_ranges = -1;
88fa5555
DM
2306 const char *err
2307 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2308 &actual_num_ranges);
2309 if (should_have_column_data_p (strloc))
2310 ASSERT_EQ_AT (loc, NULL, err);
2311 else
2312 {
2313 ASSERT_STREQ_AT (loc,
2314 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2315 err);
2316 return;
2317 }
2318 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2319}
2320
/* Convenience wrapper around assert_num_substring_ranges, passing
   SELFTEST_LOCATION so that failures are reported at the point of use
   of the macro.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
                                    EXPECTED_NUM_RANGES) \
  assert_num_substring_ranges (SELFTEST_LOCATION, \
                               (LEXER_TEST), (STRLOC), (TYPE), \
                               (EXPECTED_NUM_RANGES))
2328
2329
2330/* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2331 returns an error (using the string concatenation database for TEST). */
2332
2333static void
2334assert_has_no_substring_ranges (const location &loc,
2335 lexer_test& test,
2336 location_t strloc,
2337 enum cpp_ttype type,
2338 const char *expected_err)
2339{
2340 cpp_reader *pfile = test.m_parser;
2341 string_concat_db *concats = &test.m_concats;
2342 cpp_substring_ranges ranges;
2343 const char *actual_err
2344 = get_substring_ranges_for_loc (pfile, concats, strloc,
2345 type, ranges);
2346 if (should_have_column_data_p (strloc))
2347 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2348 else
2349 ASSERT_STREQ_AT (loc,
2350 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2351 actual_err);
2352}
2353
/* Convenience wrapper around assert_has_no_substring_ranges, passing
   SELFTEST_LOCATION so that failures are reported at the point of use
   of the macro.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
  assert_has_no_substring_ranges (SELFTEST_LOCATION, \
                                  (LEXER_TEST), (STRLOC), (TYPE), (ERR))
2357
2358/* Lex a simple string literal. Verify the substring location data, before
2359 and after running cpp_interpret_string on it. */
2360
2361static void
2362test_lexer_string_locations_simple (const line_table_case &case_)
2363{
2364 /* Digits 0-9 (with 0 at column 10), the simple way.
2365 ....................000000000.11111111112.2222222223333333333
2366 ....................123456789.01234567890.1234567890123456789
2367 We add a trailing comment to ensure that we correctly locate
2368 the end of the string literal token. */
2369 const char *content = " \"0123456789\" /* not a string */\n";
2370 lexer_test test (case_, content, NULL);
2371
2372 /* Verify that we get the expected token back, with the correct
2373 location information. */
2374 const cpp_token *tok = test.get_token ();
2375 ASSERT_EQ (tok->type, CPP_STRING);
2376 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2377 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2378
2379 /* At this point in lexing, the quote characters are treated as part of
2380 the string (they are stripped off by cpp_interpret_string). */
2381
2382 ASSERT_EQ (tok->val.str.len, 12);
2383
2384 /* Verify that cpp_interpret_string works. */
2385 cpp_string dst_string;
2386 const enum cpp_ttype type = CPP_STRING;
2387 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2388 &dst_string, type);
2389 ASSERT_TRUE (result);
2390 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2391 free (const_cast <unsigned char *> (dst_string.text));
2392
2393 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3
DM
2394 opening quote, but does include the closing quote. */
2395 for (int i = 0; i <= 10; i++)
88fa5555
DM
2396 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2397 10 + i, 10 + i);
2398
bbd6fcf3 2399 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
88fa5555
DM
2400}
2401
2402/* As test_lexer_string_locations_simple, but use an EBCDIC execution
2403 encoding. */
2404
2405static void
2406test_lexer_string_locations_ebcdic (const line_table_case &case_)
2407{
2408 /* EBCDIC support requires iconv. */
2409 if (!HAVE_ICONV)
2410 return;
2411
2412 /* Digits 0-9 (with 0 at column 10), the simple way.
2413 ....................000000000.11111111112.2222222223333333333
2414 ....................123456789.01234567890.1234567890123456789
2415 We add a trailing comment to ensure that we correctly locate
2416 the end of the string literal token. */
2417 const char *content = " \"0123456789\" /* not a string */\n";
2418 ebcdic_execution_charset use_ebcdic;
2419 lexer_test test (case_, content, &use_ebcdic);
2420
2421 /* Verify that we get the expected token back, with the correct
2422 location information. */
2423 const cpp_token *tok = test.get_token ();
2424 ASSERT_EQ (tok->type, CPP_STRING);
2425 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2426 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2427
2428 /* At this point in lexing, the quote characters are treated as part of
2429 the string (they are stripped off by cpp_interpret_string). */
2430
2431 ASSERT_EQ (tok->val.str.len, 12);
2432
2433 /* The remainder of the test requires an iconv implementation that
2434 can convert from UTF-8 to the EBCDIC encoding requested above. */
2435 if (use_ebcdic.iconv_errors_occurred_p ())
2436 return;
2437
2438 /* Verify that cpp_interpret_string works. */
2439 cpp_string dst_string;
2440 const enum cpp_ttype type = CPP_STRING;
2441 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2442 &dst_string, type);
2443 ASSERT_TRUE (result);
2444 /* We should now have EBCDIC-encoded text, specifically
2445 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2446 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2447 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2448 (const char *)dst_string.text);
2449 free (const_cast <unsigned char *> (dst_string.text));
2450
2451 /* Verify that we don't attempt to record substring location information
2452 for such cases. */
2453 ASSERT_HAS_NO_SUBSTRING_RANGES
2454 (test, tok->src_loc, type,
2455 "execution character set != source character set");
2456}
2457
2458/* Lex a string literal containing a hex-escaped character.
2459 Verify the substring location data, before and after running
2460 cpp_interpret_string on it. */
2461
2462static void
2463test_lexer_string_locations_hex (const line_table_case &case_)
2464{
2465 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2466 and with a space in place of digit 6, to terminate the escaped
2467 hex code.
2468 ....................000000000.111111.11112222.
2469 ....................123456789.012345.67890123. */
2470 const char *content = " \"01234\\x35 789\"\n";
2471 lexer_test test (case_, content, NULL);
2472
2473 /* Verify that we get the expected token back, with the correct
2474 location information. */
2475 const cpp_token *tok = test.get_token ();
2476 ASSERT_EQ (tok->type, CPP_STRING);
2477 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2478 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2479
2480 /* At this point in lexing, the quote characters are treated as part of
2481 the string (they are stripped off by cpp_interpret_string). */
2482 ASSERT_EQ (tok->val.str.len, 15);
2483
2484 /* Verify that cpp_interpret_string works. */
2485 cpp_string dst_string;
2486 const enum cpp_ttype type = CPP_STRING;
2487 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2488 &dst_string, type);
2489 ASSERT_TRUE (result);
2490 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2491 free (const_cast <unsigned char *> (dst_string.text));
2492
2493 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2494 opening quote, but does include the closing quote. */
88fa5555
DM
2495 for (int i = 0; i <= 4; i++)
2496 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2497 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
bbd6fcf3 2498 for (int i = 6; i <= 10; i++)
88fa5555
DM
2499 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2500
bbd6fcf3 2501 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
88fa5555
DM
2502}
2503
2504/* Lex a string literal containing an octal-escaped character.
2505 Verify the substring location data after running cpp_interpret_string
2506 on it. */
2507
2508static void
2509test_lexer_string_locations_oct (const line_table_case &case_)
2510{
2511 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2512 and with a space in place of digit 6, to terminate the escaped
2513 octal code.
2514 ....................000000000.111111.11112222.2222223333333333444
2515 ....................123456789.012345.67890123.4567890123456789012 */
2516 const char *content = " \"01234\\065 789\" /* not a string */\n";
2517 lexer_test test (case_, content, NULL);
2518
2519 /* Verify that we get the expected token back, with the correct
2520 location information. */
2521 const cpp_token *tok = test.get_token ();
2522 ASSERT_EQ (tok->type, CPP_STRING);
2523 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2524
2525 /* Verify that cpp_interpret_string works. */
2526 cpp_string dst_string;
2527 const enum cpp_ttype type = CPP_STRING;
2528 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2529 &dst_string, type);
2530 ASSERT_TRUE (result);
2531 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2532 free (const_cast <unsigned char *> (dst_string.text));
2533
2534 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2535 opening quote, but does include the closing quote. */
88fa5555
DM
2536 for (int i = 0; i < 5; i++)
2537 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2538 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
bbd6fcf3 2539 for (int i = 6; i <= 10; i++)
88fa5555
DM
2540 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2541
bbd6fcf3 2542 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
88fa5555
DM
2543}
2544
2545/* Test of string literal containing letter escapes. */
2546
2547static void
2548test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2549{
2550 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2551 .....................000000000.1.11111.1.1.11222.22222223333333
2552 .....................123456789.0.12345.6.7.89012.34567890123456. */
2553 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2554 lexer_test test (case_, content, NULL);
2555
2556 /* Verify that we get the expected tokens back. */
2557 const cpp_token *tok = test.get_token ();
2558 ASSERT_EQ (tok->type, CPP_STRING);
2559 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2560
2561 /* Verify ranges of individual characters. */
2562 /* "\t". */
2563 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2564 0, 1, 10, 11);
2565 /* "foo". */
2566 for (int i = 1; i <= 3; i++)
2567 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2568 i, 1, 11 + i, 11 + i);
2569 /* "\\" and "\n". */
2570 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2571 4, 1, 15, 16);
2572 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2573 5, 1, 17, 18);
2574
bbd6fcf3
DM
2575 /* "bar" and closing quote for nul-terminator. */
2576 for (int i = 6; i <= 9; i++)
88fa5555
DM
2577 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2578 i, 1, 13 + i, 13 + i);
2579
bbd6fcf3 2580 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
88fa5555
DM
2581}
2582
2583/* Another test of a string literal containing a letter escape.
2584 Based on string seen in
2585 printf ("%-%\n");
2586 in gcc.dg/format/c90-printf-1.c. */
2587
2588static void
2589test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2590{
2591 /* .....................000000000.1111.11.1111.22222222223.
2592 .....................123456789.0123.45.6789.01234567890. */
2593 const char *content = (" \"%-%\\n\" /* non-str */\n");
2594 lexer_test test (case_, content, NULL);
2595
2596 /* Verify that we get the expected tokens back. */
2597 const cpp_token *tok = test.get_token ();
2598 ASSERT_EQ (tok->type, CPP_STRING);
2599 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2600
2601 /* Verify ranges of individual characters. */
2602 /* "%-%". */
2603 for (int i = 0; i < 3; i++)
2604 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2605 i, 1, 10 + i, 10 + i);
2606 /* "\n". */
2607 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2608 3, 1, 13, 14);
2609
bbd6fcf3
DM
2610 /* Closing quote for nul-terminator. */
2611 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2612 4, 1, 15, 15);
2613
2614 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
88fa5555
DM
2615}
2616
2617/* Lex a string literal containing UCN 4 characters.
2618 Verify the substring location data after running cpp_interpret_string
2619 on it. */
2620
2621static void
2622test_lexer_string_locations_ucn4 (const line_table_case &case_)
2623{
2624 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2625 as UCN 4.
2626 ....................000000000.111111.111122.222222223.33333333344444
2627 ....................123456789.012345.678901.234567890.12345678901234 */
2628 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2629 lexer_test test (case_, content, NULL);
2630
2631 /* Verify that we get the expected token back, with the correct
2632 location information. */
2633 const cpp_token *tok = test.get_token ();
2634 ASSERT_EQ (tok->type, CPP_STRING);
2635 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2636
2637 /* Verify that cpp_interpret_string works.
2638 The string should be encoded in the execution character
2639 set. Assuming that that is UTF-8, we should have the following:
2640 ----------- ---- ----- ------- ----------------
2641 Byte offset Byte Octal Unicode Source Column(s)
2642 ----------- ---- ----- ------- ----------------
2643 0 0x30 '0' 10
2644 1 0x31 '1' 11
2645 2 0x32 '2' 12
2646 3 0x33 '3' 13
2647 4 0x34 '4' 14
2648 5 0xE2 \342 U+2174 15-20
2649 6 0x85 \205 (cont) 15-20
2650 7 0xB4 \264 (cont) 15-20
2651 8 0xE2 \342 U+2175 21-26
2652 9 0x85 \205 (cont) 21-26
2653 10 0xB5 \265 (cont) 21-26
2654 11 0x37 '7' 27
2655 12 0x38 '8' 28
2656 13 0x39 '9' 29
bbd6fcf3 2657 14 0x00 30 (closing quote)
88fa5555
DM
2658 ----------- ---- ----- ------- ---------------. */
2659
2660 cpp_string dst_string;
2661 const enum cpp_ttype type = CPP_STRING;
2662 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2663 &dst_string, type);
2664 ASSERT_TRUE (result);
2665 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2666 (const char *)dst_string.text);
2667 free (const_cast <unsigned char *> (dst_string.text));
2668
2669 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2670 opening quote, but does include the closing quote.
88fa5555
DM
2671 '01234'. */
2672 for (int i = 0; i <= 4; i++)
2673 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2674 /* U+2174. */
2675 for (int i = 5; i <= 7; i++)
2676 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2677 /* U+2175. */
2678 for (int i = 8; i <= 10; i++)
2679 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
bbd6fcf3
DM
2680 /* '789' and nul terminator */
2681 for (int i = 11; i <= 14; i++)
88fa5555
DM
2682 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2683
bbd6fcf3 2684 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
88fa5555
DM
2685}
2686
2687/* Lex a string literal containing UCN 8 characters.
2688 Verify the substring location data after running cpp_interpret_string
2689 on it. */
2690
2691static void
2692test_lexer_string_locations_ucn8 (const line_table_case &case_)
2693{
2694 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2695 ....................000000000.111111.1111222222.2222333333333.344444
2696 ....................123456789.012345.6789012345.6789012345678.901234 */
2697 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2698 lexer_test test (case_, content, NULL);
2699
2700 /* Verify that we get the expected token back, with the correct
2701 location information. */
2702 const cpp_token *tok = test.get_token ();
2703 ASSERT_EQ (tok->type, CPP_STRING);
2704 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2705 "\"01234\\U00002174\\U00002175789\"");
2706
2707 /* Verify that cpp_interpret_string works.
2708 The UTF-8 encoding of the string is identical to that from
2709 the ucn4 testcase above; the only difference is the column
2710 locations. */
2711 cpp_string dst_string;
2712 const enum cpp_ttype type = CPP_STRING;
2713 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2714 &dst_string, type);
2715 ASSERT_TRUE (result);
2716 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2717 (const char *)dst_string.text);
2718 free (const_cast <unsigned char *> (dst_string.text));
2719
2720 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2721 opening quote, but does include the closing quote.
88fa5555
DM
2722 '01234'. */
2723 for (int i = 0; i <= 4; i++)
2724 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2725 /* U+2174. */
2726 for (int i = 5; i <= 7; i++)
2727 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2728 /* U+2175. */
2729 for (int i = 8; i <= 10; i++)
2730 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2731 /* '789' at columns 35-37 */
2732 for (int i = 11; i <= 13; i++)
2733 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
bbd6fcf3
DM
2734 /* Closing quote/nul-terminator at column 38. */
2735 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
88fa5555 2736
bbd6fcf3 2737 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
88fa5555
DM
2738}
2739
2740/* Fetch a big-endian 32-bit value and convert to host endianness. */
2741
2742static uint32_t
2743uint32_from_big_endian (const uint32_t *ptr_be_value)
2744{
2745 const unsigned char *buf = (const unsigned char *)ptr_be_value;
2746 return (((uint32_t) buf[0] << 24)
2747 | ((uint32_t) buf[1] << 16)
2748 | ((uint32_t) buf[2] << 8)
2749 | (uint32_t) buf[3]);
2750}
2751
2752/* Lex a wide string literal and verify that attempts to read substring
2753 location data from it fail gracefully. */
2754
2755static void
2756test_lexer_string_locations_wide_string (const line_table_case &case_)
2757{
2758 /* Digits 0-9.
2759 ....................000000000.11111111112.22222222233333
2760 ....................123456789.01234567890.12345678901234 */
2761 const char *content = " L\"0123456789\" /* non-str */\n";
2762 lexer_test test (case_, content, NULL);
2763
2764 /* Verify that we get the expected token back, with the correct
2765 location information. */
2766 const cpp_token *tok = test.get_token ();
2767 ASSERT_EQ (tok->type, CPP_WSTRING);
2768 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2769
2770 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2771 cpp_string dst_string;
2772 const enum cpp_ttype type = CPP_WSTRING;
2773 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2774 &dst_string, type);
2775 ASSERT_TRUE (result);
2776 /* The cpp_reader defaults to big-endian with
2777 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2778 now be encoded as UTF-32BE. */
2779 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2780 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2781 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2782 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2783 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2784 free (const_cast <unsigned char *> (dst_string.text));
2785
2786 /* We don't yet support generating substring location information
2787 for L"" strings. */
2788 ASSERT_HAS_NO_SUBSTRING_RANGES
2789 (test, tok->src_loc, type,
2790 "execution character set != source character set");
2791}
2792
2793/* Fetch a big-endian 16-bit value and convert to host endianness. */
2794
2795static uint16_t
2796uint16_from_big_endian (const uint16_t *ptr_be_value)
2797{
2798 const unsigned char *buf = (const unsigned char *)ptr_be_value;
2799 return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1];
2800}
2801
2802/* Lex a u"" string literal and verify that attempts to read substring
2803 location data from it fail gracefully. */
2804
2805static void
2806test_lexer_string_locations_string16 (const line_table_case &case_)
2807{
2808 /* Digits 0-9.
2809 ....................000000000.11111111112.22222222233333
2810 ....................123456789.01234567890.12345678901234 */
2811 const char *content = " u\"0123456789\" /* non-str */\n";
2812 lexer_test test (case_, content, NULL);
2813
2814 /* Verify that we get the expected token back, with the correct
2815 location information. */
2816 const cpp_token *tok = test.get_token ();
2817 ASSERT_EQ (tok->type, CPP_STRING16);
2818 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2819
2820 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2821 cpp_string dst_string;
2822 const enum cpp_ttype type = CPP_STRING16;
2823 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2824 &dst_string, type);
2825 ASSERT_TRUE (result);
2826
2827 /* The cpp_reader defaults to big-endian, so dst_string should
2828 now be encoded as UTF-16BE. */
2829 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2830 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2831 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2832 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2833 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2834 free (const_cast <unsigned char *> (dst_string.text));
2835
2836 /* We don't yet support generating substring location information
2837 for u"" strings. */
2838 ASSERT_HAS_NO_SUBSTRING_RANGES
2839 (test, tok->src_loc, type,
2840 "execution character set != source character set");
2841}
2842
2843/* Lex a U"" string literal and verify that attempts to read substring
2844 location data from it fail gracefully. */
2845
2846static void
2847test_lexer_string_locations_string32 (const line_table_case &case_)
2848{
2849 /* Digits 0-9.
2850 ....................000000000.11111111112.22222222233333
2851 ....................123456789.01234567890.12345678901234 */
2852 const char *content = " U\"0123456789\" /* non-str */\n";
2853 lexer_test test (case_, content, NULL);
2854
2855 /* Verify that we get the expected token back, with the correct
2856 location information. */
2857 const cpp_token *tok = test.get_token ();
2858 ASSERT_EQ (tok->type, CPP_STRING32);
2859 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2860
2861 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2862 cpp_string dst_string;
2863 const enum cpp_ttype type = CPP_STRING32;
2864 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2865 &dst_string, type);
2866 ASSERT_TRUE (result);
2867
2868 /* The cpp_reader defaults to big-endian, so dst_string should
2869 now be encoded as UTF-32BE. */
2870 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2871 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2872 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2873 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2874 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2875 free (const_cast <unsigned char *> (dst_string.text));
2876
2877 /* We don't yet support generating substring location information
2878 for U"" strings. */
2879 ASSERT_HAS_NO_SUBSTRING_RANGES
2880 (test, tok->src_loc, type,
2881 "execution character set != source character set");
2882}
2883
2884/* Lex a u8-string literal.
2885 Verify the substring location data after running cpp_interpret_string
2886 on it. */
2887
2888static void
2889test_lexer_string_locations_u8 (const line_table_case &case_)
2890{
2891 /* Digits 0-9.
2892 ....................000000000.11111111112.22222222233333
2893 ....................123456789.01234567890.12345678901234 */
2894 const char *content = " u8\"0123456789\" /* non-str */\n";
2895 lexer_test test (case_, content, NULL);
2896
2897 /* Verify that we get the expected token back, with the correct
2898 location information. */
2899 const cpp_token *tok = test.get_token ();
2900 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2901 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2902
2903 /* Verify that cpp_interpret_string works. */
2904 cpp_string dst_string;
2905 const enum cpp_ttype type = CPP_STRING;
2906 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2907 &dst_string, type);
2908 ASSERT_TRUE (result);
2909 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2910 free (const_cast <unsigned char *> (dst_string.text));
2911
2912 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3
DM
2913 opening quote, but does include the closing quote. */
2914 for (int i = 0; i <= 10; i++)
88fa5555
DM
2915 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2916}
2917
2918/* Lex a string literal containing UTF-8 source characters.
2919 Verify the substring location data after running cpp_interpret_string
2920 on it. */
2921
2922static void
2923test_lexer_string_locations_utf8_source (const line_table_case &case_)
2924{
2925 /* This string literal is written out to the source file as UTF-8,
2926 and is of the form "before mojibake after", where "mojibake"
2927 is written as the following four unicode code points:
2928 U+6587 CJK UNIFIED IDEOGRAPH-6587
2929 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2930 U+5316 CJK UNIFIED IDEOGRAPH-5316
2931 U+3051 HIRAGANA LETTER KE.
2932 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2933 "before" and "after" are 1 byte per unicode character.
2934
2935 The numbering shown are "columns", which are *byte* numbers within
2936 the line, rather than unicode character numbers.
2937
2938 .................... 000000000.1111111.
2939 .................... 123456789.0123456. */
2940 const char *content = (" \"before "
2941 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2942 UTF-8: 0xE6 0x96 0x87
2943 C octal escaped UTF-8: \346\226\207
2944 "column" numbers: 17-19. */
2945 "\346\226\207"
2946
2947 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2948 UTF-8: 0xE5 0xAD 0x97
2949 C octal escaped UTF-8: \345\255\227
2950 "column" numbers: 20-22. */
2951 "\345\255\227"
2952
2953 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2954 UTF-8: 0xE5 0x8C 0x96
2955 C octal escaped UTF-8: \345\214\226
2956 "column" numbers: 23-25. */
2957 "\345\214\226"
2958
2959 /* U+3051 HIRAGANA LETTER KE
2960 UTF-8: 0xE3 0x81 0x91
2961 C octal escaped UTF-8: \343\201\221
2962 "column" numbers: 26-28. */
2963 "\343\201\221"
2964
2965 /* column numbers 29 onwards
2966 2333333.33334444444444
2967 9012345.67890123456789. */
2968 " after\" /* non-str */\n");
2969 lexer_test test (case_, content, NULL);
2970
2971 /* Verify that we get the expected token back, with the correct
2972 location information. */
2973 const cpp_token *tok = test.get_token ();
2974 ASSERT_EQ (tok->type, CPP_STRING);
2975 ASSERT_TOKEN_AS_TEXT_EQ
2976 (test.m_parser, tok,
2977 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2978
2979 /* Verify that cpp_interpret_string works. */
2980 cpp_string dst_string;
2981 const enum cpp_ttype type = CPP_STRING;
2982 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2983 &dst_string, type);
2984 ASSERT_TRUE (result);
2985 ASSERT_STREQ
2986 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2987 (const char *)dst_string.text);
2988 free (const_cast <unsigned char *> (dst_string.text));
2989
2990 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2991 opening quote, but does include the closing quote.
88fa5555 2992 Assuming that both source and execution encodings are UTF-8, we have
bbd6fcf3 2993 a run of 25 octets in each, plus the NUL terminator. */
88fa5555
DM
2994 for (int i = 0; i < 25; i++)
2995 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
bbd6fcf3
DM
2996 /* NUL-terminator should use the closing quote at column 35. */
2997 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
88fa5555 2998
bbd6fcf3 2999 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
88fa5555
DM
3000}
3001
3002/* Test of string literal concatenation. */
3003
3004static void
3005test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3006{
3007 /* Digits 0-9.
3008 .....................000000000.111111.11112222222222
3009 .....................123456789.012345.67890123456789. */
3010 const char *content = (" \"01234\" /* non-str */\n"
3011 " \"56789\" /* non-str */\n");
3012 lexer_test test (case_, content, NULL);
3013
3014 location_t input_locs[2];
3015
3016 /* Verify that we get the expected tokens back. */
3017 auto_vec <cpp_string> input_strings;
3018 const cpp_token *tok_a = test.get_token ();
3019 ASSERT_EQ (tok_a->type, CPP_STRING);
3020 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3021 input_strings.safe_push (tok_a->val.str);
3022 input_locs[0] = tok_a->src_loc;
3023
3024 const cpp_token *tok_b = test.get_token ();
3025 ASSERT_EQ (tok_b->type, CPP_STRING);
3026 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3027 input_strings.safe_push (tok_b->val.str);
3028 input_locs[1] = tok_b->src_loc;
3029
3030 /* Verify that cpp_interpret_string works. */
3031 cpp_string dst_string;
3032 const enum cpp_ttype type = CPP_STRING;
3033 bool result = cpp_interpret_string (test.m_parser,
3034 input_strings.address (), 2,
3035 &dst_string, type);
3036 ASSERT_TRUE (result);
3037 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3038 free (const_cast <unsigned char *> (dst_string.text));
3039
3040 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3041 test.m_concats.record_string_concatenation (2, input_locs);
3042
3043 location_t initial_loc = input_locs[0];
3044
bbd6fcf3 3045 /* "01234" on line 1. */
88fa5555
DM
3046 for (int i = 0; i <= 4; i++)
3047 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
bbd6fcf3
DM
3048 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3049 for (int i = 5; i <= 10; i++)
88fa5555
DM
3050 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3051
bbd6fcf3 3052 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
88fa5555
DM
3053}
3054
3055/* Another test of string literal concatenation. */
3056
3057static void
3058test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3059{
3060 /* Digits 0-9.
3061 .....................000000000.111.11111112222222
3062 .....................123456789.012.34567890123456. */
3063 const char *content = (" \"01\" /* non-str */\n"
3064 " \"23\" /* non-str */\n"
3065 " \"45\" /* non-str */\n"
3066 " \"67\" /* non-str */\n"
3067 " \"89\" /* non-str */\n");
3068 lexer_test test (case_, content, NULL);
3069
3070 auto_vec <cpp_string> input_strings;
3071 location_t input_locs[5];
3072
3073 /* Verify that we get the expected tokens back. */
3074 for (int i = 0; i < 5; i++)
3075 {
3076 const cpp_token *tok = test.get_token ();
3077 ASSERT_EQ (tok->type, CPP_STRING);
3078 input_strings.safe_push (tok->val.str);
3079 input_locs[i] = tok->src_loc;
3080 }
3081
3082 /* Verify that cpp_interpret_string works. */
3083 cpp_string dst_string;
3084 const enum cpp_ttype type = CPP_STRING;
3085 bool result = cpp_interpret_string (test.m_parser,
3086 input_strings.address (), 5,
3087 &dst_string, type);
3088 ASSERT_TRUE (result);
3089 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3090 free (const_cast <unsigned char *> (dst_string.text));
3091
3092 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3093 test.m_concats.record_string_concatenation (5, input_locs);
3094
3095 location_t initial_loc = input_locs[0];
3096
3097 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3098 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3099 and expect get_source_range_for_substring to fail.
3100 However, for a string concatenation test, we can have a case
3101 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3102 but subsequent strings can be after it.
3103 Attempting to detect this within assert_char_at_range
3104 would overcomplicate the logic for the common test cases, so
3105 we detect it here. */
3106 if (should_have_column_data_p (input_locs[0])
3107 && !should_have_column_data_p (input_locs[4]))
3108 {
3109 /* Verify that get_source_range_for_substring gracefully rejects
3110 this case. */
3111 source_range actual_range;
3112 const char *err
65e736c0
DM
3113 = get_source_range_for_char (test.m_parser, &test.m_concats,
3114 initial_loc, type, 0, &actual_range);
88fa5555
DM
3115 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3116 return;
3117 }
3118
3119 for (int i = 0; i < 5; i++)
3120 for (int j = 0; j < 2; j++)
3121 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3122 i + 1, 10 + j, 10 + j);
3123
bbd6fcf3
DM
3124 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3125 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3126
3127 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
88fa5555
DM
3128}
3129
3130/* Another test of string literal concatenation, this time combined with
3131 various kinds of escaped characters. */
3132
3133static void
3134test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3135{
3136 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3137 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3138 const char *content
3139 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3140 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3141 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3142 lexer_test test (case_, content, NULL);
3143
3144 auto_vec <cpp_string> input_strings;
3145 location_t input_locs[4];
3146
3147 /* Verify that we get the expected tokens back. */
3148 for (int i = 0; i < 4; i++)
3149 {
3150 const cpp_token *tok = test.get_token ();
3151 ASSERT_EQ (tok->type, CPP_STRING);
3152 input_strings.safe_push (tok->val.str);
3153 input_locs[i] = tok->src_loc;
3154 }
3155
3156 /* Verify that cpp_interpret_string works. */
3157 cpp_string dst_string;
3158 const enum cpp_ttype type = CPP_STRING;
3159 bool result = cpp_interpret_string (test.m_parser,
3160 input_strings.address (), 4,
3161 &dst_string, type);
3162 ASSERT_TRUE (result);
3163 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3164 free (const_cast <unsigned char *> (dst_string.text));
3165
3166 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3167 test.m_concats.record_string_concatenation (4, input_locs);
3168
3169 location_t initial_loc = input_locs[0];
3170
3171 for (int i = 0; i <= 4; i++)
3172 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3173 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3174 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3175 for (int i = 7; i <= 9; i++)
3176 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3177
bbd6fcf3
DM
3178 /* NUL-terminator should use the location of the final closing quote. */
3179 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3180
3181 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
88fa5555
DM
3182}
3183
3184/* Test of string literal in a macro. */
3185
3186static void
3187test_lexer_string_locations_macro (const line_table_case &case_)
3188{
3189 /* Digits 0-9.
3190 .....................0000000001111111111.22222222223.
3191 .....................1234567890123456789.01234567890. */
3192 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3193 " MACRO");
3194 lexer_test test (case_, content, NULL);
3195
3196 /* Verify that we get the expected tokens back. */
3197 const cpp_token *tok = test.get_token ();
3198 ASSERT_EQ (tok->type, CPP_PADDING);
3199
3200 tok = test.get_token ();
3201 ASSERT_EQ (tok->type, CPP_STRING);
3202 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3203
3204 /* Verify ranges of individual characters. We ought to
3205 see columns within the macro definition. */
bbd6fcf3 3206 for (int i = 0; i <= 10; i++)
88fa5555
DM
3207 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3208 i, 1, 20 + i, 20 + i);
3209
bbd6fcf3 3210 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
88fa5555
DM
3211
3212 tok = test.get_token ();
3213 ASSERT_EQ (tok->type, CPP_PADDING);
3214}
3215
3216/* Test of stringification of a macro argument. */
3217
3218static void
3219test_lexer_string_locations_stringified_macro_argument
3220 (const line_table_case &case_)
3221{
3222 /* .....................000000000111111111122222222223.
3223 .....................123456789012345678901234567890. */
3224 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3225 "MACRO(foo)\n");
3226 lexer_test test (case_, content, NULL);
3227
3228 /* Verify that we get the expected token back. */
3229 const cpp_token *tok = test.get_token ();
3230 ASSERT_EQ (tok->type, CPP_PADDING);
3231
3232 tok = test.get_token ();
3233 ASSERT_EQ (tok->type, CPP_STRING);
3234 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3235
3236 /* We don't support getting the location of a stringified macro
3237 argument. Verify that it fails gracefully. */
3238 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3239 "cpp_interpret_string_1 failed");
3240
3241 tok = test.get_token ();
3242 ASSERT_EQ (tok->type, CPP_PADDING);
3243
3244 tok = test.get_token ();
3245 ASSERT_EQ (tok->type, CPP_PADDING);
3246}
3247
3248/* Ensure that we fail gracefully if something attempts to pass
3249 in a location that isn't a string literal token. Seen on this code:
3250
3251 const char a[] = " %d ";
3252 __builtin_printf (a, 0.5);
3253 ^
3254
3255 when c-format.c erroneously used the indicated one-character
3256 location as the format string location, leading to a read past the
3257 end of a string buffer in cpp_interpret_string_1. */
3258
3259static void
3260test_lexer_string_locations_non_string (const line_table_case &case_)
3261{
3262 /* .....................000000000111111111122222222223.
3263 .....................123456789012345678901234567890. */
3264 const char *content = (" a\n");
3265 lexer_test test (case_, content, NULL);
3266
3267 /* Verify that we get the expected token back. */
3268 const cpp_token *tok = test.get_token ();
3269 ASSERT_EQ (tok->type, CPP_NAME);
3270 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3271
3272 /* At this point, libcpp is attempting to interpret the name as a
3273 string literal, despite it not starting with a quote. We don't detect
3274 that, but we should at least fail gracefully. */
3275 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3276 "cpp_interpret_string_1 failed");
3277}
3278
3279/* Ensure that we can read substring information for a token which
3280 starts in one linemap and ends in another. Adapted from
3281 gcc.dg/cpp/pr69985.c. */
3282
3283static void
3284test_lexer_string_locations_long_line (const line_table_case &case_)
3285{
3286 /* .....................000000.0001111111111
3287 .....................123456.7890123456789. */
3288 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3289 " \"0123456789012345678901234567890123456789"
3290 "0123456789012345678901234567890123456789"
3291 "0123456789012345678901234567890123456789"
3292 "0123456789\"\n");
3293
3294 lexer_test test (case_, content, NULL);
3295
3296 /* Verify that we get the expected token back. */
3297 const cpp_token *tok = test.get_token ();
3298 ASSERT_EQ (tok->type, CPP_STRING);
3299
3300 if (!should_have_column_data_p (line_table->highest_location))
3301 return;
3302
3303 /* Verify ranges of individual characters. */
bbd6fcf3
DM
3304 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3305 for (int i = 0; i < 131; i++)
88fa5555
DM
3306 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3307 i, 2, 7 + i, 7 + i);
3308}
3309
b8f56412
DM
3310/* Test of locations within a raw string that doesn't contain a newline. */
3311
3312static void
3313test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3314{
3315 /* .....................00.0000000111111111122.
3316 .....................12.3456789012345678901. */
3317 const char *content = ("R\"foo(0123456789)foo\"\n");
3318 lexer_test test (case_, content, NULL);
3319
3320 /* Verify that we get the expected token back. */
3321 const cpp_token *tok = test.get_token ();
3322 ASSERT_EQ (tok->type, CPP_STRING);
3323
3324 /* Verify that cpp_interpret_string works. */
3325 cpp_string dst_string;
3326 const enum cpp_ttype type = CPP_STRING;
3327 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3328 &dst_string, type);
3329 ASSERT_TRUE (result);
3330 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3331 free (const_cast <unsigned char *> (dst_string.text));
3332
3333 if (!should_have_column_data_p (line_table->highest_location))
3334 return;
3335
3336 /* 0-9, plus the nul terminator. */
3337 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3338 for (int i = 0; i < 11; i++)
3339 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3340 i, 1, 7 + i, 7 + i);
3341}
3342
3343/* Test of locations within a raw string that contains a newline. */
3344
3345static void
3346test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3347{
3348 /* .....................00.0000.
3349 .....................12.3456. */
3350 const char *content = ("R\"foo(\n"
3351 /* .....................00000.
3352 .....................12345. */
3353 "hello\n"
3354 "world\n"
3355 /* .....................00000.
3356 .....................12345. */
3357 ")foo\"\n");
3358 lexer_test test (case_, content, NULL);
3359
3360 /* Verify that we get the expected token back. */
3361 const cpp_token *tok = test.get_token ();
3362 ASSERT_EQ (tok->type, CPP_STRING);
3363
3364 /* Verify that cpp_interpret_string works. */
3365 cpp_string dst_string;
3366 const enum cpp_ttype type = CPP_STRING;
3367 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3368 &dst_string, type);
3369 ASSERT_TRUE (result);
3370 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3371 free (const_cast <unsigned char *> (dst_string.text));
3372
3373 if (!should_have_column_data_p (line_table->highest_location))
3374 return;
3375
3376 /* Currently we don't support locations within raw strings that
3377 contain newlines. */
3378 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3379 "range endpoints are on different lines");
3380}
3381
a3998c2f
DM
3382/* Test of parsing an unterminated raw string. */
3383
3384static void
3385test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3386{
3387 const char *content = "R\"ouch()ouCh\" /* etc */";
3388
c24300ba
DM
3389 lexer_diagnostic_sink diagnostics;
3390 lexer_test test (case_, content, &diagnostics);
a3998c2f
DM
3391 test.m_implicitly_expect_EOF = false;
3392
3393 /* Attempt to parse the raw string. */
3394 const cpp_token *tok = test.get_token ();
3395 ASSERT_EQ (tok->type, CPP_EOF);
3396
c24300ba 3397 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
a3998c2f
DM
3398 /* We expect the message "unterminated raw string"
3399 in the "cpplib" translation domain.
3400 It's not clear that dgettext is available on all supported hosts,
3401 so this assertion is commented-out for now.
3402 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
c24300ba 3403 diagnostics.m_diagnostics[0]);
a3998c2f
DM
3404 */
3405}
3406
88fa5555
DM
3407/* Test of lexing char constants. */
3408
3409static void
3410test_lexer_char_constants (const line_table_case &case_)
3411{
3412 /* Various char constants.
3413 .....................0000000001111111111.22222222223.
3414 .....................1234567890123456789.01234567890. */
3415 const char *content = (" 'a'\n"
3416 " u'a'\n"
3417 " U'a'\n"
3418 " L'a'\n"
3419 " 'abc'\n");
3420 lexer_test test (case_, content, NULL);
3421
3422 /* Verify that we get the expected tokens back. */
3423 /* 'a'. */
3424 const cpp_token *tok = test.get_token ();
3425 ASSERT_EQ (tok->type, CPP_CHAR);
3426 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3427
3428 unsigned int chars_seen;
3429 int unsignedp;
3430 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3431 &chars_seen, &unsignedp);
3432 ASSERT_EQ (cc, 'a');
3433 ASSERT_EQ (chars_seen, 1);
3434
3435 /* u'a'. */
3436 tok = test.get_token ();
3437 ASSERT_EQ (tok->type, CPP_CHAR16);
3438 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3439
3440 /* U'a'. */
3441 tok = test.get_token ();
3442 ASSERT_EQ (tok->type, CPP_CHAR32);
3443 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3444
3445 /* L'a'. */
3446 tok = test.get_token ();
3447 ASSERT_EQ (tok->type, CPP_WCHAR);
3448 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3449
3450 /* 'abc' (c-char-sequence). */
3451 tok = test.get_token ();
3452 ASSERT_EQ (tok->type, CPP_CHAR);
3453 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3454}
741d3be5
DM
3455/* A table of interesting location_t values, giving one axis of our test
3456 matrix. */
3457
3458static const location_t boundary_locations[] = {
3459 /* Zero means "don't override the default values for a new line_table". */
3460 0,
3461
3462 /* An arbitrary non-zero value that isn't close to one of
3463 the boundary values below. */
3464 0x10000,
3465
3466 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3467 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3468 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3469 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3470 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3471 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3472
3473 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3474 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3475 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3476 LINE_MAP_MAX_LOCATION_WITH_COLS,
3477 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3478 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3479};
3480
f87e22c5 3481/* Run TESTCASE multiple times, once for each case in our test matrix. */
d9b950dd
DM
3482
3483void
f87e22c5 3484for_each_line_table_case (void (*testcase) (const line_table_case &))
d9b950dd 3485{
741d3be5
DM
3486 /* As noted above in the description of struct line_table_case,
3487 we want to explore a test matrix of interesting line_table
3488 situations, running various selftests for each case within the
3489 matrix. */
3490
3491 /* Run all tests with:
3492 (a) line_table->default_range_bits == 0, and
3493 (b) line_table->default_range_bits == 5. */
3494 int num_cases_tested = 0;
3495 for (int default_range_bits = 0; default_range_bits <= 5;
3496 default_range_bits += 5)
3497 {
3498 /* ...and use each of the "interesting" location values as
3499 the starting location within line_table. */
3500 const int num_boundary_locations
3501 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3502 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3503 {
3504 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3505
f87e22c5 3506 testcase (c);
741d3be5
DM
3507
3508 num_cases_tested++;
3509 }
3510 }
3511
3512 /* Verify that we fully covered the test matrix. */
3513 ASSERT_EQ (num_cases_tested, 2 * 12);
f87e22c5
DM
3514}
3515
3516/* Run all of the selftests within this file. */
3517
3518void
3519input_c_tests ()
3520{
082284da 3521 test_linenum_comparisons ();
f87e22c5
DM
3522 test_should_have_column_data_p ();
3523 test_unknown_location ();
3524 test_builtins ();
9144eabb 3525 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
f87e22c5
DM
3526
3527 for_each_line_table_case (test_accessing_ordinary_linemaps);
3528 for_each_line_table_case (test_lexer);
3529 for_each_line_table_case (test_lexer_string_locations_simple);
3530 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3531 for_each_line_table_case (test_lexer_string_locations_hex);
3532 for_each_line_table_case (test_lexer_string_locations_oct);
3533 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3534 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3535 for_each_line_table_case (test_lexer_string_locations_ucn4);
3536 for_each_line_table_case (test_lexer_string_locations_ucn8);
3537 for_each_line_table_case (test_lexer_string_locations_wide_string);
3538 for_each_line_table_case (test_lexer_string_locations_string16);
3539 for_each_line_table_case (test_lexer_string_locations_string32);
3540 for_each_line_table_case (test_lexer_string_locations_u8);
3541 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3542 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3543 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3544 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3545 for_each_line_table_case (test_lexer_string_locations_macro);
3546 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3547 for_each_line_table_case (test_lexer_string_locations_non_string);
3548 for_each_line_table_case (test_lexer_string_locations_long_line);
b8f56412
DM
3549 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3550 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
a3998c2f 3551 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
f87e22c5 3552 for_each_line_table_case (test_lexer_char_constants);
741d3be5 3553
d9b950dd
DM
3554 test_reading_source_line ();
3555}
3556
3557} // namespace selftest
3558
3559#endif /* CHECKING_P */