]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/input.c
Update copyright years.
[thirdparty/gcc.git] / gcc / input.c
CommitLineData
37ba4887 1/* Data and functions related to line maps and input files.
fbd26352 2 Copyright (C) 2004-2019 Free Software Foundation, Inc.
37ba4887 3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "intl.h"
934182c6 24#include "diagnostic.h"
28f17529 25#include "diagnostic-core.h"
99b4f3a2 26#include "selftest.h"
b73690a4 27#include "cpplib.h"
ffc2c526 28
e2f73ee8 29#ifndef HAVE_ICONV
30#define HAVE_ICONV 0
31#endif
32
ffc2c526 33/* This is a cache used by get_next_line to store the content of a
34 file to be searched for file lines. */
35struct fcache
36{
37 /* These are information used to store a line boundary. */
38 struct line_info
39 {
40 /* The line number. It starts from 1. */
41 size_t line_num;
42
43 /* The position (byte count) of the beginning of the line,
44 relative to the file data pointer. This starts at zero. */
45 size_t start_pos;
46
47 /* The position (byte count) of the last byte of the line. This
48 normally points to the '\n' character, or to one byte after the
49 last byte of the file, if the file doesn't contain a '\n'
50 character. */
51 size_t end_pos;
52
53 line_info (size_t l, size_t s, size_t e)
54 : line_num (l), start_pos (s), end_pos (e)
55 {}
56
57 line_info ()
58 :line_num (0), start_pos (0), end_pos (0)
59 {}
60 };
61
62 /* The number of time this file has been accessed. This is used
63 to designate which file cache to evict from the cache
64 array. */
65 unsigned use_count;
66
c6a7d9e9 67 /* The file_path is the key for identifying a particular file in
68 the cache.
69 For libcpp-using code, the underlying buffer for this field is
70 owned by the corresponding _cpp_file within the cpp_reader. */
ffc2c526 71 const char *file_path;
72
73 FILE *fp;
74
75 /* This points to the content of the file that we've read so
76 far. */
77 char *data;
78
79 /* The size of the DATA array above.*/
80 size_t size;
81
82 /* The number of bytes read from the underlying file so far. This
83 must be less (or equal) than SIZE above. */
84 size_t nb_read;
85
86 /* The index of the beginning of the current line. */
87 size_t line_start_idx;
88
89 /* The number of the previous line read. This starts at 1. Zero
90 means we've read no line so far. */
91 size_t line_num;
92
93 /* This is the total number of lines of the current file. At the
94 moment, we try to get this information from the line map
95 subsystem. Note that this is just a hint. When using the C++
96 front-end, this hint is correct because the input file is then
97 completely tokenized before parsing starts; so the line map knows
98 the number of lines before compilation really starts. For e.g,
99 the C front-end, it can happen that we start emitting diagnostics
100 before the line map has seen the end of the file. */
101 size_t total_lines;
102
fe066ce3 103 /* Could this file be missing a trailing newline on its final line?
104 Initially true (to cope with empty files), set to true/false
105 as each line is read. */
106 bool missing_trailing_newline;
107
ffc2c526 108 /* This is a record of the beginning and end of the lines we've seen
109 while reading the file. This is useful to avoid walking the data
110 from the beginning when we are asked to read a line that is
111 before LINE_START_IDX above. Note that the maximum size of this
112 record is fcache_line_record_size, so that the memory consumption
113 doesn't explode. We thus scale total_lines down to
114 fcache_line_record_size. */
115 vec<line_info, va_heap> line_record;
116
117 fcache ();
118 ~fcache ();
119};
37ba4887 120
121/* Current position in real source file. */
122
415309e2 123location_t input_location = UNKNOWN_LOCATION;
37ba4887 124
125struct line_maps *line_table;
126
7ec388ed 127/* A stashed copy of "line_table" for use by selftest::line_table_test.
128 This needs to be a global so that it can be a GC root, and thus
129 prevent the stashed copy from being garbage-collected if the GC runs
130 during a line_table_test. */
131
132struct line_maps *saved_line_table;
133
ffc2c526 134static fcache *fcache_tab;
135static const size_t fcache_tab_size = 16;
136static const size_t fcache_buffer_size = 4 * 1024;
137static const size_t fcache_line_record_size = 100;
138
5ebe2143 139/* Expand the source location LOC into a human readable location. If
140 LOC resolves to a builtin location, the file name of the readable
39107655 141 location is set to the string "<built-in>". If EXPANSION_POINT_P is
142 TRUE and LOC is virtual, then it is resolved to the expansion
143 point of the involved macro. Otherwise, it is resolved to the
bd172d61 144 spelling location of the token.
145
146 When resolving to the spelling location of the token, if the
147 resulting location is for a built-in location (that is, it has no
148 associated line/column) in the context of a macro expansion, the
149 returned location is the first one (while unwinding the macro
150 location towards its expansion point) that is in real source
56df12ff 151 code.
152
153 ASPECT controls which part of the location to use. */
39107655 154
155static expanded_location
be1e7283 156expand_location_1 (location_t loc,
56df12ff 157 bool expansion_point_p,
158 enum location_aspect aspect)
37ba4887 159{
160 expanded_location xloc;
551e34da 161 const line_map_ordinary *map;
bd172d61 162 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
5169661d 163 tree block = NULL;
164
165 if (IS_ADHOC_LOC (loc))
166 {
167 block = LOCATION_BLOCK (loc);
168 loc = LOCATION_LOCUS (loc);
169 }
bd172d61 170
171 memset (&xloc, 0, sizeof (xloc));
5ebe2143 172
bd172d61 173 if (loc >= RESERVED_LOCATION_COUNT)
174 {
175 if (!expansion_point_p)
176 {
177 /* We want to resolve LOC to its spelling location.
178
179 But if that spelling location is a reserved location that
180 appears in the context of a macro expansion (like for a
181 location for a built-in token), let's consider the first
182 location (toward the expansion point) that is not reserved;
183 that is, the first location that is in real source code. */
184 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
551e34da 185 loc, NULL);
bd172d61 186 lrk = LRK_SPELLING_LOCATION;
187 }
56df12ff 188 loc = linemap_resolve_location (line_table, loc, lrk, &map);
189
190 /* loc is now either in an ordinary map, or is a reserved location.
191 If it is a compound location, the caret is in a spelling location,
192 but the start/finish might still be a virtual location.
193 Depending of what the caller asked for, we may need to recurse
194 one level in order to resolve any virtual locations in the
195 end-points. */
196 switch (aspect)
197 {
198 default:
199 gcc_unreachable ();
200 /* Fall through. */
201 case LOCATION_ASPECT_CARET:
202 break;
203 case LOCATION_ASPECT_START:
204 {
be1e7283 205 location_t start = get_start (loc);
56df12ff 206 if (start != loc)
207 return expand_location_1 (start, expansion_point_p, aspect);
208 }
209 break;
210 case LOCATION_ASPECT_FINISH:
211 {
be1e7283 212 location_t finish = get_finish (loc);
56df12ff 213 if (finish != loc)
214 return expand_location_1 (finish, expansion_point_p, aspect);
215 }
216 break;
217 }
bd172d61 218 xloc = linemap_expand_location (line_table, map, loc);
219 }
5ebe2143 220
5169661d 221 xloc.data = block;
37ba4887 222 if (loc <= BUILTINS_LOCATION)
5ebe2143 223 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
224
37ba4887 225 return xloc;
226}
e77b8253 227
ffc2c526 228/* Initialize the set of cache used for files accessed by caret
229 diagnostic. */
230
231static void
232diagnostic_file_cache_init (void)
233{
234 if (fcache_tab == NULL)
235 fcache_tab = new fcache[fcache_tab_size];
236}
237
e7683169 238/* Free the resources used by the set of cache used for files accessed
ffc2c526 239 by caret diagnostic. */
240
241void
242diagnostic_file_cache_fini (void)
243{
244 if (fcache_tab)
245 {
246 delete [] (fcache_tab);
247 fcache_tab = NULL;
248 }
249}
250
251/* Return the total lines number that have been read so far by the
252 line map (in the preprocessor) so far. For languages like C++ that
253 entirely preprocess the input file before starting to parse, this
254 equals the actual number of lines of the file. */
255
256static size_t
257total_lines_num (const char *file_path)
258{
259 size_t r = 0;
be1e7283 260 location_t l = 0;
ffc2c526 261 if (linemap_get_file_highest_location (line_table, file_path, &l))
262 {
263 gcc_assert (l >= RESERVED_LOCATION_COUNT);
264 expanded_location xloc = expand_location (l);
265 r = xloc.line;
266 }
267 return r;
268}
269
270/* Lookup the cache used for the content of a given file accessed by
271 caret diagnostic. Return the found cached file, or NULL if no
272 cached file was found. */
273
274static fcache*
275lookup_file_in_cache_tab (const char *file_path)
276{
277 if (file_path == NULL)
278 return NULL;
279
280 diagnostic_file_cache_init ();
281
282 /* This will contain the found cached file. */
283 fcache *r = NULL;
284 for (unsigned i = 0; i < fcache_tab_size; ++i)
285 {
286 fcache *c = &fcache_tab[i];
287 if (c->file_path && !strcmp (c->file_path, file_path))
288 {
289 ++c->use_count;
290 r = c;
291 }
292 }
293
294 if (r)
295 ++r->use_count;
296
297 return r;
298}
299
a476cb62 300/* Purge any mention of FILENAME from the cache of files used for
301 printing source code. For use in selftests when working
302 with tempfiles. */
303
304void
305diagnostics_file_cache_forcibly_evict_file (const char *file_path)
306{
307 gcc_assert (file_path);
308
309 fcache *r = lookup_file_in_cache_tab (file_path);
310 if (!r)
311 /* Not found. */
312 return;
313
314 r->file_path = NULL;
315 if (r->fp)
316 fclose (r->fp);
317 r->fp = NULL;
318 r->nb_read = 0;
319 r->line_start_idx = 0;
320 r->line_num = 0;
321 r->line_record.truncate (0);
322 r->use_count = 0;
323 r->total_lines = 0;
fe066ce3 324 r->missing_trailing_newline = true;
a476cb62 325}
326
ffc2c526 327/* Return the file cache that has been less used, recently, or the
328 first empty one. If HIGHEST_USE_COUNT is non-null,
329 *HIGHEST_USE_COUNT is set to the highest use count of the entries
330 in the cache table. */
331
332static fcache*
333evicted_cache_tab_entry (unsigned *highest_use_count)
334{
335 diagnostic_file_cache_init ();
336
337 fcache *to_evict = &fcache_tab[0];
338 unsigned huc = to_evict->use_count;
339 for (unsigned i = 1; i < fcache_tab_size; ++i)
340 {
341 fcache *c = &fcache_tab[i];
342 bool c_is_empty = (c->file_path == NULL);
343
344 if (c->use_count < to_evict->use_count
345 || (to_evict->file_path && c_is_empty))
346 /* We evict C because it's either an entry with a lower use
347 count or one that is empty. */
348 to_evict = c;
349
350 if (huc < c->use_count)
351 huc = c->use_count;
352
353 if (c_is_empty)
354 /* We've reached the end of the cache; subsequent elements are
355 all empty. */
356 break;
357 }
358
359 if (highest_use_count)
360 *highest_use_count = huc;
361
362 return to_evict;
363}
364
365/* Create the cache used for the content of a given file to be
366 accessed by caret diagnostic. This cache is added to an array of
367 cache and can be retrieved by lookup_file_in_cache_tab. This
368 function returns the created cache. Note that only the last
369 fcache_tab_size files are cached. */
370
371static fcache*
372add_file_to_cache_tab (const char *file_path)
373{
374
375 FILE *fp = fopen (file_path, "r");
c1cc4419 376 if (fp == NULL)
377 return NULL;
ffc2c526 378
379 unsigned highest_use_count = 0;
380 fcache *r = evicted_cache_tab_entry (&highest_use_count);
381 r->file_path = file_path;
382 if (r->fp)
383 fclose (r->fp);
384 r->fp = fp;
385 r->nb_read = 0;
386 r->line_start_idx = 0;
387 r->line_num = 0;
388 r->line_record.truncate (0);
389 /* Ensure that this cache entry doesn't get evicted next time
390 add_file_to_cache_tab is called. */
391 r->use_count = ++highest_use_count;
392 r->total_lines = total_lines_num (file_path);
fe066ce3 393 r->missing_trailing_newline = true;
ffc2c526 394
395 return r;
396}
397
398/* Lookup the cache used for the content of a given file accessed by
399 caret diagnostic. If no cached file was found, create a new cache
400 for this file, add it to the array of cached file and return
401 it. */
402
403static fcache*
404lookup_or_add_file_to_cache_tab (const char *file_path)
405{
406 fcache *r = lookup_file_in_cache_tab (file_path);
407 if (r == NULL)
408 r = add_file_to_cache_tab (file_path);
409 return r;
410}
411
412/* Default constructor for a cache of file used by caret
413 diagnostic. */
414
415fcache::fcache ()
416: use_count (0), file_path (NULL), fp (NULL), data (0),
417 size (0), nb_read (0), line_start_idx (0), line_num (0),
fe066ce3 418 total_lines (0), missing_trailing_newline (true)
ffc2c526 419{
420 line_record.create (0);
421}
422
423/* Destructor for a cache of file used by caret diagnostic. */
424
425fcache::~fcache ()
426{
427 if (fp)
428 {
429 fclose (fp);
430 fp = NULL;
431 }
432 if (data)
433 {
434 XDELETEVEC (data);
435 data = 0;
436 }
437 line_record.release ();
438}
439
440/* Returns TRUE iff the cache would need to be filled with data coming
441 from the file. That is, either the cache is empty or full or the
442 current line is empty. Note that if the cache is full, it would
443 need to be extended and filled again. */
444
445static bool
446needs_read (fcache *c)
447{
448 return (c->nb_read == 0
449 || c->nb_read == c->size
450 || (c->line_start_idx >= c->nb_read - 1));
451}
452
453/* Return TRUE iff the cache is full and thus needs to be
454 extended. */
455
456static bool
457needs_grow (fcache *c)
458{
459 return c->nb_read == c->size;
460}
461
462/* Grow the cache if it needs to be extended. */
463
464static void
465maybe_grow (fcache *c)
5a983084 466{
ffc2c526 467 if (!needs_grow (c))
468 return;
469
470 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
2e24ac9b 471 c->data = XRESIZEVEC (char, c->data, size);
ffc2c526 472 c->size = size;
473}
5a983084 474
ffc2c526 475/* Read more data into the cache. Extends the cache if need be.
476 Returns TRUE iff new data could be read. */
477
478static bool
479read_data (fcache *c)
480{
481 if (feof (c->fp) || ferror (c->fp))
482 return false;
483
484 maybe_grow (c);
485
486 char * from = c->data + c->nb_read;
487 size_t to_read = c->size - c->nb_read;
488 size_t nb_read = fread (from, 1, to_read, c->fp);
489
490 if (ferror (c->fp))
491 return false;
492
493 c->nb_read += nb_read;
494 return !!nb_read;
495}
496
497/* Read new data iff the cache needs to be filled with more data
498 coming from the file FP. Return TRUE iff the cache was filled with
499 mode data. */
500
501static bool
502maybe_read_data (fcache *c)
503{
504 if (!needs_read (c))
505 return false;
506 return read_data (c);
507}
508
509/* Read a new line from file FP, using C as a cache for the data
510 coming from the file. Upon successful completion, *LINE is set to
2e24ac9b 511 the beginning of the line found. *LINE points directly in the
512 line cache and is only valid until the next call of get_next_line.
ffc2c526 513 *LINE_LEN is set to the length of the line. Note that the line
514 does not contain any terminal delimiter. This function returns
515 true if some data was read or process from the cache, false
2e24ac9b 516 otherwise. Note that subsequent calls to get_next_line might
517 make the content of *LINE invalid. */
ffc2c526 518
519static bool
520get_next_line (fcache *c, char **line, ssize_t *line_len)
521{
522 /* Fill the cache with data to process. */
523 maybe_read_data (c);
524
525 size_t remaining_size = c->nb_read - c->line_start_idx;
526 if (remaining_size == 0)
527 /* There is no more data to process. */
528 return false;
529
530 char *line_start = c->data + c->line_start_idx;
531
532 char *next_line_start = NULL;
533 size_t len = 0;
534 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
535 if (line_end == NULL)
5a983084 536 {
ffc2c526 537 /* We haven't found the end-of-line delimiter in the cache.
538 Fill the cache with more data from the file and look for the
539 '\n'. */
540 while (maybe_read_data (c))
541 {
542 line_start = c->data + c->line_start_idx;
543 remaining_size = c->nb_read - c->line_start_idx;
544 line_end = (char *) memchr (line_start, '\n', remaining_size);
545 if (line_end != NULL)
546 {
547 next_line_start = line_end + 1;
548 break;
549 }
550 }
551 if (line_end == NULL)
fe066ce3 552 {
553 /* We've loadded all the file into the cache and still no
554 '\n'. Let's say the line ends up at one byte passed the
555 end of the file. This is to stay consistent with the case
556 of when the line ends up with a '\n' and line_end points to
557 that terminal '\n'. That consistency is useful below in
558 the len calculation. */
559 line_end = c->data + c->nb_read ;
560 c->missing_trailing_newline = true;
561 }
562 else
563 c->missing_trailing_newline = false;
5a983084 564 }
ffc2c526 565 else
fe066ce3 566 {
567 next_line_start = line_end + 1;
568 c->missing_trailing_newline = false;
569 }
ffc2c526 570
571 if (ferror (c->fp))
2e24ac9b 572 return false;
ffc2c526 573
574 /* At this point, we've found the end of the of line. It either
575 points to the '\n' or to one byte after the last byte of the
576 file. */
577 gcc_assert (line_end != NULL);
5a983084 578
ffc2c526 579 len = line_end - line_start;
580
581 if (c->line_start_idx < c->nb_read)
582 *line = line_start;
583
584 ++c->line_num;
585
586 /* Before we update our line record, make sure the hint about the
587 total number of lines of the file is correct. If it's not, then
588 we give up recording line boundaries from now on. */
589 bool update_line_record = true;
590 if (c->line_num > c->total_lines)
591 update_line_record = false;
592
593 /* Now update our line record so that re-reading lines from the
594 before c->line_start_idx is faster. */
595 if (update_line_record
596 && c->line_record.length () < fcache_line_record_size)
597 {
598 /* If the file lines fits in the line record, we just record all
599 its lines ...*/
600 if (c->total_lines <= fcache_line_record_size
601 && c->line_num > c->line_record.length ())
602 c->line_record.safe_push (fcache::line_info (c->line_num,
603 c->line_start_idx,
604 line_end - c->data));
605 else if (c->total_lines > fcache_line_record_size)
606 {
607 /* ... otherwise, we just scale total_lines down to
608 (fcache_line_record_size lines. */
609 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
610 if (c->line_record.length () == 0
611 || n >= c->line_record.length ())
612 c->line_record.safe_push (fcache::line_info (c->line_num,
613 c->line_start_idx,
614 line_end - c->data));
615 }
616 }
617
618 /* Update c->line_start_idx so that it points to the next line to be
619 read. */
620 if (next_line_start)
621 c->line_start_idx = next_line_start - c->data;
622 else
623 /* We didn't find any terminal '\n'. Let's consider that the end
624 of line is the end of the data in the cache. The next
625 invocation of get_next_line will either read more data from the
626 underlying file or return false early because we've reached the
627 end of the file. */
628 c->line_start_idx = c->nb_read;
629
630 *line_len = len;
631
632 return true;
633}
634
ffc2c526 635/* Consume the next bytes coming from the cache (or from its
636 underlying file if there are remaining unread bytes in the file)
637 until we reach the next end-of-line (or end-of-file). There is no
638 copying from the cache involved. Return TRUE upon successful
639 completion. */
640
641static bool
642goto_next_line (fcache *cache)
643{
644 char *l;
645 ssize_t len;
646
647 return get_next_line (cache, &l, &len);
648}
649
650/* Read an arbitrary line number LINE_NUM from the file cached in C.
2e24ac9b 651 If the line was read successfully, *LINE points to the beginning
652 of the line in the file cache and *LINE_LEN is the length of the
653 line. *LINE is not nul-terminated, but may contain zero bytes.
654 *LINE is only valid until the next call of read_line_num.
ffc2c526 655 This function returns bool if a line was read. */
656
657static bool
658read_line_num (fcache *c, size_t line_num,
2e24ac9b 659 char **line, ssize_t *line_len)
ffc2c526 660{
661 gcc_assert (line_num > 0);
662
663 if (line_num <= c->line_num)
fc3eff88 664 {
ffc2c526 665 /* We've been asked to read lines that are before c->line_num.
666 So lets use our line record (if it's not empty) to try to
667 avoid re-reading the file from the beginning again. */
13225ff5 668
ffc2c526 669 if (c->line_record.is_empty ())
5a983084 670 {
ffc2c526 671 c->line_start_idx = 0;
672 c->line_num = 0;
673 }
674 else
675 {
676 fcache::line_info *i = NULL;
677 if (c->total_lines <= fcache_line_record_size)
678 {
679 /* In languages where the input file is not totally
680 preprocessed up front, the c->total_lines hint
681 can be smaller than the number of lines of the
682 file. In that case, only the first
683 c->total_lines have been recorded.
684
685 Otherwise, the first c->total_lines we've read have
686 their start/end recorded here. */
687 i = (line_num <= c->total_lines)
688 ? &c->line_record[line_num - 1]
689 : &c->line_record[c->total_lines - 1];
690 gcc_assert (i->line_num <= line_num);
691 }
692 else
693 {
694 /* So the file had more lines than our line record
695 size. Thus the number of lines we've recorded has
696 been scaled down to fcache_line_reacord_size. Let's
697 pick the start/end of the recorded line that is
698 closest to line_num. */
699 size_t n = (line_num <= c->total_lines)
700 ? line_num * fcache_line_record_size / c->total_lines
701 : c ->line_record.length () - 1;
702 if (n < c->line_record.length ())
703 {
704 i = &c->line_record[n];
705 gcc_assert (i->line_num <= line_num);
706 }
707 }
708
709 if (i && i->line_num == line_num)
710 {
2e24ac9b 711 /* We have the start/end of the line. */
712 *line = c->data + i->start_pos;
713 *line_len = i->end_pos - i->start_pos;
ffc2c526 714 return true;
715 }
716
717 if (i)
718 {
719 c->line_start_idx = i->start_pos;
720 c->line_num = i->line_num - 1;
721 }
722 else
723 {
724 c->line_start_idx = 0;
725 c->line_num = 0;
726 }
5a983084 727 }
5a983084 728 }
ffc2c526 729
730 /* Let's walk from line c->line_num up to line_num - 1, without
731 copying any line. */
732 while (c->line_num < line_num - 1)
733 if (!goto_next_line (c))
734 return false;
735
736 /* The line we want is the next one. Let's read and copy it back to
737 the caller. */
2e24ac9b 738 return get_next_line (c, line, line_len);
5a983084 739}
740
2e24ac9b 741/* Return the physical source line that corresponds to FILE_PATH/LINE.
742 The line is not nul-terminated. The returned pointer is only
743 valid until the next call of location_get_source_line.
744 Note that the line can contain several null characters,
0bce23e1 745 so the returned value's length has the actual length of the line.
746 If the function fails, a NULL char_span is returned. */
5a983084 747
0bce23e1 748char_span
749location_get_source_line (const char *file_path, int line)
5a983084 750{
7b645785 751 char *buffer = NULL;
2e24ac9b 752 ssize_t len;
ffc2c526 753
be812248 754 if (line == 0)
0bce23e1 755 return char_span (NULL, 0);
9e8234d0 756
be812248 757 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
9e8234d0 758 if (c == NULL)
0bce23e1 759 return char_span (NULL, 0);
9e8234d0 760
be812248 761 bool read = read_line_num (c, line, &buffer, &len);
0bce23e1 762 if (!read)
763 return char_span (NULL, 0);
5a983084 764
0bce23e1 765 return char_span (buffer, len);
5a983084 766}
767
fe066ce3 768/* Determine if FILE_PATH missing a trailing newline on its final line.
769 Only valid to call once all of the file has been loaded, by
770 requesting a line number beyond the end of the file. */
771
772bool
773location_missing_trailing_newline (const char *file_path)
774{
775 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
776 if (c == NULL)
777 return false;
778
779 return c->missing_trailing_newline;
780}
781
a4cfdfed 782/* Test if the location originates from the spelling location of a
783 builtin-tokens. That is, return TRUE if LOC is a (possibly
784 virtual) location of a built-in token that appears in the expansion
785 list of a macro. Please note that this function also works on
786 tokens that result from built-in tokens. For instance, the
787 function would return true if passed a token "4" that is the result
788 of the expansion of the built-in __LINE__ macro. */
789bool
be1e7283 790is_location_from_builtin_token (location_t loc)
a4cfdfed 791{
551e34da 792 const line_map_ordinary *map = NULL;
a4cfdfed 793 loc = linemap_resolve_location (line_table, loc,
794 LRK_SPELLING_LOCATION, &map);
795 return loc == BUILTINS_LOCATION;
796}
797
39107655 798/* Expand the source location LOC into a human readable location. If
799 LOC is virtual, it resolves to the expansion point of the involved
800 macro. If LOC resolves to a builtin location, the file name of the
801 readable location is set to the string "<built-in>". */
802
803expanded_location
be1e7283 804expand_location (location_t loc)
39107655 805{
56df12ff 806 return expand_location_1 (loc, /*expansion_point_p=*/true,
807 LOCATION_ASPECT_CARET);
39107655 808}
809
810/* Expand the source location LOC into a human readable location. If
811 LOC is virtual, it resolves to the expansion location of the
812 relevant macro. If LOC resolves to a builtin location, the file
813 name of the readable location is set to the string
814 "<built-in>". */
815
816expanded_location
be1e7283 817expand_location_to_spelling_point (location_t loc,
e2b0b327 818 enum location_aspect aspect)
39107655 819{
e2b0b327 820 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
39107655 821}
822
f0479000 823/* The rich_location class within libcpp requires a way to expand
be1e7283 824 location_t instances, and relies on the client code
f0479000 825 providing a symbol named
826 linemap_client_expand_location_to_spelling_point
827 to do this.
828
829 This is the implementation for libcommon.a (all host binaries),
56df12ff 830 which simply calls into expand_location_1. */
f0479000 831
832expanded_location
be1e7283 833linemap_client_expand_location_to_spelling_point (location_t loc,
56df12ff 834 enum location_aspect aspect)
f0479000 835{
56df12ff 836 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
f0479000 837}
838
839
fb2edec0 840/* If LOCATION is in a system header and if it is a virtual location for
841 a token coming from the expansion of a macro, unwind it to the
842 location of the expansion point of the macro. Otherwise, just return
db30b351 843 LOCATION.
844
845 This is used for instance when we want to emit diagnostics about a
fb2edec0 846 token that may be located in a macro that is itself defined in a
847 system header, for example, for the NULL macro. In such a case, if
848 LOCATION were passed directly to diagnostic functions such as
849 warning_at, the diagnostic would be suppressed (unless
850 -Wsystem-headers). */
db30b351 851
be1e7283 852location_t
853expansion_point_location_if_in_system_header (location_t location)
db30b351 854{
855 if (in_system_header_at (location))
856 location = linemap_resolve_location (line_table, location,
857 LRK_MACRO_EXPANSION_POINT,
858 NULL);
859 return location;
860}
39107655 861
5d4db8ef 862/* If LOCATION is a virtual location for a token coming from the expansion
863 of a macro, unwind to the location of the expansion point of the macro. */
864
be1e7283 865location_t
866expansion_point_location (location_t location)
5d4db8ef 867{
868 return linemap_resolve_location (line_table, location,
869 LRK_MACRO_EXPANSION_POINT, NULL);
870}
871
f17776ff 872/* Construct a location with caret at CARET, ranging from START to
873 finish e.g.
874
875 11111111112
876 12345678901234567890
877 522
878 523 return foo + bar;
879 ~~~~^~~~~
880 524
881
882 The location's caret is at the "+", line 523 column 15, but starts
883 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
884 of "bar" at column 19. */
885
886location_t
887make_location (location_t caret, location_t start, location_t finish)
888{
889 location_t pure_loc = get_pure_location (caret);
890 source_range src_range;
aca2a315 891 src_range.m_start = get_start (start);
892 src_range.m_finish = get_finish (finish);
f17776ff 893 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
894 pure_loc,
895 src_range,
896 NULL);
897 return combined_loc;
898}
899
cb4d9ee2 900/* Same as above, but taking a source range rather than two locations. */
901
902location_t
903make_location (location_t caret, source_range src_range)
904{
905 location_t pure_loc = get_pure_location (caret);
906 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
907}
908
e77b8253 909/* Dump statistics to stderr about the memory usage of the line_table
910 set of line maps. This also displays some statistics about macro
911 expansion. */
912
913void
914dump_line_table_statistics (void)
915{
916 struct linemap_stats s;
2a688977 917 long total_used_map_size,
e77b8253 918 macro_maps_size,
919 total_allocated_map_size;
920
921 memset (&s, 0, sizeof (s));
922
923 linemap_get_statistics (line_table, &s);
924
925 macro_maps_size = s.macro_maps_used_size
926 + s.macro_maps_locations_size;
927
928 total_allocated_map_size = s.ordinary_maps_allocated_size
929 + s.macro_maps_allocated_size
930 + s.macro_maps_locations_size;
931
932 total_used_map_size = s.ordinary_maps_used_size
933 + s.macro_maps_used_size
934 + s.macro_maps_locations_size;
935
2a688977 936 fprintf (stderr, "Number of expanded macros: %5ld\n",
e77b8253 937 s.num_expanded_macros);
938 if (s.num_expanded_macros != 0)
2a688977 939 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
e77b8253 940 s.num_macro_tokens / s.num_expanded_macros);
941 fprintf (stderr,
942 "\nLine Table allocations during the "
7a413494 943 "compilation process\n");
03fac02c 944 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
7a413494 945 SIZE_AMOUNT (s.num_ordinary_maps_used));
03fac02c 946 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
7a413494 947 SIZE_AMOUNT (s.ordinary_maps_used_size));
03fac02c 948 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
7a413494 949 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
03fac02c 950 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
7a413494 951 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
03fac02c 952 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
7a413494 953 SIZE_AMOUNT (s.num_macro_maps_used));
03fac02c 954 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
7a413494 955 SIZE_AMOUNT (s.macro_maps_used_size));
03fac02c 956 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
7a413494 957 SIZE_AMOUNT (s.macro_maps_locations_size));
03fac02c 958 fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
7a413494 959 SIZE_AMOUNT (macro_maps_size));
03fac02c 960 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
7a413494 961 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
03fac02c 962 fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
7a413494 963 SIZE_AMOUNT (total_allocated_map_size));
03fac02c 964 fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
7a413494 965 SIZE_AMOUNT (total_used_map_size));
03fac02c 966 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
7a413494 967 SIZE_AMOUNT (s.adhoc_table_size));
03fac02c 968 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
7a413494 969 SIZE_AMOUNT (s.adhoc_table_entries_used));
03fac02c 970 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
7a413494 971 SIZE_AMOUNT (line_table->num_optimized_ranges));
03fac02c 972 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
7a413494 973 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
0ffb4474 974
e77b8253 975 fprintf (stderr, "\n");
976}
28f17529 977
978/* Get location one beyond the final location in ordinary map IDX. */
979
be1e7283 980static location_t
28f17529 981get_end_location (struct line_maps *set, unsigned int idx)
982{
983 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
984 return set->highest_location;
985
986 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
987 return MAP_START_LOCATION (next_map);
988}
989
/* Helper function for write_digit_row.
   Write the units digit of DIGIT to STREAM as one ASCII character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
997
998/* Helper function for dump_location_info.
999 Write a row of numbers to STREAM, numbering a source line,
1000 giving the units, tens, hundreds etc of the column number. */
1001
1002static void
1003write_digit_row (FILE *stream, int indent,
a96cefb2 1004 const line_map_ordinary *map,
be1e7283 1005 location_t loc, int max_col, int divisor)
28f17529 1006{
1007 fprintf (stream, "%*c", indent, ' ');
1008 fprintf (stream, "|");
1009 for (int column = 1; column < max_col; column++)
1010 {
be1e7283 1011 location_t column_loc = loc + (column << map->m_range_bits);
28f17529 1012 write_digit (stream, column_loc / divisor);
1013 }
1014 fprintf (stream, "\n");
1015}
1016
1017/* Write a half-closed (START) / half-open (END) interval of
be1e7283 1018 location_t to STREAM. */
28f17529 1019
1020static void
1021dump_location_range (FILE *stream,
be1e7283 1022 location_t start, location_t end)
28f17529 1023{
1024 fprintf (stream,
be1e7283 1025 " location_t interval: %u <= loc < %u\n",
28f17529 1026 start, end);
1027}
1028
1029/* Write a labelled description of a half-closed (START) / half-open (END)
be1e7283 1030 interval of location_t to STREAM. */
28f17529 1031
1032static void
1033dump_labelled_location_range (FILE *stream,
1034 const char *name,
be1e7283 1035 location_t start, location_t end)
28f17529 1036{
1037 fprintf (stream, "%s\n", name);
1038 dump_location_range (stream, start, end);
1039 fprintf (stream, "\n");
1040}
1041
1042/* Write a visualization of the locations in the line_table to STREAM. */
1043
1044void
1045dump_location_info (FILE *stream)
1046{
1047 /* Visualize the reserved locations. */
1048 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1049 0, RESERVED_LOCATION_COUNT);
1050
1051 /* Visualize the ordinary line_map instances, rendering the sources. */
1052 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1053 {
be1e7283 1054 location_t end_location = get_end_location (line_table, idx);
28f17529 1055 /* half-closed: doesn't include this one. */
1056
551e34da 1057 const line_map_ordinary *map
1058 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
28f17529 1059 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1060 dump_location_range (stream,
1061 MAP_START_LOCATION (map), end_location);
1062 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1063 fprintf (stream, " starting at line: %i\n",
1064 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
a96cefb2 1065 fprintf (stream, " column and range bits: %i\n",
1066 map->m_column_and_range_bits);
28f17529 1067 fprintf (stream, " column bits: %i\n",
a96cefb2 1068 map->m_column_and_range_bits - map->m_range_bits);
1069 fprintf (stream, " range bits: %i\n",
1070 map->m_range_bits);
934182c6 1071 const char * reason;
1072 switch (map->reason) {
1073 case LC_ENTER:
1074 reason = "LC_ENTER";
1075 break;
1076 case LC_LEAVE:
1077 reason = "LC_LEAVE";
1078 break;
1079 case LC_RENAME:
1080 reason = "LC_RENAME";
1081 break;
1082 case LC_RENAME_VERBATIM:
1083 reason = "LC_RENAME_VERBATIM";
1084 break;
1085 case LC_ENTER_MACRO:
1086 reason = "LC_RENAME_MACRO";
1087 break;
1088 default:
1089 reason = "Unknown";
1090 }
1091 fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1092
1093 const line_map_ordinary *includer_map
1094 = linemap_included_from_linemap (line_table, map);
1095 fprintf (stream, " included from location: %d",
1096 linemap_included_from (map));
1097 if (includer_map) {
1098 fprintf (stream, " (in ordinary map %d)",
1099 int (includer_map - line_table->info_ordinary.maps));
1100 }
1101 fprintf (stream, "\n");
28f17529 1102
1103 /* Render the span of source lines that this "map" covers. */
be1e7283 1104 for (location_t loc = MAP_START_LOCATION (map);
28f17529 1105 loc < end_location;
a96cefb2 1106 loc += (1 << map->m_range_bits) )
28f17529 1107 {
a96cefb2 1108 gcc_assert (pure_location_p (line_table, loc) );
1109
28f17529 1110 expanded_location exploc
1111 = linemap_expand_location (line_table, map, loc);
1112
c9281ef8 1113 if (exploc.column == 0)
28f17529 1114 {
1115 /* Beginning of a new source line: draw the line. */
1116
0bce23e1 1117 char_span line_text = location_get_source_line (exploc.file,
1118 exploc.line);
28f17529 1119 if (!line_text)
1120 break;
1121 fprintf (stream,
1122 "%s:%3i|loc:%5i|%.*s\n",
1123 exploc.file, exploc.line,
1124 loc,
0bce23e1 1125 (int)line_text.length (), line_text.get_buffer ());
28f17529 1126
1127 /* "loc" is at column 0, which means "the whole line".
1128 Render the locations *within* the line, by underlining
be1e7283 1129 it, showing the location_t numeric values
28f17529 1130 at each column. */
0bce23e1 1131 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1132 if (max_col > line_text.length ())
1133 max_col = line_text.length () + 1;
28f17529 1134
934182c6 1135 int len_lnum = num_digits (exploc.line);
1136 if (len_lnum < 3)
1137 len_lnum = 3;
1138 int len_loc = num_digits (loc);
1139 if (len_loc < 5)
1140 len_loc = 5;
1141
1142 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
28f17529 1143
1144 /* Thousands. */
1145 if (end_location > 999)
a96cefb2 1146 write_digit_row (stream, indent, map, loc, max_col, 1000);
28f17529 1147
1148 /* Hundreds. */
1149 if (end_location > 99)
a96cefb2 1150 write_digit_row (stream, indent, map, loc, max_col, 100);
28f17529 1151
1152 /* Tens. */
a96cefb2 1153 write_digit_row (stream, indent, map, loc, max_col, 10);
28f17529 1154
1155 /* Units. */
a96cefb2 1156 write_digit_row (stream, indent, map, loc, max_col, 1);
28f17529 1157 }
1158 }
1159 fprintf (stream, "\n");
1160 }
1161
1162 /* Visualize unallocated values. */
1163 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1164 line_table->highest_location,
1165 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1166
1167 /* Visualize the macro line_map instances, rendering the sources. */
1168 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1169 {
be1e7283 1170 /* Each macro map that is allocated owns location_t values
28f17529 1171 that are *lower* that the one before them.
1172 Hence it's meaningful to view them either in order of ascending
1173 source locations, or in order of ascending macro map index. */
be1e7283 1174 const bool ascending_location_ts = true;
1175 unsigned int idx = (ascending_location_ts
28f17529 1176 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1177 : i);
551e34da 1178 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
28f17529 1179 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1180 idx,
1181 linemap_map_get_macro_name (map),
1182 MACRO_MAP_NUM_MACRO_TOKENS (map));
1183 dump_location_range (stream,
1184 map->start_location,
1185 (map->start_location
1186 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1187 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1188 "expansion point is location %i",
1189 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1190 fprintf (stream, " map->start_location: %u\n",
1191 map->start_location);
1192
1193 fprintf (stream, " macro_locations:\n");
1194 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1195 {
be1e7283 1196 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1197 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
28f17529 1198
1199 /* linemap_add_macro_token encodes token numbers in an expansion
1200 by putting them after MAP_START_LOCATION. */
1201
1202 /* I'm typically seeing 4 uninitialized entries at the end of
1203 0xafafafaf.
1204 This appears to be due to macro.c:replace_args
1205 adding 2 extra args for padding tokens; presumably there may
1206 be a leading and/or trailing padding token injected,
1207 each for 2 more location slots.
be1e7283 1208 This would explain there being up to 4 location_ts slots
28f17529 1209 that may be uninitialized. */
1210
1211 fprintf (stream, " %u: %u, %u\n",
1212 i,
1213 x,
1214 y);
1215 if (x == y)
1216 {
1217 if (x < MAP_START_LOCATION (map))
1218 inform (x, "token %u has x-location == y-location == %u", i, x);
1219 else
1220 fprintf (stream,
1221 "x-location == y-location == %u encodes token # %u\n",
1222 x, x - MAP_START_LOCATION (map));
1223 }
1224 else
1225 {
1226 inform (x, "token %u has x-location == %u", i, x);
1227 inform (x, "token %u has y-location == %u", i, y);
1228 }
1229 }
1230 fprintf (stream, "\n");
1231 }
1232
be1e7283 1233 /* It appears that MAX_LOCATION_T itself is never assigned to a
28f17529 1234 macro map, presumably due to an off-by-one error somewhere
1235 between the logic in linemap_enter_macro and
1236 LINEMAPS_MACRO_LOWEST_LOCATION. */
be1e7283 1237 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1238 MAX_LOCATION_T,
1239 MAX_LOCATION_T + 1);
28f17529 1240
1241 /* Visualize ad-hoc values. */
1242 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
be1e7283 1243 MAX_LOCATION_T + 1, UINT_MAX);
28f17529 1244}
99b4f3a2 1245
d4166bdc 1246/* string_concat's constructor. */
1247
1248string_concat::string_concat (int num, location_t *locs)
1249 : m_num (num)
1250{
1251 m_locs = ggc_vec_alloc <location_t> (num);
1252 for (int i = 0; i < num; i++)
1253 m_locs[i] = locs[i];
1254}
1255
1256/* string_concat_db's constructor. */
1257
1258string_concat_db::string_concat_db ()
1259{
1260 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1261}
1262
1263/* Record that a string concatenation occurred, covering NUM
1264 string literal tokens. LOCS is an array of size NUM, containing the
1265 locations of the tokens. A copy of LOCS is taken. */
1266
1267void
1268string_concat_db::record_string_concatenation (int num, location_t *locs)
1269{
1270 gcc_assert (num > 1);
1271 gcc_assert (locs);
1272
1273 location_t key_loc = get_key_loc (locs[0]);
1274
1275 string_concat *concat
1276 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1277 m_table->put (key_loc, concat);
1278}
1279
1280/* Determine if LOC was the location of the the initial token of a
1281 concatenation of string literal tokens.
1282 If so, *OUT_NUM is written to with the number of tokens, and
1283 *OUT_LOCS with the location of an array of locations of the
1284 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1285 storage owned by the string_concat_db.
1286 Otherwise, return false. */
1287
1288bool
1289string_concat_db::get_string_concatenation (location_t loc,
1290 int *out_num,
1291 location_t **out_locs)
1292{
1293 gcc_assert (out_num);
1294 gcc_assert (out_locs);
1295
1296 location_t key_loc = get_key_loc (loc);
1297
1298 string_concat **concat = m_table->get (key_loc);
1299 if (!concat)
1300 return false;
1301
1302 *out_num = (*concat)->m_num;
1303 *out_locs =(*concat)->m_locs;
1304 return true;
1305}
1306
1307/* Internal function. Canonicalize LOC into a form suitable for
1308 use as a key within the database, stripping away macro expansion,
1309 ad-hoc information, and range information, using the location of
1310 the start of LOC within an ordinary linemap. */
1311
1312location_t
1313string_concat_db::get_key_loc (location_t loc)
1314{
1315 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1316 NULL);
1317
1318 loc = get_range_from_loc (line_table, loc).m_start;
1319
1320 return loc;
1321}
1322
1323/* Helper class for use within get_substring_ranges_for_loc.
1324 An vec of cpp_string with responsibility for releasing all of the
1325 str->text for each str in the vector. */
1326
1327class auto_cpp_string_vec : public auto_vec <cpp_string>
1328{
1329 public:
1330 auto_cpp_string_vec (int alloc)
1331 : auto_vec <cpp_string> (alloc) {}
1332
1333 ~auto_cpp_string_vec ()
1334 {
1335 /* Clean up the copies within this vec. */
1336 int i;
1337 cpp_string *str;
1338 FOR_EACH_VEC_ELT (*this, i, str)
1339 free (const_cast <unsigned char *> (str->text));
1340 }
1341};
1342
1343/* Attempt to populate RANGES with source location information on the
1344 individual characters within the string literal found at STRLOC.
1345 If CONCATS is non-NULL, then any string literals that the token at
1346 STRLOC was concatenated with are also added to RANGES.
1347
1348 Return NULL if successful, or an error message if any errors occurred (in
1349 which case RANGES may be only partially populated and should not
1350 be used).
1351
1352 This is implemented by re-parsing the relevant source line(s). */
1353
1354static const char *
1355get_substring_ranges_for_loc (cpp_reader *pfile,
1356 string_concat_db *concats,
1357 location_t strloc,
1358 enum cpp_ttype type,
1359 cpp_substring_ranges &ranges)
1360{
1361 gcc_assert (pfile);
1362
1363 if (strloc == UNKNOWN_LOCATION)
1364 return "unknown location";
1365
8df44fbf 1366 /* Reparsing the strings requires accurate location information.
1367 If -ftrack-macro-expansion has been overridden from its default
1368 of 2, then we might have a location of a macro expansion point,
1369 rather than the location of the literal itself.
1370 Avoid this by requiring that we have full macro expansion tracking
1371 for substring locations to be available. */
1372 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1373 return "track_macro_expansion != 2";
1374
a4d96eb7 1375 /* If #line or # 44 "file"-style directives are present, then there's
1376 no guarantee that the line numbers we have can be used to locate
1377 the strings. For example, we might have a .i file with # directives
1378 pointing back to lines within a .c file, but the .c file might
1379 have been edited since the .i file was created.
1380 In such a case, the safest course is to disable on-demand substring
1381 locations. */
1382 if (line_table->seen_line_directive)
1383 return "seen line directive";
1384
d4166bdc 1385 /* If string concatenation has occurred at STRLOC, get the locations
1386 of all of the literal tokens making up the compound string.
1387 Otherwise, just use STRLOC. */
1388 int num_locs = 1;
1389 location_t *strlocs = &strloc;
1390 if (concats)
1391 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1392
1393 auto_cpp_string_vec strs (num_locs);
1394 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1395 for (int i = 0; i < num_locs; i++)
1396 {
1397 /* Get range of strloc. We will use it to locate the start and finish
1398 of the literal token within the line. */
1399 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1400
1401 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
e2b0b327 1402 {
1403 /* If the string token was within a macro expansion, then we can
1404 cope with it for the simple case where we have a single token.
1405 Otherwise, bail out. */
1406 if (src_range.m_start != src_range.m_finish)
1407 return "macro expansion";
1408 }
1409 else
1410 {
1411 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1412 /* If so, we can't reliably determine where the token started within
1413 its line. */
1414 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1415
1416 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1417 /* If so, we can't reliably determine where the token finished
1418 within its line. */
1419 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1420 }
d4166bdc 1421
1422 expanded_location start
e2b0b327 1423 = expand_location_to_spelling_point (src_range.m_start,
1424 LOCATION_ASPECT_START);
d4166bdc 1425 expanded_location finish
e2b0b327 1426 = expand_location_to_spelling_point (src_range.m_finish,
1427 LOCATION_ASPECT_FINISH);
d4166bdc 1428 if (start.file != finish.file)
1429 return "range endpoints are in different files";
1430 if (start.line != finish.line)
1431 return "range endpoints are on different lines";
1432 if (start.column > finish.column)
1433 return "range endpoints are reversed";
1434
0bce23e1 1435 char_span line = location_get_source_line (start.file, start.line);
1436 if (!line)
d4166bdc 1437 return "unable to read source line";
1438
1439 /* Determine the location of the literal (including quotes
1440 and leading prefix chars, such as the 'u' in a u""
1441 token). */
0bce23e1 1442 size_t literal_length = finish.column - start.column + 1;
d4166bdc 1443
44128dbe 1444 /* Ensure that we don't crash if we got the wrong location. */
0bce23e1 1445 if (line.length () < (start.column - 1 + literal_length))
44128dbe 1446 return "line is not wide enough";
1447
0bce23e1 1448 char_span literal = line.subspan (start.column - 1, literal_length);
1449
d4166bdc 1450 cpp_string from;
1451 from.len = literal_length;
1452 /* Make a copy of the literal, to avoid having to rely on
1453 the lifetime of the copy of the line within the cache.
1454 This will be released by the auto_cpp_string_vec dtor. */
0bce23e1 1455 from.text = (unsigned char *)literal.xstrdup ();
d4166bdc 1456 strs.safe_push (from);
1457
1458 /* For very long lines, a new linemap could have started
1459 halfway through the token.
1460 Ensure that the loc_reader uses the linemap of the
1461 *end* of the token for its start location. */
b9436c5b 1462 const line_map_ordinary *start_ord_map;
1463 linemap_resolve_location (line_table, src_range.m_start,
1464 LRK_SPELLING_LOCATION, &start_ord_map);
d4166bdc 1465 const line_map_ordinary *final_ord_map;
1466 linemap_resolve_location (line_table, src_range.m_finish,
b9436c5b 1467 LRK_SPELLING_LOCATION, &final_ord_map);
99069acd 1468 if (start_ord_map == NULL || final_ord_map == NULL)
1469 return "failed to get ordinary maps";
b9436c5b 1470 /* Bulletproofing. We ought to only have different ordinary maps
1471 for start vs finish due to line-length jumps. */
1472 if (start_ord_map != final_ord_map
1473 && start_ord_map->to_file != final_ord_map->to_file)
2df8a4a6 1474 return "start and finish are spelled in different ordinary maps";
1475 /* The file from linemap_resolve_location ought to match that from
1476 expand_location_to_spelling_point. */
1477 if (start_ord_map->to_file != start.file)
1478 return "mismatching file after resolving linemap";
1479
d4166bdc 1480 location_t start_loc
1481 = linemap_position_for_line_and_column (line_table, final_ord_map,
1482 start.line, start.column);
1483
1484 cpp_string_location_reader loc_reader (start_loc, line_table);
1485 loc_readers.safe_push (loc_reader);
1486 }
1487
1488 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1489 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1490 loc_readers.address (),
1491 num_locs, &ranges, type);
1492 if (err)
1493 return err;
1494
1495 /* Success: "ranges" should now contain information on the string. */
1496 return NULL;
1497}
1498
5927e78e 1499/* Attempt to populate *OUT_LOC with source location information on the
1500 given characters within the string literal found at STRLOC.
1501 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1502 character set.
1503
1504 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1505 and string literal "012345\n789"
1506 *OUT_LOC is written to with:
1507 "012345\n789"
1508 ~^~~~~
1509
d4166bdc 1510 If CONCATS is non-NULL, then any string literals that the token at
1511 STRLOC was concatenated with are also considered.
1512
1513 This is implemented by re-parsing the relevant source line(s).
1514
1515 Return NULL if successful, or an error message if any errors occurred.
1516 Error messages are intended for GCC developers (to help debugging) rather
1517 than for end-users. */
1518
1519const char *
be1e7283 1520get_location_within_string (cpp_reader *pfile,
1521 string_concat_db *concats,
1522 location_t strloc,
1523 enum cpp_ttype type,
1524 int caret_idx, int start_idx, int end_idx,
1525 location_t *out_loc)
5927e78e 1526{
1527 gcc_checking_assert (caret_idx >= 0);
d4166bdc 1528 gcc_checking_assert (start_idx >= 0);
1529 gcc_checking_assert (end_idx >= 0);
5927e78e 1530 gcc_assert (out_loc);
d4166bdc 1531
1532 cpp_substring_ranges ranges;
1533 const char *err
1534 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1535 if (err)
1536 return err;
1537
5927e78e 1538 if (caret_idx >= ranges.get_num_ranges ())
1539 return "caret_idx out of range";
d4166bdc 1540 if (start_idx >= ranges.get_num_ranges ())
1541 return "start_idx out of range";
1542 if (end_idx >= ranges.get_num_ranges ())
1543 return "end_idx out of range";
1544
5927e78e 1545 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1546 ranges.get_range (start_idx).m_start,
1547 ranges.get_range (end_idx).m_finish);
1548 return NULL;
1549}
1550
45183e4c 1551#if CHECKING_P
1552
1553namespace selftest {
1554
1555/* Selftests of location handling. */
1556
5927e78e 1557/* Attempt to populate *OUT_RANGE with source location information on the
1558 given character within the string literal found at STRLOC.
1559 CHAR_IDX refers to an offset within the execution character set.
1560 If CONCATS is non-NULL, then any string literals that the token at
1561 STRLOC was concatenated with are also considered.
1562
1563 This is implemented by re-parsing the relevant source line(s).
1564
1565 Return NULL if successful, or an error message if any errors occurred.
1566 Error messages are intended for GCC developers (to help debugging) rather
1567 than for end-users. */
1568
1569static const char *
1570get_source_range_for_char (cpp_reader *pfile,
1571 string_concat_db *concats,
1572 location_t strloc,
1573 enum cpp_ttype type,
1574 int char_idx,
1575 source_range *out_range)
1576{
1577 gcc_checking_assert (char_idx >= 0);
1578 gcc_assert (out_range);
1579
1580 cpp_substring_ranges ranges;
1581 const char *err
1582 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1583 if (err)
1584 return err;
1585
1586 if (char_idx >= ranges.get_num_ranges ())
1587 return "char_idx out of range";
1588
1589 *out_range = ranges.get_range (char_idx);
d4166bdc 1590 return NULL;
1591}
1592
5927e78e 1593/* As get_source_range_for_char, but write to *OUT the number
d4166bdc 1594 of ranges that are available. */
1595
45183e4c 1596static const char *
d4166bdc 1597get_num_source_ranges_for_substring (cpp_reader *pfile,
1598 string_concat_db *concats,
1599 location_t strloc,
1600 enum cpp_ttype type,
1601 int *out)
1602{
1603 gcc_assert (out);
1604
1605 cpp_substring_ranges ranges;
1606 const char *err
1607 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1608
1609 if (err)
1610 return err;
1611
1612 *out = ranges.get_num_ranges ();
1613 return NULL;
1614}
1615
99b4f3a2 1616/* Selftests of location handling. */
1617
d73881b0 1618/* Verify that compare() on linenum_type handles comparisons over the full
1619 range of the type. */
1620
1621static void
1622test_linenum_comparisons ()
1623{
1624 linenum_type min_line (0);
1625 linenum_type max_line (0xffffffff);
1626 ASSERT_EQ (0, compare (min_line, min_line));
1627 ASSERT_EQ (0, compare (max_line, max_line));
1628
1629 ASSERT_GT (compare (max_line, min_line), 0);
1630 ASSERT_LT (compare (min_line, max_line), 0);
1631}
1632
b73690a4 1633/* Helper function for verifying location data: when location_t
1634 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1635 as having column 0. */
1636
1637static bool
1638should_have_column_data_p (location_t loc)
1639{
1640 if (IS_ADHOC_LOC (loc))
1641 loc = get_location_from_adhoc_loc (line_table, loc);
1642 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1643 return false;
1644 return true;
1645}
1646
1647/* Selftest for should_have_column_data_p. */
1648
1649static void
1650test_should_have_column_data_p ()
1651{
1652 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1653 ASSERT_TRUE
1654 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1655 ASSERT_FALSE
1656 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1657}
1658
99b4f3a2 1659/* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1660 on LOC. */
1661
1662static void
1663assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1664 location_t loc)
1665{
1666 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1667 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
b73690a4 1668 /* If location_t values are sufficiently high, then column numbers
1669 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1670 When close to the threshold, column numbers *may* be present: if
1671 the final linemap before the threshold contains a line that straddles
1672 the threshold, locations in that line have column information. */
1673 if (should_have_column_data_p (loc))
1674 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1675}
1676
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for location_t beyond which line-map.c changes
     behavior (disabling of the range-packing optimization, disabling
     of column-tracking).  We can exercise these by starting the line_table
     at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */

struct line_table_case
{
  line_table_case (int default_range_bits, int base_location)
  : m_default_range_bits (default_range_bits),
    m_base_location (base_location)
  {
  }

  /* Value to give line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which the line_table's locations start.  */
  int m_base_location;
};
1703
7ec388ed 1704/* Constructor. Store the old value of line_table, and create a new
1705 one, using sane defaults. */
b73690a4 1706
7ec388ed 1707line_table_test::line_table_test ()
b73690a4 1708{
7ec388ed 1709 gcc_assert (saved_line_table == NULL);
1710 saved_line_table = line_table;
1711 line_table = ggc_alloc<line_maps> ();
1712 linemap_init (line_table, BUILTINS_LOCATION);
1713 gcc_assert (saved_line_table->reallocator);
1714 line_table->reallocator = saved_line_table->reallocator;
1715 gcc_assert (saved_line_table->round_alloc_size);
1716 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1717 line_table->default_range_bits = 0;
1718}
b73690a4 1719
1720/* Constructor. Store the old value of line_table, and create a new
1721 one, using the sitation described in CASE_. */
1722
7ec388ed 1723line_table_test::line_table_test (const line_table_case &case_)
b73690a4 1724{
7ec388ed 1725 gcc_assert (saved_line_table == NULL);
1726 saved_line_table = line_table;
b73690a4 1727 line_table = ggc_alloc<line_maps> ();
1728 linemap_init (line_table, BUILTINS_LOCATION);
7ec388ed 1729 gcc_assert (saved_line_table->reallocator);
1730 line_table->reallocator = saved_line_table->reallocator;
1731 gcc_assert (saved_line_table->round_alloc_size);
1732 line_table->round_alloc_size = saved_line_table->round_alloc_size;
b73690a4 1733 line_table->default_range_bits = case_.m_default_range_bits;
1734 if (case_.m_base_location)
1735 {
1736 line_table->highest_location = case_.m_base_location;
1737 line_table->highest_line = case_.m_base_location;
1738 }
1739}
1740
1741/* Destructor. Restore the old value of line_table. */
1742
7ec388ed 1743line_table_test::~line_table_test ()
b73690a4 1744{
7ec388ed 1745 gcc_assert (saved_line_table != NULL);
1746 line_table = saved_line_table;
1747 saved_line_table = NULL;
99b4f3a2 1748}
1749
1750/* Verify basic operation of ordinary linemaps. */
1751
1752static void
b73690a4 1753test_accessing_ordinary_linemaps (const line_table_case &case_)
99b4f3a2 1754{
7ec388ed 1755 line_table_test ltt (case_);
b73690a4 1756
99b4f3a2 1757 /* Build a simple linemap describing some locations. */
1758 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1759
1760 linemap_line_start (line_table, 1, 100);
1761 location_t loc_a = linemap_position_for_column (line_table, 1);
1762 location_t loc_b = linemap_position_for_column (line_table, 23);
1763
1764 linemap_line_start (line_table, 2, 100);
1765 location_t loc_c = linemap_position_for_column (line_table, 1);
1766 location_t loc_d = linemap_position_for_column (line_table, 17);
1767
1768 /* Example of a very long line. */
1769 linemap_line_start (line_table, 3, 2000);
1770 location_t loc_e = linemap_position_for_column (line_table, 700);
1771
732cf036 1772 /* Transitioning back to a short line. */
1773 linemap_line_start (line_table, 4, 0);
1774 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1775
1776 if (should_have_column_data_p (loc_back_to_short))
1777 {
1778 /* Verify that we switched to short lines in the linemap. */
1779 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1780 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1781 }
1782
9348467c 1783 /* Example of a line that will eventually be seen to be longer
1784 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1785 below that. */
1786 linemap_line_start (line_table, 5, 2000);
1787
1788 location_t loc_start_of_very_long_line
1789 = linemap_position_for_column (line_table, 2000);
1790 location_t loc_too_wide
1791 = linemap_position_for_column (line_table, 4097);
1792 location_t loc_too_wide_2
1793 = linemap_position_for_column (line_table, 4098);
1794
1795 /* ...and back to a sane line length. */
1796 linemap_line_start (line_table, 6, 100);
1797 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1798
99b4f3a2 1799 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1800
1801 /* Multiple files. */
1802 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1803 linemap_line_start (line_table, 1, 200);
1804 location_t loc_f = linemap_position_for_column (line_table, 150);
1805 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1806
1807 /* Verify that we can recover the location info. */
1808 assert_loceq ("foo.c", 1, 1, loc_a);
1809 assert_loceq ("foo.c", 1, 23, loc_b);
1810 assert_loceq ("foo.c", 2, 1, loc_c);
1811 assert_loceq ("foo.c", 2, 17, loc_d);
1812 assert_loceq ("foo.c", 3, 700, loc_e);
732cf036 1813 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
9348467c 1814
1815 /* In the very wide line, the initial location should be fully tracked. */
1816 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1817 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1818 be disabled. */
1819 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1820 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1821 /*...and column-tracking should be re-enabled for subsequent lines. */
1822 assert_loceq ("foo.c", 6, 10, loc_sane_again);
1823
99b4f3a2 1824 assert_loceq ("bar.c", 1, 150, loc_f);
1825
1826 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
f17776ff 1827 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1828
1829 /* Verify using make_location to build a range, and extracting data
1830 back from it. */
1831 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1832 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1833 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1834 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1835 ASSERT_EQ (loc_b, src_range.m_start);
1836 ASSERT_EQ (loc_d, src_range.m_finish);
99b4f3a2 1837}
1838
1839/* Verify various properties of UNKNOWN_LOCATION. */
1840
1841static void
1842test_unknown_location ()
1843{
1844 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1845 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1846 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1847}
1848
1849/* Verify various properties of BUILTINS_LOCATION. */
1850
1851static void
1852test_builtins ()
1853{
82e14468 1854 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
99b4f3a2 1855 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1856}
1857
aca2a315 1858/* Regression test for make_location.
1330da90 1859 Ensure that we use pure locations for the start/finish of the range,
1860 rather than storing a packed or ad-hoc range as the start/finish. */
aca2a315 1861
1862static void
1863test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1864{
1865 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1866 with C++ frontend.
1867 ....................0000000001111111111222.
1868 ....................1234567890123456789012. */
1869 const char *content = " r += !aaa == bbb;\n";
1870 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1871 line_table_test ltt (case_);
1872 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1873
1874 const location_t c11 = linemap_position_for_column (line_table, 11);
1875 const location_t c12 = linemap_position_for_column (line_table, 12);
1876 const location_t c13 = linemap_position_for_column (line_table, 13);
1877 const location_t c14 = linemap_position_for_column (line_table, 14);
1878 const location_t c21 = linemap_position_for_column (line_table, 21);
1879
1880 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1881 return;
1882
1883 /* Use column 13 for the caret location, arbitrarily, to verify that we
1884 handle start != caret. */
1885 const location_t aaa = make_location (c13, c12, c14);
1886 ASSERT_EQ (c13, get_pure_location (aaa));
1887 ASSERT_EQ (c12, get_start (aaa));
1888 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1889 ASSERT_EQ (c14, get_finish (aaa));
1890 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1891
1892 /* Make a location using a location with a range as the start-point. */
1893 const location_t not_aaa = make_location (c11, aaa, c14);
1894 ASSERT_EQ (c11, get_pure_location (not_aaa));
1895 /* It should use the start location of the range, not store the range
1896 itself. */
1897 ASSERT_EQ (c12, get_start (not_aaa));
1898 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1899 ASSERT_EQ (c14, get_finish (not_aaa));
1900 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1901
1902 /* Similarly, make a location with a range as the end-point. */
1903 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1904 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1905 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1906 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1907 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1908 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1909 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1910 /* It should use the finish location of the range, not store the range
1911 itself. */
1912 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1913 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1914 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1915 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1916 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1917}
1918
99b4f3a2 1919/* Verify reading of input files (e.g. for caret-based diagnostics). */
1920
1921static void
1922test_reading_source_line ()
1923{
423bd600 1924 /* Create a tempfile and write some text to it. */
b73690a4 1925 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1926 "01234567890123456789\n"
1927 "This is the test text\n"
2e24ac9b 1928 "This is the 3rd line");
423bd600 1929
1930 /* Read back a specific line from the tempfile. */
0bce23e1 1931 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
1932 ASSERT_TRUE (source_line);
1933 ASSERT_TRUE (source_line.get_buffer () != NULL);
1934 ASSERT_EQ (20, source_line.length ());
2e24ac9b 1935 ASSERT_TRUE (!strncmp ("This is the 3rd line",
0bce23e1 1936 source_line.get_buffer (), source_line.length ()));
2e24ac9b 1937
0bce23e1 1938 source_line = location_get_source_line (tmp.get_filename (), 2);
1939 ASSERT_TRUE (source_line);
1940 ASSERT_TRUE (source_line.get_buffer () != NULL);
1941 ASSERT_EQ (21, source_line.length ());
2e24ac9b 1942 ASSERT_TRUE (!strncmp ("This is the test text",
0bce23e1 1943 source_line.get_buffer (), source_line.length ()));
423bd600 1944
0bce23e1 1945 source_line = location_get_source_line (tmp.get_filename (), 4);
1946 ASSERT_FALSE (source_line);
1947 ASSERT_TRUE (source_line.get_buffer () == NULL);
99b4f3a2 1948}
1949
b73690a4 1950/* Tests of lexing. */
1951
/* Assert that cpp_token_as_text, called on PARSER for token TOK,
   yields a string equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)            \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));   \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);          \
  SELFTEST_END_STMT
1960
1961/* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1962 and ranges from EXP_START_COL to EXP_FINISH_COL.
1963 Use LOC as the effective location of the selftest. */
1964
1965static void
1966assert_token_loc_eq (const location &loc,
1967 const cpp_token *tok,
1968 const char *exp_filename, int exp_linenum,
1969 int exp_start_col, int exp_finish_col)
1970{
1971 location_t tok_loc = tok->src_loc;
1972 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1973 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1974
1975 /* If location_t values are sufficiently high, then column numbers
1976 will be unavailable. */
1977 if (!should_have_column_data_p (tok_loc))
1978 return;
1979
1980 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1981 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1982 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1983 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1984}
1985
1986/* Use assert_token_loc_eq to verify the TOK->src_loc, using
1987 SELFTEST_LOCATION as the effective location of the selftest. */
1988
1989#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
1990 EXP_START_COL, EXP_FINISH_COL) \
1991 assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
1992 (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1993
1994/* Test of lexing a file using libcpp, verifying tokens and their
1995 location information. */
1996
1997static void
1998test_lexer (const line_table_case &case_)
1999{
2000 /* Create a tempfile and write some text to it. */
2001 const char *content =
2002 /*00000000011111111112222222222333333.3333444444444.455555555556
2003 12345678901234567890123456789012345.6789012345678.901234567890. */
2004 ("test_name /* c-style comment */\n"
2005 " \"test literal\"\n"
2006 " // test c++-style comment\n"
2007 " 42\n");
2008 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2009
7ec388ed 2010 line_table_test ltt (case_);
b73690a4 2011
2012 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2013
2014 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2015 ASSERT_NE (fname, NULL);
2016
2017 /* Verify that we get the expected tokens back, with the correct
2018 location information. */
2019
2020 location_t loc;
2021 const cpp_token *tok;
2022 tok = cpp_get_token_with_location (parser, &loc);
2023 ASSERT_NE (tok, NULL);
2024 ASSERT_EQ (tok->type, CPP_NAME);
2025 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2026 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2027
2028 tok = cpp_get_token_with_location (parser, &loc);
2029 ASSERT_NE (tok, NULL);
2030 ASSERT_EQ (tok->type, CPP_STRING);
2031 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2032 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2033
2034 tok = cpp_get_token_with_location (parser, &loc);
2035 ASSERT_NE (tok, NULL);
2036 ASSERT_EQ (tok->type, CPP_NUMBER);
2037 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2038 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2039
2040 tok = cpp_get_token_with_location (parser, &loc);
2041 ASSERT_NE (tok, NULL);
2042 ASSERT_EQ (tok->type, CPP_EOF);
2043
2044 cpp_finish (parser, NULL);
2045 cpp_destroy (parser);
2046}
2047
/* Forward decls.  */

struct lexer_test;
class lexer_test_options;

/* A class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor.  */

class lexer_test_options
{
 public:
  /* Polymorphic base class, so give it a virtual destructor; this keeps
     destruction through a base pointer well-defined.  */
  virtual ~lexer_test_options () {}

  /* Hook for subclasses to configure the lexer_test being built.  */
  virtual void apply (lexer_test &) = 0;
};
2061
c6a7d9e9 2062/* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2063 in its dtor.
2064
2065 This is needed by struct lexer_test to ensure that the cleanup of the
2066 cpp_reader happens *after* the cleanup of the temp_source_file. */
2067
2068class cpp_reader_ptr
2069{
2070 public:
2071 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2072
2073 ~cpp_reader_ptr ()
2074 {
2075 cpp_finish (m_ptr, NULL);
2076 cpp_destroy (m_ptr);
2077 }
2078
2079 operator cpp_reader * () const { return m_ptr; }
2080
2081 private:
2082 cpp_reader *m_ptr;
2083};
2084
d4166bdc 2085/* A struct for writing lexer tests. */
2086
2087struct lexer_test
2088{
2089 lexer_test (const line_table_case &case_, const char *content,
2090 lexer_test_options *options);
2091 ~lexer_test ();
2092
2093 const cpp_token *get_token ();
2094
c6a7d9e9 2095 /* The ordering of these fields matters.
2096 The line_table_test must be first, since the cpp_reader_ptr
2097 uses it.
2098 The cpp_reader must be cleaned up *after* the temp_source_file
2099 since the filenames in input.c's input cache are owned by the
2100 cpp_reader; in particular, when ~temp_source_file evicts the
2101 filename the filenames must still be alive. */
7ec388ed 2102 line_table_test m_ltt;
c6a7d9e9 2103 cpp_reader_ptr m_parser;
2104 temp_source_file m_tempfile;
d4166bdc 2105 string_concat_db m_concats;
0ccd6e7a 2106 bool m_implicitly_expect_EOF;
d4166bdc 2107};
2108
2109/* Use an EBCDIC encoding for the execution charset, specifically
2110 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2111
2112 This exercises iconv integration within libcpp.
2113 Not every build of iconv supports the given charset,
2114 so we need to flag this error and handle it gracefully. */
2115
2116class ebcdic_execution_charset : public lexer_test_options
2117{
2118 public:
2119 ebcdic_execution_charset () : m_num_iconv_errors (0)
2120 {
2121 gcc_assert (s_singleton == NULL);
2122 s_singleton = this;
2123 }
2124 ~ebcdic_execution_charset ()
2125 {
2126 gcc_assert (s_singleton == this);
2127 s_singleton = NULL;
2128 }
2129
2130 void apply (lexer_test &test) FINAL OVERRIDE
2131 {
2132 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2133 cpp_opts->narrow_charset = "IBM1047";
2134
2135 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
fb225cf1 2136 callbacks->diagnostic = on_diagnostic;
d4166bdc 2137 }
2138
fb225cf1 2139 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2140 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2141 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2142 rich_location *richloc ATTRIBUTE_UNUSED,
2143 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
d4166bdc 2144 ATTRIBUTE_FPTR_PRINTF(5,0)
2145 {
2146 gcc_assert (s_singleton);
9a784cf5 2147 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2148 const char *msg = "conversion from %s to %s not supported by iconv";
2149#ifdef ENABLE_NLS
2150 msg = dgettext ("cpplib", msg);
2151#endif
d4166bdc 2152 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2153 when the local iconv build doesn't support the conversion. */
9a784cf5 2154 if (strcmp (msgid, msg) == 0)
d4166bdc 2155 {
2156 s_singleton->m_num_iconv_errors++;
2157 return true;
2158 }
2159
2160 /* Otherwise, we have an unexpected error. */
2161 abort ();
2162 }
2163
2164 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2165
2166 private:
2167 static ebcdic_execution_charset *s_singleton;
2168 int m_num_iconv_errors;
2169};
2170
2171ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2172
fb225cf1 2173/* A lexer_test_options subclass that records a list of diagnostic
0ccd6e7a 2174 messages emitted by the lexer. */
2175
fb225cf1 2176class lexer_diagnostic_sink : public lexer_test_options
0ccd6e7a 2177{
2178 public:
fb225cf1 2179 lexer_diagnostic_sink ()
0ccd6e7a 2180 {
2181 gcc_assert (s_singleton == NULL);
2182 s_singleton = this;
2183 }
fb225cf1 2184 ~lexer_diagnostic_sink ()
0ccd6e7a 2185 {
2186 gcc_assert (s_singleton == this);
2187 s_singleton = NULL;
2188
2189 int i;
2190 char *str;
fb225cf1 2191 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
0ccd6e7a 2192 free (str);
2193 }
2194
2195 void apply (lexer_test &test) FINAL OVERRIDE
2196 {
2197 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
fb225cf1 2198 callbacks->diagnostic = on_diagnostic;
0ccd6e7a 2199 }
2200
fb225cf1 2201 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2202 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2203 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2204 rich_location *richloc ATTRIBUTE_UNUSED,
2205 const char *msgid, va_list *ap)
0ccd6e7a 2206 ATTRIBUTE_FPTR_PRINTF(5,0)
2207 {
2208 char *msg = xvasprintf (msgid, *ap);
fb225cf1 2209 s_singleton->m_diagnostics.safe_push (msg);
0ccd6e7a 2210 return true;
2211 }
2212
fb225cf1 2213 auto_vec<char *> m_diagnostics;
0ccd6e7a 2214
2215 private:
fb225cf1 2216 static lexer_diagnostic_sink *s_singleton;
0ccd6e7a 2217};
2218
fb225cf1 2219lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
0ccd6e7a 2220
d4166bdc 2221/* Constructor. Override line_table with a new instance based on CASE_,
2222 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2223 start parsing the tempfile. */
2224
2225lexer_test::lexer_test (const line_table_case &case_, const char *content,
c6a7d9e9 2226 lexer_test_options *options)
2227: m_ltt (case_),
2228 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
d4166bdc 2229 /* Create a tempfile and write the text to it. */
2230 m_tempfile (SELFTEST_LOCATION, ".c", content),
0ccd6e7a 2231 m_concats (),
2232 m_implicitly_expect_EOF (true)
d4166bdc 2233{
2234 if (options)
2235 options->apply (*this);
2236
2237 cpp_init_iconv (m_parser);
2238
2239 /* Parse the file. */
2240 const char *fname = cpp_read_main_file (m_parser,
2241 m_tempfile.get_filename ());
2242 ASSERT_NE (fname, NULL);
2243}
2244
0ccd6e7a 2245/* Destructor. By default, verify that the next token in m_parser is EOF. */
d4166bdc 2246
2247lexer_test::~lexer_test ()
2248{
2249 location_t loc;
2250 const cpp_token *tok;
2251
0ccd6e7a 2252 if (m_implicitly_expect_EOF)
2253 {
2254 tok = cpp_get_token_with_location (m_parser, &loc);
2255 ASSERT_NE (tok, NULL);
2256 ASSERT_EQ (tok->type, CPP_EOF);
2257 }
d4166bdc 2258}
2259
2260/* Get the next token from m_parser. */
2261
2262const cpp_token *
2263lexer_test::get_token ()
2264{
2265 location_t loc;
2266 const cpp_token *tok;
2267
2268 tok = cpp_get_token_with_location (m_parser, &loc);
2269 ASSERT_NE (tok, NULL);
2270 return tok;
2271}
2272
2273/* Verify that locations within string literals are correctly handled. */
2274
2275/* Verify get_source_range_for_substring for token(s) at STRLOC,
2276 using the string concatenation database for TEST.
2277
2278 Assert that the character at index IDX is on EXPECTED_LINE,
2279 and that it begins at column EXPECTED_START_COL and ends at
2280 EXPECTED_FINISH_COL (unless the locations are beyond
2281 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2282 columns). */
2283
2284static void
2285assert_char_at_range (const location &loc,
2286 lexer_test& test,
2287 location_t strloc, enum cpp_ttype type, int idx,
2288 int expected_line, int expected_start_col,
2289 int expected_finish_col)
2290{
2291 cpp_reader *pfile = test.m_parser;
2292 string_concat_db *concats = &test.m_concats;
2293
be516c70 2294 source_range actual_range = source_range();
d4166bdc 2295 const char *err
5927e78e 2296 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2297 &actual_range);
d4166bdc 2298 if (should_have_column_data_p (strloc))
2299 ASSERT_EQ_AT (loc, NULL, err);
2300 else
2301 {
2302 ASSERT_STREQ_AT (loc,
2303 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2304 err);
2305 return;
2306 }
2307
2308 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2309 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2310 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2311 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2312
2313 if (should_have_column_data_p (actual_range.m_start))
2314 {
2315 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2316 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2317 }
2318 if (should_have_column_data_p (actual_range.m_finish))
2319 {
2320 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2321 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2322 }
2323}
2324
2325/* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2326 the effective location of any errors. */
2327
2328#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2329 EXPECTED_START_COL, EXPECTED_FINISH_COL) \
2330 assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2331 (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2332 (EXPECTED_FINISH_COL))
2333
2334/* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2335 using the string concatenation database for TEST.
2336
2337 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2338
2339static void
2340assert_num_substring_ranges (const location &loc,
2341 lexer_test& test,
2342 location_t strloc,
2343 enum cpp_ttype type,
2344 int expected_num_ranges)
2345{
2346 cpp_reader *pfile = test.m_parser;
2347 string_concat_db *concats = &test.m_concats;
2348
45183e4c 2349 int actual_num_ranges = -1;
d4166bdc 2350 const char *err
2351 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2352 &actual_num_ranges);
2353 if (should_have_column_data_p (strloc))
2354 ASSERT_EQ_AT (loc, NULL, err);
2355 else
2356 {
2357 ASSERT_STREQ_AT (loc,
2358 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2359 err);
2360 return;
2361 }
2362 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2363}
2364
2365/* Macro for calling assert_num_substring_ranges, supplying
2366 SELFTEST_LOCATION for the effective location of any errors. */
2367
2368#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2369 EXPECTED_NUM_RANGES) \
2370 assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2371 (TYPE), (EXPECTED_NUM_RANGES))
2372
2373
2374/* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2375 returns an error (using the string concatenation database for TEST). */
2376
2377static void
2378assert_has_no_substring_ranges (const location &loc,
2379 lexer_test& test,
2380 location_t strloc,
2381 enum cpp_ttype type,
2382 const char *expected_err)
2383{
2384 cpp_reader *pfile = test.m_parser;
2385 string_concat_db *concats = &test.m_concats;
2386 cpp_substring_ranges ranges;
2387 const char *actual_err
2388 = get_substring_ranges_for_loc (pfile, concats, strloc,
2389 type, ranges);
2390 if (should_have_column_data_p (strloc))
2391 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2392 else
2393 ASSERT_STREQ_AT (loc,
2394 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2395 actual_err);
2396}
2397
2398#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
2399 assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2400 (STRLOC), (TYPE), (ERR))
2401
2402/* Lex a simple string literal. Verify the substring location data, before
2403 and after running cpp_interpret_string on it. */
2404
2405static void
2406test_lexer_string_locations_simple (const line_table_case &case_)
2407{
2408 /* Digits 0-9 (with 0 at column 10), the simple way.
2409 ....................000000000.11111111112.2222222223333333333
2410 ....................123456789.01234567890.1234567890123456789
2411 We add a trailing comment to ensure that we correctly locate
2412 the end of the string literal token. */
2413 const char *content = " \"0123456789\" /* not a string */\n";
2414 lexer_test test (case_, content, NULL);
2415
2416 /* Verify that we get the expected token back, with the correct
2417 location information. */
2418 const cpp_token *tok = test.get_token ();
2419 ASSERT_EQ (tok->type, CPP_STRING);
2420 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2421 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2422
2423 /* At this point in lexing, the quote characters are treated as part of
2424 the string (they are stripped off by cpp_interpret_string). */
2425
2426 ASSERT_EQ (tok->val.str.len, 12);
2427
2428 /* Verify that cpp_interpret_string works. */
2429 cpp_string dst_string;
2430 const enum cpp_ttype type = CPP_STRING;
2431 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2432 &dst_string, type);
2433 ASSERT_TRUE (result);
2434 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2435 free (const_cast <unsigned char *> (dst_string.text));
2436
2437 /* Verify ranges of individual characters. This no longer includes the
7413e757 2438 opening quote, but does include the closing quote. */
2439 for (int i = 0; i <= 10; i++)
d4166bdc 2440 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2441 10 + i, 10 + i);
2442
7413e757 2443 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
d4166bdc 2444}
2445
2446/* As test_lexer_string_locations_simple, but use an EBCDIC execution
2447 encoding. */
2448
2449static void
2450test_lexer_string_locations_ebcdic (const line_table_case &case_)
2451{
2452 /* EBCDIC support requires iconv. */
2453 if (!HAVE_ICONV)
2454 return;
2455
2456 /* Digits 0-9 (with 0 at column 10), the simple way.
2457 ....................000000000.11111111112.2222222223333333333
2458 ....................123456789.01234567890.1234567890123456789
2459 We add a trailing comment to ensure that we correctly locate
2460 the end of the string literal token. */
2461 const char *content = " \"0123456789\" /* not a string */\n";
2462 ebcdic_execution_charset use_ebcdic;
2463 lexer_test test (case_, content, &use_ebcdic);
2464
2465 /* Verify that we get the expected token back, with the correct
2466 location information. */
2467 const cpp_token *tok = test.get_token ();
2468 ASSERT_EQ (tok->type, CPP_STRING);
2469 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2470 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2471
2472 /* At this point in lexing, the quote characters are treated as part of
2473 the string (they are stripped off by cpp_interpret_string). */
2474
2475 ASSERT_EQ (tok->val.str.len, 12);
2476
2477 /* The remainder of the test requires an iconv implementation that
2478 can convert from UTF-8 to the EBCDIC encoding requested above. */
2479 if (use_ebcdic.iconv_errors_occurred_p ())
2480 return;
2481
2482 /* Verify that cpp_interpret_string works. */
2483 cpp_string dst_string;
2484 const enum cpp_ttype type = CPP_STRING;
2485 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2486 &dst_string, type);
2487 ASSERT_TRUE (result);
2488 /* We should now have EBCDIC-encoded text, specifically
2489 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2490 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2491 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2492 (const char *)dst_string.text);
2493 free (const_cast <unsigned char *> (dst_string.text));
2494
2495 /* Verify that we don't attempt to record substring location information
2496 for such cases. */
2497 ASSERT_HAS_NO_SUBSTRING_RANGES
2498 (test, tok->src_loc, type,
2499 "execution character set != source character set");
2500}
2501
2502/* Lex a string literal containing a hex-escaped character.
2503 Verify the substring location data, before and after running
2504 cpp_interpret_string on it. */
2505
2506static void
2507test_lexer_string_locations_hex (const line_table_case &case_)
2508{
2509 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2510 and with a space in place of digit 6, to terminate the escaped
2511 hex code.
2512 ....................000000000.111111.11112222.
2513 ....................123456789.012345.67890123. */
2514 const char *content = " \"01234\\x35 789\"\n";
2515 lexer_test test (case_, content, NULL);
2516
2517 /* Verify that we get the expected token back, with the correct
2518 location information. */
2519 const cpp_token *tok = test.get_token ();
2520 ASSERT_EQ (tok->type, CPP_STRING);
2521 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2522 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2523
2524 /* At this point in lexing, the quote characters are treated as part of
2525 the string (they are stripped off by cpp_interpret_string). */
2526 ASSERT_EQ (tok->val.str.len, 15);
2527
2528 /* Verify that cpp_interpret_string works. */
2529 cpp_string dst_string;
2530 const enum cpp_ttype type = CPP_STRING;
2531 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2532 &dst_string, type);
2533 ASSERT_TRUE (result);
2534 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2535 free (const_cast <unsigned char *> (dst_string.text));
2536
2537 /* Verify ranges of individual characters. This no longer includes the
7413e757 2538 opening quote, but does include the closing quote. */
d4166bdc 2539 for (int i = 0; i <= 4; i++)
2540 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2541 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
7413e757 2542 for (int i = 6; i <= 10; i++)
d4166bdc 2543 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2544
7413e757 2545 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
d4166bdc 2546}
2547
2548/* Lex a string literal containing an octal-escaped character.
2549 Verify the substring location data after running cpp_interpret_string
2550 on it. */
2551
2552static void
2553test_lexer_string_locations_oct (const line_table_case &case_)
2554{
2555 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2556 and with a space in place of digit 6, to terminate the escaped
2557 octal code.
2558 ....................000000000.111111.11112222.2222223333333333444
2559 ....................123456789.012345.67890123.4567890123456789012 */
2560 const char *content = " \"01234\\065 789\" /* not a string */\n";
2561 lexer_test test (case_, content, NULL);
2562
2563 /* Verify that we get the expected token back, with the correct
2564 location information. */
2565 const cpp_token *tok = test.get_token ();
2566 ASSERT_EQ (tok->type, CPP_STRING);
2567 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2568
2569 /* Verify that cpp_interpret_string works. */
2570 cpp_string dst_string;
2571 const enum cpp_ttype type = CPP_STRING;
2572 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2573 &dst_string, type);
2574 ASSERT_TRUE (result);
2575 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2576 free (const_cast <unsigned char *> (dst_string.text));
2577
2578 /* Verify ranges of individual characters. This no longer includes the
7413e757 2579 opening quote, but does include the closing quote. */
d4166bdc 2580 for (int i = 0; i < 5; i++)
2581 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2582 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
7413e757 2583 for (int i = 6; i <= 10; i++)
d4166bdc 2584 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2585
7413e757 2586 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
d4166bdc 2587}
2588
2589/* Test of string literal containing letter escapes. */
2590
2591static void
2592test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2593{
2594 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2595 .....................000000000.1.11111.1.1.11222.22222223333333
2596 .....................123456789.0.12345.6.7.89012.34567890123456. */
2597 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2598 lexer_test test (case_, content, NULL);
2599
2600 /* Verify that we get the expected tokens back. */
2601 const cpp_token *tok = test.get_token ();
2602 ASSERT_EQ (tok->type, CPP_STRING);
2603 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2604
2605 /* Verify ranges of individual characters. */
2606 /* "\t". */
2607 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2608 0, 1, 10, 11);
2609 /* "foo". */
2610 for (int i = 1; i <= 3; i++)
2611 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2612 i, 1, 11 + i, 11 + i);
2613 /* "\\" and "\n". */
2614 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2615 4, 1, 15, 16);
2616 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2617 5, 1, 17, 18);
2618
7413e757 2619 /* "bar" and closing quote for nul-terminator. */
2620 for (int i = 6; i <= 9; i++)
d4166bdc 2621 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2622 i, 1, 13 + i, 13 + i);
2623
7413e757 2624 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
d4166bdc 2625}
2626
2627/* Another test of a string literal containing a letter escape.
2628 Based on string seen in
2629 printf ("%-%\n");
2630 in gcc.dg/format/c90-printf-1.c. */
2631
2632static void
2633test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2634{
2635 /* .....................000000000.1111.11.1111.22222222223.
2636 .....................123456789.0123.45.6789.01234567890. */
2637 const char *content = (" \"%-%\\n\" /* non-str */\n");
2638 lexer_test test (case_, content, NULL);
2639
2640 /* Verify that we get the expected tokens back. */
2641 const cpp_token *tok = test.get_token ();
2642 ASSERT_EQ (tok->type, CPP_STRING);
2643 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2644
2645 /* Verify ranges of individual characters. */
2646 /* "%-%". */
2647 for (int i = 0; i < 3; i++)
2648 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2649 i, 1, 10 + i, 10 + i);
2650 /* "\n". */
2651 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2652 3, 1, 13, 14);
2653
7413e757 2654 /* Closing quote for nul-terminator. */
2655 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2656 4, 1, 15, 15);
2657
2658 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
d4166bdc 2659}
2660
2661/* Lex a string literal containing UCN 4 characters.
2662 Verify the substring location data after running cpp_interpret_string
2663 on it. */
2664
2665static void
2666test_lexer_string_locations_ucn4 (const line_table_case &case_)
2667{
2668 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2669 as UCN 4.
2670 ....................000000000.111111.111122.222222223.33333333344444
2671 ....................123456789.012345.678901.234567890.12345678901234 */
2672 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2673 lexer_test test (case_, content, NULL);
2674
2675 /* Verify that we get the expected token back, with the correct
2676 location information. */
2677 const cpp_token *tok = test.get_token ();
2678 ASSERT_EQ (tok->type, CPP_STRING);
2679 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2680
2681 /* Verify that cpp_interpret_string works.
2682 The string should be encoded in the execution character
2683 set. Assuming that that is UTF-8, we should have the following:
2684 ----------- ---- ----- ------- ----------------
2685 Byte offset Byte Octal Unicode Source Column(s)
2686 ----------- ---- ----- ------- ----------------
2687 0 0x30 '0' 10
2688 1 0x31 '1' 11
2689 2 0x32 '2' 12
2690 3 0x33 '3' 13
2691 4 0x34 '4' 14
2692 5 0xE2 \342 U+2174 15-20
2693 6 0x85 \205 (cont) 15-20
2694 7 0xB4 \264 (cont) 15-20
2695 8 0xE2 \342 U+2175 21-26
2696 9 0x85 \205 (cont) 21-26
2697 10 0xB5 \265 (cont) 21-26
2698 11 0x37 '7' 27
2699 12 0x38 '8' 28
2700 13 0x39 '9' 29
7413e757 2701 14 0x00 30 (closing quote)
d4166bdc 2702 ----------- ---- ----- ------- ---------------. */
2703
2704 cpp_string dst_string;
2705 const enum cpp_ttype type = CPP_STRING;
2706 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2707 &dst_string, type);
2708 ASSERT_TRUE (result);
2709 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2710 (const char *)dst_string.text);
2711 free (const_cast <unsigned char *> (dst_string.text));
2712
2713 /* Verify ranges of individual characters. This no longer includes the
7413e757 2714 opening quote, but does include the closing quote.
d4166bdc 2715 '01234'. */
2716 for (int i = 0; i <= 4; i++)
2717 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2718 /* U+2174. */
2719 for (int i = 5; i <= 7; i++)
2720 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2721 /* U+2175. */
2722 for (int i = 8; i <= 10; i++)
2723 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
7413e757 2724 /* '789' (columns 27-29) and the nul terminator (closing quote,
   column 30). */
2725 for (int i = 11; i <= 14; i++)
d4166bdc 2726 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2727
7413e757 2728 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
d4166bdc 2729}
2730
2731/* Lex a string literal containing UCN 8 characters.
2732 Verify the substring location data after running cpp_interpret_string
2733 on it. */
2734
2735static void
2736test_lexer_string_locations_ucn8 (const line_table_case &case_)
2737{
2738 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2739 ....................000000000.111111.1111222222.2222333333333.344444
2740 ....................123456789.012345.6789012345.6789012345678.901234 */
2741 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2742 lexer_test test (case_, content, NULL);
2743
2744 /* Verify that we get the expected token back, with the correct
2745 location information. */
2746 const cpp_token *tok = test.get_token ();
2747 ASSERT_EQ (tok->type, CPP_STRING);
2748 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2749 "\"01234\\U00002174\\U00002175789\"");
2750
2751 /* Verify that cpp_interpret_string works.
2752 The UTF-8 encoding of the string is identical to that from
2753 the ucn4 testcase above; the only difference is the column
2754 locations. */
2755 cpp_string dst_string;
2756 const enum cpp_ttype type = CPP_STRING;
2757 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2758 &dst_string, type);
2759 ASSERT_TRUE (result);
2760 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2761 (const char *)dst_string.text);
2762 free (const_cast <unsigned char *> (dst_string.text));
2763
2764 /* Verify ranges of individual characters. This no longer includes the
7413e757 2765 opening quote, but does include the closing quote.
d4166bdc 2766 '01234'. */
2767 for (int i = 0; i <= 4; i++)
2768 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2769 /* U+2174: each of its three UTF-8 bytes maps to the whole
   10-column UCN (columns 15-24). */
2770 for (int i = 5; i <= 7; i++)
2771 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2772 /* U+2175: likewise, columns 25-34. */
2773 for (int i = 8; i <= 10; i++)
2774 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2775 /* '789' at columns 35-37 */
2776 for (int i = 11; i <= 13; i++)
2777 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
7413e757 2778 /* Closing quote/nul-terminator at column 38. */
2779 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
d4166bdc 2780
7413e757 2781 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
d4166bdc 2782}
2783
2784/* Fetch a big-endian 32-bit value and convert to host endianness.
   PTR_BE_VALUE points to four bytes stored most-significant byte first.
   The bytes are read one at a time and recombined with shifts, so the
   result does not depend on the byte order of the host. */
2785
2786static uint32_t
2787uint32_from_big_endian (const uint32_t *ptr_be_value)
2788{
2789 const unsigned char *buf = (const unsigned char *)ptr_be_value;
2790 return (((uint32_t) buf[0] << 24)
2791 | ((uint32_t) buf[1] << 16)
2792 | ((uint32_t) buf[2] << 8)
2793 | (uint32_t) buf[3]);
2794}
2795
2796/* Lex a wide string literal and verify that attempts to read substring
2797 location data from it fail gracefully. */
2798
2799static void
2800test_lexer_string_locations_wide_string (const line_table_case &case_)
2801{
2802 /* Digits 0-9.
2803 ....................000000000.11111111112.22222222233333
2804 ....................123456789.01234567890.12345678901234 */
2805 const char *content = " L\"0123456789\" /* non-str */\n";
2806 lexer_test test (case_, content, NULL);
2807
2808 /* Verify that we get the expected token back, with the correct
2809 location information. */
2810 const cpp_token *tok = test.get_token ();
2811 ASSERT_EQ (tok->type, CPP_WSTRING);
2812 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2813
2814 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2815 cpp_string dst_string;
2816 const enum cpp_ttype type = CPP_WSTRING;
2817 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2818 &dst_string, type);
2819 ASSERT_TRUE (result);
2820 /* The cpp_reader defaults to big-endian with
2821 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2822 now be encoded as UTF-32BE. Spot-check the first, a middle and the
   last digit, followed by the terminating zero. */
2823 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2824 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2825 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2826 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2827 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2828 free (const_cast <unsigned char *> (dst_string.text));
2829
2830 /* We don't yet support generating substring location information
2831 for L"" strings. */
2832 ASSERT_HAS_NO_SUBSTRING_RANGES
2833 (test, tok->src_loc, type,
2834 "execution character set != source character set");
2835}
2836
2837/* Fetch a big-endian 16-bit value and convert to host endianness.
   PTR_BE_VALUE points to two bytes stored most-significant byte first.
   The bytes are read one at a time and recombined with a shift, so the
   result does not depend on the byte order of the host. */
2838
2839static uint16_t
2840uint16_from_big_endian (const uint16_t *ptr_be_value)
2841{
2842 const unsigned char *buf = (const unsigned char *)ptr_be_value;
2843 return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1];
2844}
2845
2846/* Lex a u"" string literal and verify that attempts to read substring
2847 location data from it fail gracefully. */
2848
2849static void
2850test_lexer_string_locations_string16 (const line_table_case &case_)
2851{
2852 /* Digits 0-9.
2853 ....................000000000.11111111112.22222222233333
2854 ....................123456789.01234567890.12345678901234 */
2855 const char *content = " u\"0123456789\" /* non-str */\n";
2856 lexer_test test (case_, content, NULL);
2857
2858 /* Verify that we get the expected token back, with the correct
2859 location information. */
2860 const cpp_token *tok = test.get_token ();
2861 ASSERT_EQ (tok->type, CPP_STRING16);
2862 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2863
2864 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2865 cpp_string dst_string;
2866 const enum cpp_ttype type = CPP_STRING16;
2867 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2868 &dst_string, type);
2869 ASSERT_TRUE (result);
2870
2871 /* The cpp_reader defaults to big-endian, so dst_string should
2872 now be encoded as UTF-16BE. */
2873 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2874 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2875 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2876 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2877 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2878 free (const_cast <unsigned char *> (dst_string.text));
2879
2880 /* We don't yet support generating substring location information
2881 for u"" strings. */
2882 ASSERT_HAS_NO_SUBSTRING_RANGES
2883 (test, tok->src_loc, type,
2884 "execution character set != source character set");
2885}
2886
2887/* Lex a U"" string literal and verify that attempts to read substring
2888 location data from it fail gracefully. */
2889
2890static void
2891test_lexer_string_locations_string32 (const line_table_case &case_)
2892{
2893 /* Digits 0-9.
2894 ....................000000000.11111111112.22222222233333
2895 ....................123456789.01234567890.12345678901234 */
2896 const char *content = " U\"0123456789\" /* non-str */\n";
2897 lexer_test test (case_, content, NULL);
2898
2899 /* Verify that we get the expected token back, with the correct
2900 location information. */
2901 const cpp_token *tok = test.get_token ();
2902 ASSERT_EQ (tok->type, CPP_STRING32);
2903 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2904
2905 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2906 cpp_string dst_string;
2907 const enum cpp_ttype type = CPP_STRING32;
2908 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2909 &dst_string, type);
2910 ASSERT_TRUE (result);
2911
2912 /* The cpp_reader defaults to big-endian, so dst_string should
2913 now be encoded as UTF-32BE. */
2914 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2915 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2916 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2917 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2918 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2919 free (const_cast <unsigned char *> (dst_string.text));
2920
2921 /* We don't yet support generating substring location information
2922 for U"" strings. */
2923 ASSERT_HAS_NO_SUBSTRING_RANGES
2924 (test, tok->src_loc, type,
2925 "execution character set != source character set");
2926}
2927
2928/* Lex a u8-string literal.
2929 Verify the substring location data after running cpp_interpret_string
2930 on it. */
2931
2932static void
2933test_lexer_string_locations_u8 (const line_table_case &case_)
2934{
2935 /* Digits 0-9.
2936 ....................000000000.11111111112.22222222233333
2937 ....................123456789.01234567890.12345678901234 */
2938 const char *content = " u8\"0123456789\" /* non-str */\n";
2939 lexer_test test (case_, content, NULL);
2940
2941 /* Verify that we get the expected token back, with the correct
2942 location information. */
2943 const cpp_token *tok = test.get_token ();
2944 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2945 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2946
2947 /* Verify that cpp_interpret_string works.
   NOTE(review): the token lexes as CPP_UTF8STRING but is interpreted
   (and its ranges queried) as CPP_STRING below -- confirm that this
   is intentional. */
2948 cpp_string dst_string;
2949 const enum cpp_ttype type = CPP_STRING;
2950 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2951 &dst_string, type);
2952 ASSERT_TRUE (result);
2953 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2954 free (const_cast <unsigned char *> (dst_string.text));
2955
2956 /* Verify ranges of individual characters. This no longer includes the
7413e757 2957 opening quote, but does include the closing quote. */
2958 for (int i = 0; i <= 10; i++)
d4166bdc 2959 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2960}
2961
2962/* Lex a string literal containing UTF-8 source characters.
2963 Verify the substring location data after running cpp_interpret_string
2964 on it. */
2965
2966static void
2967test_lexer_string_locations_utf8_source (const line_table_case &case_)
2968{
2969 /* This string literal is written out to the source file as UTF-8,
2970 and is of the form "before mojibake after", where "mojibake"
2971 is written as the following four unicode code points:
2972 U+6587 CJK UNIFIED IDEOGRAPH-6587
2973 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2974 U+5316 CJK UNIFIED IDEOGRAPH-5316
2975 U+3051 HIRAGANA LETTER KE.
2976 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2977 "before" and "after" are 1 byte per unicode character.
2978
2979 The numbering shown are "columns", which are *byte* numbers within
2980 the line, rather than unicode character numbers.
2981
2982 .................... 000000000.1111111.
2983 .................... 123456789.0123456. */
2984 const char *content = (" \"before "
2985 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2986 UTF-8: 0xE6 0x96 0x87
2987 C octal escaped UTF-8: \346\226\207
2988 "column" numbers: 17-19. */
2989 "\346\226\207"
2990
2991 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2992 UTF-8: 0xE5 0xAD 0x97
2993 C octal escaped UTF-8: \345\255\227
2994 "column" numbers: 20-22. */
2995 "\345\255\227"
2996
2997 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2998 UTF-8: 0xE5 0x8C 0x96
2999 C octal escaped UTF-8: \345\214\226
3000 "column" numbers: 23-25. */
3001 "\345\214\226"
3002
3003 /* U+3051 HIRAGANA LETTER KE
3004 UTF-8: 0xE3 0x81 0x91
3005 C octal escaped UTF-8: \343\201\221
3006 "column" numbers: 26-28. */
3007 "\343\201\221"
3008
3009 /* column numbers 29 onwards
3010 2333333.33334444444444
3011 9012345.67890123456789. */
3012 " after\" /* non-str */\n");
3013 lexer_test test (case_, content, NULL);
3014
3015 /* Verify that we get the expected token back, with the correct
3016 location information. */
3017 const cpp_token *tok = test.get_token ();
3018 ASSERT_EQ (tok->type, CPP_STRING);
3019 ASSERT_TOKEN_AS_TEXT_EQ
3020 (test.m_parser, tok,
3021 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3022
3023 /* Verify that cpp_interpret_string works. */
3024 cpp_string dst_string;
3025 const enum cpp_ttype type = CPP_STRING;
3026 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3027 &dst_string, type);
3028 ASSERT_TRUE (result);
3029 ASSERT_STREQ
3030 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3031 (const char *)dst_string.text);
3032 free (const_cast <unsigned char *> (dst_string.text));
3033
3034 /* Verify ranges of individual characters. This no longer includes the
7413e757 3035 opening quote, but does include the closing quote.
d4166bdc 3036 Assuming that both source and execution encodings are UTF-8, we have
7413e757 3037 a run of 25 octets in each (7 for "before ", 12 for the four 3-byte
   characters and 6 for " after"), plus the NUL terminator. */
d4166bdc 3038 for (int i = 0; i < 25; i++)
3039 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
7413e757 3040 /* NUL-terminator should use the closing quote at column 35. */
3041 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
d4166bdc 3042
7413e757 3043 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
d4166bdc 3044}
3045
3046/* Test of string literal concatenation: two adjacent literals on
   separate source lines are interpreted as one string, and the ranges
   of the combined string are looked up via the first literal's
   location. */
3047
3048static void
3049test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3050{
3051 /* Digits 0-9.
3052 .....................000000000.111111.11112222222222
3053 .....................123456789.012345.67890123456789. */
3054 const char *content = (" \"01234\" /* non-str */\n"
3055 " \"56789\" /* non-str */\n");
3056 lexer_test test (case_, content, NULL);
3057
3058 location_t input_locs[2];
3059
3060 /* Verify that we get the expected tokens back. */
3061 auto_vec <cpp_string> input_strings;
3062 const cpp_token *tok_a = test.get_token ();
3063 ASSERT_EQ (tok_a->type, CPP_STRING);
3064 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3065 input_strings.safe_push (tok_a->val.str);
3066 input_locs[0] = tok_a->src_loc;
3067
3068 const cpp_token *tok_b = test.get_token ();
3069 ASSERT_EQ (tok_b->type, CPP_STRING);
3070 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3071 input_strings.safe_push (tok_b->val.str);
3072 input_locs[1] = tok_b->src_loc;
3073
3074 /* Verify that cpp_interpret_string works. */
3075 cpp_string dst_string;
3076 const enum cpp_ttype type = CPP_STRING;
3077 bool result = cpp_interpret_string (test.m_parser,
3078 input_strings.address (), 2,
3079 &dst_string, type);
3080 ASSERT_TRUE (result);
3081 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3082 free (const_cast <unsigned char *> (dst_string.text));
3083
3084 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3085 test.m_concats.record_string_concatenation (2, input_locs);
3086
3087 location_t initial_loc = input_locs[0];
3088
7413e757 3089 /* "01234" on line 1. */
d4166bdc 3090 for (int i = 0; i <= 4; i++)
3091 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
7413e757 3092 /* "56789" in line 2 (columns 10-14), plus its closing quote
   (column 15) for the nul terminator. */
3093 for (int i = 5; i <= 10; i++)
d4166bdc 3094 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3095
7413e757 3096 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
d4166bdc 3097}
3098
3099/* Another test of string literal concatenation: five two-digit
   literals, one per source line. */
3100
3101static void
3102test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3103{
3104 /* Digits 0-9.
3105 .....................000000000.111.11111112222222
3106 .....................123456789.012.34567890123456. */
3107 const char *content = (" \"01\" /* non-str */\n"
3108 " \"23\" /* non-str */\n"
3109 " \"45\" /* non-str */\n"
3110 " \"67\" /* non-str */\n"
3111 " \"89\" /* non-str */\n");
3112 lexer_test test (case_, content, NULL);
3113
3114 auto_vec <cpp_string> input_strings;
3115 location_t input_locs[5];
3116
3117 /* Verify that we get the expected tokens back. */
3118 for (int i = 0; i < 5; i++)
3119 {
3120 const cpp_token *tok = test.get_token ();
3121 ASSERT_EQ (tok->type, CPP_STRING);
3122 input_strings.safe_push (tok->val.str);
3123 input_locs[i] = tok->src_loc;
3124 }
3125
3126 /* Verify that cpp_interpret_string works. */
3127 cpp_string dst_string;
3128 const enum cpp_ttype type = CPP_STRING;
3129 bool result = cpp_interpret_string (test.m_parser,
3130 input_strings.address (), 5,
3131 &dst_string, type);
3132 ASSERT_TRUE (result);
3133 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3134 free (const_cast <unsigned char *> (dst_string.text));
3135
3136 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3137 test.m_concats.record_string_concatenation (5, input_locs);
3138
3139 location_t initial_loc = input_locs[0];
3140
3141 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3142 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3143 and expect get_source_range_for_substring to fail.
3144 However, for a string concatenation test, we can have a case
3145 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3146 but subsequent strings can be after it.
3147 Attempting to detect this within assert_char_at_range
3148 would overcomplicate the logic for the common test cases, so
3149 we detect it here. */
3150 if (should_have_column_data_p (input_locs[0])
3151 && !should_have_column_data_p (input_locs[4]))
3152 {
3153 /* Verify that get_source_range_for_char gracefully rejects
3154 this case. */
3155 source_range actual_range;
3156 const char *err
5927e78e 3157 = get_source_range_for_char (test.m_parser, &test.m_concats,
3158 initial_loc, type, 0, &actual_range);
d4166bdc 3159 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3160 return;
3161 }
3162
3163 /* Digit pair I is on line I + 1, at columns 10 and 11. */
3163 for (int i = 0; i < 5; i++)
3164 for (int j = 0; j < 2; j++)
3165 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3166 i + 1, 10 + j, 10 + j);
3167
7413e757 3168 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3169 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3170
3171 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
d4166bdc 3172}
3173
3174/* Another test of string literal concatenation, this time combined with
3175 various kinds of escaped characters. */
3176
3177static void
3178test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3179{
3180 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3181 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3182 const char *content
3183 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3184 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3185 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3186 lexer_test test (case_, content, NULL);
3187
3188 auto_vec <cpp_string> input_strings;
3189 location_t input_locs[4];
3190
3191 /* Verify that we get the expected tokens back. */
3192 for (int i = 0; i < 4; i++)
3193 {
3194 const cpp_token *tok = test.get_token ();
3195 ASSERT_EQ (tok->type, CPP_STRING);
3196 input_strings.safe_push (tok->val.str);
3197 input_locs[i] = tok->src_loc;
3198 }
3199
3200 /* Verify that cpp_interpret_string works. */
3201 cpp_string dst_string;
3202 const enum cpp_ttype type = CPP_STRING;
3203 bool result = cpp_interpret_string (test.m_parser,
3204 input_strings.address (), 4,
3205 &dst_string, type);
3206 ASSERT_TRUE (result);
3207 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3208 free (const_cast <unsigned char *> (dst_string.text));
3209
3210 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3211 test.m_concats.record_string_concatenation (4, input_locs);
3212
3213 location_t initial_loc = input_locs[0];
3214
3214 /* "01234". */
3215 for (int i = 0; i <= 4; i++)
3216 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3216 /* Digit 5, written as the hex escape "\x35" (columns 19-22). */
3217 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3217 /* Digit 6, written as the octal escape "\066" (columns 27-30). */
3218 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3218 /* "789" (columns 35-37). */
3219 for (int i = 7; i <= 9; i++)
3220 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3221
7413e757 3222 /* NUL-terminator should use the location of the final closing quote. */
3223 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3224
3225 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
d4166bdc 3226}
3227
3228/* Test of string literal in a macro. */
3229
3230static void
3231test_lexer_string_locations_macro (const line_table_case &case_)
3232{
3233 /* Digits 0-9.
3234 .....................0000000001111111111.22222222223.
3235 .....................1234567890123456789.01234567890. */
3236 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3237 " MACRO");
3238 lexer_test test (case_, content, NULL);
3239
3240 /* Verify that we get the expected tokens back. */
3241 const cpp_token *tok = test.get_token ();
3242 ASSERT_EQ (tok->type, CPP_PADDING);
3243
3244 tok = test.get_token ();
3245 ASSERT_EQ (tok->type, CPP_STRING);
3246 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3247
3248 /* Verify ranges of individual characters. We ought to
3249 see columns within the macro definition (line 1, columns 20-30),
   not within the expansion point on line 2. The eleventh range is
   for the nul terminator. */
7413e757 3250 for (int i = 0; i <= 10; i++)
d4166bdc 3251 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3252 i, 1, 20 + i, 20 + i);
3253
7413e757 3254 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
d4166bdc 3255
3256 tok = test.get_token ();
3257 ASSERT_EQ (tok->type, CPP_PADDING);
3258}
3259
3260/* Test of stringification of a macro argument. */
3261
3262static void
3263test_lexer_string_locations_stringified_macro_argument
3264 (const line_table_case &case_)
3265{
3266 /* .....................000000000111111111122222222223.
3267 .....................123456789012345678901234567890. */
3268 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3269 "MACRO(foo)\n");
3270 lexer_test test (case_, content, NULL);
3271
3272 /* Verify that we get the expected tokens back: a padding token,
   then the string produced by stringifying the argument. */
3273 const cpp_token *tok = test.get_token ();
3274 ASSERT_EQ (tok->type, CPP_PADDING);
3275
3276 tok = test.get_token ();
3277 ASSERT_EQ (tok->type, CPP_STRING);
3278 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3279
3280 /* We don't support getting the location of a stringified macro
3281 argument. Verify that it fails gracefully. */
3282 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3283 "cpp_interpret_string_1 failed");
3284
3284 /* Two further padding tokens are expected after the string. */
3285 tok = test.get_token ();
3286 ASSERT_EQ (tok->type, CPP_PADDING);
3287
3288 tok = test.get_token ();
3289 ASSERT_EQ (tok->type, CPP_PADDING);
3290}
3291
3292/* Ensure that we fail gracefully if something attempts to pass
3293 in a location that isn't a string literal token. Seen on this code:
3294
3295 const char a[] = " %d ";
3296 __builtin_printf (a, 0.5);
3297 ^
3298
3299 when c-format.c erroneously used the indicated one-character
3300 location as the format string location, leading to a read past the
3301 end of a string buffer in cpp_interpret_string_1. */
3302
3303static void
3304test_lexer_string_locations_non_string (const line_table_case &case_)
3305{
3306 /* .....................000000000111111111122222222223.
3307 .....................123456789012345678901234567890. */
3308 const char *content = (" a\n");
3309 lexer_test test (case_, content, NULL);
3310
3311 /* Verify that we get the expected token back. */
3312 const cpp_token *tok = test.get_token ();
3313 ASSERT_EQ (tok->type, CPP_NAME);
3314 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3315
3316 /* At this point, libcpp is attempting to interpret the name as a
3317 string literal, despite it not starting with a quote. We don't detect
3318 that, but we should at least fail gracefully. */
3319 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3320 "cpp_interpret_string_1 failed");
3321}
3322
3323/* Ensure that we can read substring information for a token which
3324 starts in one linemap and ends in another. Adapted from
3325 gcc.dg/cpp/pr69985.c. */
3326
3327static void
3328test_lexer_string_locations_long_line (const line_table_case &case_)
3329{
3330 /* .....................000000.000111111111
3331 .....................123456.789012345678. */
3332 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3333 " \"0123456789012345678901234567890123456789"
3334 "0123456789012345678901234567890123456789"
3335 "0123456789012345678901234567890123456789"
3336 "0123456789\"\n");
3337
3338 lexer_test test (case_, content, NULL);
3339
3340 /* Verify that we get the expected token back. */
3341 const cpp_token *tok = test.get_token ();
3342 ASSERT_EQ (tok->type, CPP_STRING);
3343
3344 if (!should_have_column_data_p (line_table->highest_location))
3345 return;
3346
3347 /* Verify ranges of individual characters: 130 digits plus the
   nul terminator give 131 ranges, one column each, on line 2
   starting at column 7. */
7413e757 3348 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3349 for (int i = 0; i < 131; i++)
d4166bdc 3350 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3351 i, 2, 7 + i, 7 + i);
3352}
3353
f9f26759 3354/* Test of locations within a raw string that doesn't contain a newline. */
3355
3356static void
3357test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3358{
3359 /* .....................00.0000000111111111122.
3360 .....................12.3456789012345678901. */
3361 const char *content = ("R\"foo(0123456789)foo\"\n");
3362 lexer_test test (case_, content, NULL);
3363
3364 /* Verify that we get the expected token back. */
3365 const cpp_token *tok = test.get_token ();
3366 ASSERT_EQ (tok->type, CPP_STRING);
3367
3368 /* Verify that cpp_interpret_string works. */
3369 cpp_string dst_string;
3370 const enum cpp_ttype type = CPP_STRING;
3371 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3372 &dst_string, type);
3373 ASSERT_TRUE (result);
3374 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3375 free (const_cast <unsigned char *> (dst_string.text));
3376
3377 if (!should_have_column_data_p (line_table->highest_location))
3378 return;
3379
3380 /* 0-9, plus the nul terminator. */
3381 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3382 for (int i = 0; i < 11; i++)
3383 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3384 i, 1, 7 + i, 7 + i);
3385}
3386
3387/* Test of locations within a raw string that contains a newline:
   the string itself is interpreted correctly, but substring
   locations are expected to be reported as unavailable. */
3388
3389static void
3390test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3391{
3392 /* .....................00.0000.
3393 .....................12.3456. */
3394 const char *content = ("R\"foo(\n"
3395 /* .....................00000.
3396 .....................12345. */
3397 "hello\n"
3398 "world\n"
3399 /* .....................00000.
3400 .....................12345. */
3401 ")foo\"\n");
3402 lexer_test test (case_, content, NULL);
3403
3404 /* Verify that we get the expected token back. */
3405 const cpp_token *tok = test.get_token ();
3406 ASSERT_EQ (tok->type, CPP_STRING);
3407
3408 /* Verify that cpp_interpret_string works. */
3409 cpp_string dst_string;
3410 const enum cpp_ttype type = CPP_STRING;
3411 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3412 &dst_string, type);
3413 ASSERT_TRUE (result);
3414 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3415 free (const_cast <unsigned char *> (dst_string.text));
3416
3417 if (!should_have_column_data_p (line_table->highest_location))
3418 return;
3419
3420 /* Currently we don't support locations within raw strings that
3421 contain newlines. */
3422 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3423 "range endpoints are on different lines");
3424}
3425
0ccd6e7a 3426/* Test of parsing an unterminated raw string. The opening delimiter
   "ouch" and the would-be closing delimiter "ouCh" differ in case, so
   the literal is never closed. */
3427
3428static void
3429test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3430{
3431 const char *content = "R\"ouch()ouCh\" /* etc */";
3432
fb225cf1 3433 lexer_diagnostic_sink diagnostics;
3434 lexer_test test (case_, content, &diagnostics);
0ccd6e7a 3435 test.m_implicitly_expect_EOF = false;
3436
3437 /* Attempt to parse the raw string; we expect to get CPP_EOF back
   and exactly one recorded diagnostic. */
3438 const cpp_token *tok = test.get_token ();
3439 ASSERT_EQ (tok->type, CPP_EOF);
3440
fb225cf1 3441 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
0ccd6e7a 3442 /* We expect the message "unterminated raw string"
3443 in the "cpplib" translation domain.
3444 It's not clear that dgettext is available on all supported hosts,
3445 so this assertion is commented-out for now.
3446 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
fb225cf1 3447 diagnostics.m_diagnostics[0]);
0ccd6e7a 3448 */
3449}
3450
d4166bdc 3451/* Test of lexing char constants. */
3452
3453static void
3454test_lexer_char_constants (const line_table_case &case_)
3455{
3456 /* Various char constants.
3457 .....................0000000001111111111.22222222223.
3458 .....................1234567890123456789.01234567890. */
3459 const char *content = (" 'a'\n"
3460 " u'a'\n"
3461 " U'a'\n"
3462 " L'a'\n"
3463 " 'abc'\n");
3464 lexer_test test (case_, content, NULL);
3465
3466 /* Verify that we get the expected tokens back. */
3467 /* 'a'. */
3468 const cpp_token *tok = test.get_token ();
3469 ASSERT_EQ (tok->type, CPP_CHAR);
3470 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3471
3471 /* Interpret the plain char constant. Only the resulting value and
   the character count are checked; UNSIGNEDP is passed as an
   out-argument but its value is not examined. */
3472 unsigned int chars_seen;
3473 int unsignedp;
3474 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3475 &chars_seen, &unsignedp);
3476 ASSERT_EQ (cc, 'a');
3477 ASSERT_EQ (chars_seen, 1);
3478
3479 /* u'a'. */
3480 tok = test.get_token ();
3481 ASSERT_EQ (tok->type, CPP_CHAR16);
3482 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3483
3484 /* U'a'. */
3485 tok = test.get_token ();
3486 ASSERT_EQ (tok->type, CPP_CHAR32);
3487 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3488
3489 /* L'a'. */
3490 tok = test.get_token ();
3491 ASSERT_EQ (tok->type, CPP_WCHAR);
3492 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3493
3494 /* 'abc' (c-char-sequence). */
3495 tok = test.get_token ();
3496 ASSERT_EQ (tok->type, CPP_CHAR);
3497 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3498}
/* A table of interesting location_t values, giving one axis of our test
   matrix.  for_each_line_table_case uses each entry in turn as the
   starting location within the line_table of a test case.

   The table currently has 12 entries, and for_each_line_table_case
   asserts that exactly 2 * 12 cases were run, so that assertion must be
   updated whenever an entry is added or removed here.  */

static const location_t boundary_locations[] = {
  /* Zero means "don't override the default values for a new line_table".  */
  0,

  /* An arbitrary non-zero value that isn't close to one of
     the boundary values below.  */
  0x10000,

  /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES: a little
     below, immediately below, exactly at, immediately above, and a
     little above the threshold.  */
  LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
  LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
  LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
  LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
  LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,

  /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS, using the same
     pattern of offsets around the threshold.  */
  LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
  LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
  LINE_MAP_MAX_LOCATION_WITH_COLS,
  LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
  LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
};
3524
7ec388ed 3525/* Run TESTCASE multiple times, once for each case in our test matrix. */
99b4f3a2 3526
3527void
7ec388ed 3528for_each_line_table_case (void (*testcase) (const line_table_case &))
99b4f3a2 3529{
b73690a4 3530 /* As noted above in the description of struct line_table_case,
3531 we want to explore a test matrix of interesting line_table
3532 situations, running various selftests for each case within the
3533 matrix. */
3534
3535 /* Run all tests with:
3536 (a) line_table->default_range_bits == 0, and
3537 (b) line_table->default_range_bits == 5. */
3538 int num_cases_tested = 0;
3539 for (int default_range_bits = 0; default_range_bits <= 5;
3540 default_range_bits += 5)
3541 {
3542 /* ...and use each of the "interesting" location values as
3543 the starting location within line_table. */
3544 const int num_boundary_locations
3545 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3546 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3547 {
3548 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3549
7ec388ed 3550 testcase (c);
b73690a4 3551
3552 num_cases_tested++;
3553 }
3554 }
3555
3556 /* Verify that we fully covered the test matrix. */
3557 ASSERT_EQ (num_cases_tested, 2 * 12);
7ec388ed 3558}
3559
3560/* Run all of the selftests within this file. */
3561
3562void
3563input_c_tests ()
3564{
d73881b0 3565 test_linenum_comparisons ();
7ec388ed 3566 test_should_have_column_data_p ();
3567 test_unknown_location ();
3568 test_builtins ();
aca2a315 3569 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
7ec388ed 3570
3571 for_each_line_table_case (test_accessing_ordinary_linemaps);
3572 for_each_line_table_case (test_lexer);
3573 for_each_line_table_case (test_lexer_string_locations_simple);
3574 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3575 for_each_line_table_case (test_lexer_string_locations_hex);
3576 for_each_line_table_case (test_lexer_string_locations_oct);
3577 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3578 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3579 for_each_line_table_case (test_lexer_string_locations_ucn4);
3580 for_each_line_table_case (test_lexer_string_locations_ucn8);
3581 for_each_line_table_case (test_lexer_string_locations_wide_string);
3582 for_each_line_table_case (test_lexer_string_locations_string16);
3583 for_each_line_table_case (test_lexer_string_locations_string32);
3584 for_each_line_table_case (test_lexer_string_locations_u8);
3585 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3586 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3587 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3588 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3589 for_each_line_table_case (test_lexer_string_locations_macro);
3590 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3591 for_each_line_table_case (test_lexer_string_locations_non_string);
3592 for_each_line_table_case (test_lexer_string_locations_long_line);
f9f26759 3593 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3594 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
0ccd6e7a 3595 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
7ec388ed 3596 for_each_line_table_case (test_lexer_char_constants);
b73690a4 3597
99b4f3a2 3598 test_reading_source_line ();
3599}
3600
3601} // namespace selftest
3602
3603#endif /* CHECKING_P */