]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/input.c
Allow automatics in equivalences
[thirdparty/gcc.git] / gcc / input.c
CommitLineData
37ba4887 1/* Data and functions related to line maps and input files.
fbd26352 2 Copyright (C) 2004-2019 Free Software Foundation, Inc.
37ba4887 3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "intl.h"
934182c6 24#include "diagnostic.h"
28f17529 25#include "diagnostic-core.h"
99b4f3a2 26#include "selftest.h"
b73690a4 27#include "cpplib.h"
ffc2c526 28
e2f73ee8 29#ifndef HAVE_ICONV
30#define HAVE_ICONV 0
31#endif
32
ffc2c526 33/* This is a cache used by get_next_line to store the content of a
34 file to be searched for file lines. */
251317e4 35class fcache
ffc2c526 36{
251317e4 37public:
ffc2c526 38 /* These are information used to store a line boundary. */
251317e4 39 class line_info
ffc2c526 40 {
251317e4 41 public:
ffc2c526 42 /* The line number. It starts from 1. */
43 size_t line_num;
44
45 /* The position (byte count) of the beginning of the line,
46 relative to the file data pointer. This starts at zero. */
47 size_t start_pos;
48
49 /* The position (byte count) of the last byte of the line. This
50 normally points to the '\n' character, or to one byte after the
51 last byte of the file, if the file doesn't contain a '\n'
52 character. */
53 size_t end_pos;
54
55 line_info (size_t l, size_t s, size_t e)
56 : line_num (l), start_pos (s), end_pos (e)
57 {}
58
59 line_info ()
60 :line_num (0), start_pos (0), end_pos (0)
61 {}
62 };
63
64 /* The number of time this file has been accessed. This is used
65 to designate which file cache to evict from the cache
66 array. */
67 unsigned use_count;
68
c6a7d9e9 69 /* The file_path is the key for identifying a particular file in
70 the cache.
71 For libcpp-using code, the underlying buffer for this field is
72 owned by the corresponding _cpp_file within the cpp_reader. */
ffc2c526 73 const char *file_path;
74
75 FILE *fp;
76
77 /* This points to the content of the file that we've read so
78 far. */
79 char *data;
80
81 /* The size of the DATA array above.*/
82 size_t size;
83
84 /* The number of bytes read from the underlying file so far. This
85 must be less (or equal) than SIZE above. */
86 size_t nb_read;
87
88 /* The index of the beginning of the current line. */
89 size_t line_start_idx;
90
91 /* The number of the previous line read. This starts at 1. Zero
92 means we've read no line so far. */
93 size_t line_num;
94
95 /* This is the total number of lines of the current file. At the
96 moment, we try to get this information from the line map
97 subsystem. Note that this is just a hint. When using the C++
98 front-end, this hint is correct because the input file is then
99 completely tokenized before parsing starts; so the line map knows
100 the number of lines before compilation really starts. For e.g,
101 the C front-end, it can happen that we start emitting diagnostics
102 before the line map has seen the end of the file. */
103 size_t total_lines;
104
fe066ce3 105 /* Could this file be missing a trailing newline on its final line?
106 Initially true (to cope with empty files), set to true/false
107 as each line is read. */
108 bool missing_trailing_newline;
109
ffc2c526 110 /* This is a record of the beginning and end of the lines we've seen
111 while reading the file. This is useful to avoid walking the data
112 from the beginning when we are asked to read a line that is
113 before LINE_START_IDX above. Note that the maximum size of this
114 record is fcache_line_record_size, so that the memory consumption
115 doesn't explode. We thus scale total_lines down to
116 fcache_line_record_size. */
117 vec<line_info, va_heap> line_record;
118
119 fcache ();
120 ~fcache ();
121};
37ba4887 122
37ba4887 123/* Current position in real source file.  */
124
415309e2 125location_t input_location = UNKNOWN_LOCATION;
37ba4887 126
/* The line table that the functions in this file pass to the linemap
   routines when resolving and expanding locations.  */
2e966e2a 127class line_maps *line_table;
37ba4887 128
7ec388ed 129/* A stashed copy of "line_table" for use by selftest::line_table_test.
   This needs to be a global so that it can be a GC root, and thus
   prevent the stashed copy from being garbage-collected if the GC runs
   during a line_table_test.  */
133
2e966e2a 134class line_maps *saved_line_table;
7ec388ed 135
/* The table of cached files.  It is allocated lazily by
   diagnostic_file_cache_init and holds fcache_tab_size entries.  */
ffc2c526 136static fcache *fcache_tab;
/* Number of entries in fcache_tab.  */
137static const size_t fcache_tab_size = 16;
/* Initial size in bytes of an fcache data buffer; maybe_grow doubles
   the buffer from there.  */
138static const size_t fcache_buffer_size = 4 * 1024;
/* Maximum number of entries kept in an fcache line_record.  */
139static const size_t fcache_line_record_size = 100;
140
5ebe2143 141/* Expand the source location LOC into a human readable location. If
142 LOC resolves to a builtin location, the file name of the readable
39107655 143 location is set to the string "<built-in>". If EXPANSION_POINT_P is
144 TRUE and LOC is virtual, then it is resolved to the expansion
145 point of the involved macro. Otherwise, it is resolved to the
bd172d61 146 spelling location of the token.
147
148 When resolving to the spelling location of the token, if the
149 resulting location is for a built-in location (that is, it has no
150 associated line/column) in the context of a macro expansion, the
151 returned location is the first one (while unwinding the macro
152 location towards its expansion point) that is in real source
56df12ff 153 code.
154
155 ASPECT controls which part of the location to use. */
39107655 156
157static expanded_location
be1e7283 158expand_location_1 (location_t loc,
56df12ff 159 bool expansion_point_p,
160 enum location_aspect aspect)
37ba4887 161{
162 expanded_location xloc;
551e34da 163 const line_map_ordinary *map;
bd172d61 164 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
5169661d 165 tree block = NULL;
166
167 if (IS_ADHOC_LOC (loc))
168 {
169 block = LOCATION_BLOCK (loc);
170 loc = LOCATION_LOCUS (loc);
171 }
bd172d61 172
173 memset (&xloc, 0, sizeof (xloc));
5ebe2143 174
bd172d61 175 if (loc >= RESERVED_LOCATION_COUNT)
176 {
177 if (!expansion_point_p)
178 {
179 /* We want to resolve LOC to its spelling location.
180
181 But if that spelling location is a reserved location that
182 appears in the context of a macro expansion (like for a
183 location for a built-in token), let's consider the first
184 location (toward the expansion point) that is not reserved;
185 that is, the first location that is in real source code. */
186 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
551e34da 187 loc, NULL);
bd172d61 188 lrk = LRK_SPELLING_LOCATION;
189 }
56df12ff 190 loc = linemap_resolve_location (line_table, loc, lrk, &map);
191
192 /* loc is now either in an ordinary map, or is a reserved location.
193 If it is a compound location, the caret is in a spelling location,
194 but the start/finish might still be a virtual location.
195 Depending of what the caller asked for, we may need to recurse
196 one level in order to resolve any virtual locations in the
197 end-points. */
198 switch (aspect)
199 {
200 default:
201 gcc_unreachable ();
202 /* Fall through. */
203 case LOCATION_ASPECT_CARET:
204 break;
205 case LOCATION_ASPECT_START:
206 {
be1e7283 207 location_t start = get_start (loc);
56df12ff 208 if (start != loc)
209 return expand_location_1 (start, expansion_point_p, aspect);
210 }
211 break;
212 case LOCATION_ASPECT_FINISH:
213 {
be1e7283 214 location_t finish = get_finish (loc);
56df12ff 215 if (finish != loc)
216 return expand_location_1 (finish, expansion_point_p, aspect);
217 }
218 break;
219 }
bd172d61 220 xloc = linemap_expand_location (line_table, map, loc);
221 }
5ebe2143 222
5169661d 223 xloc.data = block;
37ba4887 224 if (loc <= BUILTINS_LOCATION)
5ebe2143 225 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
226
37ba4887 227 return xloc;
228}
e77b8253 229
ffc2c526 230/* Initialize the set of cache used for files accessed by caret
231 diagnostic. */
232
233static void
234diagnostic_file_cache_init (void)
235{
236 if (fcache_tab == NULL)
237 fcache_tab = new fcache[fcache_tab_size];
238}
239
e7683169 240/* Free the resources used by the set of cache used for files accessed
ffc2c526 241 by caret diagnostic. */
242
243void
244diagnostic_file_cache_fini (void)
245{
246 if (fcache_tab)
247 {
248 delete [] (fcache_tab);
249 fcache_tab = NULL;
250 }
251}
252
253/* Return the total lines number that have been read so far by the
254 line map (in the preprocessor) so far. For languages like C++ that
255 entirely preprocess the input file before starting to parse, this
256 equals the actual number of lines of the file. */
257
258static size_t
259total_lines_num (const char *file_path)
260{
261 size_t r = 0;
be1e7283 262 location_t l = 0;
ffc2c526 263 if (linemap_get_file_highest_location (line_table, file_path, &l))
264 {
265 gcc_assert (l >= RESERVED_LOCATION_COUNT);
266 expanded_location xloc = expand_location (l);
267 r = xloc.line;
268 }
269 return r;
270}
271
272/* Lookup the cache used for the content of a given file accessed by
273 caret diagnostic. Return the found cached file, or NULL if no
274 cached file was found. */
275
276static fcache*
277lookup_file_in_cache_tab (const char *file_path)
278{
279 if (file_path == NULL)
280 return NULL;
281
282 diagnostic_file_cache_init ();
283
284 /* This will contain the found cached file. */
285 fcache *r = NULL;
286 for (unsigned i = 0; i < fcache_tab_size; ++i)
287 {
288 fcache *c = &fcache_tab[i];
289 if (c->file_path && !strcmp (c->file_path, file_path))
290 {
291 ++c->use_count;
292 r = c;
293 }
294 }
295
296 if (r)
297 ++r->use_count;
298
299 return r;
300}
301
a476cb62 302/* Purge any mention of FILENAME from the cache of files used for
303 printing source code. For use in selftests when working
304 with tempfiles. */
305
306void
307diagnostics_file_cache_forcibly_evict_file (const char *file_path)
308{
309 gcc_assert (file_path);
310
311 fcache *r = lookup_file_in_cache_tab (file_path);
312 if (!r)
313 /* Not found. */
314 return;
315
316 r->file_path = NULL;
317 if (r->fp)
318 fclose (r->fp);
319 r->fp = NULL;
320 r->nb_read = 0;
321 r->line_start_idx = 0;
322 r->line_num = 0;
323 r->line_record.truncate (0);
324 r->use_count = 0;
325 r->total_lines = 0;
fe066ce3 326 r->missing_trailing_newline = true;
a476cb62 327}
328
ffc2c526 329/* Return the file cache that has been less used, recently, or the
330 first empty one. If HIGHEST_USE_COUNT is non-null,
331 *HIGHEST_USE_COUNT is set to the highest use count of the entries
332 in the cache table. */
333
334static fcache*
335evicted_cache_tab_entry (unsigned *highest_use_count)
336{
337 diagnostic_file_cache_init ();
338
339 fcache *to_evict = &fcache_tab[0];
340 unsigned huc = to_evict->use_count;
341 for (unsigned i = 1; i < fcache_tab_size; ++i)
342 {
343 fcache *c = &fcache_tab[i];
344 bool c_is_empty = (c->file_path == NULL);
345
346 if (c->use_count < to_evict->use_count
347 || (to_evict->file_path && c_is_empty))
348 /* We evict C because it's either an entry with a lower use
349 count or one that is empty. */
350 to_evict = c;
351
352 if (huc < c->use_count)
353 huc = c->use_count;
354
355 if (c_is_empty)
356 /* We've reached the end of the cache; subsequent elements are
357 all empty. */
358 break;
359 }
360
361 if (highest_use_count)
362 *highest_use_count = huc;
363
364 return to_evict;
365}
366
367/* Create the cache used for the content of a given file to be
368 accessed by caret diagnostic. This cache is added to an array of
369 cache and can be retrieved by lookup_file_in_cache_tab. This
370 function returns the created cache. Note that only the last
371 fcache_tab_size files are cached. */
372
373static fcache*
374add_file_to_cache_tab (const char *file_path)
375{
376
377 FILE *fp = fopen (file_path, "r");
c1cc4419 378 if (fp == NULL)
379 return NULL;
ffc2c526 380
381 unsigned highest_use_count = 0;
382 fcache *r = evicted_cache_tab_entry (&highest_use_count);
383 r->file_path = file_path;
384 if (r->fp)
385 fclose (r->fp);
386 r->fp = fp;
387 r->nb_read = 0;
388 r->line_start_idx = 0;
389 r->line_num = 0;
390 r->line_record.truncate (0);
391 /* Ensure that this cache entry doesn't get evicted next time
392 add_file_to_cache_tab is called. */
393 r->use_count = ++highest_use_count;
394 r->total_lines = total_lines_num (file_path);
fe066ce3 395 r->missing_trailing_newline = true;
ffc2c526 396
397 return r;
398}
399
400/* Lookup the cache used for the content of a given file accessed by
401 caret diagnostic. If no cached file was found, create a new cache
402 for this file, add it to the array of cached file and return
403 it. */
404
405static fcache*
406lookup_or_add_file_to_cache_tab (const char *file_path)
407{
408 fcache *r = lookup_file_in_cache_tab (file_path);
409 if (r == NULL)
410 r = add_file_to_cache_tab (file_path);
411 return r;
412}
413
414/* Default constructor for a cache of file used by caret
415 diagnostic. */
416
417fcache::fcache ()
418: use_count (0), file_path (NULL), fp (NULL), data (0),
419 size (0), nb_read (0), line_start_idx (0), line_num (0),
fe066ce3 420 total_lines (0), missing_trailing_newline (true)
ffc2c526 421{
422 line_record.create (0);
423}
424
425/* Destructor for a cache of file used by caret diagnostic. */
426
427fcache::~fcache ()
428{
429 if (fp)
430 {
431 fclose (fp);
432 fp = NULL;
433 }
434 if (data)
435 {
436 XDELETEVEC (data);
437 data = 0;
438 }
439 line_record.release ();
440}
441
442/* Returns TRUE iff the cache would need to be filled with data coming
443 from the file. That is, either the cache is empty or full or the
444 current line is empty. Note that if the cache is full, it would
445 need to be extended and filled again. */
446
447static bool
448needs_read (fcache *c)
449{
450 return (c->nb_read == 0
451 || c->nb_read == c->size
452 || (c->line_start_idx >= c->nb_read - 1));
453}
454
455/* Return TRUE iff the cache is full and thus needs to be
456 extended. */
457
458static bool
459needs_grow (fcache *c)
460{
461 return c->nb_read == c->size;
462}
463
464/* Grow the cache if it needs to be extended. */
465
466static void
467maybe_grow (fcache *c)
5a983084 468{
ffc2c526 469 if (!needs_grow (c))
470 return;
471
472 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
2e24ac9b 473 c->data = XRESIZEVEC (char, c->data, size);
ffc2c526 474 c->size = size;
475}
5a983084 476
ffc2c526 477/* Read more data into the cache. Extends the cache if need be.
478 Returns TRUE iff new data could be read. */
479
480static bool
481read_data (fcache *c)
482{
483 if (feof (c->fp) || ferror (c->fp))
484 return false;
485
486 maybe_grow (c);
487
488 char * from = c->data + c->nb_read;
489 size_t to_read = c->size - c->nb_read;
490 size_t nb_read = fread (from, 1, to_read, c->fp);
491
492 if (ferror (c->fp))
493 return false;
494
495 c->nb_read += nb_read;
496 return !!nb_read;
497}
498
499/* Read new data iff the cache needs to be filled with more data
500 coming from the file FP. Return TRUE iff the cache was filled with
501 mode data. */
502
503static bool
504maybe_read_data (fcache *c)
505{
506 if (!needs_read (c))
507 return false;
508 return read_data (c);
509}
510
511/* Read a new line from file FP, using C as a cache for the data
512 coming from the file. Upon successful completion, *LINE is set to
2e24ac9b 513 the beginning of the line found. *LINE points directly in the
514 line cache and is only valid until the next call of get_next_line.
ffc2c526 515 *LINE_LEN is set to the length of the line. Note that the line
516 does not contain any terminal delimiter. This function returns
517 true if some data was read or process from the cache, false
2e24ac9b 518 otherwise. Note that subsequent calls to get_next_line might
519 make the content of *LINE invalid. */
ffc2c526 520
521static bool
522get_next_line (fcache *c, char **line, ssize_t *line_len)
523{
524 /* Fill the cache with data to process. */
525 maybe_read_data (c);
526
527 size_t remaining_size = c->nb_read - c->line_start_idx;
528 if (remaining_size == 0)
529 /* There is no more data to process. */
530 return false;
531
532 char *line_start = c->data + c->line_start_idx;
533
534 char *next_line_start = NULL;
535 size_t len = 0;
536 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
537 if (line_end == NULL)
5a983084 538 {
ffc2c526 539 /* We haven't found the end-of-line delimiter in the cache.
540 Fill the cache with more data from the file and look for the
541 '\n'. */
542 while (maybe_read_data (c))
543 {
544 line_start = c->data + c->line_start_idx;
545 remaining_size = c->nb_read - c->line_start_idx;
546 line_end = (char *) memchr (line_start, '\n', remaining_size);
547 if (line_end != NULL)
548 {
549 next_line_start = line_end + 1;
550 break;
551 }
552 }
553 if (line_end == NULL)
fe066ce3 554 {
555 /* We've loadded all the file into the cache and still no
556 '\n'. Let's say the line ends up at one byte passed the
557 end of the file. This is to stay consistent with the case
558 of when the line ends up with a '\n' and line_end points to
559 that terminal '\n'. That consistency is useful below in
560 the len calculation. */
561 line_end = c->data + c->nb_read ;
562 c->missing_trailing_newline = true;
563 }
564 else
565 c->missing_trailing_newline = false;
5a983084 566 }
ffc2c526 567 else
fe066ce3 568 {
569 next_line_start = line_end + 1;
570 c->missing_trailing_newline = false;
571 }
ffc2c526 572
573 if (ferror (c->fp))
2e24ac9b 574 return false;
ffc2c526 575
576 /* At this point, we've found the end of the of line. It either
577 points to the '\n' or to one byte after the last byte of the
578 file. */
579 gcc_assert (line_end != NULL);
5a983084 580
ffc2c526 581 len = line_end - line_start;
582
583 if (c->line_start_idx < c->nb_read)
584 *line = line_start;
585
586 ++c->line_num;
587
588 /* Before we update our line record, make sure the hint about the
589 total number of lines of the file is correct. If it's not, then
590 we give up recording line boundaries from now on. */
591 bool update_line_record = true;
592 if (c->line_num > c->total_lines)
593 update_line_record = false;
594
595 /* Now update our line record so that re-reading lines from the
596 before c->line_start_idx is faster. */
597 if (update_line_record
598 && c->line_record.length () < fcache_line_record_size)
599 {
600 /* If the file lines fits in the line record, we just record all
601 its lines ...*/
602 if (c->total_lines <= fcache_line_record_size
603 && c->line_num > c->line_record.length ())
604 c->line_record.safe_push (fcache::line_info (c->line_num,
605 c->line_start_idx,
606 line_end - c->data));
607 else if (c->total_lines > fcache_line_record_size)
608 {
609 /* ... otherwise, we just scale total_lines down to
610 (fcache_line_record_size lines. */
611 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
612 if (c->line_record.length () == 0
613 || n >= c->line_record.length ())
614 c->line_record.safe_push (fcache::line_info (c->line_num,
615 c->line_start_idx,
616 line_end - c->data));
617 }
618 }
619
620 /* Update c->line_start_idx so that it points to the next line to be
621 read. */
622 if (next_line_start)
623 c->line_start_idx = next_line_start - c->data;
624 else
625 /* We didn't find any terminal '\n'. Let's consider that the end
626 of line is the end of the data in the cache. The next
627 invocation of get_next_line will either read more data from the
628 underlying file or return false early because we've reached the
629 end of the file. */
630 c->line_start_idx = c->nb_read;
631
632 *line_len = len;
633
634 return true;
635}
636
ffc2c526 637/* Consume the next bytes coming from the cache (or from its
638 underlying file if there are remaining unread bytes in the file)
639 until we reach the next end-of-line (or end-of-file). There is no
640 copying from the cache involved. Return TRUE upon successful
641 completion. */
642
643static bool
644goto_next_line (fcache *cache)
645{
646 char *l;
647 ssize_t len;
648
649 return get_next_line (cache, &l, &len);
650}
651
652/* Read an arbitrary line number LINE_NUM from the file cached in C.
2e24ac9b 653 If the line was read successfully, *LINE points to the beginning
654 of the line in the file cache and *LINE_LEN is the length of the
655 line. *LINE is not nul-terminated, but may contain zero bytes.
656 *LINE is only valid until the next call of read_line_num.
ffc2c526 657 This function returns bool if a line was read. */
658
659static bool
660read_line_num (fcache *c, size_t line_num,
2e24ac9b 661 char **line, ssize_t *line_len)
ffc2c526 662{
663 gcc_assert (line_num > 0);
664
665 if (line_num <= c->line_num)
fc3eff88 666 {
ffc2c526 667 /* We've been asked to read lines that are before c->line_num.
668 So lets use our line record (if it's not empty) to try to
669 avoid re-reading the file from the beginning again. */
13225ff5 670
ffc2c526 671 if (c->line_record.is_empty ())
5a983084 672 {
ffc2c526 673 c->line_start_idx = 0;
674 c->line_num = 0;
675 }
676 else
677 {
678 fcache::line_info *i = NULL;
679 if (c->total_lines <= fcache_line_record_size)
680 {
681 /* In languages where the input file is not totally
682 preprocessed up front, the c->total_lines hint
683 can be smaller than the number of lines of the
684 file. In that case, only the first
685 c->total_lines have been recorded.
686
687 Otherwise, the first c->total_lines we've read have
688 their start/end recorded here. */
689 i = (line_num <= c->total_lines)
690 ? &c->line_record[line_num - 1]
691 : &c->line_record[c->total_lines - 1];
692 gcc_assert (i->line_num <= line_num);
693 }
694 else
695 {
696 /* So the file had more lines than our line record
697 size. Thus the number of lines we've recorded has
698 been scaled down to fcache_line_reacord_size. Let's
699 pick the start/end of the recorded line that is
700 closest to line_num. */
701 size_t n = (line_num <= c->total_lines)
702 ? line_num * fcache_line_record_size / c->total_lines
703 : c ->line_record.length () - 1;
704 if (n < c->line_record.length ())
705 {
706 i = &c->line_record[n];
707 gcc_assert (i->line_num <= line_num);
708 }
709 }
710
711 if (i && i->line_num == line_num)
712 {
2e24ac9b 713 /* We have the start/end of the line. */
714 *line = c->data + i->start_pos;
715 *line_len = i->end_pos - i->start_pos;
ffc2c526 716 return true;
717 }
718
719 if (i)
720 {
721 c->line_start_idx = i->start_pos;
722 c->line_num = i->line_num - 1;
723 }
724 else
725 {
726 c->line_start_idx = 0;
727 c->line_num = 0;
728 }
5a983084 729 }
5a983084 730 }
ffc2c526 731
732 /* Let's walk from line c->line_num up to line_num - 1, without
733 copying any line. */
734 while (c->line_num < line_num - 1)
735 if (!goto_next_line (c))
736 return false;
737
738 /* The line we want is the next one. Let's read and copy it back to
739 the caller. */
2e24ac9b 740 return get_next_line (c, line, line_len);
5a983084 741}
742
2e24ac9b 743/* Return the physical source line that corresponds to FILE_PATH/LINE.
744 The line is not nul-terminated. The returned pointer is only
745 valid until the next call of location_get_source_line.
746 Note that the line can contain several null characters,
0bce23e1 747 so the returned value's length has the actual length of the line.
748 If the function fails, a NULL char_span is returned. */
5a983084 749
0bce23e1 750char_span
751location_get_source_line (const char *file_path, int line)
5a983084 752{
7b645785 753 char *buffer = NULL;
2e24ac9b 754 ssize_t len;
ffc2c526 755
be812248 756 if (line == 0)
0bce23e1 757 return char_span (NULL, 0);
9e8234d0 758
be812248 759 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
9e8234d0 760 if (c == NULL)
0bce23e1 761 return char_span (NULL, 0);
9e8234d0 762
be812248 763 bool read = read_line_num (c, line, &buffer, &len);
0bce23e1 764 if (!read)
765 return char_span (NULL, 0);
5a983084 766
0bce23e1 767 return char_span (buffer, len);
5a983084 768}
769
fe066ce3 770/* Determine if FILE_PATH missing a trailing newline on its final line.
771 Only valid to call once all of the file has been loaded, by
772 requesting a line number beyond the end of the file. */
773
774bool
775location_missing_trailing_newline (const char *file_path)
776{
777 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
778 if (c == NULL)
779 return false;
780
781 return c->missing_trailing_newline;
782}
783
a4cfdfed 784/* Test if the location originates from the spelling location of a
785 builtin-tokens. That is, return TRUE if LOC is a (possibly
786 virtual) location of a built-in token that appears in the expansion
787 list of a macro. Please note that this function also works on
788 tokens that result from built-in tokens. For instance, the
789 function would return true if passed a token "4" that is the result
790 of the expansion of the built-in __LINE__ macro. */
791bool
be1e7283 792is_location_from_builtin_token (location_t loc)
a4cfdfed 793{
551e34da 794 const line_map_ordinary *map = NULL;
a4cfdfed 795 loc = linemap_resolve_location (line_table, loc,
796 LRK_SPELLING_LOCATION, &map);
797 return loc == BUILTINS_LOCATION;
798}
799
39107655 800/* Expand the source location LOC into a human readable location. If
801 LOC is virtual, it resolves to the expansion point of the involved
802 macro. If LOC resolves to a builtin location, the file name of the
803 readable location is set to the string "<built-in>". */
804
805expanded_location
be1e7283 806expand_location (location_t loc)
39107655 807{
56df12ff 808 return expand_location_1 (loc, /*expansion_point_p=*/true,
809 LOCATION_ASPECT_CARET);
39107655 810}
811
812/* Expand the source location LOC into a human readable location. If
813 LOC is virtual, it resolves to the expansion location of the
814 relevant macro. If LOC resolves to a builtin location, the file
815 name of the readable location is set to the string
816 "<built-in>". */
817
818expanded_location
be1e7283 819expand_location_to_spelling_point (location_t loc,
e2b0b327 820 enum location_aspect aspect)
39107655 821{
e2b0b327 822 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
39107655 823}
824
f0479000 825/* The rich_location class within libcpp requires a way to expand
be1e7283 826 location_t instances, and relies on the client code
f0479000 827 providing a symbol named
828 linemap_client_expand_location_to_spelling_point
829 to do this.
830
831 This is the implementation for libcommon.a (all host binaries),
56df12ff 832 which simply calls into expand_location_1. */
f0479000 833
834expanded_location
be1e7283 835linemap_client_expand_location_to_spelling_point (location_t loc,
56df12ff 836 enum location_aspect aspect)
f0479000 837{
56df12ff 838 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
f0479000 839}
840
841
fb2edec0 842/* If LOCATION is in a system header and if it is a virtual location for
843 a token coming from the expansion of a macro, unwind it to the
844 location of the expansion point of the macro. Otherwise, just return
db30b351 845 LOCATION.
846
847 This is used for instance when we want to emit diagnostics about a
fb2edec0 848 token that may be located in a macro that is itself defined in a
849 system header, for example, for the NULL macro. In such a case, if
850 LOCATION were passed directly to diagnostic functions such as
851 warning_at, the diagnostic would be suppressed (unless
852 -Wsystem-headers). */
db30b351 853
be1e7283 854location_t
855expansion_point_location_if_in_system_header (location_t location)
db30b351 856{
857 if (in_system_header_at (location))
858 location = linemap_resolve_location (line_table, location,
859 LRK_MACRO_EXPANSION_POINT,
860 NULL);
861 return location;
862}
39107655 863
5d4db8ef 864/* If LOCATION is a virtual location for a token coming from the expansion
865 of a macro, unwind to the location of the expansion point of the macro. */
866
be1e7283 867location_t
868expansion_point_location (location_t location)
5d4db8ef 869{
870 return linemap_resolve_location (line_table, location,
871 LRK_MACRO_EXPANSION_POINT, NULL);
872}
873
f17776ff 874/* Construct a location with caret at CARET, ranging from START to
875 finish e.g.
876
877 11111111112
878 12345678901234567890
879 522
880 523 return foo + bar;
881 ~~~~^~~~~
882 524
883
884 The location's caret is at the "+", line 523 column 15, but starts
885 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
886 of "bar" at column 19. */
887
888location_t
889make_location (location_t caret, location_t start, location_t finish)
890{
891 location_t pure_loc = get_pure_location (caret);
892 source_range src_range;
aca2a315 893 src_range.m_start = get_start (start);
894 src_range.m_finish = get_finish (finish);
f17776ff 895 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
896 pure_loc,
897 src_range,
898 NULL);
899 return combined_loc;
900}
901
cb4d9ee2 902/* Same as above, but taking a source range rather than two locations. */
903
904location_t
905make_location (location_t caret, source_range src_range)
906{
907 location_t pure_loc = get_pure_location (caret);
908 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
909}
910
e77b8253 911/* Dump statistics to stderr about the memory usage of the line_table
912 set of line maps. This also displays some statistics about macro
913 expansion. */
914
915void
916dump_line_table_statistics (void)
917{
918 struct linemap_stats s;
2a688977 919 long total_used_map_size,
e77b8253 920 macro_maps_size,
921 total_allocated_map_size;
922
923 memset (&s, 0, sizeof (s));
924
925 linemap_get_statistics (line_table, &s);
926
927 macro_maps_size = s.macro_maps_used_size
928 + s.macro_maps_locations_size;
929
930 total_allocated_map_size = s.ordinary_maps_allocated_size
931 + s.macro_maps_allocated_size
932 + s.macro_maps_locations_size;
933
934 total_used_map_size = s.ordinary_maps_used_size
935 + s.macro_maps_used_size
936 + s.macro_maps_locations_size;
937
2a688977 938 fprintf (stderr, "Number of expanded macros: %5ld\n",
e77b8253 939 s.num_expanded_macros);
940 if (s.num_expanded_macros != 0)
2a688977 941 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
e77b8253 942 s.num_macro_tokens / s.num_expanded_macros);
943 fprintf (stderr,
944 "\nLine Table allocations during the "
7a413494 945 "compilation process\n");
03fac02c 946 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
7a413494 947 SIZE_AMOUNT (s.num_ordinary_maps_used));
03fac02c 948 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
7a413494 949 SIZE_AMOUNT (s.ordinary_maps_used_size));
03fac02c 950 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
7a413494 951 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
03fac02c 952 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
7a413494 953 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
03fac02c 954 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
7a413494 955 SIZE_AMOUNT (s.num_macro_maps_used));
03fac02c 956 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
7a413494 957 SIZE_AMOUNT (s.macro_maps_used_size));
03fac02c 958 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
7a413494 959 SIZE_AMOUNT (s.macro_maps_locations_size));
03fac02c 960 fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
7a413494 961 SIZE_AMOUNT (macro_maps_size));
03fac02c 962 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
7a413494 963 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
03fac02c 964 fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
7a413494 965 SIZE_AMOUNT (total_allocated_map_size));
03fac02c 966 fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
7a413494 967 SIZE_AMOUNT (total_used_map_size));
03fac02c 968 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
7a413494 969 SIZE_AMOUNT (s.adhoc_table_size));
03fac02c 970 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
7a413494 971 SIZE_AMOUNT (s.adhoc_table_entries_used));
03fac02c 972 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
7a413494 973 SIZE_AMOUNT (line_table->num_optimized_ranges));
03fac02c 974 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
7a413494 975 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
0ffb4474 976
e77b8253 977 fprintf (stderr, "\n");
978}
28f17529 979
980/* Get location one beyond the final location in ordinary map IDX. */
981
be1e7283 982static location_t
2e966e2a 983get_end_location (class line_maps *set, unsigned int idx)
28f17529 984{
985 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
986 return set->highest_location;
987
988 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
989 return MAP_START_LOCATION (next_map);
990}
991
/* Helper function for write_digit_row.
   Write the units digit of DIGIT (i.e. DIGIT modulo 10) to STREAM as a
   single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
999
1000/* Helper function for dump_location_info.
1001 Write a row of numbers to STREAM, numbering a source line,
1002 giving the units, tens, hundreds etc of the column number. */
1003
1004static void
1005write_digit_row (FILE *stream, int indent,
a96cefb2 1006 const line_map_ordinary *map,
be1e7283 1007 location_t loc, int max_col, int divisor)
28f17529 1008{
1009 fprintf (stream, "%*c", indent, ' ');
1010 fprintf (stream, "|");
1011 for (int column = 1; column < max_col; column++)
1012 {
be1e7283 1013 location_t column_loc = loc + (column << map->m_range_bits);
28f17529 1014 write_digit (stream, column_loc / divisor);
1015 }
1016 fprintf (stream, "\n");
1017}
1018
1019/* Write a half-closed (START) / half-open (END) interval of
be1e7283 1020 location_t to STREAM. */
28f17529 1021
1022static void
1023dump_location_range (FILE *stream,
be1e7283 1024 location_t start, location_t end)
28f17529 1025{
1026 fprintf (stream,
be1e7283 1027 " location_t interval: %u <= loc < %u\n",
28f17529 1028 start, end);
1029}
1030
1031/* Write a labelled description of a half-closed (START) / half-open (END)
be1e7283 1032 interval of location_t to STREAM. */
28f17529 1033
1034static void
1035dump_labelled_location_range (FILE *stream,
1036 const char *name,
be1e7283 1037 location_t start, location_t end)
28f17529 1038{
1039 fprintf (stream, "%s\n", name);
1040 dump_location_range (stream, start, end);
1041 fprintf (stream, "\n");
1042}
1043
1044/* Write a visualization of the locations in the line_table to STREAM. */
1045
1046void
1047dump_location_info (FILE *stream)
1048{
1049 /* Visualize the reserved locations. */
1050 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1051 0, RESERVED_LOCATION_COUNT);
1052
1053 /* Visualize the ordinary line_map instances, rendering the sources. */
1054 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1055 {
be1e7283 1056 location_t end_location = get_end_location (line_table, idx);
28f17529 1057 /* half-closed: doesn't include this one. */
1058
551e34da 1059 const line_map_ordinary *map
1060 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
28f17529 1061 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1062 dump_location_range (stream,
1063 MAP_START_LOCATION (map), end_location);
1064 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1065 fprintf (stream, " starting at line: %i\n",
1066 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
a96cefb2 1067 fprintf (stream, " column and range bits: %i\n",
1068 map->m_column_and_range_bits);
28f17529 1069 fprintf (stream, " column bits: %i\n",
a96cefb2 1070 map->m_column_and_range_bits - map->m_range_bits);
1071 fprintf (stream, " range bits: %i\n",
1072 map->m_range_bits);
934182c6 1073 const char * reason;
1074 switch (map->reason) {
1075 case LC_ENTER:
1076 reason = "LC_ENTER";
1077 break;
1078 case LC_LEAVE:
1079 reason = "LC_LEAVE";
1080 break;
1081 case LC_RENAME:
1082 reason = "LC_RENAME";
1083 break;
1084 case LC_RENAME_VERBATIM:
1085 reason = "LC_RENAME_VERBATIM";
1086 break;
1087 case LC_ENTER_MACRO:
1088 reason = "LC_RENAME_MACRO";
1089 break;
1090 default:
1091 reason = "Unknown";
1092 }
1093 fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1094
1095 const line_map_ordinary *includer_map
1096 = linemap_included_from_linemap (line_table, map);
1097 fprintf (stream, " included from location: %d",
1098 linemap_included_from (map));
1099 if (includer_map) {
1100 fprintf (stream, " (in ordinary map %d)",
1101 int (includer_map - line_table->info_ordinary.maps));
1102 }
1103 fprintf (stream, "\n");
28f17529 1104
1105 /* Render the span of source lines that this "map" covers. */
be1e7283 1106 for (location_t loc = MAP_START_LOCATION (map);
28f17529 1107 loc < end_location;
a96cefb2 1108 loc += (1 << map->m_range_bits) )
28f17529 1109 {
a96cefb2 1110 gcc_assert (pure_location_p (line_table, loc) );
1111
28f17529 1112 expanded_location exploc
1113 = linemap_expand_location (line_table, map, loc);
1114
c9281ef8 1115 if (exploc.column == 0)
28f17529 1116 {
1117 /* Beginning of a new source line: draw the line. */
1118
0bce23e1 1119 char_span line_text = location_get_source_line (exploc.file,
1120 exploc.line);
28f17529 1121 if (!line_text)
1122 break;
1123 fprintf (stream,
1124 "%s:%3i|loc:%5i|%.*s\n",
1125 exploc.file, exploc.line,
1126 loc,
0bce23e1 1127 (int)line_text.length (), line_text.get_buffer ());
28f17529 1128
1129 /* "loc" is at column 0, which means "the whole line".
1130 Render the locations *within* the line, by underlining
be1e7283 1131 it, showing the location_t numeric values
28f17529 1132 at each column. */
0bce23e1 1133 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1134 if (max_col > line_text.length ())
1135 max_col = line_text.length () + 1;
28f17529 1136
934182c6 1137 int len_lnum = num_digits (exploc.line);
1138 if (len_lnum < 3)
1139 len_lnum = 3;
1140 int len_loc = num_digits (loc);
1141 if (len_loc < 5)
1142 len_loc = 5;
1143
1144 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
28f17529 1145
1146 /* Thousands. */
1147 if (end_location > 999)
a96cefb2 1148 write_digit_row (stream, indent, map, loc, max_col, 1000);
28f17529 1149
1150 /* Hundreds. */
1151 if (end_location > 99)
a96cefb2 1152 write_digit_row (stream, indent, map, loc, max_col, 100);
28f17529 1153
1154 /* Tens. */
a96cefb2 1155 write_digit_row (stream, indent, map, loc, max_col, 10);
28f17529 1156
1157 /* Units. */
a96cefb2 1158 write_digit_row (stream, indent, map, loc, max_col, 1);
28f17529 1159 }
1160 }
1161 fprintf (stream, "\n");
1162 }
1163
1164 /* Visualize unallocated values. */
1165 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1166 line_table->highest_location,
1167 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1168
1169 /* Visualize the macro line_map instances, rendering the sources. */
1170 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1171 {
be1e7283 1172 /* Each macro map that is allocated owns location_t values
28f17529 1173 that are *lower* that the one before them.
1174 Hence it's meaningful to view them either in order of ascending
1175 source locations, or in order of ascending macro map index. */
be1e7283 1176 const bool ascending_location_ts = true;
1177 unsigned int idx = (ascending_location_ts
28f17529 1178 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1179 : i);
551e34da 1180 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
28f17529 1181 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1182 idx,
1183 linemap_map_get_macro_name (map),
1184 MACRO_MAP_NUM_MACRO_TOKENS (map));
1185 dump_location_range (stream,
1186 map->start_location,
1187 (map->start_location
1188 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1189 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1190 "expansion point is location %i",
1191 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1192 fprintf (stream, " map->start_location: %u\n",
1193 map->start_location);
1194
1195 fprintf (stream, " macro_locations:\n");
1196 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1197 {
be1e7283 1198 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1199 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
28f17529 1200
1201 /* linemap_add_macro_token encodes token numbers in an expansion
1202 by putting them after MAP_START_LOCATION. */
1203
1204 /* I'm typically seeing 4 uninitialized entries at the end of
1205 0xafafafaf.
1206 This appears to be due to macro.c:replace_args
1207 adding 2 extra args for padding tokens; presumably there may
1208 be a leading and/or trailing padding token injected,
1209 each for 2 more location slots.
be1e7283 1210 This would explain there being up to 4 location_ts slots
28f17529 1211 that may be uninitialized. */
1212
1213 fprintf (stream, " %u: %u, %u\n",
1214 i,
1215 x,
1216 y);
1217 if (x == y)
1218 {
1219 if (x < MAP_START_LOCATION (map))
85b9be9b 1220 inform (x, "token %u has %<x-location == y-location == %u%>",
1221 i, x);
28f17529 1222 else
1223 fprintf (stream,
1224 "x-location == y-location == %u encodes token # %u\n",
1225 x, x - MAP_START_LOCATION (map));
1226 }
1227 else
1228 {
85b9be9b 1229 inform (x, "token %u has %<x-location == %u%>", i, x);
1230 inform (x, "token %u has %<y-location == %u%>", i, y);
28f17529 1231 }
1232 }
1233 fprintf (stream, "\n");
1234 }
1235
be1e7283 1236 /* It appears that MAX_LOCATION_T itself is never assigned to a
28f17529 1237 macro map, presumably due to an off-by-one error somewhere
1238 between the logic in linemap_enter_macro and
1239 LINEMAPS_MACRO_LOWEST_LOCATION. */
be1e7283 1240 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1241 MAX_LOCATION_T,
1242 MAX_LOCATION_T + 1);
28f17529 1243
1244 /* Visualize ad-hoc values. */
1245 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
be1e7283 1246 MAX_LOCATION_T + 1, UINT_MAX);
28f17529 1247}
99b4f3a2 1248
d4166bdc 1249/* string_concat's constructor. */
1250
1251string_concat::string_concat (int num, location_t *locs)
1252 : m_num (num)
1253{
1254 m_locs = ggc_vec_alloc <location_t> (num);
1255 for (int i = 0; i < num; i++)
1256 m_locs[i] = locs[i];
1257}
1258
1259/* string_concat_db's constructor. */
1260
1261string_concat_db::string_concat_db ()
1262{
1263 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1264}
1265
1266/* Record that a string concatenation occurred, covering NUM
1267 string literal tokens. LOCS is an array of size NUM, containing the
1268 locations of the tokens. A copy of LOCS is taken. */
1269
1270void
1271string_concat_db::record_string_concatenation (int num, location_t *locs)
1272{
1273 gcc_assert (num > 1);
1274 gcc_assert (locs);
1275
1276 location_t key_loc = get_key_loc (locs[0]);
1277
1278 string_concat *concat
1279 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1280 m_table->put (key_loc, concat);
1281}
1282
1283/* Determine if LOC was the location of the the initial token of a
1284 concatenation of string literal tokens.
1285 If so, *OUT_NUM is written to with the number of tokens, and
1286 *OUT_LOCS with the location of an array of locations of the
1287 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1288 storage owned by the string_concat_db.
1289 Otherwise, return false. */
1290
1291bool
1292string_concat_db::get_string_concatenation (location_t loc,
1293 int *out_num,
1294 location_t **out_locs)
1295{
1296 gcc_assert (out_num);
1297 gcc_assert (out_locs);
1298
1299 location_t key_loc = get_key_loc (loc);
1300
1301 string_concat **concat = m_table->get (key_loc);
1302 if (!concat)
1303 return false;
1304
1305 *out_num = (*concat)->m_num;
1306 *out_locs =(*concat)->m_locs;
1307 return true;
1308}
1309
1310/* Internal function. Canonicalize LOC into a form suitable for
1311 use as a key within the database, stripping away macro expansion,
1312 ad-hoc information, and range information, using the location of
1313 the start of LOC within an ordinary linemap. */
1314
1315location_t
1316string_concat_db::get_key_loc (location_t loc)
1317{
1318 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1319 NULL);
1320
1321 loc = get_range_from_loc (line_table, loc).m_start;
1322
1323 return loc;
1324}
1325
1326/* Helper class for use within get_substring_ranges_for_loc.
1327 An vec of cpp_string with responsibility for releasing all of the
1328 str->text for each str in the vector. */
1329
1330class auto_cpp_string_vec : public auto_vec <cpp_string>
1331{
1332 public:
1333 auto_cpp_string_vec (int alloc)
1334 : auto_vec <cpp_string> (alloc) {}
1335
1336 ~auto_cpp_string_vec ()
1337 {
1338 /* Clean up the copies within this vec. */
1339 int i;
1340 cpp_string *str;
1341 FOR_EACH_VEC_ELT (*this, i, str)
1342 free (const_cast <unsigned char *> (str->text));
1343 }
1344};
1345
1346/* Attempt to populate RANGES with source location information on the
1347 individual characters within the string literal found at STRLOC.
1348 If CONCATS is non-NULL, then any string literals that the token at
1349 STRLOC was concatenated with are also added to RANGES.
1350
1351 Return NULL if successful, or an error message if any errors occurred (in
1352 which case RANGES may be only partially populated and should not
1353 be used).
1354
1355 This is implemented by re-parsing the relevant source line(s). */
1356
1357static const char *
1358get_substring_ranges_for_loc (cpp_reader *pfile,
1359 string_concat_db *concats,
1360 location_t strloc,
1361 enum cpp_ttype type,
1362 cpp_substring_ranges &ranges)
1363{
1364 gcc_assert (pfile);
1365
1366 if (strloc == UNKNOWN_LOCATION)
1367 return "unknown location";
1368
8df44fbf 1369 /* Reparsing the strings requires accurate location information.
1370 If -ftrack-macro-expansion has been overridden from its default
1371 of 2, then we might have a location of a macro expansion point,
1372 rather than the location of the literal itself.
1373 Avoid this by requiring that we have full macro expansion tracking
1374 for substring locations to be available. */
1375 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1376 return "track_macro_expansion != 2";
1377
a4d96eb7 1378 /* If #line or # 44 "file"-style directives are present, then there's
1379 no guarantee that the line numbers we have can be used to locate
1380 the strings. For example, we might have a .i file with # directives
1381 pointing back to lines within a .c file, but the .c file might
1382 have been edited since the .i file was created.
1383 In such a case, the safest course is to disable on-demand substring
1384 locations. */
1385 if (line_table->seen_line_directive)
1386 return "seen line directive";
1387
d4166bdc 1388 /* If string concatenation has occurred at STRLOC, get the locations
1389 of all of the literal tokens making up the compound string.
1390 Otherwise, just use STRLOC. */
1391 int num_locs = 1;
1392 location_t *strlocs = &strloc;
1393 if (concats)
1394 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1395
1396 auto_cpp_string_vec strs (num_locs);
1397 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1398 for (int i = 0; i < num_locs; i++)
1399 {
1400 /* Get range of strloc. We will use it to locate the start and finish
1401 of the literal token within the line. */
1402 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1403
1404 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
e2b0b327 1405 {
1406 /* If the string token was within a macro expansion, then we can
1407 cope with it for the simple case where we have a single token.
1408 Otherwise, bail out. */
1409 if (src_range.m_start != src_range.m_finish)
1410 return "macro expansion";
1411 }
1412 else
1413 {
1414 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1415 /* If so, we can't reliably determine where the token started within
1416 its line. */
1417 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1418
1419 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1420 /* If so, we can't reliably determine where the token finished
1421 within its line. */
1422 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1423 }
d4166bdc 1424
1425 expanded_location start
e2b0b327 1426 = expand_location_to_spelling_point (src_range.m_start,
1427 LOCATION_ASPECT_START);
d4166bdc 1428 expanded_location finish
e2b0b327 1429 = expand_location_to_spelling_point (src_range.m_finish,
1430 LOCATION_ASPECT_FINISH);
d4166bdc 1431 if (start.file != finish.file)
1432 return "range endpoints are in different files";
1433 if (start.line != finish.line)
1434 return "range endpoints are on different lines";
1435 if (start.column > finish.column)
1436 return "range endpoints are reversed";
1437
0bce23e1 1438 char_span line = location_get_source_line (start.file, start.line);
1439 if (!line)
d4166bdc 1440 return "unable to read source line";
1441
1442 /* Determine the location of the literal (including quotes
1443 and leading prefix chars, such as the 'u' in a u""
1444 token). */
0bce23e1 1445 size_t literal_length = finish.column - start.column + 1;
d4166bdc 1446
44128dbe 1447 /* Ensure that we don't crash if we got the wrong location. */
0bce23e1 1448 if (line.length () < (start.column - 1 + literal_length))
44128dbe 1449 return "line is not wide enough";
1450
0bce23e1 1451 char_span literal = line.subspan (start.column - 1, literal_length);
1452
d4166bdc 1453 cpp_string from;
1454 from.len = literal_length;
1455 /* Make a copy of the literal, to avoid having to rely on
1456 the lifetime of the copy of the line within the cache.
1457 This will be released by the auto_cpp_string_vec dtor. */
0bce23e1 1458 from.text = (unsigned char *)literal.xstrdup ();
d4166bdc 1459 strs.safe_push (from);
1460
1461 /* For very long lines, a new linemap could have started
1462 halfway through the token.
1463 Ensure that the loc_reader uses the linemap of the
1464 *end* of the token for its start location. */
b9436c5b 1465 const line_map_ordinary *start_ord_map;
1466 linemap_resolve_location (line_table, src_range.m_start,
1467 LRK_SPELLING_LOCATION, &start_ord_map);
d4166bdc 1468 const line_map_ordinary *final_ord_map;
1469 linemap_resolve_location (line_table, src_range.m_finish,
b9436c5b 1470 LRK_SPELLING_LOCATION, &final_ord_map);
99069acd 1471 if (start_ord_map == NULL || final_ord_map == NULL)
1472 return "failed to get ordinary maps";
b9436c5b 1473 /* Bulletproofing. We ought to only have different ordinary maps
1474 for start vs finish due to line-length jumps. */
1475 if (start_ord_map != final_ord_map
1476 && start_ord_map->to_file != final_ord_map->to_file)
2df8a4a6 1477 return "start and finish are spelled in different ordinary maps";
1478 /* The file from linemap_resolve_location ought to match that from
1479 expand_location_to_spelling_point. */
1480 if (start_ord_map->to_file != start.file)
1481 return "mismatching file after resolving linemap";
1482
d4166bdc 1483 location_t start_loc
1484 = linemap_position_for_line_and_column (line_table, final_ord_map,
1485 start.line, start.column);
1486
1487 cpp_string_location_reader loc_reader (start_loc, line_table);
1488 loc_readers.safe_push (loc_reader);
1489 }
1490
1491 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1492 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1493 loc_readers.address (),
1494 num_locs, &ranges, type);
1495 if (err)
1496 return err;
1497
1498 /* Success: "ranges" should now contain information on the string. */
1499 return NULL;
1500}
1501
5927e78e 1502/* Attempt to populate *OUT_LOC with source location information on the
1503 given characters within the string literal found at STRLOC.
1504 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1505 character set.
1506
1507 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1508 and string literal "012345\n789"
1509 *OUT_LOC is written to with:
1510 "012345\n789"
1511 ~^~~~~
1512
d4166bdc 1513 If CONCATS is non-NULL, then any string literals that the token at
1514 STRLOC was concatenated with are also considered.
1515
1516 This is implemented by re-parsing the relevant source line(s).
1517
1518 Return NULL if successful, or an error message if any errors occurred.
1519 Error messages are intended for GCC developers (to help debugging) rather
1520 than for end-users. */
1521
1522const char *
be1e7283 1523get_location_within_string (cpp_reader *pfile,
1524 string_concat_db *concats,
1525 location_t strloc,
1526 enum cpp_ttype type,
1527 int caret_idx, int start_idx, int end_idx,
1528 location_t *out_loc)
5927e78e 1529{
1530 gcc_checking_assert (caret_idx >= 0);
d4166bdc 1531 gcc_checking_assert (start_idx >= 0);
1532 gcc_checking_assert (end_idx >= 0);
5927e78e 1533 gcc_assert (out_loc);
d4166bdc 1534
1535 cpp_substring_ranges ranges;
1536 const char *err
1537 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1538 if (err)
1539 return err;
1540
5927e78e 1541 if (caret_idx >= ranges.get_num_ranges ())
1542 return "caret_idx out of range";
d4166bdc 1543 if (start_idx >= ranges.get_num_ranges ())
1544 return "start_idx out of range";
1545 if (end_idx >= ranges.get_num_ranges ())
1546 return "end_idx out of range";
1547
5927e78e 1548 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1549 ranges.get_range (start_idx).m_start,
1550 ranges.get_range (end_idx).m_finish);
1551 return NULL;
1552}
1553
45183e4c 1554#if CHECKING_P
1555
1556namespace selftest {
1557
1558/* Selftests of location handling. */
1559
5927e78e 1560/* Attempt to populate *OUT_RANGE with source location information on the
1561 given character within the string literal found at STRLOC.
1562 CHAR_IDX refers to an offset within the execution character set.
1563 If CONCATS is non-NULL, then any string literals that the token at
1564 STRLOC was concatenated with are also considered.
1565
1566 This is implemented by re-parsing the relevant source line(s).
1567
1568 Return NULL if successful, or an error message if any errors occurred.
1569 Error messages are intended for GCC developers (to help debugging) rather
1570 than for end-users. */
1571
1572static const char *
1573get_source_range_for_char (cpp_reader *pfile,
1574 string_concat_db *concats,
1575 location_t strloc,
1576 enum cpp_ttype type,
1577 int char_idx,
1578 source_range *out_range)
1579{
1580 gcc_checking_assert (char_idx >= 0);
1581 gcc_assert (out_range);
1582
1583 cpp_substring_ranges ranges;
1584 const char *err
1585 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1586 if (err)
1587 return err;
1588
1589 if (char_idx >= ranges.get_num_ranges ())
1590 return "char_idx out of range";
1591
1592 *out_range = ranges.get_range (char_idx);
d4166bdc 1593 return NULL;
1594}
1595
5927e78e 1596/* As get_source_range_for_char, but write to *OUT the number
d4166bdc 1597 of ranges that are available. */
1598
45183e4c 1599static const char *
d4166bdc 1600get_num_source_ranges_for_substring (cpp_reader *pfile,
1601 string_concat_db *concats,
1602 location_t strloc,
1603 enum cpp_ttype type,
1604 int *out)
1605{
1606 gcc_assert (out);
1607
1608 cpp_substring_ranges ranges;
1609 const char *err
1610 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1611
1612 if (err)
1613 return err;
1614
1615 *out = ranges.get_num_ranges ();
1616 return NULL;
1617}
1618
99b4f3a2 1619/* Selftests of location handling. */
1620
d73881b0 1621/* Verify that compare() on linenum_type handles comparisons over the full
1622 range of the type. */
1623
1624static void
1625test_linenum_comparisons ()
1626{
1627 linenum_type min_line (0);
1628 linenum_type max_line (0xffffffff);
1629 ASSERT_EQ (0, compare (min_line, min_line));
1630 ASSERT_EQ (0, compare (max_line, max_line));
1631
1632 ASSERT_GT (compare (max_line, min_line), 0);
1633 ASSERT_LT (compare (min_line, max_line), 0);
1634}
1635
b73690a4 1636/* Helper function for verifying location data: when location_t
1637 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1638 as having column 0. */
1639
1640static bool
1641should_have_column_data_p (location_t loc)
1642{
1643 if (IS_ADHOC_LOC (loc))
1644 loc = get_location_from_adhoc_loc (line_table, loc);
1645 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1646 return false;
1647 return true;
1648}
1649
1650/* Selftest for should_have_column_data_p. */
1651
1652static void
1653test_should_have_column_data_p ()
1654{
1655 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1656 ASSERT_TRUE
1657 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1658 ASSERT_FALSE
1659 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1660}
1661
99b4f3a2 1662/* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1663 on LOC. */
1664
1665static void
1666assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1667 location_t loc)
1668{
1669 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1670 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
b73690a4 1671 /* If location_t values are sufficiently high, then column numbers
1672 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1673 When close to the threshold, column numbers *may* be present: if
1674 the final linemap before the threshold contains a line that straddles
1675 the threshold, locations in that line have column information. */
1676 if (should_have_column_data_p (loc))
1677 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1678}
1679
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for location_t beyond which line-map.c changes
     behavior (disabling of the range-packing optimization, disabling
     of column-tracking).  We can exercise these by starting the line_table
     at interesting values at or near these thresholds.

   The following class describes a particular case within our test
   matrix.  */

class line_table_case
{
public:
  /* Store DEFAULT_RANGE_BITS and BASE_LOCATION for later use by
     line_table_test's constructor.  */
  line_table_case (int default_range_bits, int base_location)
  {
    m_default_range_bits = default_range_bits;
    m_base_location = base_location;
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which to start the line_table's
     highest_location and highest_line.  */
  int m_base_location;
};
1707
7ec388ed 1708/* Constructor. Store the old value of line_table, and create a new
1709 one, using sane defaults. */
b73690a4 1710
7ec388ed 1711line_table_test::line_table_test ()
b73690a4 1712{
7ec388ed 1713 gcc_assert (saved_line_table == NULL);
1714 saved_line_table = line_table;
1715 line_table = ggc_alloc<line_maps> ();
1716 linemap_init (line_table, BUILTINS_LOCATION);
1717 gcc_assert (saved_line_table->reallocator);
1718 line_table->reallocator = saved_line_table->reallocator;
1719 gcc_assert (saved_line_table->round_alloc_size);
1720 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1721 line_table->default_range_bits = 0;
1722}
b73690a4 1723
1724/* Constructor. Store the old value of line_table, and create a new
1725 one, using the sitation described in CASE_. */
1726
7ec388ed 1727line_table_test::line_table_test (const line_table_case &case_)
b73690a4 1728{
7ec388ed 1729 gcc_assert (saved_line_table == NULL);
1730 saved_line_table = line_table;
b73690a4 1731 line_table = ggc_alloc<line_maps> ();
1732 linemap_init (line_table, BUILTINS_LOCATION);
7ec388ed 1733 gcc_assert (saved_line_table->reallocator);
1734 line_table->reallocator = saved_line_table->reallocator;
1735 gcc_assert (saved_line_table->round_alloc_size);
1736 line_table->round_alloc_size = saved_line_table->round_alloc_size;
b73690a4 1737 line_table->default_range_bits = case_.m_default_range_bits;
1738 if (case_.m_base_location)
1739 {
1740 line_table->highest_location = case_.m_base_location;
1741 line_table->highest_line = case_.m_base_location;
1742 }
1743}
1744
1745/* Destructor. Restore the old value of line_table. */
1746
7ec388ed 1747line_table_test::~line_table_test ()
b73690a4 1748{
7ec388ed 1749 gcc_assert (saved_line_table != NULL);
1750 line_table = saved_line_table;
1751 saved_line_table = NULL;
99b4f3a2 1752}
1753
1754/* Verify basic operation of ordinary linemaps. */
1755
1756static void
b73690a4 1757test_accessing_ordinary_linemaps (const line_table_case &case_)
99b4f3a2 1758{
7ec388ed 1759 line_table_test ltt (case_);
b73690a4 1760
99b4f3a2 1761 /* Build a simple linemap describing some locations. */
1762 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1763
1764 linemap_line_start (line_table, 1, 100);
1765 location_t loc_a = linemap_position_for_column (line_table, 1);
1766 location_t loc_b = linemap_position_for_column (line_table, 23);
1767
1768 linemap_line_start (line_table, 2, 100);
1769 location_t loc_c = linemap_position_for_column (line_table, 1);
1770 location_t loc_d = linemap_position_for_column (line_table, 17);
1771
1772 /* Example of a very long line. */
1773 linemap_line_start (line_table, 3, 2000);
1774 location_t loc_e = linemap_position_for_column (line_table, 700);
1775
732cf036 1776 /* Transitioning back to a short line. */
1777 linemap_line_start (line_table, 4, 0);
1778 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1779
1780 if (should_have_column_data_p (loc_back_to_short))
1781 {
1782 /* Verify that we switched to short lines in the linemap. */
1783 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1784 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1785 }
1786
9348467c 1787 /* Example of a line that will eventually be seen to be longer
1788 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1789 below that. */
1790 linemap_line_start (line_table, 5, 2000);
1791
1792 location_t loc_start_of_very_long_line
1793 = linemap_position_for_column (line_table, 2000);
1794 location_t loc_too_wide
1795 = linemap_position_for_column (line_table, 4097);
1796 location_t loc_too_wide_2
1797 = linemap_position_for_column (line_table, 4098);
1798
1799 /* ...and back to a sane line length. */
1800 linemap_line_start (line_table, 6, 100);
1801 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1802
99b4f3a2 1803 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1804
1805 /* Multiple files. */
1806 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1807 linemap_line_start (line_table, 1, 200);
1808 location_t loc_f = linemap_position_for_column (line_table, 150);
1809 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1810
1811 /* Verify that we can recover the location info. */
1812 assert_loceq ("foo.c", 1, 1, loc_a);
1813 assert_loceq ("foo.c", 1, 23, loc_b);
1814 assert_loceq ("foo.c", 2, 1, loc_c);
1815 assert_loceq ("foo.c", 2, 17, loc_d);
1816 assert_loceq ("foo.c", 3, 700, loc_e);
732cf036 1817 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
9348467c 1818
1819 /* In the very wide line, the initial location should be fully tracked. */
1820 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1821 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1822 be disabled. */
1823 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1824 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1825 /*...and column-tracking should be re-enabled for subsequent lines. */
1826 assert_loceq ("foo.c", 6, 10, loc_sane_again);
1827
99b4f3a2 1828 assert_loceq ("bar.c", 1, 150, loc_f);
1829
1830 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
f17776ff 1831 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1832
1833 /* Verify using make_location to build a range, and extracting data
1834 back from it. */
1835 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1836 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1837 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1838 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1839 ASSERT_EQ (loc_b, src_range.m_start);
1840 ASSERT_EQ (loc_d, src_range.m_finish);
99b4f3a2 1841}
1842
1843/* Verify various properties of UNKNOWN_LOCATION. */
1844
1845static void
1846test_unknown_location ()
1847{
1848 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1849 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1850 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1851}
1852
1853/* Verify various properties of BUILTINS_LOCATION. */
1854
1855static void
1856test_builtins ()
1857{
82e14468 1858 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
99b4f3a2 1859 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1860}
1861
aca2a315 1862/* Regression test for make_location.
1330da90 1863 Ensure that we use pure locations for the start/finish of the range,
1864 rather than storing a packed or ad-hoc range as the start/finish. */
aca2a315 1865
1866static void
1867test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1868{
1869 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1870 with C++ frontend.
1871 ....................0000000001111111111222.
1872 ....................1234567890123456789012. */
1873 const char *content = " r += !aaa == bbb;\n";
1874 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1875 line_table_test ltt (case_);
1876 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1877
1878 const location_t c11 = linemap_position_for_column (line_table, 11);
1879 const location_t c12 = linemap_position_for_column (line_table, 12);
1880 const location_t c13 = linemap_position_for_column (line_table, 13);
1881 const location_t c14 = linemap_position_for_column (line_table, 14);
1882 const location_t c21 = linemap_position_for_column (line_table, 21);
1883
1884 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1885 return;
1886
1887 /* Use column 13 for the caret location, arbitrarily, to verify that we
1888 handle start != caret. */
1889 const location_t aaa = make_location (c13, c12, c14);
1890 ASSERT_EQ (c13, get_pure_location (aaa));
1891 ASSERT_EQ (c12, get_start (aaa));
1892 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1893 ASSERT_EQ (c14, get_finish (aaa));
1894 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1895
1896 /* Make a location using a location with a range as the start-point. */
1897 const location_t not_aaa = make_location (c11, aaa, c14);
1898 ASSERT_EQ (c11, get_pure_location (not_aaa));
1899 /* It should use the start location of the range, not store the range
1900 itself. */
1901 ASSERT_EQ (c12, get_start (not_aaa));
1902 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1903 ASSERT_EQ (c14, get_finish (not_aaa));
1904 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1905
1906 /* Similarly, make a location with a range as the end-point. */
1907 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1908 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1909 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1910 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1911 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1912 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1913 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1914 /* It should use the finish location of the range, not store the range
1915 itself. */
1916 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1917 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1918 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1919 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1920 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1921}
1922
99b4f3a2 1923/* Verify reading of input files (e.g. for caret-based diagnostics). */
1924
1925static void
1926test_reading_source_line ()
1927{
423bd600 1928 /* Create a tempfile and write some text to it. */
b73690a4 1929 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1930 "01234567890123456789\n"
1931 "This is the test text\n"
2e24ac9b 1932 "This is the 3rd line");
423bd600 1933
1934 /* Read back a specific line from the tempfile. */
0bce23e1 1935 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
1936 ASSERT_TRUE (source_line);
1937 ASSERT_TRUE (source_line.get_buffer () != NULL);
1938 ASSERT_EQ (20, source_line.length ());
2e24ac9b 1939 ASSERT_TRUE (!strncmp ("This is the 3rd line",
0bce23e1 1940 source_line.get_buffer (), source_line.length ()));
2e24ac9b 1941
0bce23e1 1942 source_line = location_get_source_line (tmp.get_filename (), 2);
1943 ASSERT_TRUE (source_line);
1944 ASSERT_TRUE (source_line.get_buffer () != NULL);
1945 ASSERT_EQ (21, source_line.length ());
2e24ac9b 1946 ASSERT_TRUE (!strncmp ("This is the test text",
0bce23e1 1947 source_line.get_buffer (), source_line.length ()));
423bd600 1948
0bce23e1 1949 source_line = location_get_source_line (tmp.get_filename (), 4);
1950 ASSERT_FALSE (source_line);
1951 ASSERT_TRUE (source_line.get_buffer () == NULL);
99b4f3a2 1952}
1953
b73690a4 1954/* Tests of lexing. */
1955
/* Assert that the spelling of token TOK, as produced by
   cpp_token_as_text for PARSER, is the string EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                    \
  const unsigned char *tok_text_ = cpp_token_as_text ((PARSER), (TOK)); \
  ASSERT_STREQ ((EXPECTED_TEXT),                                         \
                reinterpret_cast <const char *> (tok_text_));            \
  SELFTEST_END_STMT
1964
1965/* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1966 and ranges from EXP_START_COL to EXP_FINISH_COL.
1967 Use LOC as the effective location of the selftest. */
1968
1969static void
1970assert_token_loc_eq (const location &loc,
1971 const cpp_token *tok,
1972 const char *exp_filename, int exp_linenum,
1973 int exp_start_col, int exp_finish_col)
1974{
1975 location_t tok_loc = tok->src_loc;
1976 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1977 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1978
1979 /* If location_t values are sufficiently high, then column numbers
1980 will be unavailable. */
1981 if (!should_have_column_data_p (tok_loc))
1982 return;
1983
1984 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1985 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1986 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1987 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1988}
1989
/* Convenience wrapper around assert_token_loc_eq for checking
   TOK->src_loc; any failure is reported at the point of use, via
   SELFTEST_LOCATION.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,     \
                            EXP_START_COL, EXP_FINISH_COL)      \
  assert_token_loc_eq (SELFTEST_LOCATION,                       \
                       (TOK),                                   \
                       (EXP_FILENAME),                          \
                       (EXP_LINENUM),                           \
                       (EXP_START_COL),                         \
                       (EXP_FINISH_COL))
1997
1998/* Test of lexing a file using libcpp, verifying tokens and their
1999 location information. */
2000
2001static void
2002test_lexer (const line_table_case &case_)
2003{
2004 /* Create a tempfile and write some text to it. */
2005 const char *content =
2006 /*00000000011111111112222222222333333.3333444444444.455555555556
2007 12345678901234567890123456789012345.6789012345678.901234567890. */
2008 ("test_name /* c-style comment */\n"
2009 " \"test literal\"\n"
2010 " // test c++-style comment\n"
2011 " 42\n");
2012 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2013
7ec388ed 2014 line_table_test ltt (case_);
b73690a4 2015
2016 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2017
2018 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2019 ASSERT_NE (fname, NULL);
2020
2021 /* Verify that we get the expected tokens back, with the correct
2022 location information. */
2023
2024 location_t loc;
2025 const cpp_token *tok;
2026 tok = cpp_get_token_with_location (parser, &loc);
2027 ASSERT_NE (tok, NULL);
2028 ASSERT_EQ (tok->type, CPP_NAME);
2029 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2030 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2031
2032 tok = cpp_get_token_with_location (parser, &loc);
2033 ASSERT_NE (tok, NULL);
2034 ASSERT_EQ (tok->type, CPP_STRING);
2035 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2036 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2037
2038 tok = cpp_get_token_with_location (parser, &loc);
2039 ASSERT_NE (tok, NULL);
2040 ASSERT_EQ (tok->type, CPP_NUMBER);
2041 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2042 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2043
2044 tok = cpp_get_token_with_location (parser, &loc);
2045 ASSERT_NE (tok, NULL);
2046 ASSERT_EQ (tok->type, CPP_EOF);
2047
2048 cpp_finish (parser, NULL);
2049 cpp_destroy (parser);
2050}
2051
d4166bdc 2052/* Forward decls. */
2053
2e966e2a 2054class lexer_test;
d4166bdc 2055class lexer_test_options;
2056
2057/* A class for specifying options of a lexer_test.
2058 The "apply" vfunc is called during the lexer_test constructor. */
2059
2060class lexer_test_options
2061{
2062 public:
2063 virtual void apply (lexer_test &) = 0;
2064};
2065
c6a7d9e9 2066/* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2067 in its dtor.
2068
2069 This is needed by struct lexer_test to ensure that the cleanup of the
2070 cpp_reader happens *after* the cleanup of the temp_source_file. */
2071
2072class cpp_reader_ptr
2073{
2074 public:
2075 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2076
2077 ~cpp_reader_ptr ()
2078 {
2079 cpp_finish (m_ptr, NULL);
2080 cpp_destroy (m_ptr);
2081 }
2082
2083 operator cpp_reader * () const { return m_ptr; }
2084
2085 private:
2086 cpp_reader *m_ptr;
2087};
2088
d4166bdc 2089/* A struct for writing lexer tests. */
2090
251317e4 2091class lexer_test
d4166bdc 2092{
251317e4 2093public:
d4166bdc 2094 lexer_test (const line_table_case &case_, const char *content,
2095 lexer_test_options *options);
2096 ~lexer_test ();
2097
2098 const cpp_token *get_token ();
2099
c6a7d9e9 2100 /* The ordering of these fields matters.
2101 The line_table_test must be first, since the cpp_reader_ptr
2102 uses it.
2103 The cpp_reader must be cleaned up *after* the temp_source_file
2104 since the filenames in input.c's input cache are owned by the
2105 cpp_reader; in particular, when ~temp_source_file evicts the
2106 filename the filenames must still be alive. */
7ec388ed 2107 line_table_test m_ltt;
c6a7d9e9 2108 cpp_reader_ptr m_parser;
2109 temp_source_file m_tempfile;
d4166bdc 2110 string_concat_db m_concats;
0ccd6e7a 2111 bool m_implicitly_expect_EOF;
d4166bdc 2112};
2113
2114/* Use an EBCDIC encoding for the execution charset, specifically
2115 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2116
2117 This exercises iconv integration within libcpp.
2118 Not every build of iconv supports the given charset,
2119 so we need to flag this error and handle it gracefully. */
2120
2121class ebcdic_execution_charset : public lexer_test_options
2122{
2123 public:
2124 ebcdic_execution_charset () : m_num_iconv_errors (0)
2125 {
2126 gcc_assert (s_singleton == NULL);
2127 s_singleton = this;
2128 }
2129 ~ebcdic_execution_charset ()
2130 {
2131 gcc_assert (s_singleton == this);
2132 s_singleton = NULL;
2133 }
2134
2135 void apply (lexer_test &test) FINAL OVERRIDE
2136 {
2137 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2138 cpp_opts->narrow_charset = "IBM1047";
2139
2140 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
fb225cf1 2141 callbacks->diagnostic = on_diagnostic;
d4166bdc 2142 }
2143
fb225cf1 2144 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2145 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2146 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2147 rich_location *richloc ATTRIBUTE_UNUSED,
2148 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
d4166bdc 2149 ATTRIBUTE_FPTR_PRINTF(5,0)
2150 {
2151 gcc_assert (s_singleton);
9a784cf5 2152 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2153 const char *msg = "conversion from %s to %s not supported by iconv";
2154#ifdef ENABLE_NLS
2155 msg = dgettext ("cpplib", msg);
2156#endif
d4166bdc 2157 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2158 when the local iconv build doesn't support the conversion. */
9a784cf5 2159 if (strcmp (msgid, msg) == 0)
d4166bdc 2160 {
2161 s_singleton->m_num_iconv_errors++;
2162 return true;
2163 }
2164
2165 /* Otherwise, we have an unexpected error. */
2166 abort ();
2167 }
2168
2169 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2170
2171 private:
2172 static ebcdic_execution_charset *s_singleton;
2173 int m_num_iconv_errors;
2174};
2175
2176ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2177
fb225cf1 2178/* A lexer_test_options subclass that records a list of diagnostic
0ccd6e7a 2179 messages emitted by the lexer. */
2180
fb225cf1 2181class lexer_diagnostic_sink : public lexer_test_options
0ccd6e7a 2182{
2183 public:
fb225cf1 2184 lexer_diagnostic_sink ()
0ccd6e7a 2185 {
2186 gcc_assert (s_singleton == NULL);
2187 s_singleton = this;
2188 }
fb225cf1 2189 ~lexer_diagnostic_sink ()
0ccd6e7a 2190 {
2191 gcc_assert (s_singleton == this);
2192 s_singleton = NULL;
2193
2194 int i;
2195 char *str;
fb225cf1 2196 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
0ccd6e7a 2197 free (str);
2198 }
2199
2200 void apply (lexer_test &test) FINAL OVERRIDE
2201 {
2202 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
fb225cf1 2203 callbacks->diagnostic = on_diagnostic;
0ccd6e7a 2204 }
2205
fb225cf1 2206 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2207 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2208 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2209 rich_location *richloc ATTRIBUTE_UNUSED,
2210 const char *msgid, va_list *ap)
0ccd6e7a 2211 ATTRIBUTE_FPTR_PRINTF(5,0)
2212 {
2213 char *msg = xvasprintf (msgid, *ap);
fb225cf1 2214 s_singleton->m_diagnostics.safe_push (msg);
0ccd6e7a 2215 return true;
2216 }
2217
fb225cf1 2218 auto_vec<char *> m_diagnostics;
0ccd6e7a 2219
2220 private:
fb225cf1 2221 static lexer_diagnostic_sink *s_singleton;
0ccd6e7a 2222};
2223
fb225cf1 2224lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
0ccd6e7a 2225
d4166bdc 2226/* Constructor. Override line_table with a new instance based on CASE_,
2227 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2228 start parsing the tempfile. */
2229
2230lexer_test::lexer_test (const line_table_case &case_, const char *content,
c6a7d9e9 2231 lexer_test_options *options)
2232: m_ltt (case_),
2233 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
d4166bdc 2234 /* Create a tempfile and write the text to it. */
2235 m_tempfile (SELFTEST_LOCATION, ".c", content),
0ccd6e7a 2236 m_concats (),
2237 m_implicitly_expect_EOF (true)
d4166bdc 2238{
2239 if (options)
2240 options->apply (*this);
2241
2242 cpp_init_iconv (m_parser);
2243
2244 /* Parse the file. */
2245 const char *fname = cpp_read_main_file (m_parser,
2246 m_tempfile.get_filename ());
2247 ASSERT_NE (fname, NULL);
2248}
2249
0ccd6e7a 2250/* Destructor. By default, verify that the next token in m_parser is EOF. */
d4166bdc 2251
2252lexer_test::~lexer_test ()
2253{
2254 location_t loc;
2255 const cpp_token *tok;
2256
0ccd6e7a 2257 if (m_implicitly_expect_EOF)
2258 {
2259 tok = cpp_get_token_with_location (m_parser, &loc);
2260 ASSERT_NE (tok, NULL);
2261 ASSERT_EQ (tok->type, CPP_EOF);
2262 }
d4166bdc 2263}
2264
2265/* Get the next token from m_parser. */
2266
2267const cpp_token *
2268lexer_test::get_token ()
2269{
2270 location_t loc;
2271 const cpp_token *tok;
2272
2273 tok = cpp_get_token_with_location (m_parser, &loc);
2274 ASSERT_NE (tok, NULL);
2275 return tok;
2276}
2277
2278/* Verify that locations within string literals are correctly handled. */
2279
2280/* Verify get_source_range_for_substring for token(s) at STRLOC,
2281 using the string concatenation database for TEST.
2282
2283 Assert that the character at index IDX is on EXPECTED_LINE,
2284 and that it begins at column EXPECTED_START_COL and ends at
2285 EXPECTED_FINISH_COL (unless the locations are beyond
2286 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2287 columns). */
2288
2289static void
2290assert_char_at_range (const location &loc,
2291 lexer_test& test,
2292 location_t strloc, enum cpp_ttype type, int idx,
2293 int expected_line, int expected_start_col,
2294 int expected_finish_col)
2295{
2296 cpp_reader *pfile = test.m_parser;
2297 string_concat_db *concats = &test.m_concats;
2298
be516c70 2299 source_range actual_range = source_range();
d4166bdc 2300 const char *err
5927e78e 2301 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2302 &actual_range);
d4166bdc 2303 if (should_have_column_data_p (strloc))
2304 ASSERT_EQ_AT (loc, NULL, err);
2305 else
2306 {
2307 ASSERT_STREQ_AT (loc,
2308 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2309 err);
2310 return;
2311 }
2312
2313 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2314 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2315 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2316 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2317
2318 if (should_have_column_data_p (actual_range.m_start))
2319 {
2320 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2321 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2322 }
2323 if (should_have_column_data_p (actual_range.m_finish))
2324 {
2325 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2326 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2327 }
2328}
2329
/* Convenience wrapper around assert_char_at_range; any failure is
   reported at the point of use, via SELFTEST_LOCATION.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
                             EXPECTED_START_COL, EXPECTED_FINISH_COL)      \
  assert_char_at_range (SELFTEST_LOCATION,                                 \
                        (LEXER_TEST),                                      \
                        (STRLOC),                                          \
                        (TYPE),                                            \
                        (IDX),                                             \
                        (EXPECTED_LINE),                                   \
                        (EXPECTED_START_COL),                              \
                        (EXPECTED_FINISH_COL))
2338
2339/* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2340 using the string concatenation database for TEST.
2341
2342 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2343
2344static void
2345assert_num_substring_ranges (const location &loc,
2346 lexer_test& test,
2347 location_t strloc,
2348 enum cpp_ttype type,
2349 int expected_num_ranges)
2350{
2351 cpp_reader *pfile = test.m_parser;
2352 string_concat_db *concats = &test.m_concats;
2353
45183e4c 2354 int actual_num_ranges = -1;
d4166bdc 2355 const char *err
2356 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2357 &actual_num_ranges);
2358 if (should_have_column_data_p (strloc))
2359 ASSERT_EQ_AT (loc, NULL, err);
2360 else
2361 {
2362 ASSERT_STREQ_AT (loc,
2363 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2364 err);
2365 return;
2366 }
2367 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2368}
2369
/* Convenience wrapper around assert_num_substring_ranges; any failure
   is reported at the point of use, via SELFTEST_LOCATION.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,   \
                                    EXPECTED_NUM_RANGES)        \
  assert_num_substring_ranges (SELFTEST_LOCATION,               \
                               (LEXER_TEST),                    \
                               (STRLOC),                        \
                               (TYPE),                          \
                               (EXPECTED_NUM_RANGES))
2377
2378
2379/* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2380 returns an error (using the string concatenation database for TEST). */
2381
2382static void
2383assert_has_no_substring_ranges (const location &loc,
2384 lexer_test& test,
2385 location_t strloc,
2386 enum cpp_ttype type,
2387 const char *expected_err)
2388{
2389 cpp_reader *pfile = test.m_parser;
2390 string_concat_db *concats = &test.m_concats;
2391 cpp_substring_ranges ranges;
2392 const char *actual_err
2393 = get_substring_ranges_for_loc (pfile, concats, strloc,
2394 type, ranges);
2395 if (should_have_column_data_p (strloc))
2396 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2397 else
2398 ASSERT_STREQ_AT (loc,
2399 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2400 actual_err);
2401}
2402
/* Convenience wrapper around assert_has_no_substring_ranges; any
   failure is reported at the point of use, via SELFTEST_LOCATION.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)   \
  assert_has_no_substring_ranges (SELFTEST_LOCATION,                    \
                                  (LEXER_TEST),                         \
                                  (STRLOC),                             \
                                  (TYPE),                               \
                                  (ERR))
2406
2407/* Lex a simple string literal. Verify the substring location data, before
2408 and after running cpp_interpret_string on it. */
2409
2410static void
2411test_lexer_string_locations_simple (const line_table_case &case_)
2412{
2413 /* Digits 0-9 (with 0 at column 10), the simple way.
2414 ....................000000000.11111111112.2222222223333333333
2415 ....................123456789.01234567890.1234567890123456789
2416 We add a trailing comment to ensure that we correctly locate
2417 the end of the string literal token. */
2418 const char *content = " \"0123456789\" /* not a string */\n";
2419 lexer_test test (case_, content, NULL);
2420
2421 /* Verify that we get the expected token back, with the correct
2422 location information. */
2423 const cpp_token *tok = test.get_token ();
2424 ASSERT_EQ (tok->type, CPP_STRING);
2425 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2426 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2427
2428 /* At this point in lexing, the quote characters are treated as part of
2429 the string (they are stripped off by cpp_interpret_string). */
2430
2431 ASSERT_EQ (tok->val.str.len, 12);
2432
2433 /* Verify that cpp_interpret_string works. */
2434 cpp_string dst_string;
2435 const enum cpp_ttype type = CPP_STRING;
2436 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2437 &dst_string, type);
2438 ASSERT_TRUE (result);
2439 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2440 free (const_cast <unsigned char *> (dst_string.text));
2441
2442 /* Verify ranges of individual characters. This no longer includes the
7413e757 2443 opening quote, but does include the closing quote. */
2444 for (int i = 0; i <= 10; i++)
d4166bdc 2445 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2446 10 + i, 10 + i);
2447
7413e757 2448 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
d4166bdc 2449}
2450
2451/* As test_lexer_string_locations_simple, but use an EBCDIC execution
2452 encoding. */
2453
2454static void
2455test_lexer_string_locations_ebcdic (const line_table_case &case_)
2456{
2457 /* EBCDIC support requires iconv. */
2458 if (!HAVE_ICONV)
2459 return;
2460
2461 /* Digits 0-9 (with 0 at column 10), the simple way.
2462 ....................000000000.11111111112.2222222223333333333
2463 ....................123456789.01234567890.1234567890123456789
2464 We add a trailing comment to ensure that we correctly locate
2465 the end of the string literal token. */
2466 const char *content = " \"0123456789\" /* not a string */\n";
2467 ebcdic_execution_charset use_ebcdic;
2468 lexer_test test (case_, content, &use_ebcdic);
2469
2470 /* Verify that we get the expected token back, with the correct
2471 location information. */
2472 const cpp_token *tok = test.get_token ();
2473 ASSERT_EQ (tok->type, CPP_STRING);
2474 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2475 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2476
2477 /* At this point in lexing, the quote characters are treated as part of
2478 the string (they are stripped off by cpp_interpret_string). */
2479
2480 ASSERT_EQ (tok->val.str.len, 12);
2481
2482 /* The remainder of the test requires an iconv implementation that
2483 can convert from UTF-8 to the EBCDIC encoding requested above. */
2484 if (use_ebcdic.iconv_errors_occurred_p ())
2485 return;
2486
2487 /* Verify that cpp_interpret_string works. */
2488 cpp_string dst_string;
2489 const enum cpp_ttype type = CPP_STRING;
2490 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2491 &dst_string, type);
2492 ASSERT_TRUE (result);
2493 /* We should now have EBCDIC-encoded text, specifically
2494 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2495 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2496 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2497 (const char *)dst_string.text);
2498 free (const_cast <unsigned char *> (dst_string.text));
2499
2500 /* Verify that we don't attempt to record substring location information
2501 for such cases. */
2502 ASSERT_HAS_NO_SUBSTRING_RANGES
2503 (test, tok->src_loc, type,
2504 "execution character set != source character set");
2505}
2506
2507/* Lex a string literal containing a hex-escaped character.
2508 Verify the substring location data, before and after running
2509 cpp_interpret_string on it. */
2510
2511static void
2512test_lexer_string_locations_hex (const line_table_case &case_)
2513{
2514 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2515 and with a space in place of digit 6, to terminate the escaped
2516 hex code.
2517 ....................000000000.111111.11112222.
2518 ....................123456789.012345.67890123. */
2519 const char *content = " \"01234\\x35 789\"\n";
2520 lexer_test test (case_, content, NULL);
2521
2522 /* Verify that we get the expected token back, with the correct
2523 location information. */
2524 const cpp_token *tok = test.get_token ();
2525 ASSERT_EQ (tok->type, CPP_STRING);
2526 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2527 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2528
2529 /* At this point in lexing, the quote characters are treated as part of
2530 the string (they are stripped off by cpp_interpret_string). */
2531 ASSERT_EQ (tok->val.str.len, 15);
2532
2533 /* Verify that cpp_interpret_string works. */
2534 cpp_string dst_string;
2535 const enum cpp_ttype type = CPP_STRING;
2536 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2537 &dst_string, type);
2538 ASSERT_TRUE (result);
2539 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2540 free (const_cast <unsigned char *> (dst_string.text));
2541
2542 /* Verify ranges of individual characters. This no longer includes the
7413e757 2543 opening quote, but does include the closing quote. */
d4166bdc 2544 for (int i = 0; i <= 4; i++)
2545 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2546 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
7413e757 2547 for (int i = 6; i <= 10; i++)
d4166bdc 2548 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2549
7413e757 2550 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
d4166bdc 2551}
2552
2553/* Lex a string literal containing an octal-escaped character.
2554 Verify the substring location data after running cpp_interpret_string
2555 on it. */
2556
2557static void
2558test_lexer_string_locations_oct (const line_table_case &case_)
2559{
2560 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2561 and with a space in place of digit 6, to terminate the escaped
2562 octal code.
2563 ....................000000000.111111.11112222.2222223333333333444
2564 ....................123456789.012345.67890123.4567890123456789012 */
2565 const char *content = " \"01234\\065 789\" /* not a string */\n";
2566 lexer_test test (case_, content, NULL);
2567
2568 /* Verify that we get the expected token back, with the correct
2569 location information. */
2570 const cpp_token *tok = test.get_token ();
2571 ASSERT_EQ (tok->type, CPP_STRING);
2572 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2573
2574 /* Verify that cpp_interpret_string works. */
2575 cpp_string dst_string;
2576 const enum cpp_ttype type = CPP_STRING;
2577 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2578 &dst_string, type);
2579 ASSERT_TRUE (result);
2580 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2581 free (const_cast <unsigned char *> (dst_string.text));
2582
2583 /* Verify ranges of individual characters. This no longer includes the
7413e757 2584 opening quote, but does include the closing quote. */
d4166bdc 2585 for (int i = 0; i < 5; i++)
2586 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2587 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
7413e757 2588 for (int i = 6; i <= 10; i++)
d4166bdc 2589 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2590
7413e757 2591 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
d4166bdc 2592}
2593
2594/* Test of string literal containing letter escapes. */
2595
2596static void
2597test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2598{
2599 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2600 .....................000000000.1.11111.1.1.11222.22222223333333
2601 .....................123456789.0.12345.6.7.89012.34567890123456. */
2602 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2603 lexer_test test (case_, content, NULL);
2604
2605 /* Verify that we get the expected tokens back. */
2606 const cpp_token *tok = test.get_token ();
2607 ASSERT_EQ (tok->type, CPP_STRING);
2608 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2609
2610 /* Verify ranges of individual characters. */
2611 /* "\t". */
2612 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2613 0, 1, 10, 11);
2614 /* "foo". */
2615 for (int i = 1; i <= 3; i++)
2616 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2617 i, 1, 11 + i, 11 + i);
2618 /* "\\" and "\n". */
2619 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2620 4, 1, 15, 16);
2621 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2622 5, 1, 17, 18);
2623
7413e757 2624 /* "bar" and closing quote for nul-terminator. */
2625 for (int i = 6; i <= 9; i++)
d4166bdc 2626 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2627 i, 1, 13 + i, 13 + i);
2628
7413e757 2629 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
d4166bdc 2630}
2631
2632/* Another test of a string literal containing a letter escape.
2633 Based on string seen in
2634 printf ("%-%\n");
2635 in gcc.dg/format/c90-printf-1.c. */
2636
2637static void
2638test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2639{
2640 /* .....................000000000.1111.11.1111.22222222223.
2641 .....................123456789.0123.45.6789.01234567890. */
2642 const char *content = (" \"%-%\\n\" /* non-str */\n");
2643 lexer_test test (case_, content, NULL);
2644
2645 /* Verify that we get the expected tokens back. */
2646 const cpp_token *tok = test.get_token ();
2647 ASSERT_EQ (tok->type, CPP_STRING);
2648 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2649
2650 /* Verify ranges of individual characters. */
2651 /* "%-%". */
2652 for (int i = 0; i < 3; i++)
2653 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2654 i, 1, 10 + i, 10 + i);
2655 /* "\n". */
2656 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2657 3, 1, 13, 14);
2658
7413e757 2659 /* Closing quote for nul-terminator. */
2660 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2661 4, 1, 15, 15);
2662
2663 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
d4166bdc 2664}
2665
2666/* Lex a string literal containing UCN 4 characters.
2667 Verify the substring location data after running cpp_interpret_string
2668 on it. */
2669
2670static void
2671test_lexer_string_locations_ucn4 (const line_table_case &case_)
2672{
2673 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2674 as UCN 4.
2675 ....................000000000.111111.111122.222222223.33333333344444
2676 ....................123456789.012345.678901.234567890.12345678901234 */
2677 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2678 lexer_test test (case_, content, NULL);
2679
2680 /* Verify that we get the expected token back, with the correct
2681 location information. */
2682 const cpp_token *tok = test.get_token ();
2683 ASSERT_EQ (tok->type, CPP_STRING);
2684 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2685
2686 /* Verify that cpp_interpret_string works.
2687 The string should be encoded in the execution character
2688 set. Assuming that that is UTF-8, we should have the following:
2689 ----------- ---- ----- ------- ----------------
2690 Byte offset Byte Octal Unicode Source Column(s)
2691 ----------- ---- ----- ------- ----------------
2692 0 0x30 '0' 10
2693 1 0x31 '1' 11
2694 2 0x32 '2' 12
2695 3 0x33 '3' 13
2696 4 0x34 '4' 14
2697 5 0xE2 \342 U+2174 15-20
2698 6 0x85 \205 (cont) 15-20
2699 7 0xB4 \264 (cont) 15-20
2700 8 0xE2 \342 U+2175 21-26
2701 9 0x85 \205 (cont) 21-26
2702 10 0xB5 \265 (cont) 21-26
2703 11 0x37 '7' 27
2704 12 0x38 '8' 28
2705 13 0x39 '9' 29
7413e757 2706 14 0x00 30 (closing quote)
d4166bdc 2707 ----------- ---- ----- ------- ---------------. */
2708
2709 cpp_string dst_string;
2710 const enum cpp_ttype type = CPP_STRING;
2711 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2712 &dst_string, type);
2713 ASSERT_TRUE (result);
2714 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2715 (const char *)dst_string.text);
2716 free (const_cast <unsigned char *> (dst_string.text));
2717
2718 /* Verify ranges of individual characters. This no longer includes the
7413e757 2719 opening quote, but does include the closing quote.
d4166bdc 2720 '01234'. */
2721 for (int i = 0; i <= 4; i++)
2722 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2723 /* U+2174. */
2724 for (int i = 5; i <= 7; i++)
2725 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2726 /* U+2175. */
2727 for (int i = 8; i <= 10; i++)
2728 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
7413e757 2729 /* '789' and nul terminator */
2730 for (int i = 11; i <= 14; i++)
d4166bdc 2731 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2732
7413e757 2733 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
d4166bdc 2734}
2735
2736/* Lex a string literal containing UCN 8 characters.
2737 Verify the substring location data after running cpp_interpret_string
2738 on it. */
2739
2740static void
2741test_lexer_string_locations_ucn8 (const line_table_case &case_)
2742{
2743 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2744 ....................000000000.111111.1111222222.2222333333333.344444
2745 ....................123456789.012345.6789012345.6789012345678.901234 */
2746 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2747 lexer_test test (case_, content, NULL);
2748
2749 /* Verify that we get the expected token back, with the correct
2750 location information. */
2751 const cpp_token *tok = test.get_token ();
2752 ASSERT_EQ (tok->type, CPP_STRING);
2753 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2754 "\"01234\\U00002174\\U00002175789\"");
2755
2756 /* Verify that cpp_interpret_string works.
2757 The UTF-8 encoding of the string is identical to that from
2758 the ucn4 testcase above; the only difference is the column
2759 locations. */
2760 cpp_string dst_string;
2761 const enum cpp_ttype type = CPP_STRING;
2762 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2763 &dst_string, type);
2764 ASSERT_TRUE (result);
2765 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2766 (const char *)dst_string.text);
2767 free (const_cast <unsigned char *> (dst_string.text));
2768
2769 /* Verify ranges of individual characters. This no longer includes the
7413e757 2770 opening quote, but does include the closing quote.
d4166bdc 2771 '01234'. */
2772 for (int i = 0; i <= 4; i++)
2773 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2774 /* U+2174. */
2775 for (int i = 5; i <= 7; i++)
2776 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2777 /* U+2175. */
2778 for (int i = 8; i <= 10; i++)
2779 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2780 /* '789' at columns 35-37 */
2781 for (int i = 11; i <= 13; i++)
2782 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
7413e757 2783 /* Closing quote/nul-terminator at column 38. */
2784 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
d4166bdc 2785
7413e757 2786 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
d4166bdc 2787}
2788
/* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit quantity
   (most significant byte first) and return it as a host-order value.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t host_value = 0;

  /* Shift in one byte at a time, starting from the most significant.  */
  for (int idx = 0; idx < 4; idx++)
    host_value = (host_value << 8) | (uint32_t) bytes[idx];

  return host_value;
}
2800
2801/* Lex a wide string literal and verify that attempts to read substring
2802 location data from it fail gracefully. */
2803
2804static void
2805test_lexer_string_locations_wide_string (const line_table_case &case_)
2806{
2807 /* Digits 0-9.
2808 ....................000000000.11111111112.22222222233333
2809 ....................123456789.01234567890.12345678901234 */
2810 const char *content = " L\"0123456789\" /* non-str */\n";
2811 lexer_test test (case_, content, NULL);
2812
2813 /* Verify that we get the expected token back, with the correct
2814 location information. */
2815 const cpp_token *tok = test.get_token ();
2816 ASSERT_EQ (tok->type, CPP_WSTRING);
2817 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2818
2819 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2820 cpp_string dst_string;
2821 const enum cpp_ttype type = CPP_WSTRING;
2822 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2823 &dst_string, type);
2824 ASSERT_TRUE (result);
2825 /* The cpp_reader defaults to big-endian with
2826 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2827 now be encoded as UTF-32BE. */
2828 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2829 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2830 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2831 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2832 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2833 free (const_cast <unsigned char *> (dst_string.text));
2834
2835 /* We don't yet support generating substring location information
2836 for L"" strings. */
2837 ASSERT_HAS_NO_SUBSTRING_RANGES
2838 (test, tok->src_loc, type,
2839 "execution character set != source character set");
2840}
2841
/* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit quantity
   (most significant byte first) and return it as a host-order value.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const uint16_t high_byte = bytes[0];
  const uint16_t low_byte = bytes[1];
  return (uint16_t) ((high_byte << 8) | low_byte);
}
2850
2851/* Lex a u"" string literal and verify that attempts to read substring
2852 location data from it fail gracefully. */
2853
2854static void
2855test_lexer_string_locations_string16 (const line_table_case &case_)
2856{
2857 /* Digits 0-9.
2858 ....................000000000.11111111112.22222222233333
2859 ....................123456789.01234567890.12345678901234 */
2860 const char *content = " u\"0123456789\" /* non-str */\n";
2861 lexer_test test (case_, content, NULL);
2862
2863 /* Verify that we get the expected token back, with the correct
2864 location information. */
2865 const cpp_token *tok = test.get_token ();
2866 ASSERT_EQ (tok->type, CPP_STRING16);
2867 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2868
2869 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2870 cpp_string dst_string;
2871 const enum cpp_ttype type = CPP_STRING16;
2872 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2873 &dst_string, type);
2874 ASSERT_TRUE (result);
2875
2876 /* The cpp_reader defaults to big-endian, so dst_string should
2877 now be encoded as UTF-16BE. */
2878 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2879 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2880 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2881 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2882 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2883 free (const_cast <unsigned char *> (dst_string.text));
2884
2885 /* We don't yet support generating substring location information
2886 for L"" strings. */
2887 ASSERT_HAS_NO_SUBSTRING_RANGES
2888 (test, tok->src_loc, type,
2889 "execution character set != source character set");
2890}
2891
2892/* Lex a U"" string literal and verify that attempts to read substring
2893 location data from it fail gracefully. */
2894
2895static void
2896test_lexer_string_locations_string32 (const line_table_case &case_)
2897{
2898 /* Digits 0-9.
2899 ....................000000000.11111111112.22222222233333
2900 ....................123456789.01234567890.12345678901234 */
2901 const char *content = " U\"0123456789\" /* non-str */\n";
2902 lexer_test test (case_, content, NULL);
2903
2904 /* Verify that we get the expected token back, with the correct
2905 location information. */
2906 const cpp_token *tok = test.get_token ();
2907 ASSERT_EQ (tok->type, CPP_STRING32);
2908 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2909
2910 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2911 cpp_string dst_string;
2912 const enum cpp_ttype type = CPP_STRING32;
2913 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2914 &dst_string, type);
2915 ASSERT_TRUE (result);
2916
2917 /* The cpp_reader defaults to big-endian, so dst_string should
2918 now be encoded as UTF-32BE. */
2919 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2920 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2921 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2922 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2923 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2924 free (const_cast <unsigned char *> (dst_string.text));
2925
2926 /* We don't yet support generating substring location information
2927 for L"" strings. */
2928 ASSERT_HAS_NO_SUBSTRING_RANGES
2929 (test, tok->src_loc, type,
2930 "execution character set != source character set");
2931}
2932
2933/* Lex a u8-string literal.
2934 Verify the substring location data after running cpp_interpret_string
2935 on it. */
2936
2937static void
2938test_lexer_string_locations_u8 (const line_table_case &case_)
2939{
2940 /* Digits 0-9.
2941 ....................000000000.11111111112.22222222233333
2942 ....................123456789.01234567890.12345678901234 */
2943 const char *content = " u8\"0123456789\" /* non-str */\n";
2944 lexer_test test (case_, content, NULL);
2945
2946 /* Verify that we get the expected token back, with the correct
2947 location information. */
2948 const cpp_token *tok = test.get_token ();
2949 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2950 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2951
2952 /* Verify that cpp_interpret_string works. */
2953 cpp_string dst_string;
2954 const enum cpp_ttype type = CPP_STRING;
2955 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2956 &dst_string, type);
2957 ASSERT_TRUE (result);
2958 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2959 free (const_cast <unsigned char *> (dst_string.text));
2960
2961 /* Verify ranges of individual characters. This no longer includes the
7413e757 2962 opening quote, but does include the closing quote. */
2963 for (int i = 0; i <= 10; i++)
d4166bdc 2964 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2965}
2966
2967/* Lex a string literal containing UTF-8 source characters.
2968 Verify the substring location data after running cpp_interpret_string
2969 on it. */
2970
2971static void
2972test_lexer_string_locations_utf8_source (const line_table_case &case_)
2973{
2974 /* This string literal is written out to the source file as UTF-8,
2975 and is of the form "before mojibake after", where "mojibake"
2976 is written as the following four unicode code points:
2977 U+6587 CJK UNIFIED IDEOGRAPH-6587
2978 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2979 U+5316 CJK UNIFIED IDEOGRAPH-5316
2980 U+3051 HIRAGANA LETTER KE.
2981 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2982 "before" and "after" are 1 byte per unicode character.
2983
2984 The numbering shown are "columns", which are *byte* numbers within
2985 the line, rather than unicode character numbers.
2986
2987 .................... 000000000.1111111.
2988 .................... 123456789.0123456. */
2989 const char *content = (" \"before "
2990 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2991 UTF-8: 0xE6 0x96 0x87
2992 C octal escaped UTF-8: \346\226\207
2993 "column" numbers: 17-19. */
2994 "\346\226\207"
2995
2996 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2997 UTF-8: 0xE5 0xAD 0x97
2998 C octal escaped UTF-8: \345\255\227
2999 "column" numbers: 20-22. */
3000 "\345\255\227"
3001
3002 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3003 UTF-8: 0xE5 0x8C 0x96
3004 C octal escaped UTF-8: \345\214\226
3005 "column" numbers: 23-25. */
3006 "\345\214\226"
3007
3008 /* U+3051 HIRAGANA LETTER KE
3009 UTF-8: 0xE3 0x81 0x91
3010 C octal escaped UTF-8: \343\201\221
3011 "column" numbers: 26-28. */
3012 "\343\201\221"
3013
3014 /* column numbers 29 onwards
3015 2333333.33334444444444
3016 9012345.67890123456789. */
3017 " after\" /* non-str */\n");
3018 lexer_test test (case_, content, NULL);
3019
3020 /* Verify that we get the expected token back, with the correct
3021 location information. */
3022 const cpp_token *tok = test.get_token ();
3023 ASSERT_EQ (tok->type, CPP_STRING);
3024 ASSERT_TOKEN_AS_TEXT_EQ
3025 (test.m_parser, tok,
3026 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3027
3028 /* Verify that cpp_interpret_string works. */
3029 cpp_string dst_string;
3030 const enum cpp_ttype type = CPP_STRING;
3031 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3032 &dst_string, type);
3033 ASSERT_TRUE (result);
3034 ASSERT_STREQ
3035 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3036 (const char *)dst_string.text);
3037 free (const_cast <unsigned char *> (dst_string.text));
3038
3039 /* Verify ranges of individual characters. This no longer includes the
7413e757 3040 opening quote, but does include the closing quote.
d4166bdc 3041 Assuming that both source and execution encodings are UTF-8, we have
7413e757 3042 a run of 25 octets in each, plus the NUL terminator. */
d4166bdc 3043 for (int i = 0; i < 25; i++)
3044 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
7413e757 3045 /* NUL-terminator should use the closing quote at column 35. */
3046 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
d4166bdc 3047
7413e757 3048 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
d4166bdc 3049}
3050
3051/* Test of string literal concatenation. */
3052
3053static void
3054test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3055{
3056 /* Digits 0-9.
3057 .....................000000000.111111.11112222222222
3058 .....................123456789.012345.67890123456789. */
3059 const char *content = (" \"01234\" /* non-str */\n"
3060 " \"56789\" /* non-str */\n");
3061 lexer_test test (case_, content, NULL);
3062
3063 location_t input_locs[2];
3064
3065 /* Verify that we get the expected tokens back. */
3066 auto_vec <cpp_string> input_strings;
3067 const cpp_token *tok_a = test.get_token ();
3068 ASSERT_EQ (tok_a->type, CPP_STRING);
3069 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3070 input_strings.safe_push (tok_a->val.str);
3071 input_locs[0] = tok_a->src_loc;
3072
3073 const cpp_token *tok_b = test.get_token ();
3074 ASSERT_EQ (tok_b->type, CPP_STRING);
3075 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3076 input_strings.safe_push (tok_b->val.str);
3077 input_locs[1] = tok_b->src_loc;
3078
3079 /* Verify that cpp_interpret_string works. */
3080 cpp_string dst_string;
3081 const enum cpp_ttype type = CPP_STRING;
3082 bool result = cpp_interpret_string (test.m_parser,
3083 input_strings.address (), 2,
3084 &dst_string, type);
3085 ASSERT_TRUE (result);
3086 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3087 free (const_cast <unsigned char *> (dst_string.text));
3088
3089 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3090 test.m_concats.record_string_concatenation (2, input_locs);
3091
3092 location_t initial_loc = input_locs[0];
3093
7413e757 3094 /* "01234" on line 1. */
d4166bdc 3095 for (int i = 0; i <= 4; i++)
3096 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
7413e757 3097 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3098 for (int i = 5; i <= 10; i++)
d4166bdc 3099 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3100
7413e757 3101 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
d4166bdc 3102}
3103
3104/* Another test of string literal concatenation. */
3105
3106static void
3107test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3108{
3109 /* Digits 0-9.
3110 .....................000000000.111.11111112222222
3111 .....................123456789.012.34567890123456. */
3112 const char *content = (" \"01\" /* non-str */\n"
3113 " \"23\" /* non-str */\n"
3114 " \"45\" /* non-str */\n"
3115 " \"67\" /* non-str */\n"
3116 " \"89\" /* non-str */\n");
3117 lexer_test test (case_, content, NULL);
3118
3119 auto_vec <cpp_string> input_strings;
3120 location_t input_locs[5];
3121
3122 /* Verify that we get the expected tokens back. */
3123 for (int i = 0; i < 5; i++)
3124 {
3125 const cpp_token *tok = test.get_token ();
3126 ASSERT_EQ (tok->type, CPP_STRING);
3127 input_strings.safe_push (tok->val.str);
3128 input_locs[i] = tok->src_loc;
3129 }
3130
3131 /* Verify that cpp_interpret_string works. */
3132 cpp_string dst_string;
3133 const enum cpp_ttype type = CPP_STRING;
3134 bool result = cpp_interpret_string (test.m_parser,
3135 input_strings.address (), 5,
3136 &dst_string, type);
3137 ASSERT_TRUE (result);
3138 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3139 free (const_cast <unsigned char *> (dst_string.text));
3140
3141 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3142 test.m_concats.record_string_concatenation (5, input_locs);
3143
3144 location_t initial_loc = input_locs[0];
3145
3146 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3147 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3148 and expect get_source_range_for_substring to fail.
3149 However, for a string concatenation test, we can have a case
3150 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3151 but subsequent strings can be after it.
3152 Attempting to detect this within assert_char_at_range
3153 would overcomplicate the logic for the common test cases, so
3154 we detect it here. */
3155 if (should_have_column_data_p (input_locs[0])
3156 && !should_have_column_data_p (input_locs[4]))
3157 {
3158 /* Verify that get_source_range_for_substring gracefully rejects
3159 this case. */
3160 source_range actual_range;
3161 const char *err
5927e78e 3162 = get_source_range_for_char (test.m_parser, &test.m_concats,
3163 initial_loc, type, 0, &actual_range);
d4166bdc 3164 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3165 return;
3166 }
3167
3168 for (int i = 0; i < 5; i++)
3169 for (int j = 0; j < 2; j++)
3170 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3171 i + 1, 10 + j, 10 + j);
3172
7413e757 3173 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3174 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3175
3176 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
d4166bdc 3177}
3178
3179/* Another test of string literal concatenation, this time combined with
3180 various kinds of escaped characters. */
3181
3182static void
3183test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3184{
3185 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3186 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3187 const char *content
3188 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3189 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3190 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3191 lexer_test test (case_, content, NULL);
3192
3193 auto_vec <cpp_string> input_strings;
3194 location_t input_locs[4];
3195
3196 /* Verify that we get the expected tokens back. */
3197 for (int i = 0; i < 4; i++)
3198 {
3199 const cpp_token *tok = test.get_token ();
3200 ASSERT_EQ (tok->type, CPP_STRING);
3201 input_strings.safe_push (tok->val.str);
3202 input_locs[i] = tok->src_loc;
3203 }
3204
3205 /* Verify that cpp_interpret_string works. */
3206 cpp_string dst_string;
3207 const enum cpp_ttype type = CPP_STRING;
3208 bool result = cpp_interpret_string (test.m_parser,
3209 input_strings.address (), 4,
3210 &dst_string, type);
3211 ASSERT_TRUE (result);
3212 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3213 free (const_cast <unsigned char *> (dst_string.text));
3214
3215 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3216 test.m_concats.record_string_concatenation (4, input_locs);
3217
3218 location_t initial_loc = input_locs[0];
3219
3220 for (int i = 0; i <= 4; i++)
3221 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3222 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3223 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3224 for (int i = 7; i <= 9; i++)
3225 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3226
7413e757 3227 /* NUL-terminator should use the location of the final closing quote. */
3228 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3229
3230 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
d4166bdc 3231}
3232
3233/* Test of string literal in a macro. */
3234
3235static void
3236test_lexer_string_locations_macro (const line_table_case &case_)
3237{
3238 /* Digits 0-9.
3239 .....................0000000001111111111.22222222223.
3240 .....................1234567890123456789.01234567890. */
3241 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3242 " MACRO");
3243 lexer_test test (case_, content, NULL);
3244
3245 /* Verify that we get the expected tokens back. */
3246 const cpp_token *tok = test.get_token ();
3247 ASSERT_EQ (tok->type, CPP_PADDING);
3248
3249 tok = test.get_token ();
3250 ASSERT_EQ (tok->type, CPP_STRING);
3251 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3252
3253 /* Verify ranges of individual characters. We ought to
3254 see columns within the macro definition. */
7413e757 3255 for (int i = 0; i <= 10; i++)
d4166bdc 3256 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3257 i, 1, 20 + i, 20 + i);
3258
7413e757 3259 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
d4166bdc 3260
3261 tok = test.get_token ();
3262 ASSERT_EQ (tok->type, CPP_PADDING);
3263}
3264
3265/* Test of stringification of a macro argument. */
3266
3267static void
3268test_lexer_string_locations_stringified_macro_argument
3269 (const line_table_case &case_)
3270{
3271 /* .....................000000000111111111122222222223.
3272 .....................123456789012345678901234567890. */
3273 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3274 "MACRO(foo)\n");
3275 lexer_test test (case_, content, NULL);
3276
3277 /* Verify that we get the expected token back. */
3278 const cpp_token *tok = test.get_token ();
3279 ASSERT_EQ (tok->type, CPP_PADDING);
3280
3281 tok = test.get_token ();
3282 ASSERT_EQ (tok->type, CPP_STRING);
3283 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3284
3285 /* We don't support getting the location of a stringified macro
3286 argument. Verify that it fails gracefully. */
3287 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3288 "cpp_interpret_string_1 failed");
3289
3290 tok = test.get_token ();
3291 ASSERT_EQ (tok->type, CPP_PADDING);
3292
3293 tok = test.get_token ();
3294 ASSERT_EQ (tok->type, CPP_PADDING);
3295}
3296
3297/* Ensure that we are fail gracefully if something attempts to pass
3298 in a location that isn't a string literal token. Seen on this code:
3299
3300 const char a[] = " %d ";
3301 __builtin_printf (a, 0.5);
3302 ^
3303
3304 when c-format.c erroneously used the indicated one-character
3305 location as the format string location, leading to a read past the
3306 end of a string buffer in cpp_interpret_string_1. */
3307
3308static void
3309test_lexer_string_locations_non_string (const line_table_case &case_)
3310{
3311 /* .....................000000000111111111122222222223.
3312 .....................123456789012345678901234567890. */
3313 const char *content = (" a\n");
3314 lexer_test test (case_, content, NULL);
3315
3316 /* Verify that we get the expected token back. */
3317 const cpp_token *tok = test.get_token ();
3318 ASSERT_EQ (tok->type, CPP_NAME);
3319 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3320
3321 /* At this point, libcpp is attempting to interpret the name as a
3322 string literal, despite it not starting with a quote. We don't detect
3323 that, but we should at least fail gracefully. */
3324 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3325 "cpp_interpret_string_1 failed");
3326}
3327
3328/* Ensure that we can read substring information for a token which
3329 starts in one linemap and ends in another . Adapted from
3330 gcc.dg/cpp/pr69985.c. */
3331
3332static void
3333test_lexer_string_locations_long_line (const line_table_case &case_)
3334{
3335 /* .....................000000.000111111111
3336 .....................123456.789012346789. */
3337 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3338 " \"0123456789012345678901234567890123456789"
3339 "0123456789012345678901234567890123456789"
3340 "0123456789012345678901234567890123456789"
3341 "0123456789\"\n");
3342
3343 lexer_test test (case_, content, NULL);
3344
3345 /* Verify that we get the expected token back. */
3346 const cpp_token *tok = test.get_token ();
3347 ASSERT_EQ (tok->type, CPP_STRING);
3348
3349 if (!should_have_column_data_p (line_table->highest_location))
3350 return;
3351
3352 /* Verify ranges of individual characters. */
7413e757 3353 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3354 for (int i = 0; i < 131; i++)
d4166bdc 3355 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3356 i, 2, 7 + i, 7 + i);
3357}
3358
f9f26759 3359/* Test of locations within a raw string that doesn't contain a newline. */
3360
3361static void
3362test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3363{
3364 /* .....................00.0000000111111111122.
3365 .....................12.3456789012345678901. */
3366 const char *content = ("R\"foo(0123456789)foo\"\n");
3367 lexer_test test (case_, content, NULL);
3368
3369 /* Verify that we get the expected token back. */
3370 const cpp_token *tok = test.get_token ();
3371 ASSERT_EQ (tok->type, CPP_STRING);
3372
3373 /* Verify that cpp_interpret_string works. */
3374 cpp_string dst_string;
3375 const enum cpp_ttype type = CPP_STRING;
3376 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3377 &dst_string, type);
3378 ASSERT_TRUE (result);
3379 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3380 free (const_cast <unsigned char *> (dst_string.text));
3381
3382 if (!should_have_column_data_p (line_table->highest_location))
3383 return;
3384
3385 /* 0-9, plus the nil terminator. */
3386 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3387 for (int i = 0; i < 11; i++)
3388 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3389 i, 1, 7 + i, 7 + i);
3390}
3391
3392/* Test of locations within a raw string that contains a newline. */
3393
3394static void
3395test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3396{
3397 /* .....................00.0000.
3398 .....................12.3456. */
3399 const char *content = ("R\"foo(\n"
3400 /* .....................00000.
3401 .....................12345. */
3402 "hello\n"
3403 "world\n"
3404 /* .....................00000.
3405 .....................12345. */
3406 ")foo\"\n");
3407 lexer_test test (case_, content, NULL);
3408
3409 /* Verify that we get the expected token back. */
3410 const cpp_token *tok = test.get_token ();
3411 ASSERT_EQ (tok->type, CPP_STRING);
3412
3413 /* Verify that cpp_interpret_string works. */
3414 cpp_string dst_string;
3415 const enum cpp_ttype type = CPP_STRING;
3416 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3417 &dst_string, type);
3418 ASSERT_TRUE (result);
3419 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3420 free (const_cast <unsigned char *> (dst_string.text));
3421
3422 if (!should_have_column_data_p (line_table->highest_location))
3423 return;
3424
3425 /* Currently we don't support locations within raw strings that
3426 contain newlines. */
3427 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3428 "range endpoints are on different lines");
3429}
3430
0ccd6e7a 3431/* Test of parsing an unterminated raw string. */
3432
3433static void
3434test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3435{
3436 const char *content = "R\"ouch()ouCh\" /* etc */";
3437
fb225cf1 3438 lexer_diagnostic_sink diagnostics;
3439 lexer_test test (case_, content, &diagnostics);
0ccd6e7a 3440 test.m_implicitly_expect_EOF = false;
3441
3442 /* Attempt to parse the raw string. */
3443 const cpp_token *tok = test.get_token ();
3444 ASSERT_EQ (tok->type, CPP_EOF);
3445
fb225cf1 3446 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
0ccd6e7a 3447 /* We expect the message "unterminated raw string"
3448 in the "cpplib" translation domain.
3449 It's not clear that dgettext is available on all supported hosts,
3450 so this assertion is commented-out for now.
3451 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
fb225cf1 3452 diagnostics.m_diagnostics[0]);
0ccd6e7a 3453 */
3454}
3455
d4166bdc 3456/* Test of lexing char constants. */
3457
3458static void
3459test_lexer_char_constants (const line_table_case &case_)
3460{
3461 /* Various char constants.
3462 .....................0000000001111111111.22222222223.
3463 .....................1234567890123456789.01234567890. */
3464 const char *content = (" 'a'\n"
3465 " u'a'\n"
3466 " U'a'\n"
3467 " L'a'\n"
3468 " 'abc'\n");
3469 lexer_test test (case_, content, NULL);
3470
3471 /* Verify that we get the expected tokens back. */
3472 /* 'a'. */
3473 const cpp_token *tok = test.get_token ();
3474 ASSERT_EQ (tok->type, CPP_CHAR);
3475 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3476
3477 unsigned int chars_seen;
3478 int unsignedp;
3479 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3480 &chars_seen, &unsignedp);
3481 ASSERT_EQ (cc, 'a');
3482 ASSERT_EQ (chars_seen, 1);
3483
3484 /* u'a'. */
3485 tok = test.get_token ();
3486 ASSERT_EQ (tok->type, CPP_CHAR16);
3487 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3488
3489 /* U'a'. */
3490 tok = test.get_token ();
3491 ASSERT_EQ (tok->type, CPP_CHAR32);
3492 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3493
3494 /* L'a'. */
3495 tok = test.get_token ();
3496 ASSERT_EQ (tok->type, CPP_WCHAR);
3497 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3498
3499 /* 'abc' (c-char-sequence). */
3500 tok = test.get_token ();
3501 ASSERT_EQ (tok->type, CPP_CHAR);
3502 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3503}
b73690a4 3504/* A table of interesting location_t values, giving one axis of our test
3505 matrix. */
3506
3507static const location_t boundary_locations[] = {
3508 /* Zero means "don't override the default values for a new line_table". */
3509 0,
3510
3511 /* An arbitrary non-zero value that isn't close to one of
3512 the boundary values below. */
3513 0x10000,
3514
3515 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3516 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3517 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3518 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3519 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3520 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3521
3522 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3523 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3524 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3525 LINE_MAP_MAX_LOCATION_WITH_COLS,
3526 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3527 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3528};
3529
7ec388ed 3530/* Run TESTCASE multiple times, once for each case in our test matrix. */
99b4f3a2 3531
3532void
7ec388ed 3533for_each_line_table_case (void (*testcase) (const line_table_case &))
99b4f3a2 3534{
b73690a4 3535 /* As noted above in the description of struct line_table_case,
3536 we want to explore a test matrix of interesting line_table
3537 situations, running various selftests for each case within the
3538 matrix. */
3539
3540 /* Run all tests with:
3541 (a) line_table->default_range_bits == 0, and
3542 (b) line_table->default_range_bits == 5. */
3543 int num_cases_tested = 0;
3544 for (int default_range_bits = 0; default_range_bits <= 5;
3545 default_range_bits += 5)
3546 {
3547 /* ...and use each of the "interesting" location values as
3548 the starting location within line_table. */
3549 const int num_boundary_locations
3550 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3551 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3552 {
3553 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3554
7ec388ed 3555 testcase (c);
b73690a4 3556
3557 num_cases_tested++;
3558 }
3559 }
3560
3561 /* Verify that we fully covered the test matrix. */
3562 ASSERT_EQ (num_cases_tested, 2 * 12);
7ec388ed 3563}
3564
e7e0c93c 3565/* Verify that when presented with a consecutive pair of locations with
3566 a very large line offset, we don't attempt to consolidate them into
3567 a single ordinary linemap where the line offsets within the line map
3568 would lead to overflow (PR lto/88147). */
3569
3570static void
3571test_line_offset_overflow ()
3572{
3573 line_table_test ltt (line_table_case (5, 0));
3574
3575 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3576 linemap_line_start (line_table, 1, 100);
3577 location_t loc_a = linemap_line_start (line_table, 2578, 255);
3578 assert_loceq ("foo.c", 2578, 0, loc_a);
3579
3580 const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3581 ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3582 ASSERT_EQ (ordmap_a->m_range_bits, 5);
3583
3584 location_t loc_b = linemap_line_start (line_table, 404198, 512);
3585 assert_loceq ("foo.c", 404198, 0, loc_b);
3586
3587 /* We should have started a new linemap, rather than attempting to store
3588 a very large line offset. */
3589 const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3590 ASSERT_NE (ordmap_a, ordmap_b);
3591}
3592
7ec388ed 3593/* Run all of the selftests within this file. */
3594
3595void
3596input_c_tests ()
3597{
d73881b0 3598 test_linenum_comparisons ();
7ec388ed 3599 test_should_have_column_data_p ();
3600 test_unknown_location ();
3601 test_builtins ();
aca2a315 3602 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
7ec388ed 3603
3604 for_each_line_table_case (test_accessing_ordinary_linemaps);
3605 for_each_line_table_case (test_lexer);
3606 for_each_line_table_case (test_lexer_string_locations_simple);
3607 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3608 for_each_line_table_case (test_lexer_string_locations_hex);
3609 for_each_line_table_case (test_lexer_string_locations_oct);
3610 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3611 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3612 for_each_line_table_case (test_lexer_string_locations_ucn4);
3613 for_each_line_table_case (test_lexer_string_locations_ucn8);
3614 for_each_line_table_case (test_lexer_string_locations_wide_string);
3615 for_each_line_table_case (test_lexer_string_locations_string16);
3616 for_each_line_table_case (test_lexer_string_locations_string32);
3617 for_each_line_table_case (test_lexer_string_locations_u8);
3618 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3619 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3620 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3621 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3622 for_each_line_table_case (test_lexer_string_locations_macro);
3623 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3624 for_each_line_table_case (test_lexer_string_locations_non_string);
3625 for_each_line_table_case (test_lexer_string_locations_long_line);
f9f26759 3626 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3627 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
0ccd6e7a 3628 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
7ec388ed 3629 for_each_line_table_case (test_lexer_char_constants);
b73690a4 3630
99b4f3a2 3631 test_reading_source_line ();
e7e0c93c 3632
3633 test_line_offset_overflow ();
99b4f3a2 3634}
3635
3636} // namespace selftest
3637
3638#endif /* CHECKING_P */