]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/input.c
PR c++/61339 - add mismatch between struct and class [-Wmismatched-tags] to non-bugs
[thirdparty/gcc.git] / gcc / input.c
CommitLineData
447924ef 1/* Data and functions related to line maps and input files.
a5544970 2 Copyright (C) 2004-2019 Free Software Foundation, Inc.
447924ef
JM
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "intl.h"
bc65bad2 24#include "diagnostic.h"
ba4ad400 25#include "diagnostic-core.h"
d9b950dd 26#include "selftest.h"
741d3be5 27#include "cpplib.h"
7ecc3eb9 28
a7d79e5c
DM
29#ifndef HAVE_ICONV
30#define HAVE_ICONV 0
31#endif
32
7ecc3eb9
DS
33/* This is a cache used by get_next_line to store the content of a
34 file to be searched for file lines. */
6c1dae73 35class fcache
7ecc3eb9 36{
6c1dae73 37public:
7ecc3eb9 38 /* These are information used to store a line boundary. */
6c1dae73 39 class line_info
7ecc3eb9 40 {
6c1dae73 41 public:
7ecc3eb9
DS
42 /* The line number. It starts from 1. */
43 size_t line_num;
44
45 /* The position (byte count) of the beginning of the line,
46 relative to the file data pointer. This starts at zero. */
47 size_t start_pos;
48
49 /* The position (byte count) of the last byte of the line. This
50 normally points to the '\n' character, or to one byte after the
51 last byte of the file, if the file doesn't contain a '\n'
52 character. */
53 size_t end_pos;
54
55 line_info (size_t l, size_t s, size_t e)
56 : line_num (l), start_pos (s), end_pos (e)
57 {}
58
59 line_info ()
60 :line_num (0), start_pos (0), end_pos (0)
61 {}
62 };
63
64 /* The number of time this file has been accessed. This is used
65 to designate which file cache to evict from the cache
66 array. */
67 unsigned use_count;
68
f5ea989d
DM
69 /* The file_path is the key for identifying a particular file in
70 the cache.
71 For libcpp-using code, the underlying buffer for this field is
72 owned by the corresponding _cpp_file within the cpp_reader. */
7ecc3eb9
DS
73 const char *file_path;
74
75 FILE *fp;
76
77 /* This points to the content of the file that we've read so
78 far. */
79 char *data;
80
81 /* The size of the DATA array above.*/
82 size_t size;
83
84 /* The number of bytes read from the underlying file so far. This
85 must be less (or equal) than SIZE above. */
86 size_t nb_read;
87
88 /* The index of the beginning of the current line. */
89 size_t line_start_idx;
90
91 /* The number of the previous line read. This starts at 1. Zero
92 means we've read no line so far. */
93 size_t line_num;
94
95 /* This is the total number of lines of the current file. At the
96 moment, we try to get this information from the line map
97 subsystem. Note that this is just a hint. When using the C++
98 front-end, this hint is correct because the input file is then
99 completely tokenized before parsing starts; so the line map knows
100 the number of lines before compilation really starts. For e.g,
101 the C front-end, it can happen that we start emitting diagnostics
102 before the line map has seen the end of the file. */
103 size_t total_lines;
104
c65236d6
DM
105 /* Could this file be missing a trailing newline on its final line?
106 Initially true (to cope with empty files), set to true/false
107 as each line is read. */
108 bool missing_trailing_newline;
109
7ecc3eb9
DS
110 /* This is a record of the beginning and end of the lines we've seen
111 while reading the file. This is useful to avoid walking the data
112 from the beginning when we are asked to read a line that is
113 before LINE_START_IDX above. Note that the maximum size of this
114 record is fcache_line_record_size, so that the memory consumption
115 doesn't explode. We thus scale total_lines down to
116 fcache_line_record_size. */
117 vec<line_info, va_heap> line_record;
118
119 fcache ();
120 ~fcache ();
121};
447924ef
JM
122
123/* Current position in real source file. */
124
3edf64aa 125location_t input_location = UNKNOWN_LOCATION;
447924ef 126
99b1c316 127class line_maps *line_table;
447924ef 128
f87e22c5
DM
129/* A stashed copy of "line_table" for use by selftest::line_table_test.
130 This needs to be a global so that it can be a GC root, and thus
131 prevent the stashed copy from being garbage-collected if the GC runs
132 during a line_table_test. */
133
99b1c316 134class line_maps *saved_line_table;
f87e22c5 135
7ecc3eb9
DS
136static fcache *fcache_tab;
137static const size_t fcache_tab_size = 16;
138static const size_t fcache_buffer_size = 4 * 1024;
139static const size_t fcache_line_record_size = 100;
140
84756fd4
DS
141/* Expand the source location LOC into a human readable location. If
142 LOC resolves to a builtin location, the file name of the readable
7eb918cc
DS
143 location is set to the string "<built-in>". If EXPANSION_POINT_P is
144 TRUE and LOC is virtual, then it is resolved to the expansion
145 point of the involved macro. Otherwise, it is resolved to the
c4ca1a09
DS
146 spelling location of the token.
147
148 When resolving to the spelling location of the token, if the
149 resulting location is for a built-in location (that is, it has no
150 associated line/column) in the context of a macro expansion, the
151 returned location is the first one (while unwinding the macro
152 location towards its expansion point) that is in real source
c471c6ed
DM
153 code.
154
155 ASPECT controls which part of the location to use. */
7eb918cc
DS
156
157static expanded_location
620e594b 158expand_location_1 (location_t loc,
c471c6ed
DM
159 bool expansion_point_p,
160 enum location_aspect aspect)
447924ef
JM
161{
162 expanded_location xloc;
0e50b624 163 const line_map_ordinary *map;
c4ca1a09 164 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
5368224f
DC
165 tree block = NULL;
166
167 if (IS_ADHOC_LOC (loc))
168 {
169 block = LOCATION_BLOCK (loc);
170 loc = LOCATION_LOCUS (loc);
171 }
c4ca1a09
DS
172
173 memset (&xloc, 0, sizeof (xloc));
84756fd4 174
c4ca1a09
DS
175 if (loc >= RESERVED_LOCATION_COUNT)
176 {
177 if (!expansion_point_p)
178 {
179 /* We want to resolve LOC to its spelling location.
180
181 But if that spelling location is a reserved location that
182 appears in the context of a macro expansion (like for a
183 location for a built-in token), let's consider the first
184 location (toward the expansion point) that is not reserved;
185 that is, the first location that is in real source code. */
186 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
0e50b624 187 loc, NULL);
c4ca1a09
DS
188 lrk = LRK_SPELLING_LOCATION;
189 }
c471c6ed
DM
190 loc = linemap_resolve_location (line_table, loc, lrk, &map);
191
192 /* loc is now either in an ordinary map, or is a reserved location.
193 If it is a compound location, the caret is in a spelling location,
194 but the start/finish might still be a virtual location.
195 Depending of what the caller asked for, we may need to recurse
196 one level in order to resolve any virtual locations in the
197 end-points. */
198 switch (aspect)
199 {
200 default:
201 gcc_unreachable ();
202 /* Fall through. */
203 case LOCATION_ASPECT_CARET:
204 break;
205 case LOCATION_ASPECT_START:
206 {
620e594b 207 location_t start = get_start (loc);
c471c6ed
DM
208 if (start != loc)
209 return expand_location_1 (start, expansion_point_p, aspect);
210 }
211 break;
212 case LOCATION_ASPECT_FINISH:
213 {
620e594b 214 location_t finish = get_finish (loc);
c471c6ed
DM
215 if (finish != loc)
216 return expand_location_1 (finish, expansion_point_p, aspect);
217 }
218 break;
219 }
c4ca1a09
DS
220 xloc = linemap_expand_location (line_table, map, loc);
221 }
84756fd4 222
5368224f 223 xloc.data = block;
447924ef 224 if (loc <= BUILTINS_LOCATION)
84756fd4
DS
225 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
226
447924ef
JM
227 return xloc;
228}
64a1a422 229
7ecc3eb9
DS
230/* Initialize the set of cache used for files accessed by caret
231 diagnostic. */
232
233static void
234diagnostic_file_cache_init (void)
235{
236 if (fcache_tab == NULL)
237 fcache_tab = new fcache[fcache_tab_size];
238}
239
592f32fa 240/* Free the resources used by the set of cache used for files accessed
7ecc3eb9
DS
241 by caret diagnostic. */
242
243void
244diagnostic_file_cache_fini (void)
245{
246 if (fcache_tab)
247 {
248 delete [] (fcache_tab);
249 fcache_tab = NULL;
250 }
251}
252
253/* Return the total lines number that have been read so far by the
254 line map (in the preprocessor) so far. For languages like C++ that
255 entirely preprocess the input file before starting to parse, this
256 equals the actual number of lines of the file. */
257
258static size_t
259total_lines_num (const char *file_path)
260{
261 size_t r = 0;
620e594b 262 location_t l = 0;
7ecc3eb9
DS
263 if (linemap_get_file_highest_location (line_table, file_path, &l))
264 {
265 gcc_assert (l >= RESERVED_LOCATION_COUNT);
266 expanded_location xloc = expand_location (l);
267 r = xloc.line;
268 }
269 return r;
270}
271
272/* Lookup the cache used for the content of a given file accessed by
273 caret diagnostic. Return the found cached file, or NULL if no
274 cached file was found. */
275
276static fcache*
277lookup_file_in_cache_tab (const char *file_path)
278{
279 if (file_path == NULL)
280 return NULL;
281
282 diagnostic_file_cache_init ();
283
284 /* This will contain the found cached file. */
285 fcache *r = NULL;
286 for (unsigned i = 0; i < fcache_tab_size; ++i)
287 {
288 fcache *c = &fcache_tab[i];
289 if (c->file_path && !strcmp (c->file_path, file_path))
290 {
291 ++c->use_count;
292 r = c;
293 }
294 }
295
296 if (r)
297 ++r->use_count;
298
299 return r;
300}
301
f89b03b6
DM
302/* Purge any mention of FILENAME from the cache of files used for
303 printing source code. For use in selftests when working
304 with tempfiles. */
305
306void
307diagnostics_file_cache_forcibly_evict_file (const char *file_path)
308{
309 gcc_assert (file_path);
310
311 fcache *r = lookup_file_in_cache_tab (file_path);
312 if (!r)
313 /* Not found. */
314 return;
315
316 r->file_path = NULL;
317 if (r->fp)
318 fclose (r->fp);
319 r->fp = NULL;
320 r->nb_read = 0;
321 r->line_start_idx = 0;
322 r->line_num = 0;
323 r->line_record.truncate (0);
324 r->use_count = 0;
325 r->total_lines = 0;
c65236d6 326 r->missing_trailing_newline = true;
f89b03b6
DM
327}
328
7ecc3eb9
DS
329/* Return the file cache that has been less used, recently, or the
330 first empty one. If HIGHEST_USE_COUNT is non-null,
331 *HIGHEST_USE_COUNT is set to the highest use count of the entries
332 in the cache table. */
333
334static fcache*
335evicted_cache_tab_entry (unsigned *highest_use_count)
336{
337 diagnostic_file_cache_init ();
338
339 fcache *to_evict = &fcache_tab[0];
340 unsigned huc = to_evict->use_count;
341 for (unsigned i = 1; i < fcache_tab_size; ++i)
342 {
343 fcache *c = &fcache_tab[i];
344 bool c_is_empty = (c->file_path == NULL);
345
346 if (c->use_count < to_evict->use_count
347 || (to_evict->file_path && c_is_empty))
348 /* We evict C because it's either an entry with a lower use
349 count or one that is empty. */
350 to_evict = c;
351
352 if (huc < c->use_count)
353 huc = c->use_count;
354
355 if (c_is_empty)
356 /* We've reached the end of the cache; subsequent elements are
357 all empty. */
358 break;
359 }
360
361 if (highest_use_count)
362 *highest_use_count = huc;
363
364 return to_evict;
365}
366
367/* Create the cache used for the content of a given file to be
368 accessed by caret diagnostic. This cache is added to an array of
369 cache and can be retrieved by lookup_file_in_cache_tab. This
370 function returns the created cache. Note that only the last
371 fcache_tab_size files are cached. */
372
373static fcache*
374add_file_to_cache_tab (const char *file_path)
375{
376
377 FILE *fp = fopen (file_path, "r");
317363b4
DS
378 if (fp == NULL)
379 return NULL;
7ecc3eb9
DS
380
381 unsigned highest_use_count = 0;
382 fcache *r = evicted_cache_tab_entry (&highest_use_count);
383 r->file_path = file_path;
384 if (r->fp)
385 fclose (r->fp);
386 r->fp = fp;
387 r->nb_read = 0;
388 r->line_start_idx = 0;
389 r->line_num = 0;
390 r->line_record.truncate (0);
391 /* Ensure that this cache entry doesn't get evicted next time
392 add_file_to_cache_tab is called. */
393 r->use_count = ++highest_use_count;
394 r->total_lines = total_lines_num (file_path);
c65236d6 395 r->missing_trailing_newline = true;
7ecc3eb9
DS
396
397 return r;
398}
399
400/* Lookup the cache used for the content of a given file accessed by
401 caret diagnostic. If no cached file was found, create a new cache
402 for this file, add it to the array of cached file and return
403 it. */
404
405static fcache*
406lookup_or_add_file_to_cache_tab (const char *file_path)
407{
408 fcache *r = lookup_file_in_cache_tab (file_path);
409 if (r == NULL)
410 r = add_file_to_cache_tab (file_path);
411 return r;
412}
413
414/* Default constructor for a cache of file used by caret
415 diagnostic. */
416
417fcache::fcache ()
418: use_count (0), file_path (NULL), fp (NULL), data (0),
419 size (0), nb_read (0), line_start_idx (0), line_num (0),
c65236d6 420 total_lines (0), missing_trailing_newline (true)
7ecc3eb9
DS
421{
422 line_record.create (0);
423}
424
425/* Destructor for a cache of file used by caret diagnostic. */
426
427fcache::~fcache ()
428{
429 if (fp)
430 {
431 fclose (fp);
432 fp = NULL;
433 }
434 if (data)
435 {
436 XDELETEVEC (data);
437 data = 0;
438 }
439 line_record.release ();
440}
441
442/* Returns TRUE iff the cache would need to be filled with data coming
443 from the file. That is, either the cache is empty or full or the
444 current line is empty. Note that if the cache is full, it would
445 need to be extended and filled again. */
446
447static bool
448needs_read (fcache *c)
449{
450 return (c->nb_read == 0
451 || c->nb_read == c->size
452 || (c->line_start_idx >= c->nb_read - 1));
453}
454
455/* Return TRUE iff the cache is full and thus needs to be
456 extended. */
457
458static bool
459needs_grow (fcache *c)
460{
461 return c->nb_read == c->size;
462}
463
464/* Grow the cache if it needs to be extended. */
465
466static void
467maybe_grow (fcache *c)
9fec0042 468{
7ecc3eb9
DS
469 if (!needs_grow (c))
470 return;
471
472 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
1adae327 473 c->data = XRESIZEVEC (char, c->data, size);
7ecc3eb9
DS
474 c->size = size;
475}
9fec0042 476
7ecc3eb9
DS
477/* Read more data into the cache. Extends the cache if need be.
478 Returns TRUE iff new data could be read. */
479
480static bool
481read_data (fcache *c)
482{
483 if (feof (c->fp) || ferror (c->fp))
484 return false;
485
486 maybe_grow (c);
487
488 char * from = c->data + c->nb_read;
489 size_t to_read = c->size - c->nb_read;
490 size_t nb_read = fread (from, 1, to_read, c->fp);
491
492 if (ferror (c->fp))
493 return false;
494
495 c->nb_read += nb_read;
496 return !!nb_read;
497}
498
499/* Read new data iff the cache needs to be filled with more data
500 coming from the file FP. Return TRUE iff the cache was filled with
501 mode data. */
502
503static bool
504maybe_read_data (fcache *c)
505{
506 if (!needs_read (c))
507 return false;
508 return read_data (c);
509}
510
511/* Read a new line from file FP, using C as a cache for the data
512 coming from the file. Upon successful completion, *LINE is set to
1adae327
BE
513 the beginning of the line found. *LINE points directly in the
514 line cache and is only valid until the next call of get_next_line.
7ecc3eb9
DS
515 *LINE_LEN is set to the length of the line. Note that the line
516 does not contain any terminal delimiter. This function returns
517 true if some data was read or process from the cache, false
1adae327
BE
518 otherwise. Note that subsequent calls to get_next_line might
519 make the content of *LINE invalid. */
7ecc3eb9
DS
520
521static bool
522get_next_line (fcache *c, char **line, ssize_t *line_len)
523{
524 /* Fill the cache with data to process. */
525 maybe_read_data (c);
526
527 size_t remaining_size = c->nb_read - c->line_start_idx;
528 if (remaining_size == 0)
529 /* There is no more data to process. */
530 return false;
531
532 char *line_start = c->data + c->line_start_idx;
533
534 char *next_line_start = NULL;
535 size_t len = 0;
536 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
537 if (line_end == NULL)
9fec0042 538 {
7ecc3eb9
DS
539 /* We haven't found the end-of-line delimiter in the cache.
540 Fill the cache with more data from the file and look for the
541 '\n'. */
542 while (maybe_read_data (c))
543 {
544 line_start = c->data + c->line_start_idx;
545 remaining_size = c->nb_read - c->line_start_idx;
546 line_end = (char *) memchr (line_start, '\n', remaining_size);
547 if (line_end != NULL)
548 {
549 next_line_start = line_end + 1;
550 break;
551 }
552 }
553 if (line_end == NULL)
c65236d6
DM
554 {
555 /* We've loadded all the file into the cache and still no
556 '\n'. Let's say the line ends up at one byte passed the
557 end of the file. This is to stay consistent with the case
558 of when the line ends up with a '\n' and line_end points to
559 that terminal '\n'. That consistency is useful below in
560 the len calculation. */
561 line_end = c->data + c->nb_read ;
562 c->missing_trailing_newline = true;
563 }
564 else
565 c->missing_trailing_newline = false;
9fec0042 566 }
7ecc3eb9 567 else
c65236d6
DM
568 {
569 next_line_start = line_end + 1;
570 c->missing_trailing_newline = false;
571 }
7ecc3eb9
DS
572
573 if (ferror (c->fp))
1adae327 574 return false;
7ecc3eb9
DS
575
576 /* At this point, we've found the end of the of line. It either
577 points to the '\n' or to one byte after the last byte of the
578 file. */
579 gcc_assert (line_end != NULL);
9fec0042 580
7ecc3eb9
DS
581 len = line_end - line_start;
582
583 if (c->line_start_idx < c->nb_read)
584 *line = line_start;
585
586 ++c->line_num;
587
588 /* Before we update our line record, make sure the hint about the
589 total number of lines of the file is correct. If it's not, then
590 we give up recording line boundaries from now on. */
591 bool update_line_record = true;
592 if (c->line_num > c->total_lines)
593 update_line_record = false;
594
595 /* Now update our line record so that re-reading lines from the
596 before c->line_start_idx is faster. */
597 if (update_line_record
598 && c->line_record.length () < fcache_line_record_size)
599 {
600 /* If the file lines fits in the line record, we just record all
601 its lines ...*/
602 if (c->total_lines <= fcache_line_record_size
603 && c->line_num > c->line_record.length ())
604 c->line_record.safe_push (fcache::line_info (c->line_num,
605 c->line_start_idx,
606 line_end - c->data));
607 else if (c->total_lines > fcache_line_record_size)
608 {
609 /* ... otherwise, we just scale total_lines down to
610 (fcache_line_record_size lines. */
611 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
612 if (c->line_record.length () == 0
613 || n >= c->line_record.length ())
614 c->line_record.safe_push (fcache::line_info (c->line_num,
615 c->line_start_idx,
616 line_end - c->data));
617 }
618 }
619
620 /* Update c->line_start_idx so that it points to the next line to be
621 read. */
622 if (next_line_start)
623 c->line_start_idx = next_line_start - c->data;
624 else
625 /* We didn't find any terminal '\n'. Let's consider that the end
626 of line is the end of the data in the cache. The next
627 invocation of get_next_line will either read more data from the
628 underlying file or return false early because we've reached the
629 end of the file. */
630 c->line_start_idx = c->nb_read;
631
632 *line_len = len;
633
634 return true;
635}
636
7ecc3eb9
DS
637/* Consume the next bytes coming from the cache (or from its
638 underlying file if there are remaining unread bytes in the file)
639 until we reach the next end-of-line (or end-of-file). There is no
640 copying from the cache involved. Return TRUE upon successful
641 completion. */
642
643static bool
644goto_next_line (fcache *cache)
645{
646 char *l;
647 ssize_t len;
648
649 return get_next_line (cache, &l, &len);
650}
651
652/* Read an arbitrary line number LINE_NUM from the file cached in C.
1adae327
BE
653 If the line was read successfully, *LINE points to the beginning
654 of the line in the file cache and *LINE_LEN is the length of the
655 line. *LINE is not nul-terminated, but may contain zero bytes.
656 *LINE is only valid until the next call of read_line_num.
7ecc3eb9
DS
657 This function returns bool if a line was read. */
658
659static bool
660read_line_num (fcache *c, size_t line_num,
1adae327 661 char **line, ssize_t *line_len)
7ecc3eb9
DS
662{
663 gcc_assert (line_num > 0);
664
665 if (line_num <= c->line_num)
9789a912 666 {
7ecc3eb9
DS
667 /* We've been asked to read lines that are before c->line_num.
668 So lets use our line record (if it's not empty) to try to
669 avoid re-reading the file from the beginning again. */
7f4d640c 670
7ecc3eb9 671 if (c->line_record.is_empty ())
9fec0042 672 {
7ecc3eb9
DS
673 c->line_start_idx = 0;
674 c->line_num = 0;
675 }
676 else
677 {
678 fcache::line_info *i = NULL;
679 if (c->total_lines <= fcache_line_record_size)
680 {
681 /* In languages where the input file is not totally
682 preprocessed up front, the c->total_lines hint
683 can be smaller than the number of lines of the
684 file. In that case, only the first
685 c->total_lines have been recorded.
686
687 Otherwise, the first c->total_lines we've read have
688 their start/end recorded here. */
689 i = (line_num <= c->total_lines)
690 ? &c->line_record[line_num - 1]
691 : &c->line_record[c->total_lines - 1];
692 gcc_assert (i->line_num <= line_num);
693 }
694 else
695 {
696 /* So the file had more lines than our line record
697 size. Thus the number of lines we've recorded has
698 been scaled down to fcache_line_reacord_size. Let's
699 pick the start/end of the recorded line that is
700 closest to line_num. */
701 size_t n = (line_num <= c->total_lines)
702 ? line_num * fcache_line_record_size / c->total_lines
703 : c ->line_record.length () - 1;
704 if (n < c->line_record.length ())
705 {
706 i = &c->line_record[n];
707 gcc_assert (i->line_num <= line_num);
708 }
709 }
710
711 if (i && i->line_num == line_num)
712 {
1adae327
BE
713 /* We have the start/end of the line. */
714 *line = c->data + i->start_pos;
715 *line_len = i->end_pos - i->start_pos;
7ecc3eb9
DS
716 return true;
717 }
718
719 if (i)
720 {
721 c->line_start_idx = i->start_pos;
722 c->line_num = i->line_num - 1;
723 }
724 else
725 {
726 c->line_start_idx = 0;
727 c->line_num = 0;
728 }
9fec0042 729 }
9fec0042 730 }
7ecc3eb9
DS
731
732 /* Let's walk from line c->line_num up to line_num - 1, without
733 copying any line. */
734 while (c->line_num < line_num - 1)
735 if (!goto_next_line (c))
736 return false;
737
738 /* The line we want is the next one. Let's read and copy it back to
739 the caller. */
1adae327 740 return get_next_line (c, line, line_len);
9fec0042
MLI
741}
742
1adae327
BE
743/* Return the physical source line that corresponds to FILE_PATH/LINE.
744 The line is not nul-terminated. The returned pointer is only
745 valid until the next call of location_get_source_line.
746 Note that the line can contain several null characters,
7761dfbe
DM
747 so the returned value's length has the actual length of the line.
748 If the function fails, a NULL char_span is returned. */
9fec0042 749
7761dfbe
DM
750char_span
751location_get_source_line (const char *file_path, int line)
9fec0042 752{
ac2a97db 753 char *buffer = NULL;
1adae327 754 ssize_t len;
7ecc3eb9 755
31bdd08a 756 if (line == 0)
7761dfbe 757 return char_span (NULL, 0);
367c8286 758
31bdd08a 759 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
367c8286 760 if (c == NULL)
7761dfbe 761 return char_span (NULL, 0);
367c8286 762
31bdd08a 763 bool read = read_line_num (c, line, &buffer, &len);
7761dfbe
DM
764 if (!read)
765 return char_span (NULL, 0);
9fec0042 766
7761dfbe 767 return char_span (buffer, len);
9fec0042
MLI
768}
769
c65236d6
DM
770/* Determine if FILE_PATH missing a trailing newline on its final line.
771 Only valid to call once all of the file has been loaded, by
772 requesting a line number beyond the end of the file. */
773
774bool
775location_missing_trailing_newline (const char *file_path)
776{
777 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
778 if (c == NULL)
779 return false;
780
781 return c->missing_trailing_newline;
782}
783
c468587a
DS
784/* Test if the location originates from the spelling location of a
785 builtin-tokens. That is, return TRUE if LOC is a (possibly
786 virtual) location of a built-in token that appears in the expansion
787 list of a macro. Please note that this function also works on
788 tokens that result from built-in tokens. For instance, the
789 function would return true if passed a token "4" that is the result
790 of the expansion of the built-in __LINE__ macro. */
791bool
620e594b 792is_location_from_builtin_token (location_t loc)
c468587a 793{
0e50b624 794 const line_map_ordinary *map = NULL;
c468587a
DS
795 loc = linemap_resolve_location (line_table, loc,
796 LRK_SPELLING_LOCATION, &map);
797 return loc == BUILTINS_LOCATION;
798}
799
7eb918cc
DS
800/* Expand the source location LOC into a human readable location. If
801 LOC is virtual, it resolves to the expansion point of the involved
802 macro. If LOC resolves to a builtin location, the file name of the
803 readable location is set to the string "<built-in>". */
804
805expanded_location
620e594b 806expand_location (location_t loc)
7eb918cc 807{
c471c6ed
DM
808 return expand_location_1 (loc, /*expansion_point_p=*/true,
809 LOCATION_ASPECT_CARET);
7eb918cc
DS
810}
811
812/* Expand the source location LOC into a human readable location. If
813 LOC is virtual, it resolves to the expansion location of the
814 relevant macro. If LOC resolves to a builtin location, the file
815 name of the readable location is set to the string
816 "<built-in>". */
817
818expanded_location
620e594b 819expand_location_to_spelling_point (location_t loc,
0d48e877 820 enum location_aspect aspect)
7eb918cc 821{
0d48e877 822 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
7eb918cc
DS
823}
824
8a645150 825/* The rich_location class within libcpp requires a way to expand
620e594b 826 location_t instances, and relies on the client code
8a645150
DM
827 providing a symbol named
828 linemap_client_expand_location_to_spelling_point
829 to do this.
830
831 This is the implementation for libcommon.a (all host binaries),
c471c6ed 832 which simply calls into expand_location_1. */
8a645150
DM
833
834expanded_location
620e594b 835linemap_client_expand_location_to_spelling_point (location_t loc,
c471c6ed 836 enum location_aspect aspect)
8a645150 837{
c471c6ed 838 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
8a645150
DM
839}
840
841
e1f0c178
MLI
842/* If LOCATION is in a system header and if it is a virtual location for
843 a token coming from the expansion of a macro, unwind it to the
844 location of the expansion point of the macro. Otherwise, just return
70dc395a
DS
845 LOCATION.
846
847 This is used for instance when we want to emit diagnostics about a
e1f0c178
MLI
848 token that may be located in a macro that is itself defined in a
849 system header, for example, for the NULL macro. In such a case, if
850 LOCATION were passed directly to diagnostic functions such as
851 warning_at, the diagnostic would be suppressed (unless
852 -Wsystem-headers). */
70dc395a 853
620e594b
DM
854location_t
855expansion_point_location_if_in_system_header (location_t location)
70dc395a
DS
856{
857 if (in_system_header_at (location))
858 location = linemap_resolve_location (line_table, location,
859 LRK_MACRO_EXPANSION_POINT,
860 NULL);
861 return location;
862}
7eb918cc 863
79ce98bc
MP
864/* If LOCATION is a virtual location for a token coming from the expansion
865 of a macro, unwind to the location of the expansion point of the macro. */
866
620e594b
DM
867location_t
868expansion_point_location (location_t location)
79ce98bc
MP
869{
870 return linemap_resolve_location (line_table, location,
871 LRK_MACRO_EXPANSION_POINT, NULL);
872}
873
a01fc549
DM
874/* Construct a location with caret at CARET, ranging from START to
875 finish e.g.
876
877 11111111112
878 12345678901234567890
879 522
880 523 return foo + bar;
881 ~~~~^~~~~
882 524
883
884 The location's caret is at the "+", line 523 column 15, but starts
885 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
886 of "bar" at column 19. */
887
888location_t
889make_location (location_t caret, location_t start, location_t finish)
890{
891 location_t pure_loc = get_pure_location (caret);
892 source_range src_range;
9144eabb
DM
893 src_range.m_start = get_start (start);
894 src_range.m_finish = get_finish (finish);
a01fc549
DM
895 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
896 pure_loc,
897 src_range,
898 NULL);
899 return combined_loc;
900}
901
a32c8316
MP
902/* Same as above, but taking a source range rather than two locations. */
903
904location_t
905make_location (location_t caret, source_range src_range)
906{
907 location_t pure_loc = get_pure_location (caret);
908 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
909}
910
64a1a422
TT
911/* Dump statistics to stderr about the memory usage of the line_table
912 set of line maps. This also displays some statistics about macro
913 expansion. */
914
915void
916dump_line_table_statistics (void)
917{
918 struct linemap_stats s;
d17687f6 919 long total_used_map_size,
64a1a422
TT
920 macro_maps_size,
921 total_allocated_map_size;
922
923 memset (&s, 0, sizeof (s));
924
925 linemap_get_statistics (line_table, &s);
926
927 macro_maps_size = s.macro_maps_used_size
928 + s.macro_maps_locations_size;
929
930 total_allocated_map_size = s.ordinary_maps_allocated_size
931 + s.macro_maps_allocated_size
932 + s.macro_maps_locations_size;
933
934 total_used_map_size = s.ordinary_maps_used_size
935 + s.macro_maps_used_size
936 + s.macro_maps_locations_size;
937
d17687f6 938 fprintf (stderr, "Number of expanded macros: %5ld\n",
64a1a422
TT
939 s.num_expanded_macros);
940 if (s.num_expanded_macros != 0)
d17687f6 941 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
64a1a422
TT
942 s.num_macro_tokens / s.num_expanded_macros);
943 fprintf (stderr,
944 "\nLine Table allocations during the "
40ce7fa6 945 "compilation process\n");
a0b48080 946 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
40ce7fa6 947 SIZE_AMOUNT (s.num_ordinary_maps_used));
a0b48080 948 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
40ce7fa6 949 SIZE_AMOUNT (s.ordinary_maps_used_size));
a0b48080 950 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
40ce7fa6 951 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
a0b48080 952 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
40ce7fa6 953 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
a0b48080 954 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
40ce7fa6 955 SIZE_AMOUNT (s.num_macro_maps_used));
a0b48080 956 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
40ce7fa6 957 SIZE_AMOUNT (s.macro_maps_used_size));
a0b48080 958 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
40ce7fa6 959 SIZE_AMOUNT (s.macro_maps_locations_size));
a0b48080 960 fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
40ce7fa6 961 SIZE_AMOUNT (macro_maps_size));
a0b48080 962 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
40ce7fa6 963 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
a0b48080 964 fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
40ce7fa6 965 SIZE_AMOUNT (total_allocated_map_size));
a0b48080 966 fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
40ce7fa6 967 SIZE_AMOUNT (total_used_map_size));
a0b48080 968 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
40ce7fa6 969 SIZE_AMOUNT (s.adhoc_table_size));
a0b48080 970 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
40ce7fa6 971 SIZE_AMOUNT (s.adhoc_table_entries_used));
a0b48080 972 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
40ce7fa6 973 SIZE_AMOUNT (line_table->num_optimized_ranges));
a0b48080 974 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
40ce7fa6 975 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
ee015909 976
64a1a422
TT
977 fprintf (stderr, "\n");
978}
ba4ad400
DM
979
980/* Get location one beyond the final location in ordinary map IDX. */
981
620e594b 982static location_t
99b1c316 983get_end_location (class line_maps *set, unsigned int idx)
ba4ad400
DM
984{
985 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
986 return set->highest_location;
987
988 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
989 return MAP_START_LOCATION (next_map);
990}
991
/* Helper function for write_digit_row.
   Write the least significant decimal digit of DIGIT to STREAM as a
   single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
999
1000/* Helper function for dump_location_info.
1001 Write a row of numbers to STREAM, numbering a source line,
1002 giving the units, tens, hundreds etc of the column number. */
1003
1004static void
1005write_digit_row (FILE *stream, int indent,
ebedc9a3 1006 const line_map_ordinary *map,
620e594b 1007 location_t loc, int max_col, int divisor)
ba4ad400
DM
1008{
1009 fprintf (stream, "%*c", indent, ' ');
1010 fprintf (stream, "|");
1011 for (int column = 1; column < max_col; column++)
1012 {
620e594b 1013 location_t column_loc = loc + (column << map->m_range_bits);
ba4ad400
DM
1014 write_digit (stream, column_loc / divisor);
1015 }
1016 fprintf (stream, "\n");
1017}
1018
1019/* Write a half-closed (START) / half-open (END) interval of
620e594b 1020 location_t to STREAM. */
ba4ad400
DM
1021
1022static void
1023dump_location_range (FILE *stream,
620e594b 1024 location_t start, location_t end)
ba4ad400
DM
1025{
1026 fprintf (stream,
620e594b 1027 " location_t interval: %u <= loc < %u\n",
ba4ad400
DM
1028 start, end);
1029}
1030
1031/* Write a labelled description of a half-closed (START) / half-open (END)
620e594b 1032 interval of location_t to STREAM. */
ba4ad400
DM
1033
1034static void
1035dump_labelled_location_range (FILE *stream,
1036 const char *name,
620e594b 1037 location_t start, location_t end)
ba4ad400
DM
1038{
1039 fprintf (stream, "%s\n", name);
1040 dump_location_range (stream, start, end);
1041 fprintf (stream, "\n");
1042}
1043
1044/* Write a visualization of the locations in the line_table to STREAM. */
1045
1046void
1047dump_location_info (FILE *stream)
1048{
1049 /* Visualize the reserved locations. */
1050 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1051 0, RESERVED_LOCATION_COUNT);
1052
1053 /* Visualize the ordinary line_map instances, rendering the sources. */
1054 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1055 {
620e594b 1056 location_t end_location = get_end_location (line_table, idx);
ba4ad400
DM
1057 /* half-closed: doesn't include this one. */
1058
0e50b624
DM
1059 const line_map_ordinary *map
1060 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
ba4ad400
DM
1061 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1062 dump_location_range (stream,
1063 MAP_START_LOCATION (map), end_location);
1064 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1065 fprintf (stream, " starting at line: %i\n",
1066 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
ebedc9a3
DM
1067 fprintf (stream, " column and range bits: %i\n",
1068 map->m_column_and_range_bits);
ba4ad400 1069 fprintf (stream, " column bits: %i\n",
ebedc9a3
DM
1070 map->m_column_and_range_bits - map->m_range_bits);
1071 fprintf (stream, " range bits: %i\n",
1072 map->m_range_bits);
bc65bad2
MG
1073 const char * reason;
1074 switch (map->reason) {
1075 case LC_ENTER:
1076 reason = "LC_ENTER";
1077 break;
1078 case LC_LEAVE:
1079 reason = "LC_LEAVE";
1080 break;
1081 case LC_RENAME:
1082 reason = "LC_RENAME";
1083 break;
1084 case LC_RENAME_VERBATIM:
1085 reason = "LC_RENAME_VERBATIM";
1086 break;
1087 case LC_ENTER_MACRO:
1088 reason = "LC_RENAME_MACRO";
1089 break;
1090 default:
1091 reason = "Unknown";
1092 }
1093 fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1094
1095 const line_map_ordinary *includer_map
1096 = linemap_included_from_linemap (line_table, map);
1097 fprintf (stream, " included from location: %d",
1098 linemap_included_from (map));
1099 if (includer_map) {
1100 fprintf (stream, " (in ordinary map %d)",
1101 int (includer_map - line_table->info_ordinary.maps));
1102 }
1103 fprintf (stream, "\n");
ba4ad400
DM
1104
1105 /* Render the span of source lines that this "map" covers. */
620e594b 1106 for (location_t loc = MAP_START_LOCATION (map);
ba4ad400 1107 loc < end_location;
ebedc9a3 1108 loc += (1 << map->m_range_bits) )
ba4ad400 1109 {
ebedc9a3
DM
1110 gcc_assert (pure_location_p (line_table, loc) );
1111
ba4ad400
DM
1112 expanded_location exploc
1113 = linemap_expand_location (line_table, map, loc);
1114
01512446 1115 if (exploc.column == 0)
ba4ad400
DM
1116 {
1117 /* Beginning of a new source line: draw the line. */
1118
7761dfbe
DM
1119 char_span line_text = location_get_source_line (exploc.file,
1120 exploc.line);
ba4ad400
DM
1121 if (!line_text)
1122 break;
1123 fprintf (stream,
1124 "%s:%3i|loc:%5i|%.*s\n",
1125 exploc.file, exploc.line,
1126 loc,
7761dfbe 1127 (int)line_text.length (), line_text.get_buffer ());
ba4ad400
DM
1128
1129 /* "loc" is at column 0, which means "the whole line".
1130 Render the locations *within* the line, by underlining
620e594b 1131 it, showing the location_t numeric values
ba4ad400 1132 at each column. */
7761dfbe
DM
1133 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1134 if (max_col > line_text.length ())
1135 max_col = line_text.length () + 1;
ba4ad400 1136
bc65bad2
MG
1137 int len_lnum = num_digits (exploc.line);
1138 if (len_lnum < 3)
1139 len_lnum = 3;
1140 int len_loc = num_digits (loc);
1141 if (len_loc < 5)
1142 len_loc = 5;
1143
1144 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
ba4ad400
DM
1145
1146 /* Thousands. */
1147 if (end_location > 999)
ebedc9a3 1148 write_digit_row (stream, indent, map, loc, max_col, 1000);
ba4ad400
DM
1149
1150 /* Hundreds. */
1151 if (end_location > 99)
ebedc9a3 1152 write_digit_row (stream, indent, map, loc, max_col, 100);
ba4ad400
DM
1153
1154 /* Tens. */
ebedc9a3 1155 write_digit_row (stream, indent, map, loc, max_col, 10);
ba4ad400
DM
1156
1157 /* Units. */
ebedc9a3 1158 write_digit_row (stream, indent, map, loc, max_col, 1);
ba4ad400
DM
1159 }
1160 }
1161 fprintf (stream, "\n");
1162 }
1163
1164 /* Visualize unallocated values. */
1165 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1166 line_table->highest_location,
1167 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1168
1169 /* Visualize the macro line_map instances, rendering the sources. */
1170 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1171 {
620e594b 1172 /* Each macro map that is allocated owns location_t values
ba4ad400
DM
1173 that are *lower* that the one before them.
1174 Hence it's meaningful to view them either in order of ascending
1175 source locations, or in order of ascending macro map index. */
620e594b
DM
1176 const bool ascending_location_ts = true;
1177 unsigned int idx = (ascending_location_ts
ba4ad400
DM
1178 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1179 : i);
0e50b624 1180 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
ba4ad400
DM
1181 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1182 idx,
1183 linemap_map_get_macro_name (map),
1184 MACRO_MAP_NUM_MACRO_TOKENS (map));
1185 dump_location_range (stream,
1186 map->start_location,
1187 (map->start_location
1188 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1189 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1190 "expansion point is location %i",
1191 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1192 fprintf (stream, " map->start_location: %u\n",
1193 map->start_location);
1194
1195 fprintf (stream, " macro_locations:\n");
1196 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1197 {
620e594b
DM
1198 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1199 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
ba4ad400
DM
1200
1201 /* linemap_add_macro_token encodes token numbers in an expansion
1202 by putting them after MAP_START_LOCATION. */
1203
1204 /* I'm typically seeing 4 uninitialized entries at the end of
1205 0xafafafaf.
1206 This appears to be due to macro.c:replace_args
1207 adding 2 extra args for padding tokens; presumably there may
1208 be a leading and/or trailing padding token injected,
1209 each for 2 more location slots.
620e594b 1210 This would explain there being up to 4 location_ts slots
ba4ad400
DM
1211 that may be uninitialized. */
1212
1213 fprintf (stream, " %u: %u, %u\n",
1214 i,
1215 x,
1216 y);
1217 if (x == y)
1218 {
1219 if (x < MAP_START_LOCATION (map))
a9c697b8
MS
1220 inform (x, "token %u has %<x-location == y-location == %u%>",
1221 i, x);
ba4ad400
DM
1222 else
1223 fprintf (stream,
1224 "x-location == y-location == %u encodes token # %u\n",
1225 x, x - MAP_START_LOCATION (map));
1226 }
1227 else
1228 {
a9c697b8
MS
1229 inform (x, "token %u has %<x-location == %u%>", i, x);
1230 inform (x, "token %u has %<y-location == %u%>", i, y);
ba4ad400
DM
1231 }
1232 }
1233 fprintf (stream, "\n");
1234 }
1235
620e594b 1236 /* It appears that MAX_LOCATION_T itself is never assigned to a
ba4ad400
DM
1237 macro map, presumably due to an off-by-one error somewhere
1238 between the logic in linemap_enter_macro and
1239 LINEMAPS_MACRO_LOWEST_LOCATION. */
620e594b
DM
1240 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1241 MAX_LOCATION_T,
1242 MAX_LOCATION_T + 1);
ba4ad400
DM
1243
1244 /* Visualize ad-hoc values. */
1245 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
620e594b 1246 MAX_LOCATION_T + 1, UINT_MAX);
ba4ad400 1247}
d9b950dd 1248
88fa5555
DM
1249/* string_concat's constructor. */
1250
1251string_concat::string_concat (int num, location_t *locs)
1252 : m_num (num)
1253{
1254 m_locs = ggc_vec_alloc <location_t> (num);
1255 for (int i = 0; i < num; i++)
1256 m_locs[i] = locs[i];
1257}
1258
1259/* string_concat_db's constructor. */
1260
1261string_concat_db::string_concat_db ()
1262{
1263 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1264}
1265
1266/* Record that a string concatenation occurred, covering NUM
1267 string literal tokens. LOCS is an array of size NUM, containing the
1268 locations of the tokens. A copy of LOCS is taken. */
1269
1270void
1271string_concat_db::record_string_concatenation (int num, location_t *locs)
1272{
1273 gcc_assert (num > 1);
1274 gcc_assert (locs);
1275
1276 location_t key_loc = get_key_loc (locs[0]);
1277
1278 string_concat *concat
1279 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1280 m_table->put (key_loc, concat);
1281}
1282
1283/* Determine if LOC was the location of the the initial token of a
1284 concatenation of string literal tokens.
1285 If so, *OUT_NUM is written to with the number of tokens, and
1286 *OUT_LOCS with the location of an array of locations of the
1287 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1288 storage owned by the string_concat_db.
1289 Otherwise, return false. */
1290
1291bool
1292string_concat_db::get_string_concatenation (location_t loc,
1293 int *out_num,
1294 location_t **out_locs)
1295{
1296 gcc_assert (out_num);
1297 gcc_assert (out_locs);
1298
1299 location_t key_loc = get_key_loc (loc);
1300
1301 string_concat **concat = m_table->get (key_loc);
1302 if (!concat)
1303 return false;
1304
1305 *out_num = (*concat)->m_num;
1306 *out_locs =(*concat)->m_locs;
1307 return true;
1308}
1309
1310/* Internal function. Canonicalize LOC into a form suitable for
1311 use as a key within the database, stripping away macro expansion,
1312 ad-hoc information, and range information, using the location of
1313 the start of LOC within an ordinary linemap. */
1314
1315location_t
1316string_concat_db::get_key_loc (location_t loc)
1317{
1318 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1319 NULL);
1320
1321 loc = get_range_from_loc (line_table, loc).m_start;
1322
1323 return loc;
1324}
1325
1326/* Helper class for use within get_substring_ranges_for_loc.
1327 An vec of cpp_string with responsibility for releasing all of the
1328 str->text for each str in the vector. */
1329
1330class auto_cpp_string_vec : public auto_vec <cpp_string>
1331{
1332 public:
1333 auto_cpp_string_vec (int alloc)
1334 : auto_vec <cpp_string> (alloc) {}
1335
1336 ~auto_cpp_string_vec ()
1337 {
1338 /* Clean up the copies within this vec. */
1339 int i;
1340 cpp_string *str;
1341 FOR_EACH_VEC_ELT (*this, i, str)
1342 free (const_cast <unsigned char *> (str->text));
1343 }
1344};
1345
1346/* Attempt to populate RANGES with source location information on the
1347 individual characters within the string literal found at STRLOC.
1348 If CONCATS is non-NULL, then any string literals that the token at
1349 STRLOC was concatenated with are also added to RANGES.
1350
1351 Return NULL if successful, or an error message if any errors occurred (in
1352 which case RANGES may be only partially populated and should not
1353 be used).
1354
1355 This is implemented by re-parsing the relevant source line(s). */
1356
1357static const char *
1358get_substring_ranges_for_loc (cpp_reader *pfile,
1359 string_concat_db *concats,
1360 location_t strloc,
1361 enum cpp_ttype type,
1362 cpp_substring_ranges &ranges)
1363{
1364 gcc_assert (pfile);
1365
1366 if (strloc == UNKNOWN_LOCATION)
1367 return "unknown location";
1368
67b5d0b2
DM
1369 /* Reparsing the strings requires accurate location information.
1370 If -ftrack-macro-expansion has been overridden from its default
1371 of 2, then we might have a location of a macro expansion point,
1372 rather than the location of the literal itself.
1373 Avoid this by requiring that we have full macro expansion tracking
1374 for substring locations to be available. */
1375 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1376 return "track_macro_expansion != 2";
1377
94f597df
DM
1378 /* If #line or # 44 "file"-style directives are present, then there's
1379 no guarantee that the line numbers we have can be used to locate
1380 the strings. For example, we might have a .i file with # directives
1381 pointing back to lines within a .c file, but the .c file might
1382 have been edited since the .i file was created.
1383 In such a case, the safest course is to disable on-demand substring
1384 locations. */
1385 if (line_table->seen_line_directive)
1386 return "seen line directive";
1387
88fa5555
DM
1388 /* If string concatenation has occurred at STRLOC, get the locations
1389 of all of the literal tokens making up the compound string.
1390 Otherwise, just use STRLOC. */
1391 int num_locs = 1;
1392 location_t *strlocs = &strloc;
1393 if (concats)
1394 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1395
1396 auto_cpp_string_vec strs (num_locs);
1397 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1398 for (int i = 0; i < num_locs; i++)
1399 {
1400 /* Get range of strloc. We will use it to locate the start and finish
1401 of the literal token within the line. */
1402 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1403
1404 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
0d48e877
DM
1405 {
1406 /* If the string token was within a macro expansion, then we can
1407 cope with it for the simple case where we have a single token.
1408 Otherwise, bail out. */
1409 if (src_range.m_start != src_range.m_finish)
1410 return "macro expansion";
1411 }
1412 else
1413 {
1414 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1415 /* If so, we can't reliably determine where the token started within
1416 its line. */
1417 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1418
1419 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1420 /* If so, we can't reliably determine where the token finished
1421 within its line. */
1422 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1423 }
88fa5555
DM
1424
1425 expanded_location start
0d48e877
DM
1426 = expand_location_to_spelling_point (src_range.m_start,
1427 LOCATION_ASPECT_START);
88fa5555 1428 expanded_location finish
0d48e877
DM
1429 = expand_location_to_spelling_point (src_range.m_finish,
1430 LOCATION_ASPECT_FINISH);
88fa5555
DM
1431 if (start.file != finish.file)
1432 return "range endpoints are in different files";
1433 if (start.line != finish.line)
1434 return "range endpoints are on different lines";
1435 if (start.column > finish.column)
1436 return "range endpoints are reversed";
1437
7761dfbe
DM
1438 char_span line = location_get_source_line (start.file, start.line);
1439 if (!line)
88fa5555
DM
1440 return "unable to read source line";
1441
1442 /* Determine the location of the literal (including quotes
1443 and leading prefix chars, such as the 'u' in a u""
1444 token). */
7761dfbe 1445 size_t literal_length = finish.column - start.column + 1;
88fa5555 1446
7cfa044d 1447 /* Ensure that we don't crash if we got the wrong location. */
7761dfbe 1448 if (line.length () < (start.column - 1 + literal_length))
7cfa044d
DM
1449 return "line is not wide enough";
1450
7761dfbe
DM
1451 char_span literal = line.subspan (start.column - 1, literal_length);
1452
88fa5555
DM
1453 cpp_string from;
1454 from.len = literal_length;
1455 /* Make a copy of the literal, to avoid having to rely on
1456 the lifetime of the copy of the line within the cache.
1457 This will be released by the auto_cpp_string_vec dtor. */
7761dfbe 1458 from.text = (unsigned char *)literal.xstrdup ();
88fa5555
DM
1459 strs.safe_push (from);
1460
1461 /* For very long lines, a new linemap could have started
1462 halfway through the token.
1463 Ensure that the loc_reader uses the linemap of the
1464 *end* of the token for its start location. */
05d57d65
DM
1465 const line_map_ordinary *start_ord_map;
1466 linemap_resolve_location (line_table, src_range.m_start,
1467 LRK_SPELLING_LOCATION, &start_ord_map);
88fa5555
DM
1468 const line_map_ordinary *final_ord_map;
1469 linemap_resolve_location (line_table, src_range.m_finish,
05d57d65 1470 LRK_SPELLING_LOCATION, &final_ord_map);
3d0a5393
DM
1471 if (start_ord_map == NULL || final_ord_map == NULL)
1472 return "failed to get ordinary maps";
05d57d65
DM
1473 /* Bulletproofing. We ought to only have different ordinary maps
1474 for start vs finish due to line-length jumps. */
1475 if (start_ord_map != final_ord_map
1476 && start_ord_map->to_file != final_ord_map->to_file)
ef33afeb
DM
1477 return "start and finish are spelled in different ordinary maps";
1478 /* The file from linemap_resolve_location ought to match that from
1479 expand_location_to_spelling_point. */
1480 if (start_ord_map->to_file != start.file)
1481 return "mismatching file after resolving linemap";
1482
88fa5555
DM
1483 location_t start_loc
1484 = linemap_position_for_line_and_column (line_table, final_ord_map,
1485 start.line, start.column);
1486
1487 cpp_string_location_reader loc_reader (start_loc, line_table);
1488 loc_readers.safe_push (loc_reader);
1489 }
1490
1491 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1492 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1493 loc_readers.address (),
1494 num_locs, &ranges, type);
1495 if (err)
1496 return err;
1497
1498 /* Success: "ranges" should now contain information on the string. */
1499 return NULL;
1500}
1501
65e736c0
DM
1502/* Attempt to populate *OUT_LOC with source location information on the
1503 given characters within the string literal found at STRLOC.
1504 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1505 character set.
1506
1507 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1508 and string literal "012345\n789"
1509 *OUT_LOC is written to with:
1510 "012345\n789"
1511 ~^~~~~
1512
88fa5555
DM
1513 If CONCATS is non-NULL, then any string literals that the token at
1514 STRLOC was concatenated with are also considered.
1515
1516 This is implemented by re-parsing the relevant source line(s).
1517
1518 Return NULL if successful, or an error message if any errors occurred.
1519 Error messages are intended for GCC developers (to help debugging) rather
1520 than for end-users. */
1521
1522const char *
620e594b
DM
1523get_location_within_string (cpp_reader *pfile,
1524 string_concat_db *concats,
1525 location_t strloc,
1526 enum cpp_ttype type,
1527 int caret_idx, int start_idx, int end_idx,
1528 location_t *out_loc)
65e736c0
DM
1529{
1530 gcc_checking_assert (caret_idx >= 0);
88fa5555
DM
1531 gcc_checking_assert (start_idx >= 0);
1532 gcc_checking_assert (end_idx >= 0);
65e736c0 1533 gcc_assert (out_loc);
88fa5555
DM
1534
1535 cpp_substring_ranges ranges;
1536 const char *err
1537 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1538 if (err)
1539 return err;
1540
65e736c0
DM
1541 if (caret_idx >= ranges.get_num_ranges ())
1542 return "caret_idx out of range";
88fa5555
DM
1543 if (start_idx >= ranges.get_num_ranges ())
1544 return "start_idx out of range";
1545 if (end_idx >= ranges.get_num_ranges ())
1546 return "end_idx out of range";
1547
65e736c0
DM
1548 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1549 ranges.get_range (start_idx).m_start,
1550 ranges.get_range (end_idx).m_finish);
1551 return NULL;
1552}
1553
0e06d2b3
DM
1554#if CHECKING_P
1555
1556namespace selftest {
1557
1558/* Selftests of location handling. */
1559
65e736c0
DM
1560/* Attempt to populate *OUT_RANGE with source location information on the
1561 given character within the string literal found at STRLOC.
1562 CHAR_IDX refers to an offset within the execution character set.
1563 If CONCATS is non-NULL, then any string literals that the token at
1564 STRLOC was concatenated with are also considered.
1565
1566 This is implemented by re-parsing the relevant source line(s).
1567
1568 Return NULL if successful, or an error message if any errors occurred.
1569 Error messages are intended for GCC developers (to help debugging) rather
1570 than for end-users. */
1571
1572static const char *
1573get_source_range_for_char (cpp_reader *pfile,
1574 string_concat_db *concats,
1575 location_t strloc,
1576 enum cpp_ttype type,
1577 int char_idx,
1578 source_range *out_range)
1579{
1580 gcc_checking_assert (char_idx >= 0);
1581 gcc_assert (out_range);
1582
1583 cpp_substring_ranges ranges;
1584 const char *err
1585 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1586 if (err)
1587 return err;
1588
1589 if (char_idx >= ranges.get_num_ranges ())
1590 return "char_idx out of range";
1591
1592 *out_range = ranges.get_range (char_idx);
88fa5555
DM
1593 return NULL;
1594}
1595
65e736c0 1596/* As get_source_range_for_char, but write to *OUT the number
88fa5555
DM
1597 of ranges that are available. */
1598
0e06d2b3 1599static const char *
88fa5555
DM
1600get_num_source_ranges_for_substring (cpp_reader *pfile,
1601 string_concat_db *concats,
1602 location_t strloc,
1603 enum cpp_ttype type,
1604 int *out)
1605{
1606 gcc_assert (out);
1607
1608 cpp_substring_ranges ranges;
1609 const char *err
1610 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1611
1612 if (err)
1613 return err;
1614
1615 *out = ranges.get_num_ranges ();
1616 return NULL;
1617}
1618
d9b950dd
DM
1619/* Selftests of location handling. */
1620
082284da
DM
1621/* Verify that compare() on linenum_type handles comparisons over the full
1622 range of the type. */
1623
1624static void
1625test_linenum_comparisons ()
1626{
1627 linenum_type min_line (0);
1628 linenum_type max_line (0xffffffff);
1629 ASSERT_EQ (0, compare (min_line, min_line));
1630 ASSERT_EQ (0, compare (max_line, max_line));
1631
1632 ASSERT_GT (compare (max_line, min_line), 0);
1633 ASSERT_LT (compare (min_line, max_line), 0);
1634}
1635
741d3be5
DM
1636/* Helper function for verifying location data: when location_t
1637 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1638 as having column 0. */
1639
1640static bool
1641should_have_column_data_p (location_t loc)
1642{
1643 if (IS_ADHOC_LOC (loc))
1644 loc = get_location_from_adhoc_loc (line_table, loc);
1645 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1646 return false;
1647 return true;
1648}
1649
1650/* Selftest for should_have_column_data_p. */
1651
1652static void
1653test_should_have_column_data_p ()
1654{
1655 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1656 ASSERT_TRUE
1657 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1658 ASSERT_FALSE
1659 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1660}
1661
d9b950dd
DM
1662/* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1663 on LOC. */
1664
1665static void
1666assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1667 location_t loc)
1668{
1669 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1670 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
741d3be5
DM
1671 /* If location_t values are sufficiently high, then column numbers
1672 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1673 When close to the threshold, column numbers *may* be present: if
1674 the final linemap before the threshold contains a line that straddles
1675 the threshold, locations in that line have column information. */
1676 if (should_have_column_data_p (loc))
1677 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1678}
1679
f87e22c5
DM
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for location_t beyond which line-map.c changes
     behavior (disabling of the range-packing optimization, disabling
     of column-tracking).  We can exercise these by starting the line_table
     at interesting values at or near these thresholds.

   The following class describes a particular case within our test
   matrix.  */

class line_table_case
{
public:
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which the line_table's highest_location
     and highest_line start.  */
  int m_base_location;
};
1707
f87e22c5
DM
1708/* Constructor. Store the old value of line_table, and create a new
1709 one, using sane defaults. */
741d3be5 1710
f87e22c5 1711line_table_test::line_table_test ()
741d3be5 1712{
f87e22c5
DM
1713 gcc_assert (saved_line_table == NULL);
1714 saved_line_table = line_table;
1715 line_table = ggc_alloc<line_maps> ();
1716 linemap_init (line_table, BUILTINS_LOCATION);
1717 gcc_assert (saved_line_table->reallocator);
1718 line_table->reallocator = saved_line_table->reallocator;
1719 gcc_assert (saved_line_table->round_alloc_size);
1720 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1721 line_table->default_range_bits = 0;
1722}
741d3be5
DM
1723
1724/* Constructor. Store the old value of line_table, and create a new
1725 one, using the sitation described in CASE_. */
1726
f87e22c5 1727line_table_test::line_table_test (const line_table_case &case_)
741d3be5 1728{
f87e22c5
DM
1729 gcc_assert (saved_line_table == NULL);
1730 saved_line_table = line_table;
741d3be5
DM
1731 line_table = ggc_alloc<line_maps> ();
1732 linemap_init (line_table, BUILTINS_LOCATION);
f87e22c5
DM
1733 gcc_assert (saved_line_table->reallocator);
1734 line_table->reallocator = saved_line_table->reallocator;
1735 gcc_assert (saved_line_table->round_alloc_size);
1736 line_table->round_alloc_size = saved_line_table->round_alloc_size;
741d3be5
DM
1737 line_table->default_range_bits = case_.m_default_range_bits;
1738 if (case_.m_base_location)
1739 {
1740 line_table->highest_location = case_.m_base_location;
1741 line_table->highest_line = case_.m_base_location;
1742 }
1743}
1744
1745/* Destructor. Restore the old value of line_table. */
1746
f87e22c5 1747line_table_test::~line_table_test ()
741d3be5 1748{
f87e22c5
DM
1749 gcc_assert (saved_line_table != NULL);
1750 line_table = saved_line_table;
1751 saved_line_table = NULL;
d9b950dd
DM
1752}
1753
1754/* Verify basic operation of ordinary linemaps. */
1755
1756static void
741d3be5 1757test_accessing_ordinary_linemaps (const line_table_case &case_)
d9b950dd 1758{
f87e22c5 1759 line_table_test ltt (case_);
741d3be5 1760
d9b950dd
DM
1761 /* Build a simple linemap describing some locations. */
1762 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1763
1764 linemap_line_start (line_table, 1, 100);
1765 location_t loc_a = linemap_position_for_column (line_table, 1);
1766 location_t loc_b = linemap_position_for_column (line_table, 23);
1767
1768 linemap_line_start (line_table, 2, 100);
1769 location_t loc_c = linemap_position_for_column (line_table, 1);
1770 location_t loc_d = linemap_position_for_column (line_table, 17);
1771
1772 /* Example of a very long line. */
1773 linemap_line_start (line_table, 3, 2000);
1774 location_t loc_e = linemap_position_for_column (line_table, 700);
1775
5ccf1d8d
DM
1776 /* Transitioning back to a short line. */
1777 linemap_line_start (line_table, 4, 0);
1778 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1779
1780 if (should_have_column_data_p (loc_back_to_short))
1781 {
1782 /* Verify that we switched to short lines in the linemap. */
1783 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1784 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1785 }
1786
b9f4757f
DM
1787 /* Example of a line that will eventually be seen to be longer
1788 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1789 below that. */
1790 linemap_line_start (line_table, 5, 2000);
1791
1792 location_t loc_start_of_very_long_line
1793 = linemap_position_for_column (line_table, 2000);
1794 location_t loc_too_wide
1795 = linemap_position_for_column (line_table, 4097);
1796 location_t loc_too_wide_2
1797 = linemap_position_for_column (line_table, 4098);
1798
1799 /* ...and back to a sane line length. */
1800 linemap_line_start (line_table, 6, 100);
1801 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1802
d9b950dd
DM
1803 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1804
1805 /* Multiple files. */
1806 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1807 linemap_line_start (line_table, 1, 200);
1808 location_t loc_f = linemap_position_for_column (line_table, 150);
1809 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1810
1811 /* Verify that we can recover the location info. */
1812 assert_loceq ("foo.c", 1, 1, loc_a);
1813 assert_loceq ("foo.c", 1, 23, loc_b);
1814 assert_loceq ("foo.c", 2, 1, loc_c);
1815 assert_loceq ("foo.c", 2, 17, loc_d);
1816 assert_loceq ("foo.c", 3, 700, loc_e);
5ccf1d8d 1817 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
b9f4757f
DM
1818
1819 /* In the very wide line, the initial location should be fully tracked. */
1820 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1821 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1822 be disabled. */
1823 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1824 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1825 /*...and column-tracking should be re-enabled for subsequent lines. */
1826 assert_loceq ("foo.c", 6, 10, loc_sane_again);
1827
d9b950dd
DM
1828 assert_loceq ("bar.c", 1, 150, loc_f);
1829
1830 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
a01fc549
DM
1831 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1832
1833 /* Verify using make_location to build a range, and extracting data
1834 back from it. */
1835 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1836 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1837 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1838 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1839 ASSERT_EQ (loc_b, src_range.m_start);
1840 ASSERT_EQ (loc_d, src_range.m_finish);
d9b950dd
DM
1841}
1842
1843/* Verify various properties of UNKNOWN_LOCATION. */
1844
1845static void
1846test_unknown_location ()
1847{
1848 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1849 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1850 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1851}
1852
1853/* Verify various properties of BUILTINS_LOCATION. */
1854
1855static void
1856test_builtins ()
1857{
10d2fc23 1858 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
d9b950dd
DM
1859 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1860}
1861
9144eabb 1862/* Regression test for make_location.
cfa435e1
DM
1863 Ensure that we use pure locations for the start/finish of the range,
1864 rather than storing a packed or ad-hoc range as the start/finish. */
9144eabb
DM
1865
1866static void
1867test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1868{
1869 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1870 with C++ frontend.
1871 ....................0000000001111111111222.
1872 ....................1234567890123456789012. */
1873 const char *content = " r += !aaa == bbb;\n";
1874 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1875 line_table_test ltt (case_);
1876 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1877
1878 const location_t c11 = linemap_position_for_column (line_table, 11);
1879 const location_t c12 = linemap_position_for_column (line_table, 12);
1880 const location_t c13 = linemap_position_for_column (line_table, 13);
1881 const location_t c14 = linemap_position_for_column (line_table, 14);
1882 const location_t c21 = linemap_position_for_column (line_table, 21);
1883
1884 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1885 return;
1886
1887 /* Use column 13 for the caret location, arbitrarily, to verify that we
1888 handle start != caret. */
1889 const location_t aaa = make_location (c13, c12, c14);
1890 ASSERT_EQ (c13, get_pure_location (aaa));
1891 ASSERT_EQ (c12, get_start (aaa));
1892 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1893 ASSERT_EQ (c14, get_finish (aaa));
1894 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1895
1896 /* Make a location using a location with a range as the start-point. */
1897 const location_t not_aaa = make_location (c11, aaa, c14);
1898 ASSERT_EQ (c11, get_pure_location (not_aaa));
1899 /* It should use the start location of the range, not store the range
1900 itself. */
1901 ASSERT_EQ (c12, get_start (not_aaa));
1902 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1903 ASSERT_EQ (c14, get_finish (not_aaa));
1904 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1905
1906 /* Similarly, make a location with a range as the end-point. */
1907 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1908 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1909 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1910 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1911 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1912 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1913 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1914 /* It should use the finish location of the range, not store the range
1915 itself. */
1916 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1917 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1918 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1919 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1920 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1921}
1922
d9b950dd
DM
1923/* Verify reading of input files (e.g. for caret-based diagnostics). */
1924
1925static void
1926test_reading_source_line ()
1927{
85ecd05c 1928 /* Create a tempfile and write some text to it. */
741d3be5
DM
1929 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1930 "01234567890123456789\n"
1931 "This is the test text\n"
1adae327 1932 "This is the 3rd line");
85ecd05c
DM
1933
1934 /* Read back a specific line from the tempfile. */
7761dfbe
DM
1935 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
1936 ASSERT_TRUE (source_line);
1937 ASSERT_TRUE (source_line.get_buffer () != NULL);
1938 ASSERT_EQ (20, source_line.length ());
1adae327 1939 ASSERT_TRUE (!strncmp ("This is the 3rd line",
7761dfbe 1940 source_line.get_buffer (), source_line.length ()));
1adae327 1941
7761dfbe
DM
1942 source_line = location_get_source_line (tmp.get_filename (), 2);
1943 ASSERT_TRUE (source_line);
1944 ASSERT_TRUE (source_line.get_buffer () != NULL);
1945 ASSERT_EQ (21, source_line.length ());
1adae327 1946 ASSERT_TRUE (!strncmp ("This is the test text",
7761dfbe 1947 source_line.get_buffer (), source_line.length ()));
85ecd05c 1948
7761dfbe
DM
1949 source_line = location_get_source_line (tmp.get_filename (), 4);
1950 ASSERT_FALSE (source_line);
1951 ASSERT_TRUE (source_line.get_buffer () == NULL);
d9b950dd
DM
1952}
1953
741d3be5
DM
1954/* Tests of lexing. */
1955
/* Assert that cpp_token_as_text for token TOK, as spelled by PARSER,
   is string-equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));    \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);           \
  SELFTEST_END_STMT
1964
1965/* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1966 and ranges from EXP_START_COL to EXP_FINISH_COL.
1967 Use LOC as the effective location of the selftest. */
1968
1969static void
1970assert_token_loc_eq (const location &loc,
1971 const cpp_token *tok,
1972 const char *exp_filename, int exp_linenum,
1973 int exp_start_col, int exp_finish_col)
1974{
1975 location_t tok_loc = tok->src_loc;
1976 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1977 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1978
1979 /* If location_t values are sufficiently high, then column numbers
1980 will be unavailable. */
1981 if (!should_have_column_data_p (tok_loc))
1982 return;
1983
1984 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1985 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1986 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1987 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1988}
1989
1990/* Use assert_token_loc_eq to verify the TOK->src_loc, using
1991 SELFTEST_LOCATION as the effective location of the selftest. */
1992
1993#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
1994 EXP_START_COL, EXP_FINISH_COL) \
1995 assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
1996 (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1997
1998/* Test of lexing a file using libcpp, verifying tokens and their
1999 location information. */
2000
2001static void
2002test_lexer (const line_table_case &case_)
2003{
2004 /* Create a tempfile and write some text to it. */
2005 const char *content =
2006 /*00000000011111111112222222222333333.3333444444444.455555555556
2007 12345678901234567890123456789012345.6789012345678.901234567890. */
2008 ("test_name /* c-style comment */\n"
2009 " \"test literal\"\n"
2010 " // test c++-style comment\n"
2011 " 42\n");
2012 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2013
f87e22c5 2014 line_table_test ltt (case_);
741d3be5
DM
2015
2016 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2017
2018 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2019 ASSERT_NE (fname, NULL);
2020
2021 /* Verify that we get the expected tokens back, with the correct
2022 location information. */
2023
2024 location_t loc;
2025 const cpp_token *tok;
2026 tok = cpp_get_token_with_location (parser, &loc);
2027 ASSERT_NE (tok, NULL);
2028 ASSERT_EQ (tok->type, CPP_NAME);
2029 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2030 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2031
2032 tok = cpp_get_token_with_location (parser, &loc);
2033 ASSERT_NE (tok, NULL);
2034 ASSERT_EQ (tok->type, CPP_STRING);
2035 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2036 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2037
2038 tok = cpp_get_token_with_location (parser, &loc);
2039 ASSERT_NE (tok, NULL);
2040 ASSERT_EQ (tok->type, CPP_NUMBER);
2041 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2042 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2043
2044 tok = cpp_get_token_with_location (parser, &loc);
2045 ASSERT_NE (tok, NULL);
2046 ASSERT_EQ (tok->type, CPP_EOF);
2047
2048 cpp_finish (parser, NULL);
2049 cpp_destroy (parser);
2050}
2051
88fa5555
DM
/* Forward decls.  */

class lexer_test;
class lexer_test_options;

/* Abstract hook for customizing a lexer_test.
   The lexer_test constructor invokes "apply" on the options object it
   is given (if any) before it starts reading the source file.  */

class lexer_test_options
{
 public:
  virtual void apply (lexer_test &test) = 0;
};
2065
f5ea989d
DM
2066/* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2067 in its dtor.
2068
2069 This is needed by struct lexer_test to ensure that the cleanup of the
2070 cpp_reader happens *after* the cleanup of the temp_source_file. */
2071
2072class cpp_reader_ptr
2073{
2074 public:
2075 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2076
2077 ~cpp_reader_ptr ()
2078 {
2079 cpp_finish (m_ptr, NULL);
2080 cpp_destroy (m_ptr);
2081 }
2082
2083 operator cpp_reader * () const { return m_ptr; }
2084
2085 private:
2086 cpp_reader *m_ptr;
2087};
2088
88fa5555
DM
2089/* A struct for writing lexer tests. */
2090
6c1dae73 2091class lexer_test
88fa5555 2092{
6c1dae73 2093public:
88fa5555
DM
2094 lexer_test (const line_table_case &case_, const char *content,
2095 lexer_test_options *options);
2096 ~lexer_test ();
2097
2098 const cpp_token *get_token ();
2099
f5ea989d
DM
2100 /* The ordering of these fields matters.
2101 The line_table_test must be first, since the cpp_reader_ptr
2102 uses it.
2103 The cpp_reader must be cleaned up *after* the temp_source_file
2104 since the filenames in input.c's input cache are owned by the
2105 cpp_reader; in particular, when ~temp_source_file evicts the
2106 filename the filenames must still be alive. */
f87e22c5 2107 line_table_test m_ltt;
f5ea989d
DM
2108 cpp_reader_ptr m_parser;
2109 temp_source_file m_tempfile;
88fa5555 2110 string_concat_db m_concats;
a3998c2f 2111 bool m_implicitly_expect_EOF;
88fa5555
DM
2112};
2113
2114/* Use an EBCDIC encoding for the execution charset, specifically
2115 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2116
2117 This exercises iconv integration within libcpp.
2118 Not every build of iconv supports the given charset,
2119 so we need to flag this error and handle it gracefully. */
2120
2121class ebcdic_execution_charset : public lexer_test_options
2122{
2123 public:
2124 ebcdic_execution_charset () : m_num_iconv_errors (0)
2125 {
2126 gcc_assert (s_singleton == NULL);
2127 s_singleton = this;
2128 }
2129 ~ebcdic_execution_charset ()
2130 {
2131 gcc_assert (s_singleton == this);
2132 s_singleton = NULL;
2133 }
2134
2135 void apply (lexer_test &test) FINAL OVERRIDE
2136 {
2137 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2138 cpp_opts->narrow_charset = "IBM1047";
2139
2140 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
c24300ba 2141 callbacks->diagnostic = on_diagnostic;
88fa5555
DM
2142 }
2143
c24300ba
DM
2144 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2145 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2146 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2147 rich_location *richloc ATTRIBUTE_UNUSED,
2148 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
88fa5555
DM
2149 ATTRIBUTE_FPTR_PRINTF(5,0)
2150 {
2151 gcc_assert (s_singleton);
a7085816
JJ
2152 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2153 const char *msg = "conversion from %s to %s not supported by iconv";
2154#ifdef ENABLE_NLS
2155 msg = dgettext ("cpplib", msg);
2156#endif
88fa5555
DM
2157 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2158 when the local iconv build doesn't support the conversion. */
a7085816 2159 if (strcmp (msgid, msg) == 0)
88fa5555
DM
2160 {
2161 s_singleton->m_num_iconv_errors++;
2162 return true;
2163 }
2164
2165 /* Otherwise, we have an unexpected error. */
2166 abort ();
2167 }
2168
2169 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2170
2171 private:
2172 static ebcdic_execution_charset *s_singleton;
2173 int m_num_iconv_errors;
2174};
2175
2176ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2177
c24300ba 2178/* A lexer_test_options subclass that records a list of diagnostic
a3998c2f
DM
2179 messages emitted by the lexer. */
2180
c24300ba 2181class lexer_diagnostic_sink : public lexer_test_options
a3998c2f
DM
2182{
2183 public:
c24300ba 2184 lexer_diagnostic_sink ()
a3998c2f
DM
2185 {
2186 gcc_assert (s_singleton == NULL);
2187 s_singleton = this;
2188 }
c24300ba 2189 ~lexer_diagnostic_sink ()
a3998c2f
DM
2190 {
2191 gcc_assert (s_singleton == this);
2192 s_singleton = NULL;
2193
2194 int i;
2195 char *str;
c24300ba 2196 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
a3998c2f
DM
2197 free (str);
2198 }
2199
2200 void apply (lexer_test &test) FINAL OVERRIDE
2201 {
2202 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
c24300ba 2203 callbacks->diagnostic = on_diagnostic;
a3998c2f
DM
2204 }
2205
c24300ba
DM
2206 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2207 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2208 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2209 rich_location *richloc ATTRIBUTE_UNUSED,
2210 const char *msgid, va_list *ap)
a3998c2f
DM
2211 ATTRIBUTE_FPTR_PRINTF(5,0)
2212 {
2213 char *msg = xvasprintf (msgid, *ap);
c24300ba 2214 s_singleton->m_diagnostics.safe_push (msg);
a3998c2f
DM
2215 return true;
2216 }
2217
c24300ba 2218 auto_vec<char *> m_diagnostics;
a3998c2f
DM
2219
2220 private:
c24300ba 2221 static lexer_diagnostic_sink *s_singleton;
a3998c2f
DM
2222};
2223
c24300ba 2224lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
a3998c2f 2225
88fa5555
DM
2226/* Constructor. Override line_table with a new instance based on CASE_,
2227 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2228 start parsing the tempfile. */
2229
2230lexer_test::lexer_test (const line_table_case &case_, const char *content,
f5ea989d
DM
2231 lexer_test_options *options)
2232: m_ltt (case_),
2233 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
88fa5555
DM
2234 /* Create a tempfile and write the text to it. */
2235 m_tempfile (SELFTEST_LOCATION, ".c", content),
a3998c2f
DM
2236 m_concats (),
2237 m_implicitly_expect_EOF (true)
88fa5555
DM
2238{
2239 if (options)
2240 options->apply (*this);
2241
2242 cpp_init_iconv (m_parser);
2243
2244 /* Parse the file. */
2245 const char *fname = cpp_read_main_file (m_parser,
2246 m_tempfile.get_filename ());
2247 ASSERT_NE (fname, NULL);
2248}
2249
a3998c2f 2250/* Destructor. By default, verify that the next token in m_parser is EOF. */
88fa5555
DM
2251
2252lexer_test::~lexer_test ()
2253{
2254 location_t loc;
2255 const cpp_token *tok;
2256
a3998c2f
DM
2257 if (m_implicitly_expect_EOF)
2258 {
2259 tok = cpp_get_token_with_location (m_parser, &loc);
2260 ASSERT_NE (tok, NULL);
2261 ASSERT_EQ (tok->type, CPP_EOF);
2262 }
88fa5555
DM
2263}
2264
2265/* Get the next token from m_parser. */
2266
2267const cpp_token *
2268lexer_test::get_token ()
2269{
2270 location_t loc;
2271 const cpp_token *tok;
2272
2273 tok = cpp_get_token_with_location (m_parser, &loc);
2274 ASSERT_NE (tok, NULL);
2275 return tok;
2276}
2277
2278/* Verify that locations within string literals are correctly handled. */
2279
2280/* Verify get_source_range_for_substring for token(s) at STRLOC,
2281 using the string concatenation database for TEST.
2282
2283 Assert that the character at index IDX is on EXPECTED_LINE,
2284 and that it begins at column EXPECTED_START_COL and ends at
2285 EXPECTED_FINISH_COL (unless the locations are beyond
2286 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2287 columns). */
2288
2289static void
2290assert_char_at_range (const location &loc,
2291 lexer_test& test,
2292 location_t strloc, enum cpp_ttype type, int idx,
2293 int expected_line, int expected_start_col,
2294 int expected_finish_col)
2295{
2296 cpp_reader *pfile = test.m_parser;
2297 string_concat_db *concats = &test.m_concats;
2298
a954833d 2299 source_range actual_range = source_range();
88fa5555 2300 const char *err
65e736c0
DM
2301 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2302 &actual_range);
88fa5555
DM
2303 if (should_have_column_data_p (strloc))
2304 ASSERT_EQ_AT (loc, NULL, err);
2305 else
2306 {
2307 ASSERT_STREQ_AT (loc,
2308 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2309 err);
2310 return;
2311 }
2312
2313 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2314 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2315 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2316 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2317
2318 if (should_have_column_data_p (actual_range.m_start))
2319 {
2320 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2321 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2322 }
2323 if (should_have_column_data_p (actual_range.m_finish))
2324 {
2325 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2326 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2327 }
2328}
2329
2330/* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2331 the effective location of any errors. */
2332
2333#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2334 EXPECTED_START_COL, EXPECTED_FINISH_COL) \
2335 assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2336 (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2337 (EXPECTED_FINISH_COL))
2338
2339/* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2340 using the string concatenation database for TEST.
2341
2342 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2343
2344static void
2345assert_num_substring_ranges (const location &loc,
2346 lexer_test& test,
2347 location_t strloc,
2348 enum cpp_ttype type,
2349 int expected_num_ranges)
2350{
2351 cpp_reader *pfile = test.m_parser;
2352 string_concat_db *concats = &test.m_concats;
2353
0e06d2b3 2354 int actual_num_ranges = -1;
88fa5555
DM
2355 const char *err
2356 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2357 &actual_num_ranges);
2358 if (should_have_column_data_p (strloc))
2359 ASSERT_EQ_AT (loc, NULL, err);
2360 else
2361 {
2362 ASSERT_STREQ_AT (loc,
2363 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2364 err);
2365 return;
2366 }
2367 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2368}
2369
2370/* Macro for calling assert_num_substring_ranges, supplying
2371 SELFTEST_LOCATION for the effective location of any errors. */
2372
2373#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2374 EXPECTED_NUM_RANGES) \
2375 assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2376 (TYPE), (EXPECTED_NUM_RANGES))
2377
2378
2379/* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2380 returns an error (using the string concatenation database for TEST). */
2381
2382static void
2383assert_has_no_substring_ranges (const location &loc,
2384 lexer_test& test,
2385 location_t strloc,
2386 enum cpp_ttype type,
2387 const char *expected_err)
2388{
2389 cpp_reader *pfile = test.m_parser;
2390 string_concat_db *concats = &test.m_concats;
2391 cpp_substring_ranges ranges;
2392 const char *actual_err
2393 = get_substring_ranges_for_loc (pfile, concats, strloc,
2394 type, ranges);
2395 if (should_have_column_data_p (strloc))
2396 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2397 else
2398 ASSERT_STREQ_AT (loc,
2399 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2400 actual_err);
2401}
2402
2403#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
2404 assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2405 (STRLOC), (TYPE), (ERR))
2406
2407/* Lex a simple string literal. Verify the substring location data, before
2408 and after running cpp_interpret_string on it. */
2409
2410static void
2411test_lexer_string_locations_simple (const line_table_case &case_)
2412{
2413 /* Digits 0-9 (with 0 at column 10), the simple way.
2414 ....................000000000.11111111112.2222222223333333333
2415 ....................123456789.01234567890.1234567890123456789
2416 We add a trailing comment to ensure that we correctly locate
2417 the end of the string literal token. */
2418 const char *content = " \"0123456789\" /* not a string */\n";
2419 lexer_test test (case_, content, NULL);
2420
2421 /* Verify that we get the expected token back, with the correct
2422 location information. */
2423 const cpp_token *tok = test.get_token ();
2424 ASSERT_EQ (tok->type, CPP_STRING);
2425 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2426 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2427
2428 /* At this point in lexing, the quote characters are treated as part of
2429 the string (they are stripped off by cpp_interpret_string). */
2430
2431 ASSERT_EQ (tok->val.str.len, 12);
2432
2433 /* Verify that cpp_interpret_string works. */
2434 cpp_string dst_string;
2435 const enum cpp_ttype type = CPP_STRING;
2436 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2437 &dst_string, type);
2438 ASSERT_TRUE (result);
2439 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2440 free (const_cast <unsigned char *> (dst_string.text));
2441
2442 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3
DM
2443 opening quote, but does include the closing quote. */
2444 for (int i = 0; i <= 10; i++)
88fa5555
DM
2445 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2446 10 + i, 10 + i);
2447
bbd6fcf3 2448 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
88fa5555
DM
2449}
2450
2451/* As test_lexer_string_locations_simple, but use an EBCDIC execution
2452 encoding. */
2453
2454static void
2455test_lexer_string_locations_ebcdic (const line_table_case &case_)
2456{
2457 /* EBCDIC support requires iconv. */
2458 if (!HAVE_ICONV)
2459 return;
2460
2461 /* Digits 0-9 (with 0 at column 10), the simple way.
2462 ....................000000000.11111111112.2222222223333333333
2463 ....................123456789.01234567890.1234567890123456789
2464 We add a trailing comment to ensure that we correctly locate
2465 the end of the string literal token. */
2466 const char *content = " \"0123456789\" /* not a string */\n";
2467 ebcdic_execution_charset use_ebcdic;
2468 lexer_test test (case_, content, &use_ebcdic);
2469
2470 /* Verify that we get the expected token back, with the correct
2471 location information. */
2472 const cpp_token *tok = test.get_token ();
2473 ASSERT_EQ (tok->type, CPP_STRING);
2474 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2475 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2476
2477 /* At this point in lexing, the quote characters are treated as part of
2478 the string (they are stripped off by cpp_interpret_string). */
2479
2480 ASSERT_EQ (tok->val.str.len, 12);
2481
2482 /* The remainder of the test requires an iconv implementation that
2483 can convert from UTF-8 to the EBCDIC encoding requested above. */
2484 if (use_ebcdic.iconv_errors_occurred_p ())
2485 return;
2486
2487 /* Verify that cpp_interpret_string works. */
2488 cpp_string dst_string;
2489 const enum cpp_ttype type = CPP_STRING;
2490 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2491 &dst_string, type);
2492 ASSERT_TRUE (result);
2493 /* We should now have EBCDIC-encoded text, specifically
2494 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2495 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2496 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2497 (const char *)dst_string.text);
2498 free (const_cast <unsigned char *> (dst_string.text));
2499
2500 /* Verify that we don't attempt to record substring location information
2501 for such cases. */
2502 ASSERT_HAS_NO_SUBSTRING_RANGES
2503 (test, tok->src_loc, type,
2504 "execution character set != source character set");
2505}
2506
2507/* Lex a string literal containing a hex-escaped character.
2508 Verify the substring location data, before and after running
2509 cpp_interpret_string on it. */
2510
2511static void
2512test_lexer_string_locations_hex (const line_table_case &case_)
2513{
2514 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2515 and with a space in place of digit 6, to terminate the escaped
2516 hex code.
2517 ....................000000000.111111.11112222.
2518 ....................123456789.012345.67890123. */
2519 const char *content = " \"01234\\x35 789\"\n";
2520 lexer_test test (case_, content, NULL);
2521
2522 /* Verify that we get the expected token back, with the correct
2523 location information. */
2524 const cpp_token *tok = test.get_token ();
2525 ASSERT_EQ (tok->type, CPP_STRING);
2526 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2527 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2528
2529 /* At this point in lexing, the quote characters are treated as part of
2530 the string (they are stripped off by cpp_interpret_string). */
2531 ASSERT_EQ (tok->val.str.len, 15);
2532
2533 /* Verify that cpp_interpret_string works. */
2534 cpp_string dst_string;
2535 const enum cpp_ttype type = CPP_STRING;
2536 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2537 &dst_string, type);
2538 ASSERT_TRUE (result);
2539 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2540 free (const_cast <unsigned char *> (dst_string.text));
2541
2542 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2543 opening quote, but does include the closing quote. */
88fa5555
DM
2544 for (int i = 0; i <= 4; i++)
2545 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2546 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
bbd6fcf3 2547 for (int i = 6; i <= 10; i++)
88fa5555
DM
2548 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2549
bbd6fcf3 2550 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
88fa5555
DM
2551}
2552
2553/* Lex a string literal containing an octal-escaped character.
2554 Verify the substring location data after running cpp_interpret_string
2555 on it. */
2556
2557static void
2558test_lexer_string_locations_oct (const line_table_case &case_)
2559{
2560 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2561 and with a space in place of digit 6, to terminate the escaped
2562 octal code.
2563 ....................000000000.111111.11112222.2222223333333333444
2564 ....................123456789.012345.67890123.4567890123456789012 */
2565 const char *content = " \"01234\\065 789\" /* not a string */\n";
2566 lexer_test test (case_, content, NULL);
2567
2568 /* Verify that we get the expected token back, with the correct
2569 location information. */
2570 const cpp_token *tok = test.get_token ();
2571 ASSERT_EQ (tok->type, CPP_STRING);
2572 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2573
2574 /* Verify that cpp_interpret_string works. */
2575 cpp_string dst_string;
2576 const enum cpp_ttype type = CPP_STRING;
2577 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2578 &dst_string, type);
2579 ASSERT_TRUE (result);
2580 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2581 free (const_cast <unsigned char *> (dst_string.text));
2582
2583 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2584 opening quote, but does include the closing quote. */
88fa5555
DM
2585 for (int i = 0; i < 5; i++)
2586 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2587 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
bbd6fcf3 2588 for (int i = 6; i <= 10; i++)
88fa5555
DM
2589 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2590
bbd6fcf3 2591 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
88fa5555
DM
2592}
2593
2594/* Test of string literal containing letter escapes. */
2595
2596static void
2597test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2598{
2599 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2600 .....................000000000.1.11111.1.1.11222.22222223333333
2601 .....................123456789.0.12345.6.7.89012.34567890123456. */
2602 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2603 lexer_test test (case_, content, NULL);
2604
2605 /* Verify that we get the expected tokens back. */
2606 const cpp_token *tok = test.get_token ();
2607 ASSERT_EQ (tok->type, CPP_STRING);
2608 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2609
2610 /* Verify ranges of individual characters. */
2611 /* "\t". */
2612 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2613 0, 1, 10, 11);
2614 /* "foo". */
2615 for (int i = 1; i <= 3; i++)
2616 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2617 i, 1, 11 + i, 11 + i);
2618 /* "\\" and "\n". */
2619 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2620 4, 1, 15, 16);
2621 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2622 5, 1, 17, 18);
2623
bbd6fcf3
DM
2624 /* "bar" and closing quote for nul-terminator. */
2625 for (int i = 6; i <= 9; i++)
88fa5555
DM
2626 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2627 i, 1, 13 + i, 13 + i);
2628
bbd6fcf3 2629 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
88fa5555
DM
2630}
2631
2632/* Another test of a string literal containing a letter escape.
2633 Based on string seen in
2634 printf ("%-%\n");
2635 in gcc.dg/format/c90-printf-1.c. */
2636
2637static void
2638test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2639{
2640 /* .....................000000000.1111.11.1111.22222222223.
2641 .....................123456789.0123.45.6789.01234567890. */
2642 const char *content = (" \"%-%\\n\" /* non-str */\n");
2643 lexer_test test (case_, content, NULL);
2644
2645 /* Verify that we get the expected tokens back. */
2646 const cpp_token *tok = test.get_token ();
2647 ASSERT_EQ (tok->type, CPP_STRING);
2648 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2649
2650 /* Verify ranges of individual characters. */
2651 /* "%-%". */
2652 for (int i = 0; i < 3; i++)
2653 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2654 i, 1, 10 + i, 10 + i);
2655 /* "\n". */
2656 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2657 3, 1, 13, 14);
2658
bbd6fcf3
DM
2659 /* Closing quote for nul-terminator. */
2660 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2661 4, 1, 15, 15);
2662
2663 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
88fa5555
DM
2664}
2665
2666/* Lex a string literal containing UCN 4 characters.
2667 Verify the substring location data after running cpp_interpret_string
2668 on it. */
2669
2670static void
2671test_lexer_string_locations_ucn4 (const line_table_case &case_)
2672{
2673 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2674 as UCN 4.
2675 ....................000000000.111111.111122.222222223.33333333344444
2676 ....................123456789.012345.678901.234567890.12345678901234 */
2677 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2678 lexer_test test (case_, content, NULL);
2679
2680 /* Verify that we get the expected token back, with the correct
2681 location information. */
2682 const cpp_token *tok = test.get_token ();
2683 ASSERT_EQ (tok->type, CPP_STRING);
2684 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2685
2686 /* Verify that cpp_interpret_string works.
2687 The string should be encoded in the execution character
2688 set. Assuming that that is UTF-8, we should have the following:
2689 ----------- ---- ----- ------- ----------------
2690 Byte offset Byte Octal Unicode Source Column(s)
2691 ----------- ---- ----- ------- ----------------
2692 0 0x30 '0' 10
2693 1 0x31 '1' 11
2694 2 0x32 '2' 12
2695 3 0x33 '3' 13
2696 4 0x34 '4' 14
2697 5 0xE2 \342 U+2174 15-20
2698 6 0x85 \205 (cont) 15-20
2699 7 0xB4 \264 (cont) 15-20
2700 8 0xE2 \342 U+2175 21-26
2701 9 0x85 \205 (cont) 21-26
2702 10 0xB5 \265 (cont) 21-26
2703 11 0x37 '7' 27
2704 12 0x38 '8' 28
2705 13 0x39 '9' 29
bbd6fcf3 2706 14 0x00 30 (closing quote)
88fa5555
DM
2707 ----------- ---- ----- ------- ---------------. */
2708
2709 cpp_string dst_string;
2710 const enum cpp_ttype type = CPP_STRING;
2711 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2712 &dst_string, type);
2713 ASSERT_TRUE (result);
2714 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2715 (const char *)dst_string.text);
2716 free (const_cast <unsigned char *> (dst_string.text));
2717
2718 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2719 opening quote, but does include the closing quote.
88fa5555
DM
2720 '01234'. */
2721 for (int i = 0; i <= 4; i++)
2722 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2723 /* U+2174. */
2724 for (int i = 5; i <= 7; i++)
2725 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2726 /* U+2175. */
2727 for (int i = 8; i <= 10; i++)
2728 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
bbd6fcf3
DM
2729 /* '789' and nul terminator */
2730 for (int i = 11; i <= 14; i++)
88fa5555
DM
2731 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2732
bbd6fcf3 2733 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
88fa5555
DM
2734}
2735
2736/* Lex a string literal containing UCN 8 characters.
2737 Verify the substring location data after running cpp_interpret_string
2738 on it. */
2739
2740static void
2741test_lexer_string_locations_ucn8 (const line_table_case &case_)
2742{
2743 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2744 ....................000000000.111111.1111222222.2222333333333.344444
2745 ....................123456789.012345.6789012345.6789012345678.901234 */
2746 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2747 lexer_test test (case_, content, NULL);
2748
2749 /* Verify that we get the expected token back, with the correct
2750 location information. */
2751 const cpp_token *tok = test.get_token ();
2752 ASSERT_EQ (tok->type, CPP_STRING);
2753 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2754 "\"01234\\U00002174\\U00002175789\"");
2755
2756 /* Verify that cpp_interpret_string works.
2757 The UTF-8 encoding of the string is identical to that from
2758 the ucn4 testcase above; the only difference is the column
2759 locations. */
2760 cpp_string dst_string;
2761 const enum cpp_ttype type = CPP_STRING;
2762 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2763 &dst_string, type);
2764 ASSERT_TRUE (result);
2765 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2766 (const char *)dst_string.text);
2767 free (const_cast <unsigned char *> (dst_string.text));
2768
2769 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2770 opening quote, but does include the closing quote.
88fa5555
DM
2771 '01234'. */
2772 for (int i = 0; i <= 4; i++)
2773 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2774 /* U+2174. */
2775 for (int i = 5; i <= 7; i++)
2776 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2777 /* U+2175. */
2778 for (int i = 8; i <= 10; i++)
2779 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2780 /* '789' at columns 35-37 */
2781 for (int i = 11; i <= 13; i++)
2782 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
bbd6fcf3
DM
2783 /* Closing quote/nul-terminator at column 38. */
2784 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
88fa5555 2785
bbd6fcf3 2786 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
88fa5555
DM
2787}
2788
/* Fetch a big-endian 32-bit value and convert to host endianness.

   PTR_BE_VALUE points to four bytes holding the value with the most
   significant byte first.  The bytes are read individually, so the
   result does not depend on the byte order of the host.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *buf = (const unsigned char *)ptr_be_value;
  /* Widen each byte before shifting so that no bits are lost.  */
  return (((uint32_t) buf[0] << 24)
          | ((uint32_t) buf[1] << 16)
          | ((uint32_t) buf[2] << 8)
          | (uint32_t) buf[3]);
}
2800
2801/* Lex a wide string literal and verify that attempts to read substring
2802 location data from it fail gracefully. */
2803
2804static void
2805test_lexer_string_locations_wide_string (const line_table_case &case_)
2806{
2807 /* Digits 0-9.
2808 ....................000000000.11111111112.22222222233333
2809 ....................123456789.01234567890.12345678901234 */
2810 const char *content = " L\"0123456789\" /* non-str */\n";
2811 lexer_test test (case_, content, NULL);
2812
2813 /* Verify that we get the expected token back, with the correct
2814 location information. */
2815 const cpp_token *tok = test.get_token ();
2816 ASSERT_EQ (tok->type, CPP_WSTRING);
2817 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2818
2819 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2820 cpp_string dst_string;
2821 const enum cpp_ttype type = CPP_WSTRING;
2822 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2823 &dst_string, type);
2824 ASSERT_TRUE (result);
2825 /* The cpp_reader defaults to big-endian with
2826 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2827 now be encoded as UTF-32BE. */
2828 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2829 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2830 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2831 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2832 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2833 free (const_cast <unsigned char *> (dst_string.text));
2834
2835 /* We don't yet support generating substring location information
2836 for L"" strings. */
2837 ASSERT_HAS_NO_SUBSTRING_RANGES
2838 (test, tok->src_loc, type,
2839 "execution character set != source character set");
2840}
2841
/* Fetch a big-endian 16-bit value and convert to host endianness.

   PTR_BE_VALUE points to two bytes holding the value with the most
   significant byte first.  The bytes are read individually, so the
   result does not depend on the byte order of the host.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *buf = (const unsigned char *)ptr_be_value;
  /* The operands promote to a wider type for the shift and the OR;
     narrow the combined value back explicitly.  */
  return (uint16_t) (((unsigned int) buf[0] << 8) | (unsigned int) buf[1]);
}
2850
2851/* Lex a u"" string literal and verify that attempts to read substring
2852 location data from it fail gracefully. */
2853
2854static void
2855test_lexer_string_locations_string16 (const line_table_case &case_)
2856{
2857 /* Digits 0-9.
2858 ....................000000000.11111111112.22222222233333
2859 ....................123456789.01234567890.12345678901234 */
2860 const char *content = " u\"0123456789\" /* non-str */\n";
2861 lexer_test test (case_, content, NULL);
2862
2863 /* Verify that we get the expected token back, with the correct
2864 location information. */
2865 const cpp_token *tok = test.get_token ();
2866 ASSERT_EQ (tok->type, CPP_STRING16);
2867 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2868
2869 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2870 cpp_string dst_string;
2871 const enum cpp_ttype type = CPP_STRING16;
2872 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2873 &dst_string, type);
2874 ASSERT_TRUE (result);
2875
2876 /* The cpp_reader defaults to big-endian, so dst_string should
2877 now be encoded as UTF-16BE. */
2878 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2879 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2880 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2881 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2882 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2883 free (const_cast <unsigned char *> (dst_string.text));
2884
2885 /* We don't yet support generating substring location information
2886 for L"" strings. */
2887 ASSERT_HAS_NO_SUBSTRING_RANGES
2888 (test, tok->src_loc, type,
2889 "execution character set != source character set");
2890}
2891
2892/* Lex a U"" string literal and verify that attempts to read substring
2893 location data from it fail gracefully. */
2894
2895static void
2896test_lexer_string_locations_string32 (const line_table_case &case_)
2897{
2898 /* Digits 0-9.
2899 ....................000000000.11111111112.22222222233333
2900 ....................123456789.01234567890.12345678901234 */
2901 const char *content = " U\"0123456789\" /* non-str */\n";
2902 lexer_test test (case_, content, NULL);
2903
2904 /* Verify that we get the expected token back, with the correct
2905 location information. */
2906 const cpp_token *tok = test.get_token ();
2907 ASSERT_EQ (tok->type, CPP_STRING32);
2908 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2909
2910 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2911 cpp_string dst_string;
2912 const enum cpp_ttype type = CPP_STRING32;
2913 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2914 &dst_string, type);
2915 ASSERT_TRUE (result);
2916
2917 /* The cpp_reader defaults to big-endian, so dst_string should
2918 now be encoded as UTF-32BE. */
2919 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2920 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2921 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2922 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2923 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2924 free (const_cast <unsigned char *> (dst_string.text));
2925
2926 /* We don't yet support generating substring location information
2927 for L"" strings. */
2928 ASSERT_HAS_NO_SUBSTRING_RANGES
2929 (test, tok->src_loc, type,
2930 "execution character set != source character set");
2931}
2932
2933/* Lex a u8-string literal.
2934 Verify the substring location data after running cpp_interpret_string
2935 on it. */
2936
2937static void
2938test_lexer_string_locations_u8 (const line_table_case &case_)
2939{
2940 /* Digits 0-9.
2941 ....................000000000.11111111112.22222222233333
2942 ....................123456789.01234567890.12345678901234 */
2943 const char *content = " u8\"0123456789\" /* non-str */\n";
2944 lexer_test test (case_, content, NULL);
2945
2946 /* Verify that we get the expected token back, with the correct
2947 location information. */
2948 const cpp_token *tok = test.get_token ();
2949 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2950 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2951
2952 /* Verify that cpp_interpret_string works. */
2953 cpp_string dst_string;
2954 const enum cpp_ttype type = CPP_STRING;
2955 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2956 &dst_string, type);
2957 ASSERT_TRUE (result);
2958 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2959 free (const_cast <unsigned char *> (dst_string.text));
2960
2961 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3
DM
2962 opening quote, but does include the closing quote. */
2963 for (int i = 0; i <= 10; i++)
88fa5555
DM
2964 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2965}
2966
2967/* Lex a string literal containing UTF-8 source characters.
2968 Verify the substring location data after running cpp_interpret_string
2969 on it. */
2970
2971static void
2972test_lexer_string_locations_utf8_source (const line_table_case &case_)
2973{
2974 /* This string literal is written out to the source file as UTF-8,
2975 and is of the form "before mojibake after", where "mojibake"
2976 is written as the following four unicode code points:
2977 U+6587 CJK UNIFIED IDEOGRAPH-6587
2978 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2979 U+5316 CJK UNIFIED IDEOGRAPH-5316
2980 U+3051 HIRAGANA LETTER KE.
2981 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2982 "before" and "after" are 1 byte per unicode character.
2983
2984 The numbering shown are "columns", which are *byte* numbers within
2985 the line, rather than unicode character numbers.
2986
2987 .................... 000000000.1111111.
2988 .................... 123456789.0123456. */
2989 const char *content = (" \"before "
2990 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2991 UTF-8: 0xE6 0x96 0x87
2992 C octal escaped UTF-8: \346\226\207
2993 "column" numbers: 17-19. */
2994 "\346\226\207"
2995
2996 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2997 UTF-8: 0xE5 0xAD 0x97
2998 C octal escaped UTF-8: \345\255\227
2999 "column" numbers: 20-22. */
3000 "\345\255\227"
3001
3002 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3003 UTF-8: 0xE5 0x8C 0x96
3004 C octal escaped UTF-8: \345\214\226
3005 "column" numbers: 23-25. */
3006 "\345\214\226"
3007
3008 /* U+3051 HIRAGANA LETTER KE
3009 UTF-8: 0xE3 0x81 0x91
3010 C octal escaped UTF-8: \343\201\221
3011 "column" numbers: 26-28. */
3012 "\343\201\221"
3013
3014 /* column numbers 29 onwards
3015 2333333.33334444444444
3016 9012345.67890123456789. */
3017 " after\" /* non-str */\n");
3018 lexer_test test (case_, content, NULL);
3019
3020 /* Verify that we get the expected token back, with the correct
3021 location information. */
3022 const cpp_token *tok = test.get_token ();
3023 ASSERT_EQ (tok->type, CPP_STRING);
3024 ASSERT_TOKEN_AS_TEXT_EQ
3025 (test.m_parser, tok,
3026 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3027
3028 /* Verify that cpp_interpret_string works. */
3029 cpp_string dst_string;
3030 const enum cpp_ttype type = CPP_STRING;
3031 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3032 &dst_string, type);
3033 ASSERT_TRUE (result);
3034 ASSERT_STREQ
3035 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3036 (const char *)dst_string.text);
3037 free (const_cast <unsigned char *> (dst_string.text));
3038
3039 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 3040 opening quote, but does include the closing quote.
88fa5555 3041 Assuming that both source and execution encodings are UTF-8, we have
bbd6fcf3 3042 a run of 25 octets in each, plus the NUL terminator. */
88fa5555
DM
3043 for (int i = 0; i < 25; i++)
3044 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
bbd6fcf3
DM
3045 /* NUL-terminator should use the closing quote at column 35. */
3046 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
88fa5555 3047
bbd6fcf3 3048 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
88fa5555
DM
3049}
3050
3051/* Test of string literal concatenation. */
3052
3053static void
3054test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3055{
3056 /* Digits 0-9.
3057 .....................000000000.111111.11112222222222
3058 .....................123456789.012345.67890123456789. */
3059 const char *content = (" \"01234\" /* non-str */\n"
3060 " \"56789\" /* non-str */\n");
3061 lexer_test test (case_, content, NULL);
3062
3063 location_t input_locs[2];
3064
3065 /* Verify that we get the expected tokens back. */
3066 auto_vec <cpp_string> input_strings;
3067 const cpp_token *tok_a = test.get_token ();
3068 ASSERT_EQ (tok_a->type, CPP_STRING);
3069 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3070 input_strings.safe_push (tok_a->val.str);
3071 input_locs[0] = tok_a->src_loc;
3072
3073 const cpp_token *tok_b = test.get_token ();
3074 ASSERT_EQ (tok_b->type, CPP_STRING);
3075 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3076 input_strings.safe_push (tok_b->val.str);
3077 input_locs[1] = tok_b->src_loc;
3078
3079 /* Verify that cpp_interpret_string works. */
3080 cpp_string dst_string;
3081 const enum cpp_ttype type = CPP_STRING;
3082 bool result = cpp_interpret_string (test.m_parser,
3083 input_strings.address (), 2,
3084 &dst_string, type);
3085 ASSERT_TRUE (result);
3086 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3087 free (const_cast <unsigned char *> (dst_string.text));
3088
3089 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3090 test.m_concats.record_string_concatenation (2, input_locs);
3091
3092 location_t initial_loc = input_locs[0];
3093
bbd6fcf3 3094 /* "01234" on line 1. */
88fa5555
DM
3095 for (int i = 0; i <= 4; i++)
3096 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
bbd6fcf3
DM
3097 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3098 for (int i = 5; i <= 10; i++)
88fa5555
DM
3099 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3100
bbd6fcf3 3101 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
88fa5555
DM
3102}
3103
3104/* Another test of string literal concatenation. */
3105
3106static void
3107test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3108{
3109 /* Digits 0-9.
3110 .....................000000000.111.11111112222222
3111 .....................123456789.012.34567890123456. */
3112 const char *content = (" \"01\" /* non-str */\n"
3113 " \"23\" /* non-str */\n"
3114 " \"45\" /* non-str */\n"
3115 " \"67\" /* non-str */\n"
3116 " \"89\" /* non-str */\n");
3117 lexer_test test (case_, content, NULL);
3118
3119 auto_vec <cpp_string> input_strings;
3120 location_t input_locs[5];
3121
3122 /* Verify that we get the expected tokens back. */
3123 for (int i = 0; i < 5; i++)
3124 {
3125 const cpp_token *tok = test.get_token ();
3126 ASSERT_EQ (tok->type, CPP_STRING);
3127 input_strings.safe_push (tok->val.str);
3128 input_locs[i] = tok->src_loc;
3129 }
3130
3131 /* Verify that cpp_interpret_string works. */
3132 cpp_string dst_string;
3133 const enum cpp_ttype type = CPP_STRING;
3134 bool result = cpp_interpret_string (test.m_parser,
3135 input_strings.address (), 5,
3136 &dst_string, type);
3137 ASSERT_TRUE (result);
3138 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3139 free (const_cast <unsigned char *> (dst_string.text));
3140
3141 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3142 test.m_concats.record_string_concatenation (5, input_locs);
3143
3144 location_t initial_loc = input_locs[0];
3145
3146 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3147 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3148 and expect get_source_range_for_substring to fail.
3149 However, for a string concatenation test, we can have a case
3150 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3151 but subsequent strings can be after it.
3152 Attempting to detect this within assert_char_at_range
3153 would overcomplicate the logic for the common test cases, so
3154 we detect it here. */
3155 if (should_have_column_data_p (input_locs[0])
3156 && !should_have_column_data_p (input_locs[4]))
3157 {
3158 /* Verify that get_source_range_for_substring gracefully rejects
3159 this case. */
3160 source_range actual_range;
3161 const char *err
65e736c0
DM
3162 = get_source_range_for_char (test.m_parser, &test.m_concats,
3163 initial_loc, type, 0, &actual_range);
88fa5555
DM
3164 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3165 return;
3166 }
3167
3168 for (int i = 0; i < 5; i++)
3169 for (int j = 0; j < 2; j++)
3170 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3171 i + 1, 10 + j, 10 + j);
3172
bbd6fcf3
DM
3173 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3174 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3175
3176 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
88fa5555
DM
3177}
3178
3179/* Another test of string literal concatenation, this time combined with
3180 various kinds of escaped characters. */
3181
3182static void
3183test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3184{
3185 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3186 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3187 const char *content
3188 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3189 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3190 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3191 lexer_test test (case_, content, NULL);
3192
3193 auto_vec <cpp_string> input_strings;
3194 location_t input_locs[4];
3195
3196 /* Verify that we get the expected tokens back. */
3197 for (int i = 0; i < 4; i++)
3198 {
3199 const cpp_token *tok = test.get_token ();
3200 ASSERT_EQ (tok->type, CPP_STRING);
3201 input_strings.safe_push (tok->val.str);
3202 input_locs[i] = tok->src_loc;
3203 }
3204
3205 /* Verify that cpp_interpret_string works. */
3206 cpp_string dst_string;
3207 const enum cpp_ttype type = CPP_STRING;
3208 bool result = cpp_interpret_string (test.m_parser,
3209 input_strings.address (), 4,
3210 &dst_string, type);
3211 ASSERT_TRUE (result);
3212 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3213 free (const_cast <unsigned char *> (dst_string.text));
3214
3215 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3216 test.m_concats.record_string_concatenation (4, input_locs);
3217
3218 location_t initial_loc = input_locs[0];
3219
3220 for (int i = 0; i <= 4; i++)
3221 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3222 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3223 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3224 for (int i = 7; i <= 9; i++)
3225 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3226
bbd6fcf3
DM
3227 /* NUL-terminator should use the location of the final closing quote. */
3228 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3229
3230 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
88fa5555
DM
3231}
3232
3233/* Test of string literal in a macro. */
3234
3235static void
3236test_lexer_string_locations_macro (const line_table_case &case_)
3237{
3238 /* Digits 0-9.
3239 .....................0000000001111111111.22222222223.
3240 .....................1234567890123456789.01234567890. */
3241 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3242 " MACRO");
3243 lexer_test test (case_, content, NULL);
3244
3245 /* Verify that we get the expected tokens back. */
3246 const cpp_token *tok = test.get_token ();
3247 ASSERT_EQ (tok->type, CPP_PADDING);
3248
3249 tok = test.get_token ();
3250 ASSERT_EQ (tok->type, CPP_STRING);
3251 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3252
3253 /* Verify ranges of individual characters. We ought to
3254 see columns within the macro definition. */
bbd6fcf3 3255 for (int i = 0; i <= 10; i++)
88fa5555
DM
3256 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3257 i, 1, 20 + i, 20 + i);
3258
bbd6fcf3 3259 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
88fa5555
DM
3260
3261 tok = test.get_token ();
3262 ASSERT_EQ (tok->type, CPP_PADDING);
3263}
3264
3265/* Test of stringification of a macro argument. */
3266
3267static void
3268test_lexer_string_locations_stringified_macro_argument
3269 (const line_table_case &case_)
3270{
3271 /* .....................000000000111111111122222222223.
3272 .....................123456789012345678901234567890. */
3273 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3274 "MACRO(foo)\n");
3275 lexer_test test (case_, content, NULL);
3276
3277 /* Verify that we get the expected token back. */
3278 const cpp_token *tok = test.get_token ();
3279 ASSERT_EQ (tok->type, CPP_PADDING);
3280
3281 tok = test.get_token ();
3282 ASSERT_EQ (tok->type, CPP_STRING);
3283 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3284
3285 /* We don't support getting the location of a stringified macro
3286 argument. Verify that it fails gracefully. */
3287 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3288 "cpp_interpret_string_1 failed");
3289
3290 tok = test.get_token ();
3291 ASSERT_EQ (tok->type, CPP_PADDING);
3292
3293 tok = test.get_token ();
3294 ASSERT_EQ (tok->type, CPP_PADDING);
3295}
3296
3297/* Ensure that we are fail gracefully if something attempts to pass
3298 in a location that isn't a string literal token. Seen on this code:
3299
3300 const char a[] = " %d ";
3301 __builtin_printf (a, 0.5);
3302 ^
3303
3304 when c-format.c erroneously used the indicated one-character
3305 location as the format string location, leading to a read past the
3306 end of a string buffer in cpp_interpret_string_1. */
3307
3308static void
3309test_lexer_string_locations_non_string (const line_table_case &case_)
3310{
3311 /* .....................000000000111111111122222222223.
3312 .....................123456789012345678901234567890. */
3313 const char *content = (" a\n");
3314 lexer_test test (case_, content, NULL);
3315
3316 /* Verify that we get the expected token back. */
3317 const cpp_token *tok = test.get_token ();
3318 ASSERT_EQ (tok->type, CPP_NAME);
3319 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3320
3321 /* At this point, libcpp is attempting to interpret the name as a
3322 string literal, despite it not starting with a quote. We don't detect
3323 that, but we should at least fail gracefully. */
3324 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3325 "cpp_interpret_string_1 failed");
3326}
3327
3328/* Ensure that we can read substring information for a token which
3329 starts in one linemap and ends in another . Adapted from
3330 gcc.dg/cpp/pr69985.c. */
3331
3332static void
3333test_lexer_string_locations_long_line (const line_table_case &case_)
3334{
3335 /* .....................000000.000111111111
3336 .....................123456.789012346789. */
3337 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3338 " \"0123456789012345678901234567890123456789"
3339 "0123456789012345678901234567890123456789"
3340 "0123456789012345678901234567890123456789"
3341 "0123456789\"\n");
3342
3343 lexer_test test (case_, content, NULL);
3344
3345 /* Verify that we get the expected token back. */
3346 const cpp_token *tok = test.get_token ();
3347 ASSERT_EQ (tok->type, CPP_STRING);
3348
3349 if (!should_have_column_data_p (line_table->highest_location))
3350 return;
3351
3352 /* Verify ranges of individual characters. */
bbd6fcf3
DM
3353 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3354 for (int i = 0; i < 131; i++)
88fa5555
DM
3355 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3356 i, 2, 7 + i, 7 + i);
3357}
3358
b8f56412
DM
3359/* Test of locations within a raw string that doesn't contain a newline. */
3360
3361static void
3362test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3363{
3364 /* .....................00.0000000111111111122.
3365 .....................12.3456789012345678901. */
3366 const char *content = ("R\"foo(0123456789)foo\"\n");
3367 lexer_test test (case_, content, NULL);
3368
3369 /* Verify that we get the expected token back. */
3370 const cpp_token *tok = test.get_token ();
3371 ASSERT_EQ (tok->type, CPP_STRING);
3372
3373 /* Verify that cpp_interpret_string works. */
3374 cpp_string dst_string;
3375 const enum cpp_ttype type = CPP_STRING;
3376 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3377 &dst_string, type);
3378 ASSERT_TRUE (result);
3379 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3380 free (const_cast <unsigned char *> (dst_string.text));
3381
3382 if (!should_have_column_data_p (line_table->highest_location))
3383 return;
3384
3385 /* 0-9, plus the nil terminator. */
3386 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3387 for (int i = 0; i < 11; i++)
3388 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3389 i, 1, 7 + i, 7 + i);
3390}
3391
3392/* Test of locations within a raw string that contains a newline. */
3393
3394static void
3395test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3396{
3397 /* .....................00.0000.
3398 .....................12.3456. */
3399 const char *content = ("R\"foo(\n"
3400 /* .....................00000.
3401 .....................12345. */
3402 "hello\n"
3403 "world\n"
3404 /* .....................00000.
3405 .....................12345. */
3406 ")foo\"\n");
3407 lexer_test test (case_, content, NULL);
3408
3409 /* Verify that we get the expected token back. */
3410 const cpp_token *tok = test.get_token ();
3411 ASSERT_EQ (tok->type, CPP_STRING);
3412
3413 /* Verify that cpp_interpret_string works. */
3414 cpp_string dst_string;
3415 const enum cpp_ttype type = CPP_STRING;
3416 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3417 &dst_string, type);
3418 ASSERT_TRUE (result);
3419 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3420 free (const_cast <unsigned char *> (dst_string.text));
3421
3422 if (!should_have_column_data_p (line_table->highest_location))
3423 return;
3424
3425 /* Currently we don't support locations within raw strings that
3426 contain newlines. */
3427 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3428 "range endpoints are on different lines");
3429}
3430
a3998c2f
DM
3431/* Test of parsing an unterminated raw string. */
3432
3433static void
3434test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3435{
3436 const char *content = "R\"ouch()ouCh\" /* etc */";
3437
c24300ba
DM
3438 lexer_diagnostic_sink diagnostics;
3439 lexer_test test (case_, content, &diagnostics);
a3998c2f
DM
3440 test.m_implicitly_expect_EOF = false;
3441
3442 /* Attempt to parse the raw string. */
3443 const cpp_token *tok = test.get_token ();
3444 ASSERT_EQ (tok->type, CPP_EOF);
3445
c24300ba 3446 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
a3998c2f
DM
3447 /* We expect the message "unterminated raw string"
3448 in the "cpplib" translation domain.
3449 It's not clear that dgettext is available on all supported hosts,
3450 so this assertion is commented-out for now.
3451 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
c24300ba 3452 diagnostics.m_diagnostics[0]);
a3998c2f
DM
3453 */
3454}
3455
88fa5555
DM
3456/* Test of lexing char constants. */
3457
3458static void
3459test_lexer_char_constants (const line_table_case &case_)
3460{
3461 /* Various char constants.
3462 .....................0000000001111111111.22222222223.
3463 .....................1234567890123456789.01234567890. */
3464 const char *content = (" 'a'\n"
3465 " u'a'\n"
3466 " U'a'\n"
3467 " L'a'\n"
3468 " 'abc'\n");
3469 lexer_test test (case_, content, NULL);
3470
3471 /* Verify that we get the expected tokens back. */
3472 /* 'a'. */
3473 const cpp_token *tok = test.get_token ();
3474 ASSERT_EQ (tok->type, CPP_CHAR);
3475 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3476
3477 unsigned int chars_seen;
3478 int unsignedp;
3479 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3480 &chars_seen, &unsignedp);
3481 ASSERT_EQ (cc, 'a');
3482 ASSERT_EQ (chars_seen, 1);
3483
3484 /* u'a'. */
3485 tok = test.get_token ();
3486 ASSERT_EQ (tok->type, CPP_CHAR16);
3487 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3488
3489 /* U'a'. */
3490 tok = test.get_token ();
3491 ASSERT_EQ (tok->type, CPP_CHAR32);
3492 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3493
3494 /* L'a'. */
3495 tok = test.get_token ();
3496 ASSERT_EQ (tok->type, CPP_WCHAR);
3497 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3498
3499 /* 'abc' (c-char-sequence). */
3500 tok = test.get_token ();
3501 ASSERT_EQ (tok->type, CPP_CHAR);
3502 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3503}
741d3be5
DM
3504/* A table of interesting location_t values, giving one axis of our test
3505 matrix. */
3506
3507static const location_t boundary_locations[] = {
3508 /* Zero means "don't override the default values for a new line_table". */
3509 0,
3510
3511 /* An arbitrary non-zero value that isn't close to one of
3512 the boundary values below. */
3513 0x10000,
3514
3515 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3516 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3517 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3518 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3519 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3520 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3521
3522 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3523 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3524 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3525 LINE_MAP_MAX_LOCATION_WITH_COLS,
3526 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3527 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3528};
3529
f87e22c5 3530/* Run TESTCASE multiple times, once for each case in our test matrix. */
d9b950dd
DM
3531
3532void
f87e22c5 3533for_each_line_table_case (void (*testcase) (const line_table_case &))
d9b950dd 3534{
741d3be5
DM
3535 /* As noted above in the description of struct line_table_case,
3536 we want to explore a test matrix of interesting line_table
3537 situations, running various selftests for each case within the
3538 matrix. */
3539
3540 /* Run all tests with:
3541 (a) line_table->default_range_bits == 0, and
3542 (b) line_table->default_range_bits == 5. */
3543 int num_cases_tested = 0;
3544 for (int default_range_bits = 0; default_range_bits <= 5;
3545 default_range_bits += 5)
3546 {
3547 /* ...and use each of the "interesting" location values as
3548 the starting location within line_table. */
3549 const int num_boundary_locations
3550 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3551 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3552 {
3553 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3554
f87e22c5 3555 testcase (c);
741d3be5
DM
3556
3557 num_cases_tested++;
3558 }
3559 }
3560
3561 /* Verify that we fully covered the test matrix. */
3562 ASSERT_EQ (num_cases_tested, 2 * 12);
f87e22c5
DM
3563}
3564
a4553534
DM
3565/* Verify that when presented with a consecutive pair of locations with
3566 a very large line offset, we don't attempt to consolidate them into
3567 a single ordinary linemap where the line offsets within the line map
3568 would lead to overflow (PR lto/88147). */
3569
3570static void
3571test_line_offset_overflow ()
3572{
3573 line_table_test ltt (line_table_case (5, 0));
3574
3575 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3576 linemap_line_start (line_table, 1, 100);
3577 location_t loc_a = linemap_line_start (line_table, 2578, 255);
3578 assert_loceq ("foo.c", 2578, 0, loc_a);
3579
3580 const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3581 ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3582 ASSERT_EQ (ordmap_a->m_range_bits, 5);
3583
3584 location_t loc_b = linemap_line_start (line_table, 404198, 512);
3585 assert_loceq ("foo.c", 404198, 0, loc_b);
3586
3587 /* We should have started a new linemap, rather than attempting to store
3588 a very large line offset. */
3589 const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3590 ASSERT_NE (ordmap_a, ordmap_b);
3591}
3592
f87e22c5
DM
3593/* Run all of the selftests within this file. */
3594
3595void
3596input_c_tests ()
3597{
082284da 3598 test_linenum_comparisons ();
f87e22c5
DM
3599 test_should_have_column_data_p ();
3600 test_unknown_location ();
3601 test_builtins ();
9144eabb 3602 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
f87e22c5
DM
3603
3604 for_each_line_table_case (test_accessing_ordinary_linemaps);
3605 for_each_line_table_case (test_lexer);
3606 for_each_line_table_case (test_lexer_string_locations_simple);
3607 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3608 for_each_line_table_case (test_lexer_string_locations_hex);
3609 for_each_line_table_case (test_lexer_string_locations_oct);
3610 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3611 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3612 for_each_line_table_case (test_lexer_string_locations_ucn4);
3613 for_each_line_table_case (test_lexer_string_locations_ucn8);
3614 for_each_line_table_case (test_lexer_string_locations_wide_string);
3615 for_each_line_table_case (test_lexer_string_locations_string16);
3616 for_each_line_table_case (test_lexer_string_locations_string32);
3617 for_each_line_table_case (test_lexer_string_locations_u8);
3618 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3619 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3620 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3621 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3622 for_each_line_table_case (test_lexer_string_locations_macro);
3623 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3624 for_each_line_table_case (test_lexer_string_locations_non_string);
3625 for_each_line_table_case (test_lexer_string_locations_long_line);
b8f56412
DM
3626 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3627 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
a3998c2f 3628 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
f87e22c5 3629 for_each_line_table_case (test_lexer_char_constants);
741d3be5 3630
d9b950dd 3631 test_reading_source_line ();
a4553534
DM
3632
3633 test_line_offset_overflow ();
d9b950dd
DM
3634}
3635
3636} // namespace selftest
3637
3638#endif /* CHECKING_P */