]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/input.c
lto-wrapper.c (ltrans_priorities): New static var.
[thirdparty/gcc.git] / gcc / input.c
CommitLineData
447924ef 1/* Data and functions related to line maps and input files.
85ec4feb 2 Copyright (C) 2004-2018 Free Software Foundation, Inc.
447924ef
JM
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "intl.h"
ba4ad400 24#include "diagnostic-core.h"
d9b950dd 25#include "selftest.h"
741d3be5 26#include "cpplib.h"
7ecc3eb9 27
a7d79e5c
DM
28#ifndef HAVE_ICONV
29#define HAVE_ICONV 0
30#endif
31
7ecc3eb9
DS
32/* This is a cache used by get_next_line to store the content of a
33 file to be searched for file lines. */
34struct fcache
35{
36 /* These are information used to store a line boundary. */
37 struct line_info
38 {
39 /* The line number. It starts from 1. */
40 size_t line_num;
41
42 /* The position (byte count) of the beginning of the line,
43 relative to the file data pointer. This starts at zero. */
44 size_t start_pos;
45
46 /* The position (byte count) of the last byte of the line. This
47 normally points to the '\n' character, or to one byte after the
48 last byte of the file, if the file doesn't contain a '\n'
49 character. */
50 size_t end_pos;
51
52 line_info (size_t l, size_t s, size_t e)
53 : line_num (l), start_pos (s), end_pos (e)
54 {}
55
56 line_info ()
57 :line_num (0), start_pos (0), end_pos (0)
58 {}
59 };
60
61 /* The number of time this file has been accessed. This is used
62 to designate which file cache to evict from the cache
63 array. */
64 unsigned use_count;
65
f5ea989d
DM
66 /* The file_path is the key for identifying a particular file in
67 the cache.
68 For libcpp-using code, the underlying buffer for this field is
69 owned by the corresponding _cpp_file within the cpp_reader. */
7ecc3eb9
DS
70 const char *file_path;
71
72 FILE *fp;
73
74 /* This points to the content of the file that we've read so
75 far. */
76 char *data;
77
78 /* The size of the DATA array above.*/
79 size_t size;
80
81 /* The number of bytes read from the underlying file so far. This
82 must be less (or equal) than SIZE above. */
83 size_t nb_read;
84
85 /* The index of the beginning of the current line. */
86 size_t line_start_idx;
87
88 /* The number of the previous line read. This starts at 1. Zero
89 means we've read no line so far. */
90 size_t line_num;
91
92 /* This is the total number of lines of the current file. At the
93 moment, we try to get this information from the line map
94 subsystem. Note that this is just a hint. When using the C++
95 front-end, this hint is correct because the input file is then
96 completely tokenized before parsing starts; so the line map knows
97 the number of lines before compilation really starts. For e.g,
98 the C front-end, it can happen that we start emitting diagnostics
99 before the line map has seen the end of the file. */
100 size_t total_lines;
101
c65236d6
DM
102 /* Could this file be missing a trailing newline on its final line?
103 Initially true (to cope with empty files), set to true/false
104 as each line is read. */
105 bool missing_trailing_newline;
106
7ecc3eb9
DS
107 /* This is a record of the beginning and end of the lines we've seen
108 while reading the file. This is useful to avoid walking the data
109 from the beginning when we are asked to read a line that is
110 before LINE_START_IDX above. Note that the maximum size of this
111 record is fcache_line_record_size, so that the memory consumption
112 doesn't explode. We thus scale total_lines down to
113 fcache_line_record_size. */
114 vec<line_info, va_heap> line_record;
115
116 fcache ();
117 ~fcache ();
118};
447924ef
JM
119
120/* Current position in real source file. */
121
3edf64aa 122location_t input_location = UNKNOWN_LOCATION;
447924ef
JM
123
124struct line_maps *line_table;
125
f87e22c5
DM
126/* A stashed copy of "line_table" for use by selftest::line_table_test.
127 This needs to be a global so that it can be a GC root, and thus
128 prevent the stashed copy from being garbage-collected if the GC runs
129 during a line_table_test. */
130
131struct line_maps *saved_line_table;
132
7ecc3eb9
DS
133static fcache *fcache_tab;
134static const size_t fcache_tab_size = 16;
135static const size_t fcache_buffer_size = 4 * 1024;
136static const size_t fcache_line_record_size = 100;
137
84756fd4
DS
138/* Expand the source location LOC into a human readable location. If
139 LOC resolves to a builtin location, the file name of the readable
7eb918cc
DS
140 location is set to the string "<built-in>". If EXPANSION_POINT_P is
141 TRUE and LOC is virtual, then it is resolved to the expansion
142 point of the involved macro. Otherwise, it is resolved to the
c4ca1a09
DS
143 spelling location of the token.
144
145 When resolving to the spelling location of the token, if the
146 resulting location is for a built-in location (that is, it has no
147 associated line/column) in the context of a macro expansion, the
148 returned location is the first one (while unwinding the macro
149 location towards its expansion point) that is in real source
c471c6ed
DM
150 code.
151
152 ASPECT controls which part of the location to use. */
7eb918cc
DS
153
154static expanded_location
155expand_location_1 (source_location loc,
c471c6ed
DM
156 bool expansion_point_p,
157 enum location_aspect aspect)
447924ef
JM
158{
159 expanded_location xloc;
0e50b624 160 const line_map_ordinary *map;
c4ca1a09 161 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
5368224f
DC
162 tree block = NULL;
163
164 if (IS_ADHOC_LOC (loc))
165 {
166 block = LOCATION_BLOCK (loc);
167 loc = LOCATION_LOCUS (loc);
168 }
c4ca1a09
DS
169
170 memset (&xloc, 0, sizeof (xloc));
84756fd4 171
c4ca1a09
DS
172 if (loc >= RESERVED_LOCATION_COUNT)
173 {
174 if (!expansion_point_p)
175 {
176 /* We want to resolve LOC to its spelling location.
177
178 But if that spelling location is a reserved location that
179 appears in the context of a macro expansion (like for a
180 location for a built-in token), let's consider the first
181 location (toward the expansion point) that is not reserved;
182 that is, the first location that is in real source code. */
183 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
0e50b624 184 loc, NULL);
c4ca1a09
DS
185 lrk = LRK_SPELLING_LOCATION;
186 }
c471c6ed
DM
187 loc = linemap_resolve_location (line_table, loc, lrk, &map);
188
189 /* loc is now either in an ordinary map, or is a reserved location.
190 If it is a compound location, the caret is in a spelling location,
191 but the start/finish might still be a virtual location.
192 Depending of what the caller asked for, we may need to recurse
193 one level in order to resolve any virtual locations in the
194 end-points. */
195 switch (aspect)
196 {
197 default:
198 gcc_unreachable ();
199 /* Fall through. */
200 case LOCATION_ASPECT_CARET:
201 break;
202 case LOCATION_ASPECT_START:
203 {
204 source_location start = get_start (loc);
205 if (start != loc)
206 return expand_location_1 (start, expansion_point_p, aspect);
207 }
208 break;
209 case LOCATION_ASPECT_FINISH:
210 {
211 source_location finish = get_finish (loc);
212 if (finish != loc)
213 return expand_location_1 (finish, expansion_point_p, aspect);
214 }
215 break;
216 }
c4ca1a09
DS
217 xloc = linemap_expand_location (line_table, map, loc);
218 }
84756fd4 219
5368224f 220 xloc.data = block;
447924ef 221 if (loc <= BUILTINS_LOCATION)
84756fd4
DS
222 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
223
447924ef
JM
224 return xloc;
225}
64a1a422 226
7ecc3eb9
DS
227/* Initialize the set of cache used for files accessed by caret
228 diagnostic. */
229
230static void
231diagnostic_file_cache_init (void)
232{
233 if (fcache_tab == NULL)
234 fcache_tab = new fcache[fcache_tab_size];
235}
236
592f32fa 237/* Free the resources used by the set of cache used for files accessed
7ecc3eb9
DS
238 by caret diagnostic. */
239
240void
241diagnostic_file_cache_fini (void)
242{
243 if (fcache_tab)
244 {
245 delete [] (fcache_tab);
246 fcache_tab = NULL;
247 }
248}
249
250/* Return the total lines number that have been read so far by the
251 line map (in the preprocessor) so far. For languages like C++ that
252 entirely preprocess the input file before starting to parse, this
253 equals the actual number of lines of the file. */
254
255static size_t
256total_lines_num (const char *file_path)
257{
258 size_t r = 0;
259 source_location l = 0;
260 if (linemap_get_file_highest_location (line_table, file_path, &l))
261 {
262 gcc_assert (l >= RESERVED_LOCATION_COUNT);
263 expanded_location xloc = expand_location (l);
264 r = xloc.line;
265 }
266 return r;
267}
268
269/* Lookup the cache used for the content of a given file accessed by
270 caret diagnostic. Return the found cached file, or NULL if no
271 cached file was found. */
272
273static fcache*
274lookup_file_in_cache_tab (const char *file_path)
275{
276 if (file_path == NULL)
277 return NULL;
278
279 diagnostic_file_cache_init ();
280
281 /* This will contain the found cached file. */
282 fcache *r = NULL;
283 for (unsigned i = 0; i < fcache_tab_size; ++i)
284 {
285 fcache *c = &fcache_tab[i];
286 if (c->file_path && !strcmp (c->file_path, file_path))
287 {
288 ++c->use_count;
289 r = c;
290 }
291 }
292
293 if (r)
294 ++r->use_count;
295
296 return r;
297}
298
f89b03b6
DM
299/* Purge any mention of FILENAME from the cache of files used for
300 printing source code. For use in selftests when working
301 with tempfiles. */
302
303void
304diagnostics_file_cache_forcibly_evict_file (const char *file_path)
305{
306 gcc_assert (file_path);
307
308 fcache *r = lookup_file_in_cache_tab (file_path);
309 if (!r)
310 /* Not found. */
311 return;
312
313 r->file_path = NULL;
314 if (r->fp)
315 fclose (r->fp);
316 r->fp = NULL;
317 r->nb_read = 0;
318 r->line_start_idx = 0;
319 r->line_num = 0;
320 r->line_record.truncate (0);
321 r->use_count = 0;
322 r->total_lines = 0;
c65236d6 323 r->missing_trailing_newline = true;
f89b03b6
DM
324}
325
7ecc3eb9
DS
326/* Return the file cache that has been less used, recently, or the
327 first empty one. If HIGHEST_USE_COUNT is non-null,
328 *HIGHEST_USE_COUNT is set to the highest use count of the entries
329 in the cache table. */
330
331static fcache*
332evicted_cache_tab_entry (unsigned *highest_use_count)
333{
334 diagnostic_file_cache_init ();
335
336 fcache *to_evict = &fcache_tab[0];
337 unsigned huc = to_evict->use_count;
338 for (unsigned i = 1; i < fcache_tab_size; ++i)
339 {
340 fcache *c = &fcache_tab[i];
341 bool c_is_empty = (c->file_path == NULL);
342
343 if (c->use_count < to_evict->use_count
344 || (to_evict->file_path && c_is_empty))
345 /* We evict C because it's either an entry with a lower use
346 count or one that is empty. */
347 to_evict = c;
348
349 if (huc < c->use_count)
350 huc = c->use_count;
351
352 if (c_is_empty)
353 /* We've reached the end of the cache; subsequent elements are
354 all empty. */
355 break;
356 }
357
358 if (highest_use_count)
359 *highest_use_count = huc;
360
361 return to_evict;
362}
363
364/* Create the cache used for the content of a given file to be
365 accessed by caret diagnostic. This cache is added to an array of
366 cache and can be retrieved by lookup_file_in_cache_tab. This
367 function returns the created cache. Note that only the last
368 fcache_tab_size files are cached. */
369
370static fcache*
371add_file_to_cache_tab (const char *file_path)
372{
373
374 FILE *fp = fopen (file_path, "r");
317363b4
DS
375 if (fp == NULL)
376 return NULL;
7ecc3eb9
DS
377
378 unsigned highest_use_count = 0;
379 fcache *r = evicted_cache_tab_entry (&highest_use_count);
380 r->file_path = file_path;
381 if (r->fp)
382 fclose (r->fp);
383 r->fp = fp;
384 r->nb_read = 0;
385 r->line_start_idx = 0;
386 r->line_num = 0;
387 r->line_record.truncate (0);
388 /* Ensure that this cache entry doesn't get evicted next time
389 add_file_to_cache_tab is called. */
390 r->use_count = ++highest_use_count;
391 r->total_lines = total_lines_num (file_path);
c65236d6 392 r->missing_trailing_newline = true;
7ecc3eb9
DS
393
394 return r;
395}
396
397/* Lookup the cache used for the content of a given file accessed by
398 caret diagnostic. If no cached file was found, create a new cache
399 for this file, add it to the array of cached file and return
400 it. */
401
402static fcache*
403lookup_or_add_file_to_cache_tab (const char *file_path)
404{
405 fcache *r = lookup_file_in_cache_tab (file_path);
406 if (r == NULL)
407 r = add_file_to_cache_tab (file_path);
408 return r;
409}
410
411/* Default constructor for a cache of file used by caret
412 diagnostic. */
413
414fcache::fcache ()
415: use_count (0), file_path (NULL), fp (NULL), data (0),
416 size (0), nb_read (0), line_start_idx (0), line_num (0),
c65236d6 417 total_lines (0), missing_trailing_newline (true)
7ecc3eb9
DS
418{
419 line_record.create (0);
420}
421
422/* Destructor for a cache of file used by caret diagnostic. */
423
424fcache::~fcache ()
425{
426 if (fp)
427 {
428 fclose (fp);
429 fp = NULL;
430 }
431 if (data)
432 {
433 XDELETEVEC (data);
434 data = 0;
435 }
436 line_record.release ();
437}
438
439/* Returns TRUE iff the cache would need to be filled with data coming
440 from the file. That is, either the cache is empty or full or the
441 current line is empty. Note that if the cache is full, it would
442 need to be extended and filled again. */
443
444static bool
445needs_read (fcache *c)
446{
447 return (c->nb_read == 0
448 || c->nb_read == c->size
449 || (c->line_start_idx >= c->nb_read - 1));
450}
451
452/* Return TRUE iff the cache is full and thus needs to be
453 extended. */
454
455static bool
456needs_grow (fcache *c)
457{
458 return c->nb_read == c->size;
459}
460
461/* Grow the cache if it needs to be extended. */
462
463static void
464maybe_grow (fcache *c)
9fec0042 465{
7ecc3eb9
DS
466 if (!needs_grow (c))
467 return;
468
469 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
1adae327 470 c->data = XRESIZEVEC (char, c->data, size);
7ecc3eb9
DS
471 c->size = size;
472}
9fec0042 473
7ecc3eb9
DS
474/* Read more data into the cache. Extends the cache if need be.
475 Returns TRUE iff new data could be read. */
476
477static bool
478read_data (fcache *c)
479{
480 if (feof (c->fp) || ferror (c->fp))
481 return false;
482
483 maybe_grow (c);
484
485 char * from = c->data + c->nb_read;
486 size_t to_read = c->size - c->nb_read;
487 size_t nb_read = fread (from, 1, to_read, c->fp);
488
489 if (ferror (c->fp))
490 return false;
491
492 c->nb_read += nb_read;
493 return !!nb_read;
494}
495
496/* Read new data iff the cache needs to be filled with more data
497 coming from the file FP. Return TRUE iff the cache was filled with
498 mode data. */
499
500static bool
501maybe_read_data (fcache *c)
502{
503 if (!needs_read (c))
504 return false;
505 return read_data (c);
506}
507
508/* Read a new line from file FP, using C as a cache for the data
509 coming from the file. Upon successful completion, *LINE is set to
1adae327
BE
510 the beginning of the line found. *LINE points directly in the
511 line cache and is only valid until the next call of get_next_line.
7ecc3eb9
DS
512 *LINE_LEN is set to the length of the line. Note that the line
513 does not contain any terminal delimiter. This function returns
514 true if some data was read or process from the cache, false
1adae327
BE
515 otherwise. Note that subsequent calls to get_next_line might
516 make the content of *LINE invalid. */
7ecc3eb9
DS
517
518static bool
519get_next_line (fcache *c, char **line, ssize_t *line_len)
520{
521 /* Fill the cache with data to process. */
522 maybe_read_data (c);
523
524 size_t remaining_size = c->nb_read - c->line_start_idx;
525 if (remaining_size == 0)
526 /* There is no more data to process. */
527 return false;
528
529 char *line_start = c->data + c->line_start_idx;
530
531 char *next_line_start = NULL;
532 size_t len = 0;
533 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
534 if (line_end == NULL)
9fec0042 535 {
7ecc3eb9
DS
536 /* We haven't found the end-of-line delimiter in the cache.
537 Fill the cache with more data from the file and look for the
538 '\n'. */
539 while (maybe_read_data (c))
540 {
541 line_start = c->data + c->line_start_idx;
542 remaining_size = c->nb_read - c->line_start_idx;
543 line_end = (char *) memchr (line_start, '\n', remaining_size);
544 if (line_end != NULL)
545 {
546 next_line_start = line_end + 1;
547 break;
548 }
549 }
550 if (line_end == NULL)
c65236d6
DM
551 {
552 /* We've loadded all the file into the cache and still no
553 '\n'. Let's say the line ends up at one byte passed the
554 end of the file. This is to stay consistent with the case
555 of when the line ends up with a '\n' and line_end points to
556 that terminal '\n'. That consistency is useful below in
557 the len calculation. */
558 line_end = c->data + c->nb_read ;
559 c->missing_trailing_newline = true;
560 }
561 else
562 c->missing_trailing_newline = false;
9fec0042 563 }
7ecc3eb9 564 else
c65236d6
DM
565 {
566 next_line_start = line_end + 1;
567 c->missing_trailing_newline = false;
568 }
7ecc3eb9
DS
569
570 if (ferror (c->fp))
1adae327 571 return false;
7ecc3eb9
DS
572
573 /* At this point, we've found the end of the of line. It either
574 points to the '\n' or to one byte after the last byte of the
575 file. */
576 gcc_assert (line_end != NULL);
9fec0042 577
7ecc3eb9
DS
578 len = line_end - line_start;
579
580 if (c->line_start_idx < c->nb_read)
581 *line = line_start;
582
583 ++c->line_num;
584
585 /* Before we update our line record, make sure the hint about the
586 total number of lines of the file is correct. If it's not, then
587 we give up recording line boundaries from now on. */
588 bool update_line_record = true;
589 if (c->line_num > c->total_lines)
590 update_line_record = false;
591
592 /* Now update our line record so that re-reading lines from the
593 before c->line_start_idx is faster. */
594 if (update_line_record
595 && c->line_record.length () < fcache_line_record_size)
596 {
597 /* If the file lines fits in the line record, we just record all
598 its lines ...*/
599 if (c->total_lines <= fcache_line_record_size
600 && c->line_num > c->line_record.length ())
601 c->line_record.safe_push (fcache::line_info (c->line_num,
602 c->line_start_idx,
603 line_end - c->data));
604 else if (c->total_lines > fcache_line_record_size)
605 {
606 /* ... otherwise, we just scale total_lines down to
607 (fcache_line_record_size lines. */
608 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
609 if (c->line_record.length () == 0
610 || n >= c->line_record.length ())
611 c->line_record.safe_push (fcache::line_info (c->line_num,
612 c->line_start_idx,
613 line_end - c->data));
614 }
615 }
616
617 /* Update c->line_start_idx so that it points to the next line to be
618 read. */
619 if (next_line_start)
620 c->line_start_idx = next_line_start - c->data;
621 else
622 /* We didn't find any terminal '\n'. Let's consider that the end
623 of line is the end of the data in the cache. The next
624 invocation of get_next_line will either read more data from the
625 underlying file or return false early because we've reached the
626 end of the file. */
627 c->line_start_idx = c->nb_read;
628
629 *line_len = len;
630
631 return true;
632}
633
7ecc3eb9
DS
634/* Consume the next bytes coming from the cache (or from its
635 underlying file if there are remaining unread bytes in the file)
636 until we reach the next end-of-line (or end-of-file). There is no
637 copying from the cache involved. Return TRUE upon successful
638 completion. */
639
640static bool
641goto_next_line (fcache *cache)
642{
643 char *l;
644 ssize_t len;
645
646 return get_next_line (cache, &l, &len);
647}
648
649/* Read an arbitrary line number LINE_NUM from the file cached in C.
1adae327
BE
650 If the line was read successfully, *LINE points to the beginning
651 of the line in the file cache and *LINE_LEN is the length of the
652 line. *LINE is not nul-terminated, but may contain zero bytes.
653 *LINE is only valid until the next call of read_line_num.
7ecc3eb9
DS
654 This function returns bool if a line was read. */
655
656static bool
657read_line_num (fcache *c, size_t line_num,
1adae327 658 char **line, ssize_t *line_len)
7ecc3eb9
DS
659{
660 gcc_assert (line_num > 0);
661
662 if (line_num <= c->line_num)
9789a912 663 {
7ecc3eb9
DS
664 /* We've been asked to read lines that are before c->line_num.
665 So lets use our line record (if it's not empty) to try to
666 avoid re-reading the file from the beginning again. */
7f4d640c 667
7ecc3eb9 668 if (c->line_record.is_empty ())
9fec0042 669 {
7ecc3eb9
DS
670 c->line_start_idx = 0;
671 c->line_num = 0;
672 }
673 else
674 {
675 fcache::line_info *i = NULL;
676 if (c->total_lines <= fcache_line_record_size)
677 {
678 /* In languages where the input file is not totally
679 preprocessed up front, the c->total_lines hint
680 can be smaller than the number of lines of the
681 file. In that case, only the first
682 c->total_lines have been recorded.
683
684 Otherwise, the first c->total_lines we've read have
685 their start/end recorded here. */
686 i = (line_num <= c->total_lines)
687 ? &c->line_record[line_num - 1]
688 : &c->line_record[c->total_lines - 1];
689 gcc_assert (i->line_num <= line_num);
690 }
691 else
692 {
693 /* So the file had more lines than our line record
694 size. Thus the number of lines we've recorded has
695 been scaled down to fcache_line_reacord_size. Let's
696 pick the start/end of the recorded line that is
697 closest to line_num. */
698 size_t n = (line_num <= c->total_lines)
699 ? line_num * fcache_line_record_size / c->total_lines
700 : c ->line_record.length () - 1;
701 if (n < c->line_record.length ())
702 {
703 i = &c->line_record[n];
704 gcc_assert (i->line_num <= line_num);
705 }
706 }
707
708 if (i && i->line_num == line_num)
709 {
1adae327
BE
710 /* We have the start/end of the line. */
711 *line = c->data + i->start_pos;
712 *line_len = i->end_pos - i->start_pos;
7ecc3eb9
DS
713 return true;
714 }
715
716 if (i)
717 {
718 c->line_start_idx = i->start_pos;
719 c->line_num = i->line_num - 1;
720 }
721 else
722 {
723 c->line_start_idx = 0;
724 c->line_num = 0;
725 }
9fec0042 726 }
9fec0042 727 }
7ecc3eb9
DS
728
729 /* Let's walk from line c->line_num up to line_num - 1, without
730 copying any line. */
731 while (c->line_num < line_num - 1)
732 if (!goto_next_line (c))
733 return false;
734
735 /* The line we want is the next one. Let's read and copy it back to
736 the caller. */
1adae327 737 return get_next_line (c, line, line_len);
9fec0042
MLI
738}
739
1adae327
BE
740/* Return the physical source line that corresponds to FILE_PATH/LINE.
741 The line is not nul-terminated. The returned pointer is only
742 valid until the next call of location_get_source_line.
743 Note that the line can contain several null characters,
744 so LINE_LEN, if non-null, points to the actual length of the line.
745 If the function fails, NULL is returned. */
9fec0042
MLI
746
747const char *
31bdd08a 748location_get_source_line (const char *file_path, int line,
7ecc3eb9 749 int *line_len)
9fec0042 750{
ac2a97db 751 char *buffer = NULL;
1adae327 752 ssize_t len;
7ecc3eb9 753
31bdd08a 754 if (line == 0)
367c8286
DS
755 return NULL;
756
31bdd08a 757 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
367c8286
DS
758 if (c == NULL)
759 return NULL;
760
31bdd08a 761 bool read = read_line_num (c, line, &buffer, &len);
9fec0042 762
7ecc3eb9
DS
763 if (read && line_len)
764 *line_len = len;
9fec0042 765
7ecc3eb9 766 return read ? buffer : NULL;
9fec0042
MLI
767}
768
c65236d6
DM
769/* Determine if FILE_PATH missing a trailing newline on its final line.
770 Only valid to call once all of the file has been loaded, by
771 requesting a line number beyond the end of the file. */
772
773bool
774location_missing_trailing_newline (const char *file_path)
775{
776 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
777 if (c == NULL)
778 return false;
779
780 return c->missing_trailing_newline;
781}
782
c468587a
DS
783/* Test if the location originates from the spelling location of a
784 builtin-tokens. That is, return TRUE if LOC is a (possibly
785 virtual) location of a built-in token that appears in the expansion
786 list of a macro. Please note that this function also works on
787 tokens that result from built-in tokens. For instance, the
788 function would return true if passed a token "4" that is the result
789 of the expansion of the built-in __LINE__ macro. */
790bool
791is_location_from_builtin_token (source_location loc)
792{
0e50b624 793 const line_map_ordinary *map = NULL;
c468587a
DS
794 loc = linemap_resolve_location (line_table, loc,
795 LRK_SPELLING_LOCATION, &map);
796 return loc == BUILTINS_LOCATION;
797}
798
7eb918cc
DS
799/* Expand the source location LOC into a human readable location. If
800 LOC is virtual, it resolves to the expansion point of the involved
801 macro. If LOC resolves to a builtin location, the file name of the
802 readable location is set to the string "<built-in>". */
803
804expanded_location
805expand_location (source_location loc)
806{
c471c6ed
DM
807 return expand_location_1 (loc, /*expansion_point_p=*/true,
808 LOCATION_ASPECT_CARET);
7eb918cc
DS
809}
810
811/* Expand the source location LOC into a human readable location. If
812 LOC is virtual, it resolves to the expansion location of the
813 relevant macro. If LOC resolves to a builtin location, the file
814 name of the readable location is set to the string
815 "<built-in>". */
816
817expanded_location
818expand_location_to_spelling_point (source_location loc)
819{
c471c6ed
DM
820 return expand_location_1 (loc, /*expansion_point_p=*/false,
821 LOCATION_ASPECT_CARET);
7eb918cc
DS
822}
823
8a645150
DM
824/* The rich_location class within libcpp requires a way to expand
825 source_location instances, and relies on the client code
826 providing a symbol named
827 linemap_client_expand_location_to_spelling_point
828 to do this.
829
830 This is the implementation for libcommon.a (all host binaries),
c471c6ed 831 which simply calls into expand_location_1. */
8a645150
DM
832
833expanded_location
c471c6ed
DM
834linemap_client_expand_location_to_spelling_point (source_location loc,
835 enum location_aspect aspect)
8a645150 836{
c471c6ed 837 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
8a645150
DM
838}
839
840
e1f0c178
MLI
841/* If LOCATION is in a system header and if it is a virtual location for
842 a token coming from the expansion of a macro, unwind it to the
843 location of the expansion point of the macro. Otherwise, just return
70dc395a
DS
844 LOCATION.
845
846 This is used for instance when we want to emit diagnostics about a
e1f0c178
MLI
847 token that may be located in a macro that is itself defined in a
848 system header, for example, for the NULL macro. In such a case, if
849 LOCATION were passed directly to diagnostic functions such as
850 warning_at, the diagnostic would be suppressed (unless
851 -Wsystem-headers). */
70dc395a
DS
852
853source_location
854expansion_point_location_if_in_system_header (source_location location)
855{
856 if (in_system_header_at (location))
857 location = linemap_resolve_location (line_table, location,
858 LRK_MACRO_EXPANSION_POINT,
859 NULL);
860 return location;
861}
7eb918cc 862
79ce98bc
MP
863/* If LOCATION is a virtual location for a token coming from the expansion
864 of a macro, unwind to the location of the expansion point of the macro. */
865
866source_location
867expansion_point_location (source_location location)
868{
869 return linemap_resolve_location (line_table, location,
870 LRK_MACRO_EXPANSION_POINT, NULL);
871}
872
a01fc549
DM
873/* Construct a location with caret at CARET, ranging from START to
874 finish e.g.
875
876 11111111112
877 12345678901234567890
878 522
879 523 return foo + bar;
880 ~~~~^~~~~
881 524
882
883 The location's caret is at the "+", line 523 column 15, but starts
884 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
885 of "bar" at column 19. */
886
887location_t
888make_location (location_t caret, location_t start, location_t finish)
889{
890 location_t pure_loc = get_pure_location (caret);
891 source_range src_range;
9144eabb
DM
892 src_range.m_start = get_start (start);
893 src_range.m_finish = get_finish (finish);
a01fc549
DM
894 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
895 pure_loc,
896 src_range,
897 NULL);
898 return combined_loc;
899}
900
a32c8316
MP
901/* Same as above, but taking a source range rather than two locations. */
902
903location_t
904make_location (location_t caret, source_range src_range)
905{
906 location_t pure_loc = get_pure_location (caret);
907 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
908}
909
64a1a422
TT
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)

/* Scale a number for display.  The result is:
   - the number itself, if it is strictly lower than 10 K (in base 2)
   - the number divided by 1024, if it is at least 10 K but strictly
     lower than 10 M (in base 2)
   - the number divided by 1024 * 1024, if it is at least 10 M
     (in base 2).  */
#define SCALE(x) ((unsigned long) ((x) >= 10 * ONE_M \
                                   ? (x) / ONE_M \
                                   : ((x) >= 10 * ONE_K \
                                      ? (x) / ONE_K \
                                      : (x))))

/* For a given integer, yield the unit matching SCALE:
   - the character ' ' if the number is strictly lower than 10 K
   - the character 'k' if the number is at least 10 K (in base 2)
     but strictly lower than 10 M (in base 2)
   - the character 'M' if the number is at least 10 M (in base 2).  */
#define STAT_LABEL(x) ((x) >= 10 * ONE_M ? 'M' : ((x) >= 10 * ONE_K ? 'k' : ' '))

/* Expand to two arguments: an integer amount scaled as a multiple of
   1K or 1M (in base 2), followed by the matching unit (either k, M,
   or ' ').  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
934
935/* Dump statistics to stderr about the memory usage of the line_table
936 set of line maps. This also displays some statistics about macro
937 expansion. */
938
939void
940dump_line_table_statistics (void)
941{
942 struct linemap_stats s;
d17687f6 943 long total_used_map_size,
64a1a422
TT
944 macro_maps_size,
945 total_allocated_map_size;
946
947 memset (&s, 0, sizeof (s));
948
949 linemap_get_statistics (line_table, &s);
950
951 macro_maps_size = s.macro_maps_used_size
952 + s.macro_maps_locations_size;
953
954 total_allocated_map_size = s.ordinary_maps_allocated_size
955 + s.macro_maps_allocated_size
956 + s.macro_maps_locations_size;
957
958 total_used_map_size = s.ordinary_maps_used_size
959 + s.macro_maps_used_size
960 + s.macro_maps_locations_size;
961
d17687f6 962 fprintf (stderr, "Number of expanded macros: %5ld\n",
64a1a422
TT
963 s.num_expanded_macros);
964 if (s.num_expanded_macros != 0)
d17687f6 965 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
64a1a422
TT
966 s.num_macro_tokens / s.num_expanded_macros);
967 fprintf (stderr,
968 "\nLine Table allocations during the "
969 "compilation process\n");
d17687f6 970 fprintf (stderr, "Number of ordinary maps used: %5ld%c\n",
64a1a422
TT
971 SCALE (s.num_ordinary_maps_used),
972 STAT_LABEL (s.num_ordinary_maps_used));
d17687f6 973 fprintf (stderr, "Ordinary map used size: %5ld%c\n",
64a1a422
TT
974 SCALE (s.ordinary_maps_used_size),
975 STAT_LABEL (s.ordinary_maps_used_size));
d17687f6 976 fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n",
64a1a422
TT
977 SCALE (s.num_ordinary_maps_allocated),
978 STAT_LABEL (s.num_ordinary_maps_allocated));
d17687f6 979 fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n",
64a1a422
TT
980 SCALE (s.ordinary_maps_allocated_size),
981 STAT_LABEL (s.ordinary_maps_allocated_size));
d17687f6 982 fprintf (stderr, "Number of macro maps used: %5ld%c\n",
64a1a422
TT
983 SCALE (s.num_macro_maps_used),
984 STAT_LABEL (s.num_macro_maps_used));
d17687f6 985 fprintf (stderr, "Macro maps used size: %5ld%c\n",
64a1a422
TT
986 SCALE (s.macro_maps_used_size),
987 STAT_LABEL (s.macro_maps_used_size));
d17687f6 988 fprintf (stderr, "Macro maps locations size: %5ld%c\n",
64a1a422
TT
989 SCALE (s.macro_maps_locations_size),
990 STAT_LABEL (s.macro_maps_locations_size));
d17687f6 991 fprintf (stderr, "Macro maps size: %5ld%c\n",
64a1a422
TT
992 SCALE (macro_maps_size),
993 STAT_LABEL (macro_maps_size));
d17687f6 994 fprintf (stderr, "Duplicated maps locations size: %5ld%c\n",
64a1a422
TT
995 SCALE (s.duplicated_macro_maps_locations_size),
996 STAT_LABEL (s.duplicated_macro_maps_locations_size));
d17687f6 997 fprintf (stderr, "Total allocated maps size: %5ld%c\n",
64a1a422
TT
998 SCALE (total_allocated_map_size),
999 STAT_LABEL (total_allocated_map_size));
d17687f6 1000 fprintf (stderr, "Total used maps size: %5ld%c\n",
64a1a422
TT
1001 SCALE (total_used_map_size),
1002 STAT_LABEL (total_used_map_size));
ee015909
DM
1003 fprintf (stderr, "Ad-hoc table size: %5ld%c\n",
1004 SCALE (s.adhoc_table_size),
1005 STAT_LABEL (s.adhoc_table_size));
1006 fprintf (stderr, "Ad-hoc table entries used: %5ld\n",
1007 s.adhoc_table_entries_used);
ebedc9a3
DM
1008 fprintf (stderr, "optimized_ranges: %i\n",
1009 line_table->num_optimized_ranges);
1010 fprintf (stderr, "unoptimized_ranges: %i\n",
1011 line_table->num_unoptimized_ranges);
ee015909 1012
64a1a422
TT
1013 fprintf (stderr, "\n");
1014}
ba4ad400
DM
1015
1016/* Get location one beyond the final location in ordinary map IDX. */
1017
1018static source_location
1019get_end_location (struct line_maps *set, unsigned int idx)
1020{
1021 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1022 return set->highest_location;
1023
1024 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1025 return MAP_START_LOCATION (next_map);
1026}
1027
/* Helper function for write_digit_row.
   Write the least significant decimal digit of DIGIT to STREAM as a
   single character.  DIGIT is expected to be non-negative.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1035
1036/* Helper function for dump_location_info.
1037 Write a row of numbers to STREAM, numbering a source line,
1038 giving the units, tens, hundreds etc of the column number. */
1039
1040static void
1041write_digit_row (FILE *stream, int indent,
ebedc9a3 1042 const line_map_ordinary *map,
ba4ad400
DM
1043 source_location loc, int max_col, int divisor)
1044{
1045 fprintf (stream, "%*c", indent, ' ');
1046 fprintf (stream, "|");
1047 for (int column = 1; column < max_col; column++)
1048 {
ebedc9a3 1049 source_location column_loc = loc + (column << map->m_range_bits);
ba4ad400
DM
1050 write_digit (stream, column_loc / divisor);
1051 }
1052 fprintf (stream, "\n");
1053}
1054
1055/* Write a half-closed (START) / half-open (END) interval of
1056 source_location to STREAM. */
1057
1058static void
1059dump_location_range (FILE *stream,
1060 source_location start, source_location end)
1061{
1062 fprintf (stream,
1063 " source_location interval: %u <= loc < %u\n",
1064 start, end);
1065}
1066
1067/* Write a labelled description of a half-closed (START) / half-open (END)
1068 interval of source_location to STREAM. */
1069
1070static void
1071dump_labelled_location_range (FILE *stream,
1072 const char *name,
1073 source_location start, source_location end)
1074{
1075 fprintf (stream, "%s\n", name);
1076 dump_location_range (stream, start, end);
1077 fprintf (stream, "\n");
1078}
1079
1080/* Write a visualization of the locations in the line_table to STREAM. */
1081
1082void
1083dump_location_info (FILE *stream)
1084{
1085 /* Visualize the reserved locations. */
1086 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1087 0, RESERVED_LOCATION_COUNT);
1088
1089 /* Visualize the ordinary line_map instances, rendering the sources. */
1090 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1091 {
1092 source_location end_location = get_end_location (line_table, idx);
1093 /* half-closed: doesn't include this one. */
1094
0e50b624
DM
1095 const line_map_ordinary *map
1096 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
ba4ad400
DM
1097 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1098 dump_location_range (stream,
1099 MAP_START_LOCATION (map), end_location);
1100 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1101 fprintf (stream, " starting at line: %i\n",
1102 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
ebedc9a3
DM
1103 fprintf (stream, " column and range bits: %i\n",
1104 map->m_column_and_range_bits);
ba4ad400 1105 fprintf (stream, " column bits: %i\n",
ebedc9a3
DM
1106 map->m_column_and_range_bits - map->m_range_bits);
1107 fprintf (stream, " range bits: %i\n",
1108 map->m_range_bits);
ba4ad400
DM
1109
1110 /* Render the span of source lines that this "map" covers. */
1111 for (source_location loc = MAP_START_LOCATION (map);
1112 loc < end_location;
ebedc9a3 1113 loc += (1 << map->m_range_bits) )
ba4ad400 1114 {
ebedc9a3
DM
1115 gcc_assert (pure_location_p (line_table, loc) );
1116
ba4ad400
DM
1117 expanded_location exploc
1118 = linemap_expand_location (line_table, map, loc);
1119
01512446 1120 if (exploc.column == 0)
ba4ad400
DM
1121 {
1122 /* Beginning of a new source line: draw the line. */
1123
1124 int line_size;
31bdd08a
DM
1125 const char *line_text = location_get_source_line (exploc.file,
1126 exploc.line,
1127 &line_size);
ba4ad400
DM
1128 if (!line_text)
1129 break;
1130 fprintf (stream,
1131 "%s:%3i|loc:%5i|%.*s\n",
1132 exploc.file, exploc.line,
1133 loc,
1134 line_size, line_text);
1135
1136 /* "loc" is at column 0, which means "the whole line".
1137 Render the locations *within* the line, by underlining
1138 it, showing the source_location numeric values
1139 at each column. */
ebedc9a3 1140 int max_col = (1 << map->m_column_and_range_bits) - 1;
ba4ad400
DM
1141 if (max_col > line_size)
1142 max_col = line_size + 1;
1143
1144 int indent = 14 + strlen (exploc.file);
1145
1146 /* Thousands. */
1147 if (end_location > 999)
ebedc9a3 1148 write_digit_row (stream, indent, map, loc, max_col, 1000);
ba4ad400
DM
1149
1150 /* Hundreds. */
1151 if (end_location > 99)
ebedc9a3 1152 write_digit_row (stream, indent, map, loc, max_col, 100);
ba4ad400
DM
1153
1154 /* Tens. */
ebedc9a3 1155 write_digit_row (stream, indent, map, loc, max_col, 10);
ba4ad400
DM
1156
1157 /* Units. */
ebedc9a3 1158 write_digit_row (stream, indent, map, loc, max_col, 1);
ba4ad400
DM
1159 }
1160 }
1161 fprintf (stream, "\n");
1162 }
1163
1164 /* Visualize unallocated values. */
1165 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1166 line_table->highest_location,
1167 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1168
1169 /* Visualize the macro line_map instances, rendering the sources. */
1170 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1171 {
1172 /* Each macro map that is allocated owns source_location values
1173 that are *lower* that the one before them.
1174 Hence it's meaningful to view them either in order of ascending
1175 source locations, or in order of ascending macro map index. */
1176 const bool ascending_source_locations = true;
1177 unsigned int idx = (ascending_source_locations
1178 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1179 : i);
0e50b624 1180 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
ba4ad400
DM
1181 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1182 idx,
1183 linemap_map_get_macro_name (map),
1184 MACRO_MAP_NUM_MACRO_TOKENS (map));
1185 dump_location_range (stream,
1186 map->start_location,
1187 (map->start_location
1188 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1189 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1190 "expansion point is location %i",
1191 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1192 fprintf (stream, " map->start_location: %u\n",
1193 map->start_location);
1194
1195 fprintf (stream, " macro_locations:\n");
1196 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1197 {
1198 source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1199 source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1200
1201 /* linemap_add_macro_token encodes token numbers in an expansion
1202 by putting them after MAP_START_LOCATION. */
1203
1204 /* I'm typically seeing 4 uninitialized entries at the end of
1205 0xafafafaf.
1206 This appears to be due to macro.c:replace_args
1207 adding 2 extra args for padding tokens; presumably there may
1208 be a leading and/or trailing padding token injected,
1209 each for 2 more location slots.
1210 This would explain there being up to 4 source_locations slots
1211 that may be uninitialized. */
1212
1213 fprintf (stream, " %u: %u, %u\n",
1214 i,
1215 x,
1216 y);
1217 if (x == y)
1218 {
1219 if (x < MAP_START_LOCATION (map))
1220 inform (x, "token %u has x-location == y-location == %u", i, x);
1221 else
1222 fprintf (stream,
1223 "x-location == y-location == %u encodes token # %u\n",
1224 x, x - MAP_START_LOCATION (map));
1225 }
1226 else
1227 {
1228 inform (x, "token %u has x-location == %u", i, x);
1229 inform (x, "token %u has y-location == %u", i, y);
1230 }
1231 }
1232 fprintf (stream, "\n");
1233 }
1234
1235 /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1236 macro map, presumably due to an off-by-one error somewhere
1237 between the logic in linemap_enter_macro and
1238 LINEMAPS_MACRO_LOWEST_LOCATION. */
1239 dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1240 MAX_SOURCE_LOCATION,
1241 MAX_SOURCE_LOCATION + 1);
1242
1243 /* Visualize ad-hoc values. */
1244 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1245 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1246}
d9b950dd 1247
88fa5555
DM
1248/* string_concat's constructor. */
1249
1250string_concat::string_concat (int num, location_t *locs)
1251 : m_num (num)
1252{
1253 m_locs = ggc_vec_alloc <location_t> (num);
1254 for (int i = 0; i < num; i++)
1255 m_locs[i] = locs[i];
1256}
1257
1258/* string_concat_db's constructor. */
1259
1260string_concat_db::string_concat_db ()
1261{
1262 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1263}
1264
1265/* Record that a string concatenation occurred, covering NUM
1266 string literal tokens. LOCS is an array of size NUM, containing the
1267 locations of the tokens. A copy of LOCS is taken. */
1268
1269void
1270string_concat_db::record_string_concatenation (int num, location_t *locs)
1271{
1272 gcc_assert (num > 1);
1273 gcc_assert (locs);
1274
1275 location_t key_loc = get_key_loc (locs[0]);
1276
1277 string_concat *concat
1278 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1279 m_table->put (key_loc, concat);
1280}
1281
1282/* Determine if LOC was the location of the the initial token of a
1283 concatenation of string literal tokens.
1284 If so, *OUT_NUM is written to with the number of tokens, and
1285 *OUT_LOCS with the location of an array of locations of the
1286 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1287 storage owned by the string_concat_db.
1288 Otherwise, return false. */
1289
1290bool
1291string_concat_db::get_string_concatenation (location_t loc,
1292 int *out_num,
1293 location_t **out_locs)
1294{
1295 gcc_assert (out_num);
1296 gcc_assert (out_locs);
1297
1298 location_t key_loc = get_key_loc (loc);
1299
1300 string_concat **concat = m_table->get (key_loc);
1301 if (!concat)
1302 return false;
1303
1304 *out_num = (*concat)->m_num;
1305 *out_locs =(*concat)->m_locs;
1306 return true;
1307}
1308
1309/* Internal function. Canonicalize LOC into a form suitable for
1310 use as a key within the database, stripping away macro expansion,
1311 ad-hoc information, and range information, using the location of
1312 the start of LOC within an ordinary linemap. */
1313
1314location_t
1315string_concat_db::get_key_loc (location_t loc)
1316{
1317 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1318 NULL);
1319
1320 loc = get_range_from_loc (line_table, loc).m_start;
1321
1322 return loc;
1323}
1324
1325/* Helper class for use within get_substring_ranges_for_loc.
1326 An vec of cpp_string with responsibility for releasing all of the
1327 str->text for each str in the vector. */
1328
1329class auto_cpp_string_vec : public auto_vec <cpp_string>
1330{
1331 public:
1332 auto_cpp_string_vec (int alloc)
1333 : auto_vec <cpp_string> (alloc) {}
1334
1335 ~auto_cpp_string_vec ()
1336 {
1337 /* Clean up the copies within this vec. */
1338 int i;
1339 cpp_string *str;
1340 FOR_EACH_VEC_ELT (*this, i, str)
1341 free (const_cast <unsigned char *> (str->text));
1342 }
1343};
1344
1345/* Attempt to populate RANGES with source location information on the
1346 individual characters within the string literal found at STRLOC.
1347 If CONCATS is non-NULL, then any string literals that the token at
1348 STRLOC was concatenated with are also added to RANGES.
1349
1350 Return NULL if successful, or an error message if any errors occurred (in
1351 which case RANGES may be only partially populated and should not
1352 be used).
1353
1354 This is implemented by re-parsing the relevant source line(s). */
1355
1356static const char *
1357get_substring_ranges_for_loc (cpp_reader *pfile,
1358 string_concat_db *concats,
1359 location_t strloc,
1360 enum cpp_ttype type,
1361 cpp_substring_ranges &ranges)
1362{
1363 gcc_assert (pfile);
1364
1365 if (strloc == UNKNOWN_LOCATION)
1366 return "unknown location";
1367
67b5d0b2
DM
1368 /* Reparsing the strings requires accurate location information.
1369 If -ftrack-macro-expansion has been overridden from its default
1370 of 2, then we might have a location of a macro expansion point,
1371 rather than the location of the literal itself.
1372 Avoid this by requiring that we have full macro expansion tracking
1373 for substring locations to be available. */
1374 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1375 return "track_macro_expansion != 2";
1376
94f597df
DM
1377 /* If #line or # 44 "file"-style directives are present, then there's
1378 no guarantee that the line numbers we have can be used to locate
1379 the strings. For example, we might have a .i file with # directives
1380 pointing back to lines within a .c file, but the .c file might
1381 have been edited since the .i file was created.
1382 In such a case, the safest course is to disable on-demand substring
1383 locations. */
1384 if (line_table->seen_line_directive)
1385 return "seen line directive";
1386
88fa5555
DM
1387 /* If string concatenation has occurred at STRLOC, get the locations
1388 of all of the literal tokens making up the compound string.
1389 Otherwise, just use STRLOC. */
1390 int num_locs = 1;
1391 location_t *strlocs = &strloc;
1392 if (concats)
1393 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1394
1395 auto_cpp_string_vec strs (num_locs);
1396 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1397 for (int i = 0; i < num_locs; i++)
1398 {
1399 /* Get range of strloc. We will use it to locate the start and finish
1400 of the literal token within the line. */
1401 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1402
1403 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1404 /* If the string is within a macro expansion, we can't get at the
1405 end location. */
1406 return "macro expansion";
1407
1408 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1409 /* If so, we can't reliably determine where the token started within
1410 its line. */
1411 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1412
1413 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1414 /* If so, we can't reliably determine where the token finished within
1415 its line. */
1416 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1417
1418 expanded_location start
1419 = expand_location_to_spelling_point (src_range.m_start);
1420 expanded_location finish
1421 = expand_location_to_spelling_point (src_range.m_finish);
1422 if (start.file != finish.file)
1423 return "range endpoints are in different files";
1424 if (start.line != finish.line)
1425 return "range endpoints are on different lines";
1426 if (start.column > finish.column)
1427 return "range endpoints are reversed";
1428
1429 int line_width;
1430 const char *line = location_get_source_line (start.file, start.line,
1431 &line_width);
1432 if (line == NULL)
1433 return "unable to read source line";
1434
1435 /* Determine the location of the literal (including quotes
1436 and leading prefix chars, such as the 'u' in a u""
1437 token). */
1438 const char *literal = line + start.column - 1;
1439 int literal_length = finish.column - start.column + 1;
1440
7cfa044d
DM
1441 /* Ensure that we don't crash if we got the wrong location. */
1442 if (line_width < (start.column - 1 + literal_length))
1443 return "line is not wide enough";
1444
88fa5555
DM
1445 cpp_string from;
1446 from.len = literal_length;
1447 /* Make a copy of the literal, to avoid having to rely on
1448 the lifetime of the copy of the line within the cache.
1449 This will be released by the auto_cpp_string_vec dtor. */
1450 from.text = XDUPVEC (unsigned char, literal, literal_length);
1451 strs.safe_push (from);
1452
1453 /* For very long lines, a new linemap could have started
1454 halfway through the token.
1455 Ensure that the loc_reader uses the linemap of the
1456 *end* of the token for its start location. */
1457 const line_map_ordinary *final_ord_map;
1458 linemap_resolve_location (line_table, src_range.m_finish,
1459 LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1460 location_t start_loc
1461 = linemap_position_for_line_and_column (line_table, final_ord_map,
1462 start.line, start.column);
1463
1464 cpp_string_location_reader loc_reader (start_loc, line_table);
1465 loc_readers.safe_push (loc_reader);
1466 }
1467
1468 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1469 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1470 loc_readers.address (),
1471 num_locs, &ranges, type);
1472 if (err)
1473 return err;
1474
1475 /* Success: "ranges" should now contain information on the string. */
1476 return NULL;
1477}
1478
65e736c0
DM
1479/* Attempt to populate *OUT_LOC with source location information on the
1480 given characters within the string literal found at STRLOC.
1481 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1482 character set.
1483
1484 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1485 and string literal "012345\n789"
1486 *OUT_LOC is written to with:
1487 "012345\n789"
1488 ~^~~~~
1489
88fa5555
DM
1490 If CONCATS is non-NULL, then any string literals that the token at
1491 STRLOC was concatenated with are also considered.
1492
1493 This is implemented by re-parsing the relevant source line(s).
1494
1495 Return NULL if successful, or an error message if any errors occurred.
1496 Error messages are intended for GCC developers (to help debugging) rather
1497 than for end-users. */
1498
1499const char *
65e736c0
DM
1500get_source_location_for_substring (cpp_reader *pfile,
1501 string_concat_db *concats,
1502 location_t strloc,
1503 enum cpp_ttype type,
1504 int caret_idx, int start_idx, int end_idx,
1505 source_location *out_loc)
1506{
1507 gcc_checking_assert (caret_idx >= 0);
88fa5555
DM
1508 gcc_checking_assert (start_idx >= 0);
1509 gcc_checking_assert (end_idx >= 0);
65e736c0 1510 gcc_assert (out_loc);
88fa5555
DM
1511
1512 cpp_substring_ranges ranges;
1513 const char *err
1514 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1515 if (err)
1516 return err;
1517
65e736c0
DM
1518 if (caret_idx >= ranges.get_num_ranges ())
1519 return "caret_idx out of range";
88fa5555
DM
1520 if (start_idx >= ranges.get_num_ranges ())
1521 return "start_idx out of range";
1522 if (end_idx >= ranges.get_num_ranges ())
1523 return "end_idx out of range";
1524
65e736c0
DM
1525 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1526 ranges.get_range (start_idx).m_start,
1527 ranges.get_range (end_idx).m_finish);
1528 return NULL;
1529}
1530
0e06d2b3
DM
1531#if CHECKING_P
1532
1533namespace selftest {
1534
1535/* Selftests of location handling. */
1536
65e736c0
DM
1537/* Attempt to populate *OUT_RANGE with source location information on the
1538 given character within the string literal found at STRLOC.
1539 CHAR_IDX refers to an offset within the execution character set.
1540 If CONCATS is non-NULL, then any string literals that the token at
1541 STRLOC was concatenated with are also considered.
1542
1543 This is implemented by re-parsing the relevant source line(s).
1544
1545 Return NULL if successful, or an error message if any errors occurred.
1546 Error messages are intended for GCC developers (to help debugging) rather
1547 than for end-users. */
1548
1549static const char *
1550get_source_range_for_char (cpp_reader *pfile,
1551 string_concat_db *concats,
1552 location_t strloc,
1553 enum cpp_ttype type,
1554 int char_idx,
1555 source_range *out_range)
1556{
1557 gcc_checking_assert (char_idx >= 0);
1558 gcc_assert (out_range);
1559
1560 cpp_substring_ranges ranges;
1561 const char *err
1562 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1563 if (err)
1564 return err;
1565
1566 if (char_idx >= ranges.get_num_ranges ())
1567 return "char_idx out of range";
1568
1569 *out_range = ranges.get_range (char_idx);
88fa5555
DM
1570 return NULL;
1571}
1572
65e736c0 1573/* As get_source_range_for_char, but write to *OUT the number
88fa5555
DM
1574 of ranges that are available. */
1575
0e06d2b3 1576static const char *
88fa5555
DM
1577get_num_source_ranges_for_substring (cpp_reader *pfile,
1578 string_concat_db *concats,
1579 location_t strloc,
1580 enum cpp_ttype type,
1581 int *out)
1582{
1583 gcc_assert (out);
1584
1585 cpp_substring_ranges ranges;
1586 const char *err
1587 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1588
1589 if (err)
1590 return err;
1591
1592 *out = ranges.get_num_ranges ();
1593 return NULL;
1594}
1595
d9b950dd
DM
1596/* Selftests of location handling. */
1597
082284da
DM
1598/* Verify that compare() on linenum_type handles comparisons over the full
1599 range of the type. */
1600
1601static void
1602test_linenum_comparisons ()
1603{
1604 linenum_type min_line (0);
1605 linenum_type max_line (0xffffffff);
1606 ASSERT_EQ (0, compare (min_line, min_line));
1607 ASSERT_EQ (0, compare (max_line, max_line));
1608
1609 ASSERT_GT (compare (max_line, min_line), 0);
1610 ASSERT_LT (compare (min_line, max_line), 0);
1611}
1612
741d3be5
DM
1613/* Helper function for verifying location data: when location_t
1614 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1615 as having column 0. */
1616
1617static bool
1618should_have_column_data_p (location_t loc)
1619{
1620 if (IS_ADHOC_LOC (loc))
1621 loc = get_location_from_adhoc_loc (line_table, loc);
1622 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1623 return false;
1624 return true;
1625}
1626
1627/* Selftest for should_have_column_data_p. */
1628
1629static void
1630test_should_have_column_data_p ()
1631{
1632 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1633 ASSERT_TRUE
1634 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1635 ASSERT_FALSE
1636 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1637}
1638
d9b950dd
DM
1639/* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1640 on LOC. */
1641
1642static void
1643assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1644 location_t loc)
1645{
1646 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1647 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
741d3be5
DM
1648 /* If location_t values are sufficiently high, then column numbers
1649 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1650 When close to the threshold, column numbers *may* be present: if
1651 the final linemap before the threshold contains a line that straddles
1652 the threshold, locations in that line have column information. */
1653 if (should_have_column_data_p (loc))
1654 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1655}
1656
f87e22c5
DM
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests in a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for source_location/location_t beyond which line-map.c changes
     behavior (disabling of the range-packing optimization, disabling
     of column-tracking).  We can exercise these by starting the line_table
     at interesting values at or near these thresholds.

   The following struct describes one particular case within our test
   matrix.  */

struct line_table_case
{
  /* The value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* The location value at which the line_table should start; zero
     leaves the line_table's own starting point in place.  */
  int m_base_location;

  line_table_case (int default_range_bits, int base_location)
  {
    m_default_range_bits = default_range_bits;
    m_base_location = base_location;
  }
};
1683
f87e22c5
DM
1684/* Constructor. Store the old value of line_table, and create a new
1685 one, using sane defaults. */
741d3be5 1686
f87e22c5 1687line_table_test::line_table_test ()
741d3be5 1688{
f87e22c5
DM
1689 gcc_assert (saved_line_table == NULL);
1690 saved_line_table = line_table;
1691 line_table = ggc_alloc<line_maps> ();
1692 linemap_init (line_table, BUILTINS_LOCATION);
1693 gcc_assert (saved_line_table->reallocator);
1694 line_table->reallocator = saved_line_table->reallocator;
1695 gcc_assert (saved_line_table->round_alloc_size);
1696 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1697 line_table->default_range_bits = 0;
1698}
741d3be5
DM
1699
1700/* Constructor. Store the old value of line_table, and create a new
1701 one, using the sitation described in CASE_. */
1702
f87e22c5 1703line_table_test::line_table_test (const line_table_case &case_)
741d3be5 1704{
f87e22c5
DM
1705 gcc_assert (saved_line_table == NULL);
1706 saved_line_table = line_table;
741d3be5
DM
1707 line_table = ggc_alloc<line_maps> ();
1708 linemap_init (line_table, BUILTINS_LOCATION);
f87e22c5
DM
1709 gcc_assert (saved_line_table->reallocator);
1710 line_table->reallocator = saved_line_table->reallocator;
1711 gcc_assert (saved_line_table->round_alloc_size);
1712 line_table->round_alloc_size = saved_line_table->round_alloc_size;
741d3be5
DM
1713 line_table->default_range_bits = case_.m_default_range_bits;
1714 if (case_.m_base_location)
1715 {
1716 line_table->highest_location = case_.m_base_location;
1717 line_table->highest_line = case_.m_base_location;
1718 }
1719}
1720
1721/* Destructor. Restore the old value of line_table. */
1722
f87e22c5 1723line_table_test::~line_table_test ()
741d3be5 1724{
f87e22c5
DM
1725 gcc_assert (saved_line_table != NULL);
1726 line_table = saved_line_table;
1727 saved_line_table = NULL;
d9b950dd
DM
1728}
1729
1730/* Verify basic operation of ordinary linemaps. */
1731
1732static void
741d3be5 1733test_accessing_ordinary_linemaps (const line_table_case &case_)
d9b950dd 1734{
f87e22c5 1735 line_table_test ltt (case_);
741d3be5 1736
d9b950dd
DM
1737 /* Build a simple linemap describing some locations. */
1738 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1739
1740 linemap_line_start (line_table, 1, 100);
1741 location_t loc_a = linemap_position_for_column (line_table, 1);
1742 location_t loc_b = linemap_position_for_column (line_table, 23);
1743
1744 linemap_line_start (line_table, 2, 100);
1745 location_t loc_c = linemap_position_for_column (line_table, 1);
1746 location_t loc_d = linemap_position_for_column (line_table, 17);
1747
1748 /* Example of a very long line. */
1749 linemap_line_start (line_table, 3, 2000);
1750 location_t loc_e = linemap_position_for_column (line_table, 700);
1751
5ccf1d8d
DM
1752 /* Transitioning back to a short line. */
1753 linemap_line_start (line_table, 4, 0);
1754 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1755
1756 if (should_have_column_data_p (loc_back_to_short))
1757 {
1758 /* Verify that we switched to short lines in the linemap. */
1759 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1760 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1761 }
1762
b9f4757f
DM
1763 /* Example of a line that will eventually be seen to be longer
1764 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1765 below that. */
1766 linemap_line_start (line_table, 5, 2000);
1767
1768 location_t loc_start_of_very_long_line
1769 = linemap_position_for_column (line_table, 2000);
1770 location_t loc_too_wide
1771 = linemap_position_for_column (line_table, 4097);
1772 location_t loc_too_wide_2
1773 = linemap_position_for_column (line_table, 4098);
1774
1775 /* ...and back to a sane line length. */
1776 linemap_line_start (line_table, 6, 100);
1777 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1778
d9b950dd
DM
1779 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1780
1781 /* Multiple files. */
1782 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1783 linemap_line_start (line_table, 1, 200);
1784 location_t loc_f = linemap_position_for_column (line_table, 150);
1785 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1786
1787 /* Verify that we can recover the location info. */
1788 assert_loceq ("foo.c", 1, 1, loc_a);
1789 assert_loceq ("foo.c", 1, 23, loc_b);
1790 assert_loceq ("foo.c", 2, 1, loc_c);
1791 assert_loceq ("foo.c", 2, 17, loc_d);
1792 assert_loceq ("foo.c", 3, 700, loc_e);
5ccf1d8d 1793 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
b9f4757f
DM
1794
1795 /* In the very wide line, the initial location should be fully tracked. */
1796 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1797 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1798 be disabled. */
1799 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1800 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1801 /*...and column-tracking should be re-enabled for subsequent lines. */
1802 assert_loceq ("foo.c", 6, 10, loc_sane_again);
1803
d9b950dd
DM
1804 assert_loceq ("bar.c", 1, 150, loc_f);
1805
1806 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
a01fc549
DM
1807 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1808
1809 /* Verify using make_location to build a range, and extracting data
1810 back from it. */
1811 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1812 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1813 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1814 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1815 ASSERT_EQ (loc_b, src_range.m_start);
1816 ASSERT_EQ (loc_d, src_range.m_finish);
d9b950dd
DM
1817}
1818
1819/* Verify various properties of UNKNOWN_LOCATION. */
1820
1821static void
1822test_unknown_location ()
1823{
1824 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1825 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1826 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1827}
1828
1829/* Verify various properties of BUILTINS_LOCATION. */
1830
1831static void
1832test_builtins ()
1833{
10d2fc23 1834 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
d9b950dd
DM
1835 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1836}
1837
9144eabb 1838/* Regression test for make_location.
cfa435e1
DM
1839 Ensure that we use pure locations for the start/finish of the range,
1840 rather than storing a packed or ad-hoc range as the start/finish. */
9144eabb
DM
1841
1842static void
1843test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1844{
1845 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1846 with C++ frontend.
1847 ....................0000000001111111111222.
1848 ....................1234567890123456789012. */
1849 const char *content = " r += !aaa == bbb;\n";
1850 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1851 line_table_test ltt (case_);
1852 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1853
1854 const location_t c11 = linemap_position_for_column (line_table, 11);
1855 const location_t c12 = linemap_position_for_column (line_table, 12);
1856 const location_t c13 = linemap_position_for_column (line_table, 13);
1857 const location_t c14 = linemap_position_for_column (line_table, 14);
1858 const location_t c21 = linemap_position_for_column (line_table, 21);
1859
1860 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1861 return;
1862
1863 /* Use column 13 for the caret location, arbitrarily, to verify that we
1864 handle start != caret. */
1865 const location_t aaa = make_location (c13, c12, c14);
1866 ASSERT_EQ (c13, get_pure_location (aaa));
1867 ASSERT_EQ (c12, get_start (aaa));
1868 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1869 ASSERT_EQ (c14, get_finish (aaa));
1870 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1871
1872 /* Make a location using a location with a range as the start-point. */
1873 const location_t not_aaa = make_location (c11, aaa, c14);
1874 ASSERT_EQ (c11, get_pure_location (not_aaa));
1875 /* It should use the start location of the range, not store the range
1876 itself. */
1877 ASSERT_EQ (c12, get_start (not_aaa));
1878 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1879 ASSERT_EQ (c14, get_finish (not_aaa));
1880 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1881
1882 /* Similarly, make a location with a range as the end-point. */
1883 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1884 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1885 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1886 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1887 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1888 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1889 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1890 /* It should use the finish location of the range, not store the range
1891 itself. */
1892 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1893 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1894 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1895 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1896 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1897}
1898
d9b950dd
DM
1899/* Verify reading of input files (e.g. for caret-based diagnostics). */
1900
1901static void
1902test_reading_source_line ()
1903{
85ecd05c 1904 /* Create a tempfile and write some text to it. */
741d3be5
DM
1905 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1906 "01234567890123456789\n"
1907 "This is the test text\n"
1adae327 1908 "This is the 3rd line");
85ecd05c
DM
1909
1910 /* Read back a specific line from the tempfile. */
d9b950dd 1911 int line_size;
741d3be5 1912 const char *source_line = location_get_source_line (tmp.get_filename (),
1adae327
BE
1913 3, &line_size);
1914 ASSERT_TRUE (source_line != NULL);
1915 ASSERT_EQ (20, line_size);
1916 ASSERT_TRUE (!strncmp ("This is the 3rd line",
1917 source_line, line_size));
1918
1919 source_line = location_get_source_line (tmp.get_filename (),
1920 2, &line_size);
d9b950dd 1921 ASSERT_TRUE (source_line != NULL);
85ecd05c 1922 ASSERT_EQ (21, line_size);
1adae327
BE
1923 ASSERT_TRUE (!strncmp ("This is the test text",
1924 source_line, line_size));
85ecd05c 1925
1adae327
BE
1926 source_line = location_get_source_line (tmp.get_filename (),
1927 4, &line_size);
1928 ASSERT_TRUE (source_line == NULL);
d9b950dd
DM
1929}
1930
741d3be5
DM
1931/* Tests of lexing. */
1932
/* Assert that cpp_token_as_text, applied to token TOK of reader PARSER,
   produces a string equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *actual_txt                                           \
      = cpp_token_as_text ((PARSER), (TOK));                            \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);           \
  SELFTEST_END_STMT
1941
1942/* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1943 and ranges from EXP_START_COL to EXP_FINISH_COL.
1944 Use LOC as the effective location of the selftest. */
1945
1946static void
1947assert_token_loc_eq (const location &loc,
1948 const cpp_token *tok,
1949 const char *exp_filename, int exp_linenum,
1950 int exp_start_col, int exp_finish_col)
1951{
1952 location_t tok_loc = tok->src_loc;
1953 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1954 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1955
1956 /* If location_t values are sufficiently high, then column numbers
1957 will be unavailable. */
1958 if (!should_have_column_data_p (tok_loc))
1959 return;
1960
1961 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1962 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1963 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1964 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1965}
1966
1967/* Use assert_token_loc_eq to verify the TOK->src_loc, using
1968 SELFTEST_LOCATION as the effective location of the selftest. */
1969
1970#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
1971 EXP_START_COL, EXP_FINISH_COL) \
1972 assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
1973 (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1974
1975/* Test of lexing a file using libcpp, verifying tokens and their
1976 location information. */
1977
1978static void
1979test_lexer (const line_table_case &case_)
1980{
1981 /* Create a tempfile and write some text to it. */
1982 const char *content =
1983 /*00000000011111111112222222222333333.3333444444444.455555555556
1984 12345678901234567890123456789012345.6789012345678.901234567890. */
1985 ("test_name /* c-style comment */\n"
1986 " \"test literal\"\n"
1987 " // test c++-style comment\n"
1988 " 42\n");
1989 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1990
f87e22c5 1991 line_table_test ltt (case_);
741d3be5
DM
1992
1993 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1994
1995 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1996 ASSERT_NE (fname, NULL);
1997
1998 /* Verify that we get the expected tokens back, with the correct
1999 location information. */
2000
2001 location_t loc;
2002 const cpp_token *tok;
2003 tok = cpp_get_token_with_location (parser, &loc);
2004 ASSERT_NE (tok, NULL);
2005 ASSERT_EQ (tok->type, CPP_NAME);
2006 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2007 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2008
2009 tok = cpp_get_token_with_location (parser, &loc);
2010 ASSERT_NE (tok, NULL);
2011 ASSERT_EQ (tok->type, CPP_STRING);
2012 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2013 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2014
2015 tok = cpp_get_token_with_location (parser, &loc);
2016 ASSERT_NE (tok, NULL);
2017 ASSERT_EQ (tok->type, CPP_NUMBER);
2018 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2019 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2020
2021 tok = cpp_get_token_with_location (parser, &loc);
2022 ASSERT_NE (tok, NULL);
2023 ASSERT_EQ (tok->type, CPP_EOF);
2024
2025 cpp_finish (parser, NULL);
2026 cpp_destroy (parser);
2027}
2028
88fa5555
DM
/* Forward declarations: lexer_test and lexer_test_options refer to
   each other.  */

struct lexer_test;
class lexer_test_options;

/* Abstract hook for customizing a lexer_test.
   The lexer_test constructor invokes the "apply" vfunc on the options
   object it is given (if any).  */

class lexer_test_options
{
 public:
  virtual void apply (lexer_test &test) = 0;
};
2042
f5ea989d
DM
2043/* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2044 in its dtor.
2045
2046 This is needed by struct lexer_test to ensure that the cleanup of the
2047 cpp_reader happens *after* the cleanup of the temp_source_file. */
2048
2049class cpp_reader_ptr
2050{
2051 public:
2052 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2053
2054 ~cpp_reader_ptr ()
2055 {
2056 cpp_finish (m_ptr, NULL);
2057 cpp_destroy (m_ptr);
2058 }
2059
2060 operator cpp_reader * () const { return m_ptr; }
2061
2062 private:
2063 cpp_reader *m_ptr;
2064};
2065
88fa5555
DM
2066/* A struct for writing lexer tests. */
2067
2068struct lexer_test
2069{
2070 lexer_test (const line_table_case &case_, const char *content,
2071 lexer_test_options *options);
2072 ~lexer_test ();
2073
2074 const cpp_token *get_token ();
2075
f5ea989d
DM
2076 /* The ordering of these fields matters.
2077 The line_table_test must be first, since the cpp_reader_ptr
2078 uses it.
2079 The cpp_reader must be cleaned up *after* the temp_source_file
2080 since the filenames in input.c's input cache are owned by the
2081 cpp_reader; in particular, when ~temp_source_file evicts the
2082 filename the filenames must still be alive. */
f87e22c5 2083 line_table_test m_ltt;
f5ea989d
DM
2084 cpp_reader_ptr m_parser;
2085 temp_source_file m_tempfile;
88fa5555 2086 string_concat_db m_concats;
a3998c2f 2087 bool m_implicitly_expect_EOF;
88fa5555
DM
2088};
2089
2090/* Use an EBCDIC encoding for the execution charset, specifically
2091 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2092
2093 This exercises iconv integration within libcpp.
2094 Not every build of iconv supports the given charset,
2095 so we need to flag this error and handle it gracefully. */
2096
2097class ebcdic_execution_charset : public lexer_test_options
2098{
2099 public:
2100 ebcdic_execution_charset () : m_num_iconv_errors (0)
2101 {
2102 gcc_assert (s_singleton == NULL);
2103 s_singleton = this;
2104 }
2105 ~ebcdic_execution_charset ()
2106 {
2107 gcc_assert (s_singleton == this);
2108 s_singleton = NULL;
2109 }
2110
2111 void apply (lexer_test &test) FINAL OVERRIDE
2112 {
2113 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2114 cpp_opts->narrow_charset = "IBM1047";
2115
2116 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2117 callbacks->error = on_error;
2118 }
2119
2120 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2121 int level ATTRIBUTE_UNUSED,
2122 int reason ATTRIBUTE_UNUSED,
2123 rich_location *richloc ATTRIBUTE_UNUSED,
2124 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2125 ATTRIBUTE_FPTR_PRINTF(5,0)
2126 {
2127 gcc_assert (s_singleton);
a7085816
JJ
2128 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2129 const char *msg = "conversion from %s to %s not supported by iconv";
2130#ifdef ENABLE_NLS
2131 msg = dgettext ("cpplib", msg);
2132#endif
88fa5555
DM
2133 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2134 when the local iconv build doesn't support the conversion. */
a7085816 2135 if (strcmp (msgid, msg) == 0)
88fa5555
DM
2136 {
2137 s_singleton->m_num_iconv_errors++;
2138 return true;
2139 }
2140
2141 /* Otherwise, we have an unexpected error. */
2142 abort ();
2143 }
2144
2145 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2146
2147 private:
2148 static ebcdic_execution_charset *s_singleton;
2149 int m_num_iconv_errors;
2150};
2151
2152ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2153
a3998c2f
DM
2154/* A lexer_test_options subclass that records a list of error
2155 messages emitted by the lexer. */
2156
2157class lexer_error_sink : public lexer_test_options
2158{
2159 public:
2160 lexer_error_sink ()
2161 {
2162 gcc_assert (s_singleton == NULL);
2163 s_singleton = this;
2164 }
2165 ~lexer_error_sink ()
2166 {
2167 gcc_assert (s_singleton == this);
2168 s_singleton = NULL;
2169
2170 int i;
2171 char *str;
2172 FOR_EACH_VEC_ELT (m_errors, i, str)
2173 free (str);
2174 }
2175
2176 void apply (lexer_test &test) FINAL OVERRIDE
2177 {
2178 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2179 callbacks->error = on_error;
2180 }
2181
2182 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2183 int level ATTRIBUTE_UNUSED,
2184 int reason ATTRIBUTE_UNUSED,
2185 rich_location *richloc ATTRIBUTE_UNUSED,
2186 const char *msgid, va_list *ap)
2187 ATTRIBUTE_FPTR_PRINTF(5,0)
2188 {
2189 char *msg = xvasprintf (msgid, *ap);
2190 s_singleton->m_errors.safe_push (msg);
2191 return true;
2192 }
2193
2194 auto_vec<char *> m_errors;
2195
2196 private:
2197 static lexer_error_sink *s_singleton;
2198};
2199
2200lexer_error_sink *lexer_error_sink::s_singleton;
2201
88fa5555
DM
2202/* Constructor. Override line_table with a new instance based on CASE_,
2203 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2204 start parsing the tempfile. */
2205
2206lexer_test::lexer_test (const line_table_case &case_, const char *content,
f5ea989d
DM
2207 lexer_test_options *options)
2208: m_ltt (case_),
2209 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
88fa5555
DM
2210 /* Create a tempfile and write the text to it. */
2211 m_tempfile (SELFTEST_LOCATION, ".c", content),
a3998c2f
DM
2212 m_concats (),
2213 m_implicitly_expect_EOF (true)
88fa5555
DM
2214{
2215 if (options)
2216 options->apply (*this);
2217
2218 cpp_init_iconv (m_parser);
2219
2220 /* Parse the file. */
2221 const char *fname = cpp_read_main_file (m_parser,
2222 m_tempfile.get_filename ());
2223 ASSERT_NE (fname, NULL);
2224}
2225
a3998c2f 2226/* Destructor. By default, verify that the next token in m_parser is EOF. */
88fa5555
DM
2227
2228lexer_test::~lexer_test ()
2229{
2230 location_t loc;
2231 const cpp_token *tok;
2232
a3998c2f
DM
2233 if (m_implicitly_expect_EOF)
2234 {
2235 tok = cpp_get_token_with_location (m_parser, &loc);
2236 ASSERT_NE (tok, NULL);
2237 ASSERT_EQ (tok->type, CPP_EOF);
2238 }
88fa5555
DM
2239}
2240
2241/* Get the next token from m_parser. */
2242
2243const cpp_token *
2244lexer_test::get_token ()
2245{
2246 location_t loc;
2247 const cpp_token *tok;
2248
2249 tok = cpp_get_token_with_location (m_parser, &loc);
2250 ASSERT_NE (tok, NULL);
2251 return tok;
2252}
2253
2254/* Verify that locations within string literals are correctly handled. */
2255
2256/* Verify get_source_range_for_substring for token(s) at STRLOC,
2257 using the string concatenation database for TEST.
2258
2259 Assert that the character at index IDX is on EXPECTED_LINE,
2260 and that it begins at column EXPECTED_START_COL and ends at
2261 EXPECTED_FINISH_COL (unless the locations are beyond
2262 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2263 columns). */
2264
2265static void
2266assert_char_at_range (const location &loc,
2267 lexer_test& test,
2268 location_t strloc, enum cpp_ttype type, int idx,
2269 int expected_line, int expected_start_col,
2270 int expected_finish_col)
2271{
2272 cpp_reader *pfile = test.m_parser;
2273 string_concat_db *concats = &test.m_concats;
2274
a954833d 2275 source_range actual_range = source_range();
88fa5555 2276 const char *err
65e736c0
DM
2277 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2278 &actual_range);
88fa5555
DM
2279 if (should_have_column_data_p (strloc))
2280 ASSERT_EQ_AT (loc, NULL, err);
2281 else
2282 {
2283 ASSERT_STREQ_AT (loc,
2284 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2285 err);
2286 return;
2287 }
2288
2289 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2290 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2291 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2292 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2293
2294 if (should_have_column_data_p (actual_range.m_start))
2295 {
2296 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2297 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2298 }
2299 if (should_have_column_data_p (actual_range.m_finish))
2300 {
2301 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2302 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2303 }
2304}
2305
2306/* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2307 the effective location of any errors. */
2308
2309#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2310 EXPECTED_START_COL, EXPECTED_FINISH_COL) \
2311 assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2312 (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2313 (EXPECTED_FINISH_COL))
2314
2315/* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2316 using the string concatenation database for TEST.
2317
2318 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2319
2320static void
2321assert_num_substring_ranges (const location &loc,
2322 lexer_test& test,
2323 location_t strloc,
2324 enum cpp_ttype type,
2325 int expected_num_ranges)
2326{
2327 cpp_reader *pfile = test.m_parser;
2328 string_concat_db *concats = &test.m_concats;
2329
0e06d2b3 2330 int actual_num_ranges = -1;
88fa5555
DM
2331 const char *err
2332 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2333 &actual_num_ranges);
2334 if (should_have_column_data_p (strloc))
2335 ASSERT_EQ_AT (loc, NULL, err);
2336 else
2337 {
2338 ASSERT_STREQ_AT (loc,
2339 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2340 err);
2341 return;
2342 }
2343 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2344}
2345
2346/* Macro for calling assert_num_substring_ranges, supplying
2347 SELFTEST_LOCATION for the effective location of any errors. */
2348
2349#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2350 EXPECTED_NUM_RANGES) \
2351 assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2352 (TYPE), (EXPECTED_NUM_RANGES))
2353
2354
2355/* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2356 returns an error (using the string concatenation database for TEST). */
2357
2358static void
2359assert_has_no_substring_ranges (const location &loc,
2360 lexer_test& test,
2361 location_t strloc,
2362 enum cpp_ttype type,
2363 const char *expected_err)
2364{
2365 cpp_reader *pfile = test.m_parser;
2366 string_concat_db *concats = &test.m_concats;
2367 cpp_substring_ranges ranges;
2368 const char *actual_err
2369 = get_substring_ranges_for_loc (pfile, concats, strloc,
2370 type, ranges);
2371 if (should_have_column_data_p (strloc))
2372 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2373 else
2374 ASSERT_STREQ_AT (loc,
2375 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2376 actual_err);
2377}
2378
2379#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
2380 assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2381 (STRLOC), (TYPE), (ERR))
2382
2383/* Lex a simple string literal. Verify the substring location data, before
2384 and after running cpp_interpret_string on it. */
2385
2386static void
2387test_lexer_string_locations_simple (const line_table_case &case_)
2388{
2389 /* Digits 0-9 (with 0 at column 10), the simple way.
2390 ....................000000000.11111111112.2222222223333333333
2391 ....................123456789.01234567890.1234567890123456789
2392 We add a trailing comment to ensure that we correctly locate
2393 the end of the string literal token. */
2394 const char *content = " \"0123456789\" /* not a string */\n";
2395 lexer_test test (case_, content, NULL);
2396
2397 /* Verify that we get the expected token back, with the correct
2398 location information. */
2399 const cpp_token *tok = test.get_token ();
2400 ASSERT_EQ (tok->type, CPP_STRING);
2401 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2402 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2403
2404 /* At this point in lexing, the quote characters are treated as part of
2405 the string (they are stripped off by cpp_interpret_string). */
2406
2407 ASSERT_EQ (tok->val.str.len, 12);
2408
2409 /* Verify that cpp_interpret_string works. */
2410 cpp_string dst_string;
2411 const enum cpp_ttype type = CPP_STRING;
2412 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2413 &dst_string, type);
2414 ASSERT_TRUE (result);
2415 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2416 free (const_cast <unsigned char *> (dst_string.text));
2417
2418 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3
DM
2419 opening quote, but does include the closing quote. */
2420 for (int i = 0; i <= 10; i++)
88fa5555
DM
2421 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2422 10 + i, 10 + i);
2423
bbd6fcf3 2424 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
88fa5555
DM
2425}
2426
2427/* As test_lexer_string_locations_simple, but use an EBCDIC execution
2428 encoding. */
2429
2430static void
2431test_lexer_string_locations_ebcdic (const line_table_case &case_)
2432{
2433 /* EBCDIC support requires iconv. */
2434 if (!HAVE_ICONV)
2435 return;
2436
2437 /* Digits 0-9 (with 0 at column 10), the simple way.
2438 ....................000000000.11111111112.2222222223333333333
2439 ....................123456789.01234567890.1234567890123456789
2440 We add a trailing comment to ensure that we correctly locate
2441 the end of the string literal token. */
2442 const char *content = " \"0123456789\" /* not a string */\n";
2443 ebcdic_execution_charset use_ebcdic;
2444 lexer_test test (case_, content, &use_ebcdic);
2445
2446 /* Verify that we get the expected token back, with the correct
2447 location information. */
2448 const cpp_token *tok = test.get_token ();
2449 ASSERT_EQ (tok->type, CPP_STRING);
2450 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2451 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2452
2453 /* At this point in lexing, the quote characters are treated as part of
2454 the string (they are stripped off by cpp_interpret_string). */
2455
2456 ASSERT_EQ (tok->val.str.len, 12);
2457
2458 /* The remainder of the test requires an iconv implementation that
2459 can convert from UTF-8 to the EBCDIC encoding requested above. */
2460 if (use_ebcdic.iconv_errors_occurred_p ())
2461 return;
2462
2463 /* Verify that cpp_interpret_string works. */
2464 cpp_string dst_string;
2465 const enum cpp_ttype type = CPP_STRING;
2466 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2467 &dst_string, type);
2468 ASSERT_TRUE (result);
2469 /* We should now have EBCDIC-encoded text, specifically
2470 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2471 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2472 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2473 (const char *)dst_string.text);
2474 free (const_cast <unsigned char *> (dst_string.text));
2475
2476 /* Verify that we don't attempt to record substring location information
2477 for such cases. */
2478 ASSERT_HAS_NO_SUBSTRING_RANGES
2479 (test, tok->src_loc, type,
2480 "execution character set != source character set");
2481}
2482
2483/* Lex a string literal containing a hex-escaped character.
2484 Verify the substring location data, before and after running
2485 cpp_interpret_string on it. */
2486
2487static void
2488test_lexer_string_locations_hex (const line_table_case &case_)
2489{
2490 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2491 and with a space in place of digit 6, to terminate the escaped
2492 hex code.
2493 ....................000000000.111111.11112222.
2494 ....................123456789.012345.67890123. */
2495 const char *content = " \"01234\\x35 789\"\n";
2496 lexer_test test (case_, content, NULL);
2497
2498 /* Verify that we get the expected token back, with the correct
2499 location information. */
2500 const cpp_token *tok = test.get_token ();
2501 ASSERT_EQ (tok->type, CPP_STRING);
2502 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2503 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2504
2505 /* At this point in lexing, the quote characters are treated as part of
2506 the string (they are stripped off by cpp_interpret_string). */
2507 ASSERT_EQ (tok->val.str.len, 15);
2508
2509 /* Verify that cpp_interpret_string works. */
2510 cpp_string dst_string;
2511 const enum cpp_ttype type = CPP_STRING;
2512 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2513 &dst_string, type);
2514 ASSERT_TRUE (result);
2515 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2516 free (const_cast <unsigned char *> (dst_string.text));
2517
2518 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2519 opening quote, but does include the closing quote. */
88fa5555
DM
2520 for (int i = 0; i <= 4; i++)
2521 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2522 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
bbd6fcf3 2523 for (int i = 6; i <= 10; i++)
88fa5555
DM
2524 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2525
bbd6fcf3 2526 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
88fa5555
DM
2527}
2528
2529/* Lex a string literal containing an octal-escaped character.
2530 Verify the substring location data after running cpp_interpret_string
2531 on it. */
2532
2533static void
2534test_lexer_string_locations_oct (const line_table_case &case_)
2535{
2536 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2537 and with a space in place of digit 6, to terminate the escaped
2538 octal code.
2539 ....................000000000.111111.11112222.2222223333333333444
2540 ....................123456789.012345.67890123.4567890123456789012 */
2541 const char *content = " \"01234\\065 789\" /* not a string */\n";
2542 lexer_test test (case_, content, NULL);
2543
2544 /* Verify that we get the expected token back, with the correct
2545 location information. */
2546 const cpp_token *tok = test.get_token ();
2547 ASSERT_EQ (tok->type, CPP_STRING);
2548 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2549
2550 /* Verify that cpp_interpret_string works. */
2551 cpp_string dst_string;
2552 const enum cpp_ttype type = CPP_STRING;
2553 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2554 &dst_string, type);
2555 ASSERT_TRUE (result);
2556 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2557 free (const_cast <unsigned char *> (dst_string.text));
2558
2559 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2560 opening quote, but does include the closing quote. */
88fa5555
DM
2561 for (int i = 0; i < 5; i++)
2562 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2563 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
bbd6fcf3 2564 for (int i = 6; i <= 10; i++)
88fa5555
DM
2565 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2566
bbd6fcf3 2567 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
88fa5555
DM
2568}
2569
2570/* Test of string literal containing letter escapes. */
2571
2572static void
2573test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2574{
2575 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2576 .....................000000000.1.11111.1.1.11222.22222223333333
2577 .....................123456789.0.12345.6.7.89012.34567890123456. */
2578 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2579 lexer_test test (case_, content, NULL);
2580
2581 /* Verify that we get the expected tokens back. */
2582 const cpp_token *tok = test.get_token ();
2583 ASSERT_EQ (tok->type, CPP_STRING);
2584 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2585
2586 /* Verify ranges of individual characters. */
2587 /* "\t". */
2588 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2589 0, 1, 10, 11);
2590 /* "foo". */
2591 for (int i = 1; i <= 3; i++)
2592 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2593 i, 1, 11 + i, 11 + i);
2594 /* "\\" and "\n". */
2595 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2596 4, 1, 15, 16);
2597 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2598 5, 1, 17, 18);
2599
bbd6fcf3
DM
2600 /* "bar" and closing quote for nul-terminator. */
2601 for (int i = 6; i <= 9; i++)
88fa5555
DM
2602 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2603 i, 1, 13 + i, 13 + i);
2604
bbd6fcf3 2605 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
88fa5555
DM
2606}
2607
2608/* Another test of a string literal containing a letter escape.
2609 Based on string seen in
2610 printf ("%-%\n");
2611 in gcc.dg/format/c90-printf-1.c. */
2612
2613static void
2614test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2615{
2616 /* .....................000000000.1111.11.1111.22222222223.
2617 .....................123456789.0123.45.6789.01234567890. */
2618 const char *content = (" \"%-%\\n\" /* non-str */\n");
2619 lexer_test test (case_, content, NULL);
2620
2621 /* Verify that we get the expected tokens back. */
2622 const cpp_token *tok = test.get_token ();
2623 ASSERT_EQ (tok->type, CPP_STRING);
2624 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2625
2626 /* Verify ranges of individual characters. */
2627 /* "%-%". */
2628 for (int i = 0; i < 3; i++)
2629 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2630 i, 1, 10 + i, 10 + i);
2631 /* "\n". */
2632 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2633 3, 1, 13, 14);
2634
bbd6fcf3
DM
2635 /* Closing quote for nul-terminator. */
2636 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2637 4, 1, 15, 15);
2638
2639 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
88fa5555
DM
2640}
2641
2642/* Lex a string literal containing UCN 4 characters.
2643 Verify the substring location data after running cpp_interpret_string
2644 on it. */
2645
2646static void
2647test_lexer_string_locations_ucn4 (const line_table_case &case_)
2648{
2649 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2650 as UCN 4.
2651 ....................000000000.111111.111122.222222223.33333333344444
2652 ....................123456789.012345.678901.234567890.12345678901234 */
2653 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2654 lexer_test test (case_, content, NULL);
2655
2656 /* Verify that we get the expected token back, with the correct
2657 location information. */
2658 const cpp_token *tok = test.get_token ();
2659 ASSERT_EQ (tok->type, CPP_STRING);
2660 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2661
2662 /* Verify that cpp_interpret_string works.
2663 The string should be encoded in the execution character
2664 set. Assuming that that is UTF-8, we should have the following:
2665 ----------- ---- ----- ------- ----------------
2666 Byte offset Byte Octal Unicode Source Column(s)
2667 ----------- ---- ----- ------- ----------------
2668 0 0x30 '0' 10
2669 1 0x31 '1' 11
2670 2 0x32 '2' 12
2671 3 0x33 '3' 13
2672 4 0x34 '4' 14
2673 5 0xE2 \342 U+2174 15-20
2674 6 0x85 \205 (cont) 15-20
2675 7 0xB4 \264 (cont) 15-20
2676 8 0xE2 \342 U+2175 21-26
2677 9 0x85 \205 (cont) 21-26
2678 10 0xB5 \265 (cont) 21-26
2679 11 0x37 '7' 27
2680 12 0x38 '8' 28
2681 13 0x39 '9' 29
bbd6fcf3 2682 14 0x00 30 (closing quote)
88fa5555
DM
2683 ----------- ---- ----- ------- ---------------. */
2684
2685 cpp_string dst_string;
2686 const enum cpp_ttype type = CPP_STRING;
2687 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2688 &dst_string, type);
2689 ASSERT_TRUE (result);
2690 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2691 (const char *)dst_string.text);
2692 free (const_cast <unsigned char *> (dst_string.text));
2693
2694 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2695 opening quote, but does include the closing quote.
88fa5555
DM
2696 '01234'. */
2697 for (int i = 0; i <= 4; i++)
2698 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2699 /* U+2174. */
2700 for (int i = 5; i <= 7; i++)
2701 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2702 /* U+2175. */
2703 for (int i = 8; i <= 10; i++)
2704 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
bbd6fcf3
DM
2705 /* '789' and nul terminator */
2706 for (int i = 11; i <= 14; i++)
88fa5555
DM
2707 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2708
bbd6fcf3 2709 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
88fa5555
DM
2710}
2711
2712/* Lex a string literal containing UCN 8 characters.
2713 Verify the substring location data after running cpp_interpret_string
2714 on it. */
2715
2716static void
2717test_lexer_string_locations_ucn8 (const line_table_case &case_)
2718{
2719 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2720 ....................000000000.111111.1111222222.2222333333333.344444
2721 ....................123456789.012345.6789012345.6789012345678.901234 */
2722 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2723 lexer_test test (case_, content, NULL);
2724
2725 /* Verify that we get the expected token back, with the correct
2726 location information. */
2727 const cpp_token *tok = test.get_token ();
2728 ASSERT_EQ (tok->type, CPP_STRING);
2729 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2730 "\"01234\\U00002174\\U00002175789\"");
2731
2732 /* Verify that cpp_interpret_string works.
2733 The UTF-8 encoding of the string is identical to that from
2734 the ucn4 testcase above; the only difference is the column
2735 locations. */
2736 cpp_string dst_string;
2737 const enum cpp_ttype type = CPP_STRING;
2738 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2739 &dst_string, type);
2740 ASSERT_TRUE (result);
2741 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2742 (const char *)dst_string.text);
2743 free (const_cast <unsigned char *> (dst_string.text));
2744
2745 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2746 opening quote, but does include the closing quote.
88fa5555
DM
2747 '01234'. */
2748 for (int i = 0; i <= 4; i++)
2749 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2750 /* U+2174. */
2751 for (int i = 5; i <= 7; i++)
2752 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2753 /* U+2175. */
2754 for (int i = 8; i <= 10; i++)
2755 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2756 /* '789' at columns 35-37 */
2757 for (int i = 11; i <= 13; i++)
2758 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
bbd6fcf3
DM
2759 /* Closing quote/nul-terminator at column 38. */
2760 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
88fa5555 2761
bbd6fcf3 2762 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
88fa5555
DM
2763}
2764
/* Fetch a big-endian 32-bit value and convert to host endianness.

   PTR_BE_VALUE points at four bytes holding the value with the most
   significant byte first.  The bytes are read one at a time through an
   unsigned char pointer and recombined with shifts, so the result does
   not depend on the byte order of the host.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *buf = (const unsigned char *)ptr_be_value;
  /* Widen each byte to uint32_t before shifting so that the shift by 24
     cannot overflow a signed int.  */
  return (((uint32_t) buf[0] << 24)
          | ((uint32_t) buf[1] << 16)
          | ((uint32_t) buf[2] << 8)
          | (uint32_t) buf[3]);
}
2776
2777/* Lex a wide string literal and verify that attempts to read substring
2778 location data from it fail gracefully. */
2779
2780static void
2781test_lexer_string_locations_wide_string (const line_table_case &case_)
2782{
2783 /* Digits 0-9.
2784 ....................000000000.11111111112.22222222233333
2785 ....................123456789.01234567890.12345678901234 */
2786 const char *content = " L\"0123456789\" /* non-str */\n";
2787 lexer_test test (case_, content, NULL);
2788
2789 /* Verify that we get the expected token back, with the correct
2790 location information. */
2791 const cpp_token *tok = test.get_token ();
2792 ASSERT_EQ (tok->type, CPP_WSTRING);
2793 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2794
2795 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2796 cpp_string dst_string;
2797 const enum cpp_ttype type = CPP_WSTRING;
2798 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2799 &dst_string, type);
2800 ASSERT_TRUE (result);
2801 /* The cpp_reader defaults to big-endian with
2802 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2803 now be encoded as UTF-32BE. */
2804 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2805 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2806 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2807 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2808 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2809 free (const_cast <unsigned char *> (dst_string.text));
2810
2811 /* We don't yet support generating substring location information
2812 for L"" strings. */
2813 ASSERT_HAS_NO_SUBSTRING_RANGES
2814 (test, tok->src_loc, type,
2815 "execution character set != source character set");
2816}
2817
/* Fetch a big-endian 16-bit value and convert to host endianness.

   PTR_BE_VALUE points at two bytes holding the value with the most
   significant byte first.  The bytes are read individually through an
   unsigned char pointer, so the result does not depend on the byte
   order of the host.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *buf = (const unsigned char *)ptr_be_value;
  /* Combine in unsigned arithmetic and narrow explicitly; the value
     always fits in 16 bits.  */
  return (uint16_t) (((unsigned int) buf[0] << 8) | (unsigned int) buf[1]);
}
2826
2827/* Lex a u"" string literal and verify that attempts to read substring
2828 location data from it fail gracefully. */
2829
2830static void
2831test_lexer_string_locations_string16 (const line_table_case &case_)
2832{
2833 /* Digits 0-9.
2834 ....................000000000.11111111112.22222222233333
2835 ....................123456789.01234567890.12345678901234 */
2836 const char *content = " u\"0123456789\" /* non-str */\n";
2837 lexer_test test (case_, content, NULL);
2838
2839 /* Verify that we get the expected token back, with the correct
2840 location information. */
2841 const cpp_token *tok = test.get_token ();
2842 ASSERT_EQ (tok->type, CPP_STRING16);
2843 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2844
2845 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2846 cpp_string dst_string;
2847 const enum cpp_ttype type = CPP_STRING16;
2848 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2849 &dst_string, type);
2850 ASSERT_TRUE (result);
2851
2852 /* The cpp_reader defaults to big-endian, so dst_string should
2853 now be encoded as UTF-16BE. */
2854 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2855 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2856 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2857 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2858 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2859 free (const_cast <unsigned char *> (dst_string.text));
2860
2861 /* We don't yet support generating substring location information
2862 for L"" strings. */
2863 ASSERT_HAS_NO_SUBSTRING_RANGES
2864 (test, tok->src_loc, type,
2865 "execution character set != source character set");
2866}
2867
2868/* Lex a U"" string literal and verify that attempts to read substring
2869 location data from it fail gracefully. */
2870
2871static void
2872test_lexer_string_locations_string32 (const line_table_case &case_)
2873{
2874 /* Digits 0-9.
2875 ....................000000000.11111111112.22222222233333
2876 ....................123456789.01234567890.12345678901234 */
2877 const char *content = " U\"0123456789\" /* non-str */\n";
2878 lexer_test test (case_, content, NULL);
2879
2880 /* Verify that we get the expected token back, with the correct
2881 location information. */
2882 const cpp_token *tok = test.get_token ();
2883 ASSERT_EQ (tok->type, CPP_STRING32);
2884 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2885
2886 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2887 cpp_string dst_string;
2888 const enum cpp_ttype type = CPP_STRING32;
2889 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2890 &dst_string, type);
2891 ASSERT_TRUE (result);
2892
2893 /* The cpp_reader defaults to big-endian, so dst_string should
2894 now be encoded as UTF-32BE. */
2895 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2896 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2897 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2898 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2899 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2900 free (const_cast <unsigned char *> (dst_string.text));
2901
2902 /* We don't yet support generating substring location information
2903 for L"" strings. */
2904 ASSERT_HAS_NO_SUBSTRING_RANGES
2905 (test, tok->src_loc, type,
2906 "execution character set != source character set");
2907}
2908
2909/* Lex a u8-string literal.
2910 Verify the substring location data after running cpp_interpret_string
2911 on it. */
2912
2913static void
2914test_lexer_string_locations_u8 (const line_table_case &case_)
2915{
2916 /* Digits 0-9.
2917 ....................000000000.11111111112.22222222233333
2918 ....................123456789.01234567890.12345678901234 */
2919 const char *content = " u8\"0123456789\" /* non-str */\n";
2920 lexer_test test (case_, content, NULL);
2921
2922 /* Verify that we get the expected token back, with the correct
2923 location information. */
2924 const cpp_token *tok = test.get_token ();
2925 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2926 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2927
2928 /* Verify that cpp_interpret_string works. */
2929 cpp_string dst_string;
2930 const enum cpp_ttype type = CPP_STRING;
2931 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2932 &dst_string, type);
2933 ASSERT_TRUE (result);
2934 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2935 free (const_cast <unsigned char *> (dst_string.text));
2936
2937 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3
DM
2938 opening quote, but does include the closing quote. */
2939 for (int i = 0; i <= 10; i++)
88fa5555
DM
2940 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2941}
2942
2943/* Lex a string literal containing UTF-8 source characters.
2944 Verify the substring location data after running cpp_interpret_string
2945 on it. */
2946
2947static void
2948test_lexer_string_locations_utf8_source (const line_table_case &case_)
2949{
2950 /* This string literal is written out to the source file as UTF-8,
2951 and is of the form "before mojibake after", where "mojibake"
2952 is written as the following four unicode code points:
2953 U+6587 CJK UNIFIED IDEOGRAPH-6587
2954 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2955 U+5316 CJK UNIFIED IDEOGRAPH-5316
2956 U+3051 HIRAGANA LETTER KE.
2957 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2958 "before" and "after" are 1 byte per unicode character.
2959
2960 The numbering shown are "columns", which are *byte* numbers within
2961 the line, rather than unicode character numbers.
2962
2963 .................... 000000000.1111111.
2964 .................... 123456789.0123456. */
2965 const char *content = (" \"before "
2966 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2967 UTF-8: 0xE6 0x96 0x87
2968 C octal escaped UTF-8: \346\226\207
2969 "column" numbers: 17-19. */
2970 "\346\226\207"
2971
2972 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2973 UTF-8: 0xE5 0xAD 0x97
2974 C octal escaped UTF-8: \345\255\227
2975 "column" numbers: 20-22. */
2976 "\345\255\227"
2977
2978 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2979 UTF-8: 0xE5 0x8C 0x96
2980 C octal escaped UTF-8: \345\214\226
2981 "column" numbers: 23-25. */
2982 "\345\214\226"
2983
2984 /* U+3051 HIRAGANA LETTER KE
2985 UTF-8: 0xE3 0x81 0x91
2986 C octal escaped UTF-8: \343\201\221
2987 "column" numbers: 26-28. */
2988 "\343\201\221"
2989
2990 /* column numbers 29 onwards
2991 2333333.33334444444444
2992 9012345.67890123456789. */
2993 " after\" /* non-str */\n");
2994 lexer_test test (case_, content, NULL);
2995
2996 /* Verify that we get the expected token back, with the correct
2997 location information. */
2998 const cpp_token *tok = test.get_token ();
2999 ASSERT_EQ (tok->type, CPP_STRING);
3000 ASSERT_TOKEN_AS_TEXT_EQ
3001 (test.m_parser, tok,
3002 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3003
3004 /* Verify that cpp_interpret_string works. */
3005 cpp_string dst_string;
3006 const enum cpp_ttype type = CPP_STRING;
3007 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3008 &dst_string, type);
3009 ASSERT_TRUE (result);
3010 ASSERT_STREQ
3011 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3012 (const char *)dst_string.text);
3013 free (const_cast <unsigned char *> (dst_string.text));
3014
3015 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 3016 opening quote, but does include the closing quote.
88fa5555 3017 Assuming that both source and execution encodings are UTF-8, we have
bbd6fcf3 3018 a run of 25 octets in each, plus the NUL terminator. */
88fa5555
DM
3019 for (int i = 0; i < 25; i++)
3020 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
bbd6fcf3
DM
3021 /* NUL-terminator should use the closing quote at column 35. */
3022 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
88fa5555 3023
bbd6fcf3 3024 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
88fa5555
DM
3025}
3026
3027/* Test of string literal concatenation. */
3028
3029static void
3030test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3031{
3032 /* Digits 0-9.
3033 .....................000000000.111111.11112222222222
3034 .....................123456789.012345.67890123456789. */
3035 const char *content = (" \"01234\" /* non-str */\n"
3036 " \"56789\" /* non-str */\n");
3037 lexer_test test (case_, content, NULL);
3038
3039 location_t input_locs[2];
3040
3041 /* Verify that we get the expected tokens back. */
3042 auto_vec <cpp_string> input_strings;
3043 const cpp_token *tok_a = test.get_token ();
3044 ASSERT_EQ (tok_a->type, CPP_STRING);
3045 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3046 input_strings.safe_push (tok_a->val.str);
3047 input_locs[0] = tok_a->src_loc;
3048
3049 const cpp_token *tok_b = test.get_token ();
3050 ASSERT_EQ (tok_b->type, CPP_STRING);
3051 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3052 input_strings.safe_push (tok_b->val.str);
3053 input_locs[1] = tok_b->src_loc;
3054
3055 /* Verify that cpp_interpret_string works. */
3056 cpp_string dst_string;
3057 const enum cpp_ttype type = CPP_STRING;
3058 bool result = cpp_interpret_string (test.m_parser,
3059 input_strings.address (), 2,
3060 &dst_string, type);
3061 ASSERT_TRUE (result);
3062 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3063 free (const_cast <unsigned char *> (dst_string.text));
3064
3065 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3066 test.m_concats.record_string_concatenation (2, input_locs);
3067
3068 location_t initial_loc = input_locs[0];
3069
bbd6fcf3 3070 /* "01234" on line 1. */
88fa5555
DM
3071 for (int i = 0; i <= 4; i++)
3072 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
bbd6fcf3
DM
3073 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3074 for (int i = 5; i <= 10; i++)
88fa5555
DM
3075 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3076
bbd6fcf3 3077 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
88fa5555
DM
3078}
3079
3080/* Another test of string literal concatenation. */
3081
3082static void
3083test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3084{
3085 /* Digits 0-9.
3086 .....................000000000.111.11111112222222
3087 .....................123456789.012.34567890123456. */
3088 const char *content = (" \"01\" /* non-str */\n"
3089 " \"23\" /* non-str */\n"
3090 " \"45\" /* non-str */\n"
3091 " \"67\" /* non-str */\n"
3092 " \"89\" /* non-str */\n");
3093 lexer_test test (case_, content, NULL);
3094
3095 auto_vec <cpp_string> input_strings;
3096 location_t input_locs[5];
3097
3098 /* Verify that we get the expected tokens back. */
3099 for (int i = 0; i < 5; i++)
3100 {
3101 const cpp_token *tok = test.get_token ();
3102 ASSERT_EQ (tok->type, CPP_STRING);
3103 input_strings.safe_push (tok->val.str);
3104 input_locs[i] = tok->src_loc;
3105 }
3106
3107 /* Verify that cpp_interpret_string works. */
3108 cpp_string dst_string;
3109 const enum cpp_ttype type = CPP_STRING;
3110 bool result = cpp_interpret_string (test.m_parser,
3111 input_strings.address (), 5,
3112 &dst_string, type);
3113 ASSERT_TRUE (result);
3114 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3115 free (const_cast <unsigned char *> (dst_string.text));
3116
3117 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3118 test.m_concats.record_string_concatenation (5, input_locs);
3119
3120 location_t initial_loc = input_locs[0];
3121
3122 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3123 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3124 and expect get_source_range_for_substring to fail.
3125 However, for a string concatenation test, we can have a case
3126 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3127 but subsequent strings can be after it.
3128 Attempting to detect this within assert_char_at_range
3129 would overcomplicate the logic for the common test cases, so
3130 we detect it here. */
3131 if (should_have_column_data_p (input_locs[0])
3132 && !should_have_column_data_p (input_locs[4]))
3133 {
3134 /* Verify that get_source_range_for_substring gracefully rejects
3135 this case. */
3136 source_range actual_range;
3137 const char *err
65e736c0
DM
3138 = get_source_range_for_char (test.m_parser, &test.m_concats,
3139 initial_loc, type, 0, &actual_range);
88fa5555
DM
3140 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3141 return;
3142 }
3143
3144 for (int i = 0; i < 5; i++)
3145 for (int j = 0; j < 2; j++)
3146 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3147 i + 1, 10 + j, 10 + j);
3148
bbd6fcf3
DM
3149 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3150 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3151
3152 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
88fa5555
DM
3153}
3154
3155/* Another test of string literal concatenation, this time combined with
3156 various kinds of escaped characters. */
3157
3158static void
3159test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3160{
3161 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3162 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3163 const char *content
3164 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3165 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3166 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3167 lexer_test test (case_, content, NULL);
3168
3169 auto_vec <cpp_string> input_strings;
3170 location_t input_locs[4];
3171
3172 /* Verify that we get the expected tokens back. */
3173 for (int i = 0; i < 4; i++)
3174 {
3175 const cpp_token *tok = test.get_token ();
3176 ASSERT_EQ (tok->type, CPP_STRING);
3177 input_strings.safe_push (tok->val.str);
3178 input_locs[i] = tok->src_loc;
3179 }
3180
3181 /* Verify that cpp_interpret_string works. */
3182 cpp_string dst_string;
3183 const enum cpp_ttype type = CPP_STRING;
3184 bool result = cpp_interpret_string (test.m_parser,
3185 input_strings.address (), 4,
3186 &dst_string, type);
3187 ASSERT_TRUE (result);
3188 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3189 free (const_cast <unsigned char *> (dst_string.text));
3190
3191 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3192 test.m_concats.record_string_concatenation (4, input_locs);
3193
3194 location_t initial_loc = input_locs[0];
3195
3196 for (int i = 0; i <= 4; i++)
3197 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3198 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3199 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3200 for (int i = 7; i <= 9; i++)
3201 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3202
bbd6fcf3
DM
3203 /* NUL-terminator should use the location of the final closing quote. */
3204 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3205
3206 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
88fa5555
DM
3207}
3208
3209/* Test of string literal in a macro. */
3210
3211static void
3212test_lexer_string_locations_macro (const line_table_case &case_)
3213{
3214 /* Digits 0-9.
3215 .....................0000000001111111111.22222222223.
3216 .....................1234567890123456789.01234567890. */
3217 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3218 " MACRO");
3219 lexer_test test (case_, content, NULL);
3220
3221 /* Verify that we get the expected tokens back. */
3222 const cpp_token *tok = test.get_token ();
3223 ASSERT_EQ (tok->type, CPP_PADDING);
3224
3225 tok = test.get_token ();
3226 ASSERT_EQ (tok->type, CPP_STRING);
3227 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3228
3229 /* Verify ranges of individual characters. We ought to
3230 see columns within the macro definition. */
bbd6fcf3 3231 for (int i = 0; i <= 10; i++)
88fa5555
DM
3232 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3233 i, 1, 20 + i, 20 + i);
3234
bbd6fcf3 3235 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
88fa5555
DM
3236
3237 tok = test.get_token ();
3238 ASSERT_EQ (tok->type, CPP_PADDING);
3239}
3240
3241/* Test of stringification of a macro argument. */
3242
3243static void
3244test_lexer_string_locations_stringified_macro_argument
3245 (const line_table_case &case_)
3246{
3247 /* .....................000000000111111111122222222223.
3248 .....................123456789012345678901234567890. */
3249 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3250 "MACRO(foo)\n");
3251 lexer_test test (case_, content, NULL);
3252
3253 /* Verify that we get the expected token back. */
3254 const cpp_token *tok = test.get_token ();
3255 ASSERT_EQ (tok->type, CPP_PADDING);
3256
3257 tok = test.get_token ();
3258 ASSERT_EQ (tok->type, CPP_STRING);
3259 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3260
3261 /* We don't support getting the location of a stringified macro
3262 argument. Verify that it fails gracefully. */
3263 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3264 "cpp_interpret_string_1 failed");
3265
3266 tok = test.get_token ();
3267 ASSERT_EQ (tok->type, CPP_PADDING);
3268
3269 tok = test.get_token ();
3270 ASSERT_EQ (tok->type, CPP_PADDING);
3271}
3272
3273/* Ensure that we are fail gracefully if something attempts to pass
3274 in a location that isn't a string literal token. Seen on this code:
3275
3276 const char a[] = " %d ";
3277 __builtin_printf (a, 0.5);
3278 ^
3279
3280 when c-format.c erroneously used the indicated one-character
3281 location as the format string location, leading to a read past the
3282 end of a string buffer in cpp_interpret_string_1. */
3283
3284static void
3285test_lexer_string_locations_non_string (const line_table_case &case_)
3286{
3287 /* .....................000000000111111111122222222223.
3288 .....................123456789012345678901234567890. */
3289 const char *content = (" a\n");
3290 lexer_test test (case_, content, NULL);
3291
3292 /* Verify that we get the expected token back. */
3293 const cpp_token *tok = test.get_token ();
3294 ASSERT_EQ (tok->type, CPP_NAME);
3295 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3296
3297 /* At this point, libcpp is attempting to interpret the name as a
3298 string literal, despite it not starting with a quote. We don't detect
3299 that, but we should at least fail gracefully. */
3300 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3301 "cpp_interpret_string_1 failed");
3302}
3303
3304/* Ensure that we can read substring information for a token which
3305 starts in one linemap and ends in another . Adapted from
3306 gcc.dg/cpp/pr69985.c. */
3307
3308static void
3309test_lexer_string_locations_long_line (const line_table_case &case_)
3310{
3311 /* .....................000000.000111111111
3312 .....................123456.789012346789. */
3313 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3314 " \"0123456789012345678901234567890123456789"
3315 "0123456789012345678901234567890123456789"
3316 "0123456789012345678901234567890123456789"
3317 "0123456789\"\n");
3318
3319 lexer_test test (case_, content, NULL);
3320
3321 /* Verify that we get the expected token back. */
3322 const cpp_token *tok = test.get_token ();
3323 ASSERT_EQ (tok->type, CPP_STRING);
3324
3325 if (!should_have_column_data_p (line_table->highest_location))
3326 return;
3327
3328 /* Verify ranges of individual characters. */
bbd6fcf3
DM
3329 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3330 for (int i = 0; i < 131; i++)
88fa5555
DM
3331 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3332 i, 2, 7 + i, 7 + i);
3333}
3334
b8f56412
DM
3335/* Test of locations within a raw string that doesn't contain a newline. */
3336
3337static void
3338test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3339{
3340 /* .....................00.0000000111111111122.
3341 .....................12.3456789012345678901. */
3342 const char *content = ("R\"foo(0123456789)foo\"\n");
3343 lexer_test test (case_, content, NULL);
3344
3345 /* Verify that we get the expected token back. */
3346 const cpp_token *tok = test.get_token ();
3347 ASSERT_EQ (tok->type, CPP_STRING);
3348
3349 /* Verify that cpp_interpret_string works. */
3350 cpp_string dst_string;
3351 const enum cpp_ttype type = CPP_STRING;
3352 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3353 &dst_string, type);
3354 ASSERT_TRUE (result);
3355 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3356 free (const_cast <unsigned char *> (dst_string.text));
3357
3358 if (!should_have_column_data_p (line_table->highest_location))
3359 return;
3360
3361 /* 0-9, plus the nil terminator. */
3362 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3363 for (int i = 0; i < 11; i++)
3364 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3365 i, 1, 7 + i, 7 + i);
3366}
3367
3368/* Test of locations within a raw string that contains a newline. */
3369
3370static void
3371test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3372{
3373 /* .....................00.0000.
3374 .....................12.3456. */
3375 const char *content = ("R\"foo(\n"
3376 /* .....................00000.
3377 .....................12345. */
3378 "hello\n"
3379 "world\n"
3380 /* .....................00000.
3381 .....................12345. */
3382 ")foo\"\n");
3383 lexer_test test (case_, content, NULL);
3384
3385 /* Verify that we get the expected token back. */
3386 const cpp_token *tok = test.get_token ();
3387 ASSERT_EQ (tok->type, CPP_STRING);
3388
3389 /* Verify that cpp_interpret_string works. */
3390 cpp_string dst_string;
3391 const enum cpp_ttype type = CPP_STRING;
3392 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3393 &dst_string, type);
3394 ASSERT_TRUE (result);
3395 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3396 free (const_cast <unsigned char *> (dst_string.text));
3397
3398 if (!should_have_column_data_p (line_table->highest_location))
3399 return;
3400
3401 /* Currently we don't support locations within raw strings that
3402 contain newlines. */
3403 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3404 "range endpoints are on different lines");
3405}
3406
a3998c2f
DM
3407/* Test of parsing an unterminated raw string. */
3408
3409static void
3410test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3411{
3412 const char *content = "R\"ouch()ouCh\" /* etc */";
3413
3414 lexer_error_sink errors;
3415 lexer_test test (case_, content, &errors);
3416 test.m_implicitly_expect_EOF = false;
3417
3418 /* Attempt to parse the raw string. */
3419 const cpp_token *tok = test.get_token ();
3420 ASSERT_EQ (tok->type, CPP_EOF);
3421
3422 ASSERT_EQ (1, errors.m_errors.length ());
3423 /* We expect the message "unterminated raw string"
3424 in the "cpplib" translation domain.
3425 It's not clear that dgettext is available on all supported hosts,
3426 so this assertion is commented-out for now.
3427 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3428 errors.m_errors[0]);
3429 */
3430}
3431
88fa5555
DM
3432/* Test of lexing char constants. */
3433
3434static void
3435test_lexer_char_constants (const line_table_case &case_)
3436{
3437 /* Various char constants.
3438 .....................0000000001111111111.22222222223.
3439 .....................1234567890123456789.01234567890. */
3440 const char *content = (" 'a'\n"
3441 " u'a'\n"
3442 " U'a'\n"
3443 " L'a'\n"
3444 " 'abc'\n");
3445 lexer_test test (case_, content, NULL);
3446
3447 /* Verify that we get the expected tokens back. */
3448 /* 'a'. */
3449 const cpp_token *tok = test.get_token ();
3450 ASSERT_EQ (tok->type, CPP_CHAR);
3451 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3452
3453 unsigned int chars_seen;
3454 int unsignedp;
3455 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3456 &chars_seen, &unsignedp);
3457 ASSERT_EQ (cc, 'a');
3458 ASSERT_EQ (chars_seen, 1);
3459
3460 /* u'a'. */
3461 tok = test.get_token ();
3462 ASSERT_EQ (tok->type, CPP_CHAR16);
3463 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3464
3465 /* U'a'. */
3466 tok = test.get_token ();
3467 ASSERT_EQ (tok->type, CPP_CHAR32);
3468 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3469
3470 /* L'a'. */
3471 tok = test.get_token ();
3472 ASSERT_EQ (tok->type, CPP_WCHAR);
3473 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3474
3475 /* 'abc' (c-char-sequence). */
3476 tok = test.get_token ();
3477 ASSERT_EQ (tok->type, CPP_CHAR);
3478 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3479}
741d3be5
DM
3480/* A table of interesting location_t values, giving one axis of our test
3481 matrix. */
3482
3483static const location_t boundary_locations[] = {
3484 /* Zero means "don't override the default values for a new line_table". */
3485 0,
3486
3487 /* An arbitrary non-zero value that isn't close to one of
3488 the boundary values below. */
3489 0x10000,
3490
3491 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3492 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3493 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3494 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3495 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3496 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3497
3498 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3499 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3500 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3501 LINE_MAP_MAX_LOCATION_WITH_COLS,
3502 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3503 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3504};
3505
f87e22c5 3506/* Run TESTCASE multiple times, once for each case in our test matrix. */
d9b950dd
DM
3507
3508void
f87e22c5 3509for_each_line_table_case (void (*testcase) (const line_table_case &))
d9b950dd 3510{
741d3be5
DM
3511 /* As noted above in the description of struct line_table_case,
3512 we want to explore a test matrix of interesting line_table
3513 situations, running various selftests for each case within the
3514 matrix. */
3515
3516 /* Run all tests with:
3517 (a) line_table->default_range_bits == 0, and
3518 (b) line_table->default_range_bits == 5. */
3519 int num_cases_tested = 0;
3520 for (int default_range_bits = 0; default_range_bits <= 5;
3521 default_range_bits += 5)
3522 {
3523 /* ...and use each of the "interesting" location values as
3524 the starting location within line_table. */
3525 const int num_boundary_locations
3526 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3527 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3528 {
3529 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3530
f87e22c5 3531 testcase (c);
741d3be5
DM
3532
3533 num_cases_tested++;
3534 }
3535 }
3536
3537 /* Verify that we fully covered the test matrix. */
3538 ASSERT_EQ (num_cases_tested, 2 * 12);
f87e22c5
DM
3539}
3540
3541/* Run all of the selftests within this file. */
3542
3543void
3544input_c_tests ()
3545{
082284da 3546 test_linenum_comparisons ();
f87e22c5
DM
3547 test_should_have_column_data_p ();
3548 test_unknown_location ();
3549 test_builtins ();
9144eabb 3550 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
f87e22c5
DM
3551
3552 for_each_line_table_case (test_accessing_ordinary_linemaps);
3553 for_each_line_table_case (test_lexer);
3554 for_each_line_table_case (test_lexer_string_locations_simple);
3555 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3556 for_each_line_table_case (test_lexer_string_locations_hex);
3557 for_each_line_table_case (test_lexer_string_locations_oct);
3558 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3559 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3560 for_each_line_table_case (test_lexer_string_locations_ucn4);
3561 for_each_line_table_case (test_lexer_string_locations_ucn8);
3562 for_each_line_table_case (test_lexer_string_locations_wide_string);
3563 for_each_line_table_case (test_lexer_string_locations_string16);
3564 for_each_line_table_case (test_lexer_string_locations_string32);
3565 for_each_line_table_case (test_lexer_string_locations_u8);
3566 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3567 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3568 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3569 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3570 for_each_line_table_case (test_lexer_string_locations_macro);
3571 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3572 for_each_line_table_case (test_lexer_string_locations_non_string);
3573 for_each_line_table_case (test_lexer_string_locations_long_line);
b8f56412
DM
3574 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3575 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
a3998c2f 3576 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
f87e22c5 3577 for_each_line_table_case (test_lexer_char_constants);
741d3be5 3578
d9b950dd
DM
3579 test_reading_source_line ();
3580}
3581
3582} // namespace selftest
3583
3584#endif /* CHECKING_P */