]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/input.c
Fix numerous typos in comments
[thirdparty/gcc.git] / gcc / input.c
CommitLineData
/* Data and functions related to line maps and input files.
   Copyright (C) 2004-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "intl.h"
28f17529 24#include "diagnostic-core.h"
99b4f3a2 25#include "selftest.h"
b73690a4 26#include "cpplib.h"
ffc2c526 27
e2f73ee8 28#ifndef HAVE_ICONV
29#define HAVE_ICONV 0
30#endif
31
ffc2c526 32/* This is a cache used by get_next_line to store the content of a
33 file to be searched for file lines. */
34struct fcache
35{
36 /* These are information used to store a line boundary. */
37 struct line_info
38 {
39 /* The line number. It starts from 1. */
40 size_t line_num;
41
42 /* The position (byte count) of the beginning of the line,
43 relative to the file data pointer. This starts at zero. */
44 size_t start_pos;
45
46 /* The position (byte count) of the last byte of the line. This
47 normally points to the '\n' character, or to one byte after the
48 last byte of the file, if the file doesn't contain a '\n'
49 character. */
50 size_t end_pos;
51
52 line_info (size_t l, size_t s, size_t e)
53 : line_num (l), start_pos (s), end_pos (e)
54 {}
55
56 line_info ()
57 :line_num (0), start_pos (0), end_pos (0)
58 {}
59 };
60
61 /* The number of time this file has been accessed. This is used
62 to designate which file cache to evict from the cache
63 array. */
64 unsigned use_count;
65
c6a7d9e9 66 /* The file_path is the key for identifying a particular file in
67 the cache.
68 For libcpp-using code, the underlying buffer for this field is
69 owned by the corresponding _cpp_file within the cpp_reader. */
ffc2c526 70 const char *file_path;
71
72 FILE *fp;
73
74 /* This points to the content of the file that we've read so
75 far. */
76 char *data;
77
78 /* The size of the DATA array above.*/
79 size_t size;
80
81 /* The number of bytes read from the underlying file so far. This
82 must be less (or equal) than SIZE above. */
83 size_t nb_read;
84
85 /* The index of the beginning of the current line. */
86 size_t line_start_idx;
87
88 /* The number of the previous line read. This starts at 1. Zero
89 means we've read no line so far. */
90 size_t line_num;
91
92 /* This is the total number of lines of the current file. At the
93 moment, we try to get this information from the line map
94 subsystem. Note that this is just a hint. When using the C++
95 front-end, this hint is correct because the input file is then
96 completely tokenized before parsing starts; so the line map knows
97 the number of lines before compilation really starts. For e.g,
98 the C front-end, it can happen that we start emitting diagnostics
99 before the line map has seen the end of the file. */
100 size_t total_lines;
101
fe066ce3 102 /* Could this file be missing a trailing newline on its final line?
103 Initially true (to cope with empty files), set to true/false
104 as each line is read. */
105 bool missing_trailing_newline;
106
ffc2c526 107 /* This is a record of the beginning and end of the lines we've seen
108 while reading the file. This is useful to avoid walking the data
109 from the beginning when we are asked to read a line that is
110 before LINE_START_IDX above. Note that the maximum size of this
111 record is fcache_line_record_size, so that the memory consumption
112 doesn't explode. We thus scale total_lines down to
113 fcache_line_record_size. */
114 vec<line_info, va_heap> line_record;
115
116 fcache ();
117 ~fcache ();
118};
37ba4887 119
120/* Current position in real source file. */
121
415309e2 122location_t input_location = UNKNOWN_LOCATION;
37ba4887 123
124struct line_maps *line_table;
125
7ec388ed 126/* A stashed copy of "line_table" for use by selftest::line_table_test.
127 This needs to be a global so that it can be a GC root, and thus
128 prevent the stashed copy from being garbage-collected if the GC runs
129 during a line_table_test. */
130
131struct line_maps *saved_line_table;
132
ffc2c526 133static fcache *fcache_tab;
134static const size_t fcache_tab_size = 16;
135static const size_t fcache_buffer_size = 4 * 1024;
136static const size_t fcache_line_record_size = 100;
137
5ebe2143 138/* Expand the source location LOC into a human readable location. If
139 LOC resolves to a builtin location, the file name of the readable
39107655 140 location is set to the string "<built-in>". If EXPANSION_POINT_P is
141 TRUE and LOC is virtual, then it is resolved to the expansion
142 point of the involved macro. Otherwise, it is resolved to the
bd172d61 143 spelling location of the token.
144
145 When resolving to the spelling location of the token, if the
146 resulting location is for a built-in location (that is, it has no
147 associated line/column) in the context of a macro expansion, the
148 returned location is the first one (while unwinding the macro
149 location towards its expansion point) that is in real source
150 code. */
39107655 151
152static expanded_location
153expand_location_1 (source_location loc,
154 bool expansion_point_p)
37ba4887 155{
156 expanded_location xloc;
551e34da 157 const line_map_ordinary *map;
bd172d61 158 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
5169661d 159 tree block = NULL;
160
161 if (IS_ADHOC_LOC (loc))
162 {
163 block = LOCATION_BLOCK (loc);
164 loc = LOCATION_LOCUS (loc);
165 }
bd172d61 166
167 memset (&xloc, 0, sizeof (xloc));
5ebe2143 168
bd172d61 169 if (loc >= RESERVED_LOCATION_COUNT)
170 {
171 if (!expansion_point_p)
172 {
173 /* We want to resolve LOC to its spelling location.
174
175 But if that spelling location is a reserved location that
176 appears in the context of a macro expansion (like for a
177 location for a built-in token), let's consider the first
178 location (toward the expansion point) that is not reserved;
179 that is, the first location that is in real source code. */
180 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
551e34da 181 loc, NULL);
bd172d61 182 lrk = LRK_SPELLING_LOCATION;
183 }
184 loc = linemap_resolve_location (line_table, loc,
185 lrk, &map);
186 xloc = linemap_expand_location (line_table, map, loc);
187 }
5ebe2143 188
5169661d 189 xloc.data = block;
37ba4887 190 if (loc <= BUILTINS_LOCATION)
5ebe2143 191 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
192
37ba4887 193 return xloc;
194}
e77b8253 195
ffc2c526 196/* Initialize the set of cache used for files accessed by caret
197 diagnostic. */
198
199static void
200diagnostic_file_cache_init (void)
201{
202 if (fcache_tab == NULL)
203 fcache_tab = new fcache[fcache_tab_size];
204}
205
e7683169 206/* Free the resources used by the set of cache used for files accessed
ffc2c526 207 by caret diagnostic. */
208
209void
210diagnostic_file_cache_fini (void)
211{
212 if (fcache_tab)
213 {
214 delete [] (fcache_tab);
215 fcache_tab = NULL;
216 }
217}
218
219/* Return the total lines number that have been read so far by the
220 line map (in the preprocessor) so far. For languages like C++ that
221 entirely preprocess the input file before starting to parse, this
222 equals the actual number of lines of the file. */
223
224static size_t
225total_lines_num (const char *file_path)
226{
227 size_t r = 0;
228 source_location l = 0;
229 if (linemap_get_file_highest_location (line_table, file_path, &l))
230 {
231 gcc_assert (l >= RESERVED_LOCATION_COUNT);
232 expanded_location xloc = expand_location (l);
233 r = xloc.line;
234 }
235 return r;
236}
237
238/* Lookup the cache used for the content of a given file accessed by
239 caret diagnostic. Return the found cached file, or NULL if no
240 cached file was found. */
241
242static fcache*
243lookup_file_in_cache_tab (const char *file_path)
244{
245 if (file_path == NULL)
246 return NULL;
247
248 diagnostic_file_cache_init ();
249
250 /* This will contain the found cached file. */
251 fcache *r = NULL;
252 for (unsigned i = 0; i < fcache_tab_size; ++i)
253 {
254 fcache *c = &fcache_tab[i];
255 if (c->file_path && !strcmp (c->file_path, file_path))
256 {
257 ++c->use_count;
258 r = c;
259 }
260 }
261
262 if (r)
263 ++r->use_count;
264
265 return r;
266}
267
a476cb62 268/* Purge any mention of FILENAME from the cache of files used for
269 printing source code. For use in selftests when working
270 with tempfiles. */
271
272void
273diagnostics_file_cache_forcibly_evict_file (const char *file_path)
274{
275 gcc_assert (file_path);
276
277 fcache *r = lookup_file_in_cache_tab (file_path);
278 if (!r)
279 /* Not found. */
280 return;
281
282 r->file_path = NULL;
283 if (r->fp)
284 fclose (r->fp);
285 r->fp = NULL;
286 r->nb_read = 0;
287 r->line_start_idx = 0;
288 r->line_num = 0;
289 r->line_record.truncate (0);
290 r->use_count = 0;
291 r->total_lines = 0;
fe066ce3 292 r->missing_trailing_newline = true;
a476cb62 293}
294
ffc2c526 295/* Return the file cache that has been less used, recently, or the
296 first empty one. If HIGHEST_USE_COUNT is non-null,
297 *HIGHEST_USE_COUNT is set to the highest use count of the entries
298 in the cache table. */
299
300static fcache*
301evicted_cache_tab_entry (unsigned *highest_use_count)
302{
303 diagnostic_file_cache_init ();
304
305 fcache *to_evict = &fcache_tab[0];
306 unsigned huc = to_evict->use_count;
307 for (unsigned i = 1; i < fcache_tab_size; ++i)
308 {
309 fcache *c = &fcache_tab[i];
310 bool c_is_empty = (c->file_path == NULL);
311
312 if (c->use_count < to_evict->use_count
313 || (to_evict->file_path && c_is_empty))
314 /* We evict C because it's either an entry with a lower use
315 count or one that is empty. */
316 to_evict = c;
317
318 if (huc < c->use_count)
319 huc = c->use_count;
320
321 if (c_is_empty)
322 /* We've reached the end of the cache; subsequent elements are
323 all empty. */
324 break;
325 }
326
327 if (highest_use_count)
328 *highest_use_count = huc;
329
330 return to_evict;
331}
332
333/* Create the cache used for the content of a given file to be
334 accessed by caret diagnostic. This cache is added to an array of
335 cache and can be retrieved by lookup_file_in_cache_tab. This
336 function returns the created cache. Note that only the last
337 fcache_tab_size files are cached. */
338
339static fcache*
340add_file_to_cache_tab (const char *file_path)
341{
342
343 FILE *fp = fopen (file_path, "r");
c1cc4419 344 if (fp == NULL)
345 return NULL;
ffc2c526 346
347 unsigned highest_use_count = 0;
348 fcache *r = evicted_cache_tab_entry (&highest_use_count);
349 r->file_path = file_path;
350 if (r->fp)
351 fclose (r->fp);
352 r->fp = fp;
353 r->nb_read = 0;
354 r->line_start_idx = 0;
355 r->line_num = 0;
356 r->line_record.truncate (0);
357 /* Ensure that this cache entry doesn't get evicted next time
358 add_file_to_cache_tab is called. */
359 r->use_count = ++highest_use_count;
360 r->total_lines = total_lines_num (file_path);
fe066ce3 361 r->missing_trailing_newline = true;
ffc2c526 362
363 return r;
364}
365
366/* Lookup the cache used for the content of a given file accessed by
367 caret diagnostic. If no cached file was found, create a new cache
368 for this file, add it to the array of cached file and return
369 it. */
370
371static fcache*
372lookup_or_add_file_to_cache_tab (const char *file_path)
373{
374 fcache *r = lookup_file_in_cache_tab (file_path);
375 if (r == NULL)
376 r = add_file_to_cache_tab (file_path);
377 return r;
378}
379
380/* Default constructor for a cache of file used by caret
381 diagnostic. */
382
383fcache::fcache ()
384: use_count (0), file_path (NULL), fp (NULL), data (0),
385 size (0), nb_read (0), line_start_idx (0), line_num (0),
fe066ce3 386 total_lines (0), missing_trailing_newline (true)
ffc2c526 387{
388 line_record.create (0);
389}
390
391/* Destructor for a cache of file used by caret diagnostic. */
392
393fcache::~fcache ()
394{
395 if (fp)
396 {
397 fclose (fp);
398 fp = NULL;
399 }
400 if (data)
401 {
402 XDELETEVEC (data);
403 data = 0;
404 }
405 line_record.release ();
406}
407
408/* Returns TRUE iff the cache would need to be filled with data coming
409 from the file. That is, either the cache is empty or full or the
410 current line is empty. Note that if the cache is full, it would
411 need to be extended and filled again. */
412
413static bool
414needs_read (fcache *c)
415{
416 return (c->nb_read == 0
417 || c->nb_read == c->size
418 || (c->line_start_idx >= c->nb_read - 1));
419}
420
421/* Return TRUE iff the cache is full and thus needs to be
422 extended. */
423
424static bool
425needs_grow (fcache *c)
426{
427 return c->nb_read == c->size;
428}
429
430/* Grow the cache if it needs to be extended. */
431
432static void
433maybe_grow (fcache *c)
5a983084 434{
ffc2c526 435 if (!needs_grow (c))
436 return;
437
438 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
2e24ac9b 439 c->data = XRESIZEVEC (char, c->data, size);
ffc2c526 440 c->size = size;
441}
5a983084 442
ffc2c526 443/* Read more data into the cache. Extends the cache if need be.
444 Returns TRUE iff new data could be read. */
445
446static bool
447read_data (fcache *c)
448{
449 if (feof (c->fp) || ferror (c->fp))
450 return false;
451
452 maybe_grow (c);
453
454 char * from = c->data + c->nb_read;
455 size_t to_read = c->size - c->nb_read;
456 size_t nb_read = fread (from, 1, to_read, c->fp);
457
458 if (ferror (c->fp))
459 return false;
460
461 c->nb_read += nb_read;
462 return !!nb_read;
463}
464
465/* Read new data iff the cache needs to be filled with more data
466 coming from the file FP. Return TRUE iff the cache was filled with
467 mode data. */
468
469static bool
470maybe_read_data (fcache *c)
471{
472 if (!needs_read (c))
473 return false;
474 return read_data (c);
475}
476
477/* Read a new line from file FP, using C as a cache for the data
478 coming from the file. Upon successful completion, *LINE is set to
2e24ac9b 479 the beginning of the line found. *LINE points directly in the
480 line cache and is only valid until the next call of get_next_line.
ffc2c526 481 *LINE_LEN is set to the length of the line. Note that the line
482 does not contain any terminal delimiter. This function returns
483 true if some data was read or process from the cache, false
2e24ac9b 484 otherwise. Note that subsequent calls to get_next_line might
485 make the content of *LINE invalid. */
ffc2c526 486
487static bool
488get_next_line (fcache *c, char **line, ssize_t *line_len)
489{
490 /* Fill the cache with data to process. */
491 maybe_read_data (c);
492
493 size_t remaining_size = c->nb_read - c->line_start_idx;
494 if (remaining_size == 0)
495 /* There is no more data to process. */
496 return false;
497
498 char *line_start = c->data + c->line_start_idx;
499
500 char *next_line_start = NULL;
501 size_t len = 0;
502 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
503 if (line_end == NULL)
5a983084 504 {
ffc2c526 505 /* We haven't found the end-of-line delimiter in the cache.
506 Fill the cache with more data from the file and look for the
507 '\n'. */
508 while (maybe_read_data (c))
509 {
510 line_start = c->data + c->line_start_idx;
511 remaining_size = c->nb_read - c->line_start_idx;
512 line_end = (char *) memchr (line_start, '\n', remaining_size);
513 if (line_end != NULL)
514 {
515 next_line_start = line_end + 1;
516 break;
517 }
518 }
519 if (line_end == NULL)
fe066ce3 520 {
521 /* We've loadded all the file into the cache and still no
522 '\n'. Let's say the line ends up at one byte passed the
523 end of the file. This is to stay consistent with the case
524 of when the line ends up with a '\n' and line_end points to
525 that terminal '\n'. That consistency is useful below in
526 the len calculation. */
527 line_end = c->data + c->nb_read ;
528 c->missing_trailing_newline = true;
529 }
530 else
531 c->missing_trailing_newline = false;
5a983084 532 }
ffc2c526 533 else
fe066ce3 534 {
535 next_line_start = line_end + 1;
536 c->missing_trailing_newline = false;
537 }
ffc2c526 538
539 if (ferror (c->fp))
2e24ac9b 540 return false;
ffc2c526 541
542 /* At this point, we've found the end of the of line. It either
543 points to the '\n' or to one byte after the last byte of the
544 file. */
545 gcc_assert (line_end != NULL);
5a983084 546
ffc2c526 547 len = line_end - line_start;
548
549 if (c->line_start_idx < c->nb_read)
550 *line = line_start;
551
552 ++c->line_num;
553
554 /* Before we update our line record, make sure the hint about the
555 total number of lines of the file is correct. If it's not, then
556 we give up recording line boundaries from now on. */
557 bool update_line_record = true;
558 if (c->line_num > c->total_lines)
559 update_line_record = false;
560
561 /* Now update our line record so that re-reading lines from the
562 before c->line_start_idx is faster. */
563 if (update_line_record
564 && c->line_record.length () < fcache_line_record_size)
565 {
566 /* If the file lines fits in the line record, we just record all
567 its lines ...*/
568 if (c->total_lines <= fcache_line_record_size
569 && c->line_num > c->line_record.length ())
570 c->line_record.safe_push (fcache::line_info (c->line_num,
571 c->line_start_idx,
572 line_end - c->data));
573 else if (c->total_lines > fcache_line_record_size)
574 {
575 /* ... otherwise, we just scale total_lines down to
576 (fcache_line_record_size lines. */
577 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
578 if (c->line_record.length () == 0
579 || n >= c->line_record.length ())
580 c->line_record.safe_push (fcache::line_info (c->line_num,
581 c->line_start_idx,
582 line_end - c->data));
583 }
584 }
585
586 /* Update c->line_start_idx so that it points to the next line to be
587 read. */
588 if (next_line_start)
589 c->line_start_idx = next_line_start - c->data;
590 else
591 /* We didn't find any terminal '\n'. Let's consider that the end
592 of line is the end of the data in the cache. The next
593 invocation of get_next_line will either read more data from the
594 underlying file or return false early because we've reached the
595 end of the file. */
596 c->line_start_idx = c->nb_read;
597
598 *line_len = len;
599
600 return true;
601}
602
ffc2c526 603/* Consume the next bytes coming from the cache (or from its
604 underlying file if there are remaining unread bytes in the file)
605 until we reach the next end-of-line (or end-of-file). There is no
606 copying from the cache involved. Return TRUE upon successful
607 completion. */
608
609static bool
610goto_next_line (fcache *cache)
611{
612 char *l;
613 ssize_t len;
614
615 return get_next_line (cache, &l, &len);
616}
617
618/* Read an arbitrary line number LINE_NUM from the file cached in C.
2e24ac9b 619 If the line was read successfully, *LINE points to the beginning
620 of the line in the file cache and *LINE_LEN is the length of the
621 line. *LINE is not nul-terminated, but may contain zero bytes.
622 *LINE is only valid until the next call of read_line_num.
ffc2c526 623 This function returns bool if a line was read. */
624
625static bool
626read_line_num (fcache *c, size_t line_num,
2e24ac9b 627 char **line, ssize_t *line_len)
ffc2c526 628{
629 gcc_assert (line_num > 0);
630
631 if (line_num <= c->line_num)
fc3eff88 632 {
ffc2c526 633 /* We've been asked to read lines that are before c->line_num.
634 So lets use our line record (if it's not empty) to try to
635 avoid re-reading the file from the beginning again. */
13225ff5 636
ffc2c526 637 if (c->line_record.is_empty ())
5a983084 638 {
ffc2c526 639 c->line_start_idx = 0;
640 c->line_num = 0;
641 }
642 else
643 {
644 fcache::line_info *i = NULL;
645 if (c->total_lines <= fcache_line_record_size)
646 {
647 /* In languages where the input file is not totally
648 preprocessed up front, the c->total_lines hint
649 can be smaller than the number of lines of the
650 file. In that case, only the first
651 c->total_lines have been recorded.
652
653 Otherwise, the first c->total_lines we've read have
654 their start/end recorded here. */
655 i = (line_num <= c->total_lines)
656 ? &c->line_record[line_num - 1]
657 : &c->line_record[c->total_lines - 1];
658 gcc_assert (i->line_num <= line_num);
659 }
660 else
661 {
662 /* So the file had more lines than our line record
663 size. Thus the number of lines we've recorded has
664 been scaled down to fcache_line_reacord_size. Let's
665 pick the start/end of the recorded line that is
666 closest to line_num. */
667 size_t n = (line_num <= c->total_lines)
668 ? line_num * fcache_line_record_size / c->total_lines
669 : c ->line_record.length () - 1;
670 if (n < c->line_record.length ())
671 {
672 i = &c->line_record[n];
673 gcc_assert (i->line_num <= line_num);
674 }
675 }
676
677 if (i && i->line_num == line_num)
678 {
2e24ac9b 679 /* We have the start/end of the line. */
680 *line = c->data + i->start_pos;
681 *line_len = i->end_pos - i->start_pos;
ffc2c526 682 return true;
683 }
684
685 if (i)
686 {
687 c->line_start_idx = i->start_pos;
688 c->line_num = i->line_num - 1;
689 }
690 else
691 {
692 c->line_start_idx = 0;
693 c->line_num = 0;
694 }
5a983084 695 }
5a983084 696 }
ffc2c526 697
698 /* Let's walk from line c->line_num up to line_num - 1, without
699 copying any line. */
700 while (c->line_num < line_num - 1)
701 if (!goto_next_line (c))
702 return false;
703
704 /* The line we want is the next one. Let's read and copy it back to
705 the caller. */
2e24ac9b 706 return get_next_line (c, line, line_len);
5a983084 707}
708
2e24ac9b 709/* Return the physical source line that corresponds to FILE_PATH/LINE.
710 The line is not nul-terminated. The returned pointer is only
711 valid until the next call of location_get_source_line.
712 Note that the line can contain several null characters,
713 so LINE_LEN, if non-null, points to the actual length of the line.
714 If the function fails, NULL is returned. */
5a983084 715
716const char *
be812248 717location_get_source_line (const char *file_path, int line,
ffc2c526 718 int *line_len)
5a983084 719{
7b645785 720 char *buffer = NULL;
2e24ac9b 721 ssize_t len;
ffc2c526 722
be812248 723 if (line == 0)
9e8234d0 724 return NULL;
725
be812248 726 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
9e8234d0 727 if (c == NULL)
728 return NULL;
729
be812248 730 bool read = read_line_num (c, line, &buffer, &len);
5a983084 731
ffc2c526 732 if (read && line_len)
733 *line_len = len;
5a983084 734
ffc2c526 735 return read ? buffer : NULL;
5a983084 736}
737
fe066ce3 738/* Determine if FILE_PATH missing a trailing newline on its final line.
739 Only valid to call once all of the file has been loaded, by
740 requesting a line number beyond the end of the file. */
741
742bool
743location_missing_trailing_newline (const char *file_path)
744{
745 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
746 if (c == NULL)
747 return false;
748
749 return c->missing_trailing_newline;
750}
751
a4cfdfed 752/* Test if the location originates from the spelling location of a
753 builtin-tokens. That is, return TRUE if LOC is a (possibly
754 virtual) location of a built-in token that appears in the expansion
755 list of a macro. Please note that this function also works on
756 tokens that result from built-in tokens. For instance, the
757 function would return true if passed a token "4" that is the result
758 of the expansion of the built-in __LINE__ macro. */
759bool
760is_location_from_builtin_token (source_location loc)
761{
551e34da 762 const line_map_ordinary *map = NULL;
a4cfdfed 763 loc = linemap_resolve_location (line_table, loc,
764 LRK_SPELLING_LOCATION, &map);
765 return loc == BUILTINS_LOCATION;
766}
767
39107655 768/* Expand the source location LOC into a human readable location. If
769 LOC is virtual, it resolves to the expansion point of the involved
770 macro. If LOC resolves to a builtin location, the file name of the
771 readable location is set to the string "<built-in>". */
772
773expanded_location
774expand_location (source_location loc)
775{
776 return expand_location_1 (loc, /*expansion_point_p=*/true);
777}
778
779/* Expand the source location LOC into a human readable location. If
780 LOC is virtual, it resolves to the expansion location of the
781 relevant macro. If LOC resolves to a builtin location, the file
782 name of the readable location is set to the string
783 "<built-in>". */
784
785expanded_location
786expand_location_to_spelling_point (source_location loc)
787{
fb2edec0 788 return expand_location_1 (loc, /*expansion_point_p=*/false);
39107655 789}
790
f0479000 791/* The rich_location class within libcpp requires a way to expand
792 source_location instances, and relies on the client code
793 providing a symbol named
794 linemap_client_expand_location_to_spelling_point
795 to do this.
796
797 This is the implementation for libcommon.a (all host binaries),
798 which simply calls into expand_location_to_spelling_point. */
799
800expanded_location
801linemap_client_expand_location_to_spelling_point (source_location loc)
802{
803 return expand_location_to_spelling_point (loc);
804}
805
806
fb2edec0 807/* If LOCATION is in a system header and if it is a virtual location for
808 a token coming from the expansion of a macro, unwind it to the
809 location of the expansion point of the macro. Otherwise, just return
db30b351 810 LOCATION.
811
812 This is used for instance when we want to emit diagnostics about a
fb2edec0 813 token that may be located in a macro that is itself defined in a
814 system header, for example, for the NULL macro. In such a case, if
815 LOCATION were passed directly to diagnostic functions such as
816 warning_at, the diagnostic would be suppressed (unless
817 -Wsystem-headers). */
db30b351 818
819source_location
820expansion_point_location_if_in_system_header (source_location location)
821{
822 if (in_system_header_at (location))
823 location = linemap_resolve_location (line_table, location,
824 LRK_MACRO_EXPANSION_POINT,
825 NULL);
826 return location;
827}
39107655 828
5d4db8ef 829/* If LOCATION is a virtual location for a token coming from the expansion
830 of a macro, unwind to the location of the expansion point of the macro. */
831
832source_location
833expansion_point_location (source_location location)
834{
835 return linemap_resolve_location (line_table, location,
836 LRK_MACRO_EXPANSION_POINT, NULL);
837}
838
f17776ff 839/* Construct a location with caret at CARET, ranging from START to
840 finish e.g.
841
842 11111111112
843 12345678901234567890
844 522
845 523 return foo + bar;
846 ~~~~^~~~~
847 524
848
849 The location's caret is at the "+", line 523 column 15, but starts
850 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
851 of "bar" at column 19. */
852
853location_t
854make_location (location_t caret, location_t start, location_t finish)
855{
856 location_t pure_loc = get_pure_location (caret);
857 source_range src_range;
aca2a315 858 src_range.m_start = get_start (start);
859 src_range.m_finish = get_finish (finish);
f17776ff 860 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
861 pure_loc,
862 src_range,
863 NULL);
864 return combined_loc;
865}
866
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)

/* Scale a number for display.  The result is:
   - the number itself, if it is strictly lower than 10 K (in base 2)
   - the number divided by 1024, if it is >= 10 K but strictly lower
     than 10 M (in base 2)
   - the number divided by 1024 * 1024, if it is >= 10 M (in base 2)
   The result has type unsigned long.  */
#define SCALE(x) ((unsigned long) ((x) < 10 * ONE_K \
                  ? (x) \
                  : ((x) < 10 * ONE_M \
                     ? (x) / ONE_K \
                     : (x) / ONE_M)))

/* For a given integer, display either:
   - the character 'k', if the number is higher than 10 K (in base 2)
     but strictly lower than 10 M (in base 2)
   - the character 'M' if the number is higher than 10 M (in base 2)
   - the character ' ' if the number is strictly lower than 10 K  */
#define STAT_LABEL(x) ((x) < 10 * ONE_K ? ' ' : ((x) < 10 * ONE_M ? 'k' : 'M'))

/* Display an integer amount as multiple of 1K or 1M (in base 2).
   Display the correct unit (either k, M, or ' ') after the amount, as
   well.  Note that SIZE is evaluated more than once.  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
891
892/* Dump statistics to stderr about the memory usage of the line_table
893 set of line maps. This also displays some statistics about macro
894 expansion. */
895
896void
897dump_line_table_statistics (void)
898{
899 struct linemap_stats s;
2a688977 900 long total_used_map_size,
e77b8253 901 macro_maps_size,
902 total_allocated_map_size;
903
904 memset (&s, 0, sizeof (s));
905
906 linemap_get_statistics (line_table, &s);
907
908 macro_maps_size = s.macro_maps_used_size
909 + s.macro_maps_locations_size;
910
911 total_allocated_map_size = s.ordinary_maps_allocated_size
912 + s.macro_maps_allocated_size
913 + s.macro_maps_locations_size;
914
915 total_used_map_size = s.ordinary_maps_used_size
916 + s.macro_maps_used_size
917 + s.macro_maps_locations_size;
918
2a688977 919 fprintf (stderr, "Number of expanded macros: %5ld\n",
e77b8253 920 s.num_expanded_macros);
921 if (s.num_expanded_macros != 0)
2a688977 922 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
e77b8253 923 s.num_macro_tokens / s.num_expanded_macros);
924 fprintf (stderr,
925 "\nLine Table allocations during the "
926 "compilation process\n");
2a688977 927 fprintf (stderr, "Number of ordinary maps used: %5ld%c\n",
e77b8253 928 SCALE (s.num_ordinary_maps_used),
929 STAT_LABEL (s.num_ordinary_maps_used));
2a688977 930 fprintf (stderr, "Ordinary map used size: %5ld%c\n",
e77b8253 931 SCALE (s.ordinary_maps_used_size),
932 STAT_LABEL (s.ordinary_maps_used_size));
2a688977 933 fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n",
e77b8253 934 SCALE (s.num_ordinary_maps_allocated),
935 STAT_LABEL (s.num_ordinary_maps_allocated));
2a688977 936 fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n",
e77b8253 937 SCALE (s.ordinary_maps_allocated_size),
938 STAT_LABEL (s.ordinary_maps_allocated_size));
2a688977 939 fprintf (stderr, "Number of macro maps used: %5ld%c\n",
e77b8253 940 SCALE (s.num_macro_maps_used),
941 STAT_LABEL (s.num_macro_maps_used));
2a688977 942 fprintf (stderr, "Macro maps used size: %5ld%c\n",
e77b8253 943 SCALE (s.macro_maps_used_size),
944 STAT_LABEL (s.macro_maps_used_size));
2a688977 945 fprintf (stderr, "Macro maps locations size: %5ld%c\n",
e77b8253 946 SCALE (s.macro_maps_locations_size),
947 STAT_LABEL (s.macro_maps_locations_size));
2a688977 948 fprintf (stderr, "Macro maps size: %5ld%c\n",
e77b8253 949 SCALE (macro_maps_size),
950 STAT_LABEL (macro_maps_size));
2a688977 951 fprintf (stderr, "Duplicated maps locations size: %5ld%c\n",
e77b8253 952 SCALE (s.duplicated_macro_maps_locations_size),
953 STAT_LABEL (s.duplicated_macro_maps_locations_size));
2a688977 954 fprintf (stderr, "Total allocated maps size: %5ld%c\n",
e77b8253 955 SCALE (total_allocated_map_size),
956 STAT_LABEL (total_allocated_map_size));
2a688977 957 fprintf (stderr, "Total used maps size: %5ld%c\n",
e77b8253 958 SCALE (total_used_map_size),
959 STAT_LABEL (total_used_map_size));
0ffb4474 960 fprintf (stderr, "Ad-hoc table size: %5ld%c\n",
961 SCALE (s.adhoc_table_size),
962 STAT_LABEL (s.adhoc_table_size));
963 fprintf (stderr, "Ad-hoc table entries used: %5ld\n",
964 s.adhoc_table_entries_used);
a96cefb2 965 fprintf (stderr, "optimized_ranges: %i\n",
966 line_table->num_optimized_ranges);
967 fprintf (stderr, "unoptimized_ranges: %i\n",
968 line_table->num_unoptimized_ranges);
0ffb4474 969
e77b8253 970 fprintf (stderr, "\n");
971}
28f17529 972
973/* Get location one beyond the final location in ordinary map IDX. */
974
975static source_location
976get_end_location (struct line_maps *set, unsigned int idx)
977{
978 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
979 return set->highest_location;
980
981 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
982 return MAP_START_LOCATION (next_map);
983}
984
/* Helper function for write_digit_row.
   Emit the least-significant decimal digit of DIGIT to STREAM as a
   single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
992
993/* Helper function for dump_location_info.
994 Write a row of numbers to STREAM, numbering a source line,
995 giving the units, tens, hundreds etc of the column number. */
996
997static void
998write_digit_row (FILE *stream, int indent,
a96cefb2 999 const line_map_ordinary *map,
28f17529 1000 source_location loc, int max_col, int divisor)
1001{
1002 fprintf (stream, "%*c", indent, ' ');
1003 fprintf (stream, "|");
1004 for (int column = 1; column < max_col; column++)
1005 {
a96cefb2 1006 source_location column_loc = loc + (column << map->m_range_bits);
28f17529 1007 write_digit (stream, column_loc / divisor);
1008 }
1009 fprintf (stream, "\n");
1010}
1011
1012/* Write a half-closed (START) / half-open (END) interval of
1013 source_location to STREAM. */
1014
1015static void
1016dump_location_range (FILE *stream,
1017 source_location start, source_location end)
1018{
1019 fprintf (stream,
1020 " source_location interval: %u <= loc < %u\n",
1021 start, end);
1022}
1023
1024/* Write a labelled description of a half-closed (START) / half-open (END)
1025 interval of source_location to STREAM. */
1026
1027static void
1028dump_labelled_location_range (FILE *stream,
1029 const char *name,
1030 source_location start, source_location end)
1031{
1032 fprintf (stream, "%s\n", name);
1033 dump_location_range (stream, start, end);
1034 fprintf (stream, "\n");
1035}
1036
1037/* Write a visualization of the locations in the line_table to STREAM. */
1038
1039void
1040dump_location_info (FILE *stream)
1041{
1042 /* Visualize the reserved locations. */
1043 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1044 0, RESERVED_LOCATION_COUNT);
1045
1046 /* Visualize the ordinary line_map instances, rendering the sources. */
1047 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1048 {
1049 source_location end_location = get_end_location (line_table, idx);
1050 /* half-closed: doesn't include this one. */
1051
551e34da 1052 const line_map_ordinary *map
1053 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
28f17529 1054 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1055 dump_location_range (stream,
1056 MAP_START_LOCATION (map), end_location);
1057 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1058 fprintf (stream, " starting at line: %i\n",
1059 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
a96cefb2 1060 fprintf (stream, " column and range bits: %i\n",
1061 map->m_column_and_range_bits);
28f17529 1062 fprintf (stream, " column bits: %i\n",
a96cefb2 1063 map->m_column_and_range_bits - map->m_range_bits);
1064 fprintf (stream, " range bits: %i\n",
1065 map->m_range_bits);
28f17529 1066
1067 /* Render the span of source lines that this "map" covers. */
1068 for (source_location loc = MAP_START_LOCATION (map);
1069 loc < end_location;
a96cefb2 1070 loc += (1 << map->m_range_bits) )
28f17529 1071 {
a96cefb2 1072 gcc_assert (pure_location_p (line_table, loc) );
1073
28f17529 1074 expanded_location exploc
1075 = linemap_expand_location (line_table, map, loc);
1076
1077 if (0 == exploc.column)
1078 {
1079 /* Beginning of a new source line: draw the line. */
1080
1081 int line_size;
be812248 1082 const char *line_text = location_get_source_line (exploc.file,
1083 exploc.line,
1084 &line_size);
28f17529 1085 if (!line_text)
1086 break;
1087 fprintf (stream,
1088 "%s:%3i|loc:%5i|%.*s\n",
1089 exploc.file, exploc.line,
1090 loc,
1091 line_size, line_text);
1092
1093 /* "loc" is at column 0, which means "the whole line".
1094 Render the locations *within* the line, by underlining
1095 it, showing the source_location numeric values
1096 at each column. */
a96cefb2 1097 int max_col = (1 << map->m_column_and_range_bits) - 1;
28f17529 1098 if (max_col > line_size)
1099 max_col = line_size + 1;
1100
1101 int indent = 14 + strlen (exploc.file);
1102
1103 /* Thousands. */
1104 if (end_location > 999)
a96cefb2 1105 write_digit_row (stream, indent, map, loc, max_col, 1000);
28f17529 1106
1107 /* Hundreds. */
1108 if (end_location > 99)
a96cefb2 1109 write_digit_row (stream, indent, map, loc, max_col, 100);
28f17529 1110
1111 /* Tens. */
a96cefb2 1112 write_digit_row (stream, indent, map, loc, max_col, 10);
28f17529 1113
1114 /* Units. */
a96cefb2 1115 write_digit_row (stream, indent, map, loc, max_col, 1);
28f17529 1116 }
1117 }
1118 fprintf (stream, "\n");
1119 }
1120
1121 /* Visualize unallocated values. */
1122 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1123 line_table->highest_location,
1124 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1125
1126 /* Visualize the macro line_map instances, rendering the sources. */
1127 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1128 {
1129 /* Each macro map that is allocated owns source_location values
1130 that are *lower* that the one before them.
1131 Hence it's meaningful to view them either in order of ascending
1132 source locations, or in order of ascending macro map index. */
1133 const bool ascending_source_locations = true;
1134 unsigned int idx = (ascending_source_locations
1135 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1136 : i);
551e34da 1137 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
28f17529 1138 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1139 idx,
1140 linemap_map_get_macro_name (map),
1141 MACRO_MAP_NUM_MACRO_TOKENS (map));
1142 dump_location_range (stream,
1143 map->start_location,
1144 (map->start_location
1145 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1146 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1147 "expansion point is location %i",
1148 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1149 fprintf (stream, " map->start_location: %u\n",
1150 map->start_location);
1151
1152 fprintf (stream, " macro_locations:\n");
1153 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1154 {
1155 source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1156 source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1157
1158 /* linemap_add_macro_token encodes token numbers in an expansion
1159 by putting them after MAP_START_LOCATION. */
1160
1161 /* I'm typically seeing 4 uninitialized entries at the end of
1162 0xafafafaf.
1163 This appears to be due to macro.c:replace_args
1164 adding 2 extra args for padding tokens; presumably there may
1165 be a leading and/or trailing padding token injected,
1166 each for 2 more location slots.
1167 This would explain there being up to 4 source_locations slots
1168 that may be uninitialized. */
1169
1170 fprintf (stream, " %u: %u, %u\n",
1171 i,
1172 x,
1173 y);
1174 if (x == y)
1175 {
1176 if (x < MAP_START_LOCATION (map))
1177 inform (x, "token %u has x-location == y-location == %u", i, x);
1178 else
1179 fprintf (stream,
1180 "x-location == y-location == %u encodes token # %u\n",
1181 x, x - MAP_START_LOCATION (map));
1182 }
1183 else
1184 {
1185 inform (x, "token %u has x-location == %u", i, x);
1186 inform (x, "token %u has y-location == %u", i, y);
1187 }
1188 }
1189 fprintf (stream, "\n");
1190 }
1191
1192 /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1193 macro map, presumably due to an off-by-one error somewhere
1194 between the logic in linemap_enter_macro and
1195 LINEMAPS_MACRO_LOWEST_LOCATION. */
1196 dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1197 MAX_SOURCE_LOCATION,
1198 MAX_SOURCE_LOCATION + 1);
1199
1200 /* Visualize ad-hoc values. */
1201 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1202 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1203}
99b4f3a2 1204
d4166bdc 1205/* string_concat's constructor. */
1206
1207string_concat::string_concat (int num, location_t *locs)
1208 : m_num (num)
1209{
1210 m_locs = ggc_vec_alloc <location_t> (num);
1211 for (int i = 0; i < num; i++)
1212 m_locs[i] = locs[i];
1213}
1214
1215/* string_concat_db's constructor. */
1216
1217string_concat_db::string_concat_db ()
1218{
1219 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1220}
1221
1222/* Record that a string concatenation occurred, covering NUM
1223 string literal tokens. LOCS is an array of size NUM, containing the
1224 locations of the tokens. A copy of LOCS is taken. */
1225
1226void
1227string_concat_db::record_string_concatenation (int num, location_t *locs)
1228{
1229 gcc_assert (num > 1);
1230 gcc_assert (locs);
1231
1232 location_t key_loc = get_key_loc (locs[0]);
1233
1234 string_concat *concat
1235 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1236 m_table->put (key_loc, concat);
1237}
1238
1239/* Determine if LOC was the location of the the initial token of a
1240 concatenation of string literal tokens.
1241 If so, *OUT_NUM is written to with the number of tokens, and
1242 *OUT_LOCS with the location of an array of locations of the
1243 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1244 storage owned by the string_concat_db.
1245 Otherwise, return false. */
1246
1247bool
1248string_concat_db::get_string_concatenation (location_t loc,
1249 int *out_num,
1250 location_t **out_locs)
1251{
1252 gcc_assert (out_num);
1253 gcc_assert (out_locs);
1254
1255 location_t key_loc = get_key_loc (loc);
1256
1257 string_concat **concat = m_table->get (key_loc);
1258 if (!concat)
1259 return false;
1260
1261 *out_num = (*concat)->m_num;
1262 *out_locs =(*concat)->m_locs;
1263 return true;
1264}
1265
1266/* Internal function. Canonicalize LOC into a form suitable for
1267 use as a key within the database, stripping away macro expansion,
1268 ad-hoc information, and range information, using the location of
1269 the start of LOC within an ordinary linemap. */
1270
1271location_t
1272string_concat_db::get_key_loc (location_t loc)
1273{
1274 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1275 NULL);
1276
1277 loc = get_range_from_loc (line_table, loc).m_start;
1278
1279 return loc;
1280}
1281
1282/* Helper class for use within get_substring_ranges_for_loc.
1283 An vec of cpp_string with responsibility for releasing all of the
1284 str->text for each str in the vector. */
1285
1286class auto_cpp_string_vec : public auto_vec <cpp_string>
1287{
1288 public:
1289 auto_cpp_string_vec (int alloc)
1290 : auto_vec <cpp_string> (alloc) {}
1291
1292 ~auto_cpp_string_vec ()
1293 {
1294 /* Clean up the copies within this vec. */
1295 int i;
1296 cpp_string *str;
1297 FOR_EACH_VEC_ELT (*this, i, str)
1298 free (const_cast <unsigned char *> (str->text));
1299 }
1300};
1301
1302/* Attempt to populate RANGES with source location information on the
1303 individual characters within the string literal found at STRLOC.
1304 If CONCATS is non-NULL, then any string literals that the token at
1305 STRLOC was concatenated with are also added to RANGES.
1306
1307 Return NULL if successful, or an error message if any errors occurred (in
1308 which case RANGES may be only partially populated and should not
1309 be used).
1310
1311 This is implemented by re-parsing the relevant source line(s). */
1312
1313static const char *
1314get_substring_ranges_for_loc (cpp_reader *pfile,
1315 string_concat_db *concats,
1316 location_t strloc,
1317 enum cpp_ttype type,
1318 cpp_substring_ranges &ranges)
1319{
1320 gcc_assert (pfile);
1321
1322 if (strloc == UNKNOWN_LOCATION)
1323 return "unknown location";
1324
8df44fbf 1325 /* Reparsing the strings requires accurate location information.
1326 If -ftrack-macro-expansion has been overridden from its default
1327 of 2, then we might have a location of a macro expansion point,
1328 rather than the location of the literal itself.
1329 Avoid this by requiring that we have full macro expansion tracking
1330 for substring locations to be available. */
1331 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1332 return "track_macro_expansion != 2";
1333
a4d96eb7 1334 /* If #line or # 44 "file"-style directives are present, then there's
1335 no guarantee that the line numbers we have can be used to locate
1336 the strings. For example, we might have a .i file with # directives
1337 pointing back to lines within a .c file, but the .c file might
1338 have been edited since the .i file was created.
1339 In such a case, the safest course is to disable on-demand substring
1340 locations. */
1341 if (line_table->seen_line_directive)
1342 return "seen line directive";
1343
d4166bdc 1344 /* If string concatenation has occurred at STRLOC, get the locations
1345 of all of the literal tokens making up the compound string.
1346 Otherwise, just use STRLOC. */
1347 int num_locs = 1;
1348 location_t *strlocs = &strloc;
1349 if (concats)
1350 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1351
1352 auto_cpp_string_vec strs (num_locs);
1353 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1354 for (int i = 0; i < num_locs; i++)
1355 {
1356 /* Get range of strloc. We will use it to locate the start and finish
1357 of the literal token within the line. */
1358 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1359
1360 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1361 /* If the string is within a macro expansion, we can't get at the
1362 end location. */
1363 return "macro expansion";
1364
1365 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1366 /* If so, we can't reliably determine where the token started within
1367 its line. */
1368 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1369
1370 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1371 /* If so, we can't reliably determine where the token finished within
1372 its line. */
1373 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1374
1375 expanded_location start
1376 = expand_location_to_spelling_point (src_range.m_start);
1377 expanded_location finish
1378 = expand_location_to_spelling_point (src_range.m_finish);
1379 if (start.file != finish.file)
1380 return "range endpoints are in different files";
1381 if (start.line != finish.line)
1382 return "range endpoints are on different lines";
1383 if (start.column > finish.column)
1384 return "range endpoints are reversed";
1385
1386 int line_width;
1387 const char *line = location_get_source_line (start.file, start.line,
1388 &line_width);
1389 if (line == NULL)
1390 return "unable to read source line";
1391
1392 /* Determine the location of the literal (including quotes
1393 and leading prefix chars, such as the 'u' in a u""
1394 token). */
1395 const char *literal = line + start.column - 1;
1396 int literal_length = finish.column - start.column + 1;
1397
44128dbe 1398 /* Ensure that we don't crash if we got the wrong location. */
1399 if (line_width < (start.column - 1 + literal_length))
1400 return "line is not wide enough";
1401
d4166bdc 1402 cpp_string from;
1403 from.len = literal_length;
1404 /* Make a copy of the literal, to avoid having to rely on
1405 the lifetime of the copy of the line within the cache.
1406 This will be released by the auto_cpp_string_vec dtor. */
1407 from.text = XDUPVEC (unsigned char, literal, literal_length);
1408 strs.safe_push (from);
1409
1410 /* For very long lines, a new linemap could have started
1411 halfway through the token.
1412 Ensure that the loc_reader uses the linemap of the
1413 *end* of the token for its start location. */
1414 const line_map_ordinary *final_ord_map;
1415 linemap_resolve_location (line_table, src_range.m_finish,
1416 LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1417 location_t start_loc
1418 = linemap_position_for_line_and_column (line_table, final_ord_map,
1419 start.line, start.column);
1420
1421 cpp_string_location_reader loc_reader (start_loc, line_table);
1422 loc_readers.safe_push (loc_reader);
1423 }
1424
1425 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1426 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1427 loc_readers.address (),
1428 num_locs, &ranges, type);
1429 if (err)
1430 return err;
1431
1432 /* Success: "ranges" should now contain information on the string. */
1433 return NULL;
1434}
1435
5927e78e 1436/* Attempt to populate *OUT_LOC with source location information on the
1437 given characters within the string literal found at STRLOC.
1438 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1439 character set.
1440
1441 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1442 and string literal "012345\n789"
1443 *OUT_LOC is written to with:
1444 "012345\n789"
1445 ~^~~~~
1446
d4166bdc 1447 If CONCATS is non-NULL, then any string literals that the token at
1448 STRLOC was concatenated with are also considered.
1449
1450 This is implemented by re-parsing the relevant source line(s).
1451
1452 Return NULL if successful, or an error message if any errors occurred.
1453 Error messages are intended for GCC developers (to help debugging) rather
1454 than for end-users. */
1455
1456const char *
5927e78e 1457get_source_location_for_substring (cpp_reader *pfile,
1458 string_concat_db *concats,
1459 location_t strloc,
1460 enum cpp_ttype type,
1461 int caret_idx, int start_idx, int end_idx,
1462 source_location *out_loc)
1463{
1464 gcc_checking_assert (caret_idx >= 0);
d4166bdc 1465 gcc_checking_assert (start_idx >= 0);
1466 gcc_checking_assert (end_idx >= 0);
5927e78e 1467 gcc_assert (out_loc);
d4166bdc 1468
1469 cpp_substring_ranges ranges;
1470 const char *err
1471 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1472 if (err)
1473 return err;
1474
5927e78e 1475 if (caret_idx >= ranges.get_num_ranges ())
1476 return "caret_idx out of range";
d4166bdc 1477 if (start_idx >= ranges.get_num_ranges ())
1478 return "start_idx out of range";
1479 if (end_idx >= ranges.get_num_ranges ())
1480 return "end_idx out of range";
1481
5927e78e 1482 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1483 ranges.get_range (start_idx).m_start,
1484 ranges.get_range (end_idx).m_finish);
1485 return NULL;
1486}
1487
45183e4c 1488#if CHECKING_P
1489
1490namespace selftest {
1491
1492/* Selftests of location handling. */
1493
5927e78e 1494/* Attempt to populate *OUT_RANGE with source location information on the
1495 given character within the string literal found at STRLOC.
1496 CHAR_IDX refers to an offset within the execution character set.
1497 If CONCATS is non-NULL, then any string literals that the token at
1498 STRLOC was concatenated with are also considered.
1499
1500 This is implemented by re-parsing the relevant source line(s).
1501
1502 Return NULL if successful, or an error message if any errors occurred.
1503 Error messages are intended for GCC developers (to help debugging) rather
1504 than for end-users. */
1505
1506static const char *
1507get_source_range_for_char (cpp_reader *pfile,
1508 string_concat_db *concats,
1509 location_t strloc,
1510 enum cpp_ttype type,
1511 int char_idx,
1512 source_range *out_range)
1513{
1514 gcc_checking_assert (char_idx >= 0);
1515 gcc_assert (out_range);
1516
1517 cpp_substring_ranges ranges;
1518 const char *err
1519 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1520 if (err)
1521 return err;
1522
1523 if (char_idx >= ranges.get_num_ranges ())
1524 return "char_idx out of range";
1525
1526 *out_range = ranges.get_range (char_idx);
d4166bdc 1527 return NULL;
1528}
1529
5927e78e 1530/* As get_source_range_for_char, but write to *OUT the number
d4166bdc 1531 of ranges that are available. */
1532
45183e4c 1533static const char *
d4166bdc 1534get_num_source_ranges_for_substring (cpp_reader *pfile,
1535 string_concat_db *concats,
1536 location_t strloc,
1537 enum cpp_ttype type,
1538 int *out)
1539{
1540 gcc_assert (out);
1541
1542 cpp_substring_ranges ranges;
1543 const char *err
1544 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1545
1546 if (err)
1547 return err;
1548
1549 *out = ranges.get_num_ranges ();
1550 return NULL;
1551}
1552
99b4f3a2 1553/* Selftests of location handling. */
1554
b73690a4 1555/* Helper function for verifying location data: when location_t
1556 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1557 as having column 0. */
1558
1559static bool
1560should_have_column_data_p (location_t loc)
1561{
1562 if (IS_ADHOC_LOC (loc))
1563 loc = get_location_from_adhoc_loc (line_table, loc);
1564 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1565 return false;
1566 return true;
1567}
1568
1569/* Selftest for should_have_column_data_p: locations up to and including
   LINE_MAP_MAX_LOCATION_WITH_COLS are expected to report column data,
   and the first location beyond that threshold is expected not to.  */
1570
1571static void
1572test_should_have_column_data_p ()
1573{
/* A low location, well inside the range that tracks columns.  */
1574 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
/* The threshold itself still counts as having column data...  */
1575 ASSERT_TRUE
1576 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
/* ...but one past the threshold does not.  */
1577 ASSERT_FALSE
1578 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1579}
1580
99b4f3a2 1581/* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1582 on LOC. */
1583
1584static void
1585assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1586 location_t loc)
1587{
1588 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1589 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
b73690a4 1590 /* If location_t values are sufficiently high, then column numbers
1591 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1592 When close to the threshold, column numbers *may* be present: if
1593 the final linemap before the threshold contains a line that straddles
1594 the threshold, locations in that line have column information. */
1595 if (should_have_column_data_p (loc))
1596 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1597}
1598
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for source_location/location_t beyond which line-map.c
     changes behavior (disabling of the range-packing optimization,
     disabling of column-tracking).  We can exercise these by starting
     the line_table at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */

struct line_table_case
{
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value to start the line_table's highest_location
     and highest_line at.  */
  int m_base_location;
};
1625
7ec388ed 1626/* Constructor. Store the old value of line_table, and create a new
1627 one, using sane defaults. */
b73690a4 1628
7ec388ed 1629line_table_test::line_table_test ()
b73690a4 1630{
7ec388ed 1631 gcc_assert (saved_line_table == NULL);
1632 saved_line_table = line_table;
1633 line_table = ggc_alloc<line_maps> ();
1634 linemap_init (line_table, BUILTINS_LOCATION);
1635 gcc_assert (saved_line_table->reallocator);
1636 line_table->reallocator = saved_line_table->reallocator;
1637 gcc_assert (saved_line_table->round_alloc_size);
1638 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1639 line_table->default_range_bits = 0;
1640}
b73690a4 1641
1642/* Constructor. Store the old value of line_table, and create a new
1643 one, using the sitation described in CASE_. */
1644
7ec388ed 1645line_table_test::line_table_test (const line_table_case &case_)
b73690a4 1646{
7ec388ed 1647 gcc_assert (saved_line_table == NULL);
1648 saved_line_table = line_table;
b73690a4 1649 line_table = ggc_alloc<line_maps> ();
1650 linemap_init (line_table, BUILTINS_LOCATION);
7ec388ed 1651 gcc_assert (saved_line_table->reallocator);
1652 line_table->reallocator = saved_line_table->reallocator;
1653 gcc_assert (saved_line_table->round_alloc_size);
1654 line_table->round_alloc_size = saved_line_table->round_alloc_size;
b73690a4 1655 line_table->default_range_bits = case_.m_default_range_bits;
1656 if (case_.m_base_location)
1657 {
1658 line_table->highest_location = case_.m_base_location;
1659 line_table->highest_line = case_.m_base_location;
1660 }
1661}
1662
1663/* Destructor. Restore the old value of line_table. */
1664
7ec388ed 1665line_table_test::~line_table_test ()
b73690a4 1666{
7ec388ed 1667 gcc_assert (saved_line_table != NULL);
1668 line_table = saved_line_table;
1669 saved_line_table = NULL;
99b4f3a2 1670}
1671
1672/* Verify basic operation of ordinary linemaps, using a temporary
   line_table set up for the situation described in CASE_: build
   locations across two files and lines of varying widths, then check
   that file/line/column can be recovered from each of them.  */
1673
1674static void
b73690a4 1675test_accessing_ordinary_linemaps (const line_table_case &case_)
99b4f3a2 1676{
/* Swap in a fresh line_table for the duration of this test.  */
7ec388ed 1677 line_table_test ltt (case_);
b73690a4 1678
99b4f3a2 1679 /* Build a simple linemap describing some locations. */
1680 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1681
1682 linemap_line_start (line_table, 1, 100);
1683 location_t loc_a = linemap_position_for_column (line_table, 1);
1684 location_t loc_b = linemap_position_for_column (line_table, 23);
1685
1686 linemap_line_start (line_table, 2, 100);
1687 location_t loc_c = linemap_position_for_column (line_table, 1);
1688 location_t loc_d = linemap_position_for_column (line_table, 17);
1689
1690 /* Example of a very long line. */
1691 linemap_line_start (line_table, 3, 2000);
1692 location_t loc_e = linemap_position_for_column (line_table, 700);
1693
732cf036 1694 /* Transitioning back to a short line. */
1695 linemap_line_start (line_table, 4, 0);
1696 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1697
/* The column-bits check below only applies while column data is
   still being tracked for this location.  */
1698 if (should_have_column_data_p (loc_back_to_short))
1699 {
1700 /* Verify that we switched to short lines in the linemap. */
1701 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1702 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1703 }
1704
9348467c 1705 /* Example of a line that will eventually be seen to be longer
1706 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1707 below that. */
1708 linemap_line_start (line_table, 5, 2000);
1709
1710 location_t loc_start_of_very_long_line
1711 = linemap_position_for_column (line_table, 2000);
1712 location_t loc_too_wide
1713 = linemap_position_for_column (line_table, 4097);
1714 location_t loc_too_wide_2
1715 = linemap_position_for_column (line_table, 4098);
1716
1717 /* ...and back to a sane line length. */
1718 linemap_line_start (line_table, 6, 100);
1719 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1720
99b4f3a2 1721 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1722
1723 /* Multiple files. */
1724 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1725 linemap_line_start (line_table, 1, 200);
1726 location_t loc_f = linemap_position_for_column (line_table, 150);
1727 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1728
1729 /* Verify that we can recover the location info. */
1730 assert_loceq ("foo.c", 1, 1, loc_a);
1731 assert_loceq ("foo.c", 1, 23, loc_b);
1732 assert_loceq ("foo.c", 2, 1, loc_c);
1733 assert_loceq ("foo.c", 2, 17, loc_d);
1734 assert_loceq ("foo.c", 3, 700, loc_e);
732cf036 1735 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
9348467c 1736
1737 /* In the very wide line, the initial location should be fully tracked. */
1738 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1739 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1740 be disabled. */
1741 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1742 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1743 /*...and column-tracking should be re-enabled for subsequent lines. */
1744 assert_loceq ("foo.c", 6, 10, loc_sane_again);
1745
99b4f3a2 1746 assert_loceq ("bar.c", 1, 150, loc_f);
1747
/* A location created directly from a column is neither a builtin
   token nor a range: it should be "pure".  */
1748 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
f17776ff 1749 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1750
1751 /* Verify using make_location to build a range, and extracting data
1752 back from it. */
/* Caret at loc_c, with the range running from loc_b to loc_d.  */
1753 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1754 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1755 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1756 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1757 ASSERT_EQ (loc_b, src_range.m_start);
1758 ASSERT_EQ (loc_d, src_range.m_finish);
99b4f3a2 1759}
1760
1761/* Verify various properties of UNKNOWN_LOCATION: it should have no
   associated filename, and both its line and its column should be
   zero.  */
1762
1763static void
1764test_unknown_location ()
1765{
1766 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1767 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1768 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1769}
1770
1771/* Verify various properties of BUILTINS_LOCATION: it should expand to
   the (translated) "<built-in>" pseudo-file at line 0, column 0, and be
   recognized by is_location_from_builtin_token.  */
1772
1773static void
1774test_builtins ()
1775{
82e14468 1776 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
99b4f3a2 1777 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1778}
1779
aca2a315 1780/* Regression test for make_location.
1330da90 1781 Ensure that we use pure locations for the start/finish of the range,
1782 rather than storing a packed or ad-hoc range as the start/finish.
   The test is run within the line_table situation described by CASE_.  */
aca2a315 1783
1784static void
1785test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1786{
1787 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1788 with C++ frontend.
1789 ....................0000000001111111111222.
1790 ....................1234567890123456789012. */
1791 const char *content = " r += !aaa == bbb;\n";
1792 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1793 line_table_test ltt (case_);
1794 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1795
/* Pure locations for the columns of interest within the line.  */
1796 const location_t c11 = linemap_position_for_column (line_table, 11);
1797 const location_t c12 = linemap_position_for_column (line_table, 12);
1798 const location_t c13 = linemap_position_for_column (line_table, 13);
1799 const location_t c14 = linemap_position_for_column (line_table, 14);
1800 const location_t c21 = linemap_position_for_column (line_table, 21);
1801
/* Nothing further can be checked once the highest column used is
   beyond the threshold for locations with column data.  */
1802 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1803 return;
1804
1805 /* Use column 13 for the caret location, arbitrarily, to verify that we
1806 handle start != caret. */
1807 const location_t aaa = make_location (c13, c12, c14);
1808 ASSERT_EQ (c13, get_pure_location (aaa));
1809 ASSERT_EQ (c12, get_start (aaa));
1810 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1811 ASSERT_EQ (c14, get_finish (aaa));
1812 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1813
1814 /* Make a location using a location with a range as the start-point. */
1815 const location_t not_aaa = make_location (c11, aaa, c14);
1816 ASSERT_EQ (c11, get_pure_location (not_aaa));
1817 /* It should use the start location of the range, not store the range
1818 itself. */
1819 ASSERT_EQ (c12, get_start (not_aaa));
1820 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1821 ASSERT_EQ (c14, get_finish (not_aaa));
1822 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1823
1824 /* Similarly, make a location with a range as the end-point. */
1825 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1826 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1827 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1828 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1829 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1830 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
/* AAA_EQ_BBB carries a range, and is passed here as the finish.  */
1831 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1832 /* It should use the finish location of the range, not store the range
1833 itself. */
1834 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1835 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1836 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1837 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1838 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1839}
1840
99b4f3a2 1841/* Verify reading of input files (e.g. for caret-based diagnostics). */
1842
1843static void
1844test_reading_source_line ()
1845{
423bd600 1846 /* Create a tempfile and write some text to it. */
b73690a4 1847 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1848 "01234567890123456789\n"
1849 "This is the test text\n"
2e24ac9b 1850 "This is the 3rd line");
423bd600 1851
1852 /* Read back a specific line from the tempfile. */
99b4f3a2 1853 int line_size;
b73690a4 1854 const char *source_line = location_get_source_line (tmp.get_filename (),
2e24ac9b 1855 3, &line_size);
1856 ASSERT_TRUE (source_line != NULL);
1857 ASSERT_EQ (20, line_size);
1858 ASSERT_TRUE (!strncmp ("This is the 3rd line",
1859 source_line, line_size));
1860
1861 source_line = location_get_source_line (tmp.get_filename (),
1862 2, &line_size);
99b4f3a2 1863 ASSERT_TRUE (source_line != NULL);
423bd600 1864 ASSERT_EQ (21, line_size);
2e24ac9b 1865 ASSERT_TRUE (!strncmp ("This is the test text",
1866 source_line, line_size));
423bd600 1867
2e24ac9b 1868 source_line = location_get_source_line (tmp.get_filename (),
1869 4, &line_size);
1870 ASSERT_TRUE (source_line == NULL);
99b4f3a2 1871}
1872
b73690a4 1873/* Tests of lexing. */
1874
/* Assert that the spelling of token TOK, as reported by
   cpp_token_as_text for PARSER, is the string EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)              \
  SELFTEST_BEGIN_STMT                                                    \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));     \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);            \
  SELFTEST_END_STMT
1883
1884/* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1885 and ranges from EXP_START_COL to EXP_FINISH_COL.
1886 Use LOC as the effective location of the selftest. */
1887
1888static void
1889assert_token_loc_eq (const location &loc,
1890 const cpp_token *tok,
1891 const char *exp_filename, int exp_linenum,
1892 int exp_start_col, int exp_finish_col)
1893{
1894 location_t tok_loc = tok->src_loc;
1895 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1896 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1897
1898 /* If location_t values are sufficiently high, then column numbers
1899 will be unavailable. */
1900 if (!should_have_column_data_p (tok_loc))
1901 return;
1902
1903 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1904 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1905 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1906 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1907}
1908
/* Convenience wrapper around assert_token_loc_eq for checking
   TOK->src_loc, passing SELFTEST_LOCATION (i.e. the site of the macro
   use) as the effective location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,      \
                            EXP_START_COL, EXP_FINISH_COL)       \
  assert_token_loc_eq (SELFTEST_LOCATION,                        \
                       (TOK), (EXP_FILENAME), (EXP_LINENUM),     \
                       (EXP_START_COL), (EXP_FINISH_COL))
1916
1917/* Test of lexing a file using libcpp, verifying tokens and their
1918 location information. */
1919
1920static void
1921test_lexer (const line_table_case &case_)
1922{
1923 /* Create a tempfile and write some text to it. */
1924 const char *content =
1925 /*00000000011111111112222222222333333.3333444444444.455555555556
1926 12345678901234567890123456789012345.6789012345678.901234567890. */
1927 ("test_name /* c-style comment */\n"
1928 " \"test literal\"\n"
1929 " // test c++-style comment\n"
1930 " 42\n");
1931 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1932
7ec388ed 1933 line_table_test ltt (case_);
b73690a4 1934
1935 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1936
1937 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1938 ASSERT_NE (fname, NULL);
1939
1940 /* Verify that we get the expected tokens back, with the correct
1941 location information. */
1942
1943 location_t loc;
1944 const cpp_token *tok;
1945 tok = cpp_get_token_with_location (parser, &loc);
1946 ASSERT_NE (tok, NULL);
1947 ASSERT_EQ (tok->type, CPP_NAME);
1948 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1949 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1950
1951 tok = cpp_get_token_with_location (parser, &loc);
1952 ASSERT_NE (tok, NULL);
1953 ASSERT_EQ (tok->type, CPP_STRING);
1954 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1955 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1956
1957 tok = cpp_get_token_with_location (parser, &loc);
1958 ASSERT_NE (tok, NULL);
1959 ASSERT_EQ (tok->type, CPP_NUMBER);
1960 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
1961 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
1962
1963 tok = cpp_get_token_with_location (parser, &loc);
1964 ASSERT_NE (tok, NULL);
1965 ASSERT_EQ (tok->type, CPP_EOF);
1966
1967 cpp_finish (parser, NULL);
1968 cpp_destroy (parser);
1969}
1970
/* Forward decls.  */

struct lexer_test;
class lexer_test_options;

/* A class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor, giving
   the subclass a chance to adjust the test's cpp_reader before the
   tempfile is read.  */

class lexer_test_options
{
 public:
  /* This is a polymorphic base class, so give it a virtual destructor;
     that keeps destruction through a base pointer well-defined.  */
  virtual ~lexer_test_options () {}

  /* Hook for customizing TEST.  */
  virtual void apply (lexer_test &) = 0;
};
1984
c6a7d9e9 1985/* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
1986 in its dtor.
1987
1988 This is needed by struct lexer_test to ensure that the cleanup of the
1989 cpp_reader happens *after* the cleanup of the temp_source_file. */
1990
1991class cpp_reader_ptr
1992{
1993 public:
1994 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
1995
1996 ~cpp_reader_ptr ()
1997 {
1998 cpp_finish (m_ptr, NULL);
1999 cpp_destroy (m_ptr);
2000 }
2001
2002 operator cpp_reader * () const { return m_ptr; }
2003
2004 private:
2005 cpp_reader *m_ptr;
2006};
2007
d4166bdc 2008/* A struct for writing lexer tests. */
2009
2010struct lexer_test
2011{
2012 lexer_test (const line_table_case &case_, const char *content,
2013 lexer_test_options *options);
2014 ~lexer_test ();
2015
2016 const cpp_token *get_token ();
2017
c6a7d9e9 2018 /* The ordering of these fields matters.
2019 The line_table_test must be first, since the cpp_reader_ptr
2020 uses it.
2021 The cpp_reader must be cleaned up *after* the temp_source_file
2022 since the filenames in input.c's input cache are owned by the
2023 cpp_reader; in particular, when ~temp_source_file evicts the
2024 filename the filenames must still be alive. */
7ec388ed 2025 line_table_test m_ltt;
c6a7d9e9 2026 cpp_reader_ptr m_parser;
2027 temp_source_file m_tempfile;
d4166bdc 2028 string_concat_db m_concats;
0ccd6e7a 2029 bool m_implicitly_expect_EOF;
d4166bdc 2030};
2031
2032/* Use an EBCDIC encoding for the execution charset, specifically
2033 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2034
2035 This exercises iconv integration within libcpp.
2036 Not every build of iconv supports the given charset,
2037 so we need to flag this error and handle it gracefully. */
2038
2039class ebcdic_execution_charset : public lexer_test_options
2040{
2041 public:
2042 ebcdic_execution_charset () : m_num_iconv_errors (0)
2043 {
2044 gcc_assert (s_singleton == NULL);
2045 s_singleton = this;
2046 }
2047 ~ebcdic_execution_charset ()
2048 {
2049 gcc_assert (s_singleton == this);
2050 s_singleton = NULL;
2051 }
2052
2053 void apply (lexer_test &test) FINAL OVERRIDE
2054 {
2055 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2056 cpp_opts->narrow_charset = "IBM1047";
2057
2058 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2059 callbacks->error = on_error;
2060 }
2061
2062 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2063 int level ATTRIBUTE_UNUSED,
2064 int reason ATTRIBUTE_UNUSED,
2065 rich_location *richloc ATTRIBUTE_UNUSED,
2066 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2067 ATTRIBUTE_FPTR_PRINTF(5,0)
2068 {
2069 gcc_assert (s_singleton);
9a784cf5 2070 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2071 const char *msg = "conversion from %s to %s not supported by iconv";
2072#ifdef ENABLE_NLS
2073 msg = dgettext ("cpplib", msg);
2074#endif
d4166bdc 2075 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2076 when the local iconv build doesn't support the conversion. */
9a784cf5 2077 if (strcmp (msgid, msg) == 0)
d4166bdc 2078 {
2079 s_singleton->m_num_iconv_errors++;
2080 return true;
2081 }
2082
2083 /* Otherwise, we have an unexpected error. */
2084 abort ();
2085 }
2086
2087 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2088
2089 private:
2090 static ebcdic_execution_charset *s_singleton;
2091 int m_num_iconv_errors;
2092};
2093
2094ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2095
0ccd6e7a 2096/* A lexer_test_options subclass that records a list of error
2097 messages emitted by the lexer. */
2098
2099class lexer_error_sink : public lexer_test_options
2100{
2101 public:
2102 lexer_error_sink ()
2103 {
2104 gcc_assert (s_singleton == NULL);
2105 s_singleton = this;
2106 }
2107 ~lexer_error_sink ()
2108 {
2109 gcc_assert (s_singleton == this);
2110 s_singleton = NULL;
2111
2112 int i;
2113 char *str;
2114 FOR_EACH_VEC_ELT (m_errors, i, str)
2115 free (str);
2116 }
2117
2118 void apply (lexer_test &test) FINAL OVERRIDE
2119 {
2120 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2121 callbacks->error = on_error;
2122 }
2123
2124 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2125 int level ATTRIBUTE_UNUSED,
2126 int reason ATTRIBUTE_UNUSED,
2127 rich_location *richloc ATTRIBUTE_UNUSED,
2128 const char *msgid, va_list *ap)
2129 ATTRIBUTE_FPTR_PRINTF(5,0)
2130 {
2131 char *msg = xvasprintf (msgid, *ap);
2132 s_singleton->m_errors.safe_push (msg);
2133 return true;
2134 }
2135
2136 auto_vec<char *> m_errors;
2137
2138 private:
2139 static lexer_error_sink *s_singleton;
2140};
2141
2142lexer_error_sink *lexer_error_sink::s_singleton;
2143
d4166bdc 2144/* Constructor. Override line_table with a new instance based on CASE_,
2145 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2146 start parsing the tempfile. */
2147
2148lexer_test::lexer_test (const line_table_case &case_, const char *content,
c6a7d9e9 2149 lexer_test_options *options)
2150: m_ltt (case_),
2151 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
d4166bdc 2152 /* Create a tempfile and write the text to it. */
2153 m_tempfile (SELFTEST_LOCATION, ".c", content),
0ccd6e7a 2154 m_concats (),
2155 m_implicitly_expect_EOF (true)
d4166bdc 2156{
2157 if (options)
2158 options->apply (*this);
2159
2160 cpp_init_iconv (m_parser);
2161
2162 /* Parse the file. */
2163 const char *fname = cpp_read_main_file (m_parser,
2164 m_tempfile.get_filename ());
2165 ASSERT_NE (fname, NULL);
2166}
2167
0ccd6e7a 2168/* Destructor. By default, verify that the next token in m_parser is EOF. */
d4166bdc 2169
2170lexer_test::~lexer_test ()
2171{
2172 location_t loc;
2173 const cpp_token *tok;
2174
0ccd6e7a 2175 if (m_implicitly_expect_EOF)
2176 {
2177 tok = cpp_get_token_with_location (m_parser, &loc);
2178 ASSERT_NE (tok, NULL);
2179 ASSERT_EQ (tok->type, CPP_EOF);
2180 }
d4166bdc 2181}
2182
2183/* Get the next token from m_parser. */
2184
2185const cpp_token *
2186lexer_test::get_token ()
2187{
2188 location_t loc;
2189 const cpp_token *tok;
2190
2191 tok = cpp_get_token_with_location (m_parser, &loc);
2192 ASSERT_NE (tok, NULL);
2193 return tok;
2194}
2195
2196/* Verify that locations within string literals are correctly handled. */
2197
2198/* Verify get_source_range_for_substring for token(s) at STRLOC,
2199 using the string concatenation database for TEST.
2200
2201 Assert that the character at index IDX is on EXPECTED_LINE,
2202 and that it begins at column EXPECTED_START_COL and ends at
2203 EXPECTED_FINISH_COL (unless the locations are beyond
2204 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2205 columns). */
2206
2207static void
2208assert_char_at_range (const location &loc,
2209 lexer_test& test,
2210 location_t strloc, enum cpp_ttype type, int idx,
2211 int expected_line, int expected_start_col,
2212 int expected_finish_col)
2213{
2214 cpp_reader *pfile = test.m_parser;
2215 string_concat_db *concats = &test.m_concats;
2216
be516c70 2217 source_range actual_range = source_range();
d4166bdc 2218 const char *err
5927e78e 2219 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2220 &actual_range);
d4166bdc 2221 if (should_have_column_data_p (strloc))
2222 ASSERT_EQ_AT (loc, NULL, err);
2223 else
2224 {
2225 ASSERT_STREQ_AT (loc,
2226 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2227 err);
2228 return;
2229 }
2230
2231 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2232 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2233 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2234 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2235
2236 if (should_have_column_data_p (actual_range.m_start))
2237 {
2238 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2239 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2240 }
2241 if (should_have_column_data_p (actual_range.m_finish))
2242 {
2243 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2244 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2245 }
2246}
2247
/* Convenience wrapper around assert_char_at_range, passing
   SELFTEST_LOCATION (i.e. the site of the macro use) as the effective
   location of any errors.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX,           \
                             EXPECTED_LINE, EXPECTED_START_COL,       \
                             EXPECTED_FINISH_COL)                     \
  assert_char_at_range (SELFTEST_LOCATION,                            \
                        (LEXER_TEST), (STRLOC), (TYPE), (IDX),        \
                        (EXPECTED_LINE), (EXPECTED_START_COL),        \
                        (EXPECTED_FINISH_COL))
2256
2257/* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2258 using the string concatenation database for TEST.
2259
2260 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2261
2262static void
2263assert_num_substring_ranges (const location &loc,
2264 lexer_test& test,
2265 location_t strloc,
2266 enum cpp_ttype type,
2267 int expected_num_ranges)
2268{
2269 cpp_reader *pfile = test.m_parser;
2270 string_concat_db *concats = &test.m_concats;
2271
45183e4c 2272 int actual_num_ranges = -1;
d4166bdc 2273 const char *err
2274 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2275 &actual_num_ranges);
2276 if (should_have_column_data_p (strloc))
2277 ASSERT_EQ_AT (loc, NULL, err);
2278 else
2279 {
2280 ASSERT_STREQ_AT (loc,
2281 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2282 err);
2283 return;
2284 }
2285 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2286}
2287
/* Convenience wrapper around assert_num_substring_ranges, passing
   SELFTEST_LOCATION (i.e. the site of the macro use) as the effective
   location of any errors.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,         \
                                    EXPECTED_NUM_RANGES)              \
  assert_num_substring_ranges (SELFTEST_LOCATION,                     \
                               (LEXER_TEST), (STRLOC), (TYPE),        \
                               (EXPECTED_NUM_RANGES))
2295
2296
2297/* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2298 returns an error (using the string concatenation database for TEST). */
2299
2300static void
2301assert_has_no_substring_ranges (const location &loc,
2302 lexer_test& test,
2303 location_t strloc,
2304 enum cpp_ttype type,
2305 const char *expected_err)
2306{
2307 cpp_reader *pfile = test.m_parser;
2308 string_concat_db *concats = &test.m_concats;
2309 cpp_substring_ranges ranges;
2310 const char *actual_err
2311 = get_substring_ranges_for_loc (pfile, concats, strloc,
2312 type, ranges);
2313 if (should_have_column_data_p (strloc))
2314 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2315 else
2316 ASSERT_STREQ_AT (loc,
2317 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2318 actual_err);
2319}
2320
/* Convenience wrapper around assert_has_no_substring_ranges, passing
   SELFTEST_LOCATION (i.e. the site of the macro use) as the effective
   location of any errors.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)  \
  assert_has_no_substring_ranges (SELFTEST_LOCATION,                   \
                                  (LEXER_TEST), (STRLOC), (TYPE),      \
                                  (ERR))
2324
2325/* Lex a simple string literal. Verify the substring location data, before
2326 and after running cpp_interpret_string on it. */
2327
2328static void
2329test_lexer_string_locations_simple (const line_table_case &case_)
2330{
2331 /* Digits 0-9 (with 0 at column 10), the simple way.
2332 ....................000000000.11111111112.2222222223333333333
2333 ....................123456789.01234567890.1234567890123456789
2334 We add a trailing comment to ensure that we correctly locate
2335 the end of the string literal token. */
2336 const char *content = " \"0123456789\" /* not a string */\n";
2337 lexer_test test (case_, content, NULL);
2338
2339 /* Verify that we get the expected token back, with the correct
2340 location information. */
2341 const cpp_token *tok = test.get_token ();
2342 ASSERT_EQ (tok->type, CPP_STRING);
2343 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2344 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2345
2346 /* At this point in lexing, the quote characters are treated as part of
2347 the string (they are stripped off by cpp_interpret_string). */
2348
2349 ASSERT_EQ (tok->val.str.len, 12);
2350
2351 /* Verify that cpp_interpret_string works. */
2352 cpp_string dst_string;
2353 const enum cpp_ttype type = CPP_STRING;
2354 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2355 &dst_string, type);
2356 ASSERT_TRUE (result);
2357 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2358 free (const_cast <unsigned char *> (dst_string.text));
2359
2360 /* Verify ranges of individual characters. This no longer includes the
7413e757 2361 opening quote, but does include the closing quote. */
2362 for (int i = 0; i <= 10; i++)
d4166bdc 2363 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2364 10 + i, 10 + i);
2365
7413e757 2366 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
d4166bdc 2367}
2368
2369/* As test_lexer_string_locations_simple, but use an EBCDIC execution
2370 encoding. */
2371
2372static void
2373test_lexer_string_locations_ebcdic (const line_table_case &case_)
2374{
2375 /* EBCDIC support requires iconv. */
2376 if (!HAVE_ICONV)
2377 return;
2378
2379 /* Digits 0-9 (with 0 at column 10), the simple way.
2380 ....................000000000.11111111112.2222222223333333333
2381 ....................123456789.01234567890.1234567890123456789
2382 We add a trailing comment to ensure that we correctly locate
2383 the end of the string literal token. */
2384 const char *content = " \"0123456789\" /* not a string */\n";
2385 ebcdic_execution_charset use_ebcdic;
2386 lexer_test test (case_, content, &use_ebcdic);
2387
2388 /* Verify that we get the expected token back, with the correct
2389 location information. */
2390 const cpp_token *tok = test.get_token ();
2391 ASSERT_EQ (tok->type, CPP_STRING);
2392 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2393 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2394
2395 /* At this point in lexing, the quote characters are treated as part of
2396 the string (they are stripped off by cpp_interpret_string). */
2397
2398 ASSERT_EQ (tok->val.str.len, 12);
2399
2400 /* The remainder of the test requires an iconv implementation that
2401 can convert from UTF-8 to the EBCDIC encoding requested above. */
2402 if (use_ebcdic.iconv_errors_occurred_p ())
2403 return;
2404
2405 /* Verify that cpp_interpret_string works. */
2406 cpp_string dst_string;
2407 const enum cpp_ttype type = CPP_STRING;
2408 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2409 &dst_string, type);
2410 ASSERT_TRUE (result);
2411 /* We should now have EBCDIC-encoded text, specifically
2412 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2413 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2414 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2415 (const char *)dst_string.text);
2416 free (const_cast <unsigned char *> (dst_string.text));
2417
2418 /* Verify that we don't attempt to record substring location information
2419 for such cases. */
2420 ASSERT_HAS_NO_SUBSTRING_RANGES
2421 (test, tok->src_loc, type,
2422 "execution character set != source character set");
2423}
2424
2425/* Lex a string literal containing a hex-escaped character.
2426 Verify the substring location data, before and after running
2427 cpp_interpret_string on it. */
2428
2429static void
2430test_lexer_string_locations_hex (const line_table_case &case_)
2431{
2432 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2433 and with a space in place of digit 6, to terminate the escaped
2434 hex code.
2435 ....................000000000.111111.11112222.
2436 ....................123456789.012345.67890123. */
2437 const char *content = " \"01234\\x35 789\"\n";
2438 lexer_test test (case_, content, NULL);
2439
2440 /* Verify that we get the expected token back, with the correct
2441 location information. */
2442 const cpp_token *tok = test.get_token ();
2443 ASSERT_EQ (tok->type, CPP_STRING);
2444 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2445 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2446
2447 /* At this point in lexing, the quote characters are treated as part of
2448 the string (they are stripped off by cpp_interpret_string). */
2449 ASSERT_EQ (tok->val.str.len, 15);
2450
2451 /* Verify that cpp_interpret_string works. */
2452 cpp_string dst_string;
2453 const enum cpp_ttype type = CPP_STRING;
2454 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2455 &dst_string, type);
2456 ASSERT_TRUE (result);
2457 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2458 free (const_cast <unsigned char *> (dst_string.text));
2459
2460 /* Verify ranges of individual characters. This no longer includes the
7413e757 2461 opening quote, but does include the closing quote. */
d4166bdc 2462 for (int i = 0; i <= 4; i++)
2463 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2464 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
7413e757 2465 for (int i = 6; i <= 10; i++)
d4166bdc 2466 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2467
7413e757 2468 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
d4166bdc 2469}
2470
2471/* Lex a string literal containing an octal-escaped character.
2472 Verify the substring location data after running cpp_interpret_string
2473 on it. */
2474
2475static void
2476test_lexer_string_locations_oct (const line_table_case &case_)
2477{
2478 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2479 and with a space in place of digit 6, to terminate the escaped
2480 octal code.
2481 ....................000000000.111111.11112222.2222223333333333444
2482 ....................123456789.012345.67890123.4567890123456789012 */
2483 const char *content = " \"01234\\065 789\" /* not a string */\n";
2484 lexer_test test (case_, content, NULL);
2485
2486 /* Verify that we get the expected token back, with the correct
2487 location information. */
2488 const cpp_token *tok = test.get_token ();
2489 ASSERT_EQ (tok->type, CPP_STRING);
2490 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2491
2492 /* Verify that cpp_interpret_string works. */
2493 cpp_string dst_string;
2494 const enum cpp_ttype type = CPP_STRING;
2495 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2496 &dst_string, type);
2497 ASSERT_TRUE (result);
2498 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2499 free (const_cast <unsigned char *> (dst_string.text));
2500
2501 /* Verify ranges of individual characters. This no longer includes the
7413e757 2502 opening quote, but does include the closing quote. */
d4166bdc 2503 for (int i = 0; i < 5; i++)
2504 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2505 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
7413e757 2506 for (int i = 6; i <= 10; i++)
d4166bdc 2507 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2508
7413e757 2509 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
d4166bdc 2510}
2511
2512/* Test of string literal containing letter escapes. */
2513
2514static void
2515test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2516{
2517 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2518 .....................000000000.1.11111.1.1.11222.22222223333333
2519 .....................123456789.0.12345.6.7.89012.34567890123456. */
2520 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2521 lexer_test test (case_, content, NULL);
2522
2523 /* Verify that we get the expected tokens back. */
2524 const cpp_token *tok = test.get_token ();
2525 ASSERT_EQ (tok->type, CPP_STRING);
2526 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2527
2528 /* Verify ranges of individual characters. */
2529 /* "\t". */
2530 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2531 0, 1, 10, 11);
2532 /* "foo". */
2533 for (int i = 1; i <= 3; i++)
2534 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2535 i, 1, 11 + i, 11 + i);
2536 /* "\\" and "\n". */
2537 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2538 4, 1, 15, 16);
2539 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2540 5, 1, 17, 18);
2541
7413e757 2542 /* "bar" and closing quote for nul-terminator. */
2543 for (int i = 6; i <= 9; i++)
d4166bdc 2544 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2545 i, 1, 13 + i, 13 + i);
2546
7413e757 2547 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
d4166bdc 2548}
2549
2550/* Another test of a string literal containing a letter escape.
2551 Based on string seen in
2552 printf ("%-%\n");
2553 in gcc.dg/format/c90-printf-1.c. */
2554
2555static void
2556test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2557{
2558 /* .....................000000000.1111.11.1111.22222222223.
2559 .....................123456789.0123.45.6789.01234567890. */
2560 const char *content = (" \"%-%\\n\" /* non-str */\n");
2561 lexer_test test (case_, content, NULL);
2562
2563 /* Verify that we get the expected tokens back. */
2564 const cpp_token *tok = test.get_token ();
2565 ASSERT_EQ (tok->type, CPP_STRING);
2566 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2567
2568 /* Verify ranges of individual characters. */
2569 /* "%-%". */
2570 for (int i = 0; i < 3; i++)
2571 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2572 i, 1, 10 + i, 10 + i);
2573 /* "\n". */
2574 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2575 3, 1, 13, 14);
2576
7413e757 2577 /* Closing quote for nul-terminator. */
2578 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2579 4, 1, 15, 15);
2580
2581 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
d4166bdc 2582}
2583
2584/* Lex a string literal containing UCN 4 characters.
2585 Verify the substring location data after running cpp_interpret_string
2586 on it. */
2587
2588static void
2589test_lexer_string_locations_ucn4 (const line_table_case &case_)
2590{
2591 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2592 as UCN 4.
2593 ....................000000000.111111.111122.222222223.33333333344444
2594 ....................123456789.012345.678901.234567890.12345678901234 */
2595 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2596 lexer_test test (case_, content, NULL);
2597
2598 /* Verify that we get the expected token back, with the correct
2599 location information. */
2600 const cpp_token *tok = test.get_token ();
2601 ASSERT_EQ (tok->type, CPP_STRING);
2602 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2603
2604 /* Verify that cpp_interpret_string works.
2605 The string should be encoded in the execution character
2606 set. Assuming that that is UTF-8, we should have the following:
2607 ----------- ---- ----- ------- ----------------
2608 Byte offset Byte Octal Unicode Source Column(s)
2609 ----------- ---- ----- ------- ----------------
2610 0 0x30 '0' 10
2611 1 0x31 '1' 11
2612 2 0x32 '2' 12
2613 3 0x33 '3' 13
2614 4 0x34 '4' 14
2615 5 0xE2 \342 U+2174 15-20
2616 6 0x85 \205 (cont) 15-20
2617 7 0xB4 \264 (cont) 15-20
2618 8 0xE2 \342 U+2175 21-26
2619 9 0x85 \205 (cont) 21-26
2620 10 0xB5 \265 (cont) 21-26
2621 11 0x37 '7' 27
2622 12 0x38 '8' 28
2623 13 0x39 '9' 29
7413e757 2624 14 0x00 30 (closing quote)
d4166bdc 2625 ----------- ---- ----- ------- ---------------. */
2626
2627 cpp_string dst_string;
2628 const enum cpp_ttype type = CPP_STRING;
2629 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2630 &dst_string, type);
2631 ASSERT_TRUE (result);
2632 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2633 (const char *)dst_string.text);
2634 free (const_cast <unsigned char *> (dst_string.text));
2635
2636 /* Verify ranges of individual characters. This no longer includes the
7413e757 2637 opening quote, but does include the closing quote.
d4166bdc 2638 '01234'. */
2639 for (int i = 0; i <= 4; i++)
2640 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2641 /* U+2174. */
2642 for (int i = 5; i <= 7; i++)
2643 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2644 /* U+2175. */
2645 for (int i = 8; i <= 10; i++)
2646 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
7413e757 2647 /* '789' and nul terminator */
2648 for (int i = 11; i <= 14; i++)
d4166bdc 2649 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2650
7413e757 2651 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
d4166bdc 2652}
2653
2654/* Lex a string literal containing UCN 8 characters.
2655 Verify the substring location data after running cpp_interpret_string
2656 on it. */
2657
2658static void
2659test_lexer_string_locations_ucn8 (const line_table_case &case_)
2660{
2661 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2662 ....................000000000.111111.1111222222.2222333333333.344444
2663 ....................123456789.012345.6789012345.6789012345678.901234 */
2664 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2665 lexer_test test (case_, content, NULL);
2666
2667 /* Verify that we get the expected token back, with the correct
2668 location information. */
2669 const cpp_token *tok = test.get_token ();
2670 ASSERT_EQ (tok->type, CPP_STRING);
2671 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2672 "\"01234\\U00002174\\U00002175789\"");
2673
2674 /* Verify that cpp_interpret_string works.
2675 The UTF-8 encoding of the string is identical to that from
2676 the ucn4 testcase above; the only difference is the column
2677 locations. */
2678 cpp_string dst_string;
2679 const enum cpp_ttype type = CPP_STRING;
2680 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2681 &dst_string, type);
2682 ASSERT_TRUE (result);
2683 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2684 (const char *)dst_string.text);
2685 free (const_cast <unsigned char *> (dst_string.text));
2686
2687 /* Verify ranges of individual characters. This no longer includes the
7413e757 2688 opening quote, but does include the closing quote.
d4166bdc 2689 '01234'. */
2690 for (int i = 0; i <= 4; i++)
2691 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2692 /* U+2174. */
2693 for (int i = 5; i <= 7; i++)
2694 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2695 /* U+2175. */
2696 for (int i = 8; i <= 10; i++)
2697 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2698 /* '789' at columns 35-37 */
2699 for (int i = 11; i <= 13; i++)
2700 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
7413e757 2701 /* Closing quote/nul-terminator at column 38. */
2702 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
d4166bdc 2703
7413e757 2704 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
d4166bdc 2705}
2706
/* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit quantity
   (most significant byte first) and return it as a host-endian value.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t value = 0;

  /* Fold the bytes in from most to least significant.  */
  for (unsigned int idx = 0; idx < 4; idx++)
    value = (value << 8) | bytes[idx];

  return value;
}
2718
2719/* Lex a wide string literal and verify that attempts to read substring
2720 location data from it fail gracefully. */
2721
2722static void
2723test_lexer_string_locations_wide_string (const line_table_case &case_)
2724{
2725 /* Digits 0-9.
2726 ....................000000000.11111111112.22222222233333
2727 ....................123456789.01234567890.12345678901234 */
2728 const char *content = " L\"0123456789\" /* non-str */\n";
2729 lexer_test test (case_, content, NULL);
2730
2731 /* Verify that we get the expected token back, with the correct
2732 location information. */
2733 const cpp_token *tok = test.get_token ();
2734 ASSERT_EQ (tok->type, CPP_WSTRING);
2735 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2736
2737 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2738 cpp_string dst_string;
2739 const enum cpp_ttype type = CPP_WSTRING;
2740 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2741 &dst_string, type);
2742 ASSERT_TRUE (result);
2743 /* The cpp_reader defaults to big-endian with
2744 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2745 now be encoded as UTF-32BE. */
2746 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2747 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2748 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2749 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2750 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2751 free (const_cast <unsigned char *> (dst_string.text));
2752
2753 /* We don't yet support generating substring location information
2754 for L"" strings. */
2755 ASSERT_HAS_NO_SUBSTRING_RANGES
2756 (test, tok->src_loc, type,
2757 "execution character set != source character set");
2758}
2759
/* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit quantity
   (most significant byte first) and return it as a host-endian value.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high = bytes[0];
  const unsigned int low = bytes[1];

  return (uint16_t) ((high << 8) | low);
}
2768
2769/* Lex a u"" string literal and verify that attempts to read substring
2770 location data from it fail gracefully. */
2771
2772static void
2773test_lexer_string_locations_string16 (const line_table_case &case_)
2774{
2775 /* Digits 0-9.
2776 ....................000000000.11111111112.22222222233333
2777 ....................123456789.01234567890.12345678901234 */
2778 const char *content = " u\"0123456789\" /* non-str */\n";
2779 lexer_test test (case_, content, NULL);
2780
2781 /* Verify that we get the expected token back, with the correct
2782 location information. */
2783 const cpp_token *tok = test.get_token ();
2784 ASSERT_EQ (tok->type, CPP_STRING16);
2785 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2786
2787 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2788 cpp_string dst_string;
2789 const enum cpp_ttype type = CPP_STRING16;
2790 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2791 &dst_string, type);
2792 ASSERT_TRUE (result);
2793
2794 /* The cpp_reader defaults to big-endian, so dst_string should
2795 now be encoded as UTF-16BE. */
2796 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2797 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2798 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2799 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2800 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2801 free (const_cast <unsigned char *> (dst_string.text));
2802
2803 /* We don't yet support generating substring location information
2804 for L"" strings. */
2805 ASSERT_HAS_NO_SUBSTRING_RANGES
2806 (test, tok->src_loc, type,
2807 "execution character set != source character set");
2808}
2809
2810/* Lex a U"" string literal and verify that attempts to read substring
2811 location data from it fail gracefully. */
2812
2813static void
2814test_lexer_string_locations_string32 (const line_table_case &case_)
2815{
2816 /* Digits 0-9.
2817 ....................000000000.11111111112.22222222233333
2818 ....................123456789.01234567890.12345678901234 */
2819 const char *content = " U\"0123456789\" /* non-str */\n";
2820 lexer_test test (case_, content, NULL);
2821
2822 /* Verify that we get the expected token back, with the correct
2823 location information. */
2824 const cpp_token *tok = test.get_token ();
2825 ASSERT_EQ (tok->type, CPP_STRING32);
2826 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2827
2828 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2829 cpp_string dst_string;
2830 const enum cpp_ttype type = CPP_STRING32;
2831 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2832 &dst_string, type);
2833 ASSERT_TRUE (result);
2834
2835 /* The cpp_reader defaults to big-endian, so dst_string should
2836 now be encoded as UTF-32BE. */
2837 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2838 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2839 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2840 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2841 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2842 free (const_cast <unsigned char *> (dst_string.text));
2843
2844 /* We don't yet support generating substring location information
2845 for L"" strings. */
2846 ASSERT_HAS_NO_SUBSTRING_RANGES
2847 (test, tok->src_loc, type,
2848 "execution character set != source character set");
2849}
2850
2851/* Lex a u8-string literal.
2852 Verify the substring location data after running cpp_interpret_string
2853 on it. */
2854
2855static void
2856test_lexer_string_locations_u8 (const line_table_case &case_)
2857{
2858 /* Digits 0-9.
2859 ....................000000000.11111111112.22222222233333
2860 ....................123456789.01234567890.12345678901234 */
2861 const char *content = " u8\"0123456789\" /* non-str */\n";
2862 lexer_test test (case_, content, NULL);
2863
2864 /* Verify that we get the expected token back, with the correct
2865 location information. */
2866 const cpp_token *tok = test.get_token ();
2867 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2868 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2869
2870 /* Verify that cpp_interpret_string works. */
2871 cpp_string dst_string;
2872 const enum cpp_ttype type = CPP_STRING;
2873 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2874 &dst_string, type);
2875 ASSERT_TRUE (result);
2876 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2877 free (const_cast <unsigned char *> (dst_string.text));
2878
2879 /* Verify ranges of individual characters. This no longer includes the
7413e757 2880 opening quote, but does include the closing quote. */
2881 for (int i = 0; i <= 10; i++)
d4166bdc 2882 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2883}
2884
2885/* Lex a string literal containing UTF-8 source characters.
2886 Verify the substring location data after running cpp_interpret_string
2887 on it. */
2888
2889static void
2890test_lexer_string_locations_utf8_source (const line_table_case &case_)
2891{
2892 /* This string literal is written out to the source file as UTF-8,
2893 and is of the form "before mojibake after", where "mojibake"
2894 is written as the following four unicode code points:
2895 U+6587 CJK UNIFIED IDEOGRAPH-6587
2896 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2897 U+5316 CJK UNIFIED IDEOGRAPH-5316
2898 U+3051 HIRAGANA LETTER KE.
2899 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2900 "before" and "after" are 1 byte per unicode character.
2901
2902 The numbering shown are "columns", which are *byte* numbers within
2903 the line, rather than unicode character numbers.
2904
2905 .................... 000000000.1111111.
2906 .................... 123456789.0123456. */
2907 const char *content = (" \"before "
2908 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2909 UTF-8: 0xE6 0x96 0x87
2910 C octal escaped UTF-8: \346\226\207
2911 "column" numbers: 17-19. */
2912 "\346\226\207"
2913
2914 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2915 UTF-8: 0xE5 0xAD 0x97
2916 C octal escaped UTF-8: \345\255\227
2917 "column" numbers: 20-22. */
2918 "\345\255\227"
2919
2920 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2921 UTF-8: 0xE5 0x8C 0x96
2922 C octal escaped UTF-8: \345\214\226
2923 "column" numbers: 23-25. */
2924 "\345\214\226"
2925
2926 /* U+3051 HIRAGANA LETTER KE
2927 UTF-8: 0xE3 0x81 0x91
2928 C octal escaped UTF-8: \343\201\221
2929 "column" numbers: 26-28. */
2930 "\343\201\221"
2931
2932 /* column numbers 29 onwards
2933 2333333.33334444444444
2934 9012345.67890123456789. */
2935 " after\" /* non-str */\n");
2936 lexer_test test (case_, content, NULL);
2937
2938 /* Verify that we get the expected token back, with the correct
2939 location information. */
2940 const cpp_token *tok = test.get_token ();
2941 ASSERT_EQ (tok->type, CPP_STRING);
2942 ASSERT_TOKEN_AS_TEXT_EQ
2943 (test.m_parser, tok,
2944 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2945
2946 /* Verify that cpp_interpret_string works. */
2947 cpp_string dst_string;
2948 const enum cpp_ttype type = CPP_STRING;
2949 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2950 &dst_string, type);
2951 ASSERT_TRUE (result);
2952 ASSERT_STREQ
2953 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2954 (const char *)dst_string.text);
2955 free (const_cast <unsigned char *> (dst_string.text));
2956
2957 /* Verify ranges of individual characters. This no longer includes the
7413e757 2958 opening quote, but does include the closing quote.
d4166bdc 2959 Assuming that both source and execution encodings are UTF-8, we have
7413e757 2960 a run of 25 octets in each, plus the NUL terminator. */
d4166bdc 2961 for (int i = 0; i < 25; i++)
2962 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
7413e757 2963 /* NUL-terminator should use the closing quote at column 35. */
2964 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
d4166bdc 2965
7413e757 2966 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
d4166bdc 2967}
2968
2969/* Test of string literal concatenation. */
2970
2971static void
2972test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
2973{
2974 /* Digits 0-9.
2975 .....................000000000.111111.11112222222222
2976 .....................123456789.012345.67890123456789. */
2977 const char *content = (" \"01234\" /* non-str */\n"
2978 " \"56789\" /* non-str */\n");
2979 lexer_test test (case_, content, NULL);
2980
2981 location_t input_locs[2];
2982
2983 /* Verify that we get the expected tokens back. */
2984 auto_vec <cpp_string> input_strings;
2985 const cpp_token *tok_a = test.get_token ();
2986 ASSERT_EQ (tok_a->type, CPP_STRING);
2987 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
2988 input_strings.safe_push (tok_a->val.str);
2989 input_locs[0] = tok_a->src_loc;
2990
2991 const cpp_token *tok_b = test.get_token ();
2992 ASSERT_EQ (tok_b->type, CPP_STRING);
2993 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
2994 input_strings.safe_push (tok_b->val.str);
2995 input_locs[1] = tok_b->src_loc;
2996
2997 /* Verify that cpp_interpret_string works. */
2998 cpp_string dst_string;
2999 const enum cpp_ttype type = CPP_STRING;
3000 bool result = cpp_interpret_string (test.m_parser,
3001 input_strings.address (), 2,
3002 &dst_string, type);
3003 ASSERT_TRUE (result);
3004 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3005 free (const_cast <unsigned char *> (dst_string.text));
3006
3007 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3008 test.m_concats.record_string_concatenation (2, input_locs);
3009
3010 location_t initial_loc = input_locs[0];
3011
7413e757 3012 /* "01234" on line 1. */
d4166bdc 3013 for (int i = 0; i <= 4; i++)
3014 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
7413e757 3015 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3016 for (int i = 5; i <= 10; i++)
d4166bdc 3017 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3018
7413e757 3019 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
d4166bdc 3020}
3021
3022/* Another test of string literal concatenation. */
3023
3024static void
3025test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3026{
3027 /* Digits 0-9.
3028 .....................000000000.111.11111112222222
3029 .....................123456789.012.34567890123456. */
3030 const char *content = (" \"01\" /* non-str */\n"
3031 " \"23\" /* non-str */\n"
3032 " \"45\" /* non-str */\n"
3033 " \"67\" /* non-str */\n"
3034 " \"89\" /* non-str */\n");
3035 lexer_test test (case_, content, NULL);
3036
3037 auto_vec <cpp_string> input_strings;
3038 location_t input_locs[5];
3039
3040 /* Verify that we get the expected tokens back. */
3041 for (int i = 0; i < 5; i++)
3042 {
3043 const cpp_token *tok = test.get_token ();
3044 ASSERT_EQ (tok->type, CPP_STRING);
3045 input_strings.safe_push (tok->val.str);
3046 input_locs[i] = tok->src_loc;
3047 }
3048
3049 /* Verify that cpp_interpret_string works. */
3050 cpp_string dst_string;
3051 const enum cpp_ttype type = CPP_STRING;
3052 bool result = cpp_interpret_string (test.m_parser,
3053 input_strings.address (), 5,
3054 &dst_string, type);
3055 ASSERT_TRUE (result);
3056 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3057 free (const_cast <unsigned char *> (dst_string.text));
3058
3059 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3060 test.m_concats.record_string_concatenation (5, input_locs);
3061
3062 location_t initial_loc = input_locs[0];
3063
3064 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3065 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3066 and expect get_source_range_for_substring to fail.
3067 However, for a string concatenation test, we can have a case
3068 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3069 but subsequent strings can be after it.
3070 Attempting to detect this within assert_char_at_range
3071 would overcomplicate the logic for the common test cases, so
3072 we detect it here. */
3073 if (should_have_column_data_p (input_locs[0])
3074 && !should_have_column_data_p (input_locs[4]))
3075 {
3076 /* Verify that get_source_range_for_substring gracefully rejects
3077 this case. */
3078 source_range actual_range;
3079 const char *err
5927e78e 3080 = get_source_range_for_char (test.m_parser, &test.m_concats,
3081 initial_loc, type, 0, &actual_range);
d4166bdc 3082 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3083 return;
3084 }
3085
3086 for (int i = 0; i < 5; i++)
3087 for (int j = 0; j < 2; j++)
3088 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3089 i + 1, 10 + j, 10 + j);
3090
7413e757 3091 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3092 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3093
3094 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
d4166bdc 3095}
3096
3097/* Another test of string literal concatenation, this time combined with
3098 various kinds of escaped characters. */
3099
3100static void
3101test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3102{
3103 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3104 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3105 const char *content
3106 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3107 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3108 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3109 lexer_test test (case_, content, NULL);
3110
3111 auto_vec <cpp_string> input_strings;
3112 location_t input_locs[4];
3113
3114 /* Verify that we get the expected tokens back. */
3115 for (int i = 0; i < 4; i++)
3116 {
3117 const cpp_token *tok = test.get_token ();
3118 ASSERT_EQ (tok->type, CPP_STRING);
3119 input_strings.safe_push (tok->val.str);
3120 input_locs[i] = tok->src_loc;
3121 }
3122
3123 /* Verify that cpp_interpret_string works. */
3124 cpp_string dst_string;
3125 const enum cpp_ttype type = CPP_STRING;
3126 bool result = cpp_interpret_string (test.m_parser,
3127 input_strings.address (), 4,
3128 &dst_string, type);
3129 ASSERT_TRUE (result);
3130 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3131 free (const_cast <unsigned char *> (dst_string.text));
3132
3133 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3134 test.m_concats.record_string_concatenation (4, input_locs);
3135
3136 location_t initial_loc = input_locs[0];
3137
3138 for (int i = 0; i <= 4; i++)
3139 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3140 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3141 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3142 for (int i = 7; i <= 9; i++)
3143 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3144
7413e757 3145 /* NUL-terminator should use the location of the final closing quote. */
3146 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3147
3148 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
d4166bdc 3149}
3150
3151/* Test of string literal in a macro. */
3152
3153static void
3154test_lexer_string_locations_macro (const line_table_case &case_)
3155{
3156 /* Digits 0-9.
3157 .....................0000000001111111111.22222222223.
3158 .....................1234567890123456789.01234567890. */
3159 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3160 " MACRO");
3161 lexer_test test (case_, content, NULL);
3162
3163 /* Verify that we get the expected tokens back. */
3164 const cpp_token *tok = test.get_token ();
3165 ASSERT_EQ (tok->type, CPP_PADDING);
3166
3167 tok = test.get_token ();
3168 ASSERT_EQ (tok->type, CPP_STRING);
3169 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3170
3171 /* Verify ranges of individual characters. We ought to
3172 see columns within the macro definition. */
7413e757 3173 for (int i = 0; i <= 10; i++)
d4166bdc 3174 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3175 i, 1, 20 + i, 20 + i);
3176
7413e757 3177 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
d4166bdc 3178
3179 tok = test.get_token ();
3180 ASSERT_EQ (tok->type, CPP_PADDING);
3181}
3182
3183/* Test of stringification of a macro argument. */
3184
3185static void
3186test_lexer_string_locations_stringified_macro_argument
3187 (const line_table_case &case_)
3188{
3189 /* .....................000000000111111111122222222223.
3190 .....................123456789012345678901234567890. */
3191 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3192 "MACRO(foo)\n");
3193 lexer_test test (case_, content, NULL);
3194
3195 /* Verify that we get the expected token back. */
3196 const cpp_token *tok = test.get_token ();
3197 ASSERT_EQ (tok->type, CPP_PADDING);
3198
3199 tok = test.get_token ();
3200 ASSERT_EQ (tok->type, CPP_STRING);
3201 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3202
3203 /* We don't support getting the location of a stringified macro
3204 argument. Verify that it fails gracefully. */
3205 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3206 "cpp_interpret_string_1 failed");
3207
3208 tok = test.get_token ();
3209 ASSERT_EQ (tok->type, CPP_PADDING);
3210
3211 tok = test.get_token ();
3212 ASSERT_EQ (tok->type, CPP_PADDING);
3213}
3214
3215/* Ensure that we are fail gracefully if something attempts to pass
3216 in a location that isn't a string literal token. Seen on this code:
3217
3218 const char a[] = " %d ";
3219 __builtin_printf (a, 0.5);
3220 ^
3221
3222 when c-format.c erroneously used the indicated one-character
3223 location as the format string location, leading to a read past the
3224 end of a string buffer in cpp_interpret_string_1. */
3225
3226static void
3227test_lexer_string_locations_non_string (const line_table_case &case_)
3228{
3229 /* .....................000000000111111111122222222223.
3230 .....................123456789012345678901234567890. */
3231 const char *content = (" a\n");
3232 lexer_test test (case_, content, NULL);
3233
3234 /* Verify that we get the expected token back. */
3235 const cpp_token *tok = test.get_token ();
3236 ASSERT_EQ (tok->type, CPP_NAME);
3237 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3238
3239 /* At this point, libcpp is attempting to interpret the name as a
3240 string literal, despite it not starting with a quote. We don't detect
3241 that, but we should at least fail gracefully. */
3242 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3243 "cpp_interpret_string_1 failed");
3244}
3245
3246/* Ensure that we can read substring information for a token which
3247 starts in one linemap and ends in another . Adapted from
3248 gcc.dg/cpp/pr69985.c. */
3249
3250static void
3251test_lexer_string_locations_long_line (const line_table_case &case_)
3252{
3253 /* .....................000000.000111111111
3254 .....................123456.789012346789. */
3255 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3256 " \"0123456789012345678901234567890123456789"
3257 "0123456789012345678901234567890123456789"
3258 "0123456789012345678901234567890123456789"
3259 "0123456789\"\n");
3260
3261 lexer_test test (case_, content, NULL);
3262
3263 /* Verify that we get the expected token back. */
3264 const cpp_token *tok = test.get_token ();
3265 ASSERT_EQ (tok->type, CPP_STRING);
3266
3267 if (!should_have_column_data_p (line_table->highest_location))
3268 return;
3269
3270 /* Verify ranges of individual characters. */
7413e757 3271 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3272 for (int i = 0; i < 131; i++)
d4166bdc 3273 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3274 i, 2, 7 + i, 7 + i);
3275}
3276
f9f26759 3277/* Test of locations within a raw string that doesn't contain a newline. */
3278
3279static void
3280test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3281{
3282 /* .....................00.0000000111111111122.
3283 .....................12.3456789012345678901. */
3284 const char *content = ("R\"foo(0123456789)foo\"\n");
3285 lexer_test test (case_, content, NULL);
3286
3287 /* Verify that we get the expected token back. */
3288 const cpp_token *tok = test.get_token ();
3289 ASSERT_EQ (tok->type, CPP_STRING);
3290
3291 /* Verify that cpp_interpret_string works. */
3292 cpp_string dst_string;
3293 const enum cpp_ttype type = CPP_STRING;
3294 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3295 &dst_string, type);
3296 ASSERT_TRUE (result);
3297 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3298 free (const_cast <unsigned char *> (dst_string.text));
3299
3300 if (!should_have_column_data_p (line_table->highest_location))
3301 return;
3302
3303 /* 0-9, plus the nil terminator. */
3304 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3305 for (int i = 0; i < 11; i++)
3306 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3307 i, 1, 7 + i, 7 + i);
3308}
3309
3310/* Test of locations within a raw string that contains a newline. */
3311
3312static void
3313test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3314{
3315 /* .....................00.0000.
3316 .....................12.3456. */
3317 const char *content = ("R\"foo(\n"
3318 /* .....................00000.
3319 .....................12345. */
3320 "hello\n"
3321 "world\n"
3322 /* .....................00000.
3323 .....................12345. */
3324 ")foo\"\n");
3325 lexer_test test (case_, content, NULL);
3326
3327 /* Verify that we get the expected token back. */
3328 const cpp_token *tok = test.get_token ();
3329 ASSERT_EQ (tok->type, CPP_STRING);
3330
3331 /* Verify that cpp_interpret_string works. */
3332 cpp_string dst_string;
3333 const enum cpp_ttype type = CPP_STRING;
3334 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3335 &dst_string, type);
3336 ASSERT_TRUE (result);
3337 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3338 free (const_cast <unsigned char *> (dst_string.text));
3339
3340 if (!should_have_column_data_p (line_table->highest_location))
3341 return;
3342
3343 /* Currently we don't support locations within raw strings that
3344 contain newlines. */
3345 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3346 "range endpoints are on different lines");
3347}
3348
0ccd6e7a 3349/* Test of parsing an unterminated raw string. */
3350
3351static void
3352test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3353{
3354 const char *content = "R\"ouch()ouCh\" /* etc */";
3355
3356 lexer_error_sink errors;
3357 lexer_test test (case_, content, &errors);
3358 test.m_implicitly_expect_EOF = false;
3359
3360 /* Attempt to parse the raw string. */
3361 const cpp_token *tok = test.get_token ();
3362 ASSERT_EQ (tok->type, CPP_EOF);
3363
3364 ASSERT_EQ (1, errors.m_errors.length ());
3365 /* We expect the message "unterminated raw string"
3366 in the "cpplib" translation domain.
3367 It's not clear that dgettext is available on all supported hosts,
3368 so this assertion is commented-out for now.
3369 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3370 errors.m_errors[0]);
3371 */
3372}
3373
d4166bdc 3374/* Test of lexing char constants. */
3375
3376static void
3377test_lexer_char_constants (const line_table_case &case_)
3378{
3379 /* Various char constants.
3380 .....................0000000001111111111.22222222223.
3381 .....................1234567890123456789.01234567890. */
3382 const char *content = (" 'a'\n"
3383 " u'a'\n"
3384 " U'a'\n"
3385 " L'a'\n"
3386 " 'abc'\n");
3387 lexer_test test (case_, content, NULL);
3388
3389 /* Verify that we get the expected tokens back. */
3390 /* 'a'. */
3391 const cpp_token *tok = test.get_token ();
3392 ASSERT_EQ (tok->type, CPP_CHAR);
3393 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3394
3395 unsigned int chars_seen;
3396 int unsignedp;
3397 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3398 &chars_seen, &unsignedp);
3399 ASSERT_EQ (cc, 'a');
3400 ASSERT_EQ (chars_seen, 1);
3401
3402 /* u'a'. */
3403 tok = test.get_token ();
3404 ASSERT_EQ (tok->type, CPP_CHAR16);
3405 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3406
3407 /* U'a'. */
3408 tok = test.get_token ();
3409 ASSERT_EQ (tok->type, CPP_CHAR32);
3410 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3411
3412 /* L'a'. */
3413 tok = test.get_token ();
3414 ASSERT_EQ (tok->type, CPP_WCHAR);
3415 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3416
3417 /* 'abc' (c-char-sequence). */
3418 tok = test.get_token ();
3419 ASSERT_EQ (tok->type, CPP_CHAR);
3420 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3421}
b73690a4 3422/* A table of interesting location_t values, giving one axis of our test
3423 matrix. */
3424
3425static const location_t boundary_locations[] = {
3426 /* Zero means "don't override the default values for a new line_table". */
3427 0,
3428
3429 /* An arbitrary non-zero value that isn't close to one of
3430 the boundary values below. */
3431 0x10000,
3432
3433 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3434 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3435 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3436 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3437 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3438 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3439
3440 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3441 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3442 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3443 LINE_MAP_MAX_LOCATION_WITH_COLS,
3444 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3445 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3446};
3447
7ec388ed 3448/* Run TESTCASE multiple times, once for each case in our test matrix. */
99b4f3a2 3449
3450void
7ec388ed 3451for_each_line_table_case (void (*testcase) (const line_table_case &))
99b4f3a2 3452{
b73690a4 3453 /* As noted above in the description of struct line_table_case,
3454 we want to explore a test matrix of interesting line_table
3455 situations, running various selftests for each case within the
3456 matrix. */
3457
3458 /* Run all tests with:
3459 (a) line_table->default_range_bits == 0, and
3460 (b) line_table->default_range_bits == 5. */
3461 int num_cases_tested = 0;
3462 for (int default_range_bits = 0; default_range_bits <= 5;
3463 default_range_bits += 5)
3464 {
3465 /* ...and use each of the "interesting" location values as
3466 the starting location within line_table. */
3467 const int num_boundary_locations
3468 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3469 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3470 {
3471 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3472
7ec388ed 3473 testcase (c);
b73690a4 3474
3475 num_cases_tested++;
3476 }
3477 }
3478
3479 /* Verify that we fully covered the test matrix. */
3480 ASSERT_EQ (num_cases_tested, 2 * 12);
7ec388ed 3481}
3482
3483/* Run all of the selftests within this file. */
3484
3485void
3486input_c_tests ()
3487{
3488 test_should_have_column_data_p ();
3489 test_unknown_location ();
3490 test_builtins ();
aca2a315 3491 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
7ec388ed 3492
3493 for_each_line_table_case (test_accessing_ordinary_linemaps);
3494 for_each_line_table_case (test_lexer);
3495 for_each_line_table_case (test_lexer_string_locations_simple);
3496 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3497 for_each_line_table_case (test_lexer_string_locations_hex);
3498 for_each_line_table_case (test_lexer_string_locations_oct);
3499 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3500 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3501 for_each_line_table_case (test_lexer_string_locations_ucn4);
3502 for_each_line_table_case (test_lexer_string_locations_ucn8);
3503 for_each_line_table_case (test_lexer_string_locations_wide_string);
3504 for_each_line_table_case (test_lexer_string_locations_string16);
3505 for_each_line_table_case (test_lexer_string_locations_string32);
3506 for_each_line_table_case (test_lexer_string_locations_u8);
3507 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3508 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3509 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3510 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3511 for_each_line_table_case (test_lexer_string_locations_macro);
3512 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3513 for_each_line_table_case (test_lexer_string_locations_non_string);
3514 for_each_line_table_case (test_lexer_string_locations_long_line);
f9f26759 3515 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3516 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
0ccd6e7a 3517 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
7ec388ed 3518 for_each_line_table_case (test_lexer_char_constants);
b73690a4 3519
99b4f3a2 3520 test_reading_source_line ();
3521}
3522
3523} // namespace selftest
3524
3525#endif /* CHECKING_P */