gcc/input.c
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2019 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic.h"
25 #include "diagnostic-core.h"
26 #include "selftest.h"
27 #include "cpplib.h"
28
29 #ifndef HAVE_ICONV
30 #define HAVE_ICONV 0
31 #endif
32
33 /* This is a cache used by get_next_line to store the content of a
34 file, so that individual lines can be looked up in it. */
35 struct fcache
36 {
37 /* Information about the boundaries of a line within the file. */
38 struct line_info
39 {
40 /* The line number. It starts from 1. */
41 size_t line_num;
42
43 /* The position (byte count) of the beginning of the line,
44 relative to the file data pointer. This starts at zero. */
45 size_t start_pos;
46
47 /* The position (byte count) of the last byte of the line. This
48 normally points to the '\n' character, or to one byte after the
49 last byte of the file, if the file doesn't contain a '\n'
50 character. */
51 size_t end_pos;
52
53 line_info (size_t l, size_t s, size_t e)
54 : line_num (l), start_pos (s), end_pos (e)
55 {}
56
57 line_info ()
58 :line_num (0), start_pos (0), end_pos (0)
59 {}
60 };
61
62 /* The number of times this file has been accessed. This is used
63 to decide which entry to evict from the cache
64 array. */
65 unsigned use_count;
66
67 /* The file_path is the key for identifying a particular file in
68 the cache.
69 For libcpp-using code, the underlying buffer for this field is
70 owned by the corresponding _cpp_file within the cpp_reader. */
71 const char *file_path;
72
73 FILE *fp;
74
75 /* This points to the content of the file that we've read so
76 far. */
77 char *data;
78
79 /* The size of the DATA array above. */
80 size_t size;
81
82 /* The number of bytes read from the underlying file so far. This
83 must be less than or equal to SIZE above. */
84 size_t nb_read;
85
86 /* The index of the beginning of the current line. */
87 size_t line_start_idx;
88
89 /* The number of the previous line read. This starts at 1. Zero
90 means we've read no line so far. */
91 size_t line_num;
92
93 /* This is the total number of lines of the current file. At the
94 moment, we try to get this information from the line map
95 subsystem. Note that this is just a hint. When using the C++
96 front-end, this hint is correct because the input file is then
97 completely tokenized before parsing starts; so the line map knows
98 the number of lines before compilation really starts. For the C
99 front-end, for example, it can happen that we start emitting diagnostics
100 before the line map has seen the end of the file. */
101 size_t total_lines;
102
103 /* Could this file be missing a trailing newline on its final line?
104 Initially true (to cope with empty files), set to true/false
105 as each line is read. */
106 bool missing_trailing_newline;
107
108 /* This is a record of the beginning and end of the lines we've seen
109 while reading the file. This is useful to avoid walking the data
110 from the beginning when we are asked to read a line that is
111 before LINE_START_IDX above. Note that the maximum size of this
112 record is fcache_line_record_size, so that the memory consumption
113 doesn't explode. We thus scale total_lines down to
114 fcache_line_record_size. */
115 vec<line_info, va_heap> line_record;
116
117 fcache ();
118 ~fcache ();
119 };
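/* Example (illustrative sketch): with total_lines == 10000 and
   fcache_line_record_size == 100, a given line number maps to a slot in
   LINE_RECORD as

     slot = (line_num * fcache_line_record_size) / total_lines;

   so line 5000 lands near slot 50, and at most about
   fcache_line_record_size line boundaries are recorded regardless of
   how large the file is.  */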
120
121 /* Current position in real source file. */
122
123 location_t input_location = UNKNOWN_LOCATION;
124
125 struct line_maps *line_table;
126
127 /* A stashed copy of "line_table" for use by selftest::line_table_test.
128 This needs to be a global so that it can be a GC root, and thus
129 prevent the stashed copy from being garbage-collected if the GC runs
130 during a line_table_test. */
131
132 struct line_maps *saved_line_table;
133
134 static fcache *fcache_tab;
135 static const size_t fcache_tab_size = 16;
136 static const size_t fcache_buffer_size = 4 * 1024;
137 static const size_t fcache_line_record_size = 100;
138
139 /* Expand the source location LOC into a human readable location. If
140 LOC resolves to a builtin location, the file name of the readable
141 location is set to the string "<built-in>". If EXPANSION_POINT_P is
142 TRUE and LOC is virtual, then it is resolved to the expansion
143 point of the involved macro. Otherwise, it is resolved to the
144 spelling location of the token.
145
146 When resolving to the spelling location of the token, if the
147 resulting location is for a built-in location (that is, it has no
148 associated line/column) in the context of a macro expansion, the
149 returned location is the first one (while unwinding the macro
150 location towards its expansion point) that is in real source
151 code.
152
153 ASPECT controls which part of the location to use. */
154
155 static expanded_location
156 expand_location_1 (location_t loc,
157 bool expansion_point_p,
158 enum location_aspect aspect)
159 {
160 expanded_location xloc;
161 const line_map_ordinary *map;
162 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
163 tree block = NULL;
164
165 if (IS_ADHOC_LOC (loc))
166 {
167 block = LOCATION_BLOCK (loc);
168 loc = LOCATION_LOCUS (loc);
169 }
170
171 memset (&xloc, 0, sizeof (xloc));
172
173 if (loc >= RESERVED_LOCATION_COUNT)
174 {
175 if (!expansion_point_p)
176 {
177 /* We want to resolve LOC to its spelling location.
178
179 But if that spelling location is a reserved location that
180 appears in the context of a macro expansion (like for a
181 location for a built-in token), let's consider the first
182 location (toward the expansion point) that is not reserved;
183 that is, the first location that is in real source code. */
184 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
185 loc, NULL);
186 lrk = LRK_SPELLING_LOCATION;
187 }
188 loc = linemap_resolve_location (line_table, loc, lrk, &map);
189
190 /* loc is now either in an ordinary map, or is a reserved location.
191 If it is a compound location, the caret is in a spelling location,
192 but the start/finish might still be a virtual location.
193 Depending on what the caller asked for, we may need to recurse
194 one level in order to resolve any virtual locations in the
195 end-points. */
196 switch (aspect)
197 {
198 default:
199 gcc_unreachable ();
200 /* Fall through. */
201 case LOCATION_ASPECT_CARET:
202 break;
203 case LOCATION_ASPECT_START:
204 {
205 location_t start = get_start (loc);
206 if (start != loc)
207 return expand_location_1 (start, expansion_point_p, aspect);
208 }
209 break;
210 case LOCATION_ASPECT_FINISH:
211 {
212 location_t finish = get_finish (loc);
213 if (finish != loc)
214 return expand_location_1 (finish, expansion_point_p, aspect);
215 }
216 break;
217 }
218 xloc = linemap_expand_location (line_table, map, loc);
219 }
220
221 xloc.data = block;
222 if (loc <= BUILTINS_LOCATION)
223 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
224
225 return xloc;
226 }
227
228 /* Initialize the set of caches used for files accessed by caret
229 diagnostics. */
230
231 static void
232 diagnostic_file_cache_init (void)
233 {
234 if (fcache_tab == NULL)
235 fcache_tab = new fcache[fcache_tab_size];
236 }
237
238 /* Free the resources used by the set of caches for files accessed
239 by caret diagnostics. */
240
241 void
242 diagnostic_file_cache_fini (void)
243 {
244 if (fcache_tab)
245 {
246 delete [] (fcache_tab);
247 fcache_tab = NULL;
248 }
249 }
250
251 /* Return the total number of lines that have been read so far by the
252 line map (in the preprocessor). For languages like C++ that
253 entirely preprocess the input file before starting to parse, this
254 equals the actual number of lines of the file. */
255
256 static size_t
257 total_lines_num (const char *file_path)
258 {
259 size_t r = 0;
260 location_t l = 0;
261 if (linemap_get_file_highest_location (line_table, file_path, &l))
262 {
263 gcc_assert (l >= RESERVED_LOCATION_COUNT);
264 expanded_location xloc = expand_location (l);
265 r = xloc.line;
266 }
267 return r;
268 }
269
270 /* Look up the cache used for the content of a given file accessed by
271 caret diagnostics. Return the cached file that was found, or NULL
272 if the file has not been cached. */
273
274 static fcache*
275 lookup_file_in_cache_tab (const char *file_path)
276 {
277 if (file_path == NULL)
278 return NULL;
279
280 diagnostic_file_cache_init ();
281
282 /* This will contain the found cached file. */
283 fcache *r = NULL;
284 for (unsigned i = 0; i < fcache_tab_size; ++i)
285 {
286 fcache *c = &fcache_tab[i];
287 if (c->file_path && !strcmp (c->file_path, file_path))
288 {
289 ++c->use_count;
290 r = c;
291 }
292 }
293
294 if (r)
295 ++r->use_count;
296
297 return r;
298 }
299
300 /* Purge any mention of FILE_PATH from the cache of files used for
301 printing source code. For use in selftests when working
302 with tempfiles. */
303
304 void
305 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
306 {
307 gcc_assert (file_path);
308
309 fcache *r = lookup_file_in_cache_tab (file_path);
310 if (!r)
311 /* Not found. */
312 return;
313
314 r->file_path = NULL;
315 if (r->fp)
316 fclose (r->fp);
317 r->fp = NULL;
318 r->nb_read = 0;
319 r->line_start_idx = 0;
320 r->line_num = 0;
321 r->line_record.truncate (0);
322 r->use_count = 0;
323 r->total_lines = 0;
324 r->missing_trailing_newline = true;
325 }
326
327 /* Return the file cache entry that has been used the least, or the
328 first empty one. If HIGHEST_USE_COUNT is non-null,
329 *HIGHEST_USE_COUNT is set to the highest use count of the entries
330 in the cache table. */
331
332 static fcache*
333 evicted_cache_tab_entry (unsigned *highest_use_count)
334 {
335 diagnostic_file_cache_init ();
336
337 fcache *to_evict = &fcache_tab[0];
338 unsigned huc = to_evict->use_count;
339 for (unsigned i = 1; i < fcache_tab_size; ++i)
340 {
341 fcache *c = &fcache_tab[i];
342 bool c_is_empty = (c->file_path == NULL);
343
344 if (c->use_count < to_evict->use_count
345 || (to_evict->file_path && c_is_empty))
346 /* We evict C because it's either an entry with a lower use
347 count or one that is empty. */
348 to_evict = c;
349
350 if (huc < c->use_count)
351 huc = c->use_count;
352
353 if (c_is_empty)
354 /* We've reached the end of the cache; subsequent elements are
355 all empty. */
356 break;
357 }
358
359 if (highest_use_count)
360 *highest_use_count = huc;
361
362 return to_evict;
363 }
364
365 /* Create the cache used for the content of a given file to be
366 accessed by caret diagnostics. This cache is added to an array of
367 caches and can be retrieved by lookup_file_in_cache_tab. This
368 function returns the created cache. Note that only the last
369 fcache_tab_size files are cached. */
370
371 static fcache*
372 add_file_to_cache_tab (const char *file_path)
373 {
374
375 FILE *fp = fopen (file_path, "r");
376 if (fp == NULL)
377 return NULL;
378
379 unsigned highest_use_count = 0;
380 fcache *r = evicted_cache_tab_entry (&highest_use_count);
381 r->file_path = file_path;
382 if (r->fp)
383 fclose (r->fp);
384 r->fp = fp;
385 r->nb_read = 0;
386 r->line_start_idx = 0;
387 r->line_num = 0;
388 r->line_record.truncate (0);
389 /* Ensure that this cache entry doesn't get evicted next time
390 add_file_to_cache_tab is called. */
391 r->use_count = ++highest_use_count;
392 r->total_lines = total_lines_num (file_path);
393 r->missing_trailing_newline = true;
394
395 return r;
396 }
397
398 /* Look up the cache used for the content of a given file accessed by
399 caret diagnostics. If no cached file was found, create a new cache
400 for this file, add it to the array of cached files and return
401 it. */
402
403 static fcache*
404 lookup_or_add_file_to_cache_tab (const char *file_path)
405 {
406 fcache *r = lookup_file_in_cache_tab (file_path);
407 if (r == NULL)
408 r = add_file_to_cache_tab (file_path);
409 return r;
410 }
411
412 /* Default constructor for a file cache used by caret
413 diagnostics. */
414
415 fcache::fcache ()
416 : use_count (0), file_path (NULL), fp (NULL), data (0),
417 size (0), nb_read (0), line_start_idx (0), line_num (0),
418 total_lines (0), missing_trailing_newline (true)
419 {
420 line_record.create (0);
421 }
422
423 /* Destructor for a file cache used by caret diagnostics. */
424
425 fcache::~fcache ()
426 {
427 if (fp)
428 {
429 fclose (fp);
430 fp = NULL;
431 }
432 if (data)
433 {
434 XDELETEVEC (data);
435 data = 0;
436 }
437 line_record.release ();
438 }
439
440 /* Returns TRUE iff the cache would need to be filled with data coming
441 from the file. That is, either the cache is empty or full, or the
442 start of the current line is at the end of the data read so far.
443 Note that if the cache is full, it would need to be extended and
444 filled again. */
444
445 static bool
446 needs_read (fcache *c)
447 {
448 return (c->nb_read == 0
449 || c->nb_read == c->size
450 || (c->line_start_idx >= c->nb_read - 1));
451 }
452
453 /* Return TRUE iff the cache is full and thus needs to be
454 extended. */
455
456 static bool
457 needs_grow (fcache *c)
458 {
459 return c->nb_read == c->size;
460 }
461
462 /* Grow the cache if it needs to be extended. */
463
464 static void
465 maybe_grow (fcache *c)
466 {
467 if (!needs_grow (c))
468 return;
469
470 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
471 c->data = XRESIZEVEC (char, c->data, size);
472 c->size = size;
473 }
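/* Example (illustrative sketch): starting from an empty cache the buffer
   grows geometrically as more of the file is read, i.e.

     size: 0 -> 4096 -> 8192 -> 16384 -> ...

   so caching an N-byte file needs only O(log N) calls to XRESIZEVEC.  */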
474
475 /* Read more data into the cache. Extends the cache if need be.
476 Returns TRUE iff new data could be read. */
477
478 static bool
479 read_data (fcache *c)
480 {
481 if (feof (c->fp) || ferror (c->fp))
482 return false;
483
484 maybe_grow (c);
485
486 char * from = c->data + c->nb_read;
487 size_t to_read = c->size - c->nb_read;
488 size_t nb_read = fread (from, 1, to_read, c->fp);
489
490 if (ferror (c->fp))
491 return false;
492
493 c->nb_read += nb_read;
494 return !!nb_read;
495 }
496
497 /* Read new data iff the cache needs to be filled with more data
498 coming from the file FP. Return TRUE iff the cache was filled with
499 more data. */
500
501 static bool
502 maybe_read_data (fcache *c)
503 {
504 if (!needs_read (c))
505 return false;
506 return read_data (c);
507 }
508
509 /* Read a new line from file FP, using C as a cache for the data
510 coming from the file. Upon successful completion, *LINE is set to
511 the beginning of the line found. *LINE points directly in the
512 line cache and is only valid until the next call of get_next_line.
513 *LINE_LEN is set to the length of the line. Note that the line
514 does not contain any terminal delimiter. This function returns
515 true if some data was read or processed from the cache, false
516 otherwise. Note that subsequent calls to get_next_line might
517 make the content of *LINE invalid. */
518
519 static bool
520 get_next_line (fcache *c, char **line, ssize_t *line_len)
521 {
522 /* Fill the cache with data to process. */
523 maybe_read_data (c);
524
525 size_t remaining_size = c->nb_read - c->line_start_idx;
526 if (remaining_size == 0)
527 /* There is no more data to process. */
528 return false;
529
530 char *line_start = c->data + c->line_start_idx;
531
532 char *next_line_start = NULL;
533 size_t len = 0;
534 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
535 if (line_end == NULL)
536 {
537 /* We haven't found the end-of-line delimiter in the cache.
538 Fill the cache with more data from the file and look for the
539 '\n'. */
540 while (maybe_read_data (c))
541 {
542 line_start = c->data + c->line_start_idx;
543 remaining_size = c->nb_read - c->line_start_idx;
544 line_end = (char *) memchr (line_start, '\n', remaining_size);
545 if (line_end != NULL)
546 {
547 next_line_start = line_end + 1;
548 break;
549 }
550 }
551 if (line_end == NULL)
552 {
553 /* We've loaded the whole file into the cache and still found no
554 '\n'. Let's say the line ends one byte past the
555 end of the file. This is to stay consistent with the case
556 where the line ends with a '\n' and line_end points to
557 that terminal '\n'. That consistency is useful below in
558 the len calculation. */
559 line_end = c->data + c->nb_read;
560 c->missing_trailing_newline = true;
561 }
562 else
563 c->missing_trailing_newline = false;
564 }
565 else
566 {
567 next_line_start = line_end + 1;
568 c->missing_trailing_newline = false;
569 }
570
571 if (ferror (c->fp))
572 return false;
573
574 /* At this point, we've found the end of the line. It either
575 points to the '\n' or to one byte after the last byte of the
576 file. */
577 gcc_assert (line_end != NULL);
578
579 len = line_end - line_start;
580
581 if (c->line_start_idx < c->nb_read)
582 *line = line_start;
583
584 ++c->line_num;
585
586 /* Before we update our line record, make sure the hint about the
587 total number of lines of the file is correct. If it's not, then
588 we give up recording line boundaries from now on. */
589 bool update_line_record = true;
590 if (c->line_num > c->total_lines)
591 update_line_record = false;
592
593 /* Now update our line record so that re-reading lines from
594 before c->line_start_idx is faster. */
595 if (update_line_record
596 && c->line_record.length () < fcache_line_record_size)
597 {
598 /* If the file's lines fit in the line record, we just record all
599 of its lines ... */
600 if (c->total_lines <= fcache_line_record_size
601 && c->line_num > c->line_record.length ())
602 c->line_record.safe_push (fcache::line_info (c->line_num,
603 c->line_start_idx,
604 line_end - c->data));
605 else if (c->total_lines > fcache_line_record_size)
606 {
607 /* ... otherwise, we scale the line numbers down so that at most
608 fcache_line_record_size lines are recorded. */
609 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
610 if (c->line_record.length () == 0
611 || n >= c->line_record.length ())
612 c->line_record.safe_push (fcache::line_info (c->line_num,
613 c->line_start_idx,
614 line_end - c->data));
615 }
616 }
617
618 /* Update c->line_start_idx so that it points to the next line to be
619 read. */
620 if (next_line_start)
621 c->line_start_idx = next_line_start - c->data;
622 else
623 /* We didn't find any terminal '\n'. Let's consider that the end
624 of line is the end of the data in the cache. The next
625 invocation of get_next_line will either read more data from the
626 underlying file or return false early because we've reached the
627 end of the file. */
628 c->line_start_idx = c->nb_read;
629
630 *line_len = len;
631
632 return true;
633 }
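/* Example (illustrative sketch): walking every line of a cached file with
   get_next_line above, assuming C was obtained from
   lookup_or_add_file_to_cache_tab:

     char *line;
     ssize_t len;
     while (get_next_line (c, &line, &len))
       fprintf (stderr, "%u: %.*s\n",
                (unsigned) c->line_num, (int) len, line);

   Each returned LINE points into C->data and is invalidated by the
   next call.  */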
634
635 /* Consume the next bytes coming from the cache (or from its
636 underlying file if there are remaining unread bytes in the file)
637 until we reach the next end-of-line (or end-of-file). There is no
638 copying from the cache involved. Return TRUE upon successful
639 completion. */
640
641 static bool
642 goto_next_line (fcache *cache)
643 {
644 char *l;
645 ssize_t len;
646
647 return get_next_line (cache, &l, &len);
648 }
649
650 /* Read an arbitrary line number LINE_NUM from the file cached in C.
651 If the line was read successfully, *LINE points to the beginning
652 of the line in the file cache and *LINE_LEN is the length of the
653 line. *LINE is not nul-terminated, but may contain zero bytes.
654 *LINE is only valid until the next call of read_line_num.
655 This function returns true if a line was read. */
656
657 static bool
658 read_line_num (fcache *c, size_t line_num,
659 char **line, ssize_t *line_len)
660 {
661 gcc_assert (line_num > 0);
662
663 if (line_num <= c->line_num)
664 {
665 /* We've been asked to read lines that are before c->line_num.
666 So let's use our line record (if it's not empty) to try to
667 avoid re-reading the file from the beginning again. */
668
669 if (c->line_record.is_empty ())
670 {
671 c->line_start_idx = 0;
672 c->line_num = 0;
673 }
674 else
675 {
676 fcache::line_info *i = NULL;
677 if (c->total_lines <= fcache_line_record_size)
678 {
679 /* In languages where the input file is not totally
680 preprocessed up front, the c->total_lines hint
681 can be smaller than the number of lines of the
682 file. In that case, only the first
683 c->total_lines have been recorded.
684
685 Otherwise, the first c->total_lines we've read have
686 their start/end recorded here. */
687 i = (line_num <= c->total_lines)
688 ? &c->line_record[line_num - 1]
689 : &c->line_record[c->total_lines - 1];
690 gcc_assert (i->line_num <= line_num);
691 }
692 else
693 {
694 /* So the file had more lines than our line record
695 size. Thus the number of lines we've recorded has
696 been scaled down to fcache_line_record_size. Let's
697 pick the start/end of the recorded line that is
698 closest to line_num. */
699 size_t n = (line_num <= c->total_lines)
700 ? line_num * fcache_line_record_size / c->total_lines
701 : c->line_record.length () - 1;
702 if (n < c->line_record.length ())
703 {
704 i = &c->line_record[n];
705 gcc_assert (i->line_num <= line_num);
706 }
707 }
708
709 if (i && i->line_num == line_num)
710 {
711 /* We have the start/end of the line. */
712 *line = c->data + i->start_pos;
713 *line_len = i->end_pos - i->start_pos;
714 return true;
715 }
716
717 if (i)
718 {
719 c->line_start_idx = i->start_pos;
720 c->line_num = i->line_num - 1;
721 }
722 else
723 {
724 c->line_start_idx = 0;
725 c->line_num = 0;
726 }
727 }
728 }
729
730 /* Let's walk from line c->line_num up to line_num - 1, without
731 copying any line. */
732 while (c->line_num < line_num - 1)
733 if (!goto_next_line (c))
734 return false;
735
736 /* The line we want is the next one. Let's read and copy it back to
737 the caller. */
738 return get_next_line (c, line, line_len);
739 }
740
741 /* Return the physical source line that corresponds to FILE_PATH/LINE.
742 The line is not nul-terminated. The returned pointer is only
743 valid until the next call of location_get_source_line.
744 Note that the line can contain several null characters,
745 so the returned value's length gives the actual length of the line.
746 If the function fails, a NULL char_span is returned. */
747
748 char_span
749 location_get_source_line (const char *file_path, int line)
750 {
751 char *buffer = NULL;
752 ssize_t len;
753
754 if (line == 0)
755 return char_span (NULL, 0);
756
757 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
758 if (c == NULL)
759 return char_span (NULL, 0);
760
761 bool read = read_line_num (c, line, &buffer, &len);
762 if (!read)
763 return char_span (NULL, 0);
764
765 return char_span (buffer, len);
766 }
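/* Example (illustrative sketch): how diagnostic code typically uses
   location_get_source_line, assuming EXPLOC is an expanded_location for
   the location being printed:

     char_span line = location_get_source_line (exploc.file, exploc.line);
     if (line)
       fprintf (stderr, "%.*s\n",
                (int) line.length (), line.get_buffer ());

   The span is not nul-terminated and is only valid until the next call
   to location_get_source_line.  */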
767
768 /* Determine if FILE_PATH is missing a trailing newline on its final line.
769 Only valid to call once all of the file has been loaded, by
770 requesting a line number beyond the end of the file. */
771
772 bool
773 location_missing_trailing_newline (const char *file_path)
774 {
775 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
776 if (c == NULL)
777 return false;
778
779 return c->missing_trailing_newline;
780 }
781
782 /* Test if the location originates from the spelling location of a
783 built-in token. That is, return TRUE if LOC is a (possibly
784 virtual) location of a built-in token that appears in the expansion
785 list of a macro. Please note that this function also works on
786 tokens that result from built-in tokens. For instance, the
787 function would return true if passed a token "4" that is the result
788 of the expansion of the built-in __LINE__ macro. */
789 bool
790 is_location_from_builtin_token (location_t loc)
791 {
792 const line_map_ordinary *map = NULL;
793 loc = linemap_resolve_location (line_table, loc,
794 LRK_SPELLING_LOCATION, &map);
795 return loc == BUILTINS_LOCATION;
796 }
797
798 /* Expand the source location LOC into a human readable location. If
799 LOC is virtual, it resolves to the expansion point of the involved
800 macro. If LOC resolves to a builtin location, the file name of the
801 readable location is set to the string "<built-in>". */
802
803 expanded_location
804 expand_location (location_t loc)
805 {
806 return expand_location_1 (loc, /*expansion_point_p=*/true,
807 LOCATION_ASPECT_CARET);
808 }
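/* Example (illustrative sketch): for a location LOC of a token spelled
   inside a macro expansion, the expander above and the one below resolve
   to different places:

     expanded_location use_site = expand_location (loc);
     // File/line/column of the macro expansion point (the use site).

     expanded_location spelling
       = expand_location_to_spelling_point (loc, LOCATION_ASPECT_CARET);
     // File/line/column where the token is spelled, typically inside
     // the macro definition.  */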
809
810 /* Expand the source location LOC into a human readable location. If
811 LOC is virtual, it resolves to the expansion location of the
812 relevant macro. If LOC resolves to a builtin location, the file
813 name of the readable location is set to the string
814 "<built-in>". */
815
816 expanded_location
817 expand_location_to_spelling_point (location_t loc,
818 enum location_aspect aspect)
819 {
820 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
821 }
822
823 /* The rich_location class within libcpp requires a way to expand
824 location_t instances, and relies on the client code
825 providing a symbol named
826 linemap_client_expand_location_to_spelling_point
827 to do this.
828
829 This is the implementation for libcommon.a (all host binaries),
830 which simply calls into expand_location_1. */
831
832 expanded_location
833 linemap_client_expand_location_to_spelling_point (location_t loc,
834 enum location_aspect aspect)
835 {
836 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
837 }
838
839
840 /* If LOCATION is in a system header and if it is a virtual location for
841 a token coming from the expansion of a macro, unwind it to the
842 location of the expansion point of the macro. Otherwise, just return
843 LOCATION.
844
845 This is used for instance when we want to emit diagnostics about a
846 token that may be located in a macro that is itself defined in a
847 system header, for example, for the NULL macro. In such a case, if
848 LOCATION were passed directly to diagnostic functions such as
849 warning_at, the diagnostic would be suppressed (unless
850 -Wsystem-headers). */
851
852 location_t
853 expansion_point_location_if_in_system_header (location_t location)
854 {
855 if (in_system_header_at (location))
856 location = linemap_resolve_location (line_table, location,
857 LRK_MACRO_EXPANSION_POINT,
858 NULL);
859 return location;
860 }
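/* Example (illustrative sketch): how a caller might combine the function
   above with warning_at when diagnosing a token that may come from a
   macro defined in a system header; OPT_W... stands for whichever warning
   option applies:

     location_t loc
       = expansion_point_location_if_in_system_header (token_loc);
     warning_at (loc, OPT_W..., "suspicious use of NULL");

   The diagnostic then points at the user code that expanded the macro
   instead of being suppressed as coming from a system header.  */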
861
862 /* If LOCATION is a virtual location for a token coming from the expansion
863 of a macro, unwind to the location of the expansion point of the macro. */
864
865 location_t
866 expansion_point_location (location_t location)
867 {
868 return linemap_resolve_location (line_table, location,
869 LRK_MACRO_EXPANSION_POINT, NULL);
870 }
871
872 /* Construct a location with caret at CARET, ranging from START to
873 FINISH, e.g.
874
875 11111111112
876 12345678901234567890
877 522
878 523 return foo + bar;
879 ~~~~^~~~~
880 524
881
882 The location's caret is at the "+", line 523 column 15, but starts
883 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
884 of "bar" at column 19. */
885
886 location_t
887 make_location (location_t caret, location_t start, location_t finish)
888 {
889 location_t pure_loc = get_pure_location (caret);
890 source_range src_range;
891 src_range.m_start = get_start (start);
892 src_range.m_finish = get_finish (finish);
893 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
894 pure_loc,
895 src_range,
896 NULL);
897 return combined_loc;
898 }
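/* Example (illustrative sketch): building the location pictured above,
   assuming CARET_LOC, START_LOC and FINISH_LOC are the locations of the
   "+", of the "f" in "foo" and of the "r" in "bar" respectively:

     location_t combined = make_location (caret_loc, start_loc, finish_loc);
     warning_at (combined, 0, "this diagnostic underlines %<foo + bar%>");

   The caret is printed at column 15 and the underline covers columns 11
   to 19 of line 523.  */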
899
900 /* Same as above, but taking a source range rather than two locations. */
901
902 location_t
903 make_location (location_t caret, source_range src_range)
904 {
905 location_t pure_loc = get_pure_location (caret);
906 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
907 }
908
909 /* Dump statistics to stderr about the memory usage of the line_table
910 set of line maps. This also displays some statistics about macro
911 expansion. */
912
913 void
914 dump_line_table_statistics (void)
915 {
916 struct linemap_stats s;
917 long total_used_map_size,
918 macro_maps_size,
919 total_allocated_map_size;
920
921 memset (&s, 0, sizeof (s));
922
923 linemap_get_statistics (line_table, &s);
924
925 macro_maps_size = s.macro_maps_used_size
926 + s.macro_maps_locations_size;
927
928 total_allocated_map_size = s.ordinary_maps_allocated_size
929 + s.macro_maps_allocated_size
930 + s.macro_maps_locations_size;
931
932 total_used_map_size = s.ordinary_maps_used_size
933 + s.macro_maps_used_size
934 + s.macro_maps_locations_size;
935
936 fprintf (stderr, "Number of expanded macros: %5ld\n",
937 s.num_expanded_macros);
938 if (s.num_expanded_macros != 0)
939 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
940 s.num_macro_tokens / s.num_expanded_macros);
941 fprintf (stderr,
942 "\nLine Table allocations during the "
943 "compilation process\n");
944 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
945 SIZE_AMOUNT (s.num_ordinary_maps_used));
946 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
947 SIZE_AMOUNT (s.ordinary_maps_used_size));
948 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
949 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
950 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
951 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
952 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
953 SIZE_AMOUNT (s.num_macro_maps_used));
954 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
955 SIZE_AMOUNT (s.macro_maps_used_size));
956 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
957 SIZE_AMOUNT (s.macro_maps_locations_size));
958 fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
959 SIZE_AMOUNT (macro_maps_size));
960 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
961 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
962 fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
963 SIZE_AMOUNT (total_allocated_map_size));
964 fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
965 SIZE_AMOUNT (total_used_map_size));
966 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
967 SIZE_AMOUNT (s.adhoc_table_size));
968 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
969 SIZE_AMOUNT (s.adhoc_table_entries_used));
970 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
971 SIZE_AMOUNT (line_table->num_optimized_ranges));
972 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
973 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
974
975 fprintf (stderr, "\n");
976 }
977
978 /* Get location one beyond the final location in ordinary map IDX. */
979
980 static location_t
981 get_end_location (struct line_maps *set, unsigned int idx)
982 {
983 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
984 return set->highest_location;
985
986 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
987 return MAP_START_LOCATION (next_map);
988 }
989
990 /* Helper function for write_digit_row. */
991
992 static void
993 write_digit (FILE *stream, int digit)
994 {
995 fputc ('0' + (digit % 10), stream);
996 }
997
998 /* Helper function for dump_location_info.
999 Write a row of numbers to STREAM, numbering a source line,
1000 giving the units, tens, hundreds etc of the column number. */
1001
1002 static void
1003 write_digit_row (FILE *stream, int indent,
1004 const line_map_ordinary *map,
1005 location_t loc, int max_col, int divisor)
1006 {
1007 fprintf (stream, "%*c", indent, ' ');
1008 fprintf (stream, "|");
1009 for (int column = 1; column < max_col; column++)
1010 {
1011 location_t column_loc = loc + (column << map->m_range_bits);
1012 write_digit (stream, column_loc / divisor);
1013 }
1014 fprintf (stream, "\n");
1015 }
1016
1017 /* Write a half-closed (START) / half-open (END) interval of
1018 location_t to STREAM. */
1019
1020 static void
1021 dump_location_range (FILE *stream,
1022 location_t start, location_t end)
1023 {
1024 fprintf (stream,
1025 " location_t interval: %u <= loc < %u\n",
1026 start, end);
1027 }
1028
1029 /* Write a labelled description of a half-closed (START) / half-open (END)
1030 interval of location_t to STREAM. */
1031
1032 static void
1033 dump_labelled_location_range (FILE *stream,
1034 const char *name,
1035 location_t start, location_t end)
1036 {
1037 fprintf (stream, "%s\n", name);
1038 dump_location_range (stream, start, end);
1039 fprintf (stream, "\n");
1040 }
1041
1042 /* Write a visualization of the locations in the line_table to STREAM. */
1043
1044 void
1045 dump_location_info (FILE *stream)
1046 {
1047 /* Visualize the reserved locations. */
1048 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1049 0, RESERVED_LOCATION_COUNT);
1050
1051 /* Visualize the ordinary line_map instances, rendering the sources. */
1052 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1053 {
1054 location_t end_location = get_end_location (line_table, idx);
1055 /* half-closed: doesn't include this one. */
1056
1057 const line_map_ordinary *map
1058 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1059 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1060 dump_location_range (stream,
1061 MAP_START_LOCATION (map), end_location);
1062 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1063 fprintf (stream, " starting at line: %i\n",
1064 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1065 fprintf (stream, " column and range bits: %i\n",
1066 map->m_column_and_range_bits);
1067 fprintf (stream, " column bits: %i\n",
1068 map->m_column_and_range_bits - map->m_range_bits);
1069 fprintf (stream, " range bits: %i\n",
1070 map->m_range_bits);
1071 const char * reason;
1072 switch (map->reason) {
1073 case LC_ENTER:
1074 reason = "LC_ENTER";
1075 break;
1076 case LC_LEAVE:
1077 reason = "LC_LEAVE";
1078 break;
1079 case LC_RENAME:
1080 reason = "LC_RENAME";
1081 break;
1082 case LC_RENAME_VERBATIM:
1083 reason = "LC_RENAME_VERBATIM";
1084 break;
1085 case LC_ENTER_MACRO:
1086 reason = "LC_ENTER_MACRO";
1087 break;
1088 default:
1089 reason = "Unknown";
1090 }
1091 fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1092
1093 const line_map_ordinary *includer_map
1094 = linemap_included_from_linemap (line_table, map);
1095 fprintf (stream, " included from location: %d",
1096 linemap_included_from (map));
1097 if (includer_map) {
1098 fprintf (stream, " (in ordinary map %d)",
1099 int (includer_map - line_table->info_ordinary.maps));
1100 }
1101 fprintf (stream, "\n");
1102
1103 /* Render the span of source lines that this "map" covers. */
1104 for (location_t loc = MAP_START_LOCATION (map);
1105 loc < end_location;
1106 loc += (1 << map->m_range_bits) )
1107 {
1108 gcc_assert (pure_location_p (line_table, loc) );
1109
1110 expanded_location exploc
1111 = linemap_expand_location (line_table, map, loc);
1112
1113 if (exploc.column == 0)
1114 {
1115 /* Beginning of a new source line: draw the line. */
1116
1117 char_span line_text = location_get_source_line (exploc.file,
1118 exploc.line);
1119 if (!line_text)
1120 break;
1121 fprintf (stream,
1122 "%s:%3i|loc:%5i|%.*s\n",
1123 exploc.file, exploc.line,
1124 loc,
1125 (int)line_text.length (), line_text.get_buffer ());
1126
1127 /* "loc" is at column 0, which means "the whole line".
1128 Render the locations *within* the line, by underlining
1129 it, showing the location_t numeric values
1130 at each column. */
1131 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1132 if (max_col > line_text.length ())
1133 max_col = line_text.length () + 1;
1134
1135 int len_lnum = num_digits (exploc.line);
1136 if (len_lnum < 3)
1137 len_lnum = 3;
1138 int len_loc = num_digits (loc);
1139 if (len_loc < 5)
1140 len_loc = 5;
1141
1142 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1143
1144 /* Thousands. */
1145 if (end_location > 999)
1146 write_digit_row (stream, indent, map, loc, max_col, 1000);
1147
1148 /* Hundreds. */
1149 if (end_location > 99)
1150 write_digit_row (stream, indent, map, loc, max_col, 100);
1151
1152 /* Tens. */
1153 write_digit_row (stream, indent, map, loc, max_col, 10);
1154
1155 /* Units. */
1156 write_digit_row (stream, indent, map, loc, max_col, 1);
1157 }
1158 }
1159 fprintf (stream, "\n");
1160 }
1161
1162 /* Visualize unallocated values. */
1163 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1164 line_table->highest_location,
1165 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1166
1167 /* Visualize the macro line_map instances, rendering the sources. */
1168 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1169 {
1170 /* Each macro map that is allocated owns location_t values
1171 that are *lower* than the ones before them.
1172 Hence it's meaningful to view them either in order of ascending
1173 source locations, or in order of ascending macro map index. */
1174 const bool ascending_location_ts = true;
1175 unsigned int idx = (ascending_location_ts
1176 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1177 : i);
1178 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1179 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1180 idx,
1181 linemap_map_get_macro_name (map),
1182 MACRO_MAP_NUM_MACRO_TOKENS (map));
1183 dump_location_range (stream,
1184 map->start_location,
1185 (map->start_location
1186 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1187 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1188 "expansion point is location %i",
1189 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1190 fprintf (stream, " map->start_location: %u\n",
1191 map->start_location);
1192
1193 fprintf (stream, " macro_locations:\n");
1194 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1195 {
1196 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1197 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1198
1199 /* linemap_add_macro_token encodes token numbers in an expansion
1200 by putting them after MAP_START_LOCATION. */
1201
1202 /* I'm typically seeing 4 uninitialized entries at the end, each
1203 with the value 0xafafafaf.
1204 This appears to be due to macro.c:replace_args
1205 adding 2 extra args for padding tokens; presumably there may
1206 be a leading and/or trailing padding token injected,
1207 each for 2 more location slots.
1208 This would explain there being up to 4 location_t slots
1209 that may be uninitialized. */
1210
1211 fprintf (stream, " %u: %u, %u\n",
1212 i,
1213 x,
1214 y);
1215 if (x == y)
1216 {
1217 if (x < MAP_START_LOCATION (map))
1218 inform (x, "token %u has %<x-location == y-location == %u%>",
1219 i, x);
1220 else
1221 fprintf (stream,
1222 "x-location == y-location == %u encodes token # %u\n",
1223 x, x - MAP_START_LOCATION (map));
1224 }
1225 else
1226 {
1227 inform (x, "token %u has %<x-location == %u%>", i, x);
1228 inform (x, "token %u has %<y-location == %u%>", i, y);
1229 }
1230 }
1231 fprintf (stream, "\n");
1232 }
1233
1234 /* It appears that MAX_LOCATION_T itself is never assigned to a
1235 macro map, presumably due to an off-by-one error somewhere
1236 between the logic in linemap_enter_macro and
1237 LINEMAPS_MACRO_LOWEST_LOCATION. */
1238 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1239 MAX_LOCATION_T,
1240 MAX_LOCATION_T + 1);
1241
1242 /* Visualize ad-hoc values. */
1243 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1244 MAX_LOCATION_T + 1, UINT_MAX);
1245 }
1246
1247 /* string_concat's constructor. */
1248
1249 string_concat::string_concat (int num, location_t *locs)
1250 : m_num (num)
1251 {
1252 m_locs = ggc_vec_alloc <location_t> (num);
1253 for (int i = 0; i < num; i++)
1254 m_locs[i] = locs[i];
1255 }
1256
1257 /* string_concat_db's constructor. */
1258
1259 string_concat_db::string_concat_db ()
1260 {
1261 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1262 }
1263
1264 /* Record that a string concatenation occurred, covering NUM
1265 string literal tokens. LOCS is an array of size NUM, containing the
1266 locations of the tokens. A copy of LOCS is taken. */
1267
1268 void
1269 string_concat_db::record_string_concatenation (int num, location_t *locs)
1270 {
1271 gcc_assert (num > 1);
1272 gcc_assert (locs);
1273
1274 location_t key_loc = get_key_loc (locs[0]);
1275
1276 string_concat *concat
1277 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1278 m_table->put (key_loc, concat);
1279 }
1280
1281 /* Determine if LOC was the location of the initial token of a
1282 concatenation of string literal tokens.
1283 If so, *OUT_NUM is written to with the number of tokens, and
1284 *OUT_LOCS with the location of an array of locations of the
1285 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1286 storage owned by the string_concat_db.
1287 Otherwise, return false. */
1288
1289 bool
1290 string_concat_db::get_string_concatenation (location_t loc,
1291 int *out_num,
1292 location_t **out_locs)
1293 {
1294 gcc_assert (out_num);
1295 gcc_assert (out_locs);
1296
1297 location_t key_loc = get_key_loc (loc);
1298
1299 string_concat **concat = m_table->get (key_loc);
1300 if (!concat)
1301 return false;
1302
1303 *out_num = (*concat)->m_num;
1304 *out_locs = (*concat)->m_locs;
1305 return true;
1306 }
1307
1308 /* Internal function. Canonicalize LOC into a form suitable for
1309 use as a key within the database, stripping away macro expansion,
1310 ad-hoc information, and range information, using the location of
1311 the start of LOC within an ordinary linemap. */
1312
1313 location_t
1314 string_concat_db::get_key_loc (location_t loc)
1315 {
1316 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1317 NULL);
1318
1319 loc = get_range_from_loc (line_table, loc).m_start;
1320
1321 return loc;
1322 }
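/* Example (illustrative sketch): how a front end might use the database
   above, assuming DB points to its string_concat_db and LOCS holds the
   locations of two literal tokens "foo" "bar" that were concatenated
   during lexing:

     // When the concatenation is seen:
     db->record_string_concatenation (2, locs);

     // Later, when a diagnostic needs the individual pieces:
     int num;
     location_t *piece_locs;
     if (db->get_string_concatenation (locs[0], &num, &piece_locs))
       {
         // NUM is 2; PIECE_LOCS borrows storage owned by the database.
       }
*/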
1323
1324 /* Helper class for use within get_substring_ranges_for_loc.
1325 A vec of cpp_string with responsibility for releasing all of the
1326 str->text for each str in the vector. */
1327
1328 class auto_cpp_string_vec : public auto_vec <cpp_string>
1329 {
1330 public:
1331 auto_cpp_string_vec (int alloc)
1332 : auto_vec <cpp_string> (alloc) {}
1333
1334 ~auto_cpp_string_vec ()
1335 {
1336 /* Clean up the copies within this vec. */
1337 int i;
1338 cpp_string *str;
1339 FOR_EACH_VEC_ELT (*this, i, str)
1340 free (const_cast <unsigned char *> (str->text));
1341 }
1342 };
1343
1344 /* Attempt to populate RANGES with source location information on the
1345 individual characters within the string literal found at STRLOC.
1346 If CONCATS is non-NULL, then any string literals that the token at
1347 STRLOC was concatenated with are also added to RANGES.
1348
1349 Return NULL if successful, or an error message if any errors occurred (in
1350 which case RANGES may be only partially populated and should not
1351 be used).
1352
1353 This is implemented by re-parsing the relevant source line(s). */
1354
1355 static const char *
1356 get_substring_ranges_for_loc (cpp_reader *pfile,
1357 string_concat_db *concats,
1358 location_t strloc,
1359 enum cpp_ttype type,
1360 cpp_substring_ranges &ranges)
1361 {
1362 gcc_assert (pfile);
1363
1364 if (strloc == UNKNOWN_LOCATION)
1365 return "unknown location";
1366
1367 /* Reparsing the strings requires accurate location information.
1368 If -ftrack-macro-expansion has been overridden from its default
1369 of 2, then we might have a location of a macro expansion point,
1370 rather than the location of the literal itself.
1371 Avoid this by requiring that we have full macro expansion tracking
1372 for substring locations to be available. */
1373 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1374 return "track_macro_expansion != 2";
1375
1376 /* If #line or # 44 "file"-style directives are present, then there's
1377 no guarantee that the line numbers we have can be used to locate
1378 the strings. For example, we might have a .i file with # directives
1379 pointing back to lines within a .c file, but the .c file might
1380 have been edited since the .i file was created.
1381 In such a case, the safest course is to disable on-demand substring
1382 locations. */
1383 if (line_table->seen_line_directive)
1384 return "seen line directive";
1385
1386 /* If string concatenation has occurred at STRLOC, get the locations
1387 of all of the literal tokens making up the compound string.
1388 Otherwise, just use STRLOC. */
1389 int num_locs = 1;
1390 location_t *strlocs = &strloc;
1391 if (concats)
1392 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1393
1394 auto_cpp_string_vec strs (num_locs);
1395 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1396 for (int i = 0; i < num_locs; i++)
1397 {
1398 /* Get range of strloc. We will use it to locate the start and finish
1399 of the literal token within the line. */
1400 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1401
1402 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1403 {
1404 /* If the string token was within a macro expansion, then we can
1405 cope with it for the simple case where we have a single token.
1406 Otherwise, bail out. */
1407 if (src_range.m_start != src_range.m_finish)
1408 return "macro expansion";
1409 }
1410 else
1411 {
1412 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1413 /* If so, we can't reliably determine where the token started within
1414 its line. */
1415 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1416
1417 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1418 /* If so, we can't reliably determine where the token finished
1419 within its line. */
1420 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1421 }
1422
1423 expanded_location start
1424 = expand_location_to_spelling_point (src_range.m_start,
1425 LOCATION_ASPECT_START);
1426 expanded_location finish
1427 = expand_location_to_spelling_point (src_range.m_finish,
1428 LOCATION_ASPECT_FINISH);
1429 if (start.file != finish.file)
1430 return "range endpoints are in different files";
1431 if (start.line != finish.line)
1432 return "range endpoints are on different lines";
1433 if (start.column > finish.column)
1434 return "range endpoints are reversed";
1435
1436 char_span line = location_get_source_line (start.file, start.line);
1437 if (!line)
1438 return "unable to read source line";
1439
1440 /* Determine the location of the literal (including quotes
1441 and leading prefix chars, such as the 'u' in a u""
1442 token). */
1443 size_t literal_length = finish.column - start.column + 1;
1444
1445 /* Ensure that we don't crash if we got the wrong location. */
1446 if (line.length () < (start.column - 1 + literal_length))
1447 return "line is not wide enough";
1448
1449 char_span literal = line.subspan (start.column - 1, literal_length);
1450
1451 cpp_string from;
1452 from.len = literal_length;
1453 /* Make a copy of the literal, to avoid having to rely on
1454 the lifetime of the copy of the line within the cache.
1455 This will be released by the auto_cpp_string_vec dtor. */
1456 from.text = (unsigned char *)literal.xstrdup ();
1457 strs.safe_push (from);
1458
1459 /* For very long lines, a new linemap could have started
1460 halfway through the token.
1461 Ensure that the loc_reader uses the linemap of the
1462 *end* of the token for its start location. */
1463 const line_map_ordinary *start_ord_map;
1464 linemap_resolve_location (line_table, src_range.m_start,
1465 LRK_SPELLING_LOCATION, &start_ord_map);
1466 const line_map_ordinary *final_ord_map;
1467 linemap_resolve_location (line_table, src_range.m_finish,
1468 LRK_SPELLING_LOCATION, &final_ord_map);
1469 if (start_ord_map == NULL || final_ord_map == NULL)
1470 return "failed to get ordinary maps";
1471 /* Bulletproofing. We ought to only have different ordinary maps
1472 for start vs finish due to line-length jumps. */
1473 if (start_ord_map != final_ord_map
1474 && start_ord_map->to_file != final_ord_map->to_file)
1475 return "start and finish are spelled in different ordinary maps";
1476 /* The file from linemap_resolve_location ought to match that from
1477 expand_location_to_spelling_point. */
1478 if (start_ord_map->to_file != start.file)
1479 return "mismatching file after resolving linemap";
1480
1481 location_t start_loc
1482 = linemap_position_for_line_and_column (line_table, final_ord_map,
1483 start.line, start.column);
1484
1485 cpp_string_location_reader loc_reader (start_loc, line_table);
1486 loc_readers.safe_push (loc_reader);
1487 }
1488
1489 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1490 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1491 loc_readers.address (),
1492 num_locs, &ranges, type);
1493 if (err)
1494 return err;
1495
1496 /* Success: "ranges" should now contain information on the string. */
1497 return NULL;
1498 }
1499
1500 /* Attempt to populate *OUT_LOC with source location information on the
1501 given characters within the string literal found at STRLOC.
1502 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1503 character set.
1504
1505 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1506 and string literal "012345\n789"
1507 *OUT_LOC is written to with:
1508 "012345\n789"
1509 ~^~~~~
1510
1511 If CONCATS is non-NULL, then any string literals that the token at
1512 STRLOC was concatenated with are also considered.
1513
1514 This is implemented by re-parsing the relevant source line(s).
1515
1516 Return NULL if successful, or an error message if any errors occurred.
1517 Error messages are intended for GCC developers (to help debugging) rather
1518 than for end-users. */
1519
1520 const char *
1521 get_location_within_string (cpp_reader *pfile,
1522 string_concat_db *concats,
1523 location_t strloc,
1524 enum cpp_ttype type,
1525 int caret_idx, int start_idx, int end_idx,
1526 location_t *out_loc)
1527 {
1528 gcc_checking_assert (caret_idx >= 0);
1529 gcc_checking_assert (start_idx >= 0);
1530 gcc_checking_assert (end_idx >= 0);
1531 gcc_assert (out_loc);
1532
1533 cpp_substring_ranges ranges;
1534 const char *err
1535 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1536 if (err)
1537 return err;
1538
1539 if (caret_idx >= ranges.get_num_ranges ())
1540 return "caret_idx out of range";
1541 if (start_idx >= ranges.get_num_ranges ())
1542 return "start_idx out of range";
1543 if (end_idx >= ranges.get_num_ranges ())
1544 return "end_idx out of range";
1545
1546 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1547 ranges.get_range (start_idx).m_start,
1548 ranges.get_range (end_idx).m_finish);
1549 return NULL;
1550 }
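/* Example (illustrative sketch): using get_location_within_string to
   highlight characters 3-7 of a string literal with the caret on
   character 4, as in the worked example above.  PFILE, CONCATS and
   STRLOC are assumed to describe the string token being diagnosed:

     location_t substring_loc;
     const char *err
       = get_location_within_string (pfile, concats, strloc, CPP_STRING,
                                     4, 3, 7, &substring_loc);
     if (!err)
       warning_at (substring_loc, 0, "problem inside the string literal");
*/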
1551
1552 #if CHECKING_P
1553
1554 namespace selftest {
1555
1556 /* Selftests of location handling. */
1557
1558 /* Attempt to populate *OUT_RANGE with source location information on the
1559 given character within the string literal found at STRLOC.
1560 CHAR_IDX refers to an offset within the execution character set.
1561 If CONCATS is non-NULL, then any string literals that the token at
1562 STRLOC was concatenated with are also considered.
1563
1564 This is implemented by re-parsing the relevant source line(s).
1565
1566 Return NULL if successful, or an error message if any errors occurred.
1567 Error messages are intended for GCC developers (to help debugging) rather
1568 than for end-users. */
1569
1570 static const char *
1571 get_source_range_for_char (cpp_reader *pfile,
1572 string_concat_db *concats,
1573 location_t strloc,
1574 enum cpp_ttype type,
1575 int char_idx,
1576 source_range *out_range)
1577 {
1578 gcc_checking_assert (char_idx >= 0);
1579 gcc_assert (out_range);
1580
1581 cpp_substring_ranges ranges;
1582 const char *err
1583 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1584 if (err)
1585 return err;
1586
1587 if (char_idx >= ranges.get_num_ranges ())
1588 return "char_idx out of range";
1589
1590 *out_range = ranges.get_range (char_idx);
1591 return NULL;
1592 }
1593
1594 /* As get_source_range_for_char, but write to *OUT the number
1595 of ranges that are available. */
1596
1597 static const char *
1598 get_num_source_ranges_for_substring (cpp_reader *pfile,
1599 string_concat_db *concats,
1600 location_t strloc,
1601 enum cpp_ttype type,
1602 int *out)
1603 {
1604 gcc_assert (out);
1605
1606 cpp_substring_ranges ranges;
1607 const char *err
1608 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1609
1610 if (err)
1611 return err;
1612
1613 *out = ranges.get_num_ranges ();
1614 return NULL;
1615 }
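/* Example (illustrative sketch): how a selftest might exercise the helpers
   above, assuming PFILE and STRLOC describe a string literal lexed from a
   temporary test file:

     int num_ranges = 0;
     const char *err
       = get_num_source_ranges_for_substring (pfile, concats, strloc,
                                              CPP_STRING, &num_ranges);
     ASSERT_TRUE (err == NULL);
     ASSERT_TRUE (num_ranges > 0);

   source_range objects for individual characters can then be fetched
   with get_source_range_for_char.  */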
1616
1617 /* Selftests of location handling. */
1618
1619 /* Verify that compare() on linenum_type handles comparisons over the full
1620 range of the type. */
1621
1622 static void
1623 test_linenum_comparisons ()
1624 {
1625 linenum_type min_line (0);
1626 linenum_type max_line (0xffffffff);
1627 ASSERT_EQ (0, compare (min_line, min_line));
1628 ASSERT_EQ (0, compare (max_line, max_line));
1629
1630 ASSERT_GT (compare (max_line, min_line), 0);
1631 ASSERT_LT (compare (min_line, max_line), 0);
1632 }
1633
1634 /* Helper function for verifying location data: when location_t
1635 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1636 as having column 0. */
1637
1638 static bool
1639 should_have_column_data_p (location_t loc)
1640 {
1641 if (IS_ADHOC_LOC (loc))
1642 loc = get_location_from_adhoc_loc (line_table, loc);
1643 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1644 return false;
1645 return true;
1646 }
1647
1648 /* Selftest for should_have_column_data_p. */
1649
1650 static void
1651 test_should_have_column_data_p ()
1652 {
1653 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1654 ASSERT_TRUE
1655 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1656 ASSERT_FALSE
1657 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1658 }
1659
1660 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1661 on LOC. */
1662
1663 static void
1664 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1665 location_t loc)
1666 {
1667 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1668 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1669 /* If location_t values are sufficiently high, then column numbers
1670 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1671 When close to the threshold, column numbers *may* be present: if
1672 the final linemap before the threshold contains a line that straddles
1673 the threshold, locations in that line have column information. */
1674 if (should_have_column_data_p (loc))
1675 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1676 }
1677
1678 /* Various selftests involve constructing a line table and one or more
1679 line maps within it.
1680
1681 For maximum test coverage we want to run these tests with a variety
1682 of situations:
1683 - line_table->default_range_bits: some frontends use a non-zero value
1684 and others use zero
1685 - the fallback modes within line-map.c: there are various threshold
1686 values for location_t beyond which line-map.c changes
1687 behavior (disabling of the range-packing optimization, disabling
1688 of column-tracking). We can exercise these by starting the line_table
1689 at interesting values at or near these thresholds.
1690
1691 The following struct describes a particular case within our test
1692 matrix. */
1693
1694 struct line_table_case
1695 {
1696 line_table_case (int default_range_bits, int base_location)
1697 : m_default_range_bits (default_range_bits),
1698 m_base_location (base_location)
1699 {}
1700
1701 int m_default_range_bits;
1702 int m_base_location;
1703 };
1704
1705 /* Constructor. Store the old value of line_table, and create a new
1706 one, using sane defaults. */
1707
1708 line_table_test::line_table_test ()
1709 {
1710 gcc_assert (saved_line_table == NULL);
1711 saved_line_table = line_table;
1712 line_table = ggc_alloc<line_maps> ();
1713 linemap_init (line_table, BUILTINS_LOCATION);
1714 gcc_assert (saved_line_table->reallocator);
1715 line_table->reallocator = saved_line_table->reallocator;
1716 gcc_assert (saved_line_table->round_alloc_size);
1717 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1718 line_table->default_range_bits = 0;
1719 }
1720
1721 /* Constructor. Store the old value of line_table, and create a new
1722 one, using the situation described in CASE_. */
1723
1724 line_table_test::line_table_test (const line_table_case &case_)
1725 {
1726 gcc_assert (saved_line_table == NULL);
1727 saved_line_table = line_table;
1728 line_table = ggc_alloc<line_maps> ();
1729 linemap_init (line_table, BUILTINS_LOCATION);
1730 gcc_assert (saved_line_table->reallocator);
1731 line_table->reallocator = saved_line_table->reallocator;
1732 gcc_assert (saved_line_table->round_alloc_size);
1733 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1734 line_table->default_range_bits = case_.m_default_range_bits;
1735 if (case_.m_base_location)
1736 {
1737 line_table->highest_location = case_.m_base_location;
1738 line_table->highest_line = case_.m_base_location;
1739 }
1740 }
1741
1742 /* Destructor. Restore the old value of line_table. */
1743
1744 line_table_test::~line_table_test ()
1745 {
1746 gcc_assert (saved_line_table != NULL);
1747 line_table = saved_line_table;
1748 saved_line_table = NULL;
1749 }
1750
1751 /* Verify basic operation of ordinary linemaps. */
1752
1753 static void
1754 test_accessing_ordinary_linemaps (const line_table_case &case_)
1755 {
1756 line_table_test ltt (case_);
1757
1758 /* Build a simple linemap describing some locations. */
1759 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1760
1761 linemap_line_start (line_table, 1, 100);
1762 location_t loc_a = linemap_position_for_column (line_table, 1);
1763 location_t loc_b = linemap_position_for_column (line_table, 23);
1764
1765 linemap_line_start (line_table, 2, 100);
1766 location_t loc_c = linemap_position_for_column (line_table, 1);
1767 location_t loc_d = linemap_position_for_column (line_table, 17);
1768
1769 /* Example of a very long line. */
1770 linemap_line_start (line_table, 3, 2000);
1771 location_t loc_e = linemap_position_for_column (line_table, 700);
1772
1773 /* Transitioning back to a short line. */
1774 linemap_line_start (line_table, 4, 0);
1775 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1776
1777 if (should_have_column_data_p (loc_back_to_short))
1778 {
1779 /* Verify that we switched to short lines in the linemap. */
1780 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1781 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1782 }
1783
1784 /* Example of a line that will eventually be seen to be longer
1785 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1786 below that. */
1787 linemap_line_start (line_table, 5, 2000);
1788
1789 location_t loc_start_of_very_long_line
1790 = linemap_position_for_column (line_table, 2000);
1791 location_t loc_too_wide
1792 = linemap_position_for_column (line_table, 4097);
1793 location_t loc_too_wide_2
1794 = linemap_position_for_column (line_table, 4098);
1795
1796 /* ...and back to a sane line length. */
1797 linemap_line_start (line_table, 6, 100);
1798 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1799
1800 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1801
1802 /* Multiple files. */
1803 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1804 linemap_line_start (line_table, 1, 200);
1805 location_t loc_f = linemap_position_for_column (line_table, 150);
1806 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1807
1808 /* Verify that we can recover the location info. */
1809 assert_loceq ("foo.c", 1, 1, loc_a);
1810 assert_loceq ("foo.c", 1, 23, loc_b);
1811 assert_loceq ("foo.c", 2, 1, loc_c);
1812 assert_loceq ("foo.c", 2, 17, loc_d);
1813 assert_loceq ("foo.c", 3, 700, loc_e);
1814 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1815
1816 /* In the very wide line, the initial location should be fully tracked. */
1817 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1818 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1819 be disabled. */
1820 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1821 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1822 /* ...and column-tracking should be re-enabled for subsequent lines. */
1823 assert_loceq ("foo.c", 6, 10, loc_sane_again);
1824
1825 assert_loceq ("bar.c", 1, 150, loc_f);
1826
1827 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1828 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1829
1830 /* Verify using make_location to build a range, and extracting data
1831 back from it. */
1832 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1833 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1834 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1835 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1836 ASSERT_EQ (loc_b, src_range.m_start);
1837 ASSERT_EQ (loc_d, src_range.m_finish);
1838 }
1839
1840 /* Verify various properties of UNKNOWN_LOCATION. */
1841
1842 static void
1843 test_unknown_location ()
1844 {
1845 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1846 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1847 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1848 }
1849
1850 /* Verify various properties of BUILTINS_LOCATION. */
1851
1852 static void
1853 test_builtins ()
1854 {
1855 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1856 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1857 }
1858
1859 /* Regression test for make_location.
1860 Ensure that we use pure locations for the start/finish of the range,
1861 rather than storing a packed or ad-hoc range as the start/finish. */
1862
1863 static void
1864 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1865 {
1866 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1867 with C++ frontend.
1868 ....................0000000001111111111222.
1869 ....................1234567890123456789012. */
1870 const char *content = " r += !aaa == bbb;\n";
1871 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1872 line_table_test ltt (case_);
1873 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1874
1875 const location_t c11 = linemap_position_for_column (line_table, 11);
1876 const location_t c12 = linemap_position_for_column (line_table, 12);
1877 const location_t c13 = linemap_position_for_column (line_table, 13);
1878 const location_t c14 = linemap_position_for_column (line_table, 14);
1879 const location_t c21 = linemap_position_for_column (line_table, 21);
1880
1881 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1882 return;
1883
1884 /* Use column 13 for the caret location, arbitrarily, to verify that we
1885 handle start != caret. */
1886 const location_t aaa = make_location (c13, c12, c14);
1887 ASSERT_EQ (c13, get_pure_location (aaa));
1888 ASSERT_EQ (c12, get_start (aaa));
1889 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1890 ASSERT_EQ (c14, get_finish (aaa));
1891 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1892
1893 /* Make a location using a location with a range as the start-point. */
1894 const location_t not_aaa = make_location (c11, aaa, c14);
1895 ASSERT_EQ (c11, get_pure_location (not_aaa));
1896 /* It should use the start location of the range, not store the range
1897 itself. */
1898 ASSERT_EQ (c12, get_start (not_aaa));
1899 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1900 ASSERT_EQ (c14, get_finish (not_aaa));
1901 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1902
1903 /* Similarly, make a location with a range as the end-point. */
1904 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1905 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1906 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1907 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1908 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1909 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1910 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1911 /* It should use the finish location of the range, not store the range
1912 itself. */
1913 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1914 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1915 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1916 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1917 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1918 }
1919
1920 /* Verify reading of input files (e.g. for caret-based diagnostics). */
1921
1922 static void
1923 test_reading_source_line ()
1924 {
1925 /* Create a tempfile and write some text to it. */
1926 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1927 "01234567890123456789\n"
1928 "This is the test text\n"
1929 "This is the 3rd line");
1930
1931 /* Read back a specific line from the tempfile. */
1932 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
1933 ASSERT_TRUE (source_line);
1934 ASSERT_TRUE (source_line.get_buffer () != NULL);
1935 ASSERT_EQ (20, source_line.length ());
1936 ASSERT_TRUE (!strncmp ("This is the 3rd line",
1937 source_line.get_buffer (), source_line.length ()));
1938
1939 source_line = location_get_source_line (tmp.get_filename (), 2);
1940 ASSERT_TRUE (source_line);
1941 ASSERT_TRUE (source_line.get_buffer () != NULL);
1942 ASSERT_EQ (21, source_line.length ());
1943 ASSERT_TRUE (!strncmp ("This is the test text",
1944 source_line.get_buffer (), source_line.length ()));
1945
1946 source_line = location_get_source_line (tmp.get_filename (), 4);
1947 ASSERT_FALSE (source_line);
1948 ASSERT_TRUE (source_line.get_buffer () == NULL);
1949 }
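
/* Illustrative sketch of the kind of consumer that the test above
   exercises: fetch one line of source via location_get_source_line and
   print it with a caret under a given 1-based column.  The helper name
   and the direct use of stderr are assumptions for exposition only.  */

static void
sketch_print_caret_line (const char *filename, int line, int column)
{
  char_span span = location_get_source_line (filename, line);
  if (!span)
    /* The line is unavailable (e.g. past EOF), so print nothing.  */
    return;
  /* Echo the source line, then a caret under COLUMN.  */
  fwrite (span.get_buffer (), 1, span.length (), stderr);
  fputc ('\n', stderr);
  for (int i = 1; i < column; i++)
    fputc (' ', stderr);
  fputs ("^\n", stderr);
}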
1950
1951 /* Tests of lexing. */
1952
1953 /* Verify that token TOK from PARSER has cpp_token_as_text
1954 equal to EXPECTED_TEXT. */
1955
1956 #define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT) \
1957 SELFTEST_BEGIN_STMT \
1958 unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK)); \
1959 ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt); \
1960 SELFTEST_END_STMT
1961
1962 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1963 and ranges from EXP_START_COL to EXP_FINISH_COL.
1964 Use LOC as the effective location of the selftest. */
1965
1966 static void
1967 assert_token_loc_eq (const location &loc,
1968 const cpp_token *tok,
1969 const char *exp_filename, int exp_linenum,
1970 int exp_start_col, int exp_finish_col)
1971 {
1972 location_t tok_loc = tok->src_loc;
1973 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1974 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1975
1976 /* If location_t values are sufficiently high, then column numbers
1977 will be unavailable. */
1978 if (!should_have_column_data_p (tok_loc))
1979 return;
1980
1981 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1982 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1983 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1984 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1985 }
1986
1987 /* Use assert_token_loc_eq to verify the TOK->src_loc, using
1988 SELFTEST_LOCATION as the effective location of the selftest. */
1989
1990 #define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
1991 EXP_START_COL, EXP_FINISH_COL) \
1992 assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
1993 (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1994
1995 /* Test of lexing a file using libcpp, verifying tokens and their
1996 location information. */
1997
1998 static void
1999 test_lexer (const line_table_case &case_)
2000 {
2001 /* Create a tempfile and write some text to it. */
2002 const char *content =
2003 /*00000000011111111112222222222333333.3333444444444.455555555556
2004 12345678901234567890123456789012345.6789012345678.901234567890. */
2005 ("test_name /* c-style comment */\n"
2006 " \"test literal\"\n"
2007 " // test c++-style comment\n"
2008 " 42\n");
2009 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2010
2011 line_table_test ltt (case_);
2012
2013 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2014
2015 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2016 ASSERT_NE (fname, NULL);
2017
2018 /* Verify that we get the expected tokens back, with the correct
2019 location information. */
2020
2021 location_t loc;
2022 const cpp_token *tok;
2023 tok = cpp_get_token_with_location (parser, &loc);
2024 ASSERT_NE (tok, NULL);
2025 ASSERT_EQ (tok->type, CPP_NAME);
2026 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2027 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2028
2029 tok = cpp_get_token_with_location (parser, &loc);
2030 ASSERT_NE (tok, NULL);
2031 ASSERT_EQ (tok->type, CPP_STRING);
2032 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2033 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2034
2035 tok = cpp_get_token_with_location (parser, &loc);
2036 ASSERT_NE (tok, NULL);
2037 ASSERT_EQ (tok->type, CPP_NUMBER);
2038 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2039 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2040
2041 tok = cpp_get_token_with_location (parser, &loc);
2042 ASSERT_NE (tok, NULL);
2043 ASSERT_EQ (tok->type, CPP_EOF);
2044
2045 cpp_finish (parser, NULL);
2046 cpp_destroy (parser);
2047 }
2048
2049 /* Forward decls. */
2050
2051 struct lexer_test;
2052 class lexer_test_options;
2053
2054 /* A class for specifying options of a lexer_test.
2055 The "apply" vfunc is called during the lexer_test constructor. */
2056
2057 class lexer_test_options
2058 {
2059 public:
2060 virtual void apply (lexer_test &) = 0;
2061 };
2062
2063 /* Wrapper around a cpp_reader *, which calls cpp_finish and cpp_destroy
2064 in its dtor.
2065
2066 This is needed by struct lexer_test to ensure that the cleanup of the
2067 cpp_reader happens *after* the cleanup of the temp_source_file. */
2068
2069 class cpp_reader_ptr
2070 {
2071 public:
2072 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2073
2074 ~cpp_reader_ptr ()
2075 {
2076 cpp_finish (m_ptr, NULL);
2077 cpp_destroy (m_ptr);
2078 }
2079
2080 operator cpp_reader * () const { return m_ptr; }
2081
2082 private:
2083 cpp_reader *m_ptr;
2084 };
2085
2086 /* A struct for writing lexer tests. */
2087
2088 struct lexer_test
2089 {
2090 lexer_test (const line_table_case &case_, const char *content,
2091 lexer_test_options *options);
2092 ~lexer_test ();
2093
2094 const cpp_token *get_token ();
2095
2096 /* The ordering of these fields matters.
2097 The line_table_test must be first, since the cpp_reader_ptr
2098 uses it.
2099 The cpp_reader must be cleaned up *after* the temp_source_file
2100 since the filenames in input.c's input cache are owned by the
2101 cpp_reader; in particular, when ~temp_source_file evicts the
2102 file from the cache, the filename must still be alive. */
2103 line_table_test m_ltt;
2104 cpp_reader_ptr m_parser;
2105 temp_source_file m_tempfile;
2106 string_concat_db m_concats;
2107 bool m_implicitly_expect_EOF;
2108 };
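
/* An illustrative usage sketch for struct lexer_test, assuming no special
   options are needed: construct it with the source text, pull tokens via
   get_token, and let the destructor verify that only EOF remains.  This
   helper is hypothetical; the real uses of lexer_test follow below.  */

static void
sketch_lex_single_number (const line_table_case &case_)
{
  lexer_test test (case_, "42\n", NULL);
  const cpp_token *tok = test.get_token ();
  ASSERT_EQ (tok->type, CPP_NUMBER);
  ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "42");
  /* The lexer_test dtor implicitly checks that the next token is EOF.  */
}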
2109
2110 /* Use an EBCDIC encoding for the execution charset, specifically
2111 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2112
2113 This exercises iconv integration within libcpp.
2114 Not every build of iconv supports the given charset,
2115 so we need to flag this error and handle it gracefully. */
2116
2117 class ebcdic_execution_charset : public lexer_test_options
2118 {
2119 public:
2120 ebcdic_execution_charset () : m_num_iconv_errors (0)
2121 {
2122 gcc_assert (s_singleton == NULL);
2123 s_singleton = this;
2124 }
2125 ~ebcdic_execution_charset ()
2126 {
2127 gcc_assert (s_singleton == this);
2128 s_singleton = NULL;
2129 }
2130
2131 void apply (lexer_test &test) FINAL OVERRIDE
2132 {
2133 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2134 cpp_opts->narrow_charset = "IBM1047";
2135
2136 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2137 callbacks->diagnostic = on_diagnostic;
2138 }
2139
2140 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2141 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2142 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2143 rich_location *richloc ATTRIBUTE_UNUSED,
2144 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2145 ATTRIBUTE_FPTR_PRINTF(5,0)
2146 {
2147 gcc_assert (s_singleton);
2148 /* Prevent exgettext from picking this up; it is translated in libcpp. */
2149 const char *msg = "conversion from %s to %s not supported by iconv";
2150 #ifdef ENABLE_NLS
2151 msg = dgettext ("cpplib", msg);
2152 #endif
2153 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2154 when the local iconv build doesn't support the conversion. */
2155 if (strcmp (msgid, msg) == 0)
2156 {
2157 s_singleton->m_num_iconv_errors++;
2158 return true;
2159 }
2160
2161 /* Otherwise, we have an unexpected error. */
2162 abort ();
2163 }
2164
2165 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2166
2167 private:
2168 static ebcdic_execution_charset *s_singleton;
2169 int m_num_iconv_errors;
2170 };
2171
2172 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2173
2174 /* A lexer_test_options subclass that records a list of diagnostic
2175 messages emitted by the lexer. */
2176
2177 class lexer_diagnostic_sink : public lexer_test_options
2178 {
2179 public:
2180 lexer_diagnostic_sink ()
2181 {
2182 gcc_assert (s_singleton == NULL);
2183 s_singleton = this;
2184 }
2185 ~lexer_diagnostic_sink ()
2186 {
2187 gcc_assert (s_singleton == this);
2188 s_singleton = NULL;
2189
2190 int i;
2191 char *str;
2192 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2193 free (str);
2194 }
2195
2196 void apply (lexer_test &test) FINAL OVERRIDE
2197 {
2198 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2199 callbacks->diagnostic = on_diagnostic;
2200 }
2201
2202 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2203 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2204 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2205 rich_location *richloc ATTRIBUTE_UNUSED,
2206 const char *msgid, va_list *ap)
2207 ATTRIBUTE_FPTR_PRINTF(5,0)
2208 {
2209 char *msg = xvasprintf (msgid, *ap);
2210 s_singleton->m_diagnostics.safe_push (msg);
2211 return true;
2212 }
2213
2214 auto_vec<char *> m_diagnostics;
2215
2216 private:
2217 static lexer_diagnostic_sink *s_singleton;
2218 };
2219
2220 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
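
/* Illustrative sketch (hypothetical; real uses of lexer_diagnostic_sink
   appear in tests elsewhere in this file) of the intended pattern: pass
   the sink as the lexer_test's options so that its "apply" vfunc installs
   the diagnostic callback, lex the input, then inspect the recorded
   messages.  The choice of input here is an assumption.  */

static void
sketch_capture_lexer_diagnostics (const line_table_case &case_)
{
  lexer_diagnostic_sink sink;
  /* An unterminated string literal is expected to provoke a diagnostic.  */
  lexer_test test (case_, "\"unterminated\n", &sink);
  test.m_implicitly_expect_EOF = false;

  /* Drain the tokens; any diagnostics are recorded in sink.m_diagnostics.  */
  while (test.get_token ()->type != CPP_EOF)
    ;

  int i;
  char *str;
  FOR_EACH_VEC_ELT (sink.m_diagnostics, i, str)
    ASSERT_NE (str, NULL);
}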
2221
2222 /* Constructor. Override line_table with a new instance based on CASE_,
2223 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2224 start parsing the tempfile. */
2225
2226 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2227 lexer_test_options *options)
2228 : m_ltt (case_),
2229 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2230 /* Create a tempfile and write the text to it. */
2231 m_tempfile (SELFTEST_LOCATION, ".c", content),
2232 m_concats (),
2233 m_implicitly_expect_EOF (true)
2234 {
2235 if (options)
2236 options->apply (*this);
2237
2238 cpp_init_iconv (m_parser);
2239
2240 /* Parse the file. */
2241 const char *fname = cpp_read_main_file (m_parser,
2242 m_tempfile.get_filename ());
2243 ASSERT_NE (fname, NULL);
2244 }
2245
2246 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2247
2248 lexer_test::~lexer_test ()
2249 {
2250 location_t loc;
2251 const cpp_token *tok;
2252
2253 if (m_implicitly_expect_EOF)
2254 {
2255 tok = cpp_get_token_with_location (m_parser, &loc);
2256 ASSERT_NE (tok, NULL);
2257 ASSERT_EQ (tok->type, CPP_EOF);
2258 }
2259 }
2260
2261 /* Get the next token from m_parser. */
2262
2263 const cpp_token *
2264 lexer_test::get_token ()
2265 {
2266 location_t loc;
2267 const cpp_token *tok;
2268
2269 tok = cpp_get_token_with_location (m_parser, &loc);
2270 ASSERT_NE (tok, NULL);
2271 return tok;
2272 }
2273
2274 /* Verify that locations within string literals are correctly handled. */
2275
2276 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2277 using the string concatenation database for TEST.
2278
2279 Assert that the character at index IDX is on EXPECTED_LINE,
2280 and that it begins at column EXPECTED_START_COL and ends at
2281 EXPECTED_FINISH_COL (unless the locations are beyond
2282 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2283 columns). */
2284
2285 static void
2286 assert_char_at_range (const location &loc,
2287 lexer_test& test,
2288 location_t strloc, enum cpp_ttype type, int idx,
2289 int expected_line, int expected_start_col,
2290 int expected_finish_col)
2291 {
2292 cpp_reader *pfile = test.m_parser;
2293 string_concat_db *concats = &test.m_concats;
2294
2295 source_range actual_range = source_range();
2296 const char *err
2297 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2298 &actual_range);
2299 if (should_have_column_data_p (strloc))
2300 ASSERT_EQ_AT (loc, NULL, err);
2301 else
2302 {
2303 ASSERT_STREQ_AT (loc,
2304 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2305 err);
2306 return;
2307 }
2308
2309 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2310 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2311 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2312 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2313
2314 if (should_have_column_data_p (actual_range.m_start))
2315 {
2316 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2317 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2318 }
2319 if (should_have_column_data_p (actual_range.m_finish))
2320 {
2321 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2322 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2323 }
2324 }
2325
2326 /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2327 the effective location of any errors. */
2328
2329 #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2330 EXPECTED_START_COL, EXPECTED_FINISH_COL) \
2331 assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2332 (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2333 (EXPECTED_FINISH_COL))
2334
2335 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2336 using the string concatenation database for TEST.
2337
2338 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES ranges. */
2339
2340 static void
2341 assert_num_substring_ranges (const location &loc,
2342 lexer_test& test,
2343 location_t strloc,
2344 enum cpp_ttype type,
2345 int expected_num_ranges)
2346 {
2347 cpp_reader *pfile = test.m_parser;
2348 string_concat_db *concats = &test.m_concats;
2349
2350 int actual_num_ranges = -1;
2351 const char *err
2352 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2353 &actual_num_ranges);
2354 if (should_have_column_data_p (strloc))
2355 ASSERT_EQ_AT (loc, NULL, err);
2356 else
2357 {
2358 ASSERT_STREQ_AT (loc,
2359 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2360 err);
2361 return;
2362 }
2363 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2364 }
2365
2366 /* Macro for calling assert_num_substring_ranges, supplying
2367 SELFTEST_LOCATION for the effective location of any errors. */
2368
2369 #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2370 EXPECTED_NUM_RANGES) \
2371 assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2372 (TYPE), (EXPECTED_NUM_RANGES))
2373
2374
2375 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2376 returns an error (using the string concatenation database for TEST). */
2377
2378 static void
2379 assert_has_no_substring_ranges (const location &loc,
2380 lexer_test& test,
2381 location_t strloc,
2382 enum cpp_ttype type,
2383 const char *expected_err)
2384 {
2385 cpp_reader *pfile = test.m_parser;
2386 string_concat_db *concats = &test.m_concats;
2387 cpp_substring_ranges ranges;
2388 const char *actual_err
2389 = get_substring_ranges_for_loc (pfile, concats, strloc,
2390 type, ranges);
2391 if (should_have_column_data_p (strloc))
2392 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2393 else
2394 ASSERT_STREQ_AT (loc,
2395 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2396 actual_err);
2397 }
2398
2399 #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
2400 assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2401 (STRLOC), (TYPE), (ERR))
2402
2403 /* Lex a simple string literal. Verify the substring location data, before
2404 and after running cpp_interpret_string on it. */
2405
2406 static void
2407 test_lexer_string_locations_simple (const line_table_case &case_)
2408 {
2409 /* Digits 0-9 (with 0 at column 10), the simple way.
2410 ....................000000000.11111111112.2222222223333333333
2411 ....................123456789.01234567890.1234567890123456789
2412 We add a trailing comment to ensure that we correctly locate
2413 the end of the string literal token. */
2414 const char *content = " \"0123456789\" /* not a string */\n";
2415 lexer_test test (case_, content, NULL);
2416
2417 /* Verify that we get the expected token back, with the correct
2418 location information. */
2419 const cpp_token *tok = test.get_token ();
2420 ASSERT_EQ (tok->type, CPP_STRING);
2421 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2422 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2423
2424 /* At this point in lexing, the quote characters are treated as part of
2425 the string (they are stripped off by cpp_interpret_string). */
2426
2427 ASSERT_EQ (tok->val.str.len, 12);
2428
2429 /* Verify that cpp_interpret_string works. */
2430 cpp_string dst_string;
2431 const enum cpp_ttype type = CPP_STRING;
2432 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2433 &dst_string, type);
2434 ASSERT_TRUE (result);
2435 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2436 free (const_cast <unsigned char *> (dst_string.text));
2437
2438 /* Verify ranges of individual characters. This no longer includes the
2439 opening quote, but does include the closing quote. */
2440 for (int i = 0; i <= 10; i++)
2441 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2442 10 + i, 10 + i);
2443
2444 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2445 }
2446
2447 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2448 encoding. */
2449
2450 static void
2451 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2452 {
2453 /* EBCDIC support requires iconv. */
2454 if (!HAVE_ICONV)
2455 return;
2456
2457 /* Digits 0-9 (with 0 at column 10), the simple way.
2458 ....................000000000.11111111112.2222222223333333333
2459 ....................123456789.01234567890.1234567890123456789
2460 We add a trailing comment to ensure that we correctly locate
2461 the end of the string literal token. */
2462 const char *content = " \"0123456789\" /* not a string */\n";
2463 ebcdic_execution_charset use_ebcdic;
2464 lexer_test test (case_, content, &use_ebcdic);
2465
2466 /* Verify that we get the expected token back, with the correct
2467 location information. */
2468 const cpp_token *tok = test.get_token ();
2469 ASSERT_EQ (tok->type, CPP_STRING);
2470 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2471 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2472
2473 /* At this point in lexing, the quote characters are treated as part of
2474 the string (they are stripped off by cpp_interpret_string). */
2475
2476 ASSERT_EQ (tok->val.str.len, 12);
2477
2478 /* The remainder of the test requires an iconv implementation that
2479 can convert from UTF-8 to the EBCDIC encoding requested above. */
2480 if (use_ebcdic.iconv_errors_occurred_p ())
2481 return;
2482
2483 /* Verify that cpp_interpret_string works. */
2484 cpp_string dst_string;
2485 const enum cpp_ttype type = CPP_STRING;
2486 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2487 &dst_string, type);
2488 ASSERT_TRUE (result);
2489 /* We should now have EBCDIC-encoded text, specifically
2490 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2491 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2492 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2493 (const char *)dst_string.text);
2494 free (const_cast <unsigned char *> (dst_string.text));
2495
2496 /* Verify that we don't attempt to record substring location information
2497 for such cases. */
2498 ASSERT_HAS_NO_SUBSTRING_RANGES
2499 (test, tok->src_loc, type,
2500 "execution character set != source character set");
2501 }
2502
2503 /* Lex a string literal containing a hex-escaped character.
2504 Verify the substring location data, before and after running
2505 cpp_interpret_string on it. */
2506
2507 static void
2508 test_lexer_string_locations_hex (const line_table_case &case_)
2509 {
2510 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2511 and with a space in place of digit 6, to terminate the escaped
2512 hex code.
2513 ....................000000000.111111.11112222.
2514 ....................123456789.012345.67890123. */
2515 const char *content = " \"01234\\x35 789\"\n";
2516 lexer_test test (case_, content, NULL);
2517
2518 /* Verify that we get the expected token back, with the correct
2519 location information. */
2520 const cpp_token *tok = test.get_token ();
2521 ASSERT_EQ (tok->type, CPP_STRING);
2522 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2523 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2524
2525 /* At this point in lexing, the quote characters are treated as part of
2526 the string (they are stripped off by cpp_interpret_string). */
2527 ASSERT_EQ (tok->val.str.len, 15);
2528
2529 /* Verify that cpp_interpret_string works. */
2530 cpp_string dst_string;
2531 const enum cpp_ttype type = CPP_STRING;
2532 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2533 &dst_string, type);
2534 ASSERT_TRUE (result);
2535 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2536 free (const_cast <unsigned char *> (dst_string.text));
2537
2538 /* Verify ranges of individual characters. This no longer includes the
2539 opening quote, but does include the closing quote. */
2540 for (int i = 0; i <= 4; i++)
2541 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2542 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2543 for (int i = 6; i <= 10; i++)
2544 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2545
2546 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2547 }
2548
2549 /* Lex a string literal containing an octal-escaped character.
2550 Verify the substring location data after running cpp_interpret_string
2551 on it. */
2552
2553 static void
2554 test_lexer_string_locations_oct (const line_table_case &case_)
2555 {
2556 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2557 and with a space in place of digit 6, to terminate the escaped
2558 octal code.
2559 ....................000000000.111111.11112222.2222223333333333444
2560 ....................123456789.012345.67890123.4567890123456789012 */
2561 const char *content = " \"01234\\065 789\" /* not a string */\n";
2562 lexer_test test (case_, content, NULL);
2563
2564 /* Verify that we get the expected token back, with the correct
2565 location information. */
2566 const cpp_token *tok = test.get_token ();
2567 ASSERT_EQ (tok->type, CPP_STRING);
2568 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2569
2570 /* Verify that cpp_interpret_string works. */
2571 cpp_string dst_string;
2572 const enum cpp_ttype type = CPP_STRING;
2573 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2574 &dst_string, type);
2575 ASSERT_TRUE (result);
2576 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2577 free (const_cast <unsigned char *> (dst_string.text));
2578
2579 /* Verify ranges of individual characters. This no longer includes the
2580 opening quote, but does include the closing quote. */
2581 for (int i = 0; i < 5; i++)
2582 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2583 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2584 for (int i = 6; i <= 10; i++)
2585 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2586
2587 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2588 }
2589
2590 /* Test of string literal containing letter escapes. */
2591
2592 static void
2593 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2594 {
2595 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2596 .....................000000000.1.11111.1.1.11222.22222223333333
2597 .....................123456789.0.12345.6.7.89012.34567890123456. */
2598 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2599 lexer_test test (case_, content, NULL);
2600
2601 /* Verify that we get the expected tokens back. */
2602 const cpp_token *tok = test.get_token ();
2603 ASSERT_EQ (tok->type, CPP_STRING);
2604 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2605
2606 /* Verify ranges of individual characters. */
2607 /* "\t". */
2608 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2609 0, 1, 10, 11);
2610 /* "foo". */
2611 for (int i = 1; i <= 3; i++)
2612 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2613 i, 1, 11 + i, 11 + i);
2614 /* "\\" and "\n". */
2615 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2616 4, 1, 15, 16);
2617 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2618 5, 1, 17, 18);
2619
2620 /* "bar" and closing quote for nul-terminator. */
2621 for (int i = 6; i <= 9; i++)
2622 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2623 i, 1, 13 + i, 13 + i);
2624
2625 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2626 }
2627
2628 /* Another test of a string literal containing a letter escape.
2629 Based on string seen in
2630 printf ("%-%\n");
2631 in gcc.dg/format/c90-printf-1.c. */
2632
2633 static void
2634 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2635 {
2636 /* .....................000000000.1111.11.1111.22222222223.
2637 .....................123456789.0123.45.6789.01234567890. */
2638 const char *content = (" \"%-%\\n\" /* non-str */\n");
2639 lexer_test test (case_, content, NULL);
2640
2641 /* Verify that we get the expected tokens back. */
2642 const cpp_token *tok = test.get_token ();
2643 ASSERT_EQ (tok->type, CPP_STRING);
2644 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2645
2646 /* Verify ranges of individual characters. */
2647 /* "%-%". */
2648 for (int i = 0; i < 3; i++)
2649 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2650 i, 1, 10 + i, 10 + i);
2651 /* "\n". */
2652 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2653 3, 1, 13, 14);
2654
2655 /* Closing quote for nul-terminator. */
2656 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2657 4, 1, 15, 15);
2658
2659 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2660 }
2661
2662 /* Lex a string literal containing UCN 4 characters.
2663 Verify the substring location data after running cpp_interpret_string
2664 on it. */
2665
2666 static void
2667 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2668 {
2669 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals written
2670 as UCN 4.
2671 ....................000000000.111111.111122.222222223.33333333344444
2672 ....................123456789.012345.678901.234567890.12345678901234 */
2673 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2674 lexer_test test (case_, content, NULL);
2675
2676 /* Verify that we get the expected token back, with the correct
2677 location information. */
2678 const cpp_token *tok = test.get_token ();
2679 ASSERT_EQ (tok->type, CPP_STRING);
2680 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2681
2682 /* Verify that cpp_interpret_string works.
2683 The string should be encoded in the execution character
2684 set. Assuming that that is UTF-8, we should have the following:
2685 ----------- ---- ----- ------- ----------------
2686 Byte offset Byte Octal Unicode Source Column(s)
2687 ----------- ---- ----- ------- ----------------
2688 0 0x30 '0' 10
2689 1 0x31 '1' 11
2690 2 0x32 '2' 12
2691 3 0x33 '3' 13
2692 4 0x34 '4' 14
2693 5 0xE2 \342 U+2174 15-20
2694 6 0x85 \205 (cont) 15-20
2695 7 0xB4 \264 (cont) 15-20
2696 8 0xE2 \342 U+2175 21-26
2697 9 0x85 \205 (cont) 21-26
2698 10 0xB5 \265 (cont) 21-26
2699 11 0x37 '7' 27
2700 12 0x38 '8' 28
2701 13 0x39 '9' 29
2702 14 0x00 30 (closing quote)
2703 ----------- ---- ----- ------- ---------------. */
2704
2705 cpp_string dst_string;
2706 const enum cpp_ttype type = CPP_STRING;
2707 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2708 &dst_string, type);
2709 ASSERT_TRUE (result);
2710 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2711 (const char *)dst_string.text);
2712 free (const_cast <unsigned char *> (dst_string.text));
2713
2714 /* Verify ranges of individual characters. This no longer includes the
2715 opening quote, but does include the closing quote.
2716 '01234'. */
2717 for (int i = 0; i <= 4; i++)
2718 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2719 /* U+2174. */
2720 for (int i = 5; i <= 7; i++)
2721 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2722 /* U+2175. */
2723 for (int i = 8; i <= 10; i++)
2724 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2725 /* '789' and nul terminator */
2726 for (int i = 11; i <= 14; i++)
2727 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2728
2729 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2730 }
2731
2732 /* Lex a string literal containing UCN 8 characters.
2733 Verify the substring location data after running cpp_interpret_string
2734 on it. */
2735
2736 static void
2737 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2738 {
2739 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2740 ....................000000000.111111.1111222222.2222333333333.344444
2741 ....................123456789.012345.6789012345.6789012345678.901234 */
2742 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2743 lexer_test test (case_, content, NULL);
2744
2745 /* Verify that we get the expected token back, with the correct
2746 location information. */
2747 const cpp_token *tok = test.get_token ();
2748 ASSERT_EQ (tok->type, CPP_STRING);
2749 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2750 "\"01234\\U00002174\\U00002175789\"");
2751
2752 /* Verify that cpp_interpret_string works.
2753 The UTF-8 encoding of the string is identical to that from
2754 the ucn4 testcase above; the only difference is the column
2755 locations. */
2756 cpp_string dst_string;
2757 const enum cpp_ttype type = CPP_STRING;
2758 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2759 &dst_string, type);
2760 ASSERT_TRUE (result);
2761 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2762 (const char *)dst_string.text);
2763 free (const_cast <unsigned char *> (dst_string.text));
2764
2765 /* Verify ranges of individual characters. This no longer includes the
2766 opening quote, but does include the closing quote.
2767 '01234'. */
2768 for (int i = 0; i <= 4; i++)
2769 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2770 /* U+2174. */
2771 for (int i = 5; i <= 7; i++)
2772 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2773 /* U+2175. */
2774 for (int i = 8; i <= 10; i++)
2775 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2776 /* '789' at columns 35-37 */
2777 for (int i = 11; i <= 13; i++)
2778 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2779 /* Closing quote/nul-terminator at column 38. */
2780 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2781
2782 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2783 }
2784
2785 /* Fetch a big-endian 32-bit value and convert to host endianness. */
2786
2787 static uint32_t
2788 uint32_from_big_endian (const uint32_t *ptr_be_value)
2789 {
2790 const unsigned char *buf = (const unsigned char *)ptr_be_value;
2791 return (((uint32_t) buf[0] << 24)
2792 | ((uint32_t) buf[1] << 16)
2793 | ((uint32_t) buf[2] << 8)
2794 | (uint32_t) buf[3]);
2795 }
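
/* Worked example for uint32_from_big_endian (a sketch, not exercised by
   the tests below): the big-endian bytes 00 00 00 41 decode to 0x41,
   i.e. 'A', regardless of the host's endianness.  */

static void
sketch_check_uint32_from_big_endian ()
{
  const unsigned char buf[4] = { 0x00, 0x00, 0x00, 0x41 };
  uint32_t be_value;
  memcpy (&be_value, buf, sizeof be_value);
  ASSERT_EQ ('A', uint32_from_big_endian (&be_value));
}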
2796
2797 /* Lex a wide string literal and verify that attempts to read substring
2798 location data from it fail gracefully. */
2799
2800 static void
2801 test_lexer_string_locations_wide_string (const line_table_case &case_)
2802 {
2803 /* Digits 0-9.
2804 ....................000000000.11111111112.22222222233333
2805 ....................123456789.01234567890.12345678901234 */
2806 const char *content = " L\"0123456789\" /* non-str */\n";
2807 lexer_test test (case_, content, NULL);
2808
2809 /* Verify that we get the expected token back, with the correct
2810 location information. */
2811 const cpp_token *tok = test.get_token ();
2812 ASSERT_EQ (tok->type, CPP_WSTRING);
2813 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2814
2815 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2816 cpp_string dst_string;
2817 const enum cpp_ttype type = CPP_WSTRING;
2818 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2819 &dst_string, type);
2820 ASSERT_TRUE (result);
2821 /* The cpp_reader defaults to big-endian with
2822 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2823 now be encoded as UTF-32BE. */
2824 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2825 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2826 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2827 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2828 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2829 free (const_cast <unsigned char *> (dst_string.text));
2830
2831 /* We don't yet support generating substring location information
2832 for L"" strings. */
2833 ASSERT_HAS_NO_SUBSTRING_RANGES
2834 (test, tok->src_loc, type,
2835 "execution character set != source character set");
2836 }
2837
2838 /* Fetch a big-endian 16-bit value and convert to host endianness. */
2839
2840 static uint16_t
2841 uint16_from_big_endian (const uint16_t *ptr_be_value)
2842 {
2843 const unsigned char *buf = (const unsigned char *)ptr_be_value;
2844 return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1];
2845 }
2846
2847 /* Lex a u"" string literal and verify that attempts to read substring
2848 location data from it fail gracefully. */
2849
2850 static void
2851 test_lexer_string_locations_string16 (const line_table_case &case_)
2852 {
2853 /* Digits 0-9.
2854 ....................000000000.11111111112.22222222233333
2855 ....................123456789.01234567890.12345678901234 */
2856 const char *content = " u\"0123456789\" /* non-str */\n";
2857 lexer_test test (case_, content, NULL);
2858
2859 /* Verify that we get the expected token back, with the correct
2860 location information. */
2861 const cpp_token *tok = test.get_token ();
2862 ASSERT_EQ (tok->type, CPP_STRING16);
2863 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2864
2865 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2866 cpp_string dst_string;
2867 const enum cpp_ttype type = CPP_STRING16;
2868 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2869 &dst_string, type);
2870 ASSERT_TRUE (result);
2871
2872 /* The cpp_reader defaults to big-endian, so dst_string should
2873 now be encoded as UTF-16BE. */
2874 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2875 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2876 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2877 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2878 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2879 free (const_cast <unsigned char *> (dst_string.text));
2880
2881 /* We don't yet support generating substring location information
2882 for L"" strings. */
2883 ASSERT_HAS_NO_SUBSTRING_RANGES
2884 (test, tok->src_loc, type,
2885 "execution character set != source character set");
2886 }
2887
2888 /* Lex a U"" string literal and verify that attempts to read substring
2889 location data from it fail gracefully. */
2890
2891 static void
2892 test_lexer_string_locations_string32 (const line_table_case &case_)
2893 {
2894 /* Digits 0-9.
2895 ....................000000000.11111111112.22222222233333
2896 ....................123456789.01234567890.12345678901234 */
2897 const char *content = " U\"0123456789\" /* non-str */\n";
2898 lexer_test test (case_, content, NULL);
2899
2900 /* Verify that we get the expected token back, with the correct
2901 location information. */
2902 const cpp_token *tok = test.get_token ();
2903 ASSERT_EQ (tok->type, CPP_STRING32);
2904 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2905
2906 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2907 cpp_string dst_string;
2908 const enum cpp_ttype type = CPP_STRING32;
2909 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2910 &dst_string, type);
2911 ASSERT_TRUE (result);
2912
2913 /* The cpp_reader defaults to big-endian, so dst_string should
2914 now be encoded as UTF-32BE. */
2915 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2916 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2917 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2918 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2919 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2920 free (const_cast <unsigned char *> (dst_string.text));
2921
2922 /* We don't yet support generating substring location information
2923 for L"" strings. */
2924 ASSERT_HAS_NO_SUBSTRING_RANGES
2925 (test, tok->src_loc, type,
2926 "execution character set != source character set");
2927 }
2928
2929 /* Lex a u8-string literal.
2930 Verify the substring location data after running cpp_interpret_string
2931 on it. */
2932
2933 static void
2934 test_lexer_string_locations_u8 (const line_table_case &case_)
2935 {
2936 /* Digits 0-9.
2937 ....................000000000.11111111112.22222222233333
2938 ....................123456789.01234567890.12345678901234 */
2939 const char *content = " u8\"0123456789\" /* non-str */\n";
2940 lexer_test test (case_, content, NULL);
2941
2942 /* Verify that we get the expected token back, with the correct
2943 location information. */
2944 const cpp_token *tok = test.get_token ();
2945 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2946 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2947
2948 /* Verify that cpp_interpret_string works. */
2949 cpp_string dst_string;
2950 const enum cpp_ttype type = CPP_STRING;
2951 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2952 &dst_string, type);
2953 ASSERT_TRUE (result);
2954 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2955 free (const_cast <unsigned char *> (dst_string.text));
2956
2957 /* Verify ranges of individual characters. This no longer includes the
2958 opening quote, but does include the closing quote. */
2959 for (int i = 0; i <= 10; i++)
2960 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2961 }
2962
2963 /* Lex a string literal containing UTF-8 source characters.
2964 Verify the substring location data after running cpp_interpret_string
2965 on it. */
2966
2967 static void
2968 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2969 {
2970 /* This string literal is written out to the source file as UTF-8,
2971 and is of the form "before mojibake after", where "mojibake"
2972 is written as the following four unicode code points:
2973 U+6587 CJK UNIFIED IDEOGRAPH-6587
2974 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2975 U+5316 CJK UNIFIED IDEOGRAPH-5316
2976 U+3051 HIRAGANA LETTER KE.
2977 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2978 "before" and "after" are 1 byte per unicode character.
2979
2980 The numbers shown are "columns", which are *byte* numbers within
2981 the line, rather than unicode character numbers.
2982
2983 .................... 000000000.1111111.
2984 .................... 123456789.0123456. */
2985 const char *content = (" \"before "
2986 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2987 UTF-8: 0xE6 0x96 0x87
2988 C octal escaped UTF-8: \346\226\207
2989 "column" numbers: 17-19. */
2990 "\346\226\207"
2991
2992 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2993 UTF-8: 0xE5 0xAD 0x97
2994 C octal escaped UTF-8: \345\255\227
2995 "column" numbers: 20-22. */
2996 "\345\255\227"
2997
2998 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2999 UTF-8: 0xE5 0x8C 0x96
3000 C octal escaped UTF-8: \345\214\226
3001 "column" numbers: 23-25. */
3002 "\345\214\226"
3003
3004 /* U+3051 HIRAGANA LETTER KE
3005 UTF-8: 0xE3 0x81 0x91
3006 C octal escaped UTF-8: \343\201\221
3007 "column" numbers: 26-28. */
3008 "\343\201\221"
3009
3010 /* column numbers 29 onwards
3011 2333333.33334444444444
3012 9012345.67890123456789. */
3013 " after\" /* non-str */\n");
3014 lexer_test test (case_, content, NULL);
3015
3016 /* Verify that we get the expected token back, with the correct
3017 location information. */
3018 const cpp_token *tok = test.get_token ();
3019 ASSERT_EQ (tok->type, CPP_STRING);
3020 ASSERT_TOKEN_AS_TEXT_EQ
3021 (test.m_parser, tok,
3022 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3023
3024 /* Verify that cpp_interpret_string works. */
3025 cpp_string dst_string;
3026 const enum cpp_ttype type = CPP_STRING;
3027 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3028 &dst_string, type);
3029 ASSERT_TRUE (result);
3030 ASSERT_STREQ
3031 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3032 (const char *)dst_string.text);
3033 free (const_cast <unsigned char *> (dst_string.text));
3034
3035 /* Verify ranges of individual characters. This no longer includes the
3036 opening quote, but does include the closing quote.
3037 Assuming that both source and execution encodings are UTF-8, we have
3038 a run of 25 octets in each, plus the NUL terminator. */
3039 for (int i = 0; i < 25; i++)
3040 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3041 /* NUL-terminator should use the closing quote at column 35. */
3042 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3043
3044 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3045 }
3046
3047 /* Test of string literal concatenation. */
3048
3049 static void
3050 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3051 {
3052 /* Digits 0-9.
3053 .....................000000000.111111.11112222222222
3054 .....................123456789.012345.67890123456789. */
3055 const char *content = (" \"01234\" /* non-str */\n"
3056 " \"56789\" /* non-str */\n");
3057 lexer_test test (case_, content, NULL);
3058
3059 location_t input_locs[2];
3060
3061 /* Verify that we get the expected tokens back. */
3062 auto_vec <cpp_string> input_strings;
3063 const cpp_token *tok_a = test.get_token ();
3064 ASSERT_EQ (tok_a->type, CPP_STRING);
3065 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3066 input_strings.safe_push (tok_a->val.str);
3067 input_locs[0] = tok_a->src_loc;
3068
3069 const cpp_token *tok_b = test.get_token ();
3070 ASSERT_EQ (tok_b->type, CPP_STRING);
3071 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3072 input_strings.safe_push (tok_b->val.str);
3073 input_locs[1] = tok_b->src_loc;
3074
3075 /* Verify that cpp_interpret_string works. */
3076 cpp_string dst_string;
3077 const enum cpp_ttype type = CPP_STRING;
3078 bool result = cpp_interpret_string (test.m_parser,
3079 input_strings.address (), 2,
3080 &dst_string, type);
3081 ASSERT_TRUE (result);
3082 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3083 free (const_cast <unsigned char *> (dst_string.text));
3084
3085 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3086 test.m_concats.record_string_concatenation (2, input_locs);
3087
3088 location_t initial_loc = input_locs[0];
3089
3090 /* "01234" on line 1. */
3091 for (int i = 0; i <= 4; i++)
3092 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3093 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3094 for (int i = 5; i <= 10; i++)
3095 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3096
3097 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3098 }
3099
3100 /* Another test of string literal concatenation. */
3101
3102 static void
3103 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3104 {
3105 /* Digits 0-9.
3106 .....................000000000.111.11111112222222
3107 .....................123456789.012.34567890123456. */
3108 const char *content = (" \"01\" /* non-str */\n"
3109 " \"23\" /* non-str */\n"
3110 " \"45\" /* non-str */\n"
3111 " \"67\" /* non-str */\n"
3112 " \"89\" /* non-str */\n");
3113 lexer_test test (case_, content, NULL);
3114
3115 auto_vec <cpp_string> input_strings;
3116 location_t input_locs[5];
3117
3118 /* Verify that we get the expected tokens back. */
3119 for (int i = 0; i < 5; i++)
3120 {
3121 const cpp_token *tok = test.get_token ();
3122 ASSERT_EQ (tok->type, CPP_STRING);
3123 input_strings.safe_push (tok->val.str);
3124 input_locs[i] = tok->src_loc;
3125 }
3126
3127 /* Verify that cpp_interpret_string works. */
3128 cpp_string dst_string;
3129 const enum cpp_ttype type = CPP_STRING;
3130 bool result = cpp_interpret_string (test.m_parser,
3131 input_strings.address (), 5,
3132 &dst_string, type);
3133 ASSERT_TRUE (result);
3134 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3135 free (const_cast <unsigned char *> (dst_string.text));
3136
3137 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3138 test.m_concats.record_string_concatenation (5, input_locs);
3139
3140 location_t initial_loc = input_locs[0];
3141
3142 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3143 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3144 and expect get_source_range_for_substring to fail.
3145 However, for a string concatenation test, we can have a case
3146 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3147 but subsequent strings can be after it.
3148 Attempting to detect this within assert_char_at_range
3149 would overcomplicate the logic for the common test cases, so
3150 we detect it here. */
3151 if (should_have_column_data_p (input_locs[0])
3152 && !should_have_column_data_p (input_locs[4]))
3153 {
3154 /* Verify that get_source_range_for_substring gracefully rejects
3155 this case. */
3156 source_range actual_range;
3157 const char *err
3158 = get_source_range_for_char (test.m_parser, &test.m_concats,
3159 initial_loc, type, 0, &actual_range);
3160 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3161 return;
3162 }
3163
3164 for (int i = 0; i < 5; i++)
3165 for (int j = 0; j < 2; j++)
3166 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3167 i + 1, 10 + j, 10 + j);
3168
3169 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3170 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3171
3172 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3173 }
3174
3175 /* Another test of string literal concatenation, this time combined with
3176 various kinds of escaped characters. */
3177
3178 static void
3179 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3180 {
3181 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3182 and digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3183 const char *content
3184 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3185 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3186 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3187 lexer_test test (case_, content, NULL);
3188
3189 auto_vec <cpp_string> input_strings;
3190 location_t input_locs[4];
3191
3192 /* Verify that we get the expected tokens back. */
3193 for (int i = 0; i < 4; i++)
3194 {
3195 const cpp_token *tok = test.get_token ();
3196 ASSERT_EQ (tok->type, CPP_STRING);
3197 input_strings.safe_push (tok->val.str);
3198 input_locs[i] = tok->src_loc;
3199 }
3200
3201 /* Verify that cpp_interpret_string works. */
3202 cpp_string dst_string;
3203 const enum cpp_ttype type = CPP_STRING;
3204 bool result = cpp_interpret_string (test.m_parser,
3205 input_strings.address (), 4,
3206 &dst_string, type);
3207 ASSERT_TRUE (result);
3208 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3209 free (const_cast <unsigned char *> (dst_string.text));
3210
3211 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3212 test.m_concats.record_string_concatenation (4, input_locs);
3213
3214 location_t initial_loc = input_locs[0];
3215
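/* "01234" supplies characters 0-4 (one column each); the escapes "\x35"
and "\066" supply characters 5 and 6, each spanning the four columns of
its escape sequence; "789" supplies characters 7-9. */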
3216 for (int i = 0; i <= 4; i++)
3217 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3218 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3219 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3220 for (int i = 7; i <= 9; i++)
3221 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3222
3223 /* NUL-terminator should use the location of the final closing quote. */
3224 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3225
3226 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3227 }
3228
3229 /* Test of string literal in a macro. */
3230
3231 static void
3232 test_lexer_string_locations_macro (const line_table_case &case_)
3233 {
3234 /* Digits 0-9.
3235 .....................0000000001111111111.22222222223.
3236 .....................1234567890123456789.01234567890. */
3237 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3238 " MACRO");
3239 lexer_test test (case_, content, NULL);
3240
3241 /* Verify that we get the expected tokens back. */
3242 const cpp_token *tok = test.get_token ();
3243 ASSERT_EQ (tok->type, CPP_PADDING);
3244
3245 tok = test.get_token ();
3246 ASSERT_EQ (tok->type, CPP_STRING);
3247 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3248
3249 /* Verify ranges of individual characters. We ought to
3250 see columns within the macro definition. */
3251 for (int i = 0; i <= 10; i++)
3252 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3253 i, 1, 20 + i, 20 + i);
3254
3255 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3256
3257 tok = test.get_token ();
3258 ASSERT_EQ (tok->type, CPP_PADDING);
3259 }
3260
3261 /* Test of stringification of a macro argument. */
3262
3263 static void
3264 test_lexer_string_locations_stringified_macro_argument
3265 (const line_table_case &case_)
3266 {
3267 /* .....................000000000111111111122222222223.
3268 .....................123456789012345678901234567890. */
3269 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3270 "MACRO(foo)\n");
3271 lexer_test test (case_, content, NULL);
3272
3273 /* Verify that we get the expected token back. */
3274 const cpp_token *tok = test.get_token ();
3275 ASSERT_EQ (tok->type, CPP_PADDING);
3276
3277 tok = test.get_token ();
3278 ASSERT_EQ (tok->type, CPP_STRING);
3279 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3280
3281 /* We don't support getting the location of a stringified macro
3282 argument. Verify that it fails gracefully. */
3283 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3284 "cpp_interpret_string_1 failed");
3285
3286 tok = test.get_token ();
3287 ASSERT_EQ (tok->type, CPP_PADDING);
3288
3289 tok = test.get_token ();
3290 ASSERT_EQ (tok->type, CPP_PADDING);
3291 }
3292
3293 /* Ensure that we fail gracefully if something attempts to pass
3294 in a location that isn't a string literal token. Seen on this code:
3295
3296 const char a[] = " %d ";
3297 __builtin_printf (a, 0.5);
3298 ^
3299
3300 when c-format.c erroneously used the indicated one-character
3301 location as the format string location, leading to a read past the
3302 end of a string buffer in cpp_interpret_string_1. */
3303
3304 static void
3305 test_lexer_string_locations_non_string (const line_table_case &case_)
3306 {
3307 /* .....................000000000111111111122222222223.
3308 .....................123456789012345678901234567890. */
3309 const char *content = (" a\n");
3310 lexer_test test (case_, content, NULL);
3311
3312 /* Verify that we get the expected token back. */
3313 const cpp_token *tok = test.get_token ();
3314 ASSERT_EQ (tok->type, CPP_NAME);
3315 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3316
3317 /* At this point, libcpp is attempting to interpret the name as a
3318 string literal, despite it not starting with a quote. We don't detect
3319 that, but we should at least fail gracefully. */
3320 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3321 "cpp_interpret_string_1 failed");
3322 }
3323
3324 /* Ensure that we can read substring information for a token which
3325 starts in one linemap and ends in another. Adapted from
3326 gcc.dg/cpp/pr69985.c. */
3327
3328 static void
3329 test_lexer_string_locations_long_line (const line_table_case &case_)
3330 {
3331 /* .....................000000.000111111111
3332 .....................123456.789012345678. */
3333 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3334 " \"0123456789012345678901234567890123456789"
3335 "0123456789012345678901234567890123456789"
3336 "0123456789012345678901234567890123456789"
3337 "0123456789\"\n");
3338
3339 lexer_test test (case_, content, NULL);
3340
3341 /* Verify that we get the expected token back. */
3342 const cpp_token *tok = test.get_token ();
3343 ASSERT_EQ (tok->type, CPP_STRING);
3344
3345 if (!should_have_column_data_p (line_table->highest_location))
3346 return;
3347
3348 /* Verify ranges of individual characters. */
3349 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
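/* (131 = three 40-character chunks plus a final 10 characters, i.e. 130
payload characters, plus the NUL terminator.) */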
3350 for (int i = 0; i < 131; i++)
3351 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3352 i, 2, 7 + i, 7 + i);
3353 }
3354
3355 /* Test of locations within a raw string that doesn't contain a newline. */
3356
3357 static void
3358 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3359 {
3360 /* .....................00.0000000111111111122.
3361 .....................12.3456789012345678901. */
3362 const char *content = ("R\"foo(0123456789)foo\"\n");
3363 lexer_test test (case_, content, NULL);
3364
3365 /* Verify that we get the expected token back. */
3366 const cpp_token *tok = test.get_token ();
3367 ASSERT_EQ (tok->type, CPP_STRING);
3368
3369 /* Verify that cpp_interpret_string works. */
3370 cpp_string dst_string;
3371 const enum cpp_ttype type = CPP_STRING;
3372 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3373 &dst_string, type);
3374 ASSERT_TRUE (result);
3375 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3376 free (const_cast <unsigned char *> (dst_string.text));
3377
3378 if (!should_have_column_data_p (line_table->highest_location))
3379 return;
3380
3381 /* 0-9, plus the NUL terminator. */
3382 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
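/* The digits begin at column 7, immediately after the R"foo( prefix. */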
3383 for (int i = 0; i < 11; i++)
3384 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3385 i, 1, 7 + i, 7 + i);
3386 }
3387
3388 /* Test of locations within a raw string that contains a newline. */
3389
3390 static void
3391 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3392 {
3393 /* .....................00.0000.
3394 .....................12.3456. */
3395 const char *content = ("R\"foo(\n"
3396 /* .....................00000.
3397 .....................12345. */
3398 "hello\n"
3399 "world\n"
3400 /* .....................00000.
3401 .....................12345. */
3402 ")foo\"\n");
3403 lexer_test test (case_, content, NULL);
3404
3405 /* Verify that we get the expected token back. */
3406 const cpp_token *tok = test.get_token ();
3407 ASSERT_EQ (tok->type, CPP_STRING);
3408
3409 /* Verify that cpp_interpret_string works. */
3410 cpp_string dst_string;
3411 const enum cpp_ttype type = CPP_STRING;
3412 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3413 &dst_string, type);
3414 ASSERT_TRUE (result);
3415 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3416 free (const_cast <unsigned char *> (dst_string.text));
3417
3418 if (!should_have_column_data_p (line_table->highest_location))
3419 return;
3420
3421 /* Currently we don't support locations within raw strings that
3422 contain newlines. */
3423 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3424 "range endpoints are on different lines");
3425 }
3426
3427 /* Test of parsing an unterminated raw string. */
3428
3429 static void
3430 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3431 {
3432 const char *content = "R\"ouch()ouCh\" /* etc */";
3433
3434 lexer_diagnostic_sink diagnostics;
3435 lexer_test test (case_, content, &diagnostics);
3436 test.m_implicitly_expect_EOF = false;
3437
3438 /* Attempt to parse the raw string. */
3439 const cpp_token *tok = test.get_token ();
3440 ASSERT_EQ (tok->type, CPP_EOF);
3441
3442 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3443 /* We expect the message "unterminated raw string"
3444 in the "cpplib" translation domain.
3445 It's not clear that dgettext is available on all supported hosts,
3446 so this assertion is commented-out for now.
3447 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3448 diagnostics.m_diagnostics[0]);
3449 */
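/* A possible alternative (just a sketch; it assumes the ENABLE_NLS macro
from configure is a reliable indicator that dgettext is usable on the
host) would be to guard the assertion:

#ifdef ENABLE_NLS
ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
diagnostics.m_diagnostics[0]);
#endif
*/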
3450 }
3451
3452 /* Test of lexing char constants. */
3453
3454 static void
3455 test_lexer_char_constants (const line_table_case &case_)
3456 {
3457 /* Various char constants.
3458 .....................0000000001111111111.22222222223.
3459 .....................1234567890123456789.01234567890. */
3460 const char *content = (" 'a'\n"
3461 " u'a'\n"
3462 " U'a'\n"
3463 " L'a'\n"
3464 " 'abc'\n");
3465 lexer_test test (case_, content, NULL);
3466
3467 /* Verify that we get the expected tokens back. */
3468 /* 'a'. */
3469 const cpp_token *tok = test.get_token ();
3470 ASSERT_EQ (tok->type, CPP_CHAR);
3471 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3472
3473 unsigned int chars_seen;
3474 int unsignedp;
3475 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3476 &chars_seen, &unsignedp);
3477 ASSERT_EQ (cc, 'a');
3478 ASSERT_EQ (chars_seen, 1);
3479
3480 /* u'a'. */
3481 tok = test.get_token ();
3482 ASSERT_EQ (tok->type, CPP_CHAR16);
3483 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3484
3485 /* U'a'. */
3486 tok = test.get_token ();
3487 ASSERT_EQ (tok->type, CPP_CHAR32);
3488 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3489
3490 /* L'a'. */
3491 tok = test.get_token ();
3492 ASSERT_EQ (tok->type, CPP_WCHAR);
3493 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3494
3495 /* 'abc' (c-char-sequence). */
3496 tok = test.get_token ();
3497 ASSERT_EQ (tok->type, CPP_CHAR);
3498 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3499 }
3500 /* A table of interesting location_t values, giving one axis of our test
3501 matrix. */
3502
3503 static const location_t boundary_locations[] = {
3504 /* Zero means "don't override the default values for a new line_table". */
3505 0,
3506
3507 /* An arbitrary non-zero value that isn't close to one of
3508 the boundary values below. */
3509 0x10000,
3510
3511 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3512 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3513 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3514 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3515 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3516 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3517
3518 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3519 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3520 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3521 LINE_MAP_MAX_LOCATION_WITH_COLS,
3522 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3523 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3524 };
3525
3526 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3527
3528 void
3529 for_each_line_table_case (void (*testcase) (const line_table_case &))
3530 {
3531 /* As noted above in the description of struct line_table_case,
3532 we want to explore a test matrix of interesting line_table
3533 situations, running various selftests for each case within the
3534 matrix. */
3535
3536 /* Run all tests with:
3537 (a) line_table->default_range_bits == 0, and
3538 (b) line_table->default_range_bits == 5. */
3539 int num_cases_tested = 0;
3540 for (int default_range_bits = 0; default_range_bits <= 5;
3541 default_range_bits += 5)
3542 {
3543 /* ...and use each of the "interesting" location values as
3544 the starting location within line_table. */
3545 const int num_boundary_locations
3546 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3547 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3548 {
3549 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3550
3551 testcase (c);
3552
3553 num_cases_tested++;
3554 }
3555 }
3556
3557 /* Verify that we fully covered the test matrix. */
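/* (2 * 12: two settings of default_range_bits times the twelve entries
in boundary_locations above.) */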
3558 ASSERT_EQ (num_cases_tested, 2 * 12);
3559 }
3560
3561 /* Verify that when presented with a consecutive pair of locations with
3562 a very large line offset, we don't attempt to consolidate them into
3563 a single ordinary linemap where the line offsets within the line map
3564 would lead to overflow (PR lto/88147). */
3565
3566 static void
3567 test_line_offset_overflow ()
3568 {
3569 line_table_test ltt (line_table_case (5, 0));
3570
3571 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3572 linemap_line_start (line_table, 1, 100);
3573 location_t loc_a = linemap_line_start (line_table, 2578, 255);
3574 assert_loceq ("foo.c", 2578, 0, loc_a);
3575
3576 const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3577 ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3578 ASSERT_EQ (ordmap_a->m_range_bits, 5);
3579
3580 location_t loc_b = linemap_line_start (line_table, 404198, 512);
3581 assert_loceq ("foo.c", 404198, 0, loc_b);
3582
3583 /* We should have started a new linemap, rather than attempting to store
3584 a very large line offset. */
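/* (Reusing ordmap_a would mean encoding a line delta of
404198 - 2578 = 401620, shifted left by the map's 13 column-and-range
bits -- an offset far too large for the ordinary location space.) */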
3585 const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3586 ASSERT_NE (ordmap_a, ordmap_b);
3587 }
3588
3589 /* Run all of the selftests within this file. */
3590
3591 void
3592 input_c_tests ()
3593 {
3594 test_linenum_comparisons ();
3595 test_should_have_column_data_p ();
3596 test_unknown_location ();
3597 test_builtins ();
3598 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3599
3600 for_each_line_table_case (test_accessing_ordinary_linemaps);
3601 for_each_line_table_case (test_lexer);
3602 for_each_line_table_case (test_lexer_string_locations_simple);
3603 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3604 for_each_line_table_case (test_lexer_string_locations_hex);
3605 for_each_line_table_case (test_lexer_string_locations_oct);
3606 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3607 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3608 for_each_line_table_case (test_lexer_string_locations_ucn4);
3609 for_each_line_table_case (test_lexer_string_locations_ucn8);
3610 for_each_line_table_case (test_lexer_string_locations_wide_string);
3611 for_each_line_table_case (test_lexer_string_locations_string16);
3612 for_each_line_table_case (test_lexer_string_locations_string32);
3613 for_each_line_table_case (test_lexer_string_locations_u8);
3614 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3615 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3616 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3617 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3618 for_each_line_table_case (test_lexer_string_locations_macro);
3619 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3620 for_each_line_table_case (test_lexer_string_locations_non_string);
3621 for_each_line_table_case (test_lexer_string_locations_long_line);
3622 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3623 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3624 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3625 for_each_line_table_case (test_lexer_char_constants);
3626
3627 test_reading_source_line ();
3628
3629 test_line_offset_overflow ();
3630 }
3631
3632 } // namespace selftest
3633
3634 #endif /* CHECKING_P */