]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/input.c
PR fortran/95090 - ICE: identifier overflow
[thirdparty/gcc.git] / gcc / input.c
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2020 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic.h"
25 #include "diagnostic-core.h"
26 #include "selftest.h"
27 #include "cpplib.h"
28
29 #ifndef HAVE_ICONV
30 #define HAVE_ICONV 0
31 #endif
32
33 /* This is a cache used by get_next_line to store the content of a
34 file to be searched for file lines. */
35 class fcache
36 {
37 public:
38 /* These are information used to store a line boundary. */
39 class line_info
40 {
41 public:
42 /* The line number. It starts from 1. */
43 size_t line_num;
44
45 /* The position (byte count) of the beginning of the line,
46 relative to the file data pointer. This starts at zero. */
47 size_t start_pos;
48
49 /* The position (byte count) of the last byte of the line. This
50 normally points to the '\n' character, or to one byte after the
51 last byte of the file, if the file doesn't contain a '\n'
52 character. */
53 size_t end_pos;
54
55 line_info (size_t l, size_t s, size_t e)
56 : line_num (l), start_pos (s), end_pos (e)
57 {}
58
59 line_info ()
60 :line_num (0), start_pos (0), end_pos (0)
61 {}
62 };
63
64 /* The number of time this file has been accessed. This is used
65 to designate which file cache to evict from the cache
66 array. */
67 unsigned use_count;
68
69 /* The file_path is the key for identifying a particular file in
70 the cache.
71 For libcpp-using code, the underlying buffer for this field is
72 owned by the corresponding _cpp_file within the cpp_reader. */
73 const char *file_path;
74
75 FILE *fp;
76
77 /* This points to the content of the file that we've read so
78 far. */
79 char *data;
80
81 /* The size of the DATA array above.*/
82 size_t size;
83
84 /* The number of bytes read from the underlying file so far. This
85 must be less (or equal) than SIZE above. */
86 size_t nb_read;
87
88 /* The index of the beginning of the current line. */
89 size_t line_start_idx;
90
91 /* The number of the previous line read. This starts at 1. Zero
92 means we've read no line so far. */
93 size_t line_num;
94
95 /* This is the total number of lines of the current file. At the
96 moment, we try to get this information from the line map
97 subsystem. Note that this is just a hint. When using the C++
98 front-end, this hint is correct because the input file is then
99 completely tokenized before parsing starts; so the line map knows
100 the number of lines before compilation really starts. For e.g,
101 the C front-end, it can happen that we start emitting diagnostics
102 before the line map has seen the end of the file. */
103 size_t total_lines;
104
105 /* Could this file be missing a trailing newline on its final line?
106 Initially true (to cope with empty files), set to true/false
107 as each line is read. */
108 bool missing_trailing_newline;
109
110 /* This is a record of the beginning and end of the lines we've seen
111 while reading the file. This is useful to avoid walking the data
112 from the beginning when we are asked to read a line that is
113 before LINE_START_IDX above. Note that the maximum size of this
114 record is fcache_line_record_size, so that the memory consumption
115 doesn't explode. We thus scale total_lines down to
116 fcache_line_record_size. */
117 vec<line_info, va_heap> line_record;
118
119 fcache ();
120 ~fcache ();
121 };
122
123 /* Current position in real source file. */
124
125 location_t input_location = UNKNOWN_LOCATION;
126
127 class line_maps *line_table;
128
129 /* A stashed copy of "line_table" for use by selftest::line_table_test.
130 This needs to be a global so that it can be a GC root, and thus
131 prevent the stashed copy from being garbage-collected if the GC runs
132 during a line_table_test. */
133
134 class line_maps *saved_line_table;
135
136 static fcache *fcache_tab;
137 static const size_t fcache_tab_size = 16;
138 static const size_t fcache_buffer_size = 4 * 1024;
139 static const size_t fcache_line_record_size = 100;
140
141 /* Expand the source location LOC into a human readable location. If
142 LOC resolves to a builtin location, the file name of the readable
143 location is set to the string "<built-in>". If EXPANSION_POINT_P is
144 TRUE and LOC is virtual, then it is resolved to the expansion
145 point of the involved macro. Otherwise, it is resolved to the
146 spelling location of the token.
147
148 When resolving to the spelling location of the token, if the
149 resulting location is for a built-in location (that is, it has no
150 associated line/column) in the context of a macro expansion, the
151 returned location is the first one (while unwinding the macro
152 location towards its expansion point) that is in real source
153 code.
154
155 ASPECT controls which part of the location to use. */
156
157 static expanded_location
158 expand_location_1 (location_t loc,
159 bool expansion_point_p,
160 enum location_aspect aspect)
161 {
162 expanded_location xloc;
163 const line_map_ordinary *map;
164 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
165 tree block = NULL;
166
167 if (IS_ADHOC_LOC (loc))
168 {
169 block = LOCATION_BLOCK (loc);
170 loc = LOCATION_LOCUS (loc);
171 }
172
173 memset (&xloc, 0, sizeof (xloc));
174
175 if (loc >= RESERVED_LOCATION_COUNT)
176 {
177 if (!expansion_point_p)
178 {
179 /* We want to resolve LOC to its spelling location.
180
181 But if that spelling location is a reserved location that
182 appears in the context of a macro expansion (like for a
183 location for a built-in token), let's consider the first
184 location (toward the expansion point) that is not reserved;
185 that is, the first location that is in real source code. */
186 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
187 loc, NULL);
188 lrk = LRK_SPELLING_LOCATION;
189 }
190 loc = linemap_resolve_location (line_table, loc, lrk, &map);
191
192 /* loc is now either in an ordinary map, or is a reserved location.
193 If it is a compound location, the caret is in a spelling location,
194 but the start/finish might still be a virtual location.
195 Depending of what the caller asked for, we may need to recurse
196 one level in order to resolve any virtual locations in the
197 end-points. */
198 switch (aspect)
199 {
200 default:
201 gcc_unreachable ();
202 /* Fall through. */
203 case LOCATION_ASPECT_CARET:
204 break;
205 case LOCATION_ASPECT_START:
206 {
207 location_t start = get_start (loc);
208 if (start != loc)
209 return expand_location_1 (start, expansion_point_p, aspect);
210 }
211 break;
212 case LOCATION_ASPECT_FINISH:
213 {
214 location_t finish = get_finish (loc);
215 if (finish != loc)
216 return expand_location_1 (finish, expansion_point_p, aspect);
217 }
218 break;
219 }
220 xloc = linemap_expand_location (line_table, map, loc);
221 }
222
223 xloc.data = block;
224 if (loc <= BUILTINS_LOCATION)
225 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
226
227 return xloc;
228 }
229
230 /* Initialize the set of cache used for files accessed by caret
231 diagnostic. */
232
233 static void
234 diagnostic_file_cache_init (void)
235 {
236 if (fcache_tab == NULL)
237 fcache_tab = new fcache[fcache_tab_size];
238 }
239
240 /* Free the resources used by the set of cache used for files accessed
241 by caret diagnostic. */
242
243 void
244 diagnostic_file_cache_fini (void)
245 {
246 if (fcache_tab)
247 {
248 delete [] (fcache_tab);
249 fcache_tab = NULL;
250 }
251 }
252
253 /* Return the total lines number that have been read so far by the
254 line map (in the preprocessor) so far. For languages like C++ that
255 entirely preprocess the input file before starting to parse, this
256 equals the actual number of lines of the file. */
257
258 static size_t
259 total_lines_num (const char *file_path)
260 {
261 size_t r = 0;
262 location_t l = 0;
263 if (linemap_get_file_highest_location (line_table, file_path, &l))
264 {
265 gcc_assert (l >= RESERVED_LOCATION_COUNT);
266 expanded_location xloc = expand_location (l);
267 r = xloc.line;
268 }
269 return r;
270 }
271
272 /* Lookup the cache used for the content of a given file accessed by
273 caret diagnostic. Return the found cached file, or NULL if no
274 cached file was found. */
275
276 static fcache*
277 lookup_file_in_cache_tab (const char *file_path)
278 {
279 if (file_path == NULL)
280 return NULL;
281
282 diagnostic_file_cache_init ();
283
284 /* This will contain the found cached file. */
285 fcache *r = NULL;
286 for (unsigned i = 0; i < fcache_tab_size; ++i)
287 {
288 fcache *c = &fcache_tab[i];
289 if (c->file_path && !strcmp (c->file_path, file_path))
290 {
291 ++c->use_count;
292 r = c;
293 }
294 }
295
296 if (r)
297 ++r->use_count;
298
299 return r;
300 }
301
302 /* Purge any mention of FILENAME from the cache of files used for
303 printing source code. For use in selftests when working
304 with tempfiles. */
305
306 void
307 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
308 {
309 gcc_assert (file_path);
310
311 fcache *r = lookup_file_in_cache_tab (file_path);
312 if (!r)
313 /* Not found. */
314 return;
315
316 r->file_path = NULL;
317 if (r->fp)
318 fclose (r->fp);
319 r->fp = NULL;
320 r->nb_read = 0;
321 r->line_start_idx = 0;
322 r->line_num = 0;
323 r->line_record.truncate (0);
324 r->use_count = 0;
325 r->total_lines = 0;
326 r->missing_trailing_newline = true;
327 }
328
329 /* Return the file cache that has been less used, recently, or the
330 first empty one. If HIGHEST_USE_COUNT is non-null,
331 *HIGHEST_USE_COUNT is set to the highest use count of the entries
332 in the cache table. */
333
334 static fcache*
335 evicted_cache_tab_entry (unsigned *highest_use_count)
336 {
337 diagnostic_file_cache_init ();
338
339 fcache *to_evict = &fcache_tab[0];
340 unsigned huc = to_evict->use_count;
341 for (unsigned i = 1; i < fcache_tab_size; ++i)
342 {
343 fcache *c = &fcache_tab[i];
344 bool c_is_empty = (c->file_path == NULL);
345
346 if (c->use_count < to_evict->use_count
347 || (to_evict->file_path && c_is_empty))
348 /* We evict C because it's either an entry with a lower use
349 count or one that is empty. */
350 to_evict = c;
351
352 if (huc < c->use_count)
353 huc = c->use_count;
354
355 if (c_is_empty)
356 /* We've reached the end of the cache; subsequent elements are
357 all empty. */
358 break;
359 }
360
361 if (highest_use_count)
362 *highest_use_count = huc;
363
364 return to_evict;
365 }
366
367 /* Create the cache used for the content of a given file to be
368 accessed by caret diagnostic. This cache is added to an array of
369 cache and can be retrieved by lookup_file_in_cache_tab. This
370 function returns the created cache. Note that only the last
371 fcache_tab_size files are cached. */
372
373 static fcache*
374 add_file_to_cache_tab (const char *file_path)
375 {
376
377 FILE *fp = fopen (file_path, "r");
378 if (fp == NULL)
379 return NULL;
380
381 unsigned highest_use_count = 0;
382 fcache *r = evicted_cache_tab_entry (&highest_use_count);
383 r->file_path = file_path;
384 if (r->fp)
385 fclose (r->fp);
386 r->fp = fp;
387 r->nb_read = 0;
388 r->line_start_idx = 0;
389 r->line_num = 0;
390 r->line_record.truncate (0);
391 /* Ensure that this cache entry doesn't get evicted next time
392 add_file_to_cache_tab is called. */
393 r->use_count = ++highest_use_count;
394 r->total_lines = total_lines_num (file_path);
395 r->missing_trailing_newline = true;
396
397 return r;
398 }
399
400 /* Lookup the cache used for the content of a given file accessed by
401 caret diagnostic. If no cached file was found, create a new cache
402 for this file, add it to the array of cached file and return
403 it. */
404
405 static fcache*
406 lookup_or_add_file_to_cache_tab (const char *file_path)
407 {
408 fcache *r = lookup_file_in_cache_tab (file_path);
409 if (r == NULL)
410 r = add_file_to_cache_tab (file_path);
411 return r;
412 }
413
414 /* Default constructor for a cache of file used by caret
415 diagnostic. */
416
417 fcache::fcache ()
418 : use_count (0), file_path (NULL), fp (NULL), data (0),
419 size (0), nb_read (0), line_start_idx (0), line_num (0),
420 total_lines (0), missing_trailing_newline (true)
421 {
422 line_record.create (0);
423 }
424
425 /* Destructor for a cache of file used by caret diagnostic. */
426
427 fcache::~fcache ()
428 {
429 if (fp)
430 {
431 fclose (fp);
432 fp = NULL;
433 }
434 if (data)
435 {
436 XDELETEVEC (data);
437 data = 0;
438 }
439 line_record.release ();
440 }
441
442 /* Returns TRUE iff the cache would need to be filled with data coming
443 from the file. That is, either the cache is empty or full or the
444 current line is empty. Note that if the cache is full, it would
445 need to be extended and filled again. */
446
447 static bool
448 needs_read (fcache *c)
449 {
450 return (c->nb_read == 0
451 || c->nb_read == c->size
452 || (c->line_start_idx >= c->nb_read - 1));
453 }
454
455 /* Return TRUE iff the cache is full and thus needs to be
456 extended. */
457
458 static bool
459 needs_grow (fcache *c)
460 {
461 return c->nb_read == c->size;
462 }
463
464 /* Grow the cache if it needs to be extended. */
465
466 static void
467 maybe_grow (fcache *c)
468 {
469 if (!needs_grow (c))
470 return;
471
472 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
473 c->data = XRESIZEVEC (char, c->data, size);
474 c->size = size;
475 }
476
477 /* Read more data into the cache. Extends the cache if need be.
478 Returns TRUE iff new data could be read. */
479
480 static bool
481 read_data (fcache *c)
482 {
483 if (feof (c->fp) || ferror (c->fp))
484 return false;
485
486 maybe_grow (c);
487
488 char * from = c->data + c->nb_read;
489 size_t to_read = c->size - c->nb_read;
490 size_t nb_read = fread (from, 1, to_read, c->fp);
491
492 if (ferror (c->fp))
493 return false;
494
495 c->nb_read += nb_read;
496 return !!nb_read;
497 }
498
499 /* Read new data iff the cache needs to be filled with more data
500 coming from the file FP. Return TRUE iff the cache was filled with
501 mode data. */
502
503 static bool
504 maybe_read_data (fcache *c)
505 {
506 if (!needs_read (c))
507 return false;
508 return read_data (c);
509 }
510
511 /* Read a new line from file FP, using C as a cache for the data
512 coming from the file. Upon successful completion, *LINE is set to
513 the beginning of the line found. *LINE points directly in the
514 line cache and is only valid until the next call of get_next_line.
515 *LINE_LEN is set to the length of the line. Note that the line
516 does not contain any terminal delimiter. This function returns
517 true if some data was read or process from the cache, false
518 otherwise. Note that subsequent calls to get_next_line might
519 make the content of *LINE invalid. */
520
521 static bool
522 get_next_line (fcache *c, char **line, ssize_t *line_len)
523 {
524 /* Fill the cache with data to process. */
525 maybe_read_data (c);
526
527 size_t remaining_size = c->nb_read - c->line_start_idx;
528 if (remaining_size == 0)
529 /* There is no more data to process. */
530 return false;
531
532 char *line_start = c->data + c->line_start_idx;
533
534 char *next_line_start = NULL;
535 size_t len = 0;
536 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
537 if (line_end == NULL)
538 {
539 /* We haven't found the end-of-line delimiter in the cache.
540 Fill the cache with more data from the file and look for the
541 '\n'. */
542 while (maybe_read_data (c))
543 {
544 line_start = c->data + c->line_start_idx;
545 remaining_size = c->nb_read - c->line_start_idx;
546 line_end = (char *) memchr (line_start, '\n', remaining_size);
547 if (line_end != NULL)
548 {
549 next_line_start = line_end + 1;
550 break;
551 }
552 }
553 if (line_end == NULL)
554 {
555 /* We've loadded all the file into the cache and still no
556 '\n'. Let's say the line ends up at one byte passed the
557 end of the file. This is to stay consistent with the case
558 of when the line ends up with a '\n' and line_end points to
559 that terminal '\n'. That consistency is useful below in
560 the len calculation. */
561 line_end = c->data + c->nb_read ;
562 c->missing_trailing_newline = true;
563 }
564 else
565 c->missing_trailing_newline = false;
566 }
567 else
568 {
569 next_line_start = line_end + 1;
570 c->missing_trailing_newline = false;
571 }
572
573 if (ferror (c->fp))
574 return false;
575
576 /* At this point, we've found the end of the of line. It either
577 points to the '\n' or to one byte after the last byte of the
578 file. */
579 gcc_assert (line_end != NULL);
580
581 len = line_end - line_start;
582
583 if (c->line_start_idx < c->nb_read)
584 *line = line_start;
585
586 ++c->line_num;
587
588 /* Before we update our line record, make sure the hint about the
589 total number of lines of the file is correct. If it's not, then
590 we give up recording line boundaries from now on. */
591 bool update_line_record = true;
592 if (c->line_num > c->total_lines)
593 update_line_record = false;
594
595 /* Now update our line record so that re-reading lines from the
596 before c->line_start_idx is faster. */
597 if (update_line_record
598 && c->line_record.length () < fcache_line_record_size)
599 {
600 /* If the file lines fits in the line record, we just record all
601 its lines ...*/
602 if (c->total_lines <= fcache_line_record_size
603 && c->line_num > c->line_record.length ())
604 c->line_record.safe_push (fcache::line_info (c->line_num,
605 c->line_start_idx,
606 line_end - c->data));
607 else if (c->total_lines > fcache_line_record_size)
608 {
609 /* ... otherwise, we just scale total_lines down to
610 (fcache_line_record_size lines. */
611 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
612 if (c->line_record.length () == 0
613 || n >= c->line_record.length ())
614 c->line_record.safe_push (fcache::line_info (c->line_num,
615 c->line_start_idx,
616 line_end - c->data));
617 }
618 }
619
620 /* Update c->line_start_idx so that it points to the next line to be
621 read. */
622 if (next_line_start)
623 c->line_start_idx = next_line_start - c->data;
624 else
625 /* We didn't find any terminal '\n'. Let's consider that the end
626 of line is the end of the data in the cache. The next
627 invocation of get_next_line will either read more data from the
628 underlying file or return false early because we've reached the
629 end of the file. */
630 c->line_start_idx = c->nb_read;
631
632 *line_len = len;
633
634 return true;
635 }
636
637 /* Consume the next bytes coming from the cache (or from its
638 underlying file if there are remaining unread bytes in the file)
639 until we reach the next end-of-line (or end-of-file). There is no
640 copying from the cache involved. Return TRUE upon successful
641 completion. */
642
643 static bool
644 goto_next_line (fcache *cache)
645 {
646 char *l;
647 ssize_t len;
648
649 return get_next_line (cache, &l, &len);
650 }
651
652 /* Read an arbitrary line number LINE_NUM from the file cached in C.
653 If the line was read successfully, *LINE points to the beginning
654 of the line in the file cache and *LINE_LEN is the length of the
655 line. *LINE is not nul-terminated, but may contain zero bytes.
656 *LINE is only valid until the next call of read_line_num.
657 This function returns bool if a line was read. */
658
659 static bool
660 read_line_num (fcache *c, size_t line_num,
661 char **line, ssize_t *line_len)
662 {
663 gcc_assert (line_num > 0);
664
665 if (line_num <= c->line_num)
666 {
667 /* We've been asked to read lines that are before c->line_num.
668 So lets use our line record (if it's not empty) to try to
669 avoid re-reading the file from the beginning again. */
670
671 if (c->line_record.is_empty ())
672 {
673 c->line_start_idx = 0;
674 c->line_num = 0;
675 }
676 else
677 {
678 fcache::line_info *i = NULL;
679 if (c->total_lines <= fcache_line_record_size)
680 {
681 /* In languages where the input file is not totally
682 preprocessed up front, the c->total_lines hint
683 can be smaller than the number of lines of the
684 file. In that case, only the first
685 c->total_lines have been recorded.
686
687 Otherwise, the first c->total_lines we've read have
688 their start/end recorded here. */
689 i = (line_num <= c->total_lines)
690 ? &c->line_record[line_num - 1]
691 : &c->line_record[c->total_lines - 1];
692 gcc_assert (i->line_num <= line_num);
693 }
694 else
695 {
696 /* So the file had more lines than our line record
697 size. Thus the number of lines we've recorded has
698 been scaled down to fcache_line_reacord_size. Let's
699 pick the start/end of the recorded line that is
700 closest to line_num. */
701 size_t n = (line_num <= c->total_lines)
702 ? line_num * fcache_line_record_size / c->total_lines
703 : c ->line_record.length () - 1;
704 if (n < c->line_record.length ())
705 {
706 i = &c->line_record[n];
707 gcc_assert (i->line_num <= line_num);
708 }
709 }
710
711 if (i && i->line_num == line_num)
712 {
713 /* We have the start/end of the line. */
714 *line = c->data + i->start_pos;
715 *line_len = i->end_pos - i->start_pos;
716 return true;
717 }
718
719 if (i)
720 {
721 c->line_start_idx = i->start_pos;
722 c->line_num = i->line_num - 1;
723 }
724 else
725 {
726 c->line_start_idx = 0;
727 c->line_num = 0;
728 }
729 }
730 }
731
732 /* Let's walk from line c->line_num up to line_num - 1, without
733 copying any line. */
734 while (c->line_num < line_num - 1)
735 if (!goto_next_line (c))
736 return false;
737
738 /* The line we want is the next one. Let's read and copy it back to
739 the caller. */
740 return get_next_line (c, line, line_len);
741 }
742
743 /* Return the physical source line that corresponds to FILE_PATH/LINE.
744 The line is not nul-terminated. The returned pointer is only
745 valid until the next call of location_get_source_line.
746 Note that the line can contain several null characters,
747 so the returned value's length has the actual length of the line.
748 If the function fails, a NULL char_span is returned. */
749
750 char_span
751 location_get_source_line (const char *file_path, int line)
752 {
753 char *buffer = NULL;
754 ssize_t len;
755
756 if (line == 0)
757 return char_span (NULL, 0);
758
759 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
760 if (c == NULL)
761 return char_span (NULL, 0);
762
763 bool read = read_line_num (c, line, &buffer, &len);
764 if (!read)
765 return char_span (NULL, 0);
766
767 return char_span (buffer, len);
768 }
769
770 /* Determine if FILE_PATH missing a trailing newline on its final line.
771 Only valid to call once all of the file has been loaded, by
772 requesting a line number beyond the end of the file. */
773
774 bool
775 location_missing_trailing_newline (const char *file_path)
776 {
777 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
778 if (c == NULL)
779 return false;
780
781 return c->missing_trailing_newline;
782 }
783
784 /* Test if the location originates from the spelling location of a
785 builtin-tokens. That is, return TRUE if LOC is a (possibly
786 virtual) location of a built-in token that appears in the expansion
787 list of a macro. Please note that this function also works on
788 tokens that result from built-in tokens. For instance, the
789 function would return true if passed a token "4" that is the result
790 of the expansion of the built-in __LINE__ macro. */
791 bool
792 is_location_from_builtin_token (location_t loc)
793 {
794 const line_map_ordinary *map = NULL;
795 loc = linemap_resolve_location (line_table, loc,
796 LRK_SPELLING_LOCATION, &map);
797 return loc == BUILTINS_LOCATION;
798 }
799
800 /* Expand the source location LOC into a human readable location. If
801 LOC is virtual, it resolves to the expansion point of the involved
802 macro. If LOC resolves to a builtin location, the file name of the
803 readable location is set to the string "<built-in>". */
804
805 expanded_location
806 expand_location (location_t loc)
807 {
808 return expand_location_1 (loc, /*expansion_point_p=*/true,
809 LOCATION_ASPECT_CARET);
810 }
811
812 /* Expand the source location LOC into a human readable location. If
813 LOC is virtual, it resolves to the expansion location of the
814 relevant macro. If LOC resolves to a builtin location, the file
815 name of the readable location is set to the string
816 "<built-in>". */
817
818 expanded_location
819 expand_location_to_spelling_point (location_t loc,
820 enum location_aspect aspect)
821 {
822 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
823 }
824
825 /* The rich_location class within libcpp requires a way to expand
826 location_t instances, and relies on the client code
827 providing a symbol named
828 linemap_client_expand_location_to_spelling_point
829 to do this.
830
831 This is the implementation for libcommon.a (all host binaries),
832 which simply calls into expand_location_1. */
833
834 expanded_location
835 linemap_client_expand_location_to_spelling_point (location_t loc,
836 enum location_aspect aspect)
837 {
838 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
839 }
840
841
842 /* If LOCATION is in a system header and if it is a virtual location for
843 a token coming from the expansion of a macro, unwind it to the
844 location of the expansion point of the macro. Otherwise, just return
845 LOCATION.
846
847 This is used for instance when we want to emit diagnostics about a
848 token that may be located in a macro that is itself defined in a
849 system header, for example, for the NULL macro. In such a case, if
850 LOCATION were passed directly to diagnostic functions such as
851 warning_at, the diagnostic would be suppressed (unless
852 -Wsystem-headers). */
853
854 location_t
855 expansion_point_location_if_in_system_header (location_t location)
856 {
857 if (in_system_header_at (location))
858 location = linemap_resolve_location (line_table, location,
859 LRK_MACRO_EXPANSION_POINT,
860 NULL);
861 return location;
862 }
863
864 /* If LOCATION is a virtual location for a token coming from the expansion
865 of a macro, unwind to the location of the expansion point of the macro. */
866
867 location_t
868 expansion_point_location (location_t location)
869 {
870 return linemap_resolve_location (line_table, location,
871 LRK_MACRO_EXPANSION_POINT, NULL);
872 }
873
874 /* Construct a location with caret at CARET, ranging from START to
875 finish e.g.
876
877 11111111112
878 12345678901234567890
879 522
880 523 return foo + bar;
881 ~~~~^~~~~
882 524
883
884 The location's caret is at the "+", line 523 column 15, but starts
885 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
886 of "bar" at column 19. */
887
888 location_t
889 make_location (location_t caret, location_t start, location_t finish)
890 {
891 location_t pure_loc = get_pure_location (caret);
892 source_range src_range;
893 src_range.m_start = get_start (start);
894 src_range.m_finish = get_finish (finish);
895 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
896 pure_loc,
897 src_range,
898 NULL);
899 return combined_loc;
900 }
901
902 /* Same as above, but taking a source range rather than two locations. */
903
904 location_t
905 make_location (location_t caret, source_range src_range)
906 {
907 location_t pure_loc = get_pure_location (caret);
908 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
909 }
910
911 /* An expanded_location stores the column in byte units. This function
912 converts that column to display units. That requires reading the associated
913 source line in order to calculate the display width. If that cannot be done
914 for any reason, then returns the byte column as a fallback. */
915 int
916 location_compute_display_column (expanded_location exploc)
917 {
918 if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
919 return exploc.column;
920 char_span line = location_get_source_line (exploc.file, exploc.line);
921 /* If line is NULL, this function returns exploc.column which is the
922 desired fallback. */
923 return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
924 exploc.column);
925 }
926
927 /* Dump statistics to stderr about the memory usage of the line_table
928 set of line maps. This also displays some statistics about macro
929 expansion. */
930
931 void
932 dump_line_table_statistics (void)
933 {
934 struct linemap_stats s;
935 long total_used_map_size,
936 macro_maps_size,
937 total_allocated_map_size;
938
939 memset (&s, 0, sizeof (s));
940
941 linemap_get_statistics (line_table, &s);
942
943 macro_maps_size = s.macro_maps_used_size
944 + s.macro_maps_locations_size;
945
946 total_allocated_map_size = s.ordinary_maps_allocated_size
947 + s.macro_maps_allocated_size
948 + s.macro_maps_locations_size;
949
950 total_used_map_size = s.ordinary_maps_used_size
951 + s.macro_maps_used_size
952 + s.macro_maps_locations_size;
953
954 fprintf (stderr, "Number of expanded macros: %5ld\n",
955 s.num_expanded_macros);
956 if (s.num_expanded_macros != 0)
957 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
958 s.num_macro_tokens / s.num_expanded_macros);
959 fprintf (stderr,
960 "\nLine Table allocations during the "
961 "compilation process\n");
962 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
963 SIZE_AMOUNT (s.num_ordinary_maps_used));
964 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
965 SIZE_AMOUNT (s.ordinary_maps_used_size));
966 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
967 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
968 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
969 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
970 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
971 SIZE_AMOUNT (s.num_macro_maps_used));
972 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
973 SIZE_AMOUNT (s.macro_maps_used_size));
974 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
975 SIZE_AMOUNT (s.macro_maps_locations_size));
976 fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
977 SIZE_AMOUNT (macro_maps_size));
978 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
979 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
980 fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
981 SIZE_AMOUNT (total_allocated_map_size));
982 fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
983 SIZE_AMOUNT (total_used_map_size));
984 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
985 SIZE_AMOUNT (s.adhoc_table_size));
986 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
987 SIZE_AMOUNT (s.adhoc_table_entries_used));
988 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
989 SIZE_AMOUNT (line_table->num_optimized_ranges));
990 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
991 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
992
993 fprintf (stderr, "\n");
994 }
995
996 /* Get location one beyond the final location in ordinary map IDX. */
997
998 static location_t
999 get_end_location (class line_maps *set, unsigned int idx)
1000 {
1001 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1002 return set->highest_location;
1003
1004 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1005 return MAP_START_LOCATION (next_map);
1006 }
1007
/* Helper function for write_digit_row.
   Emit the least significant decimal digit of DIGIT to STREAM as a
   single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1015
1016 /* Helper function for dump_location_info.
1017 Write a row of numbers to STREAM, numbering a source line,
1018 giving the units, tens, hundreds etc of the column number. */
1019
1020 static void
1021 write_digit_row (FILE *stream, int indent,
1022 const line_map_ordinary *map,
1023 location_t loc, int max_col, int divisor)
1024 {
1025 fprintf (stream, "%*c", indent, ' ');
1026 fprintf (stream, "|");
1027 for (int column = 1; column < max_col; column++)
1028 {
1029 location_t column_loc = loc + (column << map->m_range_bits);
1030 write_digit (stream, column_loc / divisor);
1031 }
1032 fprintf (stream, "\n");
1033 }
1034
1035 /* Write a half-closed (START) / half-open (END) interval of
1036 location_t to STREAM. */
1037
1038 static void
1039 dump_location_range (FILE *stream,
1040 location_t start, location_t end)
1041 {
1042 fprintf (stream,
1043 " location_t interval: %u <= loc < %u\n",
1044 start, end);
1045 }
1046
1047 /* Write a labelled description of a half-closed (START) / half-open (END)
1048 interval of location_t to STREAM. */
1049
1050 static void
1051 dump_labelled_location_range (FILE *stream,
1052 const char *name,
1053 location_t start, location_t end)
1054 {
1055 fprintf (stream, "%s\n", name);
1056 dump_location_range (stream, start, end);
1057 fprintf (stream, "\n");
1058 }
1059
1060 /* Write a visualization of the locations in the line_table to STREAM. */
1061
1062 void
1063 dump_location_info (FILE *stream)
1064 {
1065 /* Visualize the reserved locations. */
1066 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1067 0, RESERVED_LOCATION_COUNT);
1068
1069 /* Visualize the ordinary line_map instances, rendering the sources. */
1070 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1071 {
1072 location_t end_location = get_end_location (line_table, idx);
1073 /* half-closed: doesn't include this one. */
1074
1075 const line_map_ordinary *map
1076 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1077 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1078 dump_location_range (stream,
1079 MAP_START_LOCATION (map), end_location);
1080 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1081 fprintf (stream, " starting at line: %i\n",
1082 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1083 fprintf (stream, " column and range bits: %i\n",
1084 map->m_column_and_range_bits);
1085 fprintf (stream, " column bits: %i\n",
1086 map->m_column_and_range_bits - map->m_range_bits);
1087 fprintf (stream, " range bits: %i\n",
1088 map->m_range_bits);
1089 const char * reason;
1090 switch (map->reason) {
1091 case LC_ENTER:
1092 reason = "LC_ENTER";
1093 break;
1094 case LC_LEAVE:
1095 reason = "LC_LEAVE";
1096 break;
1097 case LC_RENAME:
1098 reason = "LC_RENAME";
1099 break;
1100 case LC_RENAME_VERBATIM:
1101 reason = "LC_RENAME_VERBATIM";
1102 break;
1103 case LC_ENTER_MACRO:
1104 reason = "LC_RENAME_MACRO";
1105 break;
1106 default:
1107 reason = "Unknown";
1108 }
1109 fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1110
1111 const line_map_ordinary *includer_map
1112 = linemap_included_from_linemap (line_table, map);
1113 fprintf (stream, " included from location: %d",
1114 linemap_included_from (map));
1115 if (includer_map) {
1116 fprintf (stream, " (in ordinary map %d)",
1117 int (includer_map - line_table->info_ordinary.maps));
1118 }
1119 fprintf (stream, "\n");
1120
1121 /* Render the span of source lines that this "map" covers. */
1122 for (location_t loc = MAP_START_LOCATION (map);
1123 loc < end_location;
1124 loc += (1 << map->m_range_bits) )
1125 {
1126 gcc_assert (pure_location_p (line_table, loc) );
1127
1128 expanded_location exploc
1129 = linemap_expand_location (line_table, map, loc);
1130
1131 if (exploc.column == 0)
1132 {
1133 /* Beginning of a new source line: draw the line. */
1134
1135 char_span line_text = location_get_source_line (exploc.file,
1136 exploc.line);
1137 if (!line_text)
1138 break;
1139 fprintf (stream,
1140 "%s:%3i|loc:%5i|%.*s\n",
1141 exploc.file, exploc.line,
1142 loc,
1143 (int)line_text.length (), line_text.get_buffer ());
1144
1145 /* "loc" is at column 0, which means "the whole line".
1146 Render the locations *within* the line, by underlining
1147 it, showing the location_t numeric values
1148 at each column. */
1149 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1150 if (max_col > line_text.length ())
1151 max_col = line_text.length () + 1;
1152
1153 int len_lnum = num_digits (exploc.line);
1154 if (len_lnum < 3)
1155 len_lnum = 3;
1156 int len_loc = num_digits (loc);
1157 if (len_loc < 5)
1158 len_loc = 5;
1159
1160 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1161
1162 /* Thousands. */
1163 if (end_location > 999)
1164 write_digit_row (stream, indent, map, loc, max_col, 1000);
1165
1166 /* Hundreds. */
1167 if (end_location > 99)
1168 write_digit_row (stream, indent, map, loc, max_col, 100);
1169
1170 /* Tens. */
1171 write_digit_row (stream, indent, map, loc, max_col, 10);
1172
1173 /* Units. */
1174 write_digit_row (stream, indent, map, loc, max_col, 1);
1175 }
1176 }
1177 fprintf (stream, "\n");
1178 }
1179
1180 /* Visualize unallocated values. */
1181 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1182 line_table->highest_location,
1183 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1184
1185 /* Visualize the macro line_map instances, rendering the sources. */
1186 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1187 {
1188 /* Each macro map that is allocated owns location_t values
1189 that are *lower* that the one before them.
1190 Hence it's meaningful to view them either in order of ascending
1191 source locations, or in order of ascending macro map index. */
1192 const bool ascending_location_ts = true;
1193 unsigned int idx = (ascending_location_ts
1194 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1195 : i);
1196 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1197 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1198 idx,
1199 linemap_map_get_macro_name (map),
1200 MACRO_MAP_NUM_MACRO_TOKENS (map));
1201 dump_location_range (stream,
1202 map->start_location,
1203 (map->start_location
1204 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1205 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1206 "expansion point is location %i",
1207 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1208 fprintf (stream, " map->start_location: %u\n",
1209 map->start_location);
1210
1211 fprintf (stream, " macro_locations:\n");
1212 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1213 {
1214 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1215 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1216
1217 /* linemap_add_macro_token encodes token numbers in an expansion
1218 by putting them after MAP_START_LOCATION. */
1219
1220 /* I'm typically seeing 4 uninitialized entries at the end of
1221 0xafafafaf.
1222 This appears to be due to macro.c:replace_args
1223 adding 2 extra args for padding tokens; presumably there may
1224 be a leading and/or trailing padding token injected,
1225 each for 2 more location slots.
1226 This would explain there being up to 4 location_ts slots
1227 that may be uninitialized. */
1228
1229 fprintf (stream, " %u: %u, %u\n",
1230 i,
1231 x,
1232 y);
1233 if (x == y)
1234 {
1235 if (x < MAP_START_LOCATION (map))
1236 inform (x, "token %u has %<x-location == y-location == %u%>",
1237 i, x);
1238 else
1239 fprintf (stream,
1240 "x-location == y-location == %u encodes token # %u\n",
1241 x, x - MAP_START_LOCATION (map));
1242 }
1243 else
1244 {
1245 inform (x, "token %u has %<x-location == %u%>", i, x);
1246 inform (x, "token %u has %<y-location == %u%>", i, y);
1247 }
1248 }
1249 fprintf (stream, "\n");
1250 }
1251
1252 /* It appears that MAX_LOCATION_T itself is never assigned to a
1253 macro map, presumably due to an off-by-one error somewhere
1254 between the logic in linemap_enter_macro and
1255 LINEMAPS_MACRO_LOWEST_LOCATION. */
1256 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1257 MAX_LOCATION_T,
1258 MAX_LOCATION_T + 1);
1259
1260 /* Visualize ad-hoc values. */
1261 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1262 MAX_LOCATION_T + 1, UINT_MAX);
1263 }
1264
1265 /* string_concat's constructor. */
1266
1267 string_concat::string_concat (int num, location_t *locs)
1268 : m_num (num)
1269 {
1270 m_locs = ggc_vec_alloc <location_t> (num);
1271 for (int i = 0; i < num; i++)
1272 m_locs[i] = locs[i];
1273 }
1274
1275 /* string_concat_db's constructor. */
1276
1277 string_concat_db::string_concat_db ()
1278 {
1279 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1280 }
1281
1282 /* Record that a string concatenation occurred, covering NUM
1283 string literal tokens. LOCS is an array of size NUM, containing the
1284 locations of the tokens. A copy of LOCS is taken. */
1285
1286 void
1287 string_concat_db::record_string_concatenation (int num, location_t *locs)
1288 {
1289 gcc_assert (num > 1);
1290 gcc_assert (locs);
1291
1292 location_t key_loc = get_key_loc (locs[0]);
1293
1294 string_concat *concat
1295 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1296 m_table->put (key_loc, concat);
1297 }
1298
1299 /* Determine if LOC was the location of the initial token of a
1300 concatenation of string literal tokens.
1301 If so, *OUT_NUM is written to with the number of tokens, and
1302 *OUT_LOCS with the location of an array of locations of the
1303 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1304 storage owned by the string_concat_db.
1305 Otherwise, return false. */
1306
1307 bool
1308 string_concat_db::get_string_concatenation (location_t loc,
1309 int *out_num,
1310 location_t **out_locs)
1311 {
1312 gcc_assert (out_num);
1313 gcc_assert (out_locs);
1314
1315 location_t key_loc = get_key_loc (loc);
1316
1317 string_concat **concat = m_table->get (key_loc);
1318 if (!concat)
1319 return false;
1320
1321 *out_num = (*concat)->m_num;
1322 *out_locs =(*concat)->m_locs;
1323 return true;
1324 }
1325
1326 /* Internal function. Canonicalize LOC into a form suitable for
1327 use as a key within the database, stripping away macro expansion,
1328 ad-hoc information, and range information, using the location of
1329 the start of LOC within an ordinary linemap. */
1330
1331 location_t
1332 string_concat_db::get_key_loc (location_t loc)
1333 {
1334 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1335 NULL);
1336
1337 loc = get_range_from_loc (line_table, loc).m_start;
1338
1339 return loc;
1340 }
1341
1342 /* Helper class for use within get_substring_ranges_for_loc.
1343 An vec of cpp_string with responsibility for releasing all of the
1344 str->text for each str in the vector. */
1345
1346 class auto_cpp_string_vec : public auto_vec <cpp_string>
1347 {
1348 public:
1349 auto_cpp_string_vec (int alloc)
1350 : auto_vec <cpp_string> (alloc) {}
1351
1352 ~auto_cpp_string_vec ()
1353 {
1354 /* Clean up the copies within this vec. */
1355 int i;
1356 cpp_string *str;
1357 FOR_EACH_VEC_ELT (*this, i, str)
1358 free (const_cast <unsigned char *> (str->text));
1359 }
1360 };
1361
1362 /* Attempt to populate RANGES with source location information on the
1363 individual characters within the string literal found at STRLOC.
1364 If CONCATS is non-NULL, then any string literals that the token at
1365 STRLOC was concatenated with are also added to RANGES.
1366
1367 Return NULL if successful, or an error message if any errors occurred (in
1368 which case RANGES may be only partially populated and should not
1369 be used).
1370
1371 This is implemented by re-parsing the relevant source line(s). */
1372
1373 static const char *
1374 get_substring_ranges_for_loc (cpp_reader *pfile,
1375 string_concat_db *concats,
1376 location_t strloc,
1377 enum cpp_ttype type,
1378 cpp_substring_ranges &ranges)
1379 {
1380 gcc_assert (pfile);
1381
1382 if (strloc == UNKNOWN_LOCATION)
1383 return "unknown location";
1384
1385 /* Reparsing the strings requires accurate location information.
1386 If -ftrack-macro-expansion has been overridden from its default
1387 of 2, then we might have a location of a macro expansion point,
1388 rather than the location of the literal itself.
1389 Avoid this by requiring that we have full macro expansion tracking
1390 for substring locations to be available. */
1391 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1392 return "track_macro_expansion != 2";
1393
1394 /* If #line or # 44 "file"-style directives are present, then there's
1395 no guarantee that the line numbers we have can be used to locate
1396 the strings. For example, we might have a .i file with # directives
1397 pointing back to lines within a .c file, but the .c file might
1398 have been edited since the .i file was created.
1399 In such a case, the safest course is to disable on-demand substring
1400 locations. */
1401 if (line_table->seen_line_directive)
1402 return "seen line directive";
1403
1404 /* If string concatenation has occurred at STRLOC, get the locations
1405 of all of the literal tokens making up the compound string.
1406 Otherwise, just use STRLOC. */
1407 int num_locs = 1;
1408 location_t *strlocs = &strloc;
1409 if (concats)
1410 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1411
1412 auto_cpp_string_vec strs (num_locs);
1413 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1414 for (int i = 0; i < num_locs; i++)
1415 {
1416 /* Get range of strloc. We will use it to locate the start and finish
1417 of the literal token within the line. */
1418 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1419
1420 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1421 {
1422 /* If the string token was within a macro expansion, then we can
1423 cope with it for the simple case where we have a single token.
1424 Otherwise, bail out. */
1425 if (src_range.m_start != src_range.m_finish)
1426 return "macro expansion";
1427 }
1428 else
1429 {
1430 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1431 /* If so, we can't reliably determine where the token started within
1432 its line. */
1433 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1434
1435 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1436 /* If so, we can't reliably determine where the token finished
1437 within its line. */
1438 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1439 }
1440
1441 expanded_location start
1442 = expand_location_to_spelling_point (src_range.m_start,
1443 LOCATION_ASPECT_START);
1444 expanded_location finish
1445 = expand_location_to_spelling_point (src_range.m_finish,
1446 LOCATION_ASPECT_FINISH);
1447 if (start.file != finish.file)
1448 return "range endpoints are in different files";
1449 if (start.line != finish.line)
1450 return "range endpoints are on different lines";
1451 if (start.column > finish.column)
1452 return "range endpoints are reversed";
1453
1454 char_span line = location_get_source_line (start.file, start.line);
1455 if (!line)
1456 return "unable to read source line";
1457
1458 /* Determine the location of the literal (including quotes
1459 and leading prefix chars, such as the 'u' in a u""
1460 token). */
1461 size_t literal_length = finish.column - start.column + 1;
1462
1463 /* Ensure that we don't crash if we got the wrong location. */
1464 if (line.length () < (start.column - 1 + literal_length))
1465 return "line is not wide enough";
1466
1467 char_span literal = line.subspan (start.column - 1, literal_length);
1468
1469 cpp_string from;
1470 from.len = literal_length;
1471 /* Make a copy of the literal, to avoid having to rely on
1472 the lifetime of the copy of the line within the cache.
1473 This will be released by the auto_cpp_string_vec dtor. */
1474 from.text = (unsigned char *)literal.xstrdup ();
1475 strs.safe_push (from);
1476
1477 /* For very long lines, a new linemap could have started
1478 halfway through the token.
1479 Ensure that the loc_reader uses the linemap of the
1480 *end* of the token for its start location. */
1481 const line_map_ordinary *start_ord_map;
1482 linemap_resolve_location (line_table, src_range.m_start,
1483 LRK_SPELLING_LOCATION, &start_ord_map);
1484 const line_map_ordinary *final_ord_map;
1485 linemap_resolve_location (line_table, src_range.m_finish,
1486 LRK_SPELLING_LOCATION, &final_ord_map);
1487 if (start_ord_map == NULL || final_ord_map == NULL)
1488 return "failed to get ordinary maps";
1489 /* Bulletproofing. We ought to only have different ordinary maps
1490 for start vs finish due to line-length jumps. */
1491 if (start_ord_map != final_ord_map
1492 && start_ord_map->to_file != final_ord_map->to_file)
1493 return "start and finish are spelled in different ordinary maps";
1494 /* The file from linemap_resolve_location ought to match that from
1495 expand_location_to_spelling_point. */
1496 if (start_ord_map->to_file != start.file)
1497 return "mismatching file after resolving linemap";
1498
1499 location_t start_loc
1500 = linemap_position_for_line_and_column (line_table, final_ord_map,
1501 start.line, start.column);
1502
1503 cpp_string_location_reader loc_reader (start_loc, line_table);
1504 loc_readers.safe_push (loc_reader);
1505 }
1506
1507 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1508 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1509 loc_readers.address (),
1510 num_locs, &ranges, type);
1511 if (err)
1512 return err;
1513
1514 /* Success: "ranges" should now contain information on the string. */
1515 return NULL;
1516 }
1517
1518 /* Attempt to populate *OUT_LOC with source location information on the
1519 given characters within the string literal found at STRLOC.
1520 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1521 character set.
1522
1523 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1524 and string literal "012345\n789"
1525 *OUT_LOC is written to with:
1526 "012345\n789"
1527 ~^~~~~
1528
1529 If CONCATS is non-NULL, then any string literals that the token at
1530 STRLOC was concatenated with are also considered.
1531
1532 This is implemented by re-parsing the relevant source line(s).
1533
1534 Return NULL if successful, or an error message if any errors occurred.
1535 Error messages are intended for GCC developers (to help debugging) rather
1536 than for end-users. */
1537
1538 const char *
1539 get_location_within_string (cpp_reader *pfile,
1540 string_concat_db *concats,
1541 location_t strloc,
1542 enum cpp_ttype type,
1543 int caret_idx, int start_idx, int end_idx,
1544 location_t *out_loc)
1545 {
1546 gcc_checking_assert (caret_idx >= 0);
1547 gcc_checking_assert (start_idx >= 0);
1548 gcc_checking_assert (end_idx >= 0);
1549 gcc_assert (out_loc);
1550
1551 cpp_substring_ranges ranges;
1552 const char *err
1553 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1554 if (err)
1555 return err;
1556
1557 if (caret_idx >= ranges.get_num_ranges ())
1558 return "caret_idx out of range";
1559 if (start_idx >= ranges.get_num_ranges ())
1560 return "start_idx out of range";
1561 if (end_idx >= ranges.get_num_ranges ())
1562 return "end_idx out of range";
1563
1564 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1565 ranges.get_range (start_idx).m_start,
1566 ranges.get_range (end_idx).m_finish);
1567 return NULL;
1568 }
1569
1570 #if CHECKING_P
1571
1572 namespace selftest {
1573
1574 /* Selftests of location handling. */
1575
1576 /* Attempt to populate *OUT_RANGE with source location information on the
1577 given character within the string literal found at STRLOC.
1578 CHAR_IDX refers to an offset within the execution character set.
1579 If CONCATS is non-NULL, then any string literals that the token at
1580 STRLOC was concatenated with are also considered.
1581
1582 This is implemented by re-parsing the relevant source line(s).
1583
1584 Return NULL if successful, or an error message if any errors occurred.
1585 Error messages are intended for GCC developers (to help debugging) rather
1586 than for end-users. */
1587
1588 static const char *
1589 get_source_range_for_char (cpp_reader *pfile,
1590 string_concat_db *concats,
1591 location_t strloc,
1592 enum cpp_ttype type,
1593 int char_idx,
1594 source_range *out_range)
1595 {
1596 gcc_checking_assert (char_idx >= 0);
1597 gcc_assert (out_range);
1598
1599 cpp_substring_ranges ranges;
1600 const char *err
1601 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1602 if (err)
1603 return err;
1604
1605 if (char_idx >= ranges.get_num_ranges ())
1606 return "char_idx out of range";
1607
1608 *out_range = ranges.get_range (char_idx);
1609 return NULL;
1610 }
1611
1612 /* As get_source_range_for_char, but write to *OUT the number
1613 of ranges that are available. */
1614
1615 static const char *
1616 get_num_source_ranges_for_substring (cpp_reader *pfile,
1617 string_concat_db *concats,
1618 location_t strloc,
1619 enum cpp_ttype type,
1620 int *out)
1621 {
1622 gcc_assert (out);
1623
1624 cpp_substring_ranges ranges;
1625 const char *err
1626 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1627
1628 if (err)
1629 return err;
1630
1631 *out = ranges.get_num_ranges ();
1632 return NULL;
1633 }
1634
1635 /* Selftests of location handling. */
1636
1637 /* Verify that compare() on linenum_type handles comparisons over the full
1638 range of the type. */
1639
1640 static void
1641 test_linenum_comparisons ()
1642 {
1643 linenum_type min_line (0);
1644 linenum_type max_line (0xffffffff);
1645 ASSERT_EQ (0, compare (min_line, min_line));
1646 ASSERT_EQ (0, compare (max_line, max_line));
1647
1648 ASSERT_GT (compare (max_line, min_line), 0);
1649 ASSERT_LT (compare (min_line, max_line), 0);
1650 }
1651
1652 /* Helper function for verifying location data: when location_t
1653 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1654 as having column 0. */
1655
1656 static bool
1657 should_have_column_data_p (location_t loc)
1658 {
1659 if (IS_ADHOC_LOC (loc))
1660 loc = get_location_from_adhoc_loc (line_table, loc);
1661 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1662 return false;
1663 return true;
1664 }
1665
1666 /* Selftest for should_have_column_data_p. */
1667
1668 static void
1669 test_should_have_column_data_p ()
1670 {
1671 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1672 ASSERT_TRUE
1673 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1674 ASSERT_FALSE
1675 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1676 }
1677
1678 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1679 on LOC. */
1680
1681 static void
1682 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1683 location_t loc)
1684 {
1685 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1686 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1687 /* If location_t values are sufficiently high, then column numbers
1688 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1689 When close to the threshold, column numbers *may* be present: if
1690 the final linemap before the threshold contains a line that straddles
1691 the threshold, locations in that line have column information. */
1692 if (should_have_column_data_p (loc))
1693 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1694 }
1695
/* Many selftests build a line table and populate it with one or more
   line maps.

   To maximize coverage those tests are run in several configurations,
   varying:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for location_t beyond which line-map.c changes
     behavior (disabling of the range-packing optimization, disabling
     of column-tracking).  These are exercised by starting the line_table
     at interesting values at or near these thresholds.

   One instance of the following class describes one cell of that test
   matrix.  */

class line_table_case
{
public:
  /* Describe a case using DEFAULT_RANGE_BITS and starting the table at
     BASE_LOCATION.  */
  line_table_case (int default_range_bits, int base_location)
  : m_default_range_bits (default_range_bits),
    m_base_location (base_location)
  {}

  /* Value to install as line_table->default_range_bits.  */
  int m_default_range_bits;

  /* Initial value for the table's highest location; zero means "leave
     the freshly initialized table as it is".  */
  int m_base_location;
};
1723
1724 /* Constructor. Store the old value of line_table, and create a new
1725 one, using sane defaults. */
1726
1727 line_table_test::line_table_test ()
1728 {
1729 gcc_assert (saved_line_table == NULL);
1730 saved_line_table = line_table;
1731 line_table = ggc_alloc<line_maps> ();
1732 linemap_init (line_table, BUILTINS_LOCATION);
1733 gcc_assert (saved_line_table->reallocator);
1734 line_table->reallocator = saved_line_table->reallocator;
1735 gcc_assert (saved_line_table->round_alloc_size);
1736 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1737 line_table->default_range_bits = 0;
1738 }
1739
1740 /* Constructor. Store the old value of line_table, and create a new
1741 one, using the sitation described in CASE_. */
1742
1743 line_table_test::line_table_test (const line_table_case &case_)
1744 {
1745 gcc_assert (saved_line_table == NULL);
1746 saved_line_table = line_table;
1747 line_table = ggc_alloc<line_maps> ();
1748 linemap_init (line_table, BUILTINS_LOCATION);
1749 gcc_assert (saved_line_table->reallocator);
1750 line_table->reallocator = saved_line_table->reallocator;
1751 gcc_assert (saved_line_table->round_alloc_size);
1752 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1753 line_table->default_range_bits = case_.m_default_range_bits;
1754 if (case_.m_base_location)
1755 {
1756 line_table->highest_location = case_.m_base_location;
1757 line_table->highest_line = case_.m_base_location;
1758 }
1759 }
1760
1761 /* Destructor. Restore the old value of line_table. */
1762
1763 line_table_test::~line_table_test ()
1764 {
1765 gcc_assert (saved_line_table != NULL);
1766 line_table = saved_line_table;
1767 saved_line_table = NULL;
1768 }
1769
1770 /* Verify basic operation of ordinary linemaps. */
1771
1772 static void
1773 test_accessing_ordinary_linemaps (const line_table_case &case_)
1774 {
1775 line_table_test ltt (case_);
1776
1777 /* Build a simple linemap describing some locations. */
1778 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1779
1780 linemap_line_start (line_table, 1, 100);
1781 location_t loc_a = linemap_position_for_column (line_table, 1);
1782 location_t loc_b = linemap_position_for_column (line_table, 23);
1783
1784 linemap_line_start (line_table, 2, 100);
1785 location_t loc_c = linemap_position_for_column (line_table, 1);
1786 location_t loc_d = linemap_position_for_column (line_table, 17);
1787
1788 /* Example of a very long line. */
1789 linemap_line_start (line_table, 3, 2000);
1790 location_t loc_e = linemap_position_for_column (line_table, 700);
1791
1792 /* Transitioning back to a short line. */
1793 linemap_line_start (line_table, 4, 0);
1794 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1795
1796 if (should_have_column_data_p (loc_back_to_short))
1797 {
1798 /* Verify that we switched to short lines in the linemap. */
1799 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1800 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1801 }
1802
1803 /* Example of a line that will eventually be seen to be longer
1804 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1805 below that. */
1806 linemap_line_start (line_table, 5, 2000);
1807
1808 location_t loc_start_of_very_long_line
1809 = linemap_position_for_column (line_table, 2000);
1810 location_t loc_too_wide
1811 = linemap_position_for_column (line_table, 4097);
1812 location_t loc_too_wide_2
1813 = linemap_position_for_column (line_table, 4098);
1814
1815 /* ...and back to a sane line length. */
1816 linemap_line_start (line_table, 6, 100);
1817 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1818
1819 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1820
1821 /* Multiple files. */
1822 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1823 linemap_line_start (line_table, 1, 200);
1824 location_t loc_f = linemap_position_for_column (line_table, 150);
1825 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1826
1827 /* Verify that we can recover the location info. */
1828 assert_loceq ("foo.c", 1, 1, loc_a);
1829 assert_loceq ("foo.c", 1, 23, loc_b);
1830 assert_loceq ("foo.c", 2, 1, loc_c);
1831 assert_loceq ("foo.c", 2, 17, loc_d);
1832 assert_loceq ("foo.c", 3, 700, loc_e);
1833 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1834
1835 /* In the very wide line, the initial location should be fully tracked. */
1836 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1837 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1838 be disabled. */
1839 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1840 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1841 /*...and column-tracking should be re-enabled for subsequent lines. */
1842 assert_loceq ("foo.c", 6, 10, loc_sane_again);
1843
1844 assert_loceq ("bar.c", 1, 150, loc_f);
1845
1846 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1847 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1848
1849 /* Verify using make_location to build a range, and extracting data
1850 back from it. */
1851 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1852 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1853 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1854 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1855 ASSERT_EQ (loc_b, src_range.m_start);
1856 ASSERT_EQ (loc_d, src_range.m_finish);
1857 }
1858
1859 /* Verify various properties of UNKNOWN_LOCATION. */
1860
1861 static void
1862 test_unknown_location ()
1863 {
1864 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1865 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1866 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1867 }
1868
1869 /* Verify various properties of BUILTINS_LOCATION. */
1870
1871 static void
1872 test_builtins ()
1873 {
1874 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1875 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1876 }
1877
1878 /* Regression test for make_location.
1879 Ensure that we use pure locations for the start/finish of the range,
1880 rather than storing a packed or ad-hoc range as the start/finish. */
1881
1882 static void
1883 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1884 {
1885 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1886 with C++ frontend.
1887 ....................0000000001111111111222.
1888 ....................1234567890123456789012. */
1889 const char *content = " r += !aaa == bbb;\n";
1890 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1891 line_table_test ltt (case_);
1892 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1893
1894 const location_t c11 = linemap_position_for_column (line_table, 11);
1895 const location_t c12 = linemap_position_for_column (line_table, 12);
1896 const location_t c13 = linemap_position_for_column (line_table, 13);
1897 const location_t c14 = linemap_position_for_column (line_table, 14);
1898 const location_t c21 = linemap_position_for_column (line_table, 21);
1899
1900 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1901 return;
1902
1903 /* Use column 13 for the caret location, arbitrarily, to verify that we
1904 handle start != caret. */
1905 const location_t aaa = make_location (c13, c12, c14);
1906 ASSERT_EQ (c13, get_pure_location (aaa));
1907 ASSERT_EQ (c12, get_start (aaa));
1908 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1909 ASSERT_EQ (c14, get_finish (aaa));
1910 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1911
1912 /* Make a location using a location with a range as the start-point. */
1913 const location_t not_aaa = make_location (c11, aaa, c14);
1914 ASSERT_EQ (c11, get_pure_location (not_aaa));
1915 /* It should use the start location of the range, not store the range
1916 itself. */
1917 ASSERT_EQ (c12, get_start (not_aaa));
1918 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1919 ASSERT_EQ (c14, get_finish (not_aaa));
1920 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1921
1922 /* Similarly, make a location with a range as the end-point. */
1923 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1924 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1925 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1926 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1927 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1928 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1929 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1930 /* It should use the finish location of the range, not store the range
1931 itself. */
1932 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1933 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1934 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1935 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1936 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1937 }
1938
1939 /* Verify reading of input files (e.g. for caret-based diagnostics). */
1940
1941 static void
1942 test_reading_source_line ()
1943 {
1944 /* Create a tempfile and write some text to it. */
1945 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1946 "01234567890123456789\n"
1947 "This is the test text\n"
1948 "This is the 3rd line");
1949
1950 /* Read back a specific line from the tempfile. */
1951 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
1952 ASSERT_TRUE (source_line);
1953 ASSERT_TRUE (source_line.get_buffer () != NULL);
1954 ASSERT_EQ (20, source_line.length ());
1955 ASSERT_TRUE (!strncmp ("This is the 3rd line",
1956 source_line.get_buffer (), source_line.length ()));
1957
1958 source_line = location_get_source_line (tmp.get_filename (), 2);
1959 ASSERT_TRUE (source_line);
1960 ASSERT_TRUE (source_line.get_buffer () != NULL);
1961 ASSERT_EQ (21, source_line.length ());
1962 ASSERT_TRUE (!strncmp ("This is the test text",
1963 source_line.get_buffer (), source_line.length ()));
1964
1965 source_line = location_get_source_line (tmp.get_filename (), 4);
1966 ASSERT_FALSE (source_line);
1967 ASSERT_TRUE (source_line.get_buffer () == NULL);
1968 }
1969
/* Tests of lexing.  */

/* Assert that the result of cpp_token_as_text (PARSER, TOK) is the
   string EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));    \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);           \
  SELFTEST_END_STMT
1980
1981 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1982 and ranges from EXP_START_COL to EXP_FINISH_COL.
1983 Use LOC as the effective location of the selftest. */
1984
1985 static void
1986 assert_token_loc_eq (const location &loc,
1987 const cpp_token *tok,
1988 const char *exp_filename, int exp_linenum,
1989 int exp_start_col, int exp_finish_col)
1990 {
1991 location_t tok_loc = tok->src_loc;
1992 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1993 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1994
1995 /* If location_t values are sufficiently high, then column numbers
1996 will be unavailable. */
1997 if (!should_have_column_data_p (tok_loc))
1998 return;
1999
2000 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2001 source_range tok_range = get_range_from_loc (line_table, tok_loc);
2002 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2003 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
2004 }
2005
2006 /* Use assert_token_loc_eq to verify the TOK->src_loc, using
2007 SELFTEST_LOCATION as the effective location of the selftest. */
2008
2009 #define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
2010 EXP_START_COL, EXP_FINISH_COL) \
2011 assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
2012 (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
2013
2014 /* Test of lexing a file using libcpp, verifying tokens and their
2015 location information. */
2016
2017 static void
2018 test_lexer (const line_table_case &case_)
2019 {
2020 /* Create a tempfile and write some text to it. */
2021 const char *content =
2022 /*00000000011111111112222222222333333.3333444444444.455555555556
2023 12345678901234567890123456789012345.6789012345678.901234567890. */
2024 ("test_name /* c-style comment */\n"
2025 " \"test literal\"\n"
2026 " // test c++-style comment\n"
2027 " 42\n");
2028 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2029
2030 line_table_test ltt (case_);
2031
2032 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2033
2034 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2035 ASSERT_NE (fname, NULL);
2036
2037 /* Verify that we get the expected tokens back, with the correct
2038 location information. */
2039
2040 location_t loc;
2041 const cpp_token *tok;
2042 tok = cpp_get_token_with_location (parser, &loc);
2043 ASSERT_NE (tok, NULL);
2044 ASSERT_EQ (tok->type, CPP_NAME);
2045 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2046 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2047
2048 tok = cpp_get_token_with_location (parser, &loc);
2049 ASSERT_NE (tok, NULL);
2050 ASSERT_EQ (tok->type, CPP_STRING);
2051 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2052 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2053
2054 tok = cpp_get_token_with_location (parser, &loc);
2055 ASSERT_NE (tok, NULL);
2056 ASSERT_EQ (tok->type, CPP_NUMBER);
2057 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2058 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2059
2060 tok = cpp_get_token_with_location (parser, &loc);
2061 ASSERT_NE (tok, NULL);
2062 ASSERT_EQ (tok->type, CPP_EOF);
2063
2064 cpp_finish (parser, NULL);
2065 cpp_destroy (parser);
2066 }
2067
/* Forward decls.  */

class lexer_test;
class lexer_test_options;

/* A class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor (when a
   non-NULL options pointer is passed to it).  */

class lexer_test_options
{
 public:
  /* This is a polymorphic base class and at least one subclass has a
     non-trivial destructor, so make destruction through a base pointer
     well-defined.  */
  virtual ~lexer_test_options () {}

  /* Hook for customizing the given lexer_test.  */
  virtual void apply (lexer_test &) = 0;
};
2081
2082 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2083 in its dtor.
2084
2085 This is needed by struct lexer_test to ensure that the cleanup of the
2086 cpp_reader happens *after* the cleanup of the temp_source_file. */
2087
2088 class cpp_reader_ptr
2089 {
2090 public:
2091 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2092
2093 ~cpp_reader_ptr ()
2094 {
2095 cpp_finish (m_ptr, NULL);
2096 cpp_destroy (m_ptr);
2097 }
2098
2099 operator cpp_reader * () const { return m_ptr; }
2100
2101 private:
2102 cpp_reader *m_ptr;
2103 };
2104
2105 /* A struct for writing lexer tests. */
2106
2107 class lexer_test
2108 {
2109 public:
2110 lexer_test (const line_table_case &case_, const char *content,
2111 lexer_test_options *options);
2112 ~lexer_test ();
2113
2114 const cpp_token *get_token ();
2115
2116 /* The ordering of these fields matters.
2117 The line_table_test must be first, since the cpp_reader_ptr
2118 uses it.
2119 The cpp_reader must be cleaned up *after* the temp_source_file
2120 since the filenames in input.c's input cache are owned by the
2121 cpp_reader; in particular, when ~temp_source_file evicts the
2122 filename the filenames must still be alive. */
2123 line_table_test m_ltt;
2124 cpp_reader_ptr m_parser;
2125 temp_source_file m_tempfile;
2126 string_concat_db m_concats;
2127 bool m_implicitly_expect_EOF;
2128 };
2129
2130 /* Use an EBCDIC encoding for the execution charset, specifically
2131 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2132
2133 This exercises iconv integration within libcpp.
2134 Not every build of iconv supports the given charset,
2135 so we need to flag this error and handle it gracefully. */
2136
2137 class ebcdic_execution_charset : public lexer_test_options
2138 {
2139 public:
2140 ebcdic_execution_charset () : m_num_iconv_errors (0)
2141 {
2142 gcc_assert (s_singleton == NULL);
2143 s_singleton = this;
2144 }
2145 ~ebcdic_execution_charset ()
2146 {
2147 gcc_assert (s_singleton == this);
2148 s_singleton = NULL;
2149 }
2150
2151 void apply (lexer_test &test) FINAL OVERRIDE
2152 {
2153 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2154 cpp_opts->narrow_charset = "IBM1047";
2155
2156 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2157 callbacks->diagnostic = on_diagnostic;
2158 }
2159
2160 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2161 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2162 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2163 rich_location *richloc ATTRIBUTE_UNUSED,
2164 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2165 ATTRIBUTE_FPTR_PRINTF(5,0)
2166 {
2167 gcc_assert (s_singleton);
2168 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2169 const char *msg = "conversion from %s to %s not supported by iconv";
2170 #ifdef ENABLE_NLS
2171 msg = dgettext ("cpplib", msg);
2172 #endif
2173 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2174 when the local iconv build doesn't support the conversion. */
2175 if (strcmp (msgid, msg) == 0)
2176 {
2177 s_singleton->m_num_iconv_errors++;
2178 return true;
2179 }
2180
2181 /* Otherwise, we have an unexpected error. */
2182 abort ();
2183 }
2184
2185 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2186
2187 private:
2188 static ebcdic_execution_charset *s_singleton;
2189 int m_num_iconv_errors;
2190 };
2191
2192 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2193
2194 /* A lexer_test_options subclass that records a list of diagnostic
2195 messages emitted by the lexer. */
2196
2197 class lexer_diagnostic_sink : public lexer_test_options
2198 {
2199 public:
2200 lexer_diagnostic_sink ()
2201 {
2202 gcc_assert (s_singleton == NULL);
2203 s_singleton = this;
2204 }
2205 ~lexer_diagnostic_sink ()
2206 {
2207 gcc_assert (s_singleton == this);
2208 s_singleton = NULL;
2209
2210 int i;
2211 char *str;
2212 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2213 free (str);
2214 }
2215
2216 void apply (lexer_test &test) FINAL OVERRIDE
2217 {
2218 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2219 callbacks->diagnostic = on_diagnostic;
2220 }
2221
2222 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2223 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2224 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2225 rich_location *richloc ATTRIBUTE_UNUSED,
2226 const char *msgid, va_list *ap)
2227 ATTRIBUTE_FPTR_PRINTF(5,0)
2228 {
2229 char *msg = xvasprintf (msgid, *ap);
2230 s_singleton->m_diagnostics.safe_push (msg);
2231 return true;
2232 }
2233
2234 auto_vec<char *> m_diagnostics;
2235
2236 private:
2237 static lexer_diagnostic_sink *s_singleton;
2238 };
2239
2240 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2241
2242 /* Constructor. Override line_table with a new instance based on CASE_,
2243 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2244 start parsing the tempfile. */
2245
2246 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2247 lexer_test_options *options)
2248 : m_ltt (case_),
2249 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2250 /* Create a tempfile and write the text to it. */
2251 m_tempfile (SELFTEST_LOCATION, ".c", content),
2252 m_concats (),
2253 m_implicitly_expect_EOF (true)
2254 {
2255 if (options)
2256 options->apply (*this);
2257
2258 cpp_init_iconv (m_parser);
2259
2260 /* Parse the file. */
2261 const char *fname = cpp_read_main_file (m_parser,
2262 m_tempfile.get_filename ());
2263 ASSERT_NE (fname, NULL);
2264 }
2265
2266 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2267
2268 lexer_test::~lexer_test ()
2269 {
2270 location_t loc;
2271 const cpp_token *tok;
2272
2273 if (m_implicitly_expect_EOF)
2274 {
2275 tok = cpp_get_token_with_location (m_parser, &loc);
2276 ASSERT_NE (tok, NULL);
2277 ASSERT_EQ (tok->type, CPP_EOF);
2278 }
2279 }
2280
2281 /* Get the next token from m_parser. */
2282
2283 const cpp_token *
2284 lexer_test::get_token ()
2285 {
2286 location_t loc;
2287 const cpp_token *tok;
2288
2289 tok = cpp_get_token_with_location (m_parser, &loc);
2290 ASSERT_NE (tok, NULL);
2291 return tok;
2292 }
2293
2294 /* Verify that locations within string literals are correctly handled. */
2295
2296 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2297 using the string concatenation database for TEST.
2298
2299 Assert that the character at index IDX is on EXPECTED_LINE,
2300 and that it begins at column EXPECTED_START_COL and ends at
2301 EXPECTED_FINISH_COL (unless the locations are beyond
2302 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2303 columns). */
2304
2305 static void
2306 assert_char_at_range (const location &loc,
2307 lexer_test& test,
2308 location_t strloc, enum cpp_ttype type, int idx,
2309 int expected_line, int expected_start_col,
2310 int expected_finish_col)
2311 {
2312 cpp_reader *pfile = test.m_parser;
2313 string_concat_db *concats = &test.m_concats;
2314
2315 source_range actual_range = source_range();
2316 const char *err
2317 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2318 &actual_range);
2319 if (should_have_column_data_p (strloc))
2320 ASSERT_EQ_AT (loc, NULL, err);
2321 else
2322 {
2323 ASSERT_STREQ_AT (loc,
2324 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2325 err);
2326 return;
2327 }
2328
2329 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2330 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2331 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2332 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2333
2334 if (should_have_column_data_p (actual_range.m_start))
2335 {
2336 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2337 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2338 }
2339 if (should_have_column_data_p (actual_range.m_finish))
2340 {
2341 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2342 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2343 }
2344 }
2345
2346 /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2347 the effective location of any errors. */
2348
2349 #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2350 EXPECTED_START_COL, EXPECTED_FINISH_COL) \
2351 assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2352 (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2353 (EXPECTED_FINISH_COL))
2354
2355 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2356 using the string concatenation database for TEST.
2357
2358 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2359
2360 static void
2361 assert_num_substring_ranges (const location &loc,
2362 lexer_test& test,
2363 location_t strloc,
2364 enum cpp_ttype type,
2365 int expected_num_ranges)
2366 {
2367 cpp_reader *pfile = test.m_parser;
2368 string_concat_db *concats = &test.m_concats;
2369
2370 int actual_num_ranges = -1;
2371 const char *err
2372 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2373 &actual_num_ranges);
2374 if (should_have_column_data_p (strloc))
2375 ASSERT_EQ_AT (loc, NULL, err);
2376 else
2377 {
2378 ASSERT_STREQ_AT (loc,
2379 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2380 err);
2381 return;
2382 }
2383 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2384 }
2385
2386 /* Macro for calling assert_num_substring_ranges, supplying
2387 SELFTEST_LOCATION for the effective location of any errors. */
2388
2389 #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2390 EXPECTED_NUM_RANGES) \
2391 assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2392 (TYPE), (EXPECTED_NUM_RANGES))
2393
2394
2395 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2396 returns an error (using the string concatenation database for TEST). */
2397
2398 static void
2399 assert_has_no_substring_ranges (const location &loc,
2400 lexer_test& test,
2401 location_t strloc,
2402 enum cpp_ttype type,
2403 const char *expected_err)
2404 {
2405 cpp_reader *pfile = test.m_parser;
2406 string_concat_db *concats = &test.m_concats;
2407 cpp_substring_ranges ranges;
2408 const char *actual_err
2409 = get_substring_ranges_for_loc (pfile, concats, strloc,
2410 type, ranges);
2411 if (should_have_column_data_p (strloc))
2412 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2413 else
2414 ASSERT_STREQ_AT (loc,
2415 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2416 actual_err);
2417 }
2418
2419 #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
2420 assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2421 (STRLOC), (TYPE), (ERR))
2422
2423 /* Lex a simple string literal. Verify the substring location data, before
2424 and after running cpp_interpret_string on it. */
2425
2426 static void
2427 test_lexer_string_locations_simple (const line_table_case &case_)
2428 {
2429 /* Digits 0-9 (with 0 at column 10), the simple way.
2430 ....................000000000.11111111112.2222222223333333333
2431 ....................123456789.01234567890.1234567890123456789
2432 We add a trailing comment to ensure that we correctly locate
2433 the end of the string literal token. */
2434 const char *content = " \"0123456789\" /* not a string */\n";
2435 lexer_test test (case_, content, NULL);
2436
2437 /* Verify that we get the expected token back, with the correct
2438 location information. */
2439 const cpp_token *tok = test.get_token ();
2440 ASSERT_EQ (tok->type, CPP_STRING);
2441 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2442 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2443
2444 /* At this point in lexing, the quote characters are treated as part of
2445 the string (they are stripped off by cpp_interpret_string). */
2446
2447 ASSERT_EQ (tok->val.str.len, 12);
2448
2449 /* Verify that cpp_interpret_string works. */
2450 cpp_string dst_string;
2451 const enum cpp_ttype type = CPP_STRING;
2452 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2453 &dst_string, type);
2454 ASSERT_TRUE (result);
2455 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2456 free (const_cast <unsigned char *> (dst_string.text));
2457
2458 /* Verify ranges of individual characters. This no longer includes the
2459 opening quote, but does include the closing quote. */
2460 for (int i = 0; i <= 10; i++)
2461 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2462 10 + i, 10 + i);
2463
2464 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2465 }
2466
2467 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2468 encoding. */
2469
2470 static void
2471 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2472 {
2473 /* EBCDIC support requires iconv. */
2474 if (!HAVE_ICONV)
2475 return;
2476
2477 /* Digits 0-9 (with 0 at column 10), the simple way.
2478 ....................000000000.11111111112.2222222223333333333
2479 ....................123456789.01234567890.1234567890123456789
2480 We add a trailing comment to ensure that we correctly locate
2481 the end of the string literal token. */
2482 const char *content = " \"0123456789\" /* not a string */\n";
2483 ebcdic_execution_charset use_ebcdic;
2484 lexer_test test (case_, content, &use_ebcdic);
2485
2486 /* Verify that we get the expected token back, with the correct
2487 location information. */
2488 const cpp_token *tok = test.get_token ();
2489 ASSERT_EQ (tok->type, CPP_STRING);
2490 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2491 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2492
2493 /* At this point in lexing, the quote characters are treated as part of
2494 the string (they are stripped off by cpp_interpret_string). */
2495
2496 ASSERT_EQ (tok->val.str.len, 12);
2497
2498 /* The remainder of the test requires an iconv implementation that
2499 can convert from UTF-8 to the EBCDIC encoding requested above. */
2500 if (use_ebcdic.iconv_errors_occurred_p ())
2501 return;
2502
2503 /* Verify that cpp_interpret_string works. */
2504 cpp_string dst_string;
2505 const enum cpp_ttype type = CPP_STRING;
2506 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2507 &dst_string, type);
2508 ASSERT_TRUE (result);
2509 /* We should now have EBCDIC-encoded text, specifically
2510 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2511 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2512 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2513 (const char *)dst_string.text);
2514 free (const_cast <unsigned char *> (dst_string.text));
2515
2516 /* Verify that we don't attempt to record substring location information
2517 for such cases. */
2518 ASSERT_HAS_NO_SUBSTRING_RANGES
2519 (test, tok->src_loc, type,
2520 "execution character set != source character set");
2521 }
2522
2523 /* Lex a string literal containing a hex-escaped character.
2524 Verify the substring location data, before and after running
2525 cpp_interpret_string on it. */
2526
2527 static void
2528 test_lexer_string_locations_hex (const line_table_case &case_)
2529 {
2530 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2531 and with a space in place of digit 6, to terminate the escaped
2532 hex code.
2533 ....................000000000.111111.11112222.
2534 ....................123456789.012345.67890123. */
2535 const char *content = " \"01234\\x35 789\"\n";
2536 lexer_test test (case_, content, NULL);
2537
2538 /* Verify that we get the expected token back, with the correct
2539 location information. */
2540 const cpp_token *tok = test.get_token ();
2541 ASSERT_EQ (tok->type, CPP_STRING);
2542 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2543 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2544
2545 /* At this point in lexing, the quote characters are treated as part of
2546 the string (they are stripped off by cpp_interpret_string). */
2547 ASSERT_EQ (tok->val.str.len, 15);
2548
2549 /* Verify that cpp_interpret_string works. */
2550 cpp_string dst_string;
2551 const enum cpp_ttype type = CPP_STRING;
2552 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2553 &dst_string, type);
2554 ASSERT_TRUE (result);
2555 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2556 free (const_cast <unsigned char *> (dst_string.text));
2557
2558 /* Verify ranges of individual characters. This no longer includes the
2559 opening quote, but does include the closing quote. */
2560 for (int i = 0; i <= 4; i++)
2561 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2562 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2563 for (int i = 6; i <= 10; i++)
2564 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2565
2566 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2567 }
2568
2569 /* Lex a string literal containing an octal-escaped character.
2570 Verify the substring location data after running cpp_interpret_string
2571 on it. */
2572
2573 static void
2574 test_lexer_string_locations_oct (const line_table_case &case_)
2575 {
2576 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2577 and with a space in place of digit 6, to terminate the escaped
2578 octal code.
2579 ....................000000000.111111.11112222.2222223333333333444
2580 ....................123456789.012345.67890123.4567890123456789012 */
2581 const char *content = " \"01234\\065 789\" /* not a string */\n";
2582 lexer_test test (case_, content, NULL);
2583
2584 /* Verify that we get the expected token back, with the correct
2585 location information. */
2586 const cpp_token *tok = test.get_token ();
2587 ASSERT_EQ (tok->type, CPP_STRING);
2588 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2589
2590 /* Verify that cpp_interpret_string works. */
2591 cpp_string dst_string;
2592 const enum cpp_ttype type = CPP_STRING;
2593 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2594 &dst_string, type);
2595 ASSERT_TRUE (result);
2596 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2597 free (const_cast <unsigned char *> (dst_string.text));
2598
2599 /* Verify ranges of individual characters. This no longer includes the
2600 opening quote, but does include the closing quote. */
2601 for (int i = 0; i < 5; i++)
2602 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2603 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2604 for (int i = 6; i <= 10; i++)
2605 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2606
2607 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2608 }
2609
2610 /* Test of string literal containing letter escapes. */
2611
2612 static void
2613 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2614 {
2615 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2616 .....................000000000.1.11111.1.1.11222.22222223333333
2617 .....................123456789.0.12345.6.7.89012.34567890123456. */
2618 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2619 lexer_test test (case_, content, NULL);
2620
2621 /* Verify that we get the expected tokens back. */
2622 const cpp_token *tok = test.get_token ();
2623 ASSERT_EQ (tok->type, CPP_STRING);
2624 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2625
2626 /* Verify ranges of individual characters. */
2627 /* "\t". */
2628 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2629 0, 1, 10, 11);
2630 /* "foo". */
2631 for (int i = 1; i <= 3; i++)
2632 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2633 i, 1, 11 + i, 11 + i);
2634 /* "\\" and "\n". */
2635 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2636 4, 1, 15, 16);
2637 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2638 5, 1, 17, 18);
2639
2640 /* "bar" and closing quote for nul-terminator. */
2641 for (int i = 6; i <= 9; i++)
2642 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2643 i, 1, 13 + i, 13 + i);
2644
2645 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2646 }
2647
2648 /* Another test of a string literal containing a letter escape.
2649 Based on string seen in
2650 printf ("%-%\n");
2651 in gcc.dg/format/c90-printf-1.c. */
2652
2653 static void
2654 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2655 {
2656 /* .....................000000000.1111.11.1111.22222222223.
2657 .....................123456789.0123.45.6789.01234567890. */
2658 const char *content = (" \"%-%\\n\" /* non-str */\n");
2659 lexer_test test (case_, content, NULL);
2660
2661 /* Verify that we get the expected tokens back. */
2662 const cpp_token *tok = test.get_token ();
2663 ASSERT_EQ (tok->type, CPP_STRING);
2664 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2665
2666 /* Verify ranges of individual characters. */
2667 /* "%-%". */
2668 for (int i = 0; i < 3; i++)
2669 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2670 i, 1, 10 + i, 10 + i);
2671 /* "\n". */
2672 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2673 3, 1, 13, 14);
2674
2675 /* Closing quote for nul-terminator. */
2676 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2677 4, 1, 15, 15);
2678
2679 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2680 }
2681
2682 /* Lex a string literal containing UCN 4 characters.
2683 Verify the substring location data after running cpp_interpret_string
2684 on it. */
2685
2686 static void
2687 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2688 {
2689 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2690 as UCN 4.
2691 ....................000000000.111111.111122.222222223.33333333344444
2692 ....................123456789.012345.678901.234567890.12345678901234 */
2693 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2694 lexer_test test (case_, content, NULL);
2695
2696 /* Verify that we get the expected token back, with the correct
2697 location information. */
2698 const cpp_token *tok = test.get_token ();
2699 ASSERT_EQ (tok->type, CPP_STRING);
2700 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2701
2702 /* Verify that cpp_interpret_string works.
2703 The string should be encoded in the execution character
2704 set. Assuming that is UTF-8, we should have the following:
2705 ----------- ---- ----- ------- ----------------
2706 Byte offset Byte Octal Unicode Source Column(s)
2707 ----------- ---- ----- ------- ----------------
2708 0 0x30 '0' 10
2709 1 0x31 '1' 11
2710 2 0x32 '2' 12
2711 3 0x33 '3' 13
2712 4 0x34 '4' 14
2713 5 0xE2 \342 U+2174 15-20
2714 6 0x85 \205 (cont) 15-20
2715 7 0xB4 \264 (cont) 15-20
2716 8 0xE2 \342 U+2175 21-26
2717 9 0x85 \205 (cont) 21-26
2718 10 0xB5 \265 (cont) 21-26
2719 11 0x37 '7' 27
2720 12 0x38 '8' 28
2721 13 0x39 '9' 29
2722 14 0x00 30 (closing quote)
2723 ----------- ---- ----- ------- ---------------. */
2724
2725 cpp_string dst_string;
2726 const enum cpp_ttype type = CPP_STRING;
2727 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2728 &dst_string, type);
2729 ASSERT_TRUE (result);
2730 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2731 (const char *)dst_string.text);
2732 free (const_cast <unsigned char *> (dst_string.text));
2733
2734 /* Verify ranges of individual characters. This no longer includes the
2735 opening quote, but does include the closing quote.
2736 '01234'. */
2737 for (int i = 0; i <= 4; i++)
2738 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2739 /* U+2174. */
2740 for (int i = 5; i <= 7; i++)
2741 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2742 /* U+2175. */
2743 for (int i = 8; i <= 10; i++)
2744 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2745 /* '789' and nul terminator */
2746 for (int i = 11; i <= 14; i++)
2747 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2748
2749 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2750 }
2751
2752 /* Lex a string literal containing UCN 8 characters.
2753 Verify the substring location data after running cpp_interpret_string
2754 on it. */
2755
2756 static void
2757 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2758 {
2759 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2760 ....................000000000.111111.1111222222.2222333333333.344444
2761 ....................123456789.012345.6789012345.6789012345678.901234 */
2762 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2763 lexer_test test (case_, content, NULL);
2764
2765 /* Verify that we get the expected token back, with the correct
2766 location information. */
2767 const cpp_token *tok = test.get_token ();
2768 ASSERT_EQ (tok->type, CPP_STRING);
2769 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2770 "\"01234\\U00002174\\U00002175789\"");
2771
2772 /* Verify that cpp_interpret_string works.
2773 The UTF-8 encoding of the string is identical to that from
2774 the ucn4 testcase above; the only difference is the column
2775 locations. */
2776 cpp_string dst_string;
2777 const enum cpp_ttype type = CPP_STRING;
2778 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2779 &dst_string, type);
2780 ASSERT_TRUE (result);
2781 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2782 (const char *)dst_string.text);
2783 free (const_cast <unsigned char *> (dst_string.text));
2784
2785 /* Verify ranges of individual characters. This no longer includes the
2786 opening quote, but does include the closing quote.
2787 '01234'. */
2788 for (int i = 0; i <= 4; i++)
2789 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2790 /* U+2174. */
2791 for (int i = 5; i <= 7; i++)
2792 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2793 /* U+2175. */
2794 for (int i = 8; i <= 10; i++)
2795 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2796 /* '789' at columns 35-37 */
2797 for (int i = 11; i <= 13; i++)
2798 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2799 /* Closing quote/nul-terminator at column 38. */
2800 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2801
2802 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2803 }
2804
/* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit quantity
   (most significant byte first) and return it as a host-order value.
   The bytes are read individually, so the result does not depend on
   the endianness of the host.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *octets = (const unsigned char *) ptr_be_value;
  uint32_t host_value = 0;

  /* Shift in one byte at a time, starting from the most significant.  */
  for (int k = 0; k < 4; k++)
    host_value = (host_value << 8) | (uint32_t) octets[k];

  return host_value;
}
2816
2817 /* Lex a wide string literal and verify that attempts to read substring
2818 location data from it fail gracefully. */
2819
2820 static void
2821 test_lexer_string_locations_wide_string (const line_table_case &case_)
2822 {
2823 /* Digits 0-9.
2824 ....................000000000.11111111112.22222222233333
2825 ....................123456789.01234567890.12345678901234 */
2826 const char *content = " L\"0123456789\" /* non-str */\n";
2827 lexer_test test (case_, content, NULL);
2828
2829 /* Verify that we get the expected token back, with the correct
2830 location information. */
2831 const cpp_token *tok = test.get_token ();
2832 ASSERT_EQ (tok->type, CPP_WSTRING);
2833 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2834
2835 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2836 cpp_string dst_string;
2837 const enum cpp_ttype type = CPP_WSTRING;
2838 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2839 &dst_string, type);
2840 ASSERT_TRUE (result);
2841 /* The cpp_reader defaults to big-endian with
2842 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2843 now be encoded as UTF-32BE. */
2844 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2845 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2846 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2847 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2848 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2849 free (const_cast <unsigned char *> (dst_string.text));
2850
2851 /* We don't yet support generating substring location information
2852 for L"" strings. */
2853 ASSERT_HAS_NO_SUBSTRING_RANGES
2854 (test, tok->src_loc, type,
2855 "execution character set != source character set");
2856 }
2857
/* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit quantity
   (high byte first) and return it as a host-order value.  The bytes
   are read individually, so the result does not depend on the
   endianness of the host.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *octets = (const unsigned char *) ptr_be_value;
  const unsigned int high_byte = octets[0];
  const unsigned int low_byte = octets[1];

  return (uint16_t) ((high_byte << 8) | low_byte);
}
2866
2867 /* Lex a u"" string literal and verify that attempts to read substring
2868 location data from it fail gracefully. */
2869
2870 static void
2871 test_lexer_string_locations_string16 (const line_table_case &case_)
2872 {
2873 /* Digits 0-9.
2874 ....................000000000.11111111112.22222222233333
2875 ....................123456789.01234567890.12345678901234 */
2876 const char *content = " u\"0123456789\" /* non-str */\n";
2877 lexer_test test (case_, content, NULL);
2878
2879 /* Verify that we get the expected token back, with the correct
2880 location information. */
2881 const cpp_token *tok = test.get_token ();
2882 ASSERT_EQ (tok->type, CPP_STRING16);
2883 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2884
2885 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2886 cpp_string dst_string;
2887 const enum cpp_ttype type = CPP_STRING16;
2888 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2889 &dst_string, type);
2890 ASSERT_TRUE (result);
2891
2892 /* The cpp_reader defaults to big-endian, so dst_string should
2893 now be encoded as UTF-16BE. */
2894 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2895 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2896 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2897 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2898 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2899 free (const_cast <unsigned char *> (dst_string.text));
2900
2901 /* We don't yet support generating substring location information
2902 for L"" strings. */
2903 ASSERT_HAS_NO_SUBSTRING_RANGES
2904 (test, tok->src_loc, type,
2905 "execution character set != source character set");
2906 }
2907
2908 /* Lex a U"" string literal and verify that attempts to read substring
2909 location data from it fail gracefully. */
2910
2911 static void
2912 test_lexer_string_locations_string32 (const line_table_case &case_)
2913 {
2914 /* Digits 0-9.
2915 ....................000000000.11111111112.22222222233333
2916 ....................123456789.01234567890.12345678901234 */
2917 const char *content = " U\"0123456789\" /* non-str */\n";
2918 lexer_test test (case_, content, NULL);
2919
2920 /* Verify that we get the expected token back, with the correct
2921 location information. */
2922 const cpp_token *tok = test.get_token ();
2923 ASSERT_EQ (tok->type, CPP_STRING32);
2924 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2925
2926 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2927 cpp_string dst_string;
2928 const enum cpp_ttype type = CPP_STRING32;
2929 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2930 &dst_string, type);
2931 ASSERT_TRUE (result);
2932
2933 /* The cpp_reader defaults to big-endian, so dst_string should
2934 now be encoded as UTF-32BE. */
2935 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2936 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2937 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2938 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2939 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2940 free (const_cast <unsigned char *> (dst_string.text));
2941
2942 /* We don't yet support generating substring location information
2943 for L"" strings. */
2944 ASSERT_HAS_NO_SUBSTRING_RANGES
2945 (test, tok->src_loc, type,
2946 "execution character set != source character set");
2947 }
2948
2949 /* Lex a u8-string literal.
2950 Verify the substring location data after running cpp_interpret_string
2951 on it. */
2952
2953 static void
2954 test_lexer_string_locations_u8 (const line_table_case &case_)
2955 {
2956 /* Digits 0-9.
2957 ....................000000000.11111111112.22222222233333
2958 ....................123456789.01234567890.12345678901234 */
2959 const char *content = " u8\"0123456789\" /* non-str */\n";
2960 lexer_test test (case_, content, NULL);
2961
2962 /* Verify that we get the expected token back, with the correct
2963 location information. */
2964 const cpp_token *tok = test.get_token ();
2965 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2966 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2967
2968 /* Verify that cpp_interpret_string works. */
2969 cpp_string dst_string;
2970 const enum cpp_ttype type = CPP_STRING;
2971 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2972 &dst_string, type);
2973 ASSERT_TRUE (result);
2974 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2975 free (const_cast <unsigned char *> (dst_string.text));
2976
2977 /* Verify ranges of individual characters. This no longer includes the
2978 opening quote, but does include the closing quote. */
2979 for (int i = 0; i <= 10; i++)
2980 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2981 }
2982
2983 /* Lex a string literal containing UTF-8 source characters.
2984 Verify the substring location data after running cpp_interpret_string
2985 on it. */
2986
2987 static void
2988 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2989 {
2990 /* This string literal is written out to the source file as UTF-8,
2991 and is of the form "before mojibake after", where "mojibake"
2992 is written as the following four unicode code points:
2993 U+6587 CJK UNIFIED IDEOGRAPH-6587
2994 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2995 U+5316 CJK UNIFIED IDEOGRAPH-5316
2996 U+3051 HIRAGANA LETTER KE.
2997 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2998 "before" and "after" are 1 byte per unicode character.
2999
3000 The numbering shown are "columns", which are *byte* numbers within
3001 the line, rather than unicode character numbers.
3002
3003 .................... 000000000.1111111.
3004 .................... 123456789.0123456. */
3005 const char *content = (" \"before "
3006 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3007 UTF-8: 0xE6 0x96 0x87
3008 C octal escaped UTF-8: \346\226\207
3009 "column" numbers: 17-19. */
3010 "\346\226\207"
3011
3012 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3013 UTF-8: 0xE5 0xAD 0x97
3014 C octal escaped UTF-8: \345\255\227
3015 "column" numbers: 20-22. */
3016 "\345\255\227"
3017
3018 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3019 UTF-8: 0xE5 0x8C 0x96
3020 C octal escaped UTF-8: \345\214\226
3021 "column" numbers: 23-25. */
3022 "\345\214\226"
3023
3024 /* U+3051 HIRAGANA LETTER KE
3025 UTF-8: 0xE3 0x81 0x91
3026 C octal escaped UTF-8: \343\201\221
3027 "column" numbers: 26-28. */
3028 "\343\201\221"
3029
3030 /* column numbers 29 onwards
3031 2333333.33334444444444
3032 9012345.67890123456789. */
3033 " after\" /* non-str */\n");
3034 lexer_test test (case_, content, NULL);
3035
3036 /* Verify that we get the expected token back, with the correct
3037 location information. */
3038 const cpp_token *tok = test.get_token ();
3039 ASSERT_EQ (tok->type, CPP_STRING);
3040 ASSERT_TOKEN_AS_TEXT_EQ
3041 (test.m_parser, tok,
3042 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3043
3044 /* Verify that cpp_interpret_string works. */
3045 cpp_string dst_string;
3046 const enum cpp_ttype type = CPP_STRING;
3047 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3048 &dst_string, type);
3049 ASSERT_TRUE (result);
3050 ASSERT_STREQ
3051 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3052 (const char *)dst_string.text);
3053 free (const_cast <unsigned char *> (dst_string.text));
3054
3055 /* Verify ranges of individual characters. This no longer includes the
3056 opening quote, but does include the closing quote.
3057 Assuming that both source and execution encodings are UTF-8, we have
3058 a run of 25 octets in each, plus the NUL terminator. */
3059 for (int i = 0; i < 25; i++)
3060 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3061 /* NUL-terminator should use the closing quote at column 35. */
3062 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3063
3064 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3065 }
3066
3067 /* Test of string literal concatenation. */
3068
3069 static void
3070 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3071 {
3072 /* Digits 0-9.
3073 .....................000000000.111111.11112222222222
3074 .....................123456789.012345.67890123456789. */
3075 const char *content = (" \"01234\" /* non-str */\n"
3076 " \"56789\" /* non-str */\n");
3077 lexer_test test (case_, content, NULL);
3078
3079 location_t input_locs[2];
3080
3081 /* Verify that we get the expected tokens back. */
3082 auto_vec <cpp_string> input_strings;
3083 const cpp_token *tok_a = test.get_token ();
3084 ASSERT_EQ (tok_a->type, CPP_STRING);
3085 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3086 input_strings.safe_push (tok_a->val.str);
3087 input_locs[0] = tok_a->src_loc;
3088
3089 const cpp_token *tok_b = test.get_token ();
3090 ASSERT_EQ (tok_b->type, CPP_STRING);
3091 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3092 input_strings.safe_push (tok_b->val.str);
3093 input_locs[1] = tok_b->src_loc;
3094
3095 /* Verify that cpp_interpret_string works. */
3096 cpp_string dst_string;
3097 const enum cpp_ttype type = CPP_STRING;
3098 bool result = cpp_interpret_string (test.m_parser,
3099 input_strings.address (), 2,
3100 &dst_string, type);
3101 ASSERT_TRUE (result);
3102 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3103 free (const_cast <unsigned char *> (dst_string.text));
3104
3105 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3106 test.m_concats.record_string_concatenation (2, input_locs);
3107
3108 location_t initial_loc = input_locs[0];
3109
3110 /* "01234" on line 1. */
3111 for (int i = 0; i <= 4; i++)
3112 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3113 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3114 for (int i = 5; i <= 10; i++)
3115 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3116
3117 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3118 }
3119
3120 /* Another test of string literal concatenation. */
3121
3122 static void
3123 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3124 {
3125 /* Digits 0-9.
3126 .....................000000000.111.11111112222222
3127 .....................123456789.012.34567890123456. */
3128 const char *content = (" \"01\" /* non-str */\n"
3129 " \"23\" /* non-str */\n"
3130 " \"45\" /* non-str */\n"
3131 " \"67\" /* non-str */\n"
3132 " \"89\" /* non-str */\n");
3133 lexer_test test (case_, content, NULL);
3134
3135 auto_vec <cpp_string> input_strings;
3136 location_t input_locs[5];
3137
3138 /* Verify that we get the expected tokens back. */
3139 for (int i = 0; i < 5; i++)
3140 {
3141 const cpp_token *tok = test.get_token ();
3142 ASSERT_EQ (tok->type, CPP_STRING);
3143 input_strings.safe_push (tok->val.str);
3144 input_locs[i] = tok->src_loc;
3145 }
3146
3147 /* Verify that cpp_interpret_string works. */
3148 cpp_string dst_string;
3149 const enum cpp_ttype type = CPP_STRING;
3150 bool result = cpp_interpret_string (test.m_parser,
3151 input_strings.address (), 5,
3152 &dst_string, type);
3153 ASSERT_TRUE (result);
3154 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3155 free (const_cast <unsigned char *> (dst_string.text));
3156
3157 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3158 test.m_concats.record_string_concatenation (5, input_locs);
3159
3160 location_t initial_loc = input_locs[0];
3161
3162 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3163 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3164 and expect get_source_range_for_substring to fail.
3165 However, for a string concatenation test, we can have a case
3166 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3167 but subsequent strings can be after it.
3168 Attempting to detect this within assert_char_at_range
3169 would overcomplicate the logic for the common test cases, so
3170 we detect it here. */
3171 if (should_have_column_data_p (input_locs[0])
3172 && !should_have_column_data_p (input_locs[4]))
3173 {
3174 /* Verify that get_source_range_for_substring gracefully rejects
3175 this case. */
3176 source_range actual_range;
3177 const char *err
3178 = get_source_range_for_char (test.m_parser, &test.m_concats,
3179 initial_loc, type, 0, &actual_range);
3180 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3181 return;
3182 }
3183
3184 for (int i = 0; i < 5; i++)
3185 for (int j = 0; j < 2; j++)
3186 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3187 i + 1, 10 + j, 10 + j);
3188
3189 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3190 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3191
3192 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3193 }
3194
3195 /* Another test of string literal concatenation, this time combined with
3196 various kinds of escaped characters. */
3197
3198 static void
3199 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3200 {
3201 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3202 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3203 const char *content
3204 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3205 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3206 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3207 lexer_test test (case_, content, NULL);
3208
3209 auto_vec <cpp_string> input_strings;
3210 location_t input_locs[4];
3211
3212 /* Verify that we get the expected tokens back. */
3213 for (int i = 0; i < 4; i++)
3214 {
3215 const cpp_token *tok = test.get_token ();
3216 ASSERT_EQ (tok->type, CPP_STRING);
3217 input_strings.safe_push (tok->val.str);
3218 input_locs[i] = tok->src_loc;
3219 }
3220
3221 /* Verify that cpp_interpret_string works. */
3222 cpp_string dst_string;
3223 const enum cpp_ttype type = CPP_STRING;
3224 bool result = cpp_interpret_string (test.m_parser,
3225 input_strings.address (), 4,
3226 &dst_string, type);
3227 ASSERT_TRUE (result);
3228 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3229 free (const_cast <unsigned char *> (dst_string.text));
3230
3231 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3232 test.m_concats.record_string_concatenation (4, input_locs);
3233
3234 location_t initial_loc = input_locs[0];
3235
3236 for (int i = 0; i <= 4; i++)
3237 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3238 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3239 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3240 for (int i = 7; i <= 9; i++)
3241 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3242
3243 /* NUL-terminator should use the location of the final closing quote. */
3244 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3245
3246 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3247 }
3248
3249 /* Test of string literal in a macro. */
3250
3251 static void
3252 test_lexer_string_locations_macro (const line_table_case &case_)
3253 {
3254 /* Digits 0-9.
3255 .....................0000000001111111111.22222222223.
3256 .....................1234567890123456789.01234567890. */
3257 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3258 " MACRO");
3259 lexer_test test (case_, content, NULL);
3260
3261 /* Verify that we get the expected tokens back. */
3262 const cpp_token *tok = test.get_token ();
3263 ASSERT_EQ (tok->type, CPP_PADDING);
3264
3265 tok = test.get_token ();
3266 ASSERT_EQ (tok->type, CPP_STRING);
3267 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3268
3269 /* Verify ranges of individual characters. We ought to
3270 see columns within the macro definition. */
3271 for (int i = 0; i <= 10; i++)
3272 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3273 i, 1, 20 + i, 20 + i);
3274
3275 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3276
3277 tok = test.get_token ();
3278 ASSERT_EQ (tok->type, CPP_PADDING);
3279 }
3280
3281 /* Test of stringification of a macro argument. */
3282
3283 static void
3284 test_lexer_string_locations_stringified_macro_argument
3285 (const line_table_case &case_)
3286 {
3287 /* .....................000000000111111111122222222223.
3288 .....................123456789012345678901234567890. */
3289 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3290 "MACRO(foo)\n");
3291 lexer_test test (case_, content, NULL);
3292
3293 /* Verify that we get the expected token back. */
3294 const cpp_token *tok = test.get_token ();
3295 ASSERT_EQ (tok->type, CPP_PADDING);
3296
3297 tok = test.get_token ();
3298 ASSERT_EQ (tok->type, CPP_STRING);
3299 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3300
3301 /* We don't support getting the location of a stringified macro
3302 argument. Verify that it fails gracefully. */
3303 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3304 "cpp_interpret_string_1 failed");
3305
3306 tok = test.get_token ();
3307 ASSERT_EQ (tok->type, CPP_PADDING);
3308
3309 tok = test.get_token ();
3310 ASSERT_EQ (tok->type, CPP_PADDING);
3311 }
3312
3313 /* Ensure that we are fail gracefully if something attempts to pass
3314 in a location that isn't a string literal token. Seen on this code:
3315
3316 const char a[] = " %d ";
3317 __builtin_printf (a, 0.5);
3318 ^
3319
3320 when c-format.c erroneously used the indicated one-character
3321 location as the format string location, leading to a read past the
3322 end of a string buffer in cpp_interpret_string_1. */
3323
3324 static void
3325 test_lexer_string_locations_non_string (const line_table_case &case_)
3326 {
3327 /* .....................000000000111111111122222222223.
3328 .....................123456789012345678901234567890. */
3329 const char *content = (" a\n");
3330 lexer_test test (case_, content, NULL);
3331
3332 /* Verify that we get the expected token back. */
3333 const cpp_token *tok = test.get_token ();
3334 ASSERT_EQ (tok->type, CPP_NAME);
3335 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3336
3337 /* At this point, libcpp is attempting to interpret the name as a
3338 string literal, despite it not starting with a quote. We don't detect
3339 that, but we should at least fail gracefully. */
3340 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3341 "cpp_interpret_string_1 failed");
3342 }
3343
3344 /* Ensure that we can read substring information for a token which
3345 starts in one linemap and ends in another . Adapted from
3346 gcc.dg/cpp/pr69985.c. */
3347
3348 static void
3349 test_lexer_string_locations_long_line (const line_table_case &case_)
3350 {
3351 /* .....................000000.000111111111
3352 .....................123456.789012346789. */
3353 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3354 " \"0123456789012345678901234567890123456789"
3355 "0123456789012345678901234567890123456789"
3356 "0123456789012345678901234567890123456789"
3357 "0123456789\"\n");
3358
3359 lexer_test test (case_, content, NULL);
3360
3361 /* Verify that we get the expected token back. */
3362 const cpp_token *tok = test.get_token ();
3363 ASSERT_EQ (tok->type, CPP_STRING);
3364
3365 if (!should_have_column_data_p (line_table->highest_location))
3366 return;
3367
3368 /* Verify ranges of individual characters. */
3369 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3370 for (int i = 0; i < 131; i++)
3371 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3372 i, 2, 7 + i, 7 + i);
3373 }
3374
3375 /* Test of locations within a raw string that doesn't contain a newline. */
3376
3377 static void
3378 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3379 {
3380 /* .....................00.0000000111111111122.
3381 .....................12.3456789012345678901. */
3382 const char *content = ("R\"foo(0123456789)foo\"\n");
3383 lexer_test test (case_, content, NULL);
3384
3385 /* Verify that we get the expected token back. */
3386 const cpp_token *tok = test.get_token ();
3387 ASSERT_EQ (tok->type, CPP_STRING);
3388
3389 /* Verify that cpp_interpret_string works. */
3390 cpp_string dst_string;
3391 const enum cpp_ttype type = CPP_STRING;
3392 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3393 &dst_string, type);
3394 ASSERT_TRUE (result);
3395 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3396 free (const_cast <unsigned char *> (dst_string.text));
3397
3398 if (!should_have_column_data_p (line_table->highest_location))
3399 return;
3400
3401 /* 0-9, plus the nil terminator. */
3402 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3403 for (int i = 0; i < 11; i++)
3404 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3405 i, 1, 7 + i, 7 + i);
3406 }
3407
3408 /* Test of locations within a raw string that contains a newline. */
3409
3410 static void
3411 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3412 {
3413 /* .....................00.0000.
3414 .....................12.3456. */
3415 const char *content = ("R\"foo(\n"
3416 /* .....................00000.
3417 .....................12345. */
3418 "hello\n"
3419 "world\n"
3420 /* .....................00000.
3421 .....................12345. */
3422 ")foo\"\n");
3423 lexer_test test (case_, content, NULL);
3424
3425 /* Verify that we get the expected token back. */
3426 const cpp_token *tok = test.get_token ();
3427 ASSERT_EQ (tok->type, CPP_STRING);
3428
3429 /* Verify that cpp_interpret_string works. */
3430 cpp_string dst_string;
3431 const enum cpp_ttype type = CPP_STRING;
3432 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3433 &dst_string, type);
3434 ASSERT_TRUE (result);
3435 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3436 free (const_cast <unsigned char *> (dst_string.text));
3437
3438 if (!should_have_column_data_p (line_table->highest_location))
3439 return;
3440
3441 /* Currently we don't support locations within raw strings that
3442 contain newlines. */
3443 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3444 "range endpoints are on different lines");
3445 }
3446
3447 /* Test of parsing an unterminated raw string. */
3448
3449 static void
3450 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3451 {
3452 const char *content = "R\"ouch()ouCh\" /* etc */";
3453
3454 lexer_diagnostic_sink diagnostics;
3455 lexer_test test (case_, content, &diagnostics);
3456 test.m_implicitly_expect_EOF = false;
3457
3458 /* Attempt to parse the raw string. */
3459 const cpp_token *tok = test.get_token ();
3460 ASSERT_EQ (tok->type, CPP_EOF);
3461
3462 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3463 /* We expect the message "unterminated raw string"
3464 in the "cpplib" translation domain.
3465 It's not clear that dgettext is available on all supported hosts,
3466 so this assertion is commented-out for now.
3467 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3468 diagnostics.m_diagnostics[0]);
3469 */
3470 }
3471
3472 /* Test of lexing char constants. */
3473
3474 static void
3475 test_lexer_char_constants (const line_table_case &case_)
3476 {
3477 /* Various char constants.
3478 .....................0000000001111111111.22222222223.
3479 .....................1234567890123456789.01234567890. */
3480 const char *content = (" 'a'\n"
3481 " u'a'\n"
3482 " U'a'\n"
3483 " L'a'\n"
3484 " 'abc'\n");
3485 lexer_test test (case_, content, NULL);
3486
3487 /* Verify that we get the expected tokens back. */
3488 /* 'a'. */
3489 const cpp_token *tok = test.get_token ();
3490 ASSERT_EQ (tok->type, CPP_CHAR);
3491 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3492
3493 unsigned int chars_seen;
3494 int unsignedp;
3495 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3496 &chars_seen, &unsignedp);
3497 ASSERT_EQ (cc, 'a');
3498 ASSERT_EQ (chars_seen, 1);
3499
3500 /* u'a'. */
3501 tok = test.get_token ();
3502 ASSERT_EQ (tok->type, CPP_CHAR16);
3503 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3504
3505 /* U'a'. */
3506 tok = test.get_token ();
3507 ASSERT_EQ (tok->type, CPP_CHAR32);
3508 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3509
3510 /* L'a'. */
3511 tok = test.get_token ();
3512 ASSERT_EQ (tok->type, CPP_WCHAR);
3513 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3514
3515 /* 'abc' (c-char-sequence). */
3516 tok = test.get_token ();
3517 ASSERT_EQ (tok->type, CPP_CHAR);
3518 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3519 }
3520 /* A table of interesting location_t values, giving one axis of our test
3521 matrix. */
3522
3523 static const location_t boundary_locations[] = {
3524 /* Zero means "don't override the default values for a new line_table". */
3525 0,
3526
3527 /* An arbitrary non-zero value that isn't close to one of
3528 the boundary values below. */
3529 0x10000,
3530
3531 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3532 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3533 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3534 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3535 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3536 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3537
3538 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3539 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3540 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3541 LINE_MAP_MAX_LOCATION_WITH_COLS,
3542 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3543 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3544 };
3545
3546 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3547
3548 void
3549 for_each_line_table_case (void (*testcase) (const line_table_case &))
3550 {
3551 /* As noted above in the description of struct line_table_case,
3552 we want to explore a test matrix of interesting line_table
3553 situations, running various selftests for each case within the
3554 matrix. */
3555
3556 /* Run all tests with:
3557 (a) line_table->default_range_bits == 0, and
3558 (b) line_table->default_range_bits == 5. */
3559 int num_cases_tested = 0;
3560 for (int default_range_bits = 0; default_range_bits <= 5;
3561 default_range_bits += 5)
3562 {
3563 /* ...and use each of the "interesting" location values as
3564 the starting location within line_table. */
3565 const int num_boundary_locations
3566 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3567 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3568 {
3569 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3570
3571 testcase (c);
3572
3573 num_cases_tested++;
3574 }
3575 }
3576
3577 /* Verify that we fully covered the test matrix. */
3578 ASSERT_EQ (num_cases_tested, 2 * 12);
3579 }
3580
3581 /* Verify that when presented with a consecutive pair of locations with
3582 a very large line offset, we don't attempt to consolidate them into
3583 a single ordinary linemap where the line offsets within the line map
3584 would lead to overflow (PR lto/88147). */
3585
3586 static void
3587 test_line_offset_overflow ()
3588 {
3589 line_table_test ltt (line_table_case (5, 0));
3590
3591 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3592 linemap_line_start (line_table, 1, 100);
3593 location_t loc_a = linemap_line_start (line_table, 2578, 255);
3594 assert_loceq ("foo.c", 2578, 0, loc_a);
3595
3596 const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3597 ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3598 ASSERT_EQ (ordmap_a->m_range_bits, 5);
3599
3600 location_t loc_b = linemap_line_start (line_table, 404198, 512);
3601 assert_loceq ("foo.c", 404198, 0, loc_b);
3602
3603 /* We should have started a new linemap, rather than attempting to store
3604 a very large line offset. */
3605 const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3606 ASSERT_NE (ordmap_a, ordmap_b);
3607 }
3608
3609 void test_cpp_utf8 ()
3610 {
3611 /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */
3612 {
3613 int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8);
3614 ASSERT_EQ (8, w_bad);
3615 int w_ctrl = cpp_display_width ("\r\t\n\v\0\1", 6);
3616 ASSERT_EQ (6, w_ctrl);
3617 }
3618
3619 /* Verify that wcwidth of valid UTF-8 is as expected. */
3620 {
3621 const int w_pi = cpp_display_width ("\xcf\x80", 2);
3622 ASSERT_EQ (1, w_pi);
3623 const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4);
3624 ASSERT_EQ (2, w_emoji);
3625 const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2);
3626 ASSERT_EQ (1, w_umlaut_precomposed);
3627 const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3);
3628 ASSERT_EQ (1, w_umlaut_combining);
3629 const int w_han = cpp_display_width ("\xe4\xb8\xba", 3);
3630 ASSERT_EQ (2, w_han);
3631 const int w_ascii = cpp_display_width ("GCC", 3);
3632 ASSERT_EQ (3, w_ascii);
3633 const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
3634 "\x9f! \xe4\xb8\xba y\xcc\x88", 24);
3635 ASSERT_EQ (18, w_mixed);
3636 }
3637
3638 /* Verify that cpp_byte_column_to_display_column can go past the end,
3639 and similar edge cases. */
3640 {
3641 const char *str
3642 /* Display columns.
3643 111111112345 */
3644 = "\xcf\x80 abc";
3645 /* 111122223456
3646 Byte columns. */
3647
3648 ASSERT_EQ (5, cpp_display_width (str, 6));
3649 ASSERT_EQ (105, cpp_byte_column_to_display_column (str, 6, 106));
3650 ASSERT_EQ (10000, cpp_byte_column_to_display_column (NULL, 0, 10000));
3651 ASSERT_EQ (0, cpp_byte_column_to_display_column (NULL, 10000, 0));
3652 }
3653
3654 /* Verify that cpp_display_column_to_byte_column can go past the end,
3655 and similar edge cases, and check invertibility. */
3656 {
3657 const char *str
3658 /* Display columns.
3659 000000000000000000000000000000000000011
3660 111111112222222234444444455555555678901 */
3661 = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
3662 /* 000000000000000000000000000000000111111
3663 111122223333444456666777788889999012345
3664 Byte columns. */
3665 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2));
3666 ASSERT_EQ (15, cpp_display_column_to_byte_column (str, 15, 11));
3667 ASSERT_EQ (115, cpp_display_column_to_byte_column (str, 15, 111));
3668 ASSERT_EQ (10000, cpp_display_column_to_byte_column (NULL, 0, 10000));
3669 ASSERT_EQ (0, cpp_display_column_to_byte_column (NULL, 10000, 0));
3670
3671 /* Verify that we do not interrupt a UTF-8 sequence. */
3672 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1));
3673
3674 for (int byte_col = 1; byte_col <= 15; ++byte_col)
3675 {
3676 const int disp_col = cpp_byte_column_to_display_column (str, 15,
3677 byte_col);
3678 const int byte_col2 = cpp_display_column_to_byte_column (str, 15,
3679 disp_col);
3680
3681 /* If we ask for the display column in the middle of a UTF-8
3682 sequence, it will return the length of the partial sequence,
3683 matching the behavior of GCC before display column support.
3684 Otherwise check the round trip was successful. */
3685 if (byte_col < 4)
3686 ASSERT_EQ (byte_col, disp_col);
3687 else if (byte_col >= 6 && byte_col < 9)
3688 ASSERT_EQ (3 + (byte_col - 5), disp_col);
3689 else
3690 ASSERT_EQ (byte_col2, byte_col);
3691 }
3692 }
3693
3694 }
3695
3696 /* Run all of the selftests within this file. */
3697
3698 void
3699 input_c_tests ()
3700 {
3701 test_linenum_comparisons ();
3702 test_should_have_column_data_p ();
3703 test_unknown_location ();
3704 test_builtins ();
3705 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3706
3707 for_each_line_table_case (test_accessing_ordinary_linemaps);
3708 for_each_line_table_case (test_lexer);
3709 for_each_line_table_case (test_lexer_string_locations_simple);
3710 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3711 for_each_line_table_case (test_lexer_string_locations_hex);
3712 for_each_line_table_case (test_lexer_string_locations_oct);
3713 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3714 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3715 for_each_line_table_case (test_lexer_string_locations_ucn4);
3716 for_each_line_table_case (test_lexer_string_locations_ucn8);
3717 for_each_line_table_case (test_lexer_string_locations_wide_string);
3718 for_each_line_table_case (test_lexer_string_locations_string16);
3719 for_each_line_table_case (test_lexer_string_locations_string32);
3720 for_each_line_table_case (test_lexer_string_locations_u8);
3721 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3722 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3723 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3724 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3725 for_each_line_table_case (test_lexer_string_locations_macro);
3726 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3727 for_each_line_table_case (test_lexer_string_locations_non_string);
3728 for_each_line_table_case (test_lexer_string_locations_long_line);
3729 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3730 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3731 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3732 for_each_line_table_case (test_lexer_char_constants);
3733
3734 test_reading_source_line ();
3735
3736 test_line_offset_overflow ();
3737
3738 test_cpp_utf8 ();
3739 }
3740
3741 } // namespace selftest
3742
3743 #endif /* CHECKING_P */