]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/input.c
input.c: move file caching globals to a new file_cache class
[thirdparty/gcc.git] / gcc / input.c
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2021 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic.h"
25 #include "diagnostic-core.h"
26 #include "selftest.h"
27 #include "cpplib.h"
28
29 #ifndef HAVE_ICONV
30 #define HAVE_ICONV 0
31 #endif
32
33 /* This is a cache used by get_next_line to store the content of a
34 file to be searched for file lines. */
35 class file_cache_slot
36 {
37 public:
38 file_cache_slot ();
39 ~file_cache_slot ();
40
41 bool read_line_num (size_t line_num,
42 char ** line, ssize_t *line_len);
43
44 /* Accessors. */
45 const char *get_file_path () const { return m_file_path; }
46 unsigned get_use_count () const { return m_use_count; }
47 bool missing_trailing_newline_p () const
48 {
49 return m_missing_trailing_newline;
50 }
51
52 void inc_use_count () { m_use_count++; }
53
54 void create (const char *file_path, FILE *fp, unsigned highest_use_count);
55 void evict ();
56
57 private:
58 /* These are information used to store a line boundary. */
59 class line_info
60 {
61 public:
62 /* The line number. It starts from 1. */
63 size_t line_num;
64
65 /* The position (byte count) of the beginning of the line,
66 relative to the file data pointer. This starts at zero. */
67 size_t start_pos;
68
69 /* The position (byte count) of the last byte of the line. This
70 normally points to the '\n' character, or to one byte after the
71 last byte of the file, if the file doesn't contain a '\n'
72 character. */
73 size_t end_pos;
74
75 line_info (size_t l, size_t s, size_t e)
76 : line_num (l), start_pos (s), end_pos (e)
77 {}
78
79 line_info ()
80 :line_num (0), start_pos (0), end_pos (0)
81 {}
82 };
83
84 bool needs_read_p () const;
85 bool needs_grow_p () const;
86 void maybe_grow ();
87 bool read_data ();
88 bool maybe_read_data ();
89 bool get_next_line (char **line, ssize_t *line_len);
90 bool read_next_line (char ** line, ssize_t *line_len);
91 bool goto_next_line ();
92
93 static const size_t buffer_size = 4 * 1024;
94 static const size_t line_record_size = 100;
95
96 /* The number of time this file has been accessed. This is used
97 to designate which file cache to evict from the cache
98 array. */
99 unsigned m_use_count;
100
101 /* The file_path is the key for identifying a particular file in
102 the cache.
103 For libcpp-using code, the underlying buffer for this field is
104 owned by the corresponding _cpp_file within the cpp_reader. */
105 const char *m_file_path;
106
107 FILE *m_fp;
108
109 /* This points to the content of the file that we've read so
110 far. */
111 char *m_data;
112
113 /* The size of the DATA array above.*/
114 size_t m_size;
115
116 /* The number of bytes read from the underlying file so far. This
117 must be less (or equal) than SIZE above. */
118 size_t m_nb_read;
119
120 /* The index of the beginning of the current line. */
121 size_t m_line_start_idx;
122
123 /* The number of the previous line read. This starts at 1. Zero
124 means we've read no line so far. */
125 size_t m_line_num;
126
127 /* This is the total number of lines of the current file. At the
128 moment, we try to get this information from the line map
129 subsystem. Note that this is just a hint. When using the C++
130 front-end, this hint is correct because the input file is then
131 completely tokenized before parsing starts; so the line map knows
132 the number of lines before compilation really starts. For e.g,
133 the C front-end, it can happen that we start emitting diagnostics
134 before the line map has seen the end of the file. */
135 size_t m_total_lines;
136
137 /* Could this file be missing a trailing newline on its final line?
138 Initially true (to cope with empty files), set to true/false
139 as each line is read. */
140 bool m_missing_trailing_newline;
141
142 /* This is a record of the beginning and end of the lines we've seen
143 while reading the file. This is useful to avoid walking the data
144 from the beginning when we are asked to read a line that is
145 before LINE_START_IDX above. Note that the maximum size of this
146 record is line_record_size, so that the memory consumption
147 doesn't explode. We thus scale total_lines down to
148 line_record_size. */
149 vec<line_info, va_heap> m_line_record;
150 };
151
152 /* Current position in real source file. */
153
154 location_t input_location = UNKNOWN_LOCATION;
155
156 class line_maps *line_table;
157
158 /* A stashed copy of "line_table" for use by selftest::line_table_test.
159 This needs to be a global so that it can be a GC root, and thus
160 prevent the stashed copy from being garbage-collected if the GC runs
161 during a line_table_test. */
162
163 class line_maps *saved_line_table;
164
165 /* Expand the source location LOC into a human readable location. If
166 LOC resolves to a builtin location, the file name of the readable
167 location is set to the string "<built-in>". If EXPANSION_POINT_P is
168 TRUE and LOC is virtual, then it is resolved to the expansion
169 point of the involved macro. Otherwise, it is resolved to the
170 spelling location of the token.
171
172 When resolving to the spelling location of the token, if the
173 resulting location is for a built-in location (that is, it has no
174 associated line/column) in the context of a macro expansion, the
175 returned location is the first one (while unwinding the macro
176 location towards its expansion point) that is in real source
177 code.
178
179 ASPECT controls which part of the location to use. */
180
181 static expanded_location
182 expand_location_1 (location_t loc,
183 bool expansion_point_p,
184 enum location_aspect aspect)
185 {
186 expanded_location xloc;
187 const line_map_ordinary *map;
188 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
189 tree block = NULL;
190
191 if (IS_ADHOC_LOC (loc))
192 {
193 block = LOCATION_BLOCK (loc);
194 loc = LOCATION_LOCUS (loc);
195 }
196
197 memset (&xloc, 0, sizeof (xloc));
198
199 if (loc >= RESERVED_LOCATION_COUNT)
200 {
201 if (!expansion_point_p)
202 {
203 /* We want to resolve LOC to its spelling location.
204
205 But if that spelling location is a reserved location that
206 appears in the context of a macro expansion (like for a
207 location for a built-in token), let's consider the first
208 location (toward the expansion point) that is not reserved;
209 that is, the first location that is in real source code. */
210 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
211 loc, NULL);
212 lrk = LRK_SPELLING_LOCATION;
213 }
214 loc = linemap_resolve_location (line_table, loc, lrk, &map);
215
216 /* loc is now either in an ordinary map, or is a reserved location.
217 If it is a compound location, the caret is in a spelling location,
218 but the start/finish might still be a virtual location.
219 Depending of what the caller asked for, we may need to recurse
220 one level in order to resolve any virtual locations in the
221 end-points. */
222 switch (aspect)
223 {
224 default:
225 gcc_unreachable ();
226 /* Fall through. */
227 case LOCATION_ASPECT_CARET:
228 break;
229 case LOCATION_ASPECT_START:
230 {
231 location_t start = get_start (loc);
232 if (start != loc)
233 return expand_location_1 (start, expansion_point_p, aspect);
234 }
235 break;
236 case LOCATION_ASPECT_FINISH:
237 {
238 location_t finish = get_finish (loc);
239 if (finish != loc)
240 return expand_location_1 (finish, expansion_point_p, aspect);
241 }
242 break;
243 }
244 xloc = linemap_expand_location (line_table, map, loc);
245 }
246
247 xloc.data = block;
248 if (loc <= BUILTINS_LOCATION)
249 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
250
251 return xloc;
252 }
253
254 /* Initialize the set of cache used for files accessed by caret
255 diagnostic. */
256
257 static void
258 diagnostic_file_cache_init (void)
259 {
260 gcc_assert (global_dc);
261 if (global_dc->m_file_cache == NULL)
262 global_dc->m_file_cache = new file_cache ();
263 }
264
265 /* Free the resources used by the set of cache used for files accessed
266 by caret diagnostic. */
267
268 void
269 diagnostic_file_cache_fini (void)
270 {
271 if (global_dc->m_file_cache)
272 {
273 delete global_dc->m_file_cache;
274 global_dc->m_file_cache = NULL;
275 }
276 }
277
278 /* Return the total lines number that have been read so far by the
279 line map (in the preprocessor) so far. For languages like C++ that
280 entirely preprocess the input file before starting to parse, this
281 equals the actual number of lines of the file. */
282
283 static size_t
284 total_lines_num (const char *file_path)
285 {
286 size_t r = 0;
287 location_t l = 0;
288 if (linemap_get_file_highest_location (line_table, file_path, &l))
289 {
290 gcc_assert (l >= RESERVED_LOCATION_COUNT);
291 expanded_location xloc = expand_location (l);
292 r = xloc.line;
293 }
294 return r;
295 }
296
297 /* Lookup the cache used for the content of a given file accessed by
298 caret diagnostic. Return the found cached file, or NULL if no
299 cached file was found. */
300
301 file_cache_slot *
302 file_cache::lookup_file (const char *file_path)
303 {
304 gcc_assert (file_path);
305
306 /* This will contain the found cached file. */
307 file_cache_slot *r = NULL;
308 for (unsigned i = 0; i < num_file_slots; ++i)
309 {
310 file_cache_slot *c = &m_file_slots[i];
311 if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
312 {
313 c->inc_use_count ();
314 r = c;
315 }
316 }
317
318 if (r)
319 r->inc_use_count ();
320
321 return r;
322 }
323
324 /* Purge any mention of FILENAME from the cache of files used for
325 printing source code. For use in selftests when working
326 with tempfiles. */
327
328 void
329 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
330 {
331 gcc_assert (file_path);
332
333 if (!global_dc->m_file_cache)
334 return;
335
336 global_dc->m_file_cache->forcibly_evict_file (file_path);
337 }
338
339 void
340 file_cache::forcibly_evict_file (const char *file_path)
341 {
342 gcc_assert (file_path);
343
344 file_cache_slot *r = lookup_file (file_path);
345 if (!r)
346 /* Not found. */
347 return;
348
349 r->evict ();
350 }
351
352 void
353 file_cache_slot::evict ()
354 {
355 m_file_path = NULL;
356 if (m_fp)
357 fclose (m_fp);
358 m_fp = NULL;
359 m_nb_read = 0;
360 m_line_start_idx = 0;
361 m_line_num = 0;
362 m_line_record.truncate (0);
363 m_use_count = 0;
364 m_total_lines = 0;
365 m_missing_trailing_newline = true;
366 }
367
368 /* Return the file cache that has been less used, recently, or the
369 first empty one. If HIGHEST_USE_COUNT is non-null,
370 *HIGHEST_USE_COUNT is set to the highest use count of the entries
371 in the cache table. */
372
373 file_cache_slot*
374 file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
375 {
376 diagnostic_file_cache_init ();
377
378 file_cache_slot *to_evict = &m_file_slots[0];
379 unsigned huc = to_evict->get_use_count ();
380 for (unsigned i = 1; i < num_file_slots; ++i)
381 {
382 file_cache_slot *c = &m_file_slots[i];
383 bool c_is_empty = (c->get_file_path () == NULL);
384
385 if (c->get_use_count () < to_evict->get_use_count ()
386 || (to_evict->get_file_path () && c_is_empty))
387 /* We evict C because it's either an entry with a lower use
388 count or one that is empty. */
389 to_evict = c;
390
391 if (huc < c->get_use_count ())
392 huc = c->get_use_count ();
393
394 if (c_is_empty)
395 /* We've reached the end of the cache; subsequent elements are
396 all empty. */
397 break;
398 }
399
400 if (highest_use_count)
401 *highest_use_count = huc;
402
403 return to_evict;
404 }
405
406 /* Create the cache used for the content of a given file to be
407 accessed by caret diagnostic. This cache is added to an array of
408 cache and can be retrieved by lookup_file_in_cache_tab. This
409 function returns the created cache. Note that only the last
410 num_file_slots files are cached. */
411
412 file_cache_slot*
413 file_cache::add_file (const char *file_path)
414 {
415
416 FILE *fp = fopen (file_path, "r");
417 if (fp == NULL)
418 return NULL;
419
420 unsigned highest_use_count = 0;
421 file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
422 r->create (file_path, fp, highest_use_count);
423 return r;
424 }
425
426 /* Populate this slot for use on FILE_PATH and FP, dropping any
427 existing cached content within it. */
428
429 void
430 file_cache_slot::create (const char *file_path, FILE *fp,
431 unsigned highest_use_count)
432 {
433 m_file_path = file_path;
434 if (m_fp)
435 fclose (m_fp);
436 m_fp = fp;
437 m_nb_read = 0;
438 m_line_start_idx = 0;
439 m_line_num = 0;
440 m_line_record.truncate (0);
441 /* Ensure that this cache entry doesn't get evicted next time
442 add_file_to_cache_tab is called. */
443 m_use_count = ++highest_use_count;
444 m_total_lines = total_lines_num (file_path);
445 m_missing_trailing_newline = true;
446 }
447
448 /* file_cache's ctor. */
449
450 file_cache::file_cache ()
451 : m_file_slots (new file_cache_slot[num_file_slots])
452 {
453 }
454
455 /* file_cache's dtor. */
456
457 file_cache::~file_cache ()
458 {
459 delete[] m_file_slots;
460 }
461
462 /* Lookup the cache used for the content of a given file accessed by
463 caret diagnostic. If no cached file was found, create a new cache
464 for this file, add it to the array of cached file and return
465 it. */
466
467 file_cache_slot*
468 file_cache::lookup_or_add_file (const char *file_path)
469 {
470 file_cache_slot *r = lookup_file (file_path);
471 if (r == NULL)
472 r = add_file (file_path);
473 return r;
474 }
475
476 /* Default constructor for a cache of file used by caret
477 diagnostic. */
478
479 file_cache_slot::file_cache_slot ()
480 : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
481 m_size (0), m_nb_read (0), m_line_start_idx (0), m_line_num (0),
482 m_total_lines (0), m_missing_trailing_newline (true)
483 {
484 m_line_record.create (0);
485 }
486
487 /* Destructor for a cache of file used by caret diagnostic. */
488
489 file_cache_slot::~file_cache_slot ()
490 {
491 if (m_fp)
492 {
493 fclose (m_fp);
494 m_fp = NULL;
495 }
496 if (m_data)
497 {
498 XDELETEVEC (m_data);
499 m_data = 0;
500 }
501 m_line_record.release ();
502 }
503
504 /* Returns TRUE iff the cache would need to be filled with data coming
505 from the file. That is, either the cache is empty or full or the
506 current line is empty. Note that if the cache is full, it would
507 need to be extended and filled again. */
508
509 bool
510 file_cache_slot::needs_read_p () const
511 {
512 return (m_nb_read == 0
513 || m_nb_read == m_size
514 || (m_line_start_idx >= m_nb_read - 1));
515 }
516
517 /* Return TRUE iff the cache is full and thus needs to be
518 extended. */
519
520 bool
521 file_cache_slot::needs_grow_p () const
522 {
523 return m_nb_read == m_size;
524 }
525
526 /* Grow the cache if it needs to be extended. */
527
528 void
529 file_cache_slot::maybe_grow ()
530 {
531 if (!needs_grow_p ())
532 return;
533
534 size_t size = m_size == 0 ? buffer_size : m_size * 2;
535 m_data = XRESIZEVEC (char, m_data, size);
536 m_size = size;
537 }
538
539 /* Read more data into the cache. Extends the cache if need be.
540 Returns TRUE iff new data could be read. */
541
542 bool
543 file_cache_slot::read_data ()
544 {
545 if (feof (m_fp) || ferror (m_fp))
546 return false;
547
548 maybe_grow ();
549
550 char * from = m_data + m_nb_read;
551 size_t to_read = m_size - m_nb_read;
552 size_t nb_read = fread (from, 1, to_read, m_fp);
553
554 if (ferror (m_fp))
555 return false;
556
557 m_nb_read += nb_read;
558 return !!nb_read;
559 }
560
561 /* Read new data iff the cache needs to be filled with more data
562 coming from the file FP. Return TRUE iff the cache was filled with
563 mode data. */
564
565 bool
566 file_cache_slot::maybe_read_data ()
567 {
568 if (!needs_read_p ())
569 return false;
570 return read_data ();
571 }
572
573 /* Read a new line from file FP, using C as a cache for the data
574 coming from the file. Upon successful completion, *LINE is set to
575 the beginning of the line found. *LINE points directly in the
576 line cache and is only valid until the next call of get_next_line.
577 *LINE_LEN is set to the length of the line. Note that the line
578 does not contain any terminal delimiter. This function returns
579 true if some data was read or process from the cache, false
580 otherwise. Note that subsequent calls to get_next_line might
581 make the content of *LINE invalid. */
582
583 bool
584 file_cache_slot::get_next_line (char **line, ssize_t *line_len)
585 {
586 /* Fill the cache with data to process. */
587 maybe_read_data ();
588
589 size_t remaining_size = m_nb_read - m_line_start_idx;
590 if (remaining_size == 0)
591 /* There is no more data to process. */
592 return false;
593
594 char *line_start = m_data + m_line_start_idx;
595
596 char *next_line_start = NULL;
597 size_t len = 0;
598 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
599 if (line_end == NULL)
600 {
601 /* We haven't found the end-of-line delimiter in the cache.
602 Fill the cache with more data from the file and look for the
603 '\n'. */
604 while (maybe_read_data ())
605 {
606 line_start = m_data + m_line_start_idx;
607 remaining_size = m_nb_read - m_line_start_idx;
608 line_end = (char *) memchr (line_start, '\n', remaining_size);
609 if (line_end != NULL)
610 {
611 next_line_start = line_end + 1;
612 break;
613 }
614 }
615 if (line_end == NULL)
616 {
617 /* We've loadded all the file into the cache and still no
618 '\n'. Let's say the line ends up at one byte passed the
619 end of the file. This is to stay consistent with the case
620 of when the line ends up with a '\n' and line_end points to
621 that terminal '\n'. That consistency is useful below in
622 the len calculation. */
623 line_end = m_data + m_nb_read ;
624 m_missing_trailing_newline = true;
625 }
626 else
627 m_missing_trailing_newline = false;
628 }
629 else
630 {
631 next_line_start = line_end + 1;
632 m_missing_trailing_newline = false;
633 }
634
635 if (ferror (m_fp))
636 return false;
637
638 /* At this point, we've found the end of the of line. It either
639 points to the '\n' or to one byte after the last byte of the
640 file. */
641 gcc_assert (line_end != NULL);
642
643 len = line_end - line_start;
644
645 if (m_line_start_idx < m_nb_read)
646 *line = line_start;
647
648 ++m_line_num;
649
650 /* Before we update our line record, make sure the hint about the
651 total number of lines of the file is correct. If it's not, then
652 we give up recording line boundaries from now on. */
653 bool update_line_record = true;
654 if (m_line_num > m_total_lines)
655 update_line_record = false;
656
657 /* Now update our line record so that re-reading lines from the
658 before m_line_start_idx is faster. */
659 if (update_line_record
660 && m_line_record.length () < line_record_size)
661 {
662 /* If the file lines fits in the line record, we just record all
663 its lines ...*/
664 if (m_total_lines <= line_record_size
665 && m_line_num > m_line_record.length ())
666 m_line_record.safe_push
667 (file_cache_slot::line_info (m_line_num,
668 m_line_start_idx,
669 line_end - m_data));
670 else if (m_total_lines > line_record_size)
671 {
672 /* ... otherwise, we just scale total_lines down to
673 (line_record_size lines. */
674 size_t n = (m_line_num * line_record_size) / m_total_lines;
675 if (m_line_record.length () == 0
676 || n >= m_line_record.length ())
677 m_line_record.safe_push
678 (file_cache_slot::line_info (m_line_num,
679 m_line_start_idx,
680 line_end - m_data));
681 }
682 }
683
684 /* Update m_line_start_idx so that it points to the next line to be
685 read. */
686 if (next_line_start)
687 m_line_start_idx = next_line_start - m_data;
688 else
689 /* We didn't find any terminal '\n'. Let's consider that the end
690 of line is the end of the data in the cache. The next
691 invocation of get_next_line will either read more data from the
692 underlying file or return false early because we've reached the
693 end of the file. */
694 m_line_start_idx = m_nb_read;
695
696 *line_len = len;
697
698 return true;
699 }
700
701 /* Consume the next bytes coming from the cache (or from its
702 underlying file if there are remaining unread bytes in the file)
703 until we reach the next end-of-line (or end-of-file). There is no
704 copying from the cache involved. Return TRUE upon successful
705 completion. */
706
707 bool
708 file_cache_slot::goto_next_line ()
709 {
710 char *l;
711 ssize_t len;
712
713 return get_next_line (&l, &len);
714 }
715
716 /* Read an arbitrary line number LINE_NUM from the file cached in C.
717 If the line was read successfully, *LINE points to the beginning
718 of the line in the file cache and *LINE_LEN is the length of the
719 line. *LINE is not nul-terminated, but may contain zero bytes.
720 *LINE is only valid until the next call of read_line_num.
721 This function returns bool if a line was read. */
722
723 bool
724 file_cache_slot::read_line_num (size_t line_num,
725 char ** line, ssize_t *line_len)
726 {
727 gcc_assert (line_num > 0);
728
729 if (line_num <= m_line_num)
730 {
731 /* We've been asked to read lines that are before m_line_num.
732 So lets use our line record (if it's not empty) to try to
733 avoid re-reading the file from the beginning again. */
734
735 if (m_line_record.is_empty ())
736 {
737 m_line_start_idx = 0;
738 m_line_num = 0;
739 }
740 else
741 {
742 file_cache_slot::line_info *i = NULL;
743 if (m_total_lines <= line_record_size)
744 {
745 /* In languages where the input file is not totally
746 preprocessed up front, the m_total_lines hint
747 can be smaller than the number of lines of the
748 file. In that case, only the first
749 m_total_lines have been recorded.
750
751 Otherwise, the first m_total_lines we've read have
752 their start/end recorded here. */
753 i = (line_num <= m_total_lines)
754 ? &m_line_record[line_num - 1]
755 : &m_line_record[m_total_lines - 1];
756 gcc_assert (i->line_num <= line_num);
757 }
758 else
759 {
760 /* So the file had more lines than our line record
761 size. Thus the number of lines we've recorded has
762 been scaled down to line_record_size. Let's
763 pick the start/end of the recorded line that is
764 closest to line_num. */
765 size_t n = (line_num <= m_total_lines)
766 ? line_num * line_record_size / m_total_lines
767 : m_line_record.length () - 1;
768 if (n < m_line_record.length ())
769 {
770 i = &m_line_record[n];
771 gcc_assert (i->line_num <= line_num);
772 }
773 }
774
775 if (i && i->line_num == line_num)
776 {
777 /* We have the start/end of the line. */
778 *line = m_data + i->start_pos;
779 *line_len = i->end_pos - i->start_pos;
780 return true;
781 }
782
783 if (i)
784 {
785 m_line_start_idx = i->start_pos;
786 m_line_num = i->line_num - 1;
787 }
788 else
789 {
790 m_line_start_idx = 0;
791 m_line_num = 0;
792 }
793 }
794 }
795
796 /* Let's walk from line m_line_num up to line_num - 1, without
797 copying any line. */
798 while (m_line_num < line_num - 1)
799 if (!goto_next_line ())
800 return false;
801
802 /* The line we want is the next one. Let's read and copy it back to
803 the caller. */
804 return get_next_line (line, line_len);
805 }
806
807 /* Return the physical source line that corresponds to FILE_PATH/LINE.
808 The line is not nul-terminated. The returned pointer is only
809 valid until the next call of location_get_source_line.
810 Note that the line can contain several null characters,
811 so the returned value's length has the actual length of the line.
812 If the function fails, a NULL char_span is returned. */
813
814 char_span
815 location_get_source_line (const char *file_path, int line)
816 {
817 char *buffer = NULL;
818 ssize_t len;
819
820 if (line == 0)
821 return char_span (NULL, 0);
822
823 if (file_path == NULL)
824 return char_span (NULL, 0);
825
826 diagnostic_file_cache_init ();
827
828 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
829 if (c == NULL)
830 return char_span (NULL, 0);
831
832 bool read = c->read_line_num (line, &buffer, &len);
833 if (!read)
834 return char_span (NULL, 0);
835
836 return char_span (buffer, len);
837 }
838
839 /* Determine if FILE_PATH missing a trailing newline on its final line.
840 Only valid to call once all of the file has been loaded, by
841 requesting a line number beyond the end of the file. */
842
843 bool
844 location_missing_trailing_newline (const char *file_path)
845 {
846 diagnostic_file_cache_init ();
847
848 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
849 if (c == NULL)
850 return false;
851
852 return c->missing_trailing_newline_p ();
853 }
854
855 /* Test if the location originates from the spelling location of a
856 builtin-tokens. That is, return TRUE if LOC is a (possibly
857 virtual) location of a built-in token that appears in the expansion
858 list of a macro. Please note that this function also works on
859 tokens that result from built-in tokens. For instance, the
860 function would return true if passed a token "4" that is the result
861 of the expansion of the built-in __LINE__ macro. */
862 bool
863 is_location_from_builtin_token (location_t loc)
864 {
865 const line_map_ordinary *map = NULL;
866 loc = linemap_resolve_location (line_table, loc,
867 LRK_SPELLING_LOCATION, &map);
868 return loc == BUILTINS_LOCATION;
869 }
870
871 /* Expand the source location LOC into a human readable location. If
872 LOC is virtual, it resolves to the expansion point of the involved
873 macro. If LOC resolves to a builtin location, the file name of the
874 readable location is set to the string "<built-in>". */
875
876 expanded_location
877 expand_location (location_t loc)
878 {
879 return expand_location_1 (loc, /*expansion_point_p=*/true,
880 LOCATION_ASPECT_CARET);
881 }
882
883 /* Expand the source location LOC into a human readable location. If
884 LOC is virtual, it resolves to the expansion location of the
885 relevant macro. If LOC resolves to a builtin location, the file
886 name of the readable location is set to the string
887 "<built-in>". */
888
889 expanded_location
890 expand_location_to_spelling_point (location_t loc,
891 enum location_aspect aspect)
892 {
893 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
894 }
895
896 /* The rich_location class within libcpp requires a way to expand
897 location_t instances, and relies on the client code
898 providing a symbol named
899 linemap_client_expand_location_to_spelling_point
900 to do this.
901
902 This is the implementation for libcommon.a (all host binaries),
903 which simply calls into expand_location_1. */
904
905 expanded_location
906 linemap_client_expand_location_to_spelling_point (location_t loc,
907 enum location_aspect aspect)
908 {
909 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
910 }
911
912
913 /* If LOCATION is in a system header and if it is a virtual location for
914 a token coming from the expansion of a macro, unwind it to the
915 location of the expansion point of the macro. Otherwise, just return
916 LOCATION.
917
918 This is used for instance when we want to emit diagnostics about a
919 token that may be located in a macro that is itself defined in a
920 system header, for example, for the NULL macro. In such a case, if
921 LOCATION were passed directly to diagnostic functions such as
922 warning_at, the diagnostic would be suppressed (unless
923 -Wsystem-headers). */
924
925 location_t
926 expansion_point_location_if_in_system_header (location_t location)
927 {
928 if (in_system_header_at (location))
929 location = linemap_resolve_location (line_table, location,
930 LRK_MACRO_EXPANSION_POINT,
931 NULL);
932 return location;
933 }
934
935 /* If LOCATION is a virtual location for a token coming from the expansion
936 of a macro, unwind to the location of the expansion point of the macro. */
937
938 location_t
939 expansion_point_location (location_t location)
940 {
941 return linemap_resolve_location (line_table, location,
942 LRK_MACRO_EXPANSION_POINT, NULL);
943 }
944
945 /* Construct a location with caret at CARET, ranging from START to
946 finish e.g.
947
948 11111111112
949 12345678901234567890
950 522
951 523 return foo + bar;
952 ~~~~^~~~~
953 524
954
955 The location's caret is at the "+", line 523 column 15, but starts
956 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
957 of "bar" at column 19. */
958
959 location_t
960 make_location (location_t caret, location_t start, location_t finish)
961 {
962 location_t pure_loc = get_pure_location (caret);
963 source_range src_range;
964 src_range.m_start = get_start (start);
965 src_range.m_finish = get_finish (finish);
966 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
967 pure_loc,
968 src_range,
969 NULL);
970 return combined_loc;
971 }
972
973 /* Same as above, but taking a source range rather than two locations. */
974
975 location_t
976 make_location (location_t caret, source_range src_range)
977 {
978 location_t pure_loc = get_pure_location (caret);
979 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
980 }
981
982 /* An expanded_location stores the column in byte units. This function
983 converts that column to display units. That requires reading the associated
984 source line in order to calculate the display width. If that cannot be done
985 for any reason, then returns the byte column as a fallback. */
986 int
987 location_compute_display_column (expanded_location exploc, int tabstop)
988 {
989 if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
990 return exploc.column;
991 char_span line = location_get_source_line (exploc.file, exploc.line);
992 /* If line is NULL, this function returns exploc.column which is the
993 desired fallback. */
994 return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
995 exploc.column, tabstop);
996 }
997
998 /* Dump statistics to stderr about the memory usage of the line_table
999 set of line maps. This also displays some statistics about macro
1000 expansion. */
1001
1002 void
1003 dump_line_table_statistics (void)
1004 {
1005 struct linemap_stats s;
1006 long total_used_map_size,
1007 macro_maps_size,
1008 total_allocated_map_size;
1009
1010 memset (&s, 0, sizeof (s));
1011
1012 linemap_get_statistics (line_table, &s);
1013
1014 macro_maps_size = s.macro_maps_used_size
1015 + s.macro_maps_locations_size;
1016
1017 total_allocated_map_size = s.ordinary_maps_allocated_size
1018 + s.macro_maps_allocated_size
1019 + s.macro_maps_locations_size;
1020
1021 total_used_map_size = s.ordinary_maps_used_size
1022 + s.macro_maps_used_size
1023 + s.macro_maps_locations_size;
1024
1025 fprintf (stderr, "Number of expanded macros: %5ld\n",
1026 s.num_expanded_macros);
1027 if (s.num_expanded_macros != 0)
1028 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
1029 s.num_macro_tokens / s.num_expanded_macros);
1030 fprintf (stderr,
1031 "\nLine Table allocations during the "
1032 "compilation process\n");
1033 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
1034 SIZE_AMOUNT (s.num_ordinary_maps_used));
1035 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
1036 SIZE_AMOUNT (s.ordinary_maps_used_size));
1037 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
1038 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
1039 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
1040 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
1041 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
1042 SIZE_AMOUNT (s.num_macro_maps_used));
1043 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
1044 SIZE_AMOUNT (s.macro_maps_used_size));
1045 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
1046 SIZE_AMOUNT (s.macro_maps_locations_size));
1047 fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
1048 SIZE_AMOUNT (macro_maps_size));
1049 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
1050 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
1051 fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
1052 SIZE_AMOUNT (total_allocated_map_size));
1053 fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
1054 SIZE_AMOUNT (total_used_map_size));
1055 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
1056 SIZE_AMOUNT (s.adhoc_table_size));
1057 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
1058 SIZE_AMOUNT (s.adhoc_table_entries_used));
1059 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
1060 SIZE_AMOUNT (line_table->num_optimized_ranges));
1061 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
1062 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
1063
1064 fprintf (stderr, "\n");
1065 }
1066
1067 /* Get location one beyond the final location in ordinary map IDX. */
1068
1069 static location_t
1070 get_end_location (class line_maps *set, unsigned int idx)
1071 {
1072 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1073 return set->highest_location;
1074
1075 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1076 return MAP_START_LOCATION (next_map);
1077 }
1078
/* Helper function for write_digit_row.
   Emit to STREAM the character for the least significant decimal digit
   of DIGIT.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1086
1087 /* Helper function for dump_location_info.
1088 Write a row of numbers to STREAM, numbering a source line,
1089 giving the units, tens, hundreds etc of the column number. */
1090
1091 static void
1092 write_digit_row (FILE *stream, int indent,
1093 const line_map_ordinary *map,
1094 location_t loc, int max_col, int divisor)
1095 {
1096 fprintf (stream, "%*c", indent, ' ');
1097 fprintf (stream, "|");
1098 for (int column = 1; column < max_col; column++)
1099 {
1100 location_t column_loc = loc + (column << map->m_range_bits);
1101 write_digit (stream, column_loc / divisor);
1102 }
1103 fprintf (stream, "\n");
1104 }
1105
1106 /* Write a half-closed (START) / half-open (END) interval of
1107 location_t to STREAM. */
1108
1109 static void
1110 dump_location_range (FILE *stream,
1111 location_t start, location_t end)
1112 {
1113 fprintf (stream,
1114 " location_t interval: %u <= loc < %u\n",
1115 start, end);
1116 }
1117
1118 /* Write a labelled description of a half-closed (START) / half-open (END)
1119 interval of location_t to STREAM. */
1120
1121 static void
1122 dump_labelled_location_range (FILE *stream,
1123 const char *name,
1124 location_t start, location_t end)
1125 {
1126 fprintf (stream, "%s\n", name);
1127 dump_location_range (stream, start, end);
1128 fprintf (stream, "\n");
1129 }
1130
1131 /* Write a visualization of the locations in the line_table to STREAM. */
1132
1133 void
1134 dump_location_info (FILE *stream)
1135 {
1136 /* Visualize the reserved locations. */
1137 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1138 0, RESERVED_LOCATION_COUNT);
1139
1140 /* Visualize the ordinary line_map instances, rendering the sources. */
1141 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1142 {
1143 location_t end_location = get_end_location (line_table, idx);
1144 /* half-closed: doesn't include this one. */
1145
1146 const line_map_ordinary *map
1147 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1148 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1149 dump_location_range (stream,
1150 MAP_START_LOCATION (map), end_location);
1151 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1152 fprintf (stream, " starting at line: %i\n",
1153 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1154 fprintf (stream, " column and range bits: %i\n",
1155 map->m_column_and_range_bits);
1156 fprintf (stream, " column bits: %i\n",
1157 map->m_column_and_range_bits - map->m_range_bits);
1158 fprintf (stream, " range bits: %i\n",
1159 map->m_range_bits);
1160 const char * reason;
1161 switch (map->reason) {
1162 case LC_ENTER:
1163 reason = "LC_ENTER";
1164 break;
1165 case LC_LEAVE:
1166 reason = "LC_LEAVE";
1167 break;
1168 case LC_RENAME:
1169 reason = "LC_RENAME";
1170 break;
1171 case LC_RENAME_VERBATIM:
1172 reason = "LC_RENAME_VERBATIM";
1173 break;
1174 case LC_ENTER_MACRO:
1175 reason = "LC_RENAME_MACRO";
1176 break;
1177 default:
1178 reason = "Unknown";
1179 }
1180 fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1181
1182 const line_map_ordinary *includer_map
1183 = linemap_included_from_linemap (line_table, map);
1184 fprintf (stream, " included from location: %d",
1185 linemap_included_from (map));
1186 if (includer_map) {
1187 fprintf (stream, " (in ordinary map %d)",
1188 int (includer_map - line_table->info_ordinary.maps));
1189 }
1190 fprintf (stream, "\n");
1191
1192 /* Render the span of source lines that this "map" covers. */
1193 for (location_t loc = MAP_START_LOCATION (map);
1194 loc < end_location;
1195 loc += (1 << map->m_range_bits) )
1196 {
1197 gcc_assert (pure_location_p (line_table, loc) );
1198
1199 expanded_location exploc
1200 = linemap_expand_location (line_table, map, loc);
1201
1202 if (exploc.column == 0)
1203 {
1204 /* Beginning of a new source line: draw the line. */
1205
1206 char_span line_text = location_get_source_line (exploc.file,
1207 exploc.line);
1208 if (!line_text)
1209 break;
1210 fprintf (stream,
1211 "%s:%3i|loc:%5i|%.*s\n",
1212 exploc.file, exploc.line,
1213 loc,
1214 (int)line_text.length (), line_text.get_buffer ());
1215
1216 /* "loc" is at column 0, which means "the whole line".
1217 Render the locations *within* the line, by underlining
1218 it, showing the location_t numeric values
1219 at each column. */
1220 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1221 if (max_col > line_text.length ())
1222 max_col = line_text.length () + 1;
1223
1224 int len_lnum = num_digits (exploc.line);
1225 if (len_lnum < 3)
1226 len_lnum = 3;
1227 int len_loc = num_digits (loc);
1228 if (len_loc < 5)
1229 len_loc = 5;
1230
1231 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1232
1233 /* Thousands. */
1234 if (end_location > 999)
1235 write_digit_row (stream, indent, map, loc, max_col, 1000);
1236
1237 /* Hundreds. */
1238 if (end_location > 99)
1239 write_digit_row (stream, indent, map, loc, max_col, 100);
1240
1241 /* Tens. */
1242 write_digit_row (stream, indent, map, loc, max_col, 10);
1243
1244 /* Units. */
1245 write_digit_row (stream, indent, map, loc, max_col, 1);
1246 }
1247 }
1248 fprintf (stream, "\n");
1249 }
1250
1251 /* Visualize unallocated values. */
1252 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1253 line_table->highest_location,
1254 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1255
1256 /* Visualize the macro line_map instances, rendering the sources. */
1257 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1258 {
1259 /* Each macro map that is allocated owns location_t values
1260 that are *lower* that the one before them.
1261 Hence it's meaningful to view them either in order of ascending
1262 source locations, or in order of ascending macro map index. */
1263 const bool ascending_location_ts = true;
1264 unsigned int idx = (ascending_location_ts
1265 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1266 : i);
1267 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1268 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1269 idx,
1270 linemap_map_get_macro_name (map),
1271 MACRO_MAP_NUM_MACRO_TOKENS (map));
1272 dump_location_range (stream,
1273 map->start_location,
1274 (map->start_location
1275 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1276 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1277 "expansion point is location %i",
1278 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1279 fprintf (stream, " map->start_location: %u\n",
1280 map->start_location);
1281
1282 fprintf (stream, " macro_locations:\n");
1283 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1284 {
1285 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1286 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1287
1288 /* linemap_add_macro_token encodes token numbers in an expansion
1289 by putting them after MAP_START_LOCATION. */
1290
1291 /* I'm typically seeing 4 uninitialized entries at the end of
1292 0xafafafaf.
1293 This appears to be due to macro.c:replace_args
1294 adding 2 extra args for padding tokens; presumably there may
1295 be a leading and/or trailing padding token injected,
1296 each for 2 more location slots.
1297 This would explain there being up to 4 location_ts slots
1298 that may be uninitialized. */
1299
1300 fprintf (stream, " %u: %u, %u\n",
1301 i,
1302 x,
1303 y);
1304 if (x == y)
1305 {
1306 if (x < MAP_START_LOCATION (map))
1307 inform (x, "token %u has %<x-location == y-location == %u%>",
1308 i, x);
1309 else
1310 fprintf (stream,
1311 "x-location == y-location == %u encodes token # %u\n",
1312 x, x - MAP_START_LOCATION (map));
1313 }
1314 else
1315 {
1316 inform (x, "token %u has %<x-location == %u%>", i, x);
1317 inform (x, "token %u has %<y-location == %u%>", i, y);
1318 }
1319 }
1320 fprintf (stream, "\n");
1321 }
1322
1323 /* It appears that MAX_LOCATION_T itself is never assigned to a
1324 macro map, presumably due to an off-by-one error somewhere
1325 between the logic in linemap_enter_macro and
1326 LINEMAPS_MACRO_LOWEST_LOCATION. */
1327 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1328 MAX_LOCATION_T,
1329 MAX_LOCATION_T + 1);
1330
1331 /* Visualize ad-hoc values. */
1332 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1333 MAX_LOCATION_T + 1, UINT_MAX);
1334 }
1335
1336 /* string_concat's constructor. */
1337
1338 string_concat::string_concat (int num, location_t *locs)
1339 : m_num (num)
1340 {
1341 m_locs = ggc_vec_alloc <location_t> (num);
1342 for (int i = 0; i < num; i++)
1343 m_locs[i] = locs[i];
1344 }
1345
1346 /* string_concat_db's constructor. */
1347
1348 string_concat_db::string_concat_db ()
1349 {
1350 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1351 }
1352
1353 /* Record that a string concatenation occurred, covering NUM
1354 string literal tokens. LOCS is an array of size NUM, containing the
1355 locations of the tokens. A copy of LOCS is taken. */
1356
1357 void
1358 string_concat_db::record_string_concatenation (int num, location_t *locs)
1359 {
1360 gcc_assert (num > 1);
1361 gcc_assert (locs);
1362
1363 location_t key_loc = get_key_loc (locs[0]);
1364
1365 string_concat *concat
1366 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1367 m_table->put (key_loc, concat);
1368 }
1369
1370 /* Determine if LOC was the location of the initial token of a
1371 concatenation of string literal tokens.
1372 If so, *OUT_NUM is written to with the number of tokens, and
1373 *OUT_LOCS with the location of an array of locations of the
1374 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1375 storage owned by the string_concat_db.
1376 Otherwise, return false. */
1377
1378 bool
1379 string_concat_db::get_string_concatenation (location_t loc,
1380 int *out_num,
1381 location_t **out_locs)
1382 {
1383 gcc_assert (out_num);
1384 gcc_assert (out_locs);
1385
1386 location_t key_loc = get_key_loc (loc);
1387
1388 string_concat **concat = m_table->get (key_loc);
1389 if (!concat)
1390 return false;
1391
1392 *out_num = (*concat)->m_num;
1393 *out_locs =(*concat)->m_locs;
1394 return true;
1395 }
1396
1397 /* Internal function. Canonicalize LOC into a form suitable for
1398 use as a key within the database, stripping away macro expansion,
1399 ad-hoc information, and range information, using the location of
1400 the start of LOC within an ordinary linemap. */
1401
1402 location_t
1403 string_concat_db::get_key_loc (location_t loc)
1404 {
1405 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1406 NULL);
1407
1408 loc = get_range_from_loc (line_table, loc).m_start;
1409
1410 return loc;
1411 }
1412
1413 /* Helper class for use within get_substring_ranges_for_loc.
1414 An vec of cpp_string with responsibility for releasing all of the
1415 str->text for each str in the vector. */
1416
1417 class auto_cpp_string_vec : public auto_vec <cpp_string>
1418 {
1419 public:
1420 auto_cpp_string_vec (int alloc)
1421 : auto_vec <cpp_string> (alloc) {}
1422
1423 ~auto_cpp_string_vec ()
1424 {
1425 /* Clean up the copies within this vec. */
1426 int i;
1427 cpp_string *str;
1428 FOR_EACH_VEC_ELT (*this, i, str)
1429 free (const_cast <unsigned char *> (str->text));
1430 }
1431 };
1432
1433 /* Attempt to populate RANGES with source location information on the
1434 individual characters within the string literal found at STRLOC.
1435 If CONCATS is non-NULL, then any string literals that the token at
1436 STRLOC was concatenated with are also added to RANGES.
1437
1438 Return NULL if successful, or an error message if any errors occurred (in
1439 which case RANGES may be only partially populated and should not
1440 be used).
1441
1442 This is implemented by re-parsing the relevant source line(s). */
1443
1444 static const char *
1445 get_substring_ranges_for_loc (cpp_reader *pfile,
1446 string_concat_db *concats,
1447 location_t strloc,
1448 enum cpp_ttype type,
1449 cpp_substring_ranges &ranges)
1450 {
1451 gcc_assert (pfile);
1452
1453 if (strloc == UNKNOWN_LOCATION)
1454 return "unknown location";
1455
1456 /* Reparsing the strings requires accurate location information.
1457 If -ftrack-macro-expansion has been overridden from its default
1458 of 2, then we might have a location of a macro expansion point,
1459 rather than the location of the literal itself.
1460 Avoid this by requiring that we have full macro expansion tracking
1461 for substring locations to be available. */
1462 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1463 return "track_macro_expansion != 2";
1464
1465 /* If #line or # 44 "file"-style directives are present, then there's
1466 no guarantee that the line numbers we have can be used to locate
1467 the strings. For example, we might have a .i file with # directives
1468 pointing back to lines within a .c file, but the .c file might
1469 have been edited since the .i file was created.
1470 In such a case, the safest course is to disable on-demand substring
1471 locations. */
1472 if (line_table->seen_line_directive)
1473 return "seen line directive";
1474
1475 /* If string concatenation has occurred at STRLOC, get the locations
1476 of all of the literal tokens making up the compound string.
1477 Otherwise, just use STRLOC. */
1478 int num_locs = 1;
1479 location_t *strlocs = &strloc;
1480 if (concats)
1481 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1482
1483 auto_cpp_string_vec strs (num_locs);
1484 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1485 for (int i = 0; i < num_locs; i++)
1486 {
1487 /* Get range of strloc. We will use it to locate the start and finish
1488 of the literal token within the line. */
1489 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1490
1491 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1492 {
1493 /* If the string token was within a macro expansion, then we can
1494 cope with it for the simple case where we have a single token.
1495 Otherwise, bail out. */
1496 if (src_range.m_start != src_range.m_finish)
1497 return "macro expansion";
1498 }
1499 else
1500 {
1501 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1502 /* If so, we can't reliably determine where the token started within
1503 its line. */
1504 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1505
1506 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1507 /* If so, we can't reliably determine where the token finished
1508 within its line. */
1509 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1510 }
1511
1512 expanded_location start
1513 = expand_location_to_spelling_point (src_range.m_start,
1514 LOCATION_ASPECT_START);
1515 expanded_location finish
1516 = expand_location_to_spelling_point (src_range.m_finish,
1517 LOCATION_ASPECT_FINISH);
1518 if (start.file != finish.file)
1519 return "range endpoints are in different files";
1520 if (start.line != finish.line)
1521 return "range endpoints are on different lines";
1522 if (start.column > finish.column)
1523 return "range endpoints are reversed";
1524
1525 char_span line = location_get_source_line (start.file, start.line);
1526 if (!line)
1527 return "unable to read source line";
1528
1529 /* Determine the location of the literal (including quotes
1530 and leading prefix chars, such as the 'u' in a u""
1531 token). */
1532 size_t literal_length = finish.column - start.column + 1;
1533
1534 /* Ensure that we don't crash if we got the wrong location. */
1535 if (start.column < 1)
1536 return "zero start column";
1537 if (line.length () < (start.column - 1 + literal_length))
1538 return "line is not wide enough";
1539
1540 char_span literal = line.subspan (start.column - 1, literal_length);
1541
1542 cpp_string from;
1543 from.len = literal_length;
1544 /* Make a copy of the literal, to avoid having to rely on
1545 the lifetime of the copy of the line within the cache.
1546 This will be released by the auto_cpp_string_vec dtor. */
1547 from.text = (unsigned char *)literal.xstrdup ();
1548 strs.safe_push (from);
1549
1550 /* For very long lines, a new linemap could have started
1551 halfway through the token.
1552 Ensure that the loc_reader uses the linemap of the
1553 *end* of the token for its start location. */
1554 const line_map_ordinary *start_ord_map;
1555 linemap_resolve_location (line_table, src_range.m_start,
1556 LRK_SPELLING_LOCATION, &start_ord_map);
1557 const line_map_ordinary *final_ord_map;
1558 linemap_resolve_location (line_table, src_range.m_finish,
1559 LRK_SPELLING_LOCATION, &final_ord_map);
1560 if (start_ord_map == NULL || final_ord_map == NULL)
1561 return "failed to get ordinary maps";
1562 /* Bulletproofing. We ought to only have different ordinary maps
1563 for start vs finish due to line-length jumps. */
1564 if (start_ord_map != final_ord_map
1565 && start_ord_map->to_file != final_ord_map->to_file)
1566 return "start and finish are spelled in different ordinary maps";
1567 /* The file from linemap_resolve_location ought to match that from
1568 expand_location_to_spelling_point. */
1569 if (start_ord_map->to_file != start.file)
1570 return "mismatching file after resolving linemap";
1571
1572 location_t start_loc
1573 = linemap_position_for_line_and_column (line_table, final_ord_map,
1574 start.line, start.column);
1575
1576 cpp_string_location_reader loc_reader (start_loc, line_table);
1577 loc_readers.safe_push (loc_reader);
1578 }
1579
1580 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1581 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1582 loc_readers.address (),
1583 num_locs, &ranges, type);
1584 if (err)
1585 return err;
1586
1587 /* Success: "ranges" should now contain information on the string. */
1588 return NULL;
1589 }
1590
1591 /* Attempt to populate *OUT_LOC with source location information on the
1592 given characters within the string literal found at STRLOC.
1593 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1594 character set.
1595
1596 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1597 and string literal "012345\n789"
1598 *OUT_LOC is written to with:
1599 "012345\n789"
1600 ~^~~~~
1601
1602 If CONCATS is non-NULL, then any string literals that the token at
1603 STRLOC was concatenated with are also considered.
1604
1605 This is implemented by re-parsing the relevant source line(s).
1606
1607 Return NULL if successful, or an error message if any errors occurred.
1608 Error messages are intended for GCC developers (to help debugging) rather
1609 than for end-users. */
1610
1611 const char *
1612 get_location_within_string (cpp_reader *pfile,
1613 string_concat_db *concats,
1614 location_t strloc,
1615 enum cpp_ttype type,
1616 int caret_idx, int start_idx, int end_idx,
1617 location_t *out_loc)
1618 {
1619 gcc_checking_assert (caret_idx >= 0);
1620 gcc_checking_assert (start_idx >= 0);
1621 gcc_checking_assert (end_idx >= 0);
1622 gcc_assert (out_loc);
1623
1624 cpp_substring_ranges ranges;
1625 const char *err
1626 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1627 if (err)
1628 return err;
1629
1630 if (caret_idx >= ranges.get_num_ranges ())
1631 return "caret_idx out of range";
1632 if (start_idx >= ranges.get_num_ranges ())
1633 return "start_idx out of range";
1634 if (end_idx >= ranges.get_num_ranges ())
1635 return "end_idx out of range";
1636
1637 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1638 ranges.get_range (start_idx).m_start,
1639 ranges.get_range (end_idx).m_finish);
1640 return NULL;
1641 }
1642
1643 #if CHECKING_P
1644
1645 namespace selftest {
1646
1647 /* Selftests of location handling. */
1648
1649 /* Attempt to populate *OUT_RANGE with source location information on the
1650 given character within the string literal found at STRLOC.
1651 CHAR_IDX refers to an offset within the execution character set.
1652 If CONCATS is non-NULL, then any string literals that the token at
1653 STRLOC was concatenated with are also considered.
1654
1655 This is implemented by re-parsing the relevant source line(s).
1656
1657 Return NULL if successful, or an error message if any errors occurred.
1658 Error messages are intended for GCC developers (to help debugging) rather
1659 than for end-users. */
1660
1661 static const char *
1662 get_source_range_for_char (cpp_reader *pfile,
1663 string_concat_db *concats,
1664 location_t strloc,
1665 enum cpp_ttype type,
1666 int char_idx,
1667 source_range *out_range)
1668 {
1669 gcc_checking_assert (char_idx >= 0);
1670 gcc_assert (out_range);
1671
1672 cpp_substring_ranges ranges;
1673 const char *err
1674 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1675 if (err)
1676 return err;
1677
1678 if (char_idx >= ranges.get_num_ranges ())
1679 return "char_idx out of range";
1680
1681 *out_range = ranges.get_range (char_idx);
1682 return NULL;
1683 }
1684
1685 /* As get_source_range_for_char, but write to *OUT the number
1686 of ranges that are available. */
1687
1688 static const char *
1689 get_num_source_ranges_for_substring (cpp_reader *pfile,
1690 string_concat_db *concats,
1691 location_t strloc,
1692 enum cpp_ttype type,
1693 int *out)
1694 {
1695 gcc_assert (out);
1696
1697 cpp_substring_ranges ranges;
1698 const char *err
1699 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1700
1701 if (err)
1702 return err;
1703
1704 *out = ranges.get_num_ranges ();
1705 return NULL;
1706 }
1707
1708 /* Selftests of location handling. */
1709
1710 /* Verify that compare() on linenum_type handles comparisons over the full
1711 range of the type. */
1712
1713 static void
1714 test_linenum_comparisons ()
1715 {
1716 linenum_type min_line (0);
1717 linenum_type max_line (0xffffffff);
1718 ASSERT_EQ (0, compare (min_line, min_line));
1719 ASSERT_EQ (0, compare (max_line, max_line));
1720
1721 ASSERT_GT (compare (max_line, min_line), 0);
1722 ASSERT_LT (compare (min_line, max_line), 0);
1723 }
1724
1725 /* Helper function for verifying location data: when location_t
1726 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1727 as having column 0. */
1728
1729 static bool
1730 should_have_column_data_p (location_t loc)
1731 {
1732 if (IS_ADHOC_LOC (loc))
1733 loc = get_location_from_adhoc_loc (line_table, loc);
1734 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1735 return false;
1736 return true;
1737 }
1738
1739 /* Selftest for should_have_column_data_p. */
1740
1741 static void
1742 test_should_have_column_data_p ()
1743 {
1744 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1745 ASSERT_TRUE
1746 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1747 ASSERT_FALSE
1748 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1749 }
1750
1751 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1752 on LOC. */
1753
1754 static void
1755 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1756 location_t loc)
1757 {
1758 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1759 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1760 /* If location_t values are sufficiently high, then column numbers
1761 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1762 When close to the threshold, column numbers *may* be present: if
1763 the final linemap before the threshold contains a line that straddles
1764 the threshold, locations in that line have column information. */
1765 if (should_have_column_data_p (loc))
1766 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1767 }
1768
/* Several selftests build a line table and populate it with one or more
   line maps.

   To maximize coverage these tests are run under a range of conditions:
   - different values of line_table->default_range_bits: some frontends
     use a non-zero value, others use zero
   - the fallback modes of line-map.c: there are thresholds for
     location_t beyond which line-map.c changes behavior (it stops using
     the range-packing optimization, then stops tracking columns).
     Starting the line_table at values at or near those thresholds
     exercises them.

   An instance of the following class describes one cell of that test
   matrix.  */

class line_table_case
{
public:
  line_table_case (int default_range_bits, int base_location)
  {
    m_default_range_bits = default_range_bits;
    m_base_location = base_location;
  }

  /* Value to install as line_table->default_range_bits.  */
  int m_default_range_bits;

  /* Starting value for the line table's highest location/line; zero
     leaves the freshly initialized values alone.  */
  int m_base_location;
};
1796
1797 /* Constructor. Store the old value of line_table, and create a new
1798 one, using sane defaults. */
1799
1800 line_table_test::line_table_test ()
1801 {
1802 gcc_assert (saved_line_table == NULL);
1803 saved_line_table = line_table;
1804 line_table = ggc_alloc<line_maps> ();
1805 linemap_init (line_table, BUILTINS_LOCATION);
1806 gcc_assert (saved_line_table->reallocator);
1807 line_table->reallocator = saved_line_table->reallocator;
1808 gcc_assert (saved_line_table->round_alloc_size);
1809 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1810 line_table->default_range_bits = 0;
1811 }
1812
1813 /* Constructor. Store the old value of line_table, and create a new
1814 one, using the sitation described in CASE_. */
1815
1816 line_table_test::line_table_test (const line_table_case &case_)
1817 {
1818 gcc_assert (saved_line_table == NULL);
1819 saved_line_table = line_table;
1820 line_table = ggc_alloc<line_maps> ();
1821 linemap_init (line_table, BUILTINS_LOCATION);
1822 gcc_assert (saved_line_table->reallocator);
1823 line_table->reallocator = saved_line_table->reallocator;
1824 gcc_assert (saved_line_table->round_alloc_size);
1825 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1826 line_table->default_range_bits = case_.m_default_range_bits;
1827 if (case_.m_base_location)
1828 {
1829 line_table->highest_location = case_.m_base_location;
1830 line_table->highest_line = case_.m_base_location;
1831 }
1832 }
1833
1834 /* Destructor. Restore the old value of line_table. */
1835
1836 line_table_test::~line_table_test ()
1837 {
1838 gcc_assert (saved_line_table != NULL);
1839 line_table = saved_line_table;
1840 saved_line_table = NULL;
1841 }
1842
/* Verify basic operation of ordinary linemaps, under the line-table
   configuration described by CASE_.

   The test first creates locations in two files ("foo.c" and "bar.c"),
   covering short lines, a long line, and a line that grows past the
   column-tracking limit, and only afterwards checks that file, line and
   column can be recovered from each location.  The order of the
   linemap_* calls matters: each location is created against the state
   left by the calls before it.  */

static void
test_accessing_ordinary_linemaps (const line_table_case &case_)
{
  /* Swap in a temporary line_table for the duration of this test.  */
  line_table_test ltt (case_);

  /* Build a simple linemap describing some locations.  */
  linemap_add (line_table, LC_ENTER, false, "foo.c", 0);

  linemap_line_start (line_table, 1, 100);
  location_t loc_a = linemap_position_for_column (line_table, 1);
  location_t loc_b = linemap_position_for_column (line_table, 23);

  linemap_line_start (line_table, 2, 100);
  location_t loc_c = linemap_position_for_column (line_table, 1);
  location_t loc_d = linemap_position_for_column (line_table, 17);

  /* Example of a very long line.  */
  linemap_line_start (line_table, 3, 2000);
  location_t loc_e = linemap_position_for_column (line_table, 700);

  /* Transitioning back to a short line.  */
  linemap_line_start (line_table, 4, 0);
  location_t loc_back_to_short = linemap_position_for_column (line_table, 100);

  /* The check below is only made when column data is expected to be
     available for this location.  */
  if (should_have_column_data_p (loc_back_to_short))
    {
      /* Verify that we switched to short lines in the linemap.  */
      line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
      ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
    }

  /* Example of a line that will eventually be seen to be longer
     than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
     below that.  */
  linemap_line_start (line_table, 5, 2000);

  location_t loc_start_of_very_long_line
    = linemap_position_for_column (line_table, 2000);
  location_t loc_too_wide
    = linemap_position_for_column (line_table, 4097);
  location_t loc_too_wide_2
    = linemap_position_for_column (line_table, 4098);

  /* ...and back to a sane line length.  */
  linemap_line_start (line_table, 6, 100);
  location_t loc_sane_again = linemap_position_for_column (line_table, 10);

  linemap_add (line_table, LC_LEAVE, false, NULL, 0);

  /* Multiple files.  */
  linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
  linemap_line_start (line_table, 1, 200);
  location_t loc_f = linemap_position_for_column (line_table, 150);
  linemap_add (line_table, LC_LEAVE, false, NULL, 0);

  /* Verify that we can recover the location info.  */
  assert_loceq ("foo.c", 1, 1, loc_a);
  assert_loceq ("foo.c", 1, 23, loc_b);
  assert_loceq ("foo.c", 2, 1, loc_c);
  assert_loceq ("foo.c", 2, 17, loc_d);
  assert_loceq ("foo.c", 3, 700, loc_e);
  assert_loceq ("foo.c", 4, 100, loc_back_to_short);

  /* In the very wide line, the initial location should be fully tracked.  */
  assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
  /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
     be disabled.  */
  assert_loceq ("foo.c", 5, 0, loc_too_wide);
  assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
  /*...and column-tracking should be re-enabled for subsequent lines.  */
  assert_loceq ("foo.c", 6, 10, loc_sane_again);

  assert_loceq ("bar.c", 1, 150, loc_f);

  ASSERT_FALSE (is_location_from_builtin_token (loc_a));
  ASSERT_TRUE (pure_location_p (line_table, loc_a));

  /* Verify using make_location to build a range, and extracting data
     back from it.  The arguments are (caret, start, finish): LOC_C is the
     caret, and the range runs from LOC_B to LOC_D.  */
  location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
  ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
  ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
  source_range src_range = get_range_from_loc (line_table, range_c_b_d);
  ASSERT_EQ (loc_b, src_range.m_start);
  ASSERT_EQ (loc_d, src_range.m_finish);
}
1931
1932 /* Verify various properties of UNKNOWN_LOCATION. */
1933
1934 static void
1935 test_unknown_location ()
1936 {
1937 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1938 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1939 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1940 }
1941
1942 /* Verify various properties of BUILTINS_LOCATION. */
1943
1944 static void
1945 test_builtins ()
1946 {
1947 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1948 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1949 }
1950
1951 /* Regression test for make_location.
1952 Ensure that we use pure locations for the start/finish of the range,
1953 rather than storing a packed or ad-hoc range as the start/finish. */
1954
1955 static void
1956 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1957 {
1958 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1959 with C++ frontend.
1960 ....................0000000001111111111222.
1961 ....................1234567890123456789012. */
1962 const char *content = " r += !aaa == bbb;\n";
1963 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1964 line_table_test ltt (case_);
1965 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1966
1967 const location_t c11 = linemap_position_for_column (line_table, 11);
1968 const location_t c12 = linemap_position_for_column (line_table, 12);
1969 const location_t c13 = linemap_position_for_column (line_table, 13);
1970 const location_t c14 = linemap_position_for_column (line_table, 14);
1971 const location_t c21 = linemap_position_for_column (line_table, 21);
1972
1973 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1974 return;
1975
1976 /* Use column 13 for the caret location, arbitrarily, to verify that we
1977 handle start != caret. */
1978 const location_t aaa = make_location (c13, c12, c14);
1979 ASSERT_EQ (c13, get_pure_location (aaa));
1980 ASSERT_EQ (c12, get_start (aaa));
1981 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1982 ASSERT_EQ (c14, get_finish (aaa));
1983 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1984
1985 /* Make a location using a location with a range as the start-point. */
1986 const location_t not_aaa = make_location (c11, aaa, c14);
1987 ASSERT_EQ (c11, get_pure_location (not_aaa));
1988 /* It should use the start location of the range, not store the range
1989 itself. */
1990 ASSERT_EQ (c12, get_start (not_aaa));
1991 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1992 ASSERT_EQ (c14, get_finish (not_aaa));
1993 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1994
1995 /* Similarly, make a location with a range as the end-point. */
1996 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1997 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1998 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1999 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2000 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2001 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2002 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
2003 /* It should use the finish location of the range, not store the range
2004 itself. */
2005 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2006 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2007 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2008 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2009 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2010 }
2011
2012 /* Verify reading of input files (e.g. for caret-based diagnostics). */
2013
2014 static void
2015 test_reading_source_line ()
2016 {
2017 /* Create a tempfile and write some text to it. */
2018 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2019 "01234567890123456789\n"
2020 "This is the test text\n"
2021 "This is the 3rd line");
2022
2023 /* Read back a specific line from the tempfile. */
2024 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
2025 ASSERT_TRUE (source_line);
2026 ASSERT_TRUE (source_line.get_buffer () != NULL);
2027 ASSERT_EQ (20, source_line.length ());
2028 ASSERT_TRUE (!strncmp ("This is the 3rd line",
2029 source_line.get_buffer (), source_line.length ()));
2030
2031 source_line = location_get_source_line (tmp.get_filename (), 2);
2032 ASSERT_TRUE (source_line);
2033 ASSERT_TRUE (source_line.get_buffer () != NULL);
2034 ASSERT_EQ (21, source_line.length ());
2035 ASSERT_TRUE (!strncmp ("This is the test text",
2036 source_line.get_buffer (), source_line.length ()));
2037
2038 source_line = location_get_source_line (tmp.get_filename (), 4);
2039 ASSERT_FALSE (source_line);
2040 ASSERT_TRUE (source_line.get_buffer () == NULL);
2041 }
2042
2043 /* Tests of lexing. */
2044
/* Assert that the spelling of TOKEN, as produced by cpp_token_as_text
   for the reader READER, is the string EXPECTED.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(READER, TOKEN, EXPECTED)                \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *spelling_ = cpp_token_as_text ((READER), (TOKEN));   \
    ASSERT_STREQ ((EXPECTED), (const char *)spelling_);                 \
  SELFTEST_END_STMT
2053
2054 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2055 and ranges from EXP_START_COL to EXP_FINISH_COL.
2056 Use LOC as the effective location of the selftest. */
2057
2058 static void
2059 assert_token_loc_eq (const location &loc,
2060 const cpp_token *tok,
2061 const char *exp_filename, int exp_linenum,
2062 int exp_start_col, int exp_finish_col)
2063 {
2064 location_t tok_loc = tok->src_loc;
2065 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2066 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2067
2068 /* If location_t values are sufficiently high, then column numbers
2069 will be unavailable. */
2070 if (!should_have_column_data_p (tok_loc))
2071 return;
2072
2073 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2074 source_range tok_range = get_range_from_loc (line_table, tok_loc);
2075 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2076 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
2077 }
2078
/* Convenience wrapper around assert_token_loc_eq that reports failures
   at the point of use (SELFTEST_LOCATION).  */

#define ASSERT_TOKEN_LOC_EQ(TOKEN, FILENAME, LINENUM, FIRST_COL, LAST_COL) \
  assert_token_loc_eq (SELFTEST_LOCATION, (TOKEN), (FILENAME),             \
                       (LINENUM), (FIRST_COL), (LAST_COL))
2086
2087 /* Test of lexing a file using libcpp, verifying tokens and their
2088 location information. */
2089
2090 static void
2091 test_lexer (const line_table_case &case_)
2092 {
2093 /* Create a tempfile and write some text to it. */
2094 const char *content =
2095 /*00000000011111111112222222222333333.3333444444444.455555555556
2096 12345678901234567890123456789012345.6789012345678.901234567890. */
2097 ("test_name /* c-style comment */\n"
2098 " \"test literal\"\n"
2099 " // test c++-style comment\n"
2100 " 42\n");
2101 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2102
2103 line_table_test ltt (case_);
2104
2105 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2106
2107 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2108 ASSERT_NE (fname, NULL);
2109
2110 /* Verify that we get the expected tokens back, with the correct
2111 location information. */
2112
2113 location_t loc;
2114 const cpp_token *tok;
2115 tok = cpp_get_token_with_location (parser, &loc);
2116 ASSERT_NE (tok, NULL);
2117 ASSERT_EQ (tok->type, CPP_NAME);
2118 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2119 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2120
2121 tok = cpp_get_token_with_location (parser, &loc);
2122 ASSERT_NE (tok, NULL);
2123 ASSERT_EQ (tok->type, CPP_STRING);
2124 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2125 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2126
2127 tok = cpp_get_token_with_location (parser, &loc);
2128 ASSERT_NE (tok, NULL);
2129 ASSERT_EQ (tok->type, CPP_NUMBER);
2130 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2131 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2132
2133 tok = cpp_get_token_with_location (parser, &loc);
2134 ASSERT_NE (tok, NULL);
2135 ASSERT_EQ (tok->type, CPP_EOF);
2136
2137 cpp_finish (parser, NULL);
2138 cpp_destroy (parser);
2139 }
2140
/* Forward decls.  */

class lexer_test;
class lexer_test_options;

/* A class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor.  */

class lexer_test_options
{
 public:
  /* Adjust the given lexer_test (e.g. its cpp_reader) as required by
     this set of options.  */
  virtual void apply (lexer_test &) = 0;

 protected:
  /* Protected and non-virtual: options objects are not meant to be
     destroyed through a pointer to this base class, and this turns any
     attempt to do so into a compile-time error rather than undefined
     behavior.  */
  ~lexer_test_options () {}
};
2154
2155 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2156 in its dtor.
2157
2158 This is needed by struct lexer_test to ensure that the cleanup of the
2159 cpp_reader happens *after* the cleanup of the temp_source_file. */
2160
2161 class cpp_reader_ptr
2162 {
2163 public:
2164 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2165
2166 ~cpp_reader_ptr ()
2167 {
2168 cpp_finish (m_ptr, NULL);
2169 cpp_destroy (m_ptr);
2170 }
2171
2172 operator cpp_reader * () const { return m_ptr; }
2173
2174 private:
2175 cpp_reader *m_ptr;
2176 };
2177
2178 /* A struct for writing lexer tests. */
2179
2180 class lexer_test
2181 {
2182 public:
2183 lexer_test (const line_table_case &case_, const char *content,
2184 lexer_test_options *options);
2185 ~lexer_test ();
2186
2187 const cpp_token *get_token ();
2188
2189 /* The ordering of these fields matters.
2190 The line_table_test must be first, since the cpp_reader_ptr
2191 uses it.
2192 The cpp_reader must be cleaned up *after* the temp_source_file
2193 since the filenames in input.c's input cache are owned by the
2194 cpp_reader; in particular, when ~temp_source_file evicts the
2195 filename the filenames must still be alive. */
2196 line_table_test m_ltt;
2197 cpp_reader_ptr m_parser;
2198 temp_source_file m_tempfile;
2199 string_concat_db m_concats;
2200 bool m_implicitly_expect_EOF;
2201 };
2202
2203 /* Use an EBCDIC encoding for the execution charset, specifically
2204 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2205
2206 This exercises iconv integration within libcpp.
2207 Not every build of iconv supports the given charset,
2208 so we need to flag this error and handle it gracefully. */
2209
2210 class ebcdic_execution_charset : public lexer_test_options
2211 {
2212 public:
2213 ebcdic_execution_charset () : m_num_iconv_errors (0)
2214 {
2215 gcc_assert (s_singleton == NULL);
2216 s_singleton = this;
2217 }
2218 ~ebcdic_execution_charset ()
2219 {
2220 gcc_assert (s_singleton == this);
2221 s_singleton = NULL;
2222 }
2223
2224 void apply (lexer_test &test) FINAL OVERRIDE
2225 {
2226 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2227 cpp_opts->narrow_charset = "IBM1047";
2228
2229 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2230 callbacks->diagnostic = on_diagnostic;
2231 }
2232
2233 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2234 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2235 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2236 rich_location *richloc ATTRIBUTE_UNUSED,
2237 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2238 ATTRIBUTE_FPTR_PRINTF(5,0)
2239 {
2240 gcc_assert (s_singleton);
2241 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2242 const char *msg = "conversion from %s to %s not supported by iconv";
2243 #ifdef ENABLE_NLS
2244 msg = dgettext ("cpplib", msg);
2245 #endif
2246 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2247 when the local iconv build doesn't support the conversion. */
2248 if (strcmp (msgid, msg) == 0)
2249 {
2250 s_singleton->m_num_iconv_errors++;
2251 return true;
2252 }
2253
2254 /* Otherwise, we have an unexpected error. */
2255 abort ();
2256 }
2257
2258 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2259
2260 private:
2261 static ebcdic_execution_charset *s_singleton;
2262 int m_num_iconv_errors;
2263 };
2264
2265 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2266
2267 /* A lexer_test_options subclass that records a list of diagnostic
2268 messages emitted by the lexer. */
2269
2270 class lexer_diagnostic_sink : public lexer_test_options
2271 {
2272 public:
2273 lexer_diagnostic_sink ()
2274 {
2275 gcc_assert (s_singleton == NULL);
2276 s_singleton = this;
2277 }
2278 ~lexer_diagnostic_sink ()
2279 {
2280 gcc_assert (s_singleton == this);
2281 s_singleton = NULL;
2282
2283 int i;
2284 char *str;
2285 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2286 free (str);
2287 }
2288
2289 void apply (lexer_test &test) FINAL OVERRIDE
2290 {
2291 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2292 callbacks->diagnostic = on_diagnostic;
2293 }
2294
2295 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2296 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2297 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2298 rich_location *richloc ATTRIBUTE_UNUSED,
2299 const char *msgid, va_list *ap)
2300 ATTRIBUTE_FPTR_PRINTF(5,0)
2301 {
2302 char *msg = xvasprintf (msgid, *ap);
2303 s_singleton->m_diagnostics.safe_push (msg);
2304 return true;
2305 }
2306
2307 auto_vec<char *> m_diagnostics;
2308
2309 private:
2310 static lexer_diagnostic_sink *s_singleton;
2311 };
2312
2313 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2314
2315 /* Constructor. Override line_table with a new instance based on CASE_,
2316 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2317 start parsing the tempfile. */
2318
2319 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2320 lexer_test_options *options)
2321 : m_ltt (case_),
2322 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2323 /* Create a tempfile and write the text to it. */
2324 m_tempfile (SELFTEST_LOCATION, ".c", content),
2325 m_concats (),
2326 m_implicitly_expect_EOF (true)
2327 {
2328 if (options)
2329 options->apply (*this);
2330
2331 cpp_init_iconv (m_parser);
2332
2333 /* Parse the file. */
2334 const char *fname = cpp_read_main_file (m_parser,
2335 m_tempfile.get_filename ());
2336 ASSERT_NE (fname, NULL);
2337 }
2338
2339 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2340
2341 lexer_test::~lexer_test ()
2342 {
2343 location_t loc;
2344 const cpp_token *tok;
2345
2346 if (m_implicitly_expect_EOF)
2347 {
2348 tok = cpp_get_token_with_location (m_parser, &loc);
2349 ASSERT_NE (tok, NULL);
2350 ASSERT_EQ (tok->type, CPP_EOF);
2351 }
2352 }
2353
2354 /* Get the next token from m_parser. */
2355
2356 const cpp_token *
2357 lexer_test::get_token ()
2358 {
2359 location_t loc;
2360 const cpp_token *tok;
2361
2362 tok = cpp_get_token_with_location (m_parser, &loc);
2363 ASSERT_NE (tok, NULL);
2364 return tok;
2365 }
2366
2367 /* Verify that locations within string literals are correctly handled. */
2368
2369 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2370 using the string concatenation database for TEST.
2371
2372 Assert that the character at index IDX is on EXPECTED_LINE,
2373 and that it begins at column EXPECTED_START_COL and ends at
2374 EXPECTED_FINISH_COL (unless the locations are beyond
2375 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2376 columns). */
2377
2378 static void
2379 assert_char_at_range (const location &loc,
2380 lexer_test& test,
2381 location_t strloc, enum cpp_ttype type, int idx,
2382 int expected_line, int expected_start_col,
2383 int expected_finish_col)
2384 {
2385 cpp_reader *pfile = test.m_parser;
2386 string_concat_db *concats = &test.m_concats;
2387
2388 source_range actual_range = source_range();
2389 const char *err
2390 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2391 &actual_range);
2392 if (should_have_column_data_p (strloc))
2393 ASSERT_EQ_AT (loc, NULL, err);
2394 else
2395 {
2396 ASSERT_STREQ_AT (loc,
2397 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2398 err);
2399 return;
2400 }
2401
2402 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2403 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2404 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2405 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2406
2407 if (should_have_column_data_p (actual_range.m_start))
2408 {
2409 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2410 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2411 }
2412 if (should_have_column_data_p (actual_range.m_finish))
2413 {
2414 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2415 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2416 }
2417 }
2418
/* Convenience wrapper around assert_char_at_range that reports failures
   at the point of use (SELFTEST_LOCATION).  */

#define ASSERT_CHAR_AT_RANGE(TEST, LITERAL_LOC, TOK_TYPE, CHAR_IDX,     \
                             LINE, FIRST_COL, LAST_COL)                 \
  assert_char_at_range (SELFTEST_LOCATION, (TEST), (LITERAL_LOC),       \
                        (TOK_TYPE), (CHAR_IDX), (LINE),                 \
                        (FIRST_COL), (LAST_COL))
2427
2428 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2429 using the string concatenation database for TEST.
2430
2431 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2432
2433 static void
2434 assert_num_substring_ranges (const location &loc,
2435 lexer_test& test,
2436 location_t strloc,
2437 enum cpp_ttype type,
2438 int expected_num_ranges)
2439 {
2440 cpp_reader *pfile = test.m_parser;
2441 string_concat_db *concats = &test.m_concats;
2442
2443 int actual_num_ranges = -1;
2444 const char *err
2445 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2446 &actual_num_ranges);
2447 if (should_have_column_data_p (strloc))
2448 ASSERT_EQ_AT (loc, NULL, err);
2449 else
2450 {
2451 ASSERT_STREQ_AT (loc,
2452 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2453 err);
2454 return;
2455 }
2456 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2457 }
2458
/* Convenience wrapper around assert_num_substring_ranges that reports
   failures at the point of use (SELFTEST_LOCATION).  */

#define ASSERT_NUM_SUBSTRING_RANGES(TEST, LITERAL_LOC, TOK_TYPE, COUNT) \
  assert_num_substring_ranges (SELFTEST_LOCATION, (TEST),               \
                               (LITERAL_LOC), (TOK_TYPE), (COUNT))
2466
2467
2468 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2469 returns an error (using the string concatenation database for TEST). */
2470
2471 static void
2472 assert_has_no_substring_ranges (const location &loc,
2473 lexer_test& test,
2474 location_t strloc,
2475 enum cpp_ttype type,
2476 const char *expected_err)
2477 {
2478 cpp_reader *pfile = test.m_parser;
2479 string_concat_db *concats = &test.m_concats;
2480 cpp_substring_ranges ranges;
2481 const char *actual_err
2482 = get_substring_ranges_for_loc (pfile, concats, strloc,
2483 type, ranges);
2484 if (should_have_column_data_p (strloc))
2485 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2486 else
2487 ASSERT_STREQ_AT (loc,
2488 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2489 actual_err);
2490 }
2491
/* Convenience wrapper around assert_has_no_substring_ranges that reports
   failures at the point of use (SELFTEST_LOCATION).  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(TEST, LITERAL_LOC, TOK_TYPE, MSG) \
  assert_has_no_substring_ranges (SELFTEST_LOCATION, (TEST),             \
                                  (LITERAL_LOC), (TOK_TYPE), (MSG))
2495
2496 /* Lex a simple string literal. Verify the substring location data, before
2497 and after running cpp_interpret_string on it. */
2498
2499 static void
2500 test_lexer_string_locations_simple (const line_table_case &case_)
2501 {
2502 /* Digits 0-9 (with 0 at column 10), the simple way.
2503 ....................000000000.11111111112.2222222223333333333
2504 ....................123456789.01234567890.1234567890123456789
2505 We add a trailing comment to ensure that we correctly locate
2506 the end of the string literal token. */
2507 const char *content = " \"0123456789\" /* not a string */\n";
2508 lexer_test test (case_, content, NULL);
2509
2510 /* Verify that we get the expected token back, with the correct
2511 location information. */
2512 const cpp_token *tok = test.get_token ();
2513 ASSERT_EQ (tok->type, CPP_STRING);
2514 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2515 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2516
2517 /* At this point in lexing, the quote characters are treated as part of
2518 the string (they are stripped off by cpp_interpret_string). */
2519
2520 ASSERT_EQ (tok->val.str.len, 12);
2521
2522 /* Verify that cpp_interpret_string works. */
2523 cpp_string dst_string;
2524 const enum cpp_ttype type = CPP_STRING;
2525 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2526 &dst_string, type);
2527 ASSERT_TRUE (result);
2528 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2529 free (const_cast <unsigned char *> (dst_string.text));
2530
2531 /* Verify ranges of individual characters. This no longer includes the
2532 opening quote, but does include the closing quote. */
2533 for (int i = 0; i <= 10; i++)
2534 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2535 10 + i, 10 + i);
2536
2537 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2538 }
2539
2540 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2541 encoding. */
2542
2543 static void
2544 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2545 {
2546 /* EBCDIC support requires iconv. */
2547 if (!HAVE_ICONV)
2548 return;
2549
2550 /* Digits 0-9 (with 0 at column 10), the simple way.
2551 ....................000000000.11111111112.2222222223333333333
2552 ....................123456789.01234567890.1234567890123456789
2553 We add a trailing comment to ensure that we correctly locate
2554 the end of the string literal token. */
2555 const char *content = " \"0123456789\" /* not a string */\n";
2556 ebcdic_execution_charset use_ebcdic;
2557 lexer_test test (case_, content, &use_ebcdic);
2558
2559 /* Verify that we get the expected token back, with the correct
2560 location information. */
2561 const cpp_token *tok = test.get_token ();
2562 ASSERT_EQ (tok->type, CPP_STRING);
2563 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2564 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2565
2566 /* At this point in lexing, the quote characters are treated as part of
2567 the string (they are stripped off by cpp_interpret_string). */
2568
2569 ASSERT_EQ (tok->val.str.len, 12);
2570
2571 /* The remainder of the test requires an iconv implementation that
2572 can convert from UTF-8 to the EBCDIC encoding requested above. */
2573 if (use_ebcdic.iconv_errors_occurred_p ())
2574 return;
2575
2576 /* Verify that cpp_interpret_string works. */
2577 cpp_string dst_string;
2578 const enum cpp_ttype type = CPP_STRING;
2579 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2580 &dst_string, type);
2581 ASSERT_TRUE (result);
2582 /* We should now have EBCDIC-encoded text, specifically
2583 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2584 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2585 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2586 (const char *)dst_string.text);
2587 free (const_cast <unsigned char *> (dst_string.text));
2588
2589 /* Verify that we don't attempt to record substring location information
2590 for such cases. */
2591 ASSERT_HAS_NO_SUBSTRING_RANGES
2592 (test, tok->src_loc, type,
2593 "execution character set != source character set");
2594 }
2595
2596 /* Lex a string literal containing a hex-escaped character.
2597 Verify the substring location data, before and after running
2598 cpp_interpret_string on it. */
2599
2600 static void
2601 test_lexer_string_locations_hex (const line_table_case &case_)
2602 {
2603 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2604 and with a space in place of digit 6, to terminate the escaped
2605 hex code.
2606 ....................000000000.111111.11112222.
2607 ....................123456789.012345.67890123. */
2608 const char *content = " \"01234\\x35 789\"\n";
2609 lexer_test test (case_, content, NULL);
2610
2611 /* Verify that we get the expected token back, with the correct
2612 location information. */
2613 const cpp_token *tok = test.get_token ();
2614 ASSERT_EQ (tok->type, CPP_STRING);
2615 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2616 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2617
2618 /* At this point in lexing, the quote characters are treated as part of
2619 the string (they are stripped off by cpp_interpret_string). */
2620 ASSERT_EQ (tok->val.str.len, 15);
2621
2622 /* Verify that cpp_interpret_string works. */
2623 cpp_string dst_string;
2624 const enum cpp_ttype type = CPP_STRING;
2625 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2626 &dst_string, type);
2627 ASSERT_TRUE (result);
2628 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2629 free (const_cast <unsigned char *> (dst_string.text));
2630
2631 /* Verify ranges of individual characters. This no longer includes the
2632 opening quote, but does include the closing quote. */
2633 for (int i = 0; i <= 4; i++)
2634 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2635 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2636 for (int i = 6; i <= 10; i++)
2637 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2638
2639 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2640 }
2641
2642 /* Lex a string literal containing an octal-escaped character.
2643 Verify the substring location data after running cpp_interpret_string
2644 on it. */
2645
2646 static void
2647 test_lexer_string_locations_oct (const line_table_case &case_)
2648 {
2649 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2650 and with a space in place of digit 6, to terminate the escaped
2651 octal code.
2652 ....................000000000.111111.11112222.2222223333333333444
2653 ....................123456789.012345.67890123.4567890123456789012 */
2654 const char *content = " \"01234\\065 789\" /* not a string */\n";
2655 lexer_test test (case_, content, NULL);
2656
2657 /* Verify that we get the expected token back, with the correct
2658 location information. */
2659 const cpp_token *tok = test.get_token ();
2660 ASSERT_EQ (tok->type, CPP_STRING);
2661 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2662
2663 /* Verify that cpp_interpret_string works. */
2664 cpp_string dst_string;
2665 const enum cpp_ttype type = CPP_STRING;
2666 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2667 &dst_string, type);
2668 ASSERT_TRUE (result);
2669 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2670 free (const_cast <unsigned char *> (dst_string.text));
2671
2672 /* Verify ranges of individual characters. This no longer includes the
2673 opening quote, but does include the closing quote. */
2674 for (int i = 0; i < 5; i++)
2675 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2676 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2677 for (int i = 6; i <= 10; i++)
2678 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2679
2680 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2681 }
2682
2683 /* Test of string literal containing letter escapes. */
2684
2685 static void
2686 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2687 {
2688 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2689 .....................000000000.1.11111.1.1.11222.22222223333333
2690 .....................123456789.0.12345.6.7.89012.34567890123456. */
2691 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2692 lexer_test test (case_, content, NULL);
2693
2694 /* Verify that we get the expected tokens back. */
2695 const cpp_token *tok = test.get_token ();
2696 ASSERT_EQ (tok->type, CPP_STRING);
2697 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2698
2699 /* Verify ranges of individual characters. */
2700 /* "\t". */
2701 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2702 0, 1, 10, 11);
2703 /* "foo". */
2704 for (int i = 1; i <= 3; i++)
2705 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2706 i, 1, 11 + i, 11 + i);
2707 /* "\\" and "\n". */
2708 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2709 4, 1, 15, 16);
2710 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2711 5, 1, 17, 18);
2712
2713 /* "bar" and closing quote for nul-terminator. */
2714 for (int i = 6; i <= 9; i++)
2715 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2716 i, 1, 13 + i, 13 + i);
2717
2718 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2719 }
2720
2721 /* Another test of a string literal containing a letter escape.
2722 Based on string seen in
2723 printf ("%-%\n");
2724 in gcc.dg/format/c90-printf-1.c. */
2725
2726 static void
2727 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2728 {
2729 /* .....................000000000.1111.11.1111.22222222223.
2730 .....................123456789.0123.45.6789.01234567890. */
2731 const char *content = (" \"%-%\\n\" /* non-str */\n");
2732 lexer_test test (case_, content, NULL);
2733
2734 /* Verify that we get the expected tokens back. */
2735 const cpp_token *tok = test.get_token ();
2736 ASSERT_EQ (tok->type, CPP_STRING);
2737 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2738
2739 /* Verify ranges of individual characters. */
2740 /* "%-%". */
2741 for (int i = 0; i < 3; i++)
2742 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2743 i, 1, 10 + i, 10 + i);
2744 /* "\n". */
2745 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2746 3, 1, 13, 14);
2747
2748 /* Closing quote for nul-terminator. */
2749 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2750 4, 1, 15, 15);
2751
2752 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2753 }
2754
2755 /* Lex a string literal containing UCN 4 characters.
2756 Verify the substring location data after running cpp_interpret_string
2757 on it. */
2758
2759 static void
2760 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2761 {
2762 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2763 as UCN 4.
2764 ....................000000000.111111.111122.222222223.33333333344444
2765 ....................123456789.012345.678901.234567890.12345678901234 */
2766 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2767 lexer_test test (case_, content, NULL);
2768
2769 /* Verify that we get the expected token back, with the correct
2770 location information. */
2771 const cpp_token *tok = test.get_token ();
2772 ASSERT_EQ (tok->type, CPP_STRING);
2773 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2774
2775 /* Verify that cpp_interpret_string works.
2776 The string should be encoded in the execution character
2777 set. Assuming that is UTF-8, we should have the following:
2778 ----------- ---- ----- ------- ----------------
2779 Byte offset Byte Octal Unicode Source Column(s)
2780 ----------- ---- ----- ------- ----------------
2781 0 0x30 '0' 10
2782 1 0x31 '1' 11
2783 2 0x32 '2' 12
2784 3 0x33 '3' 13
2785 4 0x34 '4' 14
2786 5 0xE2 \342 U+2174 15-20
2787 6 0x85 \205 (cont) 15-20
2788 7 0xB4 \264 (cont) 15-20
2789 8 0xE2 \342 U+2175 21-26
2790 9 0x85 \205 (cont) 21-26
2791 10 0xB5 \265 (cont) 21-26
2792 11 0x37 '7' 27
2793 12 0x38 '8' 28
2794 13 0x39 '9' 29
2795 14 0x00 30 (closing quote)
2796 ----------- ---- ----- ------- ---------------. */
2797
2798 cpp_string dst_string;
2799 const enum cpp_ttype type = CPP_STRING;
2800 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2801 &dst_string, type);
2802 ASSERT_TRUE (result);
2803 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2804 (const char *)dst_string.text);
2805 free (const_cast <unsigned char *> (dst_string.text));
2806
2807 /* Verify ranges of individual characters. This no longer includes the
2808 opening quote, but does include the closing quote.
2809 '01234'. */
2810 for (int i = 0; i <= 4; i++)
2811 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2812 /* U+2174. */
2813 for (int i = 5; i <= 7; i++)
2814 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2815 /* U+2175. */
2816 for (int i = 8; i <= 10; i++)
2817 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2818 /* '789' and nul terminator */
2819 for (int i = 11; i <= 14; i++)
2820 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2821
2822 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2823 }
2824
2825 /* Lex a string literal containing UCN 8 characters.
2826 Verify the substring location data after running cpp_interpret_string
2827 on it. */
2828
2829 static void
2830 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2831 {
2832 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2833 ....................000000000.111111.1111222222.2222333333333.344444
2834 ....................123456789.012345.6789012345.6789012345678.901234 */
2835 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2836 lexer_test test (case_, content, NULL);
2837
2838 /* Verify that we get the expected token back, with the correct
2839 location information. */
2840 const cpp_token *tok = test.get_token ();
2841 ASSERT_EQ (tok->type, CPP_STRING);
2842 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2843 "\"01234\\U00002174\\U00002175789\"");
2844
2845 /* Verify that cpp_interpret_string works.
2846 The UTF-8 encoding of the string is identical to that from
2847 the ucn4 testcase above; the only difference is the column
2848 locations. */
2849 cpp_string dst_string;
2850 const enum cpp_ttype type = CPP_STRING;
2851 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2852 &dst_string, type);
2853 ASSERT_TRUE (result);
2854 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2855 (const char *)dst_string.text);
2856 free (const_cast <unsigned char *> (dst_string.text));
2857
2858 /* Verify ranges of individual characters. This no longer includes the
2859 opening quote, but does include the closing quote.
2860 '01234'. */
2861 for (int i = 0; i <= 4; i++)
2862 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2863 /* U+2174. */
2864 for (int i = 5; i <= 7; i++)
2865 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2866 /* U+2175. */
2867 for (int i = 8; i <= 10; i++)
2868 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2869 /* '789' at columns 35-37 */
2870 for (int i = 11; i <= 13; i++)
2871 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2872 /* Closing quote/nul-terminator at column 38. */
2873 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2874
2875 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2876 }
2877
/* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit quantity
   (most significant byte first) and return it as a host-endian value.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t value = 0;

  /* Fold the bytes in from most to least significant.  */
  for (int idx = 0; idx < 4; idx++)
    value = (value << 8) | bytes[idx];

  return value;
}
2889
2890 /* Lex a wide string literal and verify that attempts to read substring
2891 location data from it fail gracefully. */
2892
2893 static void
2894 test_lexer_string_locations_wide_string (const line_table_case &case_)
2895 {
2896 /* Digits 0-9.
2897 ....................000000000.11111111112.22222222233333
2898 ....................123456789.01234567890.12345678901234 */
2899 const char *content = " L\"0123456789\" /* non-str */\n";
2900 lexer_test test (case_, content, NULL);
2901
2902 /* Verify that we get the expected token back, with the correct
2903 location information. */
2904 const cpp_token *tok = test.get_token ();
2905 ASSERT_EQ (tok->type, CPP_WSTRING);
2906 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2907
2908 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2909 cpp_string dst_string;
2910 const enum cpp_ttype type = CPP_WSTRING;
2911 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2912 &dst_string, type);
2913 ASSERT_TRUE (result);
2914 /* The cpp_reader defaults to big-endian with
2915 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2916 now be encoded as UTF-32BE. */
2917 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2918 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2919 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2920 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2921 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2922 free (const_cast <unsigned char *> (dst_string.text));
2923
2924 /* We don't yet support generating substring location information
2925 for L"" strings. */
2926 ASSERT_HAS_NO_SUBSTRING_RANGES
2927 (test, tok->src_loc, type,
2928 "execution character set != source character set");
2929 }
2930
/* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit quantity
   (most significant byte first) and return it as a host-endian value.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high = bytes[0];
  const unsigned int low = bytes[1];

  return (uint16_t) ((high << 8) | low);
}
2939
2940 /* Lex a u"" string literal and verify that attempts to read substring
2941 location data from it fail gracefully. */
2942
2943 static void
2944 test_lexer_string_locations_string16 (const line_table_case &case_)
2945 {
2946 /* Digits 0-9.
2947 ....................000000000.11111111112.22222222233333
2948 ....................123456789.01234567890.12345678901234 */
2949 const char *content = " u\"0123456789\" /* non-str */\n";
2950 lexer_test test (case_, content, NULL);
2951
2952 /* Verify that we get the expected token back, with the correct
2953 location information. */
2954 const cpp_token *tok = test.get_token ();
2955 ASSERT_EQ (tok->type, CPP_STRING16);
2956 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2957
2958 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2959 cpp_string dst_string;
2960 const enum cpp_ttype type = CPP_STRING16;
2961 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2962 &dst_string, type);
2963 ASSERT_TRUE (result);
2964
2965 /* The cpp_reader defaults to big-endian, so dst_string should
2966 now be encoded as UTF-16BE. */
2967 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2968 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2969 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2970 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2971 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2972 free (const_cast <unsigned char *> (dst_string.text));
2973
2974 /* We don't yet support generating substring location information
2975 for L"" strings. */
2976 ASSERT_HAS_NO_SUBSTRING_RANGES
2977 (test, tok->src_loc, type,
2978 "execution character set != source character set");
2979 }
2980
2981 /* Lex a U"" string literal and verify that attempts to read substring
2982 location data from it fail gracefully. */
2983
2984 static void
2985 test_lexer_string_locations_string32 (const line_table_case &case_)
2986 {
2987 /* Digits 0-9.
2988 ....................000000000.11111111112.22222222233333
2989 ....................123456789.01234567890.12345678901234 */
2990 const char *content = " U\"0123456789\" /* non-str */\n";
2991 lexer_test test (case_, content, NULL);
2992
2993 /* Verify that we get the expected token back, with the correct
2994 location information. */
2995 const cpp_token *tok = test.get_token ();
2996 ASSERT_EQ (tok->type, CPP_STRING32);
2997 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2998
2999 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
3000 cpp_string dst_string;
3001 const enum cpp_ttype type = CPP_STRING32;
3002 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3003 &dst_string, type);
3004 ASSERT_TRUE (result);
3005
3006 /* The cpp_reader defaults to big-endian, so dst_string should
3007 now be encoded as UTF-32BE. */
3008 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3009 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3010 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3011 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3012 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3013 free (const_cast <unsigned char *> (dst_string.text));
3014
3015 /* We don't yet support generating substring location information
3016 for L"" strings. */
3017 ASSERT_HAS_NO_SUBSTRING_RANGES
3018 (test, tok->src_loc, type,
3019 "execution character set != source character set");
3020 }
3021
3022 /* Lex a u8-string literal.
3023 Verify the substring location data after running cpp_interpret_string
3024 on it. */
3025
3026 static void
3027 test_lexer_string_locations_u8 (const line_table_case &case_)
3028 {
3029 /* Digits 0-9.
3030 ....................000000000.11111111112.22222222233333
3031 ....................123456789.01234567890.12345678901234 */
3032 const char *content = " u8\"0123456789\" /* non-str */\n";
3033 lexer_test test (case_, content, NULL);
3034
3035 /* Verify that we get the expected token back, with the correct
3036 location information. */
3037 const cpp_token *tok = test.get_token ();
3038 ASSERT_EQ (tok->type, CPP_UTF8STRING);
3039 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3040
3041 /* Verify that cpp_interpret_string works. */
3042 cpp_string dst_string;
3043 const enum cpp_ttype type = CPP_STRING;
3044 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3045 &dst_string, type);
3046 ASSERT_TRUE (result);
3047 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3048 free (const_cast <unsigned char *> (dst_string.text));
3049
3050 /* Verify ranges of individual characters. This no longer includes the
3051 opening quote, but does include the closing quote. */
3052 for (int i = 0; i <= 10; i++)
3053 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3054 }
3055
3056 /* Lex a string literal containing UTF-8 source characters.
3057 Verify the substring location data after running cpp_interpret_string
3058 on it. */
3059
3060 static void
3061 test_lexer_string_locations_utf8_source (const line_table_case &case_)
3062 {
3063 /* This string literal is written out to the source file as UTF-8,
3064 and is of the form "before mojibake after", where "mojibake"
3065 is written as the following four unicode code points:
3066 U+6587 CJK UNIFIED IDEOGRAPH-6587
3067 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3068 U+5316 CJK UNIFIED IDEOGRAPH-5316
3069 U+3051 HIRAGANA LETTER KE.
3070 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3071 "before" and "after" are 1 byte per unicode character.
3072
3073 The numbering shown are "columns", which are *byte* numbers within
3074 the line, rather than unicode character numbers.
3075
3076 .................... 000000000.1111111.
3077 .................... 123456789.0123456. */
3078 const char *content = (" \"before "
3079 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3080 UTF-8: 0xE6 0x96 0x87
3081 C octal escaped UTF-8: \346\226\207
3082 "column" numbers: 17-19. */
3083 "\346\226\207"
3084
3085 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3086 UTF-8: 0xE5 0xAD 0x97
3087 C octal escaped UTF-8: \345\255\227
3088 "column" numbers: 20-22. */
3089 "\345\255\227"
3090
3091 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3092 UTF-8: 0xE5 0x8C 0x96
3093 C octal escaped UTF-8: \345\214\226
3094 "column" numbers: 23-25. */
3095 "\345\214\226"
3096
3097 /* U+3051 HIRAGANA LETTER KE
3098 UTF-8: 0xE3 0x81 0x91
3099 C octal escaped UTF-8: \343\201\221
3100 "column" numbers: 26-28. */
3101 "\343\201\221"
3102
3103 /* column numbers 29 onwards
3104 2333333.33334444444444
3105 9012345.67890123456789. */
3106 " after\" /* non-str */\n");
3107 lexer_test test (case_, content, NULL);
3108
3109 /* Verify that we get the expected token back, with the correct
3110 location information. */
3111 const cpp_token *tok = test.get_token ();
3112 ASSERT_EQ (tok->type, CPP_STRING);
3113 ASSERT_TOKEN_AS_TEXT_EQ
3114 (test.m_parser, tok,
3115 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3116
3117 /* Verify that cpp_interpret_string works. */
3118 cpp_string dst_string;
3119 const enum cpp_ttype type = CPP_STRING;
3120 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3121 &dst_string, type);
3122 ASSERT_TRUE (result);
3123 ASSERT_STREQ
3124 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3125 (const char *)dst_string.text);
3126 free (const_cast <unsigned char *> (dst_string.text));
3127
3128 /* Verify ranges of individual characters. This no longer includes the
3129 opening quote, but does include the closing quote.
3130 Assuming that both source and execution encodings are UTF-8, we have
3131 a run of 25 octets in each, plus the NUL terminator. */
3132 for (int i = 0; i < 25; i++)
3133 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3134 /* NUL-terminator should use the closing quote at column 35. */
3135 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3136
3137 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3138 }
3139
3140 /* Test of string literal concatenation. */
3141
3142 static void
3143 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3144 {
3145 /* Digits 0-9.
3146 .....................000000000.111111.11112222222222
3147 .....................123456789.012345.67890123456789. */
3148 const char *content = (" \"01234\" /* non-str */\n"
3149 " \"56789\" /* non-str */\n");
3150 lexer_test test (case_, content, NULL);
3151
3152 location_t input_locs[2];
3153
3154 /* Verify that we get the expected tokens back. */
3155 auto_vec <cpp_string> input_strings;
3156 const cpp_token *tok_a = test.get_token ();
3157 ASSERT_EQ (tok_a->type, CPP_STRING);
3158 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3159 input_strings.safe_push (tok_a->val.str);
3160 input_locs[0] = tok_a->src_loc;
3161
3162 const cpp_token *tok_b = test.get_token ();
3163 ASSERT_EQ (tok_b->type, CPP_STRING);
3164 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3165 input_strings.safe_push (tok_b->val.str);
3166 input_locs[1] = tok_b->src_loc;
3167
3168 /* Verify that cpp_interpret_string works. */
3169 cpp_string dst_string;
3170 const enum cpp_ttype type = CPP_STRING;
3171 bool result = cpp_interpret_string (test.m_parser,
3172 input_strings.address (), 2,
3173 &dst_string, type);
3174 ASSERT_TRUE (result);
3175 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3176 free (const_cast <unsigned char *> (dst_string.text));
3177
3178 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3179 test.m_concats.record_string_concatenation (2, input_locs);
3180
3181 location_t initial_loc = input_locs[0];
3182
3183 /* "01234" on line 1. */
3184 for (int i = 0; i <= 4; i++)
3185 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3186 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3187 for (int i = 5; i <= 10; i++)
3188 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3189
3190 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3191 }
3192
3193 /* Another test of string literal concatenation. */
3194
3195 static void
3196 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3197 {
3198 /* Digits 0-9.
3199 .....................000000000.111.11111112222222
3200 .....................123456789.012.34567890123456. */
3201 const char *content = (" \"01\" /* non-str */\n"
3202 " \"23\" /* non-str */\n"
3203 " \"45\" /* non-str */\n"
3204 " \"67\" /* non-str */\n"
3205 " \"89\" /* non-str */\n");
3206 lexer_test test (case_, content, NULL);
3207
3208 auto_vec <cpp_string> input_strings;
3209 location_t input_locs[5];
3210
3211 /* Verify that we get the expected tokens back. */
3212 for (int i = 0; i < 5; i++)
3213 {
3214 const cpp_token *tok = test.get_token ();
3215 ASSERT_EQ (tok->type, CPP_STRING);
3216 input_strings.safe_push (tok->val.str);
3217 input_locs[i] = tok->src_loc;
3218 }
3219
3220 /* Verify that cpp_interpret_string works. */
3221 cpp_string dst_string;
3222 const enum cpp_ttype type = CPP_STRING;
3223 bool result = cpp_interpret_string (test.m_parser,
3224 input_strings.address (), 5,
3225 &dst_string, type);
3226 ASSERT_TRUE (result);
3227 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3228 free (const_cast <unsigned char *> (dst_string.text));
3229
3230 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3231 test.m_concats.record_string_concatenation (5, input_locs);
3232
3233 location_t initial_loc = input_locs[0];
3234
3235 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3236 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3237 and expect get_source_range_for_substring to fail.
3238 However, for a string concatenation test, we can have a case
3239 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3240 but subsequent strings can be after it.
3241 Attempting to detect this within assert_char_at_range
3242 would overcomplicate the logic for the common test cases, so
3243 we detect it here. */
3244 if (should_have_column_data_p (input_locs[0])
3245 && !should_have_column_data_p (input_locs[4]))
3246 {
3247 /* Verify that get_source_range_for_substring gracefully rejects
3248 this case. */
3249 source_range actual_range;
3250 const char *err
3251 = get_source_range_for_char (test.m_parser, &test.m_concats,
3252 initial_loc, type, 0, &actual_range);
3253 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3254 return;
3255 }
3256
3257 for (int i = 0; i < 5; i++)
3258 for (int j = 0; j < 2; j++)
3259 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3260 i + 1, 10 + j, 10 + j);
3261
3262 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3263 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3264
3265 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3266 }
3267
3268 /* Another test of string literal concatenation, this time combined with
3269 various kinds of escaped characters. */
3270
3271 static void
3272 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3273 {
3274 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3275 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3276 const char *content
3277 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3278 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3279 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3280 lexer_test test (case_, content, NULL);
3281
3282 auto_vec <cpp_string> input_strings;
3283 location_t input_locs[4];
3284
3285 /* Verify that we get the expected tokens back. */
3286 for (int i = 0; i < 4; i++)
3287 {
3288 const cpp_token *tok = test.get_token ();
3289 ASSERT_EQ (tok->type, CPP_STRING);
3290 input_strings.safe_push (tok->val.str);
3291 input_locs[i] = tok->src_loc;
3292 }
3293
3294 /* Verify that cpp_interpret_string works. */
3295 cpp_string dst_string;
3296 const enum cpp_ttype type = CPP_STRING;
3297 bool result = cpp_interpret_string (test.m_parser,
3298 input_strings.address (), 4,
3299 &dst_string, type);
3300 ASSERT_TRUE (result);
3301 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3302 free (const_cast <unsigned char *> (dst_string.text));
3303
3304 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3305 test.m_concats.record_string_concatenation (4, input_locs);
3306
3307 location_t initial_loc = input_locs[0];
3308
3309 for (int i = 0; i <= 4; i++)
3310 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3311 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3312 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3313 for (int i = 7; i <= 9; i++)
3314 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3315
3316 /* NUL-terminator should use the location of the final closing quote. */
3317 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3318
3319 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3320 }
3321
3322 /* Test of string literal in a macro. */
3323
3324 static void
3325 test_lexer_string_locations_macro (const line_table_case &case_)
3326 {
3327 /* Digits 0-9.
3328 .....................0000000001111111111.22222222223.
3329 .....................1234567890123456789.01234567890. */
3330 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3331 " MACRO");
3332 lexer_test test (case_, content, NULL);
3333
3334 /* Verify that we get the expected tokens back. */
3335 const cpp_token *tok = test.get_token ();
3336 ASSERT_EQ (tok->type, CPP_PADDING);
3337
3338 tok = test.get_token ();
3339 ASSERT_EQ (tok->type, CPP_STRING);
3340 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3341
3342 /* Verify ranges of individual characters. We ought to
3343 see columns within the macro definition. */
3344 for (int i = 0; i <= 10; i++)
3345 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3346 i, 1, 20 + i, 20 + i);
3347
3348 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3349
3350 tok = test.get_token ();
3351 ASSERT_EQ (tok->type, CPP_PADDING);
3352 }
3353
3354 /* Test of stringification of a macro argument. */
3355
3356 static void
3357 test_lexer_string_locations_stringified_macro_argument
3358 (const line_table_case &case_)
3359 {
3360 /* .....................000000000111111111122222222223.
3361 .....................123456789012345678901234567890. */
3362 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3363 "MACRO(foo)\n");
3364 lexer_test test (case_, content, NULL);
3365
3366 /* Verify that we get the expected token back. */
3367 const cpp_token *tok = test.get_token ();
3368 ASSERT_EQ (tok->type, CPP_PADDING);
3369
3370 tok = test.get_token ();
3371 ASSERT_EQ (tok->type, CPP_STRING);
3372 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3373
3374 /* We don't support getting the location of a stringified macro
3375 argument. Verify that it fails gracefully. */
3376 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3377 "cpp_interpret_string_1 failed");
3378
3379 tok = test.get_token ();
3380 ASSERT_EQ (tok->type, CPP_PADDING);
3381
3382 tok = test.get_token ();
3383 ASSERT_EQ (tok->type, CPP_PADDING);
3384 }
3385
3386 /* Ensure that we are fail gracefully if something attempts to pass
3387 in a location that isn't a string literal token. Seen on this code:
3388
3389 const char a[] = " %d ";
3390 __builtin_printf (a, 0.5);
3391 ^
3392
3393 when c-format.c erroneously used the indicated one-character
3394 location as the format string location, leading to a read past the
3395 end of a string buffer in cpp_interpret_string_1. */
3396
3397 static void
3398 test_lexer_string_locations_non_string (const line_table_case &case_)
3399 {
3400 /* .....................000000000111111111122222222223.
3401 .....................123456789012345678901234567890. */
3402 const char *content = (" a\n");
3403 lexer_test test (case_, content, NULL);
3404
3405 /* Verify that we get the expected token back. */
3406 const cpp_token *tok = test.get_token ();
3407 ASSERT_EQ (tok->type, CPP_NAME);
3408 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3409
3410 /* At this point, libcpp is attempting to interpret the name as a
3411 string literal, despite it not starting with a quote. We don't detect
3412 that, but we should at least fail gracefully. */
3413 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3414 "cpp_interpret_string_1 failed");
3415 }
3416
3417 /* Ensure that we can read substring information for a token which
3418 starts in one linemap and ends in another . Adapted from
3419 gcc.dg/cpp/pr69985.c. */
3420
3421 static void
3422 test_lexer_string_locations_long_line (const line_table_case &case_)
3423 {
3424 /* .....................000000.000111111111
3425 .....................123456.789012346789. */
3426 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3427 " \"0123456789012345678901234567890123456789"
3428 "0123456789012345678901234567890123456789"
3429 "0123456789012345678901234567890123456789"
3430 "0123456789\"\n");
3431
3432 lexer_test test (case_, content, NULL);
3433
3434 /* Verify that we get the expected token back. */
3435 const cpp_token *tok = test.get_token ();
3436 ASSERT_EQ (tok->type, CPP_STRING);
3437
3438 if (!should_have_column_data_p (line_table->highest_location))
3439 return;
3440
3441 /* Verify ranges of individual characters. */
3442 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3443 for (int i = 0; i < 131; i++)
3444 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3445 i, 2, 7 + i, 7 + i);
3446 }
3447
3448 /* Test of locations within a raw string that doesn't contain a newline. */
3449
3450 static void
3451 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3452 {
3453 /* .....................00.0000000111111111122.
3454 .....................12.3456789012345678901. */
3455 const char *content = ("R\"foo(0123456789)foo\"\n");
3456 lexer_test test (case_, content, NULL);
3457
3458 /* Verify that we get the expected token back. */
3459 const cpp_token *tok = test.get_token ();
3460 ASSERT_EQ (tok->type, CPP_STRING);
3461
3462 /* Verify that cpp_interpret_string works. */
3463 cpp_string dst_string;
3464 const enum cpp_ttype type = CPP_STRING;
3465 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3466 &dst_string, type);
3467 ASSERT_TRUE (result);
3468 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3469 free (const_cast <unsigned char *> (dst_string.text));
3470
3471 if (!should_have_column_data_p (line_table->highest_location))
3472 return;
3473
3474 /* 0-9, plus the nil terminator. */
3475 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3476 for (int i = 0; i < 11; i++)
3477 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3478 i, 1, 7 + i, 7 + i);
3479 }
3480
3481 /* Test of locations within a raw string that contains a newline. */
3482
3483 static void
3484 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3485 {
3486 /* .....................00.0000.
3487 .....................12.3456. */
3488 const char *content = ("R\"foo(\n"
3489 /* .....................00000.
3490 .....................12345. */
3491 "hello\n"
3492 "world\n"
3493 /* .....................00000.
3494 .....................12345. */
3495 ")foo\"\n");
3496 lexer_test test (case_, content, NULL);
3497
3498 /* Verify that we get the expected token back. */
3499 const cpp_token *tok = test.get_token ();
3500 ASSERT_EQ (tok->type, CPP_STRING);
3501
3502 /* Verify that cpp_interpret_string works. */
3503 cpp_string dst_string;
3504 const enum cpp_ttype type = CPP_STRING;
3505 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3506 &dst_string, type);
3507 ASSERT_TRUE (result);
3508 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3509 free (const_cast <unsigned char *> (dst_string.text));
3510
3511 if (!should_have_column_data_p (line_table->highest_location))
3512 return;
3513
3514 /* Currently we don't support locations within raw strings that
3515 contain newlines. */
3516 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3517 "range endpoints are on different lines");
3518 }
3519
3520 /* Test of parsing an unterminated raw string. */
3521
3522 static void
3523 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3524 {
3525 const char *content = "R\"ouch()ouCh\" /* etc */";
3526
3527 lexer_diagnostic_sink diagnostics;
3528 lexer_test test (case_, content, &diagnostics);
3529 test.m_implicitly_expect_EOF = false;
3530
3531 /* Attempt to parse the raw string. */
3532 const cpp_token *tok = test.get_token ();
3533 ASSERT_EQ (tok->type, CPP_EOF);
3534
3535 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3536 /* We expect the message "unterminated raw string"
3537 in the "cpplib" translation domain.
3538 It's not clear that dgettext is available on all supported hosts,
3539 so this assertion is commented-out for now.
3540 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3541 diagnostics.m_diagnostics[0]);
3542 */
3543 }
3544
3545 /* Test of lexing char constants. */
3546
3547 static void
3548 test_lexer_char_constants (const line_table_case &case_)
3549 {
3550 /* Various char constants.
3551 .....................0000000001111111111.22222222223.
3552 .....................1234567890123456789.01234567890. */
3553 const char *content = (" 'a'\n"
3554 " u'a'\n"
3555 " U'a'\n"
3556 " L'a'\n"
3557 " 'abc'\n");
3558 lexer_test test (case_, content, NULL);
3559
3560 /* Verify that we get the expected tokens back. */
3561 /* 'a'. */
3562 const cpp_token *tok = test.get_token ();
3563 ASSERT_EQ (tok->type, CPP_CHAR);
3564 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3565
3566 unsigned int chars_seen;
3567 int unsignedp;
3568 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3569 &chars_seen, &unsignedp);
3570 ASSERT_EQ (cc, 'a');
3571 ASSERT_EQ (chars_seen, 1);
3572
3573 /* u'a'. */
3574 tok = test.get_token ();
3575 ASSERT_EQ (tok->type, CPP_CHAR16);
3576 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3577
3578 /* U'a'. */
3579 tok = test.get_token ();
3580 ASSERT_EQ (tok->type, CPP_CHAR32);
3581 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3582
3583 /* L'a'. */
3584 tok = test.get_token ();
3585 ASSERT_EQ (tok->type, CPP_WCHAR);
3586 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3587
3588 /* 'abc' (c-char-sequence). */
3589 tok = test.get_token ();
3590 ASSERT_EQ (tok->type, CPP_CHAR);
3591 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3592 }
3593 /* A table of interesting location_t values, giving one axis of our test
3594 matrix. */
3595
3596 static const location_t boundary_locations[] = {
3597 /* Zero means "don't override the default values for a new line_table". */
3598 0,
3599
3600 /* An arbitrary non-zero value that isn't close to one of
3601 the boundary values below. */
3602 0x10000,
3603
3604 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3605 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3606 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3607 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3608 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3609 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3610
3611 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3612 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3613 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3614 LINE_MAP_MAX_LOCATION_WITH_COLS,
3615 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3616 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3617 };
3618
3619 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3620
3621 void
3622 for_each_line_table_case (void (*testcase) (const line_table_case &))
3623 {
3624 /* As noted above in the description of struct line_table_case,
3625 we want to explore a test matrix of interesting line_table
3626 situations, running various selftests for each case within the
3627 matrix. */
3628
3629 /* Run all tests with:
3630 (a) line_table->default_range_bits == 0, and
3631 (b) line_table->default_range_bits == 5. */
3632 int num_cases_tested = 0;
3633 for (int default_range_bits = 0; default_range_bits <= 5;
3634 default_range_bits += 5)
3635 {
3636 /* ...and use each of the "interesting" location values as
3637 the starting location within line_table. */
3638 const int num_boundary_locations
3639 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3640 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3641 {
3642 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3643
3644 testcase (c);
3645
3646 num_cases_tested++;
3647 }
3648 }
3649
3650 /* Verify that we fully covered the test matrix. */
3651 ASSERT_EQ (num_cases_tested, 2 * 12);
3652 }
3653
3654 /* Verify that when presented with a consecutive pair of locations with
3655 a very large line offset, we don't attempt to consolidate them into
3656 a single ordinary linemap where the line offsets within the line map
3657 would lead to overflow (PR lto/88147). */
3658
3659 static void
3660 test_line_offset_overflow ()
3661 {
3662 line_table_test ltt (line_table_case (5, 0));
3663
3664 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3665 linemap_line_start (line_table, 1, 100);
3666 location_t loc_a = linemap_line_start (line_table, 2578, 255);
3667 assert_loceq ("foo.c", 2578, 0, loc_a);
3668
3669 const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3670 ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3671 ASSERT_EQ (ordmap_a->m_range_bits, 5);
3672
3673 location_t loc_b = linemap_line_start (line_table, 404198, 512);
3674 assert_loceq ("foo.c", 404198, 0, loc_b);
3675
3676 /* We should have started a new linemap, rather than attempting to store
3677 a very large line offset. */
3678 const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3679 ASSERT_NE (ordmap_a, ordmap_b);
3680 }
3681
3682 void test_cpp_utf8 ()
3683 {
3684 const int def_tabstop = 8;
3685 /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */
3686 {
3687 int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, def_tabstop);
3688 ASSERT_EQ (8, w_bad);
3689 int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, def_tabstop);
3690 ASSERT_EQ (5, w_ctrl);
3691 }
3692
3693 /* Verify that wcwidth of valid UTF-8 is as expected. */
3694 {
3695 const int w_pi = cpp_display_width ("\xcf\x80", 2, def_tabstop);
3696 ASSERT_EQ (1, w_pi);
3697 const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, def_tabstop);
3698 ASSERT_EQ (2, w_emoji);
3699 const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3700 def_tabstop);
3701 ASSERT_EQ (1, w_umlaut_precomposed);
3702 const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
3703 def_tabstop);
3704 ASSERT_EQ (1, w_umlaut_combining);
3705 const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, def_tabstop);
3706 ASSERT_EQ (2, w_han);
3707 const int w_ascii = cpp_display_width ("GCC", 3, def_tabstop);
3708 ASSERT_EQ (3, w_ascii);
3709 const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
3710 "\x9f! \xe4\xb8\xba y\xcc\x88",
3711 24, def_tabstop);
3712 ASSERT_EQ (18, w_mixed);
3713 }
3714
3715 /* Verify that display width properly expands tabs. */
3716 {
3717 const char *tstr = "\tabc\td";
3718 ASSERT_EQ (6, cpp_display_width (tstr, 6, 1));
3719 ASSERT_EQ (10, cpp_display_width (tstr, 6, 3));
3720 ASSERT_EQ (17, cpp_display_width (tstr, 6, 8));
3721 ASSERT_EQ (1, cpp_display_column_to_byte_column (tstr, 6, 7, 8));
3722 }
3723
3724 /* Verify that cpp_byte_column_to_display_column can go past the end,
3725 and similar edge cases. */
3726 {
3727 const char *str
3728 /* Display columns.
3729 111111112345 */
3730 = "\xcf\x80 abc";
3731 /* 111122223456
3732 Byte columns. */
3733
3734 ASSERT_EQ (5, cpp_display_width (str, 6, def_tabstop));
3735 ASSERT_EQ (105,
3736 cpp_byte_column_to_display_column (str, 6, 106, def_tabstop));
3737 ASSERT_EQ (10000,
3738 cpp_byte_column_to_display_column (NULL, 0, 10000, def_tabstop));
3739 ASSERT_EQ (0,
3740 cpp_byte_column_to_display_column (NULL, 10000, 0, def_tabstop));
3741 }
3742
3743 /* Verify that cpp_display_column_to_byte_column can go past the end,
3744 and similar edge cases, and check invertibility. */
3745 {
3746 const char *str
3747 /* Display columns.
3748 000000000000000000000000000000000000011
3749 111111112222222234444444455555555678901 */
3750 = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
3751 /* 000000000000000000000000000000000111111
3752 111122223333444456666777788889999012345
3753 Byte columns. */
3754 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, def_tabstop));
3755 ASSERT_EQ (15,
3756 cpp_display_column_to_byte_column (str, 15, 11, def_tabstop));
3757 ASSERT_EQ (115,
3758 cpp_display_column_to_byte_column (str, 15, 111, def_tabstop));
3759 ASSERT_EQ (10000,
3760 cpp_display_column_to_byte_column (NULL, 0, 10000, def_tabstop));
3761 ASSERT_EQ (0,
3762 cpp_display_column_to_byte_column (NULL, 10000, 0, def_tabstop));
3763
3764 /* Verify that we do not interrupt a UTF-8 sequence. */
3765 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, def_tabstop));
3766
3767 for (int byte_col = 1; byte_col <= 15; ++byte_col)
3768 {
3769 const int disp_col
3770 = cpp_byte_column_to_display_column (str, 15, byte_col, def_tabstop);
3771 const int byte_col2
3772 = cpp_display_column_to_byte_column (str, 15, disp_col, def_tabstop);
3773
3774 /* If we ask for the display column in the middle of a UTF-8
3775 sequence, it will return the length of the partial sequence,
3776 matching the behavior of GCC before display column support.
3777 Otherwise check the round trip was successful. */
3778 if (byte_col < 4)
3779 ASSERT_EQ (byte_col, disp_col);
3780 else if (byte_col >= 6 && byte_col < 9)
3781 ASSERT_EQ (3 + (byte_col - 5), disp_col);
3782 else
3783 ASSERT_EQ (byte_col2, byte_col);
3784 }
3785 }
3786
3787 }
3788
3789 /* Run all of the selftests within this file. */
3790
3791 void
3792 input_c_tests ()
3793 {
3794 test_linenum_comparisons ();
3795 test_should_have_column_data_p ();
3796 test_unknown_location ();
3797 test_builtins ();
3798 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3799
3800 for_each_line_table_case (test_accessing_ordinary_linemaps);
3801 for_each_line_table_case (test_lexer);
3802 for_each_line_table_case (test_lexer_string_locations_simple);
3803 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3804 for_each_line_table_case (test_lexer_string_locations_hex);
3805 for_each_line_table_case (test_lexer_string_locations_oct);
3806 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3807 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3808 for_each_line_table_case (test_lexer_string_locations_ucn4);
3809 for_each_line_table_case (test_lexer_string_locations_ucn8);
3810 for_each_line_table_case (test_lexer_string_locations_wide_string);
3811 for_each_line_table_case (test_lexer_string_locations_string16);
3812 for_each_line_table_case (test_lexer_string_locations_string32);
3813 for_each_line_table_case (test_lexer_string_locations_u8);
3814 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3815 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3816 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3817 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3818 for_each_line_table_case (test_lexer_string_locations_macro);
3819 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3820 for_each_line_table_case (test_lexer_string_locations_non_string);
3821 for_each_line_table_case (test_lexer_string_locations_long_line);
3822 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3823 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3824 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3825 for_each_line_table_case (test_lexer_char_constants);
3826
3827 test_reading_source_line ();
3828
3829 test_line_offset_overflow ();
3830
3831 test_cpp_utf8 ();
3832 }
3833
3834 } // namespace selftest
3835
3836 #endif /* CHECKING_P */