]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/input.c
Provide location information for terminator characters (PR preprocessor/77672)
[thirdparty/gcc.git] / gcc / input.c
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2016 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic-core.h"
25 #include "selftest.h"
26 #include "cpplib.h"
27
28 #ifndef HAVE_ICONV
29 #define HAVE_ICONV 0
30 #endif
31
32 /* This is a cache used by get_next_line to store the content of a
33 file to be searched for file lines. */
34 struct fcache
35 {
36 /* These are information used to store a line boundary. */
37 struct line_info
38 {
39 /* The line number. It starts from 1. */
40 size_t line_num;
41
42 /* The position (byte count) of the beginning of the line,
43 relative to the file data pointer. This starts at zero. */
44 size_t start_pos;
45
46 /* The position (byte count) of the last byte of the line. This
47 normally points to the '\n' character, or to one byte after the
48 last byte of the file, if the file doesn't contain a '\n'
49 character. */
50 size_t end_pos;
51
52 line_info (size_t l, size_t s, size_t e)
53 : line_num (l), start_pos (s), end_pos (e)
54 {}
55
56 line_info ()
57 :line_num (0), start_pos (0), end_pos (0)
58 {}
59 };
60
61 /* The number of time this file has been accessed. This is used
62 to designate which file cache to evict from the cache
63 array. */
64 unsigned use_count;
65
66 const char *file_path;
67
68 FILE *fp;
69
70 /* This points to the content of the file that we've read so
71 far. */
72 char *data;
73
74 /* The size of the DATA array above.*/
75 size_t size;
76
77 /* The number of bytes read from the underlying file so far. This
78 must be less (or equal) than SIZE above. */
79 size_t nb_read;
80
81 /* The index of the beginning of the current line. */
82 size_t line_start_idx;
83
84 /* The number of the previous line read. This starts at 1. Zero
85 means we've read no line so far. */
86 size_t line_num;
87
88 /* This is the total number of lines of the current file. At the
89 moment, we try to get this information from the line map
90 subsystem. Note that this is just a hint. When using the C++
91 front-end, this hint is correct because the input file is then
92 completely tokenized before parsing starts; so the line map knows
93 the number of lines before compilation really starts. For e.g,
94 the C front-end, it can happen that we start emitting diagnostics
95 before the line map has seen the end of the file. */
96 size_t total_lines;
97
98 /* Could this file be missing a trailing newline on its final line?
99 Initially true (to cope with empty files), set to true/false
100 as each line is read. */
101 bool missing_trailing_newline;
102
103 /* This is a record of the beginning and end of the lines we've seen
104 while reading the file. This is useful to avoid walking the data
105 from the beginning when we are asked to read a line that is
106 before LINE_START_IDX above. Note that the maximum size of this
107 record is fcache_line_record_size, so that the memory consumption
108 doesn't explode. We thus scale total_lines down to
109 fcache_line_record_size. */
110 vec<line_info, va_heap> line_record;
111
112 fcache ();
113 ~fcache ();
114 };
115
116 /* Current position in real source file. */
117
118 location_t input_location = UNKNOWN_LOCATION;
119
120 struct line_maps *line_table;
121
122 /* A stashed copy of "line_table" for use by selftest::line_table_test.
123 This needs to be a global so that it can be a GC root, and thus
124 prevent the stashed copy from being garbage-collected if the GC runs
125 during a line_table_test. */
126
127 struct line_maps *saved_line_table;
128
129 static fcache *fcache_tab;
130 static const size_t fcache_tab_size = 16;
131 static const size_t fcache_buffer_size = 4 * 1024;
132 static const size_t fcache_line_record_size = 100;
133
134 /* Expand the source location LOC into a human readable location. If
135 LOC resolves to a builtin location, the file name of the readable
136 location is set to the string "<built-in>". If EXPANSION_POINT_P is
137 TRUE and LOC is virtual, then it is resolved to the expansion
138 point of the involved macro. Otherwise, it is resolved to the
139 spelling location of the token.
140
141 When resolving to the spelling location of the token, if the
142 resulting location is for a built-in location (that is, it has no
143 associated line/column) in the context of a macro expansion, the
144 returned location is the first one (while unwinding the macro
145 location towards its expansion point) that is in real source
146 code. */
147
148 static expanded_location
149 expand_location_1 (source_location loc,
150 bool expansion_point_p)
151 {
152 expanded_location xloc;
153 const line_map_ordinary *map;
154 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
155 tree block = NULL;
156
157 if (IS_ADHOC_LOC (loc))
158 {
159 block = LOCATION_BLOCK (loc);
160 loc = LOCATION_LOCUS (loc);
161 }
162
163 memset (&xloc, 0, sizeof (xloc));
164
165 if (loc >= RESERVED_LOCATION_COUNT)
166 {
167 if (!expansion_point_p)
168 {
169 /* We want to resolve LOC to its spelling location.
170
171 But if that spelling location is a reserved location that
172 appears in the context of a macro expansion (like for a
173 location for a built-in token), let's consider the first
174 location (toward the expansion point) that is not reserved;
175 that is, the first location that is in real source code. */
176 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
177 loc, NULL);
178 lrk = LRK_SPELLING_LOCATION;
179 }
180 loc = linemap_resolve_location (line_table, loc,
181 lrk, &map);
182 xloc = linemap_expand_location (line_table, map, loc);
183 }
184
185 xloc.data = block;
186 if (loc <= BUILTINS_LOCATION)
187 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
188
189 return xloc;
190 }
191
192 /* Initialize the set of cache used for files accessed by caret
193 diagnostic. */
194
195 static void
196 diagnostic_file_cache_init (void)
197 {
198 if (fcache_tab == NULL)
199 fcache_tab = new fcache[fcache_tab_size];
200 }
201
202 /* Free the resources used by the set of cache used for files accessed
203 by caret diagnostic. */
204
205 void
206 diagnostic_file_cache_fini (void)
207 {
208 if (fcache_tab)
209 {
210 delete [] (fcache_tab);
211 fcache_tab = NULL;
212 }
213 }
214
215 /* Return the total lines number that have been read so far by the
216 line map (in the preprocessor) so far. For languages like C++ that
217 entirely preprocess the input file before starting to parse, this
218 equals the actual number of lines of the file. */
219
220 static size_t
221 total_lines_num (const char *file_path)
222 {
223 size_t r = 0;
224 source_location l = 0;
225 if (linemap_get_file_highest_location (line_table, file_path, &l))
226 {
227 gcc_assert (l >= RESERVED_LOCATION_COUNT);
228 expanded_location xloc = expand_location (l);
229 r = xloc.line;
230 }
231 return r;
232 }
233
234 /* Lookup the cache used for the content of a given file accessed by
235 caret diagnostic. Return the found cached file, or NULL if no
236 cached file was found. */
237
238 static fcache*
239 lookup_file_in_cache_tab (const char *file_path)
240 {
241 if (file_path == NULL)
242 return NULL;
243
244 diagnostic_file_cache_init ();
245
246 /* This will contain the found cached file. */
247 fcache *r = NULL;
248 for (unsigned i = 0; i < fcache_tab_size; ++i)
249 {
250 fcache *c = &fcache_tab[i];
251 if (c->file_path && !strcmp (c->file_path, file_path))
252 {
253 ++c->use_count;
254 r = c;
255 }
256 }
257
258 if (r)
259 ++r->use_count;
260
261 return r;
262 }
263
264 /* Purge any mention of FILENAME from the cache of files used for
265 printing source code. For use in selftests when working
266 with tempfiles. */
267
268 void
269 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
270 {
271 gcc_assert (file_path);
272
273 fcache *r = lookup_file_in_cache_tab (file_path);
274 if (!r)
275 /* Not found. */
276 return;
277
278 r->file_path = NULL;
279 if (r->fp)
280 fclose (r->fp);
281 r->fp = NULL;
282 r->nb_read = 0;
283 r->line_start_idx = 0;
284 r->line_num = 0;
285 r->line_record.truncate (0);
286 r->use_count = 0;
287 r->total_lines = 0;
288 r->missing_trailing_newline = true;
289 }
290
291 /* Return the file cache that has been less used, recently, or the
292 first empty one. If HIGHEST_USE_COUNT is non-null,
293 *HIGHEST_USE_COUNT is set to the highest use count of the entries
294 in the cache table. */
295
296 static fcache*
297 evicted_cache_tab_entry (unsigned *highest_use_count)
298 {
299 diagnostic_file_cache_init ();
300
301 fcache *to_evict = &fcache_tab[0];
302 unsigned huc = to_evict->use_count;
303 for (unsigned i = 1; i < fcache_tab_size; ++i)
304 {
305 fcache *c = &fcache_tab[i];
306 bool c_is_empty = (c->file_path == NULL);
307
308 if (c->use_count < to_evict->use_count
309 || (to_evict->file_path && c_is_empty))
310 /* We evict C because it's either an entry with a lower use
311 count or one that is empty. */
312 to_evict = c;
313
314 if (huc < c->use_count)
315 huc = c->use_count;
316
317 if (c_is_empty)
318 /* We've reached the end of the cache; subsequent elements are
319 all empty. */
320 break;
321 }
322
323 if (highest_use_count)
324 *highest_use_count = huc;
325
326 return to_evict;
327 }
328
329 /* Create the cache used for the content of a given file to be
330 accessed by caret diagnostic. This cache is added to an array of
331 cache and can be retrieved by lookup_file_in_cache_tab. This
332 function returns the created cache. Note that only the last
333 fcache_tab_size files are cached. */
334
335 static fcache*
336 add_file_to_cache_tab (const char *file_path)
337 {
338
339 FILE *fp = fopen (file_path, "r");
340 if (fp == NULL)
341 return NULL;
342
343 unsigned highest_use_count = 0;
344 fcache *r = evicted_cache_tab_entry (&highest_use_count);
345 r->file_path = file_path;
346 if (r->fp)
347 fclose (r->fp);
348 r->fp = fp;
349 r->nb_read = 0;
350 r->line_start_idx = 0;
351 r->line_num = 0;
352 r->line_record.truncate (0);
353 /* Ensure that this cache entry doesn't get evicted next time
354 add_file_to_cache_tab is called. */
355 r->use_count = ++highest_use_count;
356 r->total_lines = total_lines_num (file_path);
357 r->missing_trailing_newline = true;
358
359 return r;
360 }
361
362 /* Lookup the cache used for the content of a given file accessed by
363 caret diagnostic. If no cached file was found, create a new cache
364 for this file, add it to the array of cached file and return
365 it. */
366
367 static fcache*
368 lookup_or_add_file_to_cache_tab (const char *file_path)
369 {
370 fcache *r = lookup_file_in_cache_tab (file_path);
371 if (r == NULL)
372 r = add_file_to_cache_tab (file_path);
373 return r;
374 }
375
376 /* Default constructor for a cache of file used by caret
377 diagnostic. */
378
379 fcache::fcache ()
380 : use_count (0), file_path (NULL), fp (NULL), data (0),
381 size (0), nb_read (0), line_start_idx (0), line_num (0),
382 total_lines (0), missing_trailing_newline (true)
383 {
384 line_record.create (0);
385 }
386
387 /* Destructor for a cache of file used by caret diagnostic. */
388
389 fcache::~fcache ()
390 {
391 if (fp)
392 {
393 fclose (fp);
394 fp = NULL;
395 }
396 if (data)
397 {
398 XDELETEVEC (data);
399 data = 0;
400 }
401 line_record.release ();
402 }
403
404 /* Returns TRUE iff the cache would need to be filled with data coming
405 from the file. That is, either the cache is empty or full or the
406 current line is empty. Note that if the cache is full, it would
407 need to be extended and filled again. */
408
409 static bool
410 needs_read (fcache *c)
411 {
412 return (c->nb_read == 0
413 || c->nb_read == c->size
414 || (c->line_start_idx >= c->nb_read - 1));
415 }
416
417 /* Return TRUE iff the cache is full and thus needs to be
418 extended. */
419
420 static bool
421 needs_grow (fcache *c)
422 {
423 return c->nb_read == c->size;
424 }
425
426 /* Grow the cache if it needs to be extended. */
427
428 static void
429 maybe_grow (fcache *c)
430 {
431 if (!needs_grow (c))
432 return;
433
434 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
435 c->data = XRESIZEVEC (char, c->data, size + 1);
436 c->size = size;
437 }
438
439 /* Read more data into the cache. Extends the cache if need be.
440 Returns TRUE iff new data could be read. */
441
442 static bool
443 read_data (fcache *c)
444 {
445 if (feof (c->fp) || ferror (c->fp))
446 return false;
447
448 maybe_grow (c);
449
450 char * from = c->data + c->nb_read;
451 size_t to_read = c->size - c->nb_read;
452 size_t nb_read = fread (from, 1, to_read, c->fp);
453
454 if (ferror (c->fp))
455 return false;
456
457 c->nb_read += nb_read;
458 return !!nb_read;
459 }
460
461 /* Read new data iff the cache needs to be filled with more data
462 coming from the file FP. Return TRUE iff the cache was filled with
463 mode data. */
464
465 static bool
466 maybe_read_data (fcache *c)
467 {
468 if (!needs_read (c))
469 return false;
470 return read_data (c);
471 }
472
473 /* Read a new line from file FP, using C as a cache for the data
474 coming from the file. Upon successful completion, *LINE is set to
475 the beginning of the line found. Space for that line has been
476 allocated in the cache thus *LINE has the same life time as C.
477 *LINE_LEN is set to the length of the line. Note that the line
478 does not contain any terminal delimiter. This function returns
479 true if some data was read or process from the cache, false
480 otherwise. Note that subsequent calls to get_next_line return the
481 next lines of the file and might overwrite the content of
482 *LINE. */
483
484 static bool
485 get_next_line (fcache *c, char **line, ssize_t *line_len)
486 {
487 /* Fill the cache with data to process. */
488 maybe_read_data (c);
489
490 size_t remaining_size = c->nb_read - c->line_start_idx;
491 if (remaining_size == 0)
492 /* There is no more data to process. */
493 return false;
494
495 char *line_start = c->data + c->line_start_idx;
496
497 char *next_line_start = NULL;
498 size_t len = 0;
499 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
500 if (line_end == NULL)
501 {
502 /* We haven't found the end-of-line delimiter in the cache.
503 Fill the cache with more data from the file and look for the
504 '\n'. */
505 while (maybe_read_data (c))
506 {
507 line_start = c->data + c->line_start_idx;
508 remaining_size = c->nb_read - c->line_start_idx;
509 line_end = (char *) memchr (line_start, '\n', remaining_size);
510 if (line_end != NULL)
511 {
512 next_line_start = line_end + 1;
513 break;
514 }
515 }
516 if (line_end == NULL)
517 {
518 /* We've loadded all the file into the cache and still no
519 '\n'. Let's say the line ends up at one byte passed the
520 end of the file. This is to stay consistent with the case
521 of when the line ends up with a '\n' and line_end points to
522 that terminal '\n'. That consistency is useful below in
523 the len calculation. */
524 line_end = c->data + c->nb_read ;
525 c->missing_trailing_newline = true;
526 }
527 else
528 c->missing_trailing_newline = false;
529 }
530 else
531 {
532 next_line_start = line_end + 1;
533 c->missing_trailing_newline = false;
534 }
535
536 if (ferror (c->fp))
537 return -1;
538
539 /* At this point, we've found the end of the of line. It either
540 points to the '\n' or to one byte after the last byte of the
541 file. */
542 gcc_assert (line_end != NULL);
543
544 len = line_end - line_start;
545
546 if (c->line_start_idx < c->nb_read)
547 *line = line_start;
548
549 ++c->line_num;
550
551 /* Before we update our line record, make sure the hint about the
552 total number of lines of the file is correct. If it's not, then
553 we give up recording line boundaries from now on. */
554 bool update_line_record = true;
555 if (c->line_num > c->total_lines)
556 update_line_record = false;
557
558 /* Now update our line record so that re-reading lines from the
559 before c->line_start_idx is faster. */
560 if (update_line_record
561 && c->line_record.length () < fcache_line_record_size)
562 {
563 /* If the file lines fits in the line record, we just record all
564 its lines ...*/
565 if (c->total_lines <= fcache_line_record_size
566 && c->line_num > c->line_record.length ())
567 c->line_record.safe_push (fcache::line_info (c->line_num,
568 c->line_start_idx,
569 line_end - c->data));
570 else if (c->total_lines > fcache_line_record_size)
571 {
572 /* ... otherwise, we just scale total_lines down to
573 (fcache_line_record_size lines. */
574 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
575 if (c->line_record.length () == 0
576 || n >= c->line_record.length ())
577 c->line_record.safe_push (fcache::line_info (c->line_num,
578 c->line_start_idx,
579 line_end - c->data));
580 }
581 }
582
583 /* Update c->line_start_idx so that it points to the next line to be
584 read. */
585 if (next_line_start)
586 c->line_start_idx = next_line_start - c->data;
587 else
588 /* We didn't find any terminal '\n'. Let's consider that the end
589 of line is the end of the data in the cache. The next
590 invocation of get_next_line will either read more data from the
591 underlying file or return false early because we've reached the
592 end of the file. */
593 c->line_start_idx = c->nb_read;
594
595 *line_len = len;
596
597 return true;
598 }
599
600 /* Reads the next line from FILE into *LINE. If *LINE is too small
601 (or NULL) it is allocated (or extended) to have enough space to
602 containe the line. *LINE_LENGTH must contain the size of the
603 initial*LINE buffer. It's then updated by this function to the
604 actual length of the returned line. Note that the returned line
605 can contain several zero bytes. Also note that the returned string
606 is allocated in static storage that is going to be re-used by
607 subsequent invocations of read_line. */
608
609 static bool
610 read_next_line (fcache *cache, char ** line, ssize_t *line_len)
611 {
612 char *l = NULL;
613 ssize_t len = 0;
614
615 if (!get_next_line (cache, &l, &len))
616 return false;
617
618 if (*line == NULL)
619 *line = XNEWVEC (char, len);
620 else
621 if (*line_len < len)
622 *line = XRESIZEVEC (char, *line, len);
623
624 memcpy (*line, l, len);
625 *line_len = len;
626
627 return true;
628 }
629
630 /* Consume the next bytes coming from the cache (or from its
631 underlying file if there are remaining unread bytes in the file)
632 until we reach the next end-of-line (or end-of-file). There is no
633 copying from the cache involved. Return TRUE upon successful
634 completion. */
635
636 static bool
637 goto_next_line (fcache *cache)
638 {
639 char *l;
640 ssize_t len;
641
642 return get_next_line (cache, &l, &len);
643 }
644
645 /* Read an arbitrary line number LINE_NUM from the file cached in C.
646 The line is copied into *LINE. *LINE_LEN must have been set to the
647 length of *LINE. If *LINE is too small (or NULL) it's extended (or
648 allocated) and *LINE_LEN is adjusted accordingly. *LINE ends up
649 with a terminal zero byte and can contain additional zero bytes.
650 This function returns bool if a line was read. */
651
652 static bool
653 read_line_num (fcache *c, size_t line_num,
654 char ** line, ssize_t *line_len)
655 {
656 gcc_assert (line_num > 0);
657
658 if (line_num <= c->line_num)
659 {
660 /* We've been asked to read lines that are before c->line_num.
661 So lets use our line record (if it's not empty) to try to
662 avoid re-reading the file from the beginning again. */
663
664 if (c->line_record.is_empty ())
665 {
666 c->line_start_idx = 0;
667 c->line_num = 0;
668 }
669 else
670 {
671 fcache::line_info *i = NULL;
672 if (c->total_lines <= fcache_line_record_size)
673 {
674 /* In languages where the input file is not totally
675 preprocessed up front, the c->total_lines hint
676 can be smaller than the number of lines of the
677 file. In that case, only the first
678 c->total_lines have been recorded.
679
680 Otherwise, the first c->total_lines we've read have
681 their start/end recorded here. */
682 i = (line_num <= c->total_lines)
683 ? &c->line_record[line_num - 1]
684 : &c->line_record[c->total_lines - 1];
685 gcc_assert (i->line_num <= line_num);
686 }
687 else
688 {
689 /* So the file had more lines than our line record
690 size. Thus the number of lines we've recorded has
691 been scaled down to fcache_line_reacord_size. Let's
692 pick the start/end of the recorded line that is
693 closest to line_num. */
694 size_t n = (line_num <= c->total_lines)
695 ? line_num * fcache_line_record_size / c->total_lines
696 : c ->line_record.length () - 1;
697 if (n < c->line_record.length ())
698 {
699 i = &c->line_record[n];
700 gcc_assert (i->line_num <= line_num);
701 }
702 }
703
704 if (i && i->line_num == line_num)
705 {
706 /* We have the start/end of the line. Let's just copy
707 it again and we are done. */
708 ssize_t len = i->end_pos - i->start_pos + 1;
709 if (*line_len < len)
710 *line = XRESIZEVEC (char, *line, len);
711 memmove (*line, c->data + i->start_pos, len);
712 (*line)[len - 1] = '\0';
713 *line_len = --len;
714 return true;
715 }
716
717 if (i)
718 {
719 c->line_start_idx = i->start_pos;
720 c->line_num = i->line_num - 1;
721 }
722 else
723 {
724 c->line_start_idx = 0;
725 c->line_num = 0;
726 }
727 }
728 }
729
730 /* Let's walk from line c->line_num up to line_num - 1, without
731 copying any line. */
732 while (c->line_num < line_num - 1)
733 if (!goto_next_line (c))
734 return false;
735
736 /* The line we want is the next one. Let's read and copy it back to
737 the caller. */
738 return read_next_line (c, line, line_len);
739 }
740
741 /* Return the physical source line that corresponds to FILE_PATH/LINE in a
742 buffer that is statically allocated. The newline is replaced by
743 the null character. Note that the line can contain several null
744 characters, so LINE_LEN, if non-null, points to the actual length
745 of the line. */
746
747 const char *
748 location_get_source_line (const char *file_path, int line,
749 int *line_len)
750 {
751 static char *buffer;
752 static ssize_t len;
753
754 if (line == 0)
755 return NULL;
756
757 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
758 if (c == NULL)
759 return NULL;
760
761 bool read = read_line_num (c, line, &buffer, &len);
762
763 if (read && line_len)
764 *line_len = len;
765
766 return read ? buffer : NULL;
767 }
768
769 /* Determine if FILE_PATH missing a trailing newline on its final line.
770 Only valid to call once all of the file has been loaded, by
771 requesting a line number beyond the end of the file. */
772
773 bool
774 location_missing_trailing_newline (const char *file_path)
775 {
776 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
777 if (c == NULL)
778 return false;
779
780 return c->missing_trailing_newline;
781 }
782
783 /* Test if the location originates from the spelling location of a
784 builtin-tokens. That is, return TRUE if LOC is a (possibly
785 virtual) location of a built-in token that appears in the expansion
786 list of a macro. Please note that this function also works on
787 tokens that result from built-in tokens. For instance, the
788 function would return true if passed a token "4" that is the result
789 of the expansion of the built-in __LINE__ macro. */
790 bool
791 is_location_from_builtin_token (source_location loc)
792 {
793 const line_map_ordinary *map = NULL;
794 loc = linemap_resolve_location (line_table, loc,
795 LRK_SPELLING_LOCATION, &map);
796 return loc == BUILTINS_LOCATION;
797 }
798
799 /* Expand the source location LOC into a human readable location. If
800 LOC is virtual, it resolves to the expansion point of the involved
801 macro. If LOC resolves to a builtin location, the file name of the
802 readable location is set to the string "<built-in>". */
803
804 expanded_location
805 expand_location (source_location loc)
806 {
807 return expand_location_1 (loc, /*expansion_point_p=*/true);
808 }
809
810 /* Expand the source location LOC into a human readable location. If
811 LOC is virtual, it resolves to the expansion location of the
812 relevant macro. If LOC resolves to a builtin location, the file
813 name of the readable location is set to the string
814 "<built-in>". */
815
816 expanded_location
817 expand_location_to_spelling_point (source_location loc)
818 {
819 return expand_location_1 (loc, /*expansion_point_p=*/false);
820 }
821
822 /* The rich_location class within libcpp requires a way to expand
823 source_location instances, and relies on the client code
824 providing a symbol named
825 linemap_client_expand_location_to_spelling_point
826 to do this.
827
828 This is the implementation for libcommon.a (all host binaries),
829 which simply calls into expand_location_to_spelling_point. */
830
831 expanded_location
832 linemap_client_expand_location_to_spelling_point (source_location loc)
833 {
834 return expand_location_to_spelling_point (loc);
835 }
836
837
838 /* If LOCATION is in a system header and if it is a virtual location for
839 a token coming from the expansion of a macro, unwind it to the
840 location of the expansion point of the macro. Otherwise, just return
841 LOCATION.
842
843 This is used for instance when we want to emit diagnostics about a
844 token that may be located in a macro that is itself defined in a
845 system header, for example, for the NULL macro. In such a case, if
846 LOCATION were passed directly to diagnostic functions such as
847 warning_at, the diagnostic would be suppressed (unless
848 -Wsystem-headers). */
849
850 source_location
851 expansion_point_location_if_in_system_header (source_location location)
852 {
853 if (in_system_header_at (location))
854 location = linemap_resolve_location (line_table, location,
855 LRK_MACRO_EXPANSION_POINT,
856 NULL);
857 return location;
858 }
859
860 /* If LOCATION is a virtual location for a token coming from the expansion
861 of a macro, unwind to the location of the expansion point of the macro. */
862
863 source_location
864 expansion_point_location (source_location location)
865 {
866 return linemap_resolve_location (line_table, location,
867 LRK_MACRO_EXPANSION_POINT, NULL);
868 }
869
870 /* Construct a location with caret at CARET, ranging from START to
871 finish e.g.
872
873 11111111112
874 12345678901234567890
875 522
876 523 return foo + bar;
877 ~~~~^~~~~
878 524
879
880 The location's caret is at the "+", line 523 column 15, but starts
881 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
882 of "bar" at column 19. */
883
884 location_t
885 make_location (location_t caret, location_t start, location_t finish)
886 {
887 location_t pure_loc = get_pure_location (caret);
888 source_range src_range;
889 src_range.m_start = get_start (start);
890 src_range.m_finish = get_finish (finish);
891 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
892 pure_loc,
893 src_range,
894 NULL);
895 return combined_loc;
896 }
897
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)

/* Scale the integer X for display.  The result is:
     - X divided by 1024 * 1024, if X is >= 10 M (in base 2);
     - otherwise X divided by 1024, if X is >= 10 K (in base 2);
     - otherwise X itself.
   The result is an unsigned long.  */
#define SCALE(x)                                \
  ((unsigned long) ((x) >= 10 * ONE_M           \
                    ? (x) / ONE_M               \
                    : ((x) >= 10 * ONE_K        \
                       ? (x) / ONE_K            \
                       : (x))))

/* Unit matching what SCALE does to the integer X:
     - the character 'M', if X is >= 10 M (in base 2);
     - the character 'k', if X is >= 10 K but strictly lower than
       10 M (in base 2);
     - the character ' ', if X is strictly lower than 10 K.  */
#define STAT_LABEL(x) \
  ((x) >= 10 * ONE_M ? 'M' : ((x) >= 10 * ONE_K ? 'k' : ' '))

/* Expand to the two arguments needed to display the integer SIZE as
   a multiple of 1K or 1M (in base 2): the scaled amount, then its
   unit (either k, M, or ' ').  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
922
923 /* Dump statistics to stderr about the memory usage of the line_table
924 set of line maps. This also displays some statistics about macro
925 expansion. */
926
927 void
928 dump_line_table_statistics (void)
929 {
930 struct linemap_stats s;
931 long total_used_map_size,
932 macro_maps_size,
933 total_allocated_map_size;
934
935 memset (&s, 0, sizeof (s));
936
937 linemap_get_statistics (line_table, &s);
938
939 macro_maps_size = s.macro_maps_used_size
940 + s.macro_maps_locations_size;
941
942 total_allocated_map_size = s.ordinary_maps_allocated_size
943 + s.macro_maps_allocated_size
944 + s.macro_maps_locations_size;
945
946 total_used_map_size = s.ordinary_maps_used_size
947 + s.macro_maps_used_size
948 + s.macro_maps_locations_size;
949
950 fprintf (stderr, "Number of expanded macros: %5ld\n",
951 s.num_expanded_macros);
952 if (s.num_expanded_macros != 0)
953 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
954 s.num_macro_tokens / s.num_expanded_macros);
955 fprintf (stderr,
956 "\nLine Table allocations during the "
957 "compilation process\n");
958 fprintf (stderr, "Number of ordinary maps used: %5ld%c\n",
959 SCALE (s.num_ordinary_maps_used),
960 STAT_LABEL (s.num_ordinary_maps_used));
961 fprintf (stderr, "Ordinary map used size: %5ld%c\n",
962 SCALE (s.ordinary_maps_used_size),
963 STAT_LABEL (s.ordinary_maps_used_size));
964 fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n",
965 SCALE (s.num_ordinary_maps_allocated),
966 STAT_LABEL (s.num_ordinary_maps_allocated));
967 fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n",
968 SCALE (s.ordinary_maps_allocated_size),
969 STAT_LABEL (s.ordinary_maps_allocated_size));
970 fprintf (stderr, "Number of macro maps used: %5ld%c\n",
971 SCALE (s.num_macro_maps_used),
972 STAT_LABEL (s.num_macro_maps_used));
973 fprintf (stderr, "Macro maps used size: %5ld%c\n",
974 SCALE (s.macro_maps_used_size),
975 STAT_LABEL (s.macro_maps_used_size));
976 fprintf (stderr, "Macro maps locations size: %5ld%c\n",
977 SCALE (s.macro_maps_locations_size),
978 STAT_LABEL (s.macro_maps_locations_size));
979 fprintf (stderr, "Macro maps size: %5ld%c\n",
980 SCALE (macro_maps_size),
981 STAT_LABEL (macro_maps_size));
982 fprintf (stderr, "Duplicated maps locations size: %5ld%c\n",
983 SCALE (s.duplicated_macro_maps_locations_size),
984 STAT_LABEL (s.duplicated_macro_maps_locations_size));
985 fprintf (stderr, "Total allocated maps size: %5ld%c\n",
986 SCALE (total_allocated_map_size),
987 STAT_LABEL (total_allocated_map_size));
988 fprintf (stderr, "Total used maps size: %5ld%c\n",
989 SCALE (total_used_map_size),
990 STAT_LABEL (total_used_map_size));
991 fprintf (stderr, "Ad-hoc table size: %5ld%c\n",
992 SCALE (s.adhoc_table_size),
993 STAT_LABEL (s.adhoc_table_size));
994 fprintf (stderr, "Ad-hoc table entries used: %5ld\n",
995 s.adhoc_table_entries_used);
996 fprintf (stderr, "optimized_ranges: %i\n",
997 line_table->num_optimized_ranges);
998 fprintf (stderr, "unoptimized_ranges: %i\n",
999 line_table->num_unoptimized_ranges);
1000
1001 fprintf (stderr, "\n");
1002 }
1003
1004 /* Get location one beyond the final location in ordinary map IDX. */
1005
1006 static source_location
1007 get_end_location (struct line_maps *set, unsigned int idx)
1008 {
1009 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1010 return set->highest_location;
1011
1012 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1013 return MAP_START_LOCATION (next_map);
1014 }
1015
/* Helper function for write_digit_row.
   Write the decimal units digit of DIGIT to STREAM as one character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1023
1024 /* Helper function for dump_location_info.
1025 Write a row of numbers to STREAM, numbering a source line,
1026 giving the units, tens, hundreds etc of the column number. */
1027
1028 static void
1029 write_digit_row (FILE *stream, int indent,
1030 const line_map_ordinary *map,
1031 source_location loc, int max_col, int divisor)
1032 {
1033 fprintf (stream, "%*c", indent, ' ');
1034 fprintf (stream, "|");
1035 for (int column = 1; column < max_col; column++)
1036 {
1037 source_location column_loc = loc + (column << map->m_range_bits);
1038 write_digit (stream, column_loc / divisor);
1039 }
1040 fprintf (stream, "\n");
1041 }
1042
1043 /* Write a half-closed (START) / half-open (END) interval of
1044 source_location to STREAM. */
1045
1046 static void
1047 dump_location_range (FILE *stream,
1048 source_location start, source_location end)
1049 {
1050 fprintf (stream,
1051 " source_location interval: %u <= loc < %u\n",
1052 start, end);
1053 }
1054
1055 /* Write a labelled description of a half-closed (START) / half-open (END)
1056 interval of source_location to STREAM. */
1057
1058 static void
1059 dump_labelled_location_range (FILE *stream,
1060 const char *name,
1061 source_location start, source_location end)
1062 {
1063 fprintf (stream, "%s\n", name);
1064 dump_location_range (stream, start, end);
1065 fprintf (stream, "\n");
1066 }
1067
1068 /* Write a visualization of the locations in the line_table to STREAM. */
1069
1070 void
1071 dump_location_info (FILE *stream)
1072 {
1073 /* Visualize the reserved locations. */
1074 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1075 0, RESERVED_LOCATION_COUNT);
1076
1077 /* Visualize the ordinary line_map instances, rendering the sources. */
1078 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1079 {
1080 source_location end_location = get_end_location (line_table, idx);
1081 /* half-closed: doesn't include this one. */
1082
1083 const line_map_ordinary *map
1084 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1085 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1086 dump_location_range (stream,
1087 MAP_START_LOCATION (map), end_location);
1088 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1089 fprintf (stream, " starting at line: %i\n",
1090 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1091 fprintf (stream, " column and range bits: %i\n",
1092 map->m_column_and_range_bits);
1093 fprintf (stream, " column bits: %i\n",
1094 map->m_column_and_range_bits - map->m_range_bits);
1095 fprintf (stream, " range bits: %i\n",
1096 map->m_range_bits);
1097
1098 /* Render the span of source lines that this "map" covers. */
1099 for (source_location loc = MAP_START_LOCATION (map);
1100 loc < end_location;
1101 loc += (1 << map->m_range_bits) )
1102 {
1103 gcc_assert (pure_location_p (line_table, loc) );
1104
1105 expanded_location exploc
1106 = linemap_expand_location (line_table, map, loc);
1107
1108 if (0 == exploc.column)
1109 {
1110 /* Beginning of a new source line: draw the line. */
1111
1112 int line_size;
1113 const char *line_text = location_get_source_line (exploc.file,
1114 exploc.line,
1115 &line_size);
1116 if (!line_text)
1117 break;
1118 fprintf (stream,
1119 "%s:%3i|loc:%5i|%.*s\n",
1120 exploc.file, exploc.line,
1121 loc,
1122 line_size, line_text);
1123
1124 /* "loc" is at column 0, which means "the whole line".
1125 Render the locations *within* the line, by underlining
1126 it, showing the source_location numeric values
1127 at each column. */
1128 int max_col = (1 << map->m_column_and_range_bits) - 1;
1129 if (max_col > line_size)
1130 max_col = line_size + 1;
1131
1132 int indent = 14 + strlen (exploc.file);
1133
1134 /* Thousands. */
1135 if (end_location > 999)
1136 write_digit_row (stream, indent, map, loc, max_col, 1000);
1137
1138 /* Hundreds. */
1139 if (end_location > 99)
1140 write_digit_row (stream, indent, map, loc, max_col, 100);
1141
1142 /* Tens. */
1143 write_digit_row (stream, indent, map, loc, max_col, 10);
1144
1145 /* Units. */
1146 write_digit_row (stream, indent, map, loc, max_col, 1);
1147 }
1148 }
1149 fprintf (stream, "\n");
1150 }
1151
1152 /* Visualize unallocated values. */
1153 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1154 line_table->highest_location,
1155 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1156
1157 /* Visualize the macro line_map instances, rendering the sources. */
1158 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1159 {
1160 /* Each macro map that is allocated owns source_location values
1161 that are *lower* that the one before them.
1162 Hence it's meaningful to view them either in order of ascending
1163 source locations, or in order of ascending macro map index. */
1164 const bool ascending_source_locations = true;
1165 unsigned int idx = (ascending_source_locations
1166 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1167 : i);
1168 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1169 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1170 idx,
1171 linemap_map_get_macro_name (map),
1172 MACRO_MAP_NUM_MACRO_TOKENS (map));
1173 dump_location_range (stream,
1174 map->start_location,
1175 (map->start_location
1176 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1177 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1178 "expansion point is location %i",
1179 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1180 fprintf (stream, " map->start_location: %u\n",
1181 map->start_location);
1182
1183 fprintf (stream, " macro_locations:\n");
1184 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1185 {
1186 source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1187 source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1188
1189 /* linemap_add_macro_token encodes token numbers in an expansion
1190 by putting them after MAP_START_LOCATION. */
1191
1192 /* I'm typically seeing 4 uninitialized entries at the end of
1193 0xafafafaf.
1194 This appears to be due to macro.c:replace_args
1195 adding 2 extra args for padding tokens; presumably there may
1196 be a leading and/or trailing padding token injected,
1197 each for 2 more location slots.
1198 This would explain there being up to 4 source_locations slots
1199 that may be uninitialized. */
1200
1201 fprintf (stream, " %u: %u, %u\n",
1202 i,
1203 x,
1204 y);
1205 if (x == y)
1206 {
1207 if (x < MAP_START_LOCATION (map))
1208 inform (x, "token %u has x-location == y-location == %u", i, x);
1209 else
1210 fprintf (stream,
1211 "x-location == y-location == %u encodes token # %u\n",
1212 x, x - MAP_START_LOCATION (map));
1213 }
1214 else
1215 {
1216 inform (x, "token %u has x-location == %u", i, x);
1217 inform (x, "token %u has y-location == %u", i, y);
1218 }
1219 }
1220 fprintf (stream, "\n");
1221 }
1222
1223 /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1224 macro map, presumably due to an off-by-one error somewhere
1225 between the logic in linemap_enter_macro and
1226 LINEMAPS_MACRO_LOWEST_LOCATION. */
1227 dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1228 MAX_SOURCE_LOCATION,
1229 MAX_SOURCE_LOCATION + 1);
1230
1231 /* Visualize ad-hoc values. */
1232 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1233 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1234 }
1235
1236 /* string_concat's constructor. */
1237
1238 string_concat::string_concat (int num, location_t *locs)
1239 : m_num (num)
1240 {
1241 m_locs = ggc_vec_alloc <location_t> (num);
1242 for (int i = 0; i < num; i++)
1243 m_locs[i] = locs[i];
1244 }
1245
1246 /* string_concat_db's constructor. */
1247
1248 string_concat_db::string_concat_db ()
1249 {
1250 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1251 }
1252
1253 /* Record that a string concatenation occurred, covering NUM
1254 string literal tokens. LOCS is an array of size NUM, containing the
1255 locations of the tokens. A copy of LOCS is taken. */
1256
1257 void
1258 string_concat_db::record_string_concatenation (int num, location_t *locs)
1259 {
1260 gcc_assert (num > 1);
1261 gcc_assert (locs);
1262
1263 location_t key_loc = get_key_loc (locs[0]);
1264
1265 string_concat *concat
1266 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1267 m_table->put (key_loc, concat);
1268 }
1269
1270 /* Determine if LOC was the location of the the initial token of a
1271 concatenation of string literal tokens.
1272 If so, *OUT_NUM is written to with the number of tokens, and
1273 *OUT_LOCS with the location of an array of locations of the
1274 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1275 storage owned by the string_concat_db.
1276 Otherwise, return false. */
1277
1278 bool
1279 string_concat_db::get_string_concatenation (location_t loc,
1280 int *out_num,
1281 location_t **out_locs)
1282 {
1283 gcc_assert (out_num);
1284 gcc_assert (out_locs);
1285
1286 location_t key_loc = get_key_loc (loc);
1287
1288 string_concat **concat = m_table->get (key_loc);
1289 if (!concat)
1290 return false;
1291
1292 *out_num = (*concat)->m_num;
1293 *out_locs =(*concat)->m_locs;
1294 return true;
1295 }
1296
1297 /* Internal function. Canonicalize LOC into a form suitable for
1298 use as a key within the database, stripping away macro expansion,
1299 ad-hoc information, and range information, using the location of
1300 the start of LOC within an ordinary linemap. */
1301
1302 location_t
1303 string_concat_db::get_key_loc (location_t loc)
1304 {
1305 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1306 NULL);
1307
1308 loc = get_range_from_loc (line_table, loc).m_start;
1309
1310 return loc;
1311 }
1312
1313 /* Helper class for use within get_substring_ranges_for_loc.
1314 An vec of cpp_string with responsibility for releasing all of the
1315 str->text for each str in the vector. */
1316
1317 class auto_cpp_string_vec : public auto_vec <cpp_string>
1318 {
1319 public:
1320 auto_cpp_string_vec (int alloc)
1321 : auto_vec <cpp_string> (alloc) {}
1322
1323 ~auto_cpp_string_vec ()
1324 {
1325 /* Clean up the copies within this vec. */
1326 int i;
1327 cpp_string *str;
1328 FOR_EACH_VEC_ELT (*this, i, str)
1329 free (const_cast <unsigned char *> (str->text));
1330 }
1331 };
1332
1333 /* Attempt to populate RANGES with source location information on the
1334 individual characters within the string literal found at STRLOC.
1335 If CONCATS is non-NULL, then any string literals that the token at
1336 STRLOC was concatenated with are also added to RANGES.
1337
1338 Return NULL if successful, or an error message if any errors occurred (in
1339 which case RANGES may be only partially populated and should not
1340 be used).
1341
1342 This is implemented by re-parsing the relevant source line(s). */
1343
1344 static const char *
1345 get_substring_ranges_for_loc (cpp_reader *pfile,
1346 string_concat_db *concats,
1347 location_t strloc,
1348 enum cpp_ttype type,
1349 cpp_substring_ranges &ranges)
1350 {
1351 gcc_assert (pfile);
1352
1353 if (strloc == UNKNOWN_LOCATION)
1354 return "unknown location";
1355
1356 /* If string concatenation has occurred at STRLOC, get the locations
1357 of all of the literal tokens making up the compound string.
1358 Otherwise, just use STRLOC. */
1359 int num_locs = 1;
1360 location_t *strlocs = &strloc;
1361 if (concats)
1362 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1363
1364 auto_cpp_string_vec strs (num_locs);
1365 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1366 for (int i = 0; i < num_locs; i++)
1367 {
1368 /* Get range of strloc. We will use it to locate the start and finish
1369 of the literal token within the line. */
1370 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1371
1372 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1373 /* If the string is within a macro expansion, we can't get at the
1374 end location. */
1375 return "macro expansion";
1376
1377 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1378 /* If so, we can't reliably determine where the token started within
1379 its line. */
1380 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1381
1382 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1383 /* If so, we can't reliably determine where the token finished within
1384 its line. */
1385 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1386
1387 expanded_location start
1388 = expand_location_to_spelling_point (src_range.m_start);
1389 expanded_location finish
1390 = expand_location_to_spelling_point (src_range.m_finish);
1391 if (start.file != finish.file)
1392 return "range endpoints are in different files";
1393 if (start.line != finish.line)
1394 return "range endpoints are on different lines";
1395 if (start.column > finish.column)
1396 return "range endpoints are reversed";
1397
1398 int line_width;
1399 const char *line = location_get_source_line (start.file, start.line,
1400 &line_width);
1401 if (line == NULL)
1402 return "unable to read source line";
1403
1404 /* Determine the location of the literal (including quotes
1405 and leading prefix chars, such as the 'u' in a u""
1406 token). */
1407 const char *literal = line + start.column - 1;
1408 int literal_length = finish.column - start.column + 1;
1409
1410 gcc_assert (line_width >= (start.column - 1 + literal_length));
1411 cpp_string from;
1412 from.len = literal_length;
1413 /* Make a copy of the literal, to avoid having to rely on
1414 the lifetime of the copy of the line within the cache.
1415 This will be released by the auto_cpp_string_vec dtor. */
1416 from.text = XDUPVEC (unsigned char, literal, literal_length);
1417 strs.safe_push (from);
1418
1419 /* For very long lines, a new linemap could have started
1420 halfway through the token.
1421 Ensure that the loc_reader uses the linemap of the
1422 *end* of the token for its start location. */
1423 const line_map_ordinary *final_ord_map;
1424 linemap_resolve_location (line_table, src_range.m_finish,
1425 LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1426 location_t start_loc
1427 = linemap_position_for_line_and_column (line_table, final_ord_map,
1428 start.line, start.column);
1429
1430 cpp_string_location_reader loc_reader (start_loc, line_table);
1431 loc_readers.safe_push (loc_reader);
1432 }
1433
1434 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1435 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1436 loc_readers.address (),
1437 num_locs, &ranges, type);
1438 if (err)
1439 return err;
1440
1441 /* Success: "ranges" should now contain information on the string. */
1442 return NULL;
1443 }
1444
1445 /* Attempt to populate *OUT_LOC with source location information on the
1446 given characters within the string literal found at STRLOC.
1447 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1448 character set.
1449
1450 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1451 and string literal "012345\n789"
1452 *OUT_LOC is written to with:
1453 "012345\n789"
1454 ~^~~~~
1455
1456 If CONCATS is non-NULL, then any string literals that the token at
1457 STRLOC was concatenated with are also considered.
1458
1459 This is implemented by re-parsing the relevant source line(s).
1460
1461 Return NULL if successful, or an error message if any errors occurred.
1462 Error messages are intended for GCC developers (to help debugging) rather
1463 than for end-users. */
1464
1465 const char *
1466 get_source_location_for_substring (cpp_reader *pfile,
1467 string_concat_db *concats,
1468 location_t strloc,
1469 enum cpp_ttype type,
1470 int caret_idx, int start_idx, int end_idx,
1471 source_location *out_loc)
1472 {
1473 gcc_checking_assert (caret_idx >= 0);
1474 gcc_checking_assert (start_idx >= 0);
1475 gcc_checking_assert (end_idx >= 0);
1476 gcc_assert (out_loc);
1477
1478 cpp_substring_ranges ranges;
1479 const char *err
1480 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1481 if (err)
1482 return err;
1483
1484 if (caret_idx >= ranges.get_num_ranges ())
1485 return "caret_idx out of range";
1486 if (start_idx >= ranges.get_num_ranges ())
1487 return "start_idx out of range";
1488 if (end_idx >= ranges.get_num_ranges ())
1489 return "end_idx out of range";
1490
1491 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1492 ranges.get_range (start_idx).m_start,
1493 ranges.get_range (end_idx).m_finish);
1494 return NULL;
1495 }
1496
1497 #if CHECKING_P
1498
1499 namespace selftest {
1500
1501 /* Selftests of location handling. */
1502
1503 /* Attempt to populate *OUT_RANGE with source location information on the
1504 given character within the string literal found at STRLOC.
1505 CHAR_IDX refers to an offset within the execution character set.
1506 If CONCATS is non-NULL, then any string literals that the token at
1507 STRLOC was concatenated with are also considered.
1508
1509 This is implemented by re-parsing the relevant source line(s).
1510
1511 Return NULL if successful, or an error message if any errors occurred.
1512 Error messages are intended for GCC developers (to help debugging) rather
1513 than for end-users. */
1514
1515 static const char *
1516 get_source_range_for_char (cpp_reader *pfile,
1517 string_concat_db *concats,
1518 location_t strloc,
1519 enum cpp_ttype type,
1520 int char_idx,
1521 source_range *out_range)
1522 {
1523 gcc_checking_assert (char_idx >= 0);
1524 gcc_assert (out_range);
1525
1526 cpp_substring_ranges ranges;
1527 const char *err
1528 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1529 if (err)
1530 return err;
1531
1532 if (char_idx >= ranges.get_num_ranges ())
1533 return "char_idx out of range";
1534
1535 *out_range = ranges.get_range (char_idx);
1536 return NULL;
1537 }
1538
1539 /* As get_source_range_for_char, but write to *OUT the number
1540 of ranges that are available. */
1541
1542 static const char *
1543 get_num_source_ranges_for_substring (cpp_reader *pfile,
1544 string_concat_db *concats,
1545 location_t strloc,
1546 enum cpp_ttype type,
1547 int *out)
1548 {
1549 gcc_assert (out);
1550
1551 cpp_substring_ranges ranges;
1552 const char *err
1553 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1554
1555 if (err)
1556 return err;
1557
1558 *out = ranges.get_num_ranges ();
1559 return NULL;
1560 }
1561
1562 /* Selftests of location handling. */
1563
1564 /* Helper function for verifying location data: when location_t
1565 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1566 as having column 0. */
1567
1568 static bool
1569 should_have_column_data_p (location_t loc)
1570 {
1571 if (IS_ADHOC_LOC (loc))
1572 loc = get_location_from_adhoc_loc (line_table, loc);
1573 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1574 return false;
1575 return true;
1576 }
1577
1578 /* Selftest for should_have_column_data_p. */
1579
1580 static void
1581 test_should_have_column_data_p ()
1582 {
1583 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1584 ASSERT_TRUE
1585 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1586 ASSERT_FALSE
1587 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1588 }
1589
1590 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1591 on LOC. */
1592
1593 static void
1594 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1595 location_t loc)
1596 {
1597 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1598 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1599 /* If location_t values are sufficiently high, then column numbers
1600 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1601 When close to the threshold, column numbers *may* be present: if
1602 the final linemap before the threshold contains a line that straddles
1603 the threshold, locations in that line have column information. */
1604 if (should_have_column_data_p (loc))
1605 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1606 }
1607
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for source_location/location_t beyond which line-map.c
     changes behavior (disabling of the range-packing optimization,
     disabling of column-tracking).  We can exercise these by starting
     the line_table at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */

struct line_table_case
{
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* The value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which to start the line_table's
     highest_location and highest_line.  */
  int m_base_location;
};
1634
1635 /* Constructor. Store the old value of line_table, and create a new
1636 one, using sane defaults. */
1637
1638 line_table_test::line_table_test ()
1639 {
1640 gcc_assert (saved_line_table == NULL);
1641 saved_line_table = line_table;
1642 line_table = ggc_alloc<line_maps> ();
1643 linemap_init (line_table, BUILTINS_LOCATION);
1644 gcc_assert (saved_line_table->reallocator);
1645 line_table->reallocator = saved_line_table->reallocator;
1646 gcc_assert (saved_line_table->round_alloc_size);
1647 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1648 line_table->default_range_bits = 0;
1649 }
1650
1651 /* Constructor. Store the old value of line_table, and create a new
1652 one, using the sitation described in CASE_. */
1653
1654 line_table_test::line_table_test (const line_table_case &case_)
1655 {
1656 gcc_assert (saved_line_table == NULL);
1657 saved_line_table = line_table;
1658 line_table = ggc_alloc<line_maps> ();
1659 linemap_init (line_table, BUILTINS_LOCATION);
1660 gcc_assert (saved_line_table->reallocator);
1661 line_table->reallocator = saved_line_table->reallocator;
1662 gcc_assert (saved_line_table->round_alloc_size);
1663 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1664 line_table->default_range_bits = case_.m_default_range_bits;
1665 if (case_.m_base_location)
1666 {
1667 line_table->highest_location = case_.m_base_location;
1668 line_table->highest_line = case_.m_base_location;
1669 }
1670 }
1671
1672 /* Destructor. Restore the old value of line_table. */
1673
1674 line_table_test::~line_table_test ()
1675 {
1676 gcc_assert (saved_line_table != NULL);
1677 line_table = saved_line_table;
1678 saved_line_table = NULL;
1679 }
1680
1681 /* Verify basic operation of ordinary linemaps. */
1682
1683 static void
1684 test_accessing_ordinary_linemaps (const line_table_case &case_)
1685 {
1686 line_table_test ltt (case_);
1687
1688 /* Build a simple linemap describing some locations. */
1689 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1690
1691 linemap_line_start (line_table, 1, 100);
1692 location_t loc_a = linemap_position_for_column (line_table, 1);
1693 location_t loc_b = linemap_position_for_column (line_table, 23);
1694
1695 linemap_line_start (line_table, 2, 100);
1696 location_t loc_c = linemap_position_for_column (line_table, 1);
1697 location_t loc_d = linemap_position_for_column (line_table, 17);
1698
1699 /* Example of a very long line. */
1700 linemap_line_start (line_table, 3, 2000);
1701 location_t loc_e = linemap_position_for_column (line_table, 700);
1702
1703 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1704
1705 /* Multiple files. */
1706 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1707 linemap_line_start (line_table, 1, 200);
1708 location_t loc_f = linemap_position_for_column (line_table, 150);
1709 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1710
1711 /* Verify that we can recover the location info. */
1712 assert_loceq ("foo.c", 1, 1, loc_a);
1713 assert_loceq ("foo.c", 1, 23, loc_b);
1714 assert_loceq ("foo.c", 2, 1, loc_c);
1715 assert_loceq ("foo.c", 2, 17, loc_d);
1716 assert_loceq ("foo.c", 3, 700, loc_e);
1717 assert_loceq ("bar.c", 1, 150, loc_f);
1718
1719 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1720 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1721
1722 /* Verify using make_location to build a range, and extracting data
1723 back from it. */
1724 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1725 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1726 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1727 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1728 ASSERT_EQ (loc_b, src_range.m_start);
1729 ASSERT_EQ (loc_d, src_range.m_finish);
1730 }
1731
1732 /* Verify various properties of UNKNOWN_LOCATION. */
1733
1734 static void
1735 test_unknown_location ()
1736 {
1737 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1738 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1739 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1740 }
1741
1742 /* Verify various properties of BUILTINS_LOCATION. */
1743
1744 static void
1745 test_builtins ()
1746 {
1747 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1748 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1749 }
1750
1751 /* Regression test for make_location.
1752 Ensure that we use pure locations for the start/finish of the range,
1753 rather than storing a packed or ad-hoc range as the start/finish. */
1754
1755 static void
1756 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1757 {
1758 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1759 with C++ frontend.
1760 ....................0000000001111111111222.
1761 ....................1234567890123456789012. */
1762 const char *content = " r += !aaa == bbb;\n";
1763 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1764 line_table_test ltt (case_);
1765 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1766
1767 const location_t c11 = linemap_position_for_column (line_table, 11);
1768 const location_t c12 = linemap_position_for_column (line_table, 12);
1769 const location_t c13 = linemap_position_for_column (line_table, 13);
1770 const location_t c14 = linemap_position_for_column (line_table, 14);
1771 const location_t c21 = linemap_position_for_column (line_table, 21);
1772
1773 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1774 return;
1775
1776 /* Use column 13 for the caret location, arbitrarily, to verify that we
1777 handle start != caret. */
1778 const location_t aaa = make_location (c13, c12, c14);
1779 ASSERT_EQ (c13, get_pure_location (aaa));
1780 ASSERT_EQ (c12, get_start (aaa));
1781 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1782 ASSERT_EQ (c14, get_finish (aaa));
1783 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1784
1785 /* Make a location using a location with a range as the start-point. */
1786 const location_t not_aaa = make_location (c11, aaa, c14);
1787 ASSERT_EQ (c11, get_pure_location (not_aaa));
1788 /* It should use the start location of the range, not store the range
1789 itself. */
1790 ASSERT_EQ (c12, get_start (not_aaa));
1791 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1792 ASSERT_EQ (c14, get_finish (not_aaa));
1793 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1794
1795 /* Similarly, make a location with a range as the end-point. */
1796 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1797 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1798 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1799 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1800 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1801 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1802 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1803 /* It should use the finish location of the range, not store the range
1804 itself. */
1805 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1806 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1807 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1808 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1809 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1810 }
1811
1812 /* Verify reading of input files (e.g. for caret-based diagnostics). */
1813
1814 static void
1815 test_reading_source_line ()
1816 {
1817 /* Create a tempfile and write some text to it. */
1818 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1819 "01234567890123456789\n"
1820 "This is the test text\n"
1821 "This is the 3rd line\n");
1822
1823 /* Read back a specific line from the tempfile. */
1824 int line_size;
1825 const char *source_line = location_get_source_line (tmp.get_filename (),
1826 2, &line_size);
1827 ASSERT_TRUE (source_line != NULL);
1828 ASSERT_EQ (21, line_size);
1829 if (!strncmp ("This is the test text",
1830 source_line, line_size))
1831 ::selftest::pass (SELFTEST_LOCATION,
1832 "source_line matched expected value");
1833 else
1834 ::selftest::fail (SELFTEST_LOCATION,
1835 "source_line did not match expected value");
1836
1837 }
1838
1839 /* Tests of lexing. */
1840
/* Verify that the result of cpp_token_as_text on token TOK from PARSER
   is the string EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)            \
  SELFTEST_BEGIN_STMT                                                  \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));   \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);          \
  SELFTEST_END_STMT
1849
1850 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1851 and ranges from EXP_START_COL to EXP_FINISH_COL.
1852 Use LOC as the effective location of the selftest. */
1853
1854 static void
1855 assert_token_loc_eq (const location &loc,
1856 const cpp_token *tok,
1857 const char *exp_filename, int exp_linenum,
1858 int exp_start_col, int exp_finish_col)
1859 {
1860 location_t tok_loc = tok->src_loc;
1861 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1862 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1863
1864 /* If location_t values are sufficiently high, then column numbers
1865 will be unavailable. */
1866 if (!should_have_column_data_p (tok_loc))
1867 return;
1868
1869 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1870 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1871 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1872 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1873 }
1874
/* Verify TOK->src_loc against EXP_FILENAME, EXP_LINENUM, EXP_START_COL
   and EXP_FINISH_COL via assert_token_loc_eq, using SELFTEST_LOCATION
   (i.e. the site of the macro's use) as the effective location of the
   selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,            \
                            EXP_START_COL, EXP_FINISH_COL)             \
  assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME),       \
                       (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1882
1883 /* Test of lexing a file using libcpp, verifying tokens and their
1884 location information. */
1885
1886 static void
1887 test_lexer (const line_table_case &case_)
1888 {
1889 /* Create a tempfile and write some text to it. */
1890 const char *content =
1891 /*00000000011111111112222222222333333.3333444444444.455555555556
1892 12345678901234567890123456789012345.6789012345678.901234567890. */
1893 ("test_name /* c-style comment */\n"
1894 " \"test literal\"\n"
1895 " // test c++-style comment\n"
1896 " 42\n");
1897 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1898
1899 line_table_test ltt (case_);
1900
1901 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1902
1903 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1904 ASSERT_NE (fname, NULL);
1905
1906 /* Verify that we get the expected tokens back, with the correct
1907 location information. */
1908
1909 location_t loc;
1910 const cpp_token *tok;
1911 tok = cpp_get_token_with_location (parser, &loc);
1912 ASSERT_NE (tok, NULL);
1913 ASSERT_EQ (tok->type, CPP_NAME);
1914 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1915 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1916
1917 tok = cpp_get_token_with_location (parser, &loc);
1918 ASSERT_NE (tok, NULL);
1919 ASSERT_EQ (tok->type, CPP_STRING);
1920 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1921 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1922
1923 tok = cpp_get_token_with_location (parser, &loc);
1924 ASSERT_NE (tok, NULL);
1925 ASSERT_EQ (tok->type, CPP_NUMBER);
1926 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
1927 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
1928
1929 tok = cpp_get_token_with_location (parser, &loc);
1930 ASSERT_NE (tok, NULL);
1931 ASSERT_EQ (tok->type, CPP_EOF);
1932
1933 cpp_finish (parser, NULL);
1934 cpp_destroy (parser);
1935 }
1936
/* Forward declarations: lexer_test_options::apply takes a lexer_test,
   and lexer_test's constructor takes a lexer_test_options.  */

struct lexer_test;
class lexer_test_options;

/* Abstract interface for customizing a lexer_test.
   The lexer_test constructor calls the "apply" vfunc, passing itself
   as TEST.  */

class lexer_test_options
{
 public:
  virtual void apply (lexer_test &test) = 0;
};
1950
1951 /* A struct for writing lexer tests. */
1952
1953 struct lexer_test
1954 {
1955 lexer_test (const line_table_case &case_, const char *content,
1956 lexer_test_options *options);
1957 ~lexer_test ();
1958
1959 const cpp_token *get_token ();
1960
1961 temp_source_file m_tempfile;
1962 line_table_test m_ltt;
1963 cpp_reader *m_parser;
1964 string_concat_db m_concats;
1965 };
1966
1967 /* Use an EBCDIC encoding for the execution charset, specifically
1968 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
1969
1970 This exercises iconv integration within libcpp.
1971 Not every build of iconv supports the given charset,
1972 so we need to flag this error and handle it gracefully. */
1973
1974 class ebcdic_execution_charset : public lexer_test_options
1975 {
1976 public:
1977 ebcdic_execution_charset () : m_num_iconv_errors (0)
1978 {
1979 gcc_assert (s_singleton == NULL);
1980 s_singleton = this;
1981 }
1982 ~ebcdic_execution_charset ()
1983 {
1984 gcc_assert (s_singleton == this);
1985 s_singleton = NULL;
1986 }
1987
1988 void apply (lexer_test &test) FINAL OVERRIDE
1989 {
1990 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
1991 cpp_opts->narrow_charset = "IBM1047";
1992
1993 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
1994 callbacks->error = on_error;
1995 }
1996
1997 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
1998 int level ATTRIBUTE_UNUSED,
1999 int reason ATTRIBUTE_UNUSED,
2000 rich_location *richloc ATTRIBUTE_UNUSED,
2001 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2002 ATTRIBUTE_FPTR_PRINTF(5,0)
2003 {
2004 gcc_assert (s_singleton);
2005 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2006 when the local iconv build doesn't support the conversion. */
2007 if (strstr (msgid, "not supported by iconv"))
2008 {
2009 s_singleton->m_num_iconv_errors++;
2010 return true;
2011 }
2012
2013 /* Otherwise, we have an unexpected error. */
2014 abort ();
2015 }
2016
2017 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2018
2019 private:
2020 static ebcdic_execution_charset *s_singleton;
2021 int m_num_iconv_errors;
2022 };
2023
2024 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2025
2026 /* Constructor. Override line_table with a new instance based on CASE_,
2027 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2028 start parsing the tempfile. */
2029
2030 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2031 lexer_test_options *options) :
2032 /* Create a tempfile and write the text to it. */
2033 m_tempfile (SELFTEST_LOCATION, ".c", content),
2034 m_ltt (case_),
2035 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2036 m_concats ()
2037 {
2038 if (options)
2039 options->apply (*this);
2040
2041 cpp_init_iconv (m_parser);
2042
2043 /* Parse the file. */
2044 const char *fname = cpp_read_main_file (m_parser,
2045 m_tempfile.get_filename ());
2046 ASSERT_NE (fname, NULL);
2047 }
2048
2049 /* Destructor. Verify that the next token in m_parser is EOF. */
2050
2051 lexer_test::~lexer_test ()
2052 {
2053 location_t loc;
2054 const cpp_token *tok;
2055
2056 tok = cpp_get_token_with_location (m_parser, &loc);
2057 ASSERT_NE (tok, NULL);
2058 ASSERT_EQ (tok->type, CPP_EOF);
2059
2060 cpp_finish (m_parser, NULL);
2061 cpp_destroy (m_parser);
2062 }
2063
2064 /* Get the next token from m_parser. */
2065
2066 const cpp_token *
2067 lexer_test::get_token ()
2068 {
2069 location_t loc;
2070 const cpp_token *tok;
2071
2072 tok = cpp_get_token_with_location (m_parser, &loc);
2073 ASSERT_NE (tok, NULL);
2074 return tok;
2075 }
2076
2077 /* Verify that locations within string literals are correctly handled. */
2078
2079 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2080 using the string concatenation database for TEST.
2081
2082 Assert that the character at index IDX is on EXPECTED_LINE,
2083 and that it begins at column EXPECTED_START_COL and ends at
2084 EXPECTED_FINISH_COL (unless the locations are beyond
2085 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2086 columns). */
2087
2088 static void
2089 assert_char_at_range (const location &loc,
2090 lexer_test& test,
2091 location_t strloc, enum cpp_ttype type, int idx,
2092 int expected_line, int expected_start_col,
2093 int expected_finish_col)
2094 {
2095 cpp_reader *pfile = test.m_parser;
2096 string_concat_db *concats = &test.m_concats;
2097
2098 source_range actual_range;
2099 const char *err
2100 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2101 &actual_range);
2102 if (should_have_column_data_p (strloc))
2103 ASSERT_EQ_AT (loc, NULL, err);
2104 else
2105 {
2106 ASSERT_STREQ_AT (loc,
2107 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2108 err);
2109 return;
2110 }
2111
2112 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2113 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2114 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2115 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2116
2117 if (should_have_column_data_p (actual_range.m_start))
2118 {
2119 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2120 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2121 }
2122 if (should_have_column_data_p (actual_range.m_finish))
2123 {
2124 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2125 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2126 }
2127 }
2128
2129 /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2130 the effective location of any errors. */
2131
2132 #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2133 EXPECTED_START_COL, EXPECTED_FINISH_COL) \
2134 assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2135 (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2136 (EXPECTED_FINISH_COL))
2137
2138 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2139 using the string concatenation database for TEST.
2140
2141 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2142
2143 static void
2144 assert_num_substring_ranges (const location &loc,
2145 lexer_test& test,
2146 location_t strloc,
2147 enum cpp_ttype type,
2148 int expected_num_ranges)
2149 {
2150 cpp_reader *pfile = test.m_parser;
2151 string_concat_db *concats = &test.m_concats;
2152
2153 int actual_num_ranges = -1;
2154 const char *err
2155 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2156 &actual_num_ranges);
2157 if (should_have_column_data_p (strloc))
2158 ASSERT_EQ_AT (loc, NULL, err);
2159 else
2160 {
2161 ASSERT_STREQ_AT (loc,
2162 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2163 err);
2164 return;
2165 }
2166 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2167 }
2168
2169 /* Macro for calling assert_num_substring_ranges, supplying
2170 SELFTEST_LOCATION for the effective location of any errors. */
2171
2172 #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2173 EXPECTED_NUM_RANGES) \
2174 assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2175 (TYPE), (EXPECTED_NUM_RANGES))
2176
2177
2178 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2179 returns an error (using the string concatenation database for TEST). */
2180
2181 static void
2182 assert_has_no_substring_ranges (const location &loc,
2183 lexer_test& test,
2184 location_t strloc,
2185 enum cpp_ttype type,
2186 const char *expected_err)
2187 {
2188 cpp_reader *pfile = test.m_parser;
2189 string_concat_db *concats = &test.m_concats;
2190 cpp_substring_ranges ranges;
2191 const char *actual_err
2192 = get_substring_ranges_for_loc (pfile, concats, strloc,
2193 type, ranges);
2194 if (should_have_column_data_p (strloc))
2195 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2196 else
2197 ASSERT_STREQ_AT (loc,
2198 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2199 actual_err);
2200 }
2201
2202 #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
2203 assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2204 (STRLOC), (TYPE), (ERR))
2205
2206 /* Lex a simple string literal. Verify the substring location data, before
2207 and after running cpp_interpret_string on it. */
2208
2209 static void
2210 test_lexer_string_locations_simple (const line_table_case &case_)
2211 {
2212 /* Digits 0-9 (with 0 at column 10), the simple way.
2213 ....................000000000.11111111112.2222222223333333333
2214 ....................123456789.01234567890.1234567890123456789
2215 We add a trailing comment to ensure that we correctly locate
2216 the end of the string literal token. */
2217 const char *content = " \"0123456789\" /* not a string */\n";
2218 lexer_test test (case_, content, NULL);
2219
2220 /* Verify that we get the expected token back, with the correct
2221 location information. */
2222 const cpp_token *tok = test.get_token ();
2223 ASSERT_EQ (tok->type, CPP_STRING);
2224 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2225 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2226
2227 /* At this point in lexing, the quote characters are treated as part of
2228 the string (they are stripped off by cpp_interpret_string). */
2229
2230 ASSERT_EQ (tok->val.str.len, 12);
2231
2232 /* Verify that cpp_interpret_string works. */
2233 cpp_string dst_string;
2234 const enum cpp_ttype type = CPP_STRING;
2235 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2236 &dst_string, type);
2237 ASSERT_TRUE (result);
2238 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2239 free (const_cast <unsigned char *> (dst_string.text));
2240
2241 /* Verify ranges of individual characters. This no longer includes the
2242 opening quote, but does include the closing quote. */
2243 for (int i = 0; i <= 10; i++)
2244 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2245 10 + i, 10 + i);
2246
2247 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2248 }
2249
2250 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2251 encoding. */
2252
2253 static void
2254 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2255 {
2256 /* EBCDIC support requires iconv. */
2257 if (!HAVE_ICONV)
2258 return;
2259
2260 /* Digits 0-9 (with 0 at column 10), the simple way.
2261 ....................000000000.11111111112.2222222223333333333
2262 ....................123456789.01234567890.1234567890123456789
2263 We add a trailing comment to ensure that we correctly locate
2264 the end of the string literal token. */
2265 const char *content = " \"0123456789\" /* not a string */\n";
2266 ebcdic_execution_charset use_ebcdic;
2267 lexer_test test (case_, content, &use_ebcdic);
2268
2269 /* Verify that we get the expected token back, with the correct
2270 location information. */
2271 const cpp_token *tok = test.get_token ();
2272 ASSERT_EQ (tok->type, CPP_STRING);
2273 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2274 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2275
2276 /* At this point in lexing, the quote characters are treated as part of
2277 the string (they are stripped off by cpp_interpret_string). */
2278
2279 ASSERT_EQ (tok->val.str.len, 12);
2280
2281 /* The remainder of the test requires an iconv implementation that
2282 can convert from UTF-8 to the EBCDIC encoding requested above. */
2283 if (use_ebcdic.iconv_errors_occurred_p ())
2284 return;
2285
2286 /* Verify that cpp_interpret_string works. */
2287 cpp_string dst_string;
2288 const enum cpp_ttype type = CPP_STRING;
2289 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2290 &dst_string, type);
2291 ASSERT_TRUE (result);
2292 /* We should now have EBCDIC-encoded text, specifically
2293 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2294 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2295 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2296 (const char *)dst_string.text);
2297 free (const_cast <unsigned char *> (dst_string.text));
2298
2299 /* Verify that we don't attempt to record substring location information
2300 for such cases. */
2301 ASSERT_HAS_NO_SUBSTRING_RANGES
2302 (test, tok->src_loc, type,
2303 "execution character set != source character set");
2304 }
2305
2306 /* Lex a string literal containing a hex-escaped character.
2307 Verify the substring location data, before and after running
2308 cpp_interpret_string on it. */
2309
2310 static void
2311 test_lexer_string_locations_hex (const line_table_case &case_)
2312 {
2313 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2314 and with a space in place of digit 6, to terminate the escaped
2315 hex code.
2316 ....................000000000.111111.11112222.
2317 ....................123456789.012345.67890123. */
2318 const char *content = " \"01234\\x35 789\"\n";
2319 lexer_test test (case_, content, NULL);
2320
2321 /* Verify that we get the expected token back, with the correct
2322 location information. */
2323 const cpp_token *tok = test.get_token ();
2324 ASSERT_EQ (tok->type, CPP_STRING);
2325 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2326 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2327
2328 /* At this point in lexing, the quote characters are treated as part of
2329 the string (they are stripped off by cpp_interpret_string). */
2330 ASSERT_EQ (tok->val.str.len, 15);
2331
2332 /* Verify that cpp_interpret_string works. */
2333 cpp_string dst_string;
2334 const enum cpp_ttype type = CPP_STRING;
2335 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2336 &dst_string, type);
2337 ASSERT_TRUE (result);
2338 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2339 free (const_cast <unsigned char *> (dst_string.text));
2340
2341 /* Verify ranges of individual characters. This no longer includes the
2342 opening quote, but does include the closing quote. */
2343 for (int i = 0; i <= 4; i++)
2344 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2345 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2346 for (int i = 6; i <= 10; i++)
2347 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2348
2349 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2350 }
2351
2352 /* Lex a string literal containing an octal-escaped character.
2353 Verify the substring location data after running cpp_interpret_string
2354 on it. */
2355
2356 static void
2357 test_lexer_string_locations_oct (const line_table_case &case_)
2358 {
2359 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2360 and with a space in place of digit 6, to terminate the escaped
2361 octal code.
2362 ....................000000000.111111.11112222.2222223333333333444
2363 ....................123456789.012345.67890123.4567890123456789012 */
2364 const char *content = " \"01234\\065 789\" /* not a string */\n";
2365 lexer_test test (case_, content, NULL);
2366
2367 /* Verify that we get the expected token back, with the correct
2368 location information. */
2369 const cpp_token *tok = test.get_token ();
2370 ASSERT_EQ (tok->type, CPP_STRING);
2371 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2372
2373 /* Verify that cpp_interpret_string works. */
2374 cpp_string dst_string;
2375 const enum cpp_ttype type = CPP_STRING;
2376 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2377 &dst_string, type);
2378 ASSERT_TRUE (result);
2379 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2380 free (const_cast <unsigned char *> (dst_string.text));
2381
2382 /* Verify ranges of individual characters. This no longer includes the
2383 opening quote, but does include the closing quote. */
2384 for (int i = 0; i < 5; i++)
2385 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2386 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2387 for (int i = 6; i <= 10; i++)
2388 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2389
2390 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2391 }
2392
2393 /* Test of string literal containing letter escapes. */
2394
2395 static void
2396 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2397 {
2398 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2399 .....................000000000.1.11111.1.1.11222.22222223333333
2400 .....................123456789.0.12345.6.7.89012.34567890123456. */
2401 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2402 lexer_test test (case_, content, NULL);
2403
2404 /* Verify that we get the expected tokens back. */
2405 const cpp_token *tok = test.get_token ();
2406 ASSERT_EQ (tok->type, CPP_STRING);
2407 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2408
2409 /* Verify ranges of individual characters. */
2410 /* "\t". */
2411 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2412 0, 1, 10, 11);
2413 /* "foo". */
2414 for (int i = 1; i <= 3; i++)
2415 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2416 i, 1, 11 + i, 11 + i);
2417 /* "\\" and "\n". */
2418 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2419 4, 1, 15, 16);
2420 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2421 5, 1, 17, 18);
2422
2423 /* "bar" and closing quote for nul-terminator. */
2424 for (int i = 6; i <= 9; i++)
2425 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2426 i, 1, 13 + i, 13 + i);
2427
2428 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2429 }
2430
2431 /* Another test of a string literal containing a letter escape.
2432 Based on string seen in
2433 printf ("%-%\n");
2434 in gcc.dg/format/c90-printf-1.c. */
2435
2436 static void
2437 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2438 {
2439 /* .....................000000000.1111.11.1111.22222222223.
2440 .....................123456789.0123.45.6789.01234567890. */
2441 const char *content = (" \"%-%\\n\" /* non-str */\n");
2442 lexer_test test (case_, content, NULL);
2443
2444 /* Verify that we get the expected tokens back. */
2445 const cpp_token *tok = test.get_token ();
2446 ASSERT_EQ (tok->type, CPP_STRING);
2447 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2448
2449 /* Verify ranges of individual characters. */
2450 /* "%-%". */
2451 for (int i = 0; i < 3; i++)
2452 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2453 i, 1, 10 + i, 10 + i);
2454 /* "\n". */
2455 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2456 3, 1, 13, 14);
2457
2458 /* Closing quote for nul-terminator. */
2459 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2460 4, 1, 15, 15);
2461
2462 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2463 }
2464
2465 /* Lex a string literal containing UCN 4 characters.
2466 Verify the substring location data after running cpp_interpret_string
2467 on it. */
2468
2469 static void
2470 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2471 {
2472 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2473 as UCN 4.
2474 ....................000000000.111111.111122.222222223.33333333344444
2475 ....................123456789.012345.678901.234567890.12345678901234 */
2476 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2477 lexer_test test (case_, content, NULL);
2478
2479 /* Verify that we get the expected token back, with the correct
2480 location information. */
2481 const cpp_token *tok = test.get_token ();
2482 ASSERT_EQ (tok->type, CPP_STRING);
2483 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2484
2485 /* Verify that cpp_interpret_string works.
2486 The string should be encoded in the execution character
2487 set. Assuming that that is UTF-8, we should have the following:
2488 ----------- ---- ----- ------- ----------------
2489 Byte offset Byte Octal Unicode Source Column(s)
2490 ----------- ---- ----- ------- ----------------
2491 0 0x30 '0' 10
2492 1 0x31 '1' 11
2493 2 0x32 '2' 12
2494 3 0x33 '3' 13
2495 4 0x34 '4' 14
2496 5 0xE2 \342 U+2174 15-20
2497 6 0x85 \205 (cont) 15-20
2498 7 0xB4 \264 (cont) 15-20
2499 8 0xE2 \342 U+2175 21-26
2500 9 0x85 \205 (cont) 21-26
2501 10 0xB5 \265 (cont) 21-26
2502 11 0x37 '7' 27
2503 12 0x38 '8' 28
2504 13 0x39 '9' 29
2505 14 0x00 30 (closing quote)
2506 ----------- ---- ----- ------- ---------------. */
2507
2508 cpp_string dst_string;
2509 const enum cpp_ttype type = CPP_STRING;
2510 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2511 &dst_string, type);
2512 ASSERT_TRUE (result);
2513 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2514 (const char *)dst_string.text);
2515 free (const_cast <unsigned char *> (dst_string.text));
2516
2517 /* Verify ranges of individual characters. This no longer includes the
2518 opening quote, but does include the closing quote.
2519 '01234'. */
2520 for (int i = 0; i <= 4; i++)
2521 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2522 /* U+2174. */
2523 for (int i = 5; i <= 7; i++)
2524 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2525 /* U+2175. */
2526 for (int i = 8; i <= 10; i++)
2527 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2528 /* '789' and nul terminator */
2529 for (int i = 11; i <= 14; i++)
2530 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2531
2532 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2533 }
2534
2535 /* Lex a string literal containing UCN 8 characters.
2536 Verify the substring location data after running cpp_interpret_string
2537 on it. */
2538
2539 static void
2540 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2541 {
2542 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2543 ....................000000000.111111.1111222222.2222333333333.344444
2544 ....................123456789.012345.6789012345.6789012345678.901234 */
2545 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2546 lexer_test test (case_, content, NULL);
2547
2548 /* Verify that we get the expected token back, with the correct
2549 location information. */
2550 const cpp_token *tok = test.get_token ();
2551 ASSERT_EQ (tok->type, CPP_STRING);
2552 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2553 "\"01234\\U00002174\\U00002175789\"");
2554
2555 /* Verify that cpp_interpret_string works.
2556 The UTF-8 encoding of the string is identical to that from
2557 the ucn4 testcase above; the only difference is the column
2558 locations. */
2559 cpp_string dst_string;
2560 const enum cpp_ttype type = CPP_STRING;
2561 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2562 &dst_string, type);
2563 ASSERT_TRUE (result);
2564 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2565 (const char *)dst_string.text);
2566 free (const_cast <unsigned char *> (dst_string.text));
2567
2568 /* Verify ranges of individual characters. This no longer includes the
2569 opening quote, but does include the closing quote.
2570 '01234'. */
2571 for (int i = 0; i <= 4; i++)
2572 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2573 /* U+2174. */
2574 for (int i = 5; i <= 7; i++)
2575 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2576 /* U+2175. */
2577 for (int i = 8; i <= 10; i++)
2578 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2579 /* '789' at columns 35-37 */
2580 for (int i = 11; i <= 13; i++)
2581 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2582 /* Closing quote/nul-terminator at column 38. */
2583 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2584
2585 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2586 }
2587
/* Read the 32-bit value stored in big-endian byte order at
   PTR_BE_VALUE and return it in host byte order.  The storage is read
   one byte at a time, most significant byte first.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t value = 0;

  /* Shift in each byte below the ones already accumulated.  */
  for (int i = 0; i < 4; i++)
    value = (value << 8) | (uint32_t) bytes[i];

  return value;
}
2599
2600 /* Lex a wide string literal and verify that attempts to read substring
2601 location data from it fail gracefully. */
2602
2603 static void
2604 test_lexer_string_locations_wide_string (const line_table_case &case_)
2605 {
2606 /* Digits 0-9.
2607 ....................000000000.11111111112.22222222233333
2608 ....................123456789.01234567890.12345678901234 */
2609 const char *content = " L\"0123456789\" /* non-str */\n";
2610 lexer_test test (case_, content, NULL);
2611
2612 /* Verify that we get the expected token back, with the correct
2613 location information. */
2614 const cpp_token *tok = test.get_token ();
2615 ASSERT_EQ (tok->type, CPP_WSTRING);
2616 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2617
2618 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2619 cpp_string dst_string;
2620 const enum cpp_ttype type = CPP_WSTRING;
2621 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2622 &dst_string, type);
2623 ASSERT_TRUE (result);
2624 /* The cpp_reader defaults to big-endian with
2625 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2626 now be encoded as UTF-32BE. */
2627 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2628 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2629 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2630 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2631 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2632 free (const_cast <unsigned char *> (dst_string.text));
2633
2634 /* We don't yet support generating substring location information
2635 for L"" strings. */
2636 ASSERT_HAS_NO_SUBSTRING_RANGES
2637 (test, tok->src_loc, type,
2638 "execution character set != source character set");
2639 }
2640
/* Read the 16-bit value stored in big-endian byte order at
   PTR_BE_VALUE and return it in host byte order.  The storage is read
   one byte at a time, most significant byte first.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high = bytes[0];
  const unsigned int low = bytes[1];

  return (uint16_t) ((high << 8) | low);
}
2649
2650 /* Lex a u"" string literal and verify that attempts to read substring
2651 location data from it fail gracefully. */
2652
2653 static void
2654 test_lexer_string_locations_string16 (const line_table_case &case_)
2655 {
2656 /* Digits 0-9.
2657 ....................000000000.11111111112.22222222233333
2658 ....................123456789.01234567890.12345678901234 */
2659 const char *content = " u\"0123456789\" /* non-str */\n";
2660 lexer_test test (case_, content, NULL);
2661
2662 /* Verify that we get the expected token back, with the correct
2663 location information. */
2664 const cpp_token *tok = test.get_token ();
2665 ASSERT_EQ (tok->type, CPP_STRING16);
2666 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2667
2668 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2669 cpp_string dst_string;
2670 const enum cpp_ttype type = CPP_STRING16;
2671 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2672 &dst_string, type);
2673 ASSERT_TRUE (result);
2674
2675 /* The cpp_reader defaults to big-endian, so dst_string should
2676 now be encoded as UTF-16BE. */
2677 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2678 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2679 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2680 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2681 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2682 free (const_cast <unsigned char *> (dst_string.text));
2683
2684 /* We don't yet support generating substring location information
2685 for L"" strings. */
2686 ASSERT_HAS_NO_SUBSTRING_RANGES
2687 (test, tok->src_loc, type,
2688 "execution character set != source character set");
2689 }
2690
2691 /* Lex a U"" string literal and verify that attempts to read substring
2692 location data from it fail gracefully. */
2693
2694 static void
2695 test_lexer_string_locations_string32 (const line_table_case &case_)
2696 {
2697 /* Digits 0-9.
2698 ....................000000000.11111111112.22222222233333
2699 ....................123456789.01234567890.12345678901234 */
2700 const char *content = " U\"0123456789\" /* non-str */\n";
2701 lexer_test test (case_, content, NULL);
2702
2703 /* Verify that we get the expected token back, with the correct
2704 location information. */
2705 const cpp_token *tok = test.get_token ();
2706 ASSERT_EQ (tok->type, CPP_STRING32);
2707 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2708
2709 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2710 cpp_string dst_string;
2711 const enum cpp_ttype type = CPP_STRING32;
2712 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2713 &dst_string, type);
2714 ASSERT_TRUE (result);
2715
2716 /* The cpp_reader defaults to big-endian, so dst_string should
2717 now be encoded as UTF-32BE. */
2718 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2719 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2720 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2721 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2722 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2723 free (const_cast <unsigned char *> (dst_string.text));
2724
2725 /* We don't yet support generating substring location information
2726 for L"" strings. */
2727 ASSERT_HAS_NO_SUBSTRING_RANGES
2728 (test, tok->src_loc, type,
2729 "execution character set != source character set");
2730 }
2731
2732 /* Lex a u8-string literal.
2733 Verify the substring location data after running cpp_interpret_string
2734 on it. */
2735
2736 static void
2737 test_lexer_string_locations_u8 (const line_table_case &case_)
2738 {
2739 /* Digits 0-9.
2740 ....................000000000.11111111112.22222222233333
2741 ....................123456789.01234567890.12345678901234 */
2742 const char *content = " u8\"0123456789\" /* non-str */\n";
2743 lexer_test test (case_, content, NULL);
2744
2745 /* Verify that we get the expected token back, with the correct
2746 location information. */
2747 const cpp_token *tok = test.get_token ();
2748 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2749 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2750
2751 /* Verify that cpp_interpret_string works. */
2752 cpp_string dst_string;
2753 const enum cpp_ttype type = CPP_STRING;
2754 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2755 &dst_string, type);
2756 ASSERT_TRUE (result);
2757 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2758 free (const_cast <unsigned char *> (dst_string.text));
2759
2760 /* Verify ranges of individual characters. This no longer includes the
2761 opening quote, but does include the closing quote. */
2762 for (int i = 0; i <= 10; i++)
2763 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2764 }
2765
2766 /* Lex a string literal containing UTF-8 source characters.
2767 Verify the substring location data after running cpp_interpret_string
2768 on it. */
2769
2770 static void
2771 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2772 {
2773 /* This string literal is written out to the source file as UTF-8,
2774 and is of the form "before mojibake after", where "mojibake"
2775 is written as the following four unicode code points:
2776 U+6587 CJK UNIFIED IDEOGRAPH-6587
2777 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2778 U+5316 CJK UNIFIED IDEOGRAPH-5316
2779 U+3051 HIRAGANA LETTER KE.
2780 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2781 "before" and "after" are 1 byte per unicode character.
2782
2783 The numbering shown are "columns", which are *byte* numbers within
2784 the line, rather than unicode character numbers.
2785
2786 .................... 000000000.1111111.
2787 .................... 123456789.0123456. */
2788 const char *content = (" \"before "
2789 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2790 UTF-8: 0xE6 0x96 0x87
2791 C octal escaped UTF-8: \346\226\207
2792 "column" numbers: 17-19. */
2793 "\346\226\207"
2794
2795 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2796 UTF-8: 0xE5 0xAD 0x97
2797 C octal escaped UTF-8: \345\255\227
2798 "column" numbers: 20-22. */
2799 "\345\255\227"
2800
2801 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2802 UTF-8: 0xE5 0x8C 0x96
2803 C octal escaped UTF-8: \345\214\226
2804 "column" numbers: 23-25. */
2805 "\345\214\226"
2806
2807 /* U+3051 HIRAGANA LETTER KE
2808 UTF-8: 0xE3 0x81 0x91
2809 C octal escaped UTF-8: \343\201\221
2810 "column" numbers: 26-28. */
2811 "\343\201\221"
2812
2813 /* column numbers 29 onwards
2814 2333333.33334444444444
2815 9012345.67890123456789. */
2816 " after\" /* non-str */\n");
2817 lexer_test test (case_, content, NULL);
2818
2819 /* Verify that we get the expected token back, with the correct
2820 location information. */
2821 const cpp_token *tok = test.get_token ();
2822 ASSERT_EQ (tok->type, CPP_STRING);
2823 ASSERT_TOKEN_AS_TEXT_EQ
2824 (test.m_parser, tok,
2825 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2826
2827 /* Verify that cpp_interpret_string works. */
2828 cpp_string dst_string;
2829 const enum cpp_ttype type = CPP_STRING;
2830 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2831 &dst_string, type);
2832 ASSERT_TRUE (result);
2833 ASSERT_STREQ
2834 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2835 (const char *)dst_string.text);
2836 free (const_cast <unsigned char *> (dst_string.text));
2837
2838 /* Verify ranges of individual characters. This no longer includes the
2839 opening quote, but does include the closing quote.
2840 Assuming that both source and execution encodings are UTF-8, we have
2841 a run of 25 octets in each, plus the NUL terminator. */
2842 for (int i = 0; i < 25; i++)
2843 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2844 /* NUL-terminator should use the closing quote at column 35. */
2845 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
2846
2847 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
2848 }
2849
2850 /* Test of string literal concatenation. */
2851
2852 static void
2853 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
2854 {
2855 /* Digits 0-9.
2856 .....................000000000.111111.11112222222222
2857 .....................123456789.012345.67890123456789. */
2858 const char *content = (" \"01234\" /* non-str */\n"
2859 " \"56789\" /* non-str */\n");
2860 lexer_test test (case_, content, NULL);
2861
2862 location_t input_locs[2];
2863
2864 /* Verify that we get the expected tokens back. */
2865 auto_vec <cpp_string> input_strings;
2866 const cpp_token *tok_a = test.get_token ();
2867 ASSERT_EQ (tok_a->type, CPP_STRING);
2868 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
2869 input_strings.safe_push (tok_a->val.str);
2870 input_locs[0] = tok_a->src_loc;
2871
2872 const cpp_token *tok_b = test.get_token ();
2873 ASSERT_EQ (tok_b->type, CPP_STRING);
2874 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
2875 input_strings.safe_push (tok_b->val.str);
2876 input_locs[1] = tok_b->src_loc;
2877
2878 /* Verify that cpp_interpret_string works. */
2879 cpp_string dst_string;
2880 const enum cpp_ttype type = CPP_STRING;
2881 bool result = cpp_interpret_string (test.m_parser,
2882 input_strings.address (), 2,
2883 &dst_string, type);
2884 ASSERT_TRUE (result);
2885 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2886 free (const_cast <unsigned char *> (dst_string.text));
2887
2888 /* Simulate c-lex.c's lex_string in order to record concatenation. */
2889 test.m_concats.record_string_concatenation (2, input_locs);
2890
2891 location_t initial_loc = input_locs[0];
2892
2893 /* "01234" on line 1. */
2894 for (int i = 0; i <= 4; i++)
2895 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
2896 /* "56789" in line 2, plus its closing quote for the nul terminator. */
2897 for (int i = 5; i <= 10; i++)
2898 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
2899
2900 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
2901 }
2902
2903 /* Another test of string literal concatenation. */
2904
2905 static void
2906 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
2907 {
2908 /* Digits 0-9.
2909 .....................000000000.111.11111112222222
2910 .....................123456789.012.34567890123456. */
2911 const char *content = (" \"01\" /* non-str */\n"
2912 " \"23\" /* non-str */\n"
2913 " \"45\" /* non-str */\n"
2914 " \"67\" /* non-str */\n"
2915 " \"89\" /* non-str */\n");
2916 lexer_test test (case_, content, NULL);
2917
2918 auto_vec <cpp_string> input_strings;
2919 location_t input_locs[5];
2920
2921 /* Verify that we get the expected tokens back. */
2922 for (int i = 0; i < 5; i++)
2923 {
2924 const cpp_token *tok = test.get_token ();
2925 ASSERT_EQ (tok->type, CPP_STRING);
2926 input_strings.safe_push (tok->val.str);
2927 input_locs[i] = tok->src_loc;
2928 }
2929
2930 /* Verify that cpp_interpret_string works. */
2931 cpp_string dst_string;
2932 const enum cpp_ttype type = CPP_STRING;
2933 bool result = cpp_interpret_string (test.m_parser,
2934 input_strings.address (), 5,
2935 &dst_string, type);
2936 ASSERT_TRUE (result);
2937 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2938 free (const_cast <unsigned char *> (dst_string.text));
2939
2940 /* Simulate c-lex.c's lex_string in order to record concatenation. */
2941 test.m_concats.record_string_concatenation (5, input_locs);
2942
2943 location_t initial_loc = input_locs[0];
2944
2945 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
2946 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
2947 and expect get_source_range_for_substring to fail.
2948 However, for a string concatenation test, we can have a case
2949 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
2950 but subsequent strings can be after it.
2951 Attempting to detect this within assert_char_at_range
2952 would overcomplicate the logic for the common test cases, so
2953 we detect it here. */
2954 if (should_have_column_data_p (input_locs[0])
2955 && !should_have_column_data_p (input_locs[4]))
2956 {
2957 /* Verify that get_source_range_for_substring gracefully rejects
2958 this case. */
2959 source_range actual_range;
2960 const char *err
2961 = get_source_range_for_char (test.m_parser, &test.m_concats,
2962 initial_loc, type, 0, &actual_range);
2963 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
2964 return;
2965 }
2966
2967 for (int i = 0; i < 5; i++)
2968 for (int j = 0; j < 2; j++)
2969 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
2970 i + 1, 10 + j, 10 + j);
2971
2972 /* NUL-terminator should use the final closing quote at line 5 column 12. */
2973 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
2974
2975 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
2976 }
2977
2978 /* Another test of string literal concatenation, this time combined with
2979 various kinds of escaped characters. */
2980
2981 static void
2982 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
2983 {
2984 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
2985 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
2986 const char *content
2987 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
2988 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
2989 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
2990 lexer_test test (case_, content, NULL);
2991
2992 auto_vec <cpp_string> input_strings;
2993 location_t input_locs[4];
2994
2995 /* Verify that we get the expected tokens back. */
2996 for (int i = 0; i < 4; i++)
2997 {
2998 const cpp_token *tok = test.get_token ();
2999 ASSERT_EQ (tok->type, CPP_STRING);
3000 input_strings.safe_push (tok->val.str);
3001 input_locs[i] = tok->src_loc;
3002 }
3003
3004 /* Verify that cpp_interpret_string works. */
3005 cpp_string dst_string;
3006 const enum cpp_ttype type = CPP_STRING;
3007 bool result = cpp_interpret_string (test.m_parser,
3008 input_strings.address (), 4,
3009 &dst_string, type);
3010 ASSERT_TRUE (result);
3011 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3012 free (const_cast <unsigned char *> (dst_string.text));
3013
3014 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3015 test.m_concats.record_string_concatenation (4, input_locs);
3016
3017 location_t initial_loc = input_locs[0];
3018
3019 for (int i = 0; i <= 4; i++)
3020 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3021 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3022 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3023 for (int i = 7; i <= 9; i++)
3024 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3025
3026 /* NUL-terminator should use the location of the final closing quote. */
3027 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3028
3029 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3030 }
3031
3032 /* Test of string literal in a macro. */
3033
3034 static void
3035 test_lexer_string_locations_macro (const line_table_case &case_)
3036 {
3037 /* Digits 0-9.
3038 .....................0000000001111111111.22222222223.
3039 .....................1234567890123456789.01234567890. */
3040 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3041 " MACRO");
3042 lexer_test test (case_, content, NULL);
3043
3044 /* Verify that we get the expected tokens back. */
3045 const cpp_token *tok = test.get_token ();
3046 ASSERT_EQ (tok->type, CPP_PADDING);
3047
3048 tok = test.get_token ();
3049 ASSERT_EQ (tok->type, CPP_STRING);
3050 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3051
3052 /* Verify ranges of individual characters. We ought to
3053 see columns within the macro definition. */
3054 for (int i = 0; i <= 10; i++)
3055 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3056 i, 1, 20 + i, 20 + i);
3057
3058 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3059
3060 tok = test.get_token ();
3061 ASSERT_EQ (tok->type, CPP_PADDING);
3062 }
3063
3064 /* Test of stringification of a macro argument. */
3065
3066 static void
3067 test_lexer_string_locations_stringified_macro_argument
3068 (const line_table_case &case_)
3069 {
3070 /* .....................000000000111111111122222222223.
3071 .....................123456789012345678901234567890. */
3072 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3073 "MACRO(foo)\n");
3074 lexer_test test (case_, content, NULL);
3075
3076 /* Verify that we get the expected token back. */
3077 const cpp_token *tok = test.get_token ();
3078 ASSERT_EQ (tok->type, CPP_PADDING);
3079
3080 tok = test.get_token ();
3081 ASSERT_EQ (tok->type, CPP_STRING);
3082 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3083
3084 /* We don't support getting the location of a stringified macro
3085 argument. Verify that it fails gracefully. */
3086 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3087 "cpp_interpret_string_1 failed");
3088
3089 tok = test.get_token ();
3090 ASSERT_EQ (tok->type, CPP_PADDING);
3091
3092 tok = test.get_token ();
3093 ASSERT_EQ (tok->type, CPP_PADDING);
3094 }
3095
3096 /* Ensure that we are fail gracefully if something attempts to pass
3097 in a location that isn't a string literal token. Seen on this code:
3098
3099 const char a[] = " %d ";
3100 __builtin_printf (a, 0.5);
3101 ^
3102
3103 when c-format.c erroneously used the indicated one-character
3104 location as the format string location, leading to a read past the
3105 end of a string buffer in cpp_interpret_string_1. */
3106
3107 static void
3108 test_lexer_string_locations_non_string (const line_table_case &case_)
3109 {
3110 /* .....................000000000111111111122222222223.
3111 .....................123456789012345678901234567890. */
3112 const char *content = (" a\n");
3113 lexer_test test (case_, content, NULL);
3114
3115 /* Verify that we get the expected token back. */
3116 const cpp_token *tok = test.get_token ();
3117 ASSERT_EQ (tok->type, CPP_NAME);
3118 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3119
3120 /* At this point, libcpp is attempting to interpret the name as a
3121 string literal, despite it not starting with a quote. We don't detect
3122 that, but we should at least fail gracefully. */
3123 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3124 "cpp_interpret_string_1 failed");
3125 }
3126
3127 /* Ensure that we can read substring information for a token which
3128 starts in one linemap and ends in another . Adapted from
3129 gcc.dg/cpp/pr69985.c. */
3130
3131 static void
3132 test_lexer_string_locations_long_line (const line_table_case &case_)
3133 {
3134 /* .....................000000.000111111111
3135 .....................123456.789012346789. */
3136 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3137 " \"0123456789012345678901234567890123456789"
3138 "0123456789012345678901234567890123456789"
3139 "0123456789012345678901234567890123456789"
3140 "0123456789\"\n");
3141
3142 lexer_test test (case_, content, NULL);
3143
3144 /* Verify that we get the expected token back. */
3145 const cpp_token *tok = test.get_token ();
3146 ASSERT_EQ (tok->type, CPP_STRING);
3147
3148 if (!should_have_column_data_p (line_table->highest_location))
3149 return;
3150
3151 /* Verify ranges of individual characters. */
3152 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3153 for (int i = 0; i < 131; i++)
3154 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3155 i, 2, 7 + i, 7 + i);
3156 }
3157
3158 /* Test of lexing char constants. */
3159
3160 static void
3161 test_lexer_char_constants (const line_table_case &case_)
3162 {
3163 /* Various char constants.
3164 .....................0000000001111111111.22222222223.
3165 .....................1234567890123456789.01234567890. */
3166 const char *content = (" 'a'\n"
3167 " u'a'\n"
3168 " U'a'\n"
3169 " L'a'\n"
3170 " 'abc'\n");
3171 lexer_test test (case_, content, NULL);
3172
3173 /* Verify that we get the expected tokens back. */
3174 /* 'a'. */
3175 const cpp_token *tok = test.get_token ();
3176 ASSERT_EQ (tok->type, CPP_CHAR);
3177 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3178
3179 unsigned int chars_seen;
3180 int unsignedp;
3181 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3182 &chars_seen, &unsignedp);
3183 ASSERT_EQ (cc, 'a');
3184 ASSERT_EQ (chars_seen, 1);
3185
3186 /* u'a'. */
3187 tok = test.get_token ();
3188 ASSERT_EQ (tok->type, CPP_CHAR16);
3189 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3190
3191 /* U'a'. */
3192 tok = test.get_token ();
3193 ASSERT_EQ (tok->type, CPP_CHAR32);
3194 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3195
3196 /* L'a'. */
3197 tok = test.get_token ();
3198 ASSERT_EQ (tok->type, CPP_WCHAR);
3199 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3200
3201 /* 'abc' (c-char-sequence). */
3202 tok = test.get_token ();
3203 ASSERT_EQ (tok->type, CPP_CHAR);
3204 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3205 }
3206 /* A table of interesting location_t values, giving one axis of our test
3207 matrix. */
3208
3209 static const location_t boundary_locations[] = {
3210 /* Zero means "don't override the default values for a new line_table". */
3211 0,
3212
3213 /* An arbitrary non-zero value that isn't close to one of
3214 the boundary values below. */
3215 0x10000,
3216
3217 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3218 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3219 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3220 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3221 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3222 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3223
3224 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3225 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3226 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3227 LINE_MAP_MAX_LOCATION_WITH_COLS,
3228 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3229 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3230 };
3231
3232 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3233
3234 void
3235 for_each_line_table_case (void (*testcase) (const line_table_case &))
3236 {
3237 /* As noted above in the description of struct line_table_case,
3238 we want to explore a test matrix of interesting line_table
3239 situations, running various selftests for each case within the
3240 matrix. */
3241
3242 /* Run all tests with:
3243 (a) line_table->default_range_bits == 0, and
3244 (b) line_table->default_range_bits == 5. */
3245 int num_cases_tested = 0;
3246 for (int default_range_bits = 0; default_range_bits <= 5;
3247 default_range_bits += 5)
3248 {
3249 /* ...and use each of the "interesting" location values as
3250 the starting location within line_table. */
3251 const int num_boundary_locations
3252 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3253 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3254 {
3255 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3256
3257 testcase (c);
3258
3259 num_cases_tested++;
3260 }
3261 }
3262
3263 /* Verify that we fully covered the test matrix. */
3264 ASSERT_EQ (num_cases_tested, 2 * 12);
3265 }
3266
3267 /* Run all of the selftests within this file. */
3268
3269 void
3270 input_c_tests ()
3271 {
3272 test_should_have_column_data_p ();
3273 test_unknown_location ();
3274 test_builtins ();
3275 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3276
3277 for_each_line_table_case (test_accessing_ordinary_linemaps);
3278 for_each_line_table_case (test_lexer);
3279 for_each_line_table_case (test_lexer_string_locations_simple);
3280 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3281 for_each_line_table_case (test_lexer_string_locations_hex);
3282 for_each_line_table_case (test_lexer_string_locations_oct);
3283 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3284 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3285 for_each_line_table_case (test_lexer_string_locations_ucn4);
3286 for_each_line_table_case (test_lexer_string_locations_ucn8);
3287 for_each_line_table_case (test_lexer_string_locations_wide_string);
3288 for_each_line_table_case (test_lexer_string_locations_string16);
3289 for_each_line_table_case (test_lexer_string_locations_string32);
3290 for_each_line_table_case (test_lexer_string_locations_u8);
3291 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3292 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3293 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3294 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3295 for_each_line_table_case (test_lexer_string_locations_macro);
3296 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3297 for_each_line_table_case (test_lexer_string_locations_non_string);
3298 for_each_line_table_case (test_lexer_string_locations_long_line);
3299 for_each_line_table_case (test_lexer_char_constants);
3300
3301 test_reading_source_line ();
3302 }
3303
3304 } // namespace selftest
3305
3306 #endif /* CHECKING_P */