gcc/input.c
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2016 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic-core.h"
25 #include "selftest.h"
26 #include "cpplib.h"
27
28 /* This is a cache used by get_next_line to store the content of a
29 file so that individual lines can be looked up. */
30 struct fcache
31 {
32 /* These are information used to store a line boundary. */
33 struct line_info
34 {
35 /* The line number. It starts from 1. */
36 size_t line_num;
37
38 /* The position (byte count) of the beginning of the line,
39 relative to the file data pointer. This starts at zero. */
40 size_t start_pos;
41
42 /* The position (byte count) of the last byte of the line. This
43 normally points to the '\n' character, or to one byte after the
44 last byte of the file, if the file doesn't contain a '\n'
45 character. */
46 size_t end_pos;
47
48 line_info (size_t l, size_t s, size_t e)
49 : line_num (l), start_pos (s), end_pos (e)
50 {}
51
52 line_info ()
53 :line_num (0), start_pos (0), end_pos (0)
54 {}
55 };
56
57 /* The number of times this file has been accessed. This is used
58 to designate which file cache to evict from the cache
59 array. */
60 unsigned use_count;
61
62 const char *file_path;
63
64 FILE *fp;
65
66 /* This points to the content of the file that we've read so
67 far. */
68 char *data;
69
70 /* The size of the DATA array above.*/
71 size_t size;
72
73 /* The number of bytes read from the underlying file so far. This
74 must be less than (or equal to) SIZE above. */
75 size_t nb_read;
76
77 /* The index of the beginning of the current line. */
78 size_t line_start_idx;
79
80 /* The number of the previous line read. This starts at 1. Zero
81 means we've read no line so far. */
82 size_t line_num;
83
84 /* This is the total number of lines of the current file. At the
85 moment, we try to get this information from the line map
86 subsystem. Note that this is just a hint. When using the C++
87 front-end, this hint is correct because the input file is then
88 completely tokenized before parsing starts; so the line map knows
89 the number of lines before compilation really starts. For the
90 C front-end, e.g., it can happen that we start emitting diagnostics
91 before the line map has seen the end of the file. */
92 size_t total_lines;
93
94 /* This is a record of the beginning and end of the lines we've seen
95 while reading the file. This is useful to avoid walking the data
96 from the beginning when we are asked to read a line that is
97 before LINE_START_IDX above. Note that the maximum size of this
98 record is fcache_line_record_size, so that the memory consumption
99 doesn't explode. We thus scale total_lines down to
100 fcache_line_record_size. */
101 vec<line_info, va_heap> line_record;
102
103 fcache ();
104 ~fcache ();
105 };
106
107 /* Current position in real source file. */
108
109 location_t input_location = UNKNOWN_LOCATION;
110
111 struct line_maps *line_table;
112
113 static fcache *fcache_tab;
114 static const size_t fcache_tab_size = 16;
115 static const size_t fcache_buffer_size = 4 * 1024;
116 static const size_t fcache_line_record_size = 100;
117
118 /* Expand the source location LOC into a human readable location. If
119 LOC resolves to a builtin location, the file name of the readable
120 location is set to the string "<built-in>". If EXPANSION_POINT_P is
121 TRUE and LOC is virtual, then it is resolved to the expansion
122 point of the involved macro. Otherwise, it is resolved to the
123 spelling location of the token.
124
125 When resolving to the spelling location of the token, if the
126 resulting location is for a built-in location (that is, it has no
127 associated line/column) in the context of a macro expansion, the
128 returned location is the first one (while unwinding the macro
129 location towards its expansion point) that is in real source
130 code. */
131
132 static expanded_location
133 expand_location_1 (source_location loc,
134 bool expansion_point_p)
135 {
136 expanded_location xloc;
137 const line_map_ordinary *map;
138 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
139 tree block = NULL;
140
141 if (IS_ADHOC_LOC (loc))
142 {
143 block = LOCATION_BLOCK (loc);
144 loc = LOCATION_LOCUS (loc);
145 }
146
147 memset (&xloc, 0, sizeof (xloc));
148
149 if (loc >= RESERVED_LOCATION_COUNT)
150 {
151 if (!expansion_point_p)
152 {
153 /* We want to resolve LOC to its spelling location.
154
155 But if that spelling location is a reserved location that
156 appears in the context of a macro expansion (like for a
157 location for a built-in token), let's consider the first
158 location (toward the expansion point) that is not reserved;
159 that is, the first location that is in real source code. */
160 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
161 loc, NULL);
162 lrk = LRK_SPELLING_LOCATION;
163 }
164 loc = linemap_resolve_location (line_table, loc,
165 lrk, &map);
166 xloc = linemap_expand_location (line_table, map, loc);
167 }
168
169 xloc.data = block;
170 if (loc <= BUILTINS_LOCATION)
171 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
172
173 return xloc;
174 }
175
176 /* Initialize the set of caches used for files accessed by caret
177 diagnostics. */
178
179 static void
180 diagnostic_file_cache_init (void)
181 {
182 if (fcache_tab == NULL)
183 fcache_tab = new fcache[fcache_tab_size];
184 }
185
186 /* Free the resources used by the set of caches used for files accessed
187 by caret diagnostics. */
188
189 void
190 diagnostic_file_cache_fini (void)
191 {
192 if (fcache_tab)
193 {
194 delete [] (fcache_tab);
195 fcache_tab = NULL;
196 }
197 }
198
199 /* Return the total number of lines that have been read so far by the
200 line map (in the preprocessor). For languages like C++ that
201 entirely preprocess the input file before starting to parse, this
202 equals the actual number of lines of the file. */
203
204 static size_t
205 total_lines_num (const char *file_path)
206 {
207 size_t r = 0;
208 source_location l = 0;
209 if (linemap_get_file_highest_location (line_table, file_path, &l))
210 {
211 gcc_assert (l >= RESERVED_LOCATION_COUNT);
212 expanded_location xloc = expand_location (l);
213 r = xloc.line;
214 }
215 return r;
216 }
217
218 /* Look up the cache used for the content of a given file accessed by
219 caret diagnostics. Return the found cached file, or NULL if no
220 cached file was found. */
221
222 static fcache*
223 lookup_file_in_cache_tab (const char *file_path)
224 {
225 if (file_path == NULL)
226 return NULL;
227
228 diagnostic_file_cache_init ();
229
230 /* This will contain the found cached file. */
231 fcache *r = NULL;
232 for (unsigned i = 0; i < fcache_tab_size; ++i)
233 {
234 fcache *c = &fcache_tab[i];
235 if (c->file_path && !strcmp (c->file_path, file_path))
236 {
237 ++c->use_count;
238 r = c;
239 }
240 }
241
242 if (r)
243 ++r->use_count;
244
245 return r;
246 }
247
248 /* Return the file cache that has been least recently used, or the
249 first empty one. If HIGHEST_USE_COUNT is non-null,
250 *HIGHEST_USE_COUNT is set to the highest use count of the entries
251 in the cache table. */
252
253 static fcache*
254 evicted_cache_tab_entry (unsigned *highest_use_count)
255 {
256 diagnostic_file_cache_init ();
257
258 fcache *to_evict = &fcache_tab[0];
259 unsigned huc = to_evict->use_count;
260 for (unsigned i = 1; i < fcache_tab_size; ++i)
261 {
262 fcache *c = &fcache_tab[i];
263 bool c_is_empty = (c->file_path == NULL);
264
265 if (c->use_count < to_evict->use_count
266 || (to_evict->file_path && c_is_empty))
267 /* We evict C because it's either an entry with a lower use
268 count or one that is empty. */
269 to_evict = c;
270
271 if (huc < c->use_count)
272 huc = c->use_count;
273
274 if (c_is_empty)
275 /* We've reached the end of the cache; subsequent elements are
276 all empty. */
277 break;
278 }
279
280 if (highest_use_count)
281 *highest_use_count = huc;
282
283 return to_evict;
284 }
285
286 /* Create the cache used for the content of a given file to be
287 accessed by caret diagnostics. This cache is added to an array of
288 caches and can be retrieved by lookup_file_in_cache_tab. This
289 function returns the created cache. Note that only the last
290 fcache_tab_size files are cached. */
291
292 static fcache*
293 add_file_to_cache_tab (const char *file_path)
294 {
295
296 FILE *fp = fopen (file_path, "r");
297 if (fp == NULL)
298 return NULL;
299
300 unsigned highest_use_count = 0;
301 fcache *r = evicted_cache_tab_entry (&highest_use_count);
302 r->file_path = file_path;
303 if (r->fp)
304 fclose (r->fp);
305 r->fp = fp;
306 r->nb_read = 0;
307 r->line_start_idx = 0;
308 r->line_num = 0;
309 r->line_record.truncate (0);
310 /* Ensure that this cache entry doesn't get evicted next time
311 add_file_to_cache_tab is called. */
312 r->use_count = ++highest_use_count;
313 r->total_lines = total_lines_num (file_path);
314
315 return r;
316 }
317
318 /* Look up the cache used for the content of a given file accessed by
319 caret diagnostics. If no cached file was found, create a new cache
320 for this file, add it to the array of cached files and return
321 it. */
322
323 static fcache*
324 lookup_or_add_file_to_cache_tab (const char *file_path)
325 {
326 fcache *r = lookup_file_in_cache_tab (file_path);
327 if (r == NULL)
328 r = add_file_to_cache_tab (file_path);
329 return r;
330 }
331
332 /* Default constructor for a file cache used by caret
333 diagnostics. */
334
335 fcache::fcache ()
336 : use_count (0), file_path (NULL), fp (NULL), data (0),
337 size (0), nb_read (0), line_start_idx (0), line_num (0),
338 total_lines (0)
339 {
340 line_record.create (0);
341 }
342
343 /* Destructor for a file cache used by caret diagnostics. */
344
345 fcache::~fcache ()
346 {
347 if (fp)
348 {
349 fclose (fp);
350 fp = NULL;
351 }
352 if (data)
353 {
354 XDELETEVEC (data);
355 data = 0;
356 }
357 line_record.release ();
358 }
359
360 /* Returns TRUE iff the cache would need to be filled with data coming
361 from the file. That is, either the cache is empty or full, or the
362 current line starts at the end of the data read so far. Note that
363 if the cache is full, it would need to be extended and filled again. */
364
365 static bool
366 needs_read (fcache *c)
367 {
368 return (c->nb_read == 0
369 || c->nb_read == c->size
370 || (c->line_start_idx >= c->nb_read - 1));
371 }
372
373 /* Return TRUE iff the cache is full and thus needs to be
374 extended. */
375
376 static bool
377 needs_grow (fcache *c)
378 {
379 return c->nb_read == c->size;
380 }
381
382 /* Grow the cache if it needs to be extended. */
383
384 static void
385 maybe_grow (fcache *c)
386 {
387 if (!needs_grow (c))
388 return;
389
390 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
391 c->data = XRESIZEVEC (char, c->data, size + 1);
392 c->size = size;
393 }
394
395 /* Read more data into the cache. Extends the cache if need be.
396 Returns TRUE iff new data could be read. */
397
398 static bool
399 read_data (fcache *c)
400 {
401 if (feof (c->fp) || ferror (c->fp))
402 return false;
403
404 maybe_grow (c);
405
406 char * from = c->data + c->nb_read;
407 size_t to_read = c->size - c->nb_read;
408 size_t nb_read = fread (from, 1, to_read, c->fp);
409
410 if (ferror (c->fp))
411 return false;
412
413 c->nb_read += nb_read;
414 return !!nb_read;
415 }
416
417 /* Read new data iff the cache needs to be filled with more data
418 coming from the file FP. Return TRUE iff the cache was filled with
419 more data. */
420
421 static bool
422 maybe_read_data (fcache *c)
423 {
424 if (!needs_read (c))
425 return false;
426 return read_data (c);
427 }
428
429 /* Read a new line from file FP, using C as a cache for the data
430 coming from the file. Upon successful completion, *LINE is set to
431 the beginning of the line found. Space for that line has been
432 allocated in the cache, thus *LINE has the same lifetime as C.
433 *LINE_LEN is set to the length of the line. Note that the line
434 does not contain any terminal delimiter. This function returns
435 true if some data was read or process from the cache, false
436 otherwise. Note that subsequent calls to get_next_line return the
437 next lines of the file and might overwrite the content of
438 *LINE. */
439
440 static bool
441 get_next_line (fcache *c, char **line, ssize_t *line_len)
442 {
443 /* Fill the cache with data to process. */
444 maybe_read_data (c);
445
446 size_t remaining_size = c->nb_read - c->line_start_idx;
447 if (remaining_size == 0)
448 /* There is no more data to process. */
449 return false;
450
451 char *line_start = c->data + c->line_start_idx;
452
453 char *next_line_start = NULL;
454 size_t len = 0;
455 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
456 if (line_end == NULL)
457 {
458 /* We haven't found the end-of-line delimiter in the cache.
459 Fill the cache with more data from the file and look for the
460 '\n'. */
461 while (maybe_read_data (c))
462 {
463 line_start = c->data + c->line_start_idx;
464 remaining_size = c->nb_read - c->line_start_idx;
465 line_end = (char *) memchr (line_start, '\n', remaining_size);
466 if (line_end != NULL)
467 {
468 next_line_start = line_end + 1;
469 break;
470 }
471 }
472 if (line_end == NULL)
473 /* We've loaded the whole file into the cache and still no
474 '\n'. Let's say the line ends one byte past the
475 end of the file. This is to stay consistent with the case
476 when the line ends with a '\n' and line_end points to
477 that terminal '\n'. That consistency is useful below in
478 the len calculation. */
479 line_end = c->data + c->nb_read;
480 }
481 else
482 next_line_start = line_end + 1;
483
484 if (ferror (c->fp))
485 return false;
486
487 /* At this point, we've found the end of the line. It either
488 points to the '\n' or to one byte after the last byte of the
489 file. */
490 gcc_assert (line_end != NULL);
491
492 len = line_end - line_start;
493
494 if (c->line_start_idx < c->nb_read)
495 *line = line_start;
496
497 ++c->line_num;
498
499 /* Before we update our line record, make sure the hint about the
500 total number of lines of the file is correct. If it's not, then
501 we give up recording line boundaries from now on. */
502 bool update_line_record = true;
503 if (c->line_num > c->total_lines)
504 update_line_record = false;
505
506 /* Now update our line record so that re-reading lines located
507 before c->line_start_idx is faster. */
508 if (update_line_record
509 && c->line_record.length () < fcache_line_record_size)
510 {
511 /* If the file's lines fit in the line record, we just record all
512 its lines ... */
513 if (c->total_lines <= fcache_line_record_size
514 && c->line_num > c->line_record.length ())
515 c->line_record.safe_push (fcache::line_info (c->line_num,
516 c->line_start_idx,
517 line_end - c->data));
518 else if (c->total_lines > fcache_line_record_size)
519 {
520 /* ... otherwise, we just scale total_lines down to
521 fcache_line_record_size lines. */
522 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
523 if (c->line_record.length () == 0
524 || n >= c->line_record.length ())
525 c->line_record.safe_push (fcache::line_info (c->line_num,
526 c->line_start_idx,
527 line_end - c->data));
528 }
529 }
530
531 /* Update c->line_start_idx so that it points to the next line to be
532 read. */
533 if (next_line_start)
534 c->line_start_idx = next_line_start - c->data;
535 else
536 /* We didn't find any terminal '\n'. Let's consider that the end
537 of line is the end of the data in the cache. The next
538 invocation of get_next_line will either read more data from the
539 underlying file or return false early because we've reached the
540 end of the file. */
541 c->line_start_idx = c->nb_read;
542
543 *line_len = len;
544
545 return true;
546 }
547
548 /* Reads the next line from the file cached in CACHE into *LINE. If
549 *LINE is too small (or NULL) it is allocated (or extended) to have
550 enough space to contain the line. *LINE_LEN must contain the size
551 of the initial *LINE buffer. It's then updated by this function to the
552 actual length of the returned line. Note that the returned line
553 can contain several zero bytes. Also note that the returned string
554 is allocated in static storage that is going to be re-used by
555 subsequent invocations of read_next_line. */
556
557 static bool
558 read_next_line (fcache *cache, char ** line, ssize_t *line_len)
559 {
560 char *l = NULL;
561 ssize_t len = 0;
562
563 if (!get_next_line (cache, &l, &len))
564 return false;
565
566 if (*line == NULL)
567 *line = XNEWVEC (char, len);
568 else
569 if (*line_len < len)
570 *line = XRESIZEVEC (char, *line, len);
571
572 memcpy (*line, l, len);
573 *line_len = len;
574
575 return true;
576 }
577
578 /* Consume the next bytes coming from the cache (or from its
579 underlying file if there are remaining unread bytes in the file)
580 until we reach the next end-of-line (or end-of-file). There is no
581 copying from the cache involved. Return TRUE upon successful
582 completion. */
583
584 static bool
585 goto_next_line (fcache *cache)
586 {
587 char *l;
588 ssize_t len;
589
590 return get_next_line (cache, &l, &len);
591 }
592
593 /* Read an arbitrary line number LINE_NUM from the file cached in C.
594 The line is copied into *LINE. *LINE_LEN must have been set to the
595 length of *LINE. If *LINE is too small (or NULL) it's extended (or
596 allocated) and *LINE_LEN is adjusted accordingly. *LINE ends up
597 with a terminal zero byte and can contain additional zero bytes.
598 This function returns TRUE if a line was read. */
599
600 static bool
601 read_line_num (fcache *c, size_t line_num,
602 char ** line, ssize_t *line_len)
603 {
604 gcc_assert (line_num > 0);
605
606 if (line_num <= c->line_num)
607 {
608 /* We've been asked to read lines that are before c->line_num.
609 So let's use our line record (if it's not empty) to try to
610 avoid re-reading the file from the beginning again. */
611
612 if (c->line_record.is_empty ())
613 {
614 c->line_start_idx = 0;
615 c->line_num = 0;
616 }
617 else
618 {
619 fcache::line_info *i = NULL;
620 if (c->total_lines <= fcache_line_record_size)
621 {
622 /* In languages where the input file is not totally
623 preprocessed up front, the c->total_lines hint
624 can be smaller than the number of lines of the
625 file. In that case, only the first
626 c->total_lines have been recorded.
627
628 Otherwise, the first c->total_lines we've read have
629 their start/end recorded here. */
630 i = (line_num <= c->total_lines)
631 ? &c->line_record[line_num - 1]
632 : &c->line_record[c->total_lines - 1];
633 gcc_assert (i->line_num <= line_num);
634 }
635 else
636 {
637 /* So the file had more lines than our line record
638 size. Thus the number of lines we've recorded has
639 been scaled down to fcache_line_record_size. Let's
640 pick the start/end of the recorded line that is
641 closest to line_num. */
642 size_t n = (line_num <= c->total_lines)
643 ? line_num * fcache_line_record_size / c->total_lines
644 : c->line_record.length () - 1;
645 if (n < c->line_record.length ())
646 {
647 i = &c->line_record[n];
648 gcc_assert (i->line_num <= line_num);
649 }
650 }
651
652 if (i && i->line_num == line_num)
653 {
654 /* We have the start/end of the line. Let's just copy
655 it again and we are done. */
656 ssize_t len = i->end_pos - i->start_pos + 1;
657 if (*line_len < len)
658 *line = XRESIZEVEC (char, *line, len);
659 memmove (*line, c->data + i->start_pos, len);
660 (*line)[len - 1] = '\0';
661 *line_len = --len;
662 return true;
663 }
664
665 if (i)
666 {
667 c->line_start_idx = i->start_pos;
668 c->line_num = i->line_num - 1;
669 }
670 else
671 {
672 c->line_start_idx = 0;
673 c->line_num = 0;
674 }
675 }
676 }
677
678 /* Let's walk from line c->line_num up to line_num - 1, without
679 copying any line. */
680 while (c->line_num < line_num - 1)
681 if (!goto_next_line (c))
682 return false;
683
684 /* The line we want is the next one. Let's read and copy it back to
685 the caller. */
686 return read_next_line (c, line, line_len);
687 }
688
689 /* Return the physical source line that corresponds to FILE_PATH/LINE in a
690 buffer that is statically allocated. The newline is replaced by
691 the null character. Note that the line can contain several null
692 characters, so LINE_LEN, if non-null, points to the actual length
693 of the line. */
694
695 const char *
696 location_get_source_line (const char *file_path, int line,
697 int *line_len)
698 {
699 static char *buffer;
700 static ssize_t len;
701
702 if (line == 0)
703 return NULL;
704
705 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
706 if (c == NULL)
707 return NULL;
708
709 bool read = read_line_num (c, line, &buffer, &len);
710
711 if (read && line_len)
712 *line_len = len;
713
714 return read ? buffer : NULL;
715 }
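/* Illustrative sketch (editorial addition, not from the original file):
   a typical caller of location_get_source_line.  XLOC is assumed to be
   an expanded_location obtained from expand_location; the variable
   names are hypothetical.

       int len = 0;
       const char *text = location_get_source_line (xloc.file, xloc.line,
                                                     &len);
       if (text)
         fprintf (stderr, "%.*s\n", len, text);

   Printing exactly LEN bytes avoids relying on NUL-termination, since
   the line may contain embedded zero bytes, and the returned buffer is
   reused by the next call.  */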
716
717 /* Test if the location originates from the spelling location of a
718 built-in token. That is, return TRUE if LOC is a (possibly
719 virtual) location of a built-in token that appears in the expansion
720 list of a macro. Please note that this function also works on
721 tokens that result from built-in tokens. For instance, the
722 function would return true if passed a token "4" that is the result
723 of the expansion of the built-in __LINE__ macro. */
724 bool
725 is_location_from_builtin_token (source_location loc)
726 {
727 const line_map_ordinary *map = NULL;
728 loc = linemap_resolve_location (line_table, loc,
729 LRK_SPELLING_LOCATION, &map);
730 return loc == BUILTINS_LOCATION;
731 }
732
733 /* Expand the source location LOC into a human readable location. If
734 LOC is virtual, it resolves to the expansion point of the involved
735 macro. If LOC resolves to a builtin location, the file name of the
736 readable location is set to the string "<built-in>". */
737
738 expanded_location
739 expand_location (source_location loc)
740 {
741 return expand_location_1 (loc, /*expansion_point_p=*/true);
742 }
743
744 /* Expand the source location LOC into a human readable location. If
745 LOC is virtual, it resolves to the spelling location of the
746 relevant token. If LOC resolves to a builtin location, the file
747 name of the readable location is set to the string
748 "<built-in>". */
749
750 expanded_location
751 expand_location_to_spelling_point (source_location loc)
752 {
753 return expand_location_1 (loc, /*expansion_point_p=*/false);
754 }
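/* Illustrative sketch (editorial addition, not from the original file)
   of the difference between the two expanders above.  LOC is assumed to
   be the virtual location of a token produced by a macro expansion.

       expanded_location use_point = expand_location (loc);
       expanded_location spelling  = expand_location_to_spelling_point (loc);

   USE_POINT describes where the macro was invoked, whereas SPELLING
   describes where the token itself is spelled, typically inside the
   macro definition.  */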
755
756 /* The rich_location class within libcpp requires a way to expand
757 source_location instances, and relies on the client code
758 providing a symbol named
759 linemap_client_expand_location_to_spelling_point
760 to do this.
761
762 This is the implementation for libcommon.a (all host binaries),
763 which simply calls into expand_location_to_spelling_point. */
764
765 expanded_location
766 linemap_client_expand_location_to_spelling_point (source_location loc)
767 {
768 return expand_location_to_spelling_point (loc);
769 }
770
771
772 /* If LOCATION is in a system header and if it is a virtual location for
773 a token coming from the expansion of a macro, unwind it to the
774 location of the expansion point of the macro. Otherwise, just return
775 LOCATION.
776
777 This is used for instance when we want to emit diagnostics about a
778 token that may be located in a macro that is itself defined in a
779 system header, for example, for the NULL macro. In such a case, if
780 LOCATION were passed directly to diagnostic functions such as
781 warning_at, the diagnostic would be suppressed (unless
782 -Wsystem-headers). */
783
784 source_location
785 expansion_point_location_if_in_system_header (source_location location)
786 {
787 if (in_system_header_at (location))
788 location = linemap_resolve_location (line_table, location,
789 LRK_MACRO_EXPANSION_POINT,
790 NULL);
791 return location;
792 }
793
794 /* If LOCATION is a virtual location for a token coming from the expansion
795 of a macro, unwind to the location of the expansion point of the macro. */
796
797 source_location
798 expansion_point_location (source_location location)
799 {
800 return linemap_resolve_location (line_table, location,
801 LRK_MACRO_EXPANSION_POINT, NULL);
802 }
803
804 /* Given location LOC, strip away any packed range information
805 or ad-hoc information. */
806
807 location_t
808 get_pure_location (location_t loc)
809 {
810 if (IS_ADHOC_LOC (loc))
811 loc
812 = line_table->location_adhoc_data_map.data[loc & MAX_SOURCE_LOCATION].locus;
813
814 if (loc >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
815 return loc;
816
817 if (loc < RESERVED_LOCATION_COUNT)
818 return loc;
819
820 const line_map *map = linemap_lookup (line_table, loc);
821 const line_map_ordinary *ordmap = linemap_check_ordinary (map);
822
823 return loc & ~((1 << ordmap->m_range_bits) - 1);
824 }
825
826 /* Construct a location with caret at CARET, ranging from START to
827 FINISH, e.g.
828
829 11111111112
830 12345678901234567890
831 522
832 523 return foo + bar;
833 ~~~~^~~~~
834 524
835
836 The location's caret is at the "+", line 523 column 15, but starts
837 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
838 of "bar" at column 19. */
839
840 location_t
841 make_location (location_t caret, location_t start, location_t finish)
842 {
843 location_t pure_loc = get_pure_location (caret);
844 source_range src_range;
845 src_range.m_start = start;
846 src_range.m_finish = finish;
847 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
848 pure_loc,
849 src_range,
850 NULL);
851 return combined_loc;
852 }
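/* Illustrative sketch (editorial addition, not from the original file),
   following the "foo + bar" example in the comment above.  LOC_FOO,
   LOC_PLUS and LOC_BAR are hypothetical locations of the tokens "foo",
   "+" and "bar".

       location_t sum_loc = make_location (loc_plus, loc_foo, loc_bar);

   SUM_LOC then carries the caret at "+" and a range spanning
   "foo + bar"; get_range_from_loc can recover the endpoints, as
   exercised by test_accessing_ordinary_linemaps below.  */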
853
854 #define ONE_K 1024
855 #define ONE_M (ONE_K * ONE_K)
856
857 /* Display a number as an integer multiple of either:
858 - 1024, if said integer is >= 10 K (in base 2)
859 - 1024 * 1024, if said integer is >= 10 M (in base 2)
860 */
861 #define SCALE(x) ((unsigned long) ((x) < 10 * ONE_K \
862 ? (x) \
863 : ((x) < 10 * ONE_M \
864 ? (x) / ONE_K \
865 : (x) / ONE_M)))
866
867 /* For a given integer, display either:
868 - the character 'k', if the number is >= 10 K (in base 2)
869 but strictly lower than 10 M (in base 2)
870 - the character 'M' if the number is >= 10 M (in base 2)
871 - the character ' ' if the number is strictly lower than 10 K */
872 #define STAT_LABEL(x) ((x) < 10 * ONE_K ? ' ' : ((x) < 10 * ONE_M ? 'k' : 'M'))
873
874 /* Display an integer amount as multiple of 1K or 1M (in base 2).
875 Display the correct unit (either k, M, or ' ') after the amount, as
876 well. */
877 #define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
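/* Illustrative use of the macros above (editorial addition, not from
   the original file); the variable AMOUNT is hypothetical, and the
   format string mirrors the "%5ld%c" pattern used in
   dump_line_table_statistics below.

       fprintf (stderr, "Macro maps size: %5ld%c\n", FORMAT_AMOUNT (amount));

   FORMAT_AMOUNT expands to two printf arguments: the scaled value and
   its unit character (' ', 'k' or 'M').  */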
878
879 /* Dump statistics to stderr about the memory usage of the line_table
880 set of line maps. This also displays some statistics about macro
881 expansion. */
882
883 void
884 dump_line_table_statistics (void)
885 {
886 struct linemap_stats s;
887 long total_used_map_size,
888 macro_maps_size,
889 total_allocated_map_size;
890
891 memset (&s, 0, sizeof (s));
892
893 linemap_get_statistics (line_table, &s);
894
895 macro_maps_size = s.macro_maps_used_size
896 + s.macro_maps_locations_size;
897
898 total_allocated_map_size = s.ordinary_maps_allocated_size
899 + s.macro_maps_allocated_size
900 + s.macro_maps_locations_size;
901
902 total_used_map_size = s.ordinary_maps_used_size
903 + s.macro_maps_used_size
904 + s.macro_maps_locations_size;
905
906 fprintf (stderr, "Number of expanded macros: %5ld\n",
907 s.num_expanded_macros);
908 if (s.num_expanded_macros != 0)
909 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
910 s.num_macro_tokens / s.num_expanded_macros);
911 fprintf (stderr,
912 "\nLine Table allocations during the "
913 "compilation process\n");
914 fprintf (stderr, "Number of ordinary maps used: %5ld%c\n",
915 SCALE (s.num_ordinary_maps_used),
916 STAT_LABEL (s.num_ordinary_maps_used));
917 fprintf (stderr, "Ordinary map used size: %5ld%c\n",
918 SCALE (s.ordinary_maps_used_size),
919 STAT_LABEL (s.ordinary_maps_used_size));
920 fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n",
921 SCALE (s.num_ordinary_maps_allocated),
922 STAT_LABEL (s.num_ordinary_maps_allocated));
923 fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n",
924 SCALE (s.ordinary_maps_allocated_size),
925 STAT_LABEL (s.ordinary_maps_allocated_size));
926 fprintf (stderr, "Number of macro maps used: %5ld%c\n",
927 SCALE (s.num_macro_maps_used),
928 STAT_LABEL (s.num_macro_maps_used));
929 fprintf (stderr, "Macro maps used size: %5ld%c\n",
930 SCALE (s.macro_maps_used_size),
931 STAT_LABEL (s.macro_maps_used_size));
932 fprintf (stderr, "Macro maps locations size: %5ld%c\n",
933 SCALE (s.macro_maps_locations_size),
934 STAT_LABEL (s.macro_maps_locations_size));
935 fprintf (stderr, "Macro maps size: %5ld%c\n",
936 SCALE (macro_maps_size),
937 STAT_LABEL (macro_maps_size));
938 fprintf (stderr, "Duplicated maps locations size: %5ld%c\n",
939 SCALE (s.duplicated_macro_maps_locations_size),
940 STAT_LABEL (s.duplicated_macro_maps_locations_size));
941 fprintf (stderr, "Total allocated maps size: %5ld%c\n",
942 SCALE (total_allocated_map_size),
943 STAT_LABEL (total_allocated_map_size));
944 fprintf (stderr, "Total used maps size: %5ld%c\n",
945 SCALE (total_used_map_size),
946 STAT_LABEL (total_used_map_size));
947 fprintf (stderr, "Ad-hoc table size: %5ld%c\n",
948 SCALE (s.adhoc_table_size),
949 STAT_LABEL (s.adhoc_table_size));
950 fprintf (stderr, "Ad-hoc table entries used: %5ld\n",
951 s.adhoc_table_entries_used);
952 fprintf (stderr, "optimized_ranges: %i\n",
953 line_table->num_optimized_ranges);
954 fprintf (stderr, "unoptimized_ranges: %i\n",
955 line_table->num_unoptimized_ranges);
956
957 fprintf (stderr, "\n");
958 }
959
960 /* Get location one beyond the final location in ordinary map IDX. */
961
962 static source_location
963 get_end_location (struct line_maps *set, unsigned int idx)
964 {
965 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
966 return set->highest_location;
967
968 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
969 return MAP_START_LOCATION (next_map);
970 }
971
972 /* Helper function for write_digit_row. */
973
974 static void
975 write_digit (FILE *stream, int digit)
976 {
977 fputc ('0' + (digit % 10), stream);
978 }
979
980 /* Helper function for dump_location_info.
981 Write a row of numbers to STREAM, numbering a source line,
982 giving the units, tens, hundreds etc of the column number. */
983
984 static void
985 write_digit_row (FILE *stream, int indent,
986 const line_map_ordinary *map,
987 source_location loc, int max_col, int divisor)
988 {
989 fprintf (stream, "%*c", indent, ' ');
990 fprintf (stream, "|");
991 for (int column = 1; column < max_col; column++)
992 {
993 source_location column_loc = loc + (column << map->m_range_bits);
994 write_digit (stream, column_loc / divisor);
995 }
996 fprintf (stream, "\n");
997 }
998
999 /* Write a half-closed (START) / half-open (END) interval of
1000 source_location to STREAM. */
1001
1002 static void
1003 dump_location_range (FILE *stream,
1004 source_location start, source_location end)
1005 {
1006 fprintf (stream,
1007 " source_location interval: %u <= loc < %u\n",
1008 start, end);
1009 }
1010
1011 /* Write a labelled description of a half-closed (START) / half-open (END)
1012 interval of source_location to STREAM. */
1013
1014 static void
1015 dump_labelled_location_range (FILE *stream,
1016 const char *name,
1017 source_location start, source_location end)
1018 {
1019 fprintf (stream, "%s\n", name);
1020 dump_location_range (stream, start, end);
1021 fprintf (stream, "\n");
1022 }
1023
1024 /* Write a visualization of the locations in the line_table to STREAM. */
1025
1026 void
1027 dump_location_info (FILE *stream)
1028 {
1029 /* Visualize the reserved locations. */
1030 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1031 0, RESERVED_LOCATION_COUNT);
1032
1033 /* Visualize the ordinary line_map instances, rendering the sources. */
1034 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1035 {
1036 source_location end_location = get_end_location (line_table, idx);
1037 /* half-closed: doesn't include this one. */
1038
1039 const line_map_ordinary *map
1040 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1041 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1042 dump_location_range (stream,
1043 MAP_START_LOCATION (map), end_location);
1044 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1045 fprintf (stream, " starting at line: %i\n",
1046 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1047 fprintf (stream, " column and range bits: %i\n",
1048 map->m_column_and_range_bits);
1049 fprintf (stream, " column bits: %i\n",
1050 map->m_column_and_range_bits - map->m_range_bits);
1051 fprintf (stream, " range bits: %i\n",
1052 map->m_range_bits);
1053
1054 /* Render the span of source lines that this "map" covers. */
1055 for (source_location loc = MAP_START_LOCATION (map);
1056 loc < end_location;
1057 loc += (1 << map->m_range_bits) )
1058 {
1059 gcc_assert (pure_location_p (line_table, loc) );
1060
1061 expanded_location exploc
1062 = linemap_expand_location (line_table, map, loc);
1063
1064 if (0 == exploc.column)
1065 {
1066 /* Beginning of a new source line: draw the line. */
1067
1068 int line_size;
1069 const char *line_text = location_get_source_line (exploc.file,
1070 exploc.line,
1071 &line_size);
1072 if (!line_text)
1073 break;
1074 fprintf (stream,
1075 "%s:%3i|loc:%5i|%.*s\n",
1076 exploc.file, exploc.line,
1077 loc,
1078 line_size, line_text);
1079
1080 /* "loc" is at column 0, which means "the whole line".
1081 Render the locations *within* the line, by underlining
1082 it, showing the source_location numeric values
1083 at each column. */
1084 int max_col = (1 << map->m_column_and_range_bits) - 1;
1085 if (max_col > line_size)
1086 max_col = line_size + 1;
1087
1088 int indent = 14 + strlen (exploc.file);
1089
1090 /* Thousands. */
1091 if (end_location > 999)
1092 write_digit_row (stream, indent, map, loc, max_col, 1000);
1093
1094 /* Hundreds. */
1095 if (end_location > 99)
1096 write_digit_row (stream, indent, map, loc, max_col, 100);
1097
1098 /* Tens. */
1099 write_digit_row (stream, indent, map, loc, max_col, 10);
1100
1101 /* Units. */
1102 write_digit_row (stream, indent, map, loc, max_col, 1);
1103 }
1104 }
1105 fprintf (stream, "\n");
1106 }
1107
1108 /* Visualize unallocated values. */
1109 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1110 line_table->highest_location,
1111 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1112
1113 /* Visualize the macro line_map instances, rendering the sources. */
1114 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1115 {
1116 /* Each macro map that is allocated owns source_location values
1117 that are *lower* than the ones before them.
1118 Hence it's meaningful to view them either in order of ascending
1119 source locations, or in order of ascending macro map index. */
1120 const bool ascending_source_locations = true;
1121 unsigned int idx = (ascending_source_locations
1122 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1123 : i);
1124 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1125 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1126 idx,
1127 linemap_map_get_macro_name (map),
1128 MACRO_MAP_NUM_MACRO_TOKENS (map));
1129 dump_location_range (stream,
1130 map->start_location,
1131 (map->start_location
1132 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1133 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1134 "expansion point is location %i",
1135 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1136 fprintf (stream, " map->start_location: %u\n",
1137 map->start_location);
1138
1139 fprintf (stream, " macro_locations:\n");
1140 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1141 {
1142 source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1143 source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1144
1145 /* linemap_add_macro_token encodes token numbers in an expansion
1146 by putting them after MAP_START_LOCATION. */
1147
1148 /* I'm typically seeing 4 uninitialized entries at the end, each with
1149 the value 0xafafafaf.
1150 This appears to be due to macro.c:replace_args
1151 adding 2 extra args for padding tokens; presumably there may
1152 be a leading and/or trailing padding token injected,
1153 each for 2 more location slots.
1154 This would explain there being up to 4 source_locations slots
1155 that may be uninitialized. */
1156
1157 fprintf (stream, " %u: %u, %u\n",
1158 i,
1159 x,
1160 y);
1161 if (x == y)
1162 {
1163 if (x < MAP_START_LOCATION (map))
1164 inform (x, "token %u has x-location == y-location == %u", i, x);
1165 else
1166 fprintf (stream,
1167 "x-location == y-location == %u encodes token # %u\n",
1168 x, x - MAP_START_LOCATION (map));
1169 }
1170 else
1171 {
1172 inform (x, "token %u has x-location == %u", i, x);
1173 inform (x, "token %u has y-location == %u", i, y);
1174 }
1175 }
1176 fprintf (stream, "\n");
1177 }
1178
1179 /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1180 macro map, presumably due to an off-by-one error somewhere
1181 between the logic in linemap_enter_macro and
1182 LINEMAPS_MACRO_LOWEST_LOCATION. */
1183 dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1184 MAX_SOURCE_LOCATION,
1185 MAX_SOURCE_LOCATION + 1);
1186
1187 /* Visualize ad-hoc values. */
1188 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1189 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1190 }
1191
1192 /* string_concat's constructor. */
1193
1194 string_concat::string_concat (int num, location_t *locs)
1195 : m_num (num)
1196 {
1197 m_locs = ggc_vec_alloc <location_t> (num);
1198 for (int i = 0; i < num; i++)
1199 m_locs[i] = locs[i];
1200 }
1201
1202 /* string_concat_db's constructor. */
1203
1204 string_concat_db::string_concat_db ()
1205 {
1206 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1207 }
1208
1209 /* Record that a string concatenation occurred, covering NUM
1210 string literal tokens. LOCS is an array of size NUM, containing the
1211 locations of the tokens. A copy of LOCS is taken. */
1212
1213 void
1214 string_concat_db::record_string_concatenation (int num, location_t *locs)
1215 {
1216 gcc_assert (num > 1);
1217 gcc_assert (locs);
1218
1219 location_t key_loc = get_key_loc (locs[0]);
1220
1221 string_concat *concat
1222 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1223 m_table->put (key_loc, concat);
1224 }
1225
1226 /* Determine if LOC was the location of the initial token of a
1227 concatenation of string literal tokens.
1228 If so, *OUT_NUM is written to with the number of tokens, and
1229 *OUT_LOCS with a pointer to an array of locations of the
1230 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1231 storage owned by the string_concat_db.
1232 Otherwise, return false. */
1233
1234 bool
1235 string_concat_db::get_string_concatenation (location_t loc,
1236 int *out_num,
1237 location_t **out_locs)
1238 {
1239 gcc_assert (out_num);
1240 gcc_assert (out_locs);
1241
1242 location_t key_loc = get_key_loc (loc);
1243
1244 string_concat **concat = m_table->get (key_loc);
1245 if (!concat)
1246 return false;
1247
1248 *out_num = (*concat)->m_num;
1249 *out_locs = (*concat)->m_locs;
1250 return true;
1251 }
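/* Illustrative round trip for the two methods above (editorial
   addition, not from the original file).  DB is a hypothetical
   string_concat_db and LOCS an array with the locations of three
   concatenated string-literal tokens.

       db->record_string_concatenation (3, locs);

       int num = 0;
       location_t *out = NULL;
       if (db->get_string_concatenation (locs[0], &num, &out))
         gcc_assert (num == 3);

   Both calls key off the location of the first literal token, after
   canonicalization by get_key_loc.  */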
1252
1253 /* Internal function. Canonicalize LOC into a form suitable for
1254 use as a key within the database, stripping away macro expansion,
1255 ad-hoc information, and range information, using the location of
1256 the start of LOC within an ordinary linemap. */
1257
1258 location_t
1259 string_concat_db::get_key_loc (location_t loc)
1260 {
1261 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1262 NULL);
1263
1264 loc = get_range_from_loc (line_table, loc).m_start;
1265
1266 return loc;
1267 }
1268
1269 /* Helper class for use within get_substring_ranges_for_loc.
1270 A vec of cpp_string with responsibility for releasing all of the
1271 str->text for each str in the vector. */
1272
1273 class auto_cpp_string_vec : public auto_vec <cpp_string>
1274 {
1275 public:
1276 auto_cpp_string_vec (int alloc)
1277 : auto_vec <cpp_string> (alloc) {}
1278
1279 ~auto_cpp_string_vec ()
1280 {
1281 /* Clean up the copies within this vec. */
1282 int i;
1283 cpp_string *str;
1284 FOR_EACH_VEC_ELT (*this, i, str)
1285 free (const_cast <unsigned char *> (str->text));
1286 }
1287 };
1288
1289 /* Attempt to populate RANGES with source location information on the
1290 individual characters within the string literal found at STRLOC.
1291 If CONCATS is non-NULL, then any string literals that the token at
1292 STRLOC was concatenated with are also added to RANGES.
1293
1294 Return NULL if successful, or an error message if any errors occurred (in
1295 which case RANGES may be only partially populated and should not
1296 be used).
1297
1298 This is implemented by re-parsing the relevant source line(s). */
1299
1300 static const char *
1301 get_substring_ranges_for_loc (cpp_reader *pfile,
1302 string_concat_db *concats,
1303 location_t strloc,
1304 enum cpp_ttype type,
1305 cpp_substring_ranges &ranges)
1306 {
1307 gcc_assert (pfile);
1308
1309 if (strloc == UNKNOWN_LOCATION)
1310 return "unknown location";
1311
1312 /* If string concatenation has occurred at STRLOC, get the locations
1313 of all of the literal tokens making up the compound string.
1314 Otherwise, just use STRLOC. */
1315 int num_locs = 1;
1316 location_t *strlocs = &strloc;
1317 if (concats)
1318 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1319
1320 auto_cpp_string_vec strs (num_locs);
1321 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1322 for (int i = 0; i < num_locs; i++)
1323 {
1324 /* Get range of strloc. We will use it to locate the start and finish
1325 of the literal token within the line. */
1326 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1327
1328 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1329 /* If the string is within a macro expansion, we can't get at the
1330 end location. */
1331 return "macro expansion";
1332
1333 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1334 /* If so, we can't reliably determine where the token started within
1335 its line. */
1336 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1337
1338 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1339 /* If so, we can't reliably determine where the token finished within
1340 its line. */
1341 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1342
1343 expanded_location start
1344 = expand_location_to_spelling_point (src_range.m_start);
1345 expanded_location finish
1346 = expand_location_to_spelling_point (src_range.m_finish);
1347 if (start.file != finish.file)
1348 return "range endpoints are in different files";
1349 if (start.line != finish.line)
1350 return "range endpoints are on different lines";
1351 if (start.column > finish.column)
1352 return "range endpoints are reversed";
1353
1354 int line_width;
1355 const char *line = location_get_source_line (start.file, start.line,
1356 &line_width);
1357 if (line == NULL)
1358 return "unable to read source line";
1359
1360 /* Determine the location of the literal (including quotes
1361 and leading prefix chars, such as the 'u' in a u""
1362 token). */
1363 const char *literal = line + start.column - 1;
1364 int literal_length = finish.column - start.column + 1;
1365
1366 gcc_assert (line_width >= (start.column - 1 + literal_length));
1367 cpp_string from;
1368 from.len = literal_length;
1369 /* Make a copy of the literal, to avoid having to rely on
1370 the lifetime of the copy of the line within the cache.
1371 This will be released by the auto_cpp_string_vec dtor. */
1372 from.text = XDUPVEC (unsigned char, literal, literal_length);
1373 strs.safe_push (from);
1374
1375 /* For very long lines, a new linemap could have started
1376 halfway through the token.
1377 Ensure that the loc_reader uses the linemap of the
1378 *end* of the token for its start location. */
1379 const line_map_ordinary *final_ord_map;
1380 linemap_resolve_location (line_table, src_range.m_finish,
1381 LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1382 location_t start_loc
1383 = linemap_position_for_line_and_column (line_table, final_ord_map,
1384 start.line, start.column);
1385
1386 cpp_string_location_reader loc_reader (start_loc, line_table);
1387 loc_readers.safe_push (loc_reader);
1388 }
1389
1390 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1391 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1392 loc_readers.address (),
1393 num_locs, &ranges, type);
1394 if (err)
1395 return err;
1396
1397 /* Success: "ranges" should now contain information on the string. */
1398 return NULL;
1399 }
1400
1401 /* Attempt to populate *OUT_RANGE with source location information on the
1402 range of given characters within the string literal found at STRLOC.
1403 START_IDX and END_IDX refer to offsets within the execution character
1404 set.
1405 If CONCATS is non-NULL, then any string literals that the token at
1406 STRLOC was concatenated with are also considered.
1407
1408 This is implemented by re-parsing the relevant source line(s).
1409
1410 Return NULL if successful, or an error message if any errors occurred.
1411 Error messages are intended for GCC developers (to help debugging) rather
1412 than for end-users. */
1413
1414 const char *
1415 get_source_range_for_substring (cpp_reader *pfile,
1416 string_concat_db *concats,
1417 location_t strloc,
1418 enum cpp_ttype type,
1419 int start_idx, int end_idx,
1420 source_range *out_range)
1421 {
1422 gcc_checking_assert (start_idx >= 0);
1423 gcc_checking_assert (end_idx >= 0);
1424 gcc_assert (out_range);
1425
1426 cpp_substring_ranges ranges;
1427 const char *err
1428 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1429 if (err)
1430 return err;
1431
1432 if (start_idx >= ranges.get_num_ranges ())
1433 return "start_idx out of range";
1434 if (end_idx >= ranges.get_num_ranges ())
1435 return "end_idx out of range";
1436
1437 out_range->m_start = ranges.get_range (start_idx).m_start;
1438 out_range->m_finish = ranges.get_range (end_idx).m_finish;
1439 return NULL;
1440 }
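/* Illustrative sketch of a caller of the function above (editorial
   addition, not from the original file).  PFILE, CONCATS and STRLOC
   are assumed to come from the front end; the indices 2 and 4 pick
   hypothetical characters within the execution character set.

       source_range range;
       const char *err
         = get_source_range_for_substring (pfile, concats, strloc,
                                           CPP_STRING, 2, 4, &range);

   On success (ERR == NULL), range.m_start and range.m_finish delimit
   those characters; on failure the message is aimed at GCC developers
   rather than end users.  */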
1441
1442 /* As get_source_range_for_substring, but write to *OUT the number
1443 of ranges that are available. */
1444
1445 const char *
1446 get_num_source_ranges_for_substring (cpp_reader *pfile,
1447 string_concat_db *concats,
1448 location_t strloc,
1449 enum cpp_ttype type,
1450 int *out)
1451 {
1452 gcc_assert (out);
1453
1454 cpp_substring_ranges ranges;
1455 const char *err
1456 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1457
1458 if (err)
1459 return err;
1460
1461 *out = ranges.get_num_ranges ();
1462 return NULL;
1463 }
1464
1465 #if CHECKING_P
1466
1467 namespace selftest {
1468
1469 /* Selftests of location handling. */
1470
1471 /* A class for writing out a temporary sourcefile for use in selftests
1472 of input handling. */
1473
1474 class temp_source_file
1475 {
1476 public:
1477 temp_source_file (const location &loc, const char *suffix,
1478 const char *content);
1479 ~temp_source_file ();
1480
1481 const char *get_filename () const { return m_filename; }
1482
1483 private:
1484 char *m_filename;
1485 };
1486
1487 /* Constructor. Create a tempfile using SUFFIX, and write CONTENT to
1488 it. Abort if anything goes wrong, using LOC as the effective
1489 location in the problem report. */
1490
1491 temp_source_file::temp_source_file (const location &loc, const char *suffix,
1492 const char *content)
1493 {
1494 m_filename = make_temp_file (suffix);
1495 ASSERT_NE (m_filename, NULL);
1496
1497 FILE *out = fopen (m_filename, "w");
1498 if (!out)
1499 ::selftest::fail_formatted (loc, "unable to open tempfile: %s",
1500 m_filename);
1501 fprintf (out, "%s", content);
1502 fclose (out);
1503 }
1504
1505 /* Destructor. Delete the tempfile. */
1506
1507 temp_source_file::~temp_source_file ()
1508 {
1509 unlink (m_filename);
1510 free (m_filename);
1511 }
1512
1513 /* Helper function for verifying location data: when location_t
1514 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1515 as having column 0. */
1516
1517 static bool
1518 should_have_column_data_p (location_t loc)
1519 {
1520 if (IS_ADHOC_LOC (loc))
1521 loc = get_location_from_adhoc_loc (line_table, loc);
1522 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1523 return false;
1524 return true;
1525 }
1526
1527 /* Selftest for should_have_column_data_p. */
1528
1529 static void
1530 test_should_have_column_data_p ()
1531 {
1532 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1533 ASSERT_TRUE
1534 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1535 ASSERT_FALSE
1536 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1537 }
1538
1539 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1540 on LOC. */
1541
1542 static void
1543 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1544 location_t loc)
1545 {
1546 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1547 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1548 /* If location_t values are sufficiently high, then column numbers
1549 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1550 When close to the threshold, column numbers *may* be present: if
1551 the final linemap before the threshold contains a line that straddles
1552 the threshold, locations in that line have column information. */
1553 if (should_have_column_data_p (loc))
1554 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1555 }
1556
1557 /* Various selftests in this file involve constructing a line table
1558 and one or more line maps within it.
1559
1560 For maximum test coverage we want to run these tests with a variety
1561 of situations:
1562 - line_table->default_range_bits: some frontends use a non-zero value
1563 and others use zero
1564 - the fallback modes within line-map.c: there are various threshold
1565 values for source_location/location_t beyond which line-map.c changes
1566 behavior (disabling of the range-packing optimization, disabling
1567 of column-tracking). We can exercise these by starting the line_table
1568 at interesting values at or near these thresholds.
1569
1570 The following struct describes a particular case within our test
1571 matrix. */
1572
1573 struct line_table_case
1574 {
1575 line_table_case (int default_range_bits, int base_location)
1576 : m_default_range_bits (default_range_bits),
1577 m_base_location (base_location)
1578 {}
1579
1580 int m_default_range_bits;
1581 int m_base_location;
1582 };
1583
1584 /* A class for overriding the global "line_table" within a selftest,
1585 restoring its value afterwards. */
1586
1587 class temp_line_table
1588 {
1589 public:
1590 temp_line_table (const line_table_case &);
1591 ~temp_line_table ();
1592
1593 private:
1594 line_maps *m_old_line_table;
1595 };
1596
1597 /* Constructor. Store the old value of line_table, and create a new
1598 one, using the situation described in CASE_. */
1599
1600 temp_line_table::temp_line_table (const line_table_case &case_)
1601 : m_old_line_table (line_table)
1602 {
1603 line_table = ggc_alloc<line_maps> ();
1604 linemap_init (line_table, BUILTINS_LOCATION);
1605 line_table->reallocator = m_old_line_table->reallocator;
1606 line_table->round_alloc_size = m_old_line_table->round_alloc_size;
1607 line_table->default_range_bits = case_.m_default_range_bits;
1608 if (case_.m_base_location)
1609 {
1610 line_table->highest_location = case_.m_base_location;
1611 line_table->highest_line = case_.m_base_location;
1612 }
1613 }
1614
1615 /* Destructor. Restore the old value of line_table. */
1616
1617 temp_line_table::~temp_line_table ()
1618 {
1619 line_table = m_old_line_table;
1620 }
1621
1622 /* Verify basic operation of ordinary linemaps. */
1623
1624 static void
1625 test_accessing_ordinary_linemaps (const line_table_case &case_)
1626 {
1627 temp_line_table tmp_lt (case_);
1628
1629 /* Build a simple linemap describing some locations. */
1630 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1631
1632 linemap_line_start (line_table, 1, 100);
1633 location_t loc_a = linemap_position_for_column (line_table, 1);
1634 location_t loc_b = linemap_position_for_column (line_table, 23);
1635
1636 linemap_line_start (line_table, 2, 100);
1637 location_t loc_c = linemap_position_for_column (line_table, 1);
1638 location_t loc_d = linemap_position_for_column (line_table, 17);
1639
1640 /* Example of a very long line. */
1641 linemap_line_start (line_table, 3, 2000);
1642 location_t loc_e = linemap_position_for_column (line_table, 700);
1643
1644 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1645
1646 /* Multiple files. */
1647 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1648 linemap_line_start (line_table, 1, 200);
1649 location_t loc_f = linemap_position_for_column (line_table, 150);
1650 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1651
1652 /* Verify that we can recover the location info. */
1653 assert_loceq ("foo.c", 1, 1, loc_a);
1654 assert_loceq ("foo.c", 1, 23, loc_b);
1655 assert_loceq ("foo.c", 2, 1, loc_c);
1656 assert_loceq ("foo.c", 2, 17, loc_d);
1657 assert_loceq ("foo.c", 3, 700, loc_e);
1658 assert_loceq ("bar.c", 1, 150, loc_f);
1659
1660 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1661 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1662
1663 /* Verify using make_location to build a range, and extracting data
1664 back from it. */
1665 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1666 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1667 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1668 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1669 ASSERT_EQ (loc_b, src_range.m_start);
1670 ASSERT_EQ (loc_d, src_range.m_finish);
1671 }
1672
1673 /* Verify various properties of UNKNOWN_LOCATION. */
1674
1675 static void
1676 test_unknown_location ()
1677 {
1678 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1679 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1680 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1681 }
1682
1683 /* Verify various properties of BUILTINS_LOCATION. */
1684
1685 static void
1686 test_builtins ()
1687 {
1688 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1689 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1690 }
1691
1692 /* Verify reading of input files (e.g. for caret-based diagnostics). */
1693
1694 static void
1695 test_reading_source_line ()
1696 {
1697 /* Create a tempfile and write some text to it. */
1698 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1699 "01234567890123456789\n"
1700 "This is the test text\n"
1701 "This is the 3rd line\n");
1702
1703 /* Read back a specific line from the tempfile. */
1704 int line_size;
1705 const char *source_line = location_get_source_line (tmp.get_filename (),
1706 2, &line_size);
1707 ASSERT_TRUE (source_line != NULL);
1708 ASSERT_EQ (21, line_size);
1709 if (!strncmp ("This is the test text",
1710 source_line, line_size))
1711 ::selftest::pass (SELFTEST_LOCATION,
1712 "source_line matched expected value");
1713 else
1714 ::selftest::fail (SELFTEST_LOCATION,
1715 "source_line did not match expected value");
1716
1717 }
1718
1719 /* Tests of lexing. */
1720
1721 /* Verify that token TOK from PARSER has cpp_token_as_text
1722 equal to EXPECTED_TEXT. */
1723
1724 #define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT) \
1725 SELFTEST_BEGIN_STMT \
1726 unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK)); \
1727 ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt); \
1728 SELFTEST_END_STMT
1729
1730 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1731 and ranges from EXP_START_COL to EXP_FINISH_COL.
1732 Use LOC as the effective location of the selftest. */
1733
1734 static void
1735 assert_token_loc_eq (const location &loc,
1736 const cpp_token *tok,
1737 const char *exp_filename, int exp_linenum,
1738 int exp_start_col, int exp_finish_col)
1739 {
1740 location_t tok_loc = tok->src_loc;
1741 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1742 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1743
1744 /* If location_t values are sufficiently high, then column numbers
1745 will be unavailable. */
1746 if (!should_have_column_data_p (tok_loc))
1747 return;
1748
1749 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1750 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1751 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1752 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1753 }
1754
1755 /* Use assert_token_loc_eq to verify the TOK->src_loc, using
1756 SELFTEST_LOCATION as the effective location of the selftest. */
1757
1758 #define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
1759 EXP_START_COL, EXP_FINISH_COL) \
1760 assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
1761 (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1762
1763 /* Test of lexing a file using libcpp, verifying tokens and their
1764 location information. */
1765
1766 static void
1767 test_lexer (const line_table_case &case_)
1768 {
1769 /* Create a tempfile and write some text to it. */
1770 const char *content =
1771 /*00000000011111111112222222222333333.3333444444444.455555555556
1772 12345678901234567890123456789012345.6789012345678.901234567890. */
1773 ("test_name /* c-style comment */\n"
1774 " \"test literal\"\n"
1775 " // test c++-style comment\n"
1776 " 42\n");
1777 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1778
1779 temp_line_table tmp_lt (case_);
1780
1781 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1782
1783 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1784 ASSERT_NE (fname, NULL);
1785
1786 /* Verify that we get the expected tokens back, with the correct
1787 location information. */
1788
1789 location_t loc;
1790 const cpp_token *tok;
1791 tok = cpp_get_token_with_location (parser, &loc);
1792 ASSERT_NE (tok, NULL);
1793 ASSERT_EQ (tok->type, CPP_NAME);
1794 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1795 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1796
1797 tok = cpp_get_token_with_location (parser, &loc);
1798 ASSERT_NE (tok, NULL);
1799 ASSERT_EQ (tok->type, CPP_STRING);
1800 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1801 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1802
1803 tok = cpp_get_token_with_location (parser, &loc);
1804 ASSERT_NE (tok, NULL);
1805 ASSERT_EQ (tok->type, CPP_NUMBER);
1806 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
1807 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
1808
1809 tok = cpp_get_token_with_location (parser, &loc);
1810 ASSERT_NE (tok, NULL);
1811 ASSERT_EQ (tok->type, CPP_EOF);
1812
1813 cpp_finish (parser, NULL);
1814 cpp_destroy (parser);
1815 }
1816
1817 /* Forward decls. */
1818
1819 struct lexer_test;
1820 class lexer_test_options;
1821
1822 /* A class for specifying options of a lexer_test.
1823 The "apply" vfunc is called during the lexer_test constructor. */
1824
1825 class lexer_test_options
1826 {
1827 public:
1828 virtual void apply (lexer_test &) = 0;
1829 };
1830
1831 /* A struct for writing lexer tests. */
1832
1833 struct lexer_test
1834 {
1835 lexer_test (const line_table_case &case_, const char *content,
1836 lexer_test_options *options);
1837 ~lexer_test ();
1838
1839 const cpp_token *get_token ();
1840
1841 temp_source_file m_tempfile;
1842 temp_line_table m_tmp_lt;
1843 cpp_reader *m_parser;
1844 string_concat_db m_concats;
1845 };
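
/* A minimal usage sketch of the struct above (the content string here is
   purely illustrative, not taken from any test below):

     lexer_test test (case_, "   \"abc\"\n", NULL);
     const cpp_token *tok = test.get_token ();
     ASSERT_EQ (tok->type, CPP_STRING);

   On destruction, lexer_test verifies that the next token is CPP_EOF.
   The tests below all follow this pattern.  */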
1846
1847 /* Use an EBCDIC encoding for the execution charset, specifically
1848 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
1849
1850 This exercises iconv integration within libcpp.
1851 Not every build of iconv supports the given charset,
1852 so we need to flag this error and handle it gracefully. */
1853
1854 class ebcdic_execution_charset : public lexer_test_options
1855 {
1856 public:
1857 ebcdic_execution_charset () : m_num_iconv_errors (0)
1858 {
1859 gcc_assert (s_singleton == NULL);
1860 s_singleton = this;
1861 }
1862 ~ebcdic_execution_charset ()
1863 {
1864 gcc_assert (s_singleton == this);
1865 s_singleton = NULL;
1866 }
1867
1868 void apply (lexer_test &test) FINAL OVERRIDE
1869 {
1870 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
1871 cpp_opts->narrow_charset = "IBM1047";
1872
1873 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
1874 callbacks->error = on_error;
1875 }
1876
1877 static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
1878 int level ATTRIBUTE_UNUSED,
1879 int reason ATTRIBUTE_UNUSED,
1880 rich_location *richloc ATTRIBUTE_UNUSED,
1881 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
1882 ATTRIBUTE_FPTR_PRINTF(5,0)
1883 {
1884 gcc_assert (s_singleton);
1885 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
1886 when the local iconv build doesn't support the conversion. */
1887 if (strstr (msgid, "not supported by iconv"))
1888 {
1889 s_singleton->m_num_iconv_errors++;
1890 return true;
1891 }
1892
1893 /* Otherwise, we have an unexpected error. */
1894 abort ();
1895 }
1896
1897 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
1898
1899 private:
1900 static ebcdic_execution_charset *s_singleton;
1901 int m_num_iconv_errors;
1902 };
1903
1904 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
1905
1906 /* Constructor. Override line_table with a new instance based on CASE_,
1907 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
1908 start parsing the tempfile. */
1909
1910 lexer_test::lexer_test (const line_table_case &case_, const char *content,
1911 lexer_test_options *options) :
1912 /* Create a tempfile and write the text to it. */
1913 m_tempfile (SELFTEST_LOCATION, ".c", content),
1914 m_tmp_lt (case_),
1915 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
1916 m_concats ()
1917 {
1918 if (options)
1919 options->apply (*this);
1920
1921 cpp_init_iconv (m_parser);
1922
1923 /* Parse the file. */
1924 const char *fname = cpp_read_main_file (m_parser,
1925 m_tempfile.get_filename ());
1926 ASSERT_NE (fname, NULL);
1927 }
1928
1929 /* Destructor. Verify that the next token in m_parser is EOF. */
1930
1931 lexer_test::~lexer_test ()
1932 {
1933 location_t loc;
1934 const cpp_token *tok;
1935
1936 tok = cpp_get_token_with_location (m_parser, &loc);
1937 ASSERT_NE (tok, NULL);
1938 ASSERT_EQ (tok->type, CPP_EOF);
1939
1940 cpp_finish (m_parser, NULL);
1941 cpp_destroy (m_parser);
1942 }
1943
1944 /* Get the next token from m_parser. */
1945
1946 const cpp_token *
1947 lexer_test::get_token ()
1948 {
1949 location_t loc;
1950 const cpp_token *tok;
1951
1952 tok = cpp_get_token_with_location (m_parser, &loc);
1953 ASSERT_NE (tok, NULL);
1954 return tok;
1955 }
1956
1957 /* Verify that locations within string literals are correctly handled. */
1958
1959 /* Verify get_source_range_for_substring for token(s) at STRLOC,
1960 using the string concatenation database for TEST.
1961
1962 Assert that the character at index IDX is on EXPECTED_LINE,
1963 and that it begins at column EXPECTED_START_COL and ends at
1964 EXPECTED_FINISH_COL (unless the locations are beyond
1965 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
1966 columns). */
1967
1968 static void
1969 assert_char_at_range (const location &loc,
1970 lexer_test& test,
1971 location_t strloc, enum cpp_ttype type, int idx,
1972 int expected_line, int expected_start_col,
1973 int expected_finish_col)
1974 {
1975 cpp_reader *pfile = test.m_parser;
1976 string_concat_db *concats = &test.m_concats;
1977
1978 source_range actual_range;
1979 const char *err
1980 = get_source_range_for_substring (pfile, concats, strloc, type,
1981 idx, idx, &actual_range);
1982 if (should_have_column_data_p (strloc))
1983 ASSERT_EQ_AT (loc, NULL, err);
1984 else
1985 {
1986 ASSERT_STREQ_AT (loc,
1987 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
1988 err);
1989 return;
1990 }
1991
1992 int actual_start_line = LOCATION_LINE (actual_range.m_start);
1993 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
1994 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
1995 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
1996
1997 if (should_have_column_data_p (actual_range.m_start))
1998 {
1999 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2000 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2001 }
2002 if (should_have_column_data_p (actual_range.m_finish))
2003 {
2004 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2005 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2006 }
2007 }
2008
2009 /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2010 the effective location of any errors. */
2011
2012 #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2013 EXPECTED_START_COL, EXPECTED_FINISH_COL) \
2014 assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2015 (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2016 (EXPECTED_FINISH_COL))
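
/* Usage sketch for the macro above (the concrete values are illustrative,
   matching the layout used by test_lexer_string_locations_simple below):
   for a CPP_STRING token whose opening quote is at column 9 of line 1,
   the character at index 3 of the interpreted string ('3') is at
   column 13, so a test would write:

     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, 3, 1, 13, 13);  */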
2017
2018 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2019 using the string concatenation database for TEST.
2020
2021 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2022
2023 static void
2024 assert_num_substring_ranges (const location &loc,
2025 lexer_test& test,
2026 location_t strloc,
2027 enum cpp_ttype type,
2028 int expected_num_ranges)
2029 {
2030 cpp_reader *pfile = test.m_parser;
2031 string_concat_db *concats = &test.m_concats;
2032
2033 int actual_num_ranges;
2034 const char *err
2035 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2036 &actual_num_ranges);
2037 if (should_have_column_data_p (strloc))
2038 ASSERT_EQ_AT (loc, NULL, err);
2039 else
2040 {
2041 ASSERT_STREQ_AT (loc,
2042 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2043 err);
2044 return;
2045 }
2046 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2047 }
2048
2049 /* Macro for calling assert_num_substring_ranges, supplying
2050 SELFTEST_LOCATION for the effective location of any errors. */
2051
2052 #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2053 EXPECTED_NUM_RANGES) \
2054 assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2055 (TYPE), (EXPECTED_NUM_RANGES))
2056
2057
2058 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2059 returns an error (using the string concatenation database for TEST). */
2060
2061 static void
2062 assert_has_no_substring_ranges (const location &loc,
2063 lexer_test& test,
2064 location_t strloc,
2065 enum cpp_ttype type,
2066 const char *expected_err)
2067 {
2068 cpp_reader *pfile = test.m_parser;
2069 string_concat_db *concats = &test.m_concats;
2070 cpp_substring_ranges ranges;
2071 const char *actual_err
2072 = get_substring_ranges_for_loc (pfile, concats, strloc,
2073 type, ranges);
2074 if (should_have_column_data_p (strloc))
2075 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2076 else
2077 ASSERT_STREQ_AT (loc,
2078 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2079 actual_err);
2080 }
2081
2082 #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
2083 assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2084 (STRLOC), (TYPE), (ERR))
2085
2086 /* Lex a simple string literal. Verify the substring location data, before
2087 and after running cpp_interpret_string on it. */
2088
2089 static void
2090 test_lexer_string_locations_simple (const line_table_case &case_)
2091 {
2092 /* Digits 0-9 (with 0 at column 10), the simple way.
2093 ....................000000000.11111111112.2222222223333333333
2094 ....................123456789.01234567890.1234567890123456789
2095 We add a trailing comment to ensure that we correctly locate
2096 the end of the string literal token. */
2097 const char *content = " \"0123456789\" /* not a string */\n";
2098 lexer_test test (case_, content, NULL);
2099
2100 /* Verify that we get the expected token back, with the correct
2101 location information. */
2102 const cpp_token *tok = test.get_token ();
2103 ASSERT_EQ (tok->type, CPP_STRING);
2104 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2105 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2106
2107 /* At this point in lexing, the quote characters are treated as part of
2108 the string (they are stripped off by cpp_interpret_string). */
2109
2110 ASSERT_EQ (tok->val.str.len, 12);
2111
2112 /* Verify that cpp_interpret_string works. */
2113 cpp_string dst_string;
2114 const enum cpp_ttype type = CPP_STRING;
2115 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2116 &dst_string, type);
2117 ASSERT_TRUE (result);
2118 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2119 free (const_cast <unsigned char *> (dst_string.text));
2120
2121 /* Verify ranges of individual characters. This no longer includes the
2122 quotes. */
2123 for (int i = 0; i <= 9; i++)
2124 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2125 10 + i, 10 + i);
2126
2127 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10);
2128 }
2129
2130 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2131 encoding. */
2132
2133 static void
2134 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2135 {
2136 /* EBCDIC support requires iconv. */
2137 if (!HAVE_ICONV)
2138 return;
2139
2140 /* Digits 0-9 (with 0 at column 10), the simple way.
2141 ....................000000000.11111111112.2222222223333333333
2142 ....................123456789.01234567890.1234567890123456789
2143 We add a trailing comment to ensure that we correctly locate
2144 the end of the string literal token. */
2145 const char *content = " \"0123456789\" /* not a string */\n";
2146 ebcdic_execution_charset use_ebcdic;
2147 lexer_test test (case_, content, &use_ebcdic);
2148
2149 /* Verify that we get the expected token back, with the correct
2150 location information. */
2151 const cpp_token *tok = test.get_token ();
2152 ASSERT_EQ (tok->type, CPP_STRING);
2153 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2154 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2155
2156 /* At this point in lexing, the quote characters are treated as part of
2157 the string (they are stripped off by cpp_interpret_string). */
2158
2159 ASSERT_EQ (tok->val.str.len, 12);
2160
2161 /* The remainder of the test requires an iconv implementation that
2162 can convert from UTF-8 to the EBCDIC encoding requested above. */
2163 if (use_ebcdic.iconv_errors_occurred_p ())
2164 return;
2165
2166 /* Verify that cpp_interpret_string works. */
2167 cpp_string dst_string;
2168 const enum cpp_ttype type = CPP_STRING;
2169 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2170 &dst_string, type);
2171 ASSERT_TRUE (result);
2172 /* We should now have EBCDIC-encoded text, specifically
2173 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2174 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2175 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2176 (const char *)dst_string.text);
2177 free (const_cast <unsigned char *> (dst_string.text));
2178
2179 /* Verify that we don't attempt to record substring location information
2180 for such cases. */
2181 ASSERT_HAS_NO_SUBSTRING_RANGES
2182 (test, tok->src_loc, type,
2183 "execution character set != source character set");
2184 }
2185
2186 /* Lex a string literal containing a hex-escaped character.
2187 Verify the substring location data, before and after running
2188 cpp_interpret_string on it. */
2189
2190 static void
2191 test_lexer_string_locations_hex (const line_table_case &case_)
2192 {
2193 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2194 and with a space in place of digit 6, to terminate the escaped
2195 hex code.
2196 ....................000000000.111111.11112222.
2197 ....................123456789.012345.67890123. */
2198 const char *content = " \"01234\\x35 789\"\n";
2199 lexer_test test (case_, content, NULL);
2200
2201 /* Verify that we get the expected token back, with the correct
2202 location information. */
2203 const cpp_token *tok = test.get_token ();
2204 ASSERT_EQ (tok->type, CPP_STRING);
2205 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2206 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2207
2208 /* At this point in lexing, the quote characters are treated as part of
2209 the string (they are stripped off by cpp_interpret_string). */
2210 ASSERT_EQ (tok->val.str.len, 15);
2211
2212 /* Verify that cpp_interpret_string works. */
2213 cpp_string dst_string;
2214 const enum cpp_ttype type = CPP_STRING;
2215 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2216 &dst_string, type);
2217 ASSERT_TRUE (result);
2218 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2219 free (const_cast <unsigned char *> (dst_string.text));
2220
2221 /* Verify ranges of individual characters. This no longer includes the
2222 quotes. */
2223 for (int i = 0; i <= 4; i++)
2224 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2225 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2226 for (int i = 6; i <= 9; i++)
2227 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2228
2229 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10);
2230 }
2231
2232 /* Lex a string literal containing an octal-escaped character.
2233 Verify the substring location data after running cpp_interpret_string
2234 on it. */
2235
2236 static void
2237 test_lexer_string_locations_oct (const line_table_case &case_)
2238 {
2239 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2240 and with a space in place of digit 6, to terminate the escaped
2241 octal code.
2242 ....................000000000.111111.11112222.2222223333333333444
2243 ....................123456789.012345.67890123.4567890123456789012 */
2244 const char *content = " \"01234\\065 789\" /* not a string */\n";
2245 lexer_test test (case_, content, NULL);
2246
2247 /* Verify that we get the expected token back, with the correct
2248 location information. */
2249 const cpp_token *tok = test.get_token ();
2250 ASSERT_EQ (tok->type, CPP_STRING);
2251 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2252
2253 /* Verify that cpp_interpret_string works. */
2254 cpp_string dst_string;
2255 const enum cpp_ttype type = CPP_STRING;
2256 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2257 &dst_string, type);
2258 ASSERT_TRUE (result);
2259 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2260 free (const_cast <unsigned char *> (dst_string.text));
2261
2262 /* Verify ranges of individual characters. This no longer includes the
2263 quotes. */
2264 for (int i = 0; i < 5; i++)
2265 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2266 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2267 for (int i = 6; i <= 9; i++)
2268 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2269
2270 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10);
2271 }
2272
2273 /* Test of string literal containing letter escapes. */
2274
2275 static void
2276 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2277 {
2278 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2279 .....................000000000.1.11111.1.1.11222.22222223333333
2280 .....................123456789.0.12345.6.7.89012.34567890123456. */
2281 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2282 lexer_test test (case_, content, NULL);
2283
2284 /* Verify that we get the expected tokens back. */
2285 const cpp_token *tok = test.get_token ();
2286 ASSERT_EQ (tok->type, CPP_STRING);
2287 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2288
2289 /* Verify ranges of individual characters. */
2290 /* "\t". */
2291 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2292 0, 1, 10, 11);
2293 /* "foo". */
2294 for (int i = 1; i <= 3; i++)
2295 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2296 i, 1, 11 + i, 11 + i);
2297 /* "\\" and "\n". */
2298 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2299 4, 1, 15, 16);
2300 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2301 5, 1, 17, 18);
2302
2303 /* "bar". */
2304 for (int i = 6; i <= 8; i++)
2305 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2306 i, 1, 13 + i, 13 + i);
2307
2308 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 9);
2309 }
2310
2311 /* Another test of a string literal containing a letter escape.
2312 Based on string seen in
2313 printf ("%-%\n");
2314 in gcc.dg/format/c90-printf-1.c. */
2315
2316 static void
2317 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2318 {
2319 /* .....................000000000.1111.11.1111.22222222223.
2320 .....................123456789.0123.45.6789.01234567890. */
2321 const char *content = (" \"%-%\\n\" /* non-str */\n");
2322 lexer_test test (case_, content, NULL);
2323
2324 /* Verify that we get the expected tokens back. */
2325 const cpp_token *tok = test.get_token ();
2326 ASSERT_EQ (tok->type, CPP_STRING);
2327 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2328
2329 /* Verify ranges of individual characters. */
2330 /* "%-%". */
2331 for (int i = 0; i < 3; i++)
2332 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2333 i, 1, 10 + i, 10 + i);
2334 /* "\n". */
2335 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2336 3, 1, 13, 14);
2337
2338 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 4);
2339 }
2340
2341 /* Lex a string literal containing 4-digit UCNs (\uNNNN).
2342 Verify the substring location data after running cpp_interpret_string
2343 on it. */
2344
2345 static void
2346 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2347 {
2348 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals written
2349 as 4-digit UCNs.
2350 ....................000000000.111111.111122.222222223.33333333344444
2351 ....................123456789.012345.678901.234567890.12345678901234 */
2352 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2353 lexer_test test (case_, content, NULL);
2354
2355 /* Verify that we get the expected token back, with the correct
2356 location information. */
2357 const cpp_token *tok = test.get_token ();
2358 ASSERT_EQ (tok->type, CPP_STRING);
2359 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2360
2361 /* Verify that cpp_interpret_string works.
2362 The string should be encoded in the execution character
2363 set. Assuming that that is UTF-8, we should have the following:
2364 ----------- ---- ----- ------- ----------------
2365 Byte offset Byte Octal Unicode Source Column(s)
2366 ----------- ---- ----- ------- ----------------
2367 0 0x30 '0' 10
2368 1 0x31 '1' 11
2369 2 0x32 '2' 12
2370 3 0x33 '3' 13
2371 4 0x34 '4' 14
2372 5 0xE2 \342 U+2174 15-20
2373 6 0x85 \205 (cont) 15-20
2374 7 0xB4 \264 (cont) 15-20
2375 8 0xE2 \342 U+2175 21-26
2376 9 0x85 \205 (cont) 21-26
2377 10 0xB5 \265 (cont) 21-26
2378 11 0x37 '7' 27
2379 12 0x38 '8' 28
2380 13 0x39 '9' 29
2381 ----------- ---- ----- ------- ---------------. */
2382
2383 cpp_string dst_string;
2384 const enum cpp_ttype type = CPP_STRING;
2385 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2386 &dst_string, type);
2387 ASSERT_TRUE (result);
2388 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2389 (const char *)dst_string.text);
2390 free (const_cast <unsigned char *> (dst_string.text));
2391
2392 /* Verify ranges of individual characters. This no longer includes the
2393 quotes.
2394 '01234'. */
2395 for (int i = 0; i <= 4; i++)
2396 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2397 /* U+2174. */
2398 for (int i = 5; i <= 7; i++)
2399 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2400 /* U+2175. */
2401 for (int i = 8; i <= 10; i++)
2402 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2403 /* '789'. */
2404 for (int i = 11; i <= 13; i++)
2405 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2406
2407 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 14);
2408 }
2409
2410 /* Lex a string literal containing 8-digit UCNs (\UNNNNNNNN).
2411 Verify the substring location data after running cpp_interpret_string
2412 on it. */
2413
2414 static void
2415 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2416 {
2417 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals written as 8-digit UCNs.
2418 ....................000000000.111111.1111222222.2222333333333.344444
2419 ....................123456789.012345.6789012345.6789012345678.901234 */
2420 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2421 lexer_test test (case_, content, NULL);
2422
2423 /* Verify that we get the expected token back, with the correct
2424 location information. */
2425 const cpp_token *tok = test.get_token ();
2426 ASSERT_EQ (tok->type, CPP_STRING);
2427 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2428 "\"01234\\U00002174\\U00002175789\"");
2429
2430 /* Verify that cpp_interpret_string works.
2431 The UTF-8 encoding of the string is identical to that from
2432 the ucn4 testcase above; the only difference is the column
2433 locations. */
2434 cpp_string dst_string;
2435 const enum cpp_ttype type = CPP_STRING;
2436 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2437 &dst_string, type);
2438 ASSERT_TRUE (result);
2439 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2440 (const char *)dst_string.text);
2441 free (const_cast <unsigned char *> (dst_string.text));
2442
2443 /* Verify ranges of individual characters. This no longer includes the
2444 quotes.
2445 '01234'. */
2446 for (int i = 0; i <= 4; i++)
2447 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2448 /* U+2174. */
2449 for (int i = 5; i <= 7; i++)
2450 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2451 /* U+2175. */
2452 for (int i = 8; i <= 10; i++)
2453 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2454 /* '789' at columns 35-37. */
2455 for (int i = 11; i <= 13; i++)
2456 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2457
2458 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 14);
2459 }
2460
2461 /* Fetch a big-endian 32-bit value and convert to host endianness. */
2462
2463 static uint32_t
2464 uint32_from_big_endian (const uint32_t *ptr_be_value)
2465 {
2466 const unsigned char *buf = (const unsigned char *)ptr_be_value;
2467 return (((uint32_t) buf[0] << 24)
2468 | ((uint32_t) buf[1] << 16)
2469 | ((uint32_t) buf[2] << 8)
2470 | (uint32_t) buf[3]);
2471 }
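
/* For example, the big-endian bytes { 0x00, 0x00, 0x00, 0x30 } assemble
   as (0 << 24) | (0 << 16) | (0 << 8) | 0x30 == 0x30 == '0', which is
   what the UTF-32BE checks in the wide-string tests below rely on.  */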
2472
2473 /* Lex a wide string literal and verify that attempts to read substring
2474 location data from it fail gracefully. */
2475
2476 static void
2477 test_lexer_string_locations_wide_string (const line_table_case &case_)
2478 {
2479 /* Digits 0-9.
2480 ....................000000000.11111111112.22222222233333
2481 ....................123456789.01234567890.12345678901234 */
2482 const char *content = " L\"0123456789\" /* non-str */\n";
2483 lexer_test test (case_, content, NULL);
2484
2485 /* Verify that we get the expected token back, with the correct
2486 location information. */
2487 const cpp_token *tok = test.get_token ();
2488 ASSERT_EQ (tok->type, CPP_WSTRING);
2489 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2490
2491 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2492 cpp_string dst_string;
2493 const enum cpp_ttype type = CPP_WSTRING;
2494 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2495 &dst_string, type);
2496 ASSERT_TRUE (result);
2497 /* The cpp_reader defaults to big-endian with
2498 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2499 now be encoded as UTF-32BE. */
2500 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2501 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2502 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2503 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2504 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2505 free (const_cast <unsigned char *> (dst_string.text));
2506
2507 /* We don't yet support generating substring location information
2508 for L"" strings. */
2509 ASSERT_HAS_NO_SUBSTRING_RANGES
2510 (test, tok->src_loc, type,
2511 "execution character set != source character set");
2512 }
2513
2514 /* Fetch a big-endian 16-bit value and convert to host endianness. */
2515
2516 static uint16_t
2517 uint16_from_big_endian (const uint16_t *ptr_be_value)
2518 {
2519 const unsigned char *buf = (const unsigned char *)ptr_be_value;
2520 return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1];
2521 }
2522
2523 /* Lex a u"" string literal and verify that attempts to read substring
2524 location data from it fail gracefully. */
2525
2526 static void
2527 test_lexer_string_locations_string16 (const line_table_case &case_)
2528 {
2529 /* Digits 0-9.
2530 ....................000000000.11111111112.22222222233333
2531 ....................123456789.01234567890.12345678901234 */
2532 const char *content = " u\"0123456789\" /* non-str */\n";
2533 lexer_test test (case_, content, NULL);
2534
2535 /* Verify that we get the expected token back, with the correct
2536 location information. */
2537 const cpp_token *tok = test.get_token ();
2538 ASSERT_EQ (tok->type, CPP_STRING16);
2539 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2540
2541 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2542 cpp_string dst_string;
2543 const enum cpp_ttype type = CPP_STRING16;
2544 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2545 &dst_string, type);
2546 ASSERT_TRUE (result);
2547
2548 /* The cpp_reader defaults to big-endian, so dst_string should
2549 now be encoded as UTF-16BE. */
2550 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2551 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2552 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2553 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2554 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2555 free (const_cast <unsigned char *> (dst_string.text));
2556
2557 /* We don't yet support generating substring location information
2558 for u"" strings. */
2559 ASSERT_HAS_NO_SUBSTRING_RANGES
2560 (test, tok->src_loc, type,
2561 "execution character set != source character set");
2562 }
2563
2564 /* Lex a U"" string literal and verify that attempts to read substring
2565 location data from it fail gracefully. */
2566
2567 static void
2568 test_lexer_string_locations_string32 (const line_table_case &case_)
2569 {
2570 /* Digits 0-9.
2571 ....................000000000.11111111112.22222222233333
2572 ....................123456789.01234567890.12345678901234 */
2573 const char *content = " U\"0123456789\" /* non-str */\n";
2574 lexer_test test (case_, content, NULL);
2575
2576 /* Verify that we get the expected token back, with the correct
2577 location information. */
2578 const cpp_token *tok = test.get_token ();
2579 ASSERT_EQ (tok->type, CPP_STRING32);
2580 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2581
2582 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2583 cpp_string dst_string;
2584 const enum cpp_ttype type = CPP_STRING32;
2585 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2586 &dst_string, type);
2587 ASSERT_TRUE (result);
2588
2589 /* The cpp_reader defaults to big-endian, so dst_string should
2590 now be encoded as UTF-32BE. */
2591 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2592 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2593 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2594 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2595 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2596 free (const_cast <unsigned char *> (dst_string.text));
2597
2598 /* We don't yet support generating substring location information
2599 for U"" strings. */
2600 ASSERT_HAS_NO_SUBSTRING_RANGES
2601 (test, tok->src_loc, type,
2602 "execution character set != source character set");
2603 }
2604
2605 /* Lex a u8-string literal.
2606 Verify the substring location data after running cpp_interpret_string
2607 on it. */
2608
2609 static void
2610 test_lexer_string_locations_u8 (const line_table_case &case_)
2611 {
2612 /* Digits 0-9.
2613 ....................000000000.11111111112.22222222233333
2614 ....................123456789.01234567890.12345678901234 */
2615 const char *content = " u8\"0123456789\" /* non-str */\n";
2616 lexer_test test (case_, content, NULL);
2617
2618 /* Verify that we get the expected token back, with the correct
2619 location information. */
2620 const cpp_token *tok = test.get_token ();
2621 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2622 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2623
2624 /* Verify that cpp_interpret_string works. */
2625 cpp_string dst_string;
2626 const enum cpp_ttype type = CPP_STRING;
2627 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2628 &dst_string, type);
2629 ASSERT_TRUE (result);
2630 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2631 free (const_cast <unsigned char *> (dst_string.text));
2632
2633 /* Verify ranges of individual characters. This no longer includes the
2634 quotes. */
2635 for (int i = 0; i <= 9; i++)
2636 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2637 }
2638
2639 /* Lex a string literal containing UTF-8 source characters.
2640 Verify the substring location data after running cpp_interpret_string
2641 on it. */
2642
2643 static void
2644 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2645 {
2646 /* This string literal is written out to the source file as UTF-8,
2647 and is of the form "before mojibake after", where "mojibake"
2648 is written as the following four unicode code points:
2649 U+6587 CJK UNIFIED IDEOGRAPH-6587
2650 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2651 U+5316 CJK UNIFIED IDEOGRAPH-5316
2652 U+3051 HIRAGANA LETTER KE.
2653 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2654 "before" and "after" are 1 byte per unicode character.
2655
2656 The numbers shown are "columns", which are *byte* numbers within
2657 the line, rather than unicode character numbers.
2658
2659 .................... 000000000.1111111.
2660 .................... 123456789.0123456. */
2661 const char *content = (" \"before "
2662 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2663 UTF-8: 0xE6 0x96 0x87
2664 C octal escaped UTF-8: \346\226\207
2665 "column" numbers: 17-19. */
2666 "\346\226\207"
2667
2668 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2669 UTF-8: 0xE5 0xAD 0x97
2670 C octal escaped UTF-8: \345\255\227
2671 "column" numbers: 20-22. */
2672 "\345\255\227"
2673
2674 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2675 UTF-8: 0xE5 0x8C 0x96
2676 C octal escaped UTF-8: \345\214\226
2677 "column" numbers: 23-25. */
2678 "\345\214\226"
2679
2680 /* U+3051 HIRAGANA LETTER KE
2681 UTF-8: 0xE3 0x81 0x91
2682 C octal escaped UTF-8: \343\201\221
2683 "column" numbers: 26-28. */
2684 "\343\201\221"
2685
2686 /* column numbers 29 onwards
2687 2333333.33334444444444
2688 9012345.67890123456789. */
2689 " after\" /* non-str */\n");
2690 lexer_test test (case_, content, NULL);
2691
2692 /* Verify that we get the expected token back, with the correct
2693 location information. */
2694 const cpp_token *tok = test.get_token ();
2695 ASSERT_EQ (tok->type, CPP_STRING);
2696 ASSERT_TOKEN_AS_TEXT_EQ
2697 (test.m_parser, tok,
2698 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2699
2700 /* Verify that cpp_interpret_string works. */
2701 cpp_string dst_string;
2702 const enum cpp_ttype type = CPP_STRING;
2703 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2704 &dst_string, type);
2705 ASSERT_TRUE (result);
2706 ASSERT_STREQ
2707 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2708 (const char *)dst_string.text);
2709 free (const_cast <unsigned char *> (dst_string.text));
2710
2711 /* Verify ranges of individual characters. This no longer includes the
2712 quotes.
2713 Assuming that both source and execution encodings are UTF-8, we have
2714 a run of 25 octets in each. */
2715 for (int i = 0; i < 25; i++)
2716 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2717
2718 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 25);
2719 }
2720
2721 /* Test of string literal concatenation. */
2722
2723 static void
2724 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
2725 {
2726 /* Digits 0-9.
2727 .....................000000000.111111.11112222222222
2728 .....................123456789.012345.67890123456789. */
2729 const char *content = (" \"01234\" /* non-str */\n"
2730 " \"56789\" /* non-str */\n");
2731 lexer_test test (case_, content, NULL);
2732
2733 location_t input_locs[2];
2734
2735 /* Verify that we get the expected tokens back. */
2736 auto_vec <cpp_string> input_strings;
2737 const cpp_token *tok_a = test.get_token ();
2738 ASSERT_EQ (tok_a->type, CPP_STRING);
2739 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
2740 input_strings.safe_push (tok_a->val.str);
2741 input_locs[0] = tok_a->src_loc;
2742
2743 const cpp_token *tok_b = test.get_token ();
2744 ASSERT_EQ (tok_b->type, CPP_STRING);
2745 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
2746 input_strings.safe_push (tok_b->val.str);
2747 input_locs[1] = tok_b->src_loc;
2748
2749 /* Verify that cpp_interpret_string works. */
2750 cpp_string dst_string;
2751 const enum cpp_ttype type = CPP_STRING;
2752 bool result = cpp_interpret_string (test.m_parser,
2753 input_strings.address (), 2,
2754 &dst_string, type);
2755 ASSERT_TRUE (result);
2756 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2757 free (const_cast <unsigned char *> (dst_string.text));
2758
2759 /* Simulate c-lex.c's lex_string in order to record concatenation. */
2760 test.m_concats.record_string_concatenation (2, input_locs);
2761
2762 location_t initial_loc = input_locs[0];
2763
2764 for (int i = 0; i <= 4; i++)
2765 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
2766 for (int i = 5; i <= 9; i++)
2767 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
2768
2769 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10);
2770 }
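
/* A note on the column arithmetic above: characters 0-4 come from the
   first fragment, whose '0' is at column 10 of line 1, giving column
   10 + i; characters 5-9 come from the second fragment, whose '5' is
   also at column 10, but on line 2, so index i maps to column
   10 + (i - 5) == 5 + i.  */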
2771
2772 /* Another test of string literal concatenation. */
2773
2774 static void
2775 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
2776 {
2777 /* Digits 0-9.
2778 .....................000000000.111.11111112222222
2779 .....................123456789.012.34567890123456. */
2780 const char *content = (" \"01\" /* non-str */\n"
2781 " \"23\" /* non-str */\n"
2782 " \"45\" /* non-str */\n"
2783 " \"67\" /* non-str */\n"
2784 " \"89\" /* non-str */\n");
2785 lexer_test test (case_, content, NULL);
2786
2787 auto_vec <cpp_string> input_strings;
2788 location_t input_locs[5];
2789
2790 /* Verify that we get the expected tokens back. */
2791 for (int i = 0; i < 5; i++)
2792 {
2793 const cpp_token *tok = test.get_token ();
2794 ASSERT_EQ (tok->type, CPP_STRING);
2795 input_strings.safe_push (tok->val.str);
2796 input_locs[i] = tok->src_loc;
2797 }
2798
2799 /* Verify that cpp_interpret_string works. */
2800 cpp_string dst_string;
2801 const enum cpp_ttype type = CPP_STRING;
2802 bool result = cpp_interpret_string (test.m_parser,
2803 input_strings.address (), 5,
2804 &dst_string, type);
2805 ASSERT_TRUE (result);
2806 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2807 free (const_cast <unsigned char *> (dst_string.text));
2808
2809 /* Simulate c-lex.c's lex_string in order to record concatenation. */
2810 test.m_concats.record_string_concatenation (5, input_locs);
2811
2812 location_t initial_loc = input_locs[0];
2813
2814 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
2815 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
2816 and expect get_source_range_for_substring to fail.
2817 However, for a string concatenation test, we can have a case
2818 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
2819 but subsequent strings can be after it.
2820 Attempting to detect this within assert_char_at_range
2821 would overcomplicate the logic for the common test cases, so
2822 we detect it here. */
2823 if (should_have_column_data_p (input_locs[0])
2824 && !should_have_column_data_p (input_locs[4]))
2825 {
2826 /* Verify that get_source_range_for_substring gracefully rejects
2827 this case. */
2828 source_range actual_range;
2829 const char *err
2830 = get_source_range_for_substring (test.m_parser, &test.m_concats,
2831 initial_loc, type, 0, 0,
2832 &actual_range);
2833 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
2834 return;
2835 }
2836
2837 for (int i = 0; i < 5; i++)
2838 for (int j = 0; j < 2; j++)
2839 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
2840 i + 1, 10 + j, 10 + j);
2841
2842 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10);
2843 }
2844
2845 /* Another test of string literal concatenation, this time combined with
2846 various kinds of escaped characters. */
2847
2848 static void
2849 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
2850 {
2851 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35" and
2852 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
2853 const char *content
2854 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
2855 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
2856 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
2857 lexer_test test (case_, content, NULL);
2858
2859 auto_vec <cpp_string> input_strings;
2860 location_t input_locs[4];
2861
2862 /* Verify that we get the expected tokens back. */
2863 for (int i = 0; i < 4; i++)
2864 {
2865 const cpp_token *tok = test.get_token ();
2866 ASSERT_EQ (tok->type, CPP_STRING);
2867 input_strings.safe_push (tok->val.str);
2868 input_locs[i] = tok->src_loc;
2869 }
2870
2871 /* Verify that cpp_interpret_string works. */
2872 cpp_string dst_string;
2873 const enum cpp_ttype type = CPP_STRING;
2874 bool result = cpp_interpret_string (test.m_parser,
2875 input_strings.address (), 4,
2876 &dst_string, type);
2877 ASSERT_TRUE (result);
2878 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2879 free (const_cast <unsigned char *> (dst_string.text));
2880
2881 /* Simulate c-lex.c's lex_string in order to record concatenation. */
2882 test.m_concats.record_string_concatenation (4, input_locs);
2883
2884 location_t initial_loc = input_locs[0];
2885
2886 for (int i = 0; i <= 4; i++)
2887 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
2888 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
2889 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
2890 for (int i = 7; i <= 9; i++)
2891 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
2892
2893 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10);
2894 }
2895
2896 /* Test of string literal in a macro. */
2897
2898 static void
2899 test_lexer_string_locations_macro (const line_table_case &case_)
2900 {
2901 /* Digits 0-9.
2902 .....................0000000001111111111.22222222223.
2903 .....................1234567890123456789.01234567890. */
2904 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
2905 " MACRO");
2906 lexer_test test (case_, content, NULL);
2907
2908 /* Verify that we get the expected tokens back. */
2909 const cpp_token *tok = test.get_token ();
2910 ASSERT_EQ (tok->type, CPP_PADDING);
2911
2912 tok = test.get_token ();
2913 ASSERT_EQ (tok->type, CPP_STRING);
2914 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2915
2916 /* Verify ranges of individual characters. We ought to
2917 see columns within the macro definition. */
2918 for (int i = 0; i <= 9; i++)
2919 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2920 i, 1, 20 + i, 20 + i);
2921
2922 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2923
2924 tok = test.get_token ();
2925 ASSERT_EQ (tok->type, CPP_PADDING);
2926 }
2927
2928 /* Test of stringification of a macro argument. */
2929
2930 static void
2931 test_lexer_string_locations_stringified_macro_argument
2932 (const line_table_case &case_)
2933 {
2934 /* .....................000000000111111111122222222223.
2935 .....................123456789012345678901234567890. */
2936 const char *content = ("#define MACRO(X) #X /* non-str */\n"
2937 "MACRO(foo)\n");
2938 lexer_test test (case_, content, NULL);
2939
2940 /* Verify that we get the expected token back. */
2941 const cpp_token *tok = test.get_token ();
2942 ASSERT_EQ (tok->type, CPP_PADDING);
2943
2944 tok = test.get_token ();
2945 ASSERT_EQ (tok->type, CPP_STRING);
2946 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
2947
2948 /* We don't support getting the location of a stringified macro
2949 argument. Verify that it fails gracefully. */
2950 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
2951 "cpp_interpret_string_1 failed");
2952
2953 tok = test.get_token ();
2954 ASSERT_EQ (tok->type, CPP_PADDING);
2955
2956 tok = test.get_token ();
2957 ASSERT_EQ (tok->type, CPP_PADDING);
2958 }
2959
2960 /* Ensure that we fail gracefully if something attempts to pass
2961 in a location that isn't a string literal token. Seen on this code:
2962
2963 const char a[] = " %d ";
2964 __builtin_printf (a, 0.5);
2965 ^
2966
2967 when c-format.c erroneously used the indicated one-character
2968 location as the format string location, leading to a read past the
2969 end of a string buffer in cpp_interpret_string_1. */
2970
2971 static void
2972 test_lexer_string_locations_non_string (const line_table_case &case_)
2973 {
2974 /* .....................000000000111111111122222222223.
2975 .....................123456789012345678901234567890. */
2976 const char *content = (" a\n");
2977 lexer_test test (case_, content, NULL);
2978
2979 /* Verify that we get the expected token back. */
2980 const cpp_token *tok = test.get_token ();
2981 ASSERT_EQ (tok->type, CPP_NAME);
2982 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
2983
2984 /* At this point, libcpp is attempting to interpret the name as a
2985 string literal, despite it not starting with a quote. We don't detect
2986 that, but we should at least fail gracefully. */
2987 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
2988 "cpp_interpret_string_1 failed");
2989 }
2990
2991 /* Ensure that we can read substring information for a token which
2992 starts in one linemap and ends in another. Adapted from
2993 gcc.dg/cpp/pr69985.c. */
2994
2995 static void
2996 test_lexer_string_locations_long_line (const line_table_case &case_)
2997 {
2998 /* .....................000000.000111111111
2999 .....................123456.789012345678. */
3000 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3001 " \"0123456789012345678901234567890123456789"
3002 "0123456789012345678901234567890123456789"
3003 "0123456789012345678901234567890123456789"
3004 "0123456789\"\n");
3005
3006 lexer_test test (case_, content, NULL);
3007
3008 /* Verify that we get the expected token back. */
3009 const cpp_token *tok = test.get_token ();
3010 ASSERT_EQ (tok->type, CPP_STRING);
3011
3012 if (!should_have_column_data_p (line_table->highest_location))
3013 return;
3014
3015 /* Verify ranges of individual characters. */
3016 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 130);
3017 for (int i = 0; i < 130; i++)
3018 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3019 i, 2, 7 + i, 7 + i);
3020 }
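
/* The 130 ranges verified above correspond to the length of the string
   literal: three 40-digit runs plus a final 10-digit run
   (3 * 40 + 10 == 130), all of which lie on line 2 of the tempfile.  */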
3021
3022 /* Test of lexing char constants. */
3023
3024 static void
3025 test_lexer_char_constants (const line_table_case &case_)
3026 {
3027 /* Various char constants.
3028 .....................0000000001111111111.22222222223.
3029 .....................1234567890123456789.01234567890. */
3030 const char *content = (" 'a'\n"
3031 " u'a'\n"
3032 " U'a'\n"
3033 " L'a'\n"
3034 " 'abc'\n");
3035 lexer_test test (case_, content, NULL);
3036
3037 /* Verify that we get the expected tokens back. */
3038 /* 'a'. */
3039 const cpp_token *tok = test.get_token ();
3040 ASSERT_EQ (tok->type, CPP_CHAR);
3041 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3042
3043 unsigned int chars_seen;
3044 int unsignedp;
3045 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3046 &chars_seen, &unsignedp);
3047 ASSERT_EQ (cc, 'a');
3048 ASSERT_EQ (chars_seen, 1);
3049
3050 /* u'a'. */
3051 tok = test.get_token ();
3052 ASSERT_EQ (tok->type, CPP_CHAR16);
3053 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3054
3055 /* U'a'. */
3056 tok = test.get_token ();
3057 ASSERT_EQ (tok->type, CPP_CHAR32);
3058 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3059
3060 /* L'a'. */
3061 tok = test.get_token ();
3062 ASSERT_EQ (tok->type, CPP_WCHAR);
3063 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3064
3065 /* 'abc' (c-char-sequence). */
3066 tok = test.get_token ();
3067 ASSERT_EQ (tok->type, CPP_CHAR);
3068 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3069 }
3070 /* A table of interesting location_t values, giving one axis of our test
3071 matrix. */
3072
3073 static const location_t boundary_locations[] = {
3074 /* Zero means "don't override the default values for a new line_table". */
3075 0,
3076
3077 /* An arbitrary non-zero value that isn't close to one of
3078 the boundary values below. */
3079 0x10000,
3080
3081 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3082 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3083 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3084 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3085 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3086 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3087
3088 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3089 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3090 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3091 LINE_MAP_MAX_LOCATION_WITH_COLS,
3092 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3093 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3094 };
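
/* The table above has 12 entries; input_c_tests below runs the test
   matrix for two settings of default_range_bits (0 and 5), hence the
   "2 * 12" assertion on the number of cases tested.  */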
3095
3096 /* Run all of the selftests within this file. */
3097
3098 void
3099 input_c_tests ()
3100 {
3101 test_should_have_column_data_p ();
3102 test_unknown_location ();
3103 test_builtins ();
3104
3105 /* As noted above in the description of struct line_table_case,
3106 we want to explore a test matrix of interesting line_table
3107 situations, running various selftests for each case within the
3108 matrix. */
3109
3110 /* Run all tests with:
3111 (a) line_table->default_range_bits == 0, and
3112 (b) line_table->default_range_bits == 5. */
3113 int num_cases_tested = 0;
3114 for (int default_range_bits = 0; default_range_bits <= 5;
3115 default_range_bits += 5)
3116 {
3117 /* ...and use each of the "interesting" location values as
3118 the starting location within line_table. */
3119 const int num_boundary_locations
3120 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3121 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3122 {
3123 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3124
3125 /* Run all tests for the given case within the test matrix. */
3126 test_accessing_ordinary_linemaps (c);
3127 test_lexer (c);
3128 test_lexer_string_locations_simple (c);
3129 test_lexer_string_locations_ebcdic (c);
3130 test_lexer_string_locations_hex (c);
3131 test_lexer_string_locations_oct (c);
3132 test_lexer_string_locations_letter_escape_1 (c);
3133 test_lexer_string_locations_letter_escape_2 (c);
3134 test_lexer_string_locations_ucn4 (c);
3135 test_lexer_string_locations_ucn8 (c);
3136 test_lexer_string_locations_wide_string (c);
3137 test_lexer_string_locations_string16 (c);
3138 test_lexer_string_locations_string32 (c);
3139 test_lexer_string_locations_u8 (c);
3140 test_lexer_string_locations_utf8_source (c);
3141 test_lexer_string_locations_concatenation_1 (c);
3142 test_lexer_string_locations_concatenation_2 (c);
3143 test_lexer_string_locations_concatenation_3 (c);
3144 test_lexer_string_locations_macro (c);
3145 test_lexer_string_locations_stringified_macro_argument (c);
3146 test_lexer_string_locations_non_string (c);
3147 test_lexer_string_locations_long_line (c);
3148 test_lexer_char_constants (c);
3149
3150 num_cases_tested++;
3151 }
3152 }
3153
3154 /* Verify that we fully covered the test matrix. */
3155 ASSERT_EQ (num_cases_tested, 2 * 12);
3156
3157 test_reading_source_line ();
3158 }
3159
3160 } // namespace selftest
3161
3162 #endif /* CHECKING_P */