]>
Commit | Line | Data |
---|---|---|
37ba4887 | 1 | /* Data and functions related to line maps and input files. |
aad93da1 | 2 | Copyright (C) 2004-2017 Free Software Foundation, Inc. |
37ba4887 | 3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "intl.h" | |
28f17529 | 24 | #include "diagnostic-core.h" |
99b4f3a2 | 25 | #include "selftest.h" |
b73690a4 | 26 | #include "cpplib.h" |
ffc2c526 | 27 | |
e2f73ee8 | 28 | #ifndef HAVE_ICONV |
29 | #define HAVE_ICONV 0 | |
30 | #endif | |
31 | ||
ffc2c526 | 32 | /* This is a cache used by get_next_line to store the content of a |
33 | file to be searched for file lines. */ | |
34 | struct fcache | |
35 | { | |
36 | /* These are information used to store a line boundary. */ | |
37 | struct line_info | |
38 | { | |
39 | /* The line number. It starts from 1. */ | |
40 | size_t line_num; | |
41 | ||
42 | /* The position (byte count) of the beginning of the line, | |
43 | relative to the file data pointer. This starts at zero. */ | |
44 | size_t start_pos; | |
45 | ||
46 | /* The position (byte count) of the last byte of the line. This | |
47 | normally points to the '\n' character, or to one byte after the | |
48 | last byte of the file, if the file doesn't contain a '\n' | |
49 | character. */ | |
50 | size_t end_pos; | |
51 | ||
52 | line_info (size_t l, size_t s, size_t e) | |
53 | : line_num (l), start_pos (s), end_pos (e) | |
54 | {} | |
55 | ||
56 | line_info () | |
57 | :line_num (0), start_pos (0), end_pos (0) | |
58 | {} | |
59 | }; | |
60 | ||
61 | /* The number of time this file has been accessed. This is used | |
62 | to designate which file cache to evict from the cache | |
63 | array. */ | |
64 | unsigned use_count; | |
65 | ||
c6a7d9e9 | 66 | /* The file_path is the key for identifying a particular file in |
67 | the cache. | |
68 | For libcpp-using code, the underlying buffer for this field is | |
69 | owned by the corresponding _cpp_file within the cpp_reader. */ | |
ffc2c526 | 70 | const char *file_path; |
71 | ||
72 | FILE *fp; | |
73 | ||
74 | /* This points to the content of the file that we've read so | |
75 | far. */ | |
76 | char *data; | |
77 | ||
78 | /* The size of the DATA array above.*/ | |
79 | size_t size; | |
80 | ||
81 | /* The number of bytes read from the underlying file so far. This | |
82 | must be less (or equal) than SIZE above. */ | |
83 | size_t nb_read; | |
84 | ||
85 | /* The index of the beginning of the current line. */ | |
86 | size_t line_start_idx; | |
87 | ||
88 | /* The number of the previous line read. This starts at 1. Zero | |
89 | means we've read no line so far. */ | |
90 | size_t line_num; | |
91 | ||
92 | /* This is the total number of lines of the current file. At the | |
93 | moment, we try to get this information from the line map | |
94 | subsystem. Note that this is just a hint. When using the C++ | |
95 | front-end, this hint is correct because the input file is then | |
96 | completely tokenized before parsing starts; so the line map knows | |
97 | the number of lines before compilation really starts. For e.g, | |
98 | the C front-end, it can happen that we start emitting diagnostics | |
99 | before the line map has seen the end of the file. */ | |
100 | size_t total_lines; | |
101 | ||
fe066ce3 | 102 | /* Could this file be missing a trailing newline on its final line? |
103 | Initially true (to cope with empty files), set to true/false | |
104 | as each line is read. */ | |
105 | bool missing_trailing_newline; | |
106 | ||
ffc2c526 | 107 | /* This is a record of the beginning and end of the lines we've seen |
108 | while reading the file. This is useful to avoid walking the data | |
109 | from the beginning when we are asked to read a line that is | |
110 | before LINE_START_IDX above. Note that the maximum size of this | |
111 | record is fcache_line_record_size, so that the memory consumption | |
112 | doesn't explode. We thus scale total_lines down to | |
113 | fcache_line_record_size. */ | |
114 | vec<line_info, va_heap> line_record; | |
115 | ||
116 | fcache (); | |
117 | ~fcache (); | |
118 | }; | |
37ba4887 | 119 | |
/* Current position in real source file.  */

location_t input_location = UNKNOWN_LOCATION;

struct line_maps *line_table;

/* A stashed copy of "line_table" for use by selftest::line_table_test.
   This needs to be a global so that it can be a GC root, and thus
   prevent the stashed copy from being garbage-collected if the GC runs
   during a line_table_test.  */

struct line_maps *saved_line_table;

/* Table of cached files; allocated on first use by
   diagnostic_file_cache_init and freed by diagnostic_file_cache_fini.  */
static fcache *fcache_tab;
/* Number of entries in fcache_tab.  */
static const size_t fcache_tab_size = 16;
/* Initial size, in bytes, of an fcache data buffer; the buffer is
   doubled each time it fills up.  */
static const size_t fcache_buffer_size = 4 * 1024;
/* Maximum number of line boundaries remembered per cached file.  */
static const size_t fcache_line_record_size = 100;
137 | ||
5ebe2143 | 138 | /* Expand the source location LOC into a human readable location. If |
139 | LOC resolves to a builtin location, the file name of the readable | |
39107655 | 140 | location is set to the string "<built-in>". If EXPANSION_POINT_P is |
141 | TRUE and LOC is virtual, then it is resolved to the expansion | |
142 | point of the involved macro. Otherwise, it is resolved to the | |
bd172d61 | 143 | spelling location of the token. |
144 | ||
145 | When resolving to the spelling location of the token, if the | |
146 | resulting location is for a built-in location (that is, it has no | |
147 | associated line/column) in the context of a macro expansion, the | |
148 | returned location is the first one (while unwinding the macro | |
149 | location towards its expansion point) that is in real source | |
150 | code. */ | |
39107655 | 151 | |
152 | static expanded_location | |
153 | expand_location_1 (source_location loc, | |
154 | bool expansion_point_p) | |
37ba4887 | 155 | { |
156 | expanded_location xloc; | |
551e34da | 157 | const line_map_ordinary *map; |
bd172d61 | 158 | enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT; |
5169661d | 159 | tree block = NULL; |
160 | ||
161 | if (IS_ADHOC_LOC (loc)) | |
162 | { | |
163 | block = LOCATION_BLOCK (loc); | |
164 | loc = LOCATION_LOCUS (loc); | |
165 | } | |
bd172d61 | 166 | |
167 | memset (&xloc, 0, sizeof (xloc)); | |
5ebe2143 | 168 | |
bd172d61 | 169 | if (loc >= RESERVED_LOCATION_COUNT) |
170 | { | |
171 | if (!expansion_point_p) | |
172 | { | |
173 | /* We want to resolve LOC to its spelling location. | |
174 | ||
175 | But if that spelling location is a reserved location that | |
176 | appears in the context of a macro expansion (like for a | |
177 | location for a built-in token), let's consider the first | |
178 | location (toward the expansion point) that is not reserved; | |
179 | that is, the first location that is in real source code. */ | |
180 | loc = linemap_unwind_to_first_non_reserved_loc (line_table, | |
551e34da | 181 | loc, NULL); |
bd172d61 | 182 | lrk = LRK_SPELLING_LOCATION; |
183 | } | |
184 | loc = linemap_resolve_location (line_table, loc, | |
185 | lrk, &map); | |
186 | xloc = linemap_expand_location (line_table, map, loc); | |
187 | } | |
5ebe2143 | 188 | |
5169661d | 189 | xloc.data = block; |
37ba4887 | 190 | if (loc <= BUILTINS_LOCATION) |
5ebe2143 | 191 | xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>"); |
192 | ||
37ba4887 | 193 | return xloc; |
194 | } | |
e77b8253 | 195 | |
ffc2c526 | 196 | /* Initialize the set of cache used for files accessed by caret |
197 | diagnostic. */ | |
198 | ||
199 | static void | |
200 | diagnostic_file_cache_init (void) | |
201 | { | |
202 | if (fcache_tab == NULL) | |
203 | fcache_tab = new fcache[fcache_tab_size]; | |
204 | } | |
205 | ||
e7683169 | 206 | /* Free the resources used by the set of cache used for files accessed |
ffc2c526 | 207 | by caret diagnostic. */ |
208 | ||
209 | void | |
210 | diagnostic_file_cache_fini (void) | |
211 | { | |
212 | if (fcache_tab) | |
213 | { | |
214 | delete [] (fcache_tab); | |
215 | fcache_tab = NULL; | |
216 | } | |
217 | } | |
218 | ||
219 | /* Return the total lines number that have been read so far by the | |
220 | line map (in the preprocessor) so far. For languages like C++ that | |
221 | entirely preprocess the input file before starting to parse, this | |
222 | equals the actual number of lines of the file. */ | |
223 | ||
224 | static size_t | |
225 | total_lines_num (const char *file_path) | |
226 | { | |
227 | size_t r = 0; | |
228 | source_location l = 0; | |
229 | if (linemap_get_file_highest_location (line_table, file_path, &l)) | |
230 | { | |
231 | gcc_assert (l >= RESERVED_LOCATION_COUNT); | |
232 | expanded_location xloc = expand_location (l); | |
233 | r = xloc.line; | |
234 | } | |
235 | return r; | |
236 | } | |
237 | ||
238 | /* Lookup the cache used for the content of a given file accessed by | |
239 | caret diagnostic. Return the found cached file, or NULL if no | |
240 | cached file was found. */ | |
241 | ||
242 | static fcache* | |
243 | lookup_file_in_cache_tab (const char *file_path) | |
244 | { | |
245 | if (file_path == NULL) | |
246 | return NULL; | |
247 | ||
248 | diagnostic_file_cache_init (); | |
249 | ||
250 | /* This will contain the found cached file. */ | |
251 | fcache *r = NULL; | |
252 | for (unsigned i = 0; i < fcache_tab_size; ++i) | |
253 | { | |
254 | fcache *c = &fcache_tab[i]; | |
255 | if (c->file_path && !strcmp (c->file_path, file_path)) | |
256 | { | |
257 | ++c->use_count; | |
258 | r = c; | |
259 | } | |
260 | } | |
261 | ||
262 | if (r) | |
263 | ++r->use_count; | |
264 | ||
265 | return r; | |
266 | } | |
267 | ||
a476cb62 | 268 | /* Purge any mention of FILENAME from the cache of files used for |
269 | printing source code. For use in selftests when working | |
270 | with tempfiles. */ | |
271 | ||
272 | void | |
273 | diagnostics_file_cache_forcibly_evict_file (const char *file_path) | |
274 | { | |
275 | gcc_assert (file_path); | |
276 | ||
277 | fcache *r = lookup_file_in_cache_tab (file_path); | |
278 | if (!r) | |
279 | /* Not found. */ | |
280 | return; | |
281 | ||
282 | r->file_path = NULL; | |
283 | if (r->fp) | |
284 | fclose (r->fp); | |
285 | r->fp = NULL; | |
286 | r->nb_read = 0; | |
287 | r->line_start_idx = 0; | |
288 | r->line_num = 0; | |
289 | r->line_record.truncate (0); | |
290 | r->use_count = 0; | |
291 | r->total_lines = 0; | |
fe066ce3 | 292 | r->missing_trailing_newline = true; |
a476cb62 | 293 | } |
294 | ||
ffc2c526 | 295 | /* Return the file cache that has been less used, recently, or the |
296 | first empty one. If HIGHEST_USE_COUNT is non-null, | |
297 | *HIGHEST_USE_COUNT is set to the highest use count of the entries | |
298 | in the cache table. */ | |
299 | ||
300 | static fcache* | |
301 | evicted_cache_tab_entry (unsigned *highest_use_count) | |
302 | { | |
303 | diagnostic_file_cache_init (); | |
304 | ||
305 | fcache *to_evict = &fcache_tab[0]; | |
306 | unsigned huc = to_evict->use_count; | |
307 | for (unsigned i = 1; i < fcache_tab_size; ++i) | |
308 | { | |
309 | fcache *c = &fcache_tab[i]; | |
310 | bool c_is_empty = (c->file_path == NULL); | |
311 | ||
312 | if (c->use_count < to_evict->use_count | |
313 | || (to_evict->file_path && c_is_empty)) | |
314 | /* We evict C because it's either an entry with a lower use | |
315 | count or one that is empty. */ | |
316 | to_evict = c; | |
317 | ||
318 | if (huc < c->use_count) | |
319 | huc = c->use_count; | |
320 | ||
321 | if (c_is_empty) | |
322 | /* We've reached the end of the cache; subsequent elements are | |
323 | all empty. */ | |
324 | break; | |
325 | } | |
326 | ||
327 | if (highest_use_count) | |
328 | *highest_use_count = huc; | |
329 | ||
330 | return to_evict; | |
331 | } | |
332 | ||
333 | /* Create the cache used for the content of a given file to be | |
334 | accessed by caret diagnostic. This cache is added to an array of | |
335 | cache and can be retrieved by lookup_file_in_cache_tab. This | |
336 | function returns the created cache. Note that only the last | |
337 | fcache_tab_size files are cached. */ | |
338 | ||
339 | static fcache* | |
340 | add_file_to_cache_tab (const char *file_path) | |
341 | { | |
342 | ||
343 | FILE *fp = fopen (file_path, "r"); | |
c1cc4419 | 344 | if (fp == NULL) |
345 | return NULL; | |
ffc2c526 | 346 | |
347 | unsigned highest_use_count = 0; | |
348 | fcache *r = evicted_cache_tab_entry (&highest_use_count); | |
349 | r->file_path = file_path; | |
350 | if (r->fp) | |
351 | fclose (r->fp); | |
352 | r->fp = fp; | |
353 | r->nb_read = 0; | |
354 | r->line_start_idx = 0; | |
355 | r->line_num = 0; | |
356 | r->line_record.truncate (0); | |
357 | /* Ensure that this cache entry doesn't get evicted next time | |
358 | add_file_to_cache_tab is called. */ | |
359 | r->use_count = ++highest_use_count; | |
360 | r->total_lines = total_lines_num (file_path); | |
fe066ce3 | 361 | r->missing_trailing_newline = true; |
ffc2c526 | 362 | |
363 | return r; | |
364 | } | |
365 | ||
366 | /* Lookup the cache used for the content of a given file accessed by | |
367 | caret diagnostic. If no cached file was found, create a new cache | |
368 | for this file, add it to the array of cached file and return | |
369 | it. */ | |
370 | ||
371 | static fcache* | |
372 | lookup_or_add_file_to_cache_tab (const char *file_path) | |
373 | { | |
374 | fcache *r = lookup_file_in_cache_tab (file_path); | |
375 | if (r == NULL) | |
376 | r = add_file_to_cache_tab (file_path); | |
377 | return r; | |
378 | } | |
379 | ||
380 | /* Default constructor for a cache of file used by caret | |
381 | diagnostic. */ | |
382 | ||
383 | fcache::fcache () | |
384 | : use_count (0), file_path (NULL), fp (NULL), data (0), | |
385 | size (0), nb_read (0), line_start_idx (0), line_num (0), | |
fe066ce3 | 386 | total_lines (0), missing_trailing_newline (true) |
ffc2c526 | 387 | { |
388 | line_record.create (0); | |
389 | } | |
390 | ||
391 | /* Destructor for a cache of file used by caret diagnostic. */ | |
392 | ||
393 | fcache::~fcache () | |
394 | { | |
395 | if (fp) | |
396 | { | |
397 | fclose (fp); | |
398 | fp = NULL; | |
399 | } | |
400 | if (data) | |
401 | { | |
402 | XDELETEVEC (data); | |
403 | data = 0; | |
404 | } | |
405 | line_record.release (); | |
406 | } | |
407 | ||
408 | /* Returns TRUE iff the cache would need to be filled with data coming | |
409 | from the file. That is, either the cache is empty or full or the | |
410 | current line is empty. Note that if the cache is full, it would | |
411 | need to be extended and filled again. */ | |
412 | ||
413 | static bool | |
414 | needs_read (fcache *c) | |
415 | { | |
416 | return (c->nb_read == 0 | |
417 | || c->nb_read == c->size | |
418 | || (c->line_start_idx >= c->nb_read - 1)); | |
419 | } | |
420 | ||
421 | /* Return TRUE iff the cache is full and thus needs to be | |
422 | extended. */ | |
423 | ||
424 | static bool | |
425 | needs_grow (fcache *c) | |
426 | { | |
427 | return c->nb_read == c->size; | |
428 | } | |
429 | ||
430 | /* Grow the cache if it needs to be extended. */ | |
431 | ||
432 | static void | |
433 | maybe_grow (fcache *c) | |
5a983084 | 434 | { |
ffc2c526 | 435 | if (!needs_grow (c)) |
436 | return; | |
437 | ||
438 | size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2; | |
2e24ac9b | 439 | c->data = XRESIZEVEC (char, c->data, size); |
ffc2c526 | 440 | c->size = size; |
441 | } | |
5a983084 | 442 | |
ffc2c526 | 443 | /* Read more data into the cache. Extends the cache if need be. |
444 | Returns TRUE iff new data could be read. */ | |
445 | ||
446 | static bool | |
447 | read_data (fcache *c) | |
448 | { | |
449 | if (feof (c->fp) || ferror (c->fp)) | |
450 | return false; | |
451 | ||
452 | maybe_grow (c); | |
453 | ||
454 | char * from = c->data + c->nb_read; | |
455 | size_t to_read = c->size - c->nb_read; | |
456 | size_t nb_read = fread (from, 1, to_read, c->fp); | |
457 | ||
458 | if (ferror (c->fp)) | |
459 | return false; | |
460 | ||
461 | c->nb_read += nb_read; | |
462 | return !!nb_read; | |
463 | } | |
464 | ||
465 | /* Read new data iff the cache needs to be filled with more data | |
466 | coming from the file FP. Return TRUE iff the cache was filled with | |
467 | mode data. */ | |
468 | ||
469 | static bool | |
470 | maybe_read_data (fcache *c) | |
471 | { | |
472 | if (!needs_read (c)) | |
473 | return false; | |
474 | return read_data (c); | |
475 | } | |
476 | ||
477 | /* Read a new line from file FP, using C as a cache for the data | |
478 | coming from the file. Upon successful completion, *LINE is set to | |
2e24ac9b | 479 | the beginning of the line found. *LINE points directly in the |
480 | line cache and is only valid until the next call of get_next_line. | |
ffc2c526 | 481 | *LINE_LEN is set to the length of the line. Note that the line |
482 | does not contain any terminal delimiter. This function returns | |
483 | true if some data was read or process from the cache, false | |
2e24ac9b | 484 | otherwise. Note that subsequent calls to get_next_line might |
485 | make the content of *LINE invalid. */ | |
ffc2c526 | 486 | |
487 | static bool | |
488 | get_next_line (fcache *c, char **line, ssize_t *line_len) | |
489 | { | |
490 | /* Fill the cache with data to process. */ | |
491 | maybe_read_data (c); | |
492 | ||
493 | size_t remaining_size = c->nb_read - c->line_start_idx; | |
494 | if (remaining_size == 0) | |
495 | /* There is no more data to process. */ | |
496 | return false; | |
497 | ||
498 | char *line_start = c->data + c->line_start_idx; | |
499 | ||
500 | char *next_line_start = NULL; | |
501 | size_t len = 0; | |
502 | char *line_end = (char *) memchr (line_start, '\n', remaining_size); | |
503 | if (line_end == NULL) | |
5a983084 | 504 | { |
ffc2c526 | 505 | /* We haven't found the end-of-line delimiter in the cache. |
506 | Fill the cache with more data from the file and look for the | |
507 | '\n'. */ | |
508 | while (maybe_read_data (c)) | |
509 | { | |
510 | line_start = c->data + c->line_start_idx; | |
511 | remaining_size = c->nb_read - c->line_start_idx; | |
512 | line_end = (char *) memchr (line_start, '\n', remaining_size); | |
513 | if (line_end != NULL) | |
514 | { | |
515 | next_line_start = line_end + 1; | |
516 | break; | |
517 | } | |
518 | } | |
519 | if (line_end == NULL) | |
fe066ce3 | 520 | { |
521 | /* We've loadded all the file into the cache and still no | |
522 | '\n'. Let's say the line ends up at one byte passed the | |
523 | end of the file. This is to stay consistent with the case | |
524 | of when the line ends up with a '\n' and line_end points to | |
525 | that terminal '\n'. That consistency is useful below in | |
526 | the len calculation. */ | |
527 | line_end = c->data + c->nb_read ; | |
528 | c->missing_trailing_newline = true; | |
529 | } | |
530 | else | |
531 | c->missing_trailing_newline = false; | |
5a983084 | 532 | } |
ffc2c526 | 533 | else |
fe066ce3 | 534 | { |
535 | next_line_start = line_end + 1; | |
536 | c->missing_trailing_newline = false; | |
537 | } | |
ffc2c526 | 538 | |
539 | if (ferror (c->fp)) | |
2e24ac9b | 540 | return false; |
ffc2c526 | 541 | |
542 | /* At this point, we've found the end of the of line. It either | |
543 | points to the '\n' or to one byte after the last byte of the | |
544 | file. */ | |
545 | gcc_assert (line_end != NULL); | |
5a983084 | 546 | |
ffc2c526 | 547 | len = line_end - line_start; |
548 | ||
549 | if (c->line_start_idx < c->nb_read) | |
550 | *line = line_start; | |
551 | ||
552 | ++c->line_num; | |
553 | ||
554 | /* Before we update our line record, make sure the hint about the | |
555 | total number of lines of the file is correct. If it's not, then | |
556 | we give up recording line boundaries from now on. */ | |
557 | bool update_line_record = true; | |
558 | if (c->line_num > c->total_lines) | |
559 | update_line_record = false; | |
560 | ||
561 | /* Now update our line record so that re-reading lines from the | |
562 | before c->line_start_idx is faster. */ | |
563 | if (update_line_record | |
564 | && c->line_record.length () < fcache_line_record_size) | |
565 | { | |
566 | /* If the file lines fits in the line record, we just record all | |
567 | its lines ...*/ | |
568 | if (c->total_lines <= fcache_line_record_size | |
569 | && c->line_num > c->line_record.length ()) | |
570 | c->line_record.safe_push (fcache::line_info (c->line_num, | |
571 | c->line_start_idx, | |
572 | line_end - c->data)); | |
573 | else if (c->total_lines > fcache_line_record_size) | |
574 | { | |
575 | /* ... otherwise, we just scale total_lines down to | |
576 | (fcache_line_record_size lines. */ | |
577 | size_t n = (c->line_num * fcache_line_record_size) / c->total_lines; | |
578 | if (c->line_record.length () == 0 | |
579 | || n >= c->line_record.length ()) | |
580 | c->line_record.safe_push (fcache::line_info (c->line_num, | |
581 | c->line_start_idx, | |
582 | line_end - c->data)); | |
583 | } | |
584 | } | |
585 | ||
586 | /* Update c->line_start_idx so that it points to the next line to be | |
587 | read. */ | |
588 | if (next_line_start) | |
589 | c->line_start_idx = next_line_start - c->data; | |
590 | else | |
591 | /* We didn't find any terminal '\n'. Let's consider that the end | |
592 | of line is the end of the data in the cache. The next | |
593 | invocation of get_next_line will either read more data from the | |
594 | underlying file or return false early because we've reached the | |
595 | end of the file. */ | |
596 | c->line_start_idx = c->nb_read; | |
597 | ||
598 | *line_len = len; | |
599 | ||
600 | return true; | |
601 | } | |
602 | ||
ffc2c526 | 603 | /* Consume the next bytes coming from the cache (or from its |
604 | underlying file if there are remaining unread bytes in the file) | |
605 | until we reach the next end-of-line (or end-of-file). There is no | |
606 | copying from the cache involved. Return TRUE upon successful | |
607 | completion. */ | |
608 | ||
609 | static bool | |
610 | goto_next_line (fcache *cache) | |
611 | { | |
612 | char *l; | |
613 | ssize_t len; | |
614 | ||
615 | return get_next_line (cache, &l, &len); | |
616 | } | |
617 | ||
618 | /* Read an arbitrary line number LINE_NUM from the file cached in C. | |
2e24ac9b | 619 | If the line was read successfully, *LINE points to the beginning |
620 | of the line in the file cache and *LINE_LEN is the length of the | |
621 | line. *LINE is not nul-terminated, but may contain zero bytes. | |
622 | *LINE is only valid until the next call of read_line_num. | |
ffc2c526 | 623 | This function returns bool if a line was read. */ |
624 | ||
625 | static bool | |
626 | read_line_num (fcache *c, size_t line_num, | |
2e24ac9b | 627 | char **line, ssize_t *line_len) |
ffc2c526 | 628 | { |
629 | gcc_assert (line_num > 0); | |
630 | ||
631 | if (line_num <= c->line_num) | |
fc3eff88 | 632 | { |
ffc2c526 | 633 | /* We've been asked to read lines that are before c->line_num. |
634 | So lets use our line record (if it's not empty) to try to | |
635 | avoid re-reading the file from the beginning again. */ | |
13225ff5 | 636 | |
ffc2c526 | 637 | if (c->line_record.is_empty ()) |
5a983084 | 638 | { |
ffc2c526 | 639 | c->line_start_idx = 0; |
640 | c->line_num = 0; | |
641 | } | |
642 | else | |
643 | { | |
644 | fcache::line_info *i = NULL; | |
645 | if (c->total_lines <= fcache_line_record_size) | |
646 | { | |
647 | /* In languages where the input file is not totally | |
648 | preprocessed up front, the c->total_lines hint | |
649 | can be smaller than the number of lines of the | |
650 | file. In that case, only the first | |
651 | c->total_lines have been recorded. | |
652 | ||
653 | Otherwise, the first c->total_lines we've read have | |
654 | their start/end recorded here. */ | |
655 | i = (line_num <= c->total_lines) | |
656 | ? &c->line_record[line_num - 1] | |
657 | : &c->line_record[c->total_lines - 1]; | |
658 | gcc_assert (i->line_num <= line_num); | |
659 | } | |
660 | else | |
661 | { | |
662 | /* So the file had more lines than our line record | |
663 | size. Thus the number of lines we've recorded has | |
664 | been scaled down to fcache_line_reacord_size. Let's | |
665 | pick the start/end of the recorded line that is | |
666 | closest to line_num. */ | |
667 | size_t n = (line_num <= c->total_lines) | |
668 | ? line_num * fcache_line_record_size / c->total_lines | |
669 | : c ->line_record.length () - 1; | |
670 | if (n < c->line_record.length ()) | |
671 | { | |
672 | i = &c->line_record[n]; | |
673 | gcc_assert (i->line_num <= line_num); | |
674 | } | |
675 | } | |
676 | ||
677 | if (i && i->line_num == line_num) | |
678 | { | |
2e24ac9b | 679 | /* We have the start/end of the line. */ |
680 | *line = c->data + i->start_pos; | |
681 | *line_len = i->end_pos - i->start_pos; | |
ffc2c526 | 682 | return true; |
683 | } | |
684 | ||
685 | if (i) | |
686 | { | |
687 | c->line_start_idx = i->start_pos; | |
688 | c->line_num = i->line_num - 1; | |
689 | } | |
690 | else | |
691 | { | |
692 | c->line_start_idx = 0; | |
693 | c->line_num = 0; | |
694 | } | |
5a983084 | 695 | } |
5a983084 | 696 | } |
ffc2c526 | 697 | |
698 | /* Let's walk from line c->line_num up to line_num - 1, without | |
699 | copying any line. */ | |
700 | while (c->line_num < line_num - 1) | |
701 | if (!goto_next_line (c)) | |
702 | return false; | |
703 | ||
704 | /* The line we want is the next one. Let's read and copy it back to | |
705 | the caller. */ | |
2e24ac9b | 706 | return get_next_line (c, line, line_len); |
5a983084 | 707 | } |
708 | ||
2e24ac9b | 709 | /* Return the physical source line that corresponds to FILE_PATH/LINE. |
710 | The line is not nul-terminated. The returned pointer is only | |
711 | valid until the next call of location_get_source_line. | |
712 | Note that the line can contain several null characters, | |
713 | so LINE_LEN, if non-null, points to the actual length of the line. | |
714 | If the function fails, NULL is returned. */ | |
5a983084 | 715 | |
716 | const char * | |
be812248 | 717 | location_get_source_line (const char *file_path, int line, |
ffc2c526 | 718 | int *line_len) |
5a983084 | 719 | { |
7b645785 | 720 | char *buffer = NULL; |
2e24ac9b | 721 | ssize_t len; |
ffc2c526 | 722 | |
be812248 | 723 | if (line == 0) |
9e8234d0 | 724 | return NULL; |
725 | ||
be812248 | 726 | fcache *c = lookup_or_add_file_to_cache_tab (file_path); |
9e8234d0 | 727 | if (c == NULL) |
728 | return NULL; | |
729 | ||
be812248 | 730 | bool read = read_line_num (c, line, &buffer, &len); |
5a983084 | 731 | |
ffc2c526 | 732 | if (read && line_len) |
733 | *line_len = len; | |
5a983084 | 734 | |
ffc2c526 | 735 | return read ? buffer : NULL; |
5a983084 | 736 | } |
737 | ||
fe066ce3 | 738 | /* Determine if FILE_PATH missing a trailing newline on its final line. |
739 | Only valid to call once all of the file has been loaded, by | |
740 | requesting a line number beyond the end of the file. */ | |
741 | ||
742 | bool | |
743 | location_missing_trailing_newline (const char *file_path) | |
744 | { | |
745 | fcache *c = lookup_or_add_file_to_cache_tab (file_path); | |
746 | if (c == NULL) | |
747 | return false; | |
748 | ||
749 | return c->missing_trailing_newline; | |
750 | } | |
751 | ||
a4cfdfed | 752 | /* Test if the location originates from the spelling location of a |
753 | builtin-tokens. That is, return TRUE if LOC is a (possibly | |
754 | virtual) location of a built-in token that appears in the expansion | |
755 | list of a macro. Please note that this function also works on | |
756 | tokens that result from built-in tokens. For instance, the | |
757 | function would return true if passed a token "4" that is the result | |
758 | of the expansion of the built-in __LINE__ macro. */ | |
759 | bool | |
760 | is_location_from_builtin_token (source_location loc) | |
761 | { | |
551e34da | 762 | const line_map_ordinary *map = NULL; |
a4cfdfed | 763 | loc = linemap_resolve_location (line_table, loc, |
764 | LRK_SPELLING_LOCATION, &map); | |
765 | return loc == BUILTINS_LOCATION; | |
766 | } | |
767 | ||
39107655 | 768 | /* Expand the source location LOC into a human readable location. If |
769 | LOC is virtual, it resolves to the expansion point of the involved | |
770 | macro. If LOC resolves to a builtin location, the file name of the | |
771 | readable location is set to the string "<built-in>". */ | |
772 | ||
773 | expanded_location | |
774 | expand_location (source_location loc) | |
775 | { | |
776 | return expand_location_1 (loc, /*expansion_point_p=*/true); | |
777 | } | |
778 | ||
779 | /* Expand the source location LOC into a human readable location. If | |
780 | LOC is virtual, it resolves to the expansion location of the | |
781 | relevant macro. If LOC resolves to a builtin location, the file | |
782 | name of the readable location is set to the string | |
783 | "<built-in>". */ | |
784 | ||
785 | expanded_location | |
786 | expand_location_to_spelling_point (source_location loc) | |
787 | { | |
fb2edec0 | 788 | return expand_location_1 (loc, /*expansion_point_p=*/false); |
39107655 | 789 | } |
790 | ||
f0479000 | 791 | /* The rich_location class within libcpp requires a way to expand |
792 | source_location instances, and relies on the client code | |
793 | providing a symbol named | |
794 | linemap_client_expand_location_to_spelling_point | |
795 | to do this. | |
796 | ||
797 | This is the implementation for libcommon.a (all host binaries), | |
798 | which simply calls into expand_location_to_spelling_point. */ | |
799 | ||
800 | expanded_location | |
801 | linemap_client_expand_location_to_spelling_point (source_location loc) | |
802 | { | |
803 | return expand_location_to_spelling_point (loc); | |
804 | } | |
805 | ||
806 | ||
fb2edec0 | 807 | /* If LOCATION is in a system header and if it is a virtual location for |
808 | a token coming from the expansion of a macro, unwind it to the | |
809 | location of the expansion point of the macro. Otherwise, just return | |
db30b351 | 810 | LOCATION. |
811 | ||
812 | This is used for instance when we want to emit diagnostics about a | |
fb2edec0 | 813 | token that may be located in a macro that is itself defined in a |
814 | system header, for example, for the NULL macro. In such a case, if | |
815 | LOCATION were passed directly to diagnostic functions such as | |
816 | warning_at, the diagnostic would be suppressed (unless | |
817 | -Wsystem-headers). */ | |
db30b351 | 818 | |
819 | source_location | |
820 | expansion_point_location_if_in_system_header (source_location location) | |
821 | { | |
822 | if (in_system_header_at (location)) | |
823 | location = linemap_resolve_location (line_table, location, | |
824 | LRK_MACRO_EXPANSION_POINT, | |
825 | NULL); | |
826 | return location; | |
827 | } | |
39107655 | 828 | |
5d4db8ef | 829 | /* If LOCATION is a virtual location for a token coming from the expansion |
830 | of a macro, unwind to the location of the expansion point of the macro. */ | |
831 | ||
832 | source_location | |
833 | expansion_point_location (source_location location) | |
834 | { | |
835 | return linemap_resolve_location (line_table, location, | |
836 | LRK_MACRO_EXPANSION_POINT, NULL); | |
837 | } | |
838 | ||
f17776ff | 839 | /* Construct a location with caret at CARET, ranging from START to |
840 | finish e.g. | |
841 | ||
842 | 11111111112 | |
843 | 12345678901234567890 | |
844 | 522 | |
845 | 523 return foo + bar; | |
846 | ~~~~^~~~~ | |
847 | 524 | |
848 | ||
849 | The location's caret is at the "+", line 523 column 15, but starts | |
850 | earlier, at the "f" of "foo" at column 11. The finish is at the "r" | |
851 | of "bar" at column 19. */ | |
852 | ||
853 | location_t | |
854 | make_location (location_t caret, location_t start, location_t finish) | |
855 | { | |
856 | location_t pure_loc = get_pure_location (caret); | |
857 | source_range src_range; | |
aca2a315 | 858 | src_range.m_start = get_start (start); |
859 | src_range.m_finish = get_finish (finish); | |
f17776ff | 860 | location_t combined_loc = COMBINE_LOCATION_DATA (line_table, |
861 | pure_loc, | |
862 | src_range, | |
863 | NULL); | |
864 | return combined_loc; | |
865 | } | |
866 | ||
/* Binary size units used when printing statistics.  */
#define ONE_K 1024
#define ONE_M (ONE_K * ONE_K)

/* Scale X for display.  The result is:
   - X / (1024 * 1024), if X is at least 10 M (base 2);
   - X / 1024, if X is at least 10 K (base 2) but below 10 M;
   - X itself otherwise.
   The result is converted to unsigned long.  */
#define SCALE(x) ((unsigned long) ((x) >= 10 * ONE_M            \
                                   ? (x) / ONE_M                \
                                   : ((x) >= 10 * ONE_K         \
                                      ? (x) / ONE_K             \
                                      : (x))))

/* The unit character that goes with SCALE (X):
   - 'M' if X is at least 10 M (base 2);
   - 'k' if X is at least 10 K (base 2) but below 10 M;
   - ' ' if X is strictly below 10 K.  */
#define STAT_LABEL(x) \
  ((x) >= 10 * ONE_M ? 'M' : ((x) >= 10 * ONE_K ? 'k' : ' '))

/* Expand to the two printf arguments describing SIZE: the scaled
   amount followed by its unit character (k, M, or ' ').  */
#define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
891 | ||
892 | /* Dump statistics to stderr about the memory usage of the line_table | |
893 | set of line maps. This also displays some statistics about macro | |
894 | expansion. */ | |
895 | ||
896 | void | |
897 | dump_line_table_statistics (void) | |
898 | { | |
899 | struct linemap_stats s; | |
2a688977 | 900 | long total_used_map_size, |
e77b8253 | 901 | macro_maps_size, |
902 | total_allocated_map_size; | |
903 | ||
904 | memset (&s, 0, sizeof (s)); | |
905 | ||
906 | linemap_get_statistics (line_table, &s); | |
907 | ||
908 | macro_maps_size = s.macro_maps_used_size | |
909 | + s.macro_maps_locations_size; | |
910 | ||
911 | total_allocated_map_size = s.ordinary_maps_allocated_size | |
912 | + s.macro_maps_allocated_size | |
913 | + s.macro_maps_locations_size; | |
914 | ||
915 | total_used_map_size = s.ordinary_maps_used_size | |
916 | + s.macro_maps_used_size | |
917 | + s.macro_maps_locations_size; | |
918 | ||
2a688977 | 919 | fprintf (stderr, "Number of expanded macros: %5ld\n", |
e77b8253 | 920 | s.num_expanded_macros); |
921 | if (s.num_expanded_macros != 0) | |
2a688977 | 922 | fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n", |
e77b8253 | 923 | s.num_macro_tokens / s.num_expanded_macros); |
924 | fprintf (stderr, | |
925 | "\nLine Table allocations during the " | |
926 | "compilation process\n"); | |
2a688977 | 927 | fprintf (stderr, "Number of ordinary maps used: %5ld%c\n", |
e77b8253 | 928 | SCALE (s.num_ordinary_maps_used), |
929 | STAT_LABEL (s.num_ordinary_maps_used)); | |
2a688977 | 930 | fprintf (stderr, "Ordinary map used size: %5ld%c\n", |
e77b8253 | 931 | SCALE (s.ordinary_maps_used_size), |
932 | STAT_LABEL (s.ordinary_maps_used_size)); | |
2a688977 | 933 | fprintf (stderr, "Number of ordinary maps allocated: %5ld%c\n", |
e77b8253 | 934 | SCALE (s.num_ordinary_maps_allocated), |
935 | STAT_LABEL (s.num_ordinary_maps_allocated)); | |
2a688977 | 936 | fprintf (stderr, "Ordinary maps allocated size: %5ld%c\n", |
e77b8253 | 937 | SCALE (s.ordinary_maps_allocated_size), |
938 | STAT_LABEL (s.ordinary_maps_allocated_size)); | |
2a688977 | 939 | fprintf (stderr, "Number of macro maps used: %5ld%c\n", |
e77b8253 | 940 | SCALE (s.num_macro_maps_used), |
941 | STAT_LABEL (s.num_macro_maps_used)); | |
2a688977 | 942 | fprintf (stderr, "Macro maps used size: %5ld%c\n", |
e77b8253 | 943 | SCALE (s.macro_maps_used_size), |
944 | STAT_LABEL (s.macro_maps_used_size)); | |
2a688977 | 945 | fprintf (stderr, "Macro maps locations size: %5ld%c\n", |
e77b8253 | 946 | SCALE (s.macro_maps_locations_size), |
947 | STAT_LABEL (s.macro_maps_locations_size)); | |
2a688977 | 948 | fprintf (stderr, "Macro maps size: %5ld%c\n", |
e77b8253 | 949 | SCALE (macro_maps_size), |
950 | STAT_LABEL (macro_maps_size)); | |
2a688977 | 951 | fprintf (stderr, "Duplicated maps locations size: %5ld%c\n", |
e77b8253 | 952 | SCALE (s.duplicated_macro_maps_locations_size), |
953 | STAT_LABEL (s.duplicated_macro_maps_locations_size)); | |
2a688977 | 954 | fprintf (stderr, "Total allocated maps size: %5ld%c\n", |
e77b8253 | 955 | SCALE (total_allocated_map_size), |
956 | STAT_LABEL (total_allocated_map_size)); | |
2a688977 | 957 | fprintf (stderr, "Total used maps size: %5ld%c\n", |
e77b8253 | 958 | SCALE (total_used_map_size), |
959 | STAT_LABEL (total_used_map_size)); | |
0ffb4474 | 960 | fprintf (stderr, "Ad-hoc table size: %5ld%c\n", |
961 | SCALE (s.adhoc_table_size), | |
962 | STAT_LABEL (s.adhoc_table_size)); | |
963 | fprintf (stderr, "Ad-hoc table entries used: %5ld\n", | |
964 | s.adhoc_table_entries_used); | |
a96cefb2 | 965 | fprintf (stderr, "optimized_ranges: %i\n", |
966 | line_table->num_optimized_ranges); | |
967 | fprintf (stderr, "unoptimized_ranges: %i\n", | |
968 | line_table->num_unoptimized_ranges); | |
0ffb4474 | 969 | |
e77b8253 | 970 | fprintf (stderr, "\n"); |
971 | } | |
28f17529 | 972 | |
973 | /* Get location one beyond the final location in ordinary map IDX. */ | |
974 | ||
975 | static source_location | |
976 | get_end_location (struct line_maps *set, unsigned int idx) | |
977 | { | |
978 | if (idx == LINEMAPS_ORDINARY_USED (set) - 1) | |
979 | return set->highest_location; | |
980 | ||
981 | struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1); | |
982 | return MAP_START_LOCATION (next_map); | |
983 | } | |
984 | ||
/* Helper function for write_digit_row: write the least significant
   decimal digit of DIGIT to STREAM as a single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
992 | ||
993 | /* Helper function for dump_location_info. | |
994 | Write a row of numbers to STREAM, numbering a source line, | |
995 | giving the units, tens, hundreds etc of the column number. */ | |
996 | ||
997 | static void | |
998 | write_digit_row (FILE *stream, int indent, | |
a96cefb2 | 999 | const line_map_ordinary *map, |
28f17529 | 1000 | source_location loc, int max_col, int divisor) |
1001 | { | |
1002 | fprintf (stream, "%*c", indent, ' '); | |
1003 | fprintf (stream, "|"); | |
1004 | for (int column = 1; column < max_col; column++) | |
1005 | { | |
a96cefb2 | 1006 | source_location column_loc = loc + (column << map->m_range_bits); |
28f17529 | 1007 | write_digit (stream, column_loc / divisor); |
1008 | } | |
1009 | fprintf (stream, "\n"); | |
1010 | } | |
1011 | ||
1012 | /* Write a half-closed (START) / half-open (END) interval of | |
1013 | source_location to STREAM. */ | |
1014 | ||
1015 | static void | |
1016 | dump_location_range (FILE *stream, | |
1017 | source_location start, source_location end) | |
1018 | { | |
1019 | fprintf (stream, | |
1020 | " source_location interval: %u <= loc < %u\n", | |
1021 | start, end); | |
1022 | } | |
1023 | ||
1024 | /* Write a labelled description of a half-closed (START) / half-open (END) | |
1025 | interval of source_location to STREAM. */ | |
1026 | ||
1027 | static void | |
1028 | dump_labelled_location_range (FILE *stream, | |
1029 | const char *name, | |
1030 | source_location start, source_location end) | |
1031 | { | |
1032 | fprintf (stream, "%s\n", name); | |
1033 | dump_location_range (stream, start, end); | |
1034 | fprintf (stream, "\n"); | |
1035 | } | |
1036 | ||
1037 | /* Write a visualization of the locations in the line_table to STREAM. */ | |
1038 | ||
1039 | void | |
1040 | dump_location_info (FILE *stream) | |
1041 | { | |
1042 | /* Visualize the reserved locations. */ | |
1043 | dump_labelled_location_range (stream, "RESERVED LOCATIONS", | |
1044 | 0, RESERVED_LOCATION_COUNT); | |
1045 | ||
1046 | /* Visualize the ordinary line_map instances, rendering the sources. */ | |
1047 | for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++) | |
1048 | { | |
1049 | source_location end_location = get_end_location (line_table, idx); | |
1050 | /* half-closed: doesn't include this one. */ | |
1051 | ||
551e34da | 1052 | const line_map_ordinary *map |
1053 | = LINEMAPS_ORDINARY_MAP_AT (line_table, idx); | |
28f17529 | 1054 | fprintf (stream, "ORDINARY MAP: %i\n", idx); |
1055 | dump_location_range (stream, | |
1056 | MAP_START_LOCATION (map), end_location); | |
1057 | fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map)); | |
1058 | fprintf (stream, " starting at line: %i\n", | |
1059 | ORDINARY_MAP_STARTING_LINE_NUMBER (map)); | |
a96cefb2 | 1060 | fprintf (stream, " column and range bits: %i\n", |
1061 | map->m_column_and_range_bits); | |
28f17529 | 1062 | fprintf (stream, " column bits: %i\n", |
a96cefb2 | 1063 | map->m_column_and_range_bits - map->m_range_bits); |
1064 | fprintf (stream, " range bits: %i\n", | |
1065 | map->m_range_bits); | |
28f17529 | 1066 | |
1067 | /* Render the span of source lines that this "map" covers. */ | |
1068 | for (source_location loc = MAP_START_LOCATION (map); | |
1069 | loc < end_location; | |
a96cefb2 | 1070 | loc += (1 << map->m_range_bits) ) |
28f17529 | 1071 | { |
a96cefb2 | 1072 | gcc_assert (pure_location_p (line_table, loc) ); |
1073 | ||
28f17529 | 1074 | expanded_location exploc |
1075 | = linemap_expand_location (line_table, map, loc); | |
1076 | ||
1077 | if (0 == exploc.column) | |
1078 | { | |
1079 | /* Beginning of a new source line: draw the line. */ | |
1080 | ||
1081 | int line_size; | |
be812248 | 1082 | const char *line_text = location_get_source_line (exploc.file, |
1083 | exploc.line, | |
1084 | &line_size); | |
28f17529 | 1085 | if (!line_text) |
1086 | break; | |
1087 | fprintf (stream, | |
1088 | "%s:%3i|loc:%5i|%.*s\n", | |
1089 | exploc.file, exploc.line, | |
1090 | loc, | |
1091 | line_size, line_text); | |
1092 | ||
1093 | /* "loc" is at column 0, which means "the whole line". | |
1094 | Render the locations *within* the line, by underlining | |
1095 | it, showing the source_location numeric values | |
1096 | at each column. */ | |
a96cefb2 | 1097 | int max_col = (1 << map->m_column_and_range_bits) - 1; |
28f17529 | 1098 | if (max_col > line_size) |
1099 | max_col = line_size + 1; | |
1100 | ||
1101 | int indent = 14 + strlen (exploc.file); | |
1102 | ||
1103 | /* Thousands. */ | |
1104 | if (end_location > 999) | |
a96cefb2 | 1105 | write_digit_row (stream, indent, map, loc, max_col, 1000); |
28f17529 | 1106 | |
1107 | /* Hundreds. */ | |
1108 | if (end_location > 99) | |
a96cefb2 | 1109 | write_digit_row (stream, indent, map, loc, max_col, 100); |
28f17529 | 1110 | |
1111 | /* Tens. */ | |
a96cefb2 | 1112 | write_digit_row (stream, indent, map, loc, max_col, 10); |
28f17529 | 1113 | |
1114 | /* Units. */ | |
a96cefb2 | 1115 | write_digit_row (stream, indent, map, loc, max_col, 1); |
28f17529 | 1116 | } |
1117 | } | |
1118 | fprintf (stream, "\n"); | |
1119 | } | |
1120 | ||
1121 | /* Visualize unallocated values. */ | |
1122 | dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS", | |
1123 | line_table->highest_location, | |
1124 | LINEMAPS_MACRO_LOWEST_LOCATION (line_table)); | |
1125 | ||
1126 | /* Visualize the macro line_map instances, rendering the sources. */ | |
1127 | for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++) | |
1128 | { | |
1129 | /* Each macro map that is allocated owns source_location values | |
1130 | that are *lower* that the one before them. | |
1131 | Hence it's meaningful to view them either in order of ascending | |
1132 | source locations, or in order of ascending macro map index. */ | |
1133 | const bool ascending_source_locations = true; | |
1134 | unsigned int idx = (ascending_source_locations | |
1135 | ? (LINEMAPS_MACRO_USED (line_table) - (i + 1)) | |
1136 | : i); | |
551e34da | 1137 | const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx); |
28f17529 | 1138 | fprintf (stream, "MACRO %i: %s (%u tokens)\n", |
1139 | idx, | |
1140 | linemap_map_get_macro_name (map), | |
1141 | MACRO_MAP_NUM_MACRO_TOKENS (map)); | |
1142 | dump_location_range (stream, | |
1143 | map->start_location, | |
1144 | (map->start_location | |
1145 | + MACRO_MAP_NUM_MACRO_TOKENS (map))); | |
1146 | inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map), | |
1147 | "expansion point is location %i", | |
1148 | MACRO_MAP_EXPANSION_POINT_LOCATION (map)); | |
1149 | fprintf (stream, " map->start_location: %u\n", | |
1150 | map->start_location); | |
1151 | ||
1152 | fprintf (stream, " macro_locations:\n"); | |
1153 | for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++) | |
1154 | { | |
1155 | source_location x = MACRO_MAP_LOCATIONS (map)[2 * i]; | |
1156 | source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1]; | |
1157 | ||
1158 | /* linemap_add_macro_token encodes token numbers in an expansion | |
1159 | by putting them after MAP_START_LOCATION. */ | |
1160 | ||
1161 | /* I'm typically seeing 4 uninitialized entries at the end of | |
1162 | 0xafafafaf. | |
1163 | This appears to be due to macro.c:replace_args | |
1164 | adding 2 extra args for padding tokens; presumably there may | |
1165 | be a leading and/or trailing padding token injected, | |
1166 | each for 2 more location slots. | |
1167 | This would explain there being up to 4 source_locations slots | |
1168 | that may be uninitialized. */ | |
1169 | ||
1170 | fprintf (stream, " %u: %u, %u\n", | |
1171 | i, | |
1172 | x, | |
1173 | y); | |
1174 | if (x == y) | |
1175 | { | |
1176 | if (x < MAP_START_LOCATION (map)) | |
1177 | inform (x, "token %u has x-location == y-location == %u", i, x); | |
1178 | else | |
1179 | fprintf (stream, | |
1180 | "x-location == y-location == %u encodes token # %u\n", | |
1181 | x, x - MAP_START_LOCATION (map)); | |
1182 | } | |
1183 | else | |
1184 | { | |
1185 | inform (x, "token %u has x-location == %u", i, x); | |
1186 | inform (x, "token %u has y-location == %u", i, y); | |
1187 | } | |
1188 | } | |
1189 | fprintf (stream, "\n"); | |
1190 | } | |
1191 | ||
1192 | /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a | |
1193 | macro map, presumably due to an off-by-one error somewhere | |
1194 | between the logic in linemap_enter_macro and | |
1195 | LINEMAPS_MACRO_LOWEST_LOCATION. */ | |
1196 | dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION", | |
1197 | MAX_SOURCE_LOCATION, | |
1198 | MAX_SOURCE_LOCATION + 1); | |
1199 | ||
1200 | /* Visualize ad-hoc values. */ | |
1201 | dump_labelled_location_range (stream, "AD-HOC LOCATIONS", | |
1202 | MAX_SOURCE_LOCATION + 1, UINT_MAX); | |
1203 | } | |
99b4f3a2 | 1204 | |
d4166bdc | 1205 | /* string_concat's constructor. */ |
1206 | ||
1207 | string_concat::string_concat (int num, location_t *locs) | |
1208 | : m_num (num) | |
1209 | { | |
1210 | m_locs = ggc_vec_alloc <location_t> (num); | |
1211 | for (int i = 0; i < num; i++) | |
1212 | m_locs[i] = locs[i]; | |
1213 | } | |
1214 | ||
1215 | /* string_concat_db's constructor. */ | |
1216 | ||
1217 | string_concat_db::string_concat_db () | |
1218 | { | |
1219 | m_table = hash_map <location_hash, string_concat *>::create_ggc (64); | |
1220 | } | |
1221 | ||
1222 | /* Record that a string concatenation occurred, covering NUM | |
1223 | string literal tokens. LOCS is an array of size NUM, containing the | |
1224 | locations of the tokens. A copy of LOCS is taken. */ | |
1225 | ||
1226 | void | |
1227 | string_concat_db::record_string_concatenation (int num, location_t *locs) | |
1228 | { | |
1229 | gcc_assert (num > 1); | |
1230 | gcc_assert (locs); | |
1231 | ||
1232 | location_t key_loc = get_key_loc (locs[0]); | |
1233 | ||
1234 | string_concat *concat | |
1235 | = new (ggc_alloc <string_concat> ()) string_concat (num, locs); | |
1236 | m_table->put (key_loc, concat); | |
1237 | } | |
1238 | ||
1239 | /* Determine if LOC was the location of the the initial token of a | |
1240 | concatenation of string literal tokens. | |
1241 | If so, *OUT_NUM is written to with the number of tokens, and | |
1242 | *OUT_LOCS with the location of an array of locations of the | |
1243 | tokens, and return true. *OUT_LOCS is a borrowed pointer to | |
1244 | storage owned by the string_concat_db. | |
1245 | Otherwise, return false. */ | |
1246 | ||
1247 | bool | |
1248 | string_concat_db::get_string_concatenation (location_t loc, | |
1249 | int *out_num, | |
1250 | location_t **out_locs) | |
1251 | { | |
1252 | gcc_assert (out_num); | |
1253 | gcc_assert (out_locs); | |
1254 | ||
1255 | location_t key_loc = get_key_loc (loc); | |
1256 | ||
1257 | string_concat **concat = m_table->get (key_loc); | |
1258 | if (!concat) | |
1259 | return false; | |
1260 | ||
1261 | *out_num = (*concat)->m_num; | |
1262 | *out_locs =(*concat)->m_locs; | |
1263 | return true; | |
1264 | } | |
1265 | ||
1266 | /* Internal function. Canonicalize LOC into a form suitable for | |
1267 | use as a key within the database, stripping away macro expansion, | |
1268 | ad-hoc information, and range information, using the location of | |
1269 | the start of LOC within an ordinary linemap. */ | |
1270 | ||
1271 | location_t | |
1272 | string_concat_db::get_key_loc (location_t loc) | |
1273 | { | |
1274 | loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION, | |
1275 | NULL); | |
1276 | ||
1277 | loc = get_range_from_loc (line_table, loc).m_start; | |
1278 | ||
1279 | return loc; | |
1280 | } | |
1281 | ||
1282 | /* Helper class for use within get_substring_ranges_for_loc. | |
1283 | An vec of cpp_string with responsibility for releasing all of the | |
1284 | str->text for each str in the vector. */ | |
1285 | ||
1286 | class auto_cpp_string_vec : public auto_vec <cpp_string> | |
1287 | { | |
1288 | public: | |
1289 | auto_cpp_string_vec (int alloc) | |
1290 | : auto_vec <cpp_string> (alloc) {} | |
1291 | ||
1292 | ~auto_cpp_string_vec () | |
1293 | { | |
1294 | /* Clean up the copies within this vec. */ | |
1295 | int i; | |
1296 | cpp_string *str; | |
1297 | FOR_EACH_VEC_ELT (*this, i, str) | |
1298 | free (const_cast <unsigned char *> (str->text)); | |
1299 | } | |
1300 | }; | |
1301 | ||
1302 | /* Attempt to populate RANGES with source location information on the | |
1303 | individual characters within the string literal found at STRLOC. | |
1304 | If CONCATS is non-NULL, then any string literals that the token at | |
1305 | STRLOC was concatenated with are also added to RANGES. | |
1306 | ||
1307 | Return NULL if successful, or an error message if any errors occurred (in | |
1308 | which case RANGES may be only partially populated and should not | |
1309 | be used). | |
1310 | ||
1311 | This is implemented by re-parsing the relevant source line(s). */ | |
1312 | ||
1313 | static const char * | |
1314 | get_substring_ranges_for_loc (cpp_reader *pfile, | |
1315 | string_concat_db *concats, | |
1316 | location_t strloc, | |
1317 | enum cpp_ttype type, | |
1318 | cpp_substring_ranges &ranges) | |
1319 | { | |
1320 | gcc_assert (pfile); | |
1321 | ||
1322 | if (strloc == UNKNOWN_LOCATION) | |
1323 | return "unknown location"; | |
1324 | ||
8df44fbf | 1325 | /* Reparsing the strings requires accurate location information. |
1326 | If -ftrack-macro-expansion has been overridden from its default | |
1327 | of 2, then we might have a location of a macro expansion point, | |
1328 | rather than the location of the literal itself. | |
1329 | Avoid this by requiring that we have full macro expansion tracking | |
1330 | for substring locations to be available. */ | |
1331 | if (cpp_get_options (pfile)->track_macro_expansion != 2) | |
1332 | return "track_macro_expansion != 2"; | |
1333 | ||
a4d96eb7 | 1334 | /* If #line or # 44 "file"-style directives are present, then there's |
1335 | no guarantee that the line numbers we have can be used to locate | |
1336 | the strings. For example, we might have a .i file with # directives | |
1337 | pointing back to lines within a .c file, but the .c file might | |
1338 | have been edited since the .i file was created. | |
1339 | In such a case, the safest course is to disable on-demand substring | |
1340 | locations. */ | |
1341 | if (line_table->seen_line_directive) | |
1342 | return "seen line directive"; | |
1343 | ||
d4166bdc | 1344 | /* If string concatenation has occurred at STRLOC, get the locations |
1345 | of all of the literal tokens making up the compound string. | |
1346 | Otherwise, just use STRLOC. */ | |
1347 | int num_locs = 1; | |
1348 | location_t *strlocs = &strloc; | |
1349 | if (concats) | |
1350 | concats->get_string_concatenation (strloc, &num_locs, &strlocs); | |
1351 | ||
1352 | auto_cpp_string_vec strs (num_locs); | |
1353 | auto_vec <cpp_string_location_reader> loc_readers (num_locs); | |
1354 | for (int i = 0; i < num_locs; i++) | |
1355 | { | |
1356 | /* Get range of strloc. We will use it to locate the start and finish | |
1357 | of the literal token within the line. */ | |
1358 | source_range src_range = get_range_from_loc (line_table, strlocs[i]); | |
1359 | ||
1360 | if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table)) | |
1361 | /* If the string is within a macro expansion, we can't get at the | |
1362 | end location. */ | |
1363 | return "macro expansion"; | |
1364 | ||
1365 | if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS) | |
1366 | /* If so, we can't reliably determine where the token started within | |
1367 | its line. */ | |
1368 | return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS"; | |
1369 | ||
1370 | if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS) | |
1371 | /* If so, we can't reliably determine where the token finished within | |
1372 | its line. */ | |
1373 | return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS"; | |
1374 | ||
1375 | expanded_location start | |
1376 | = expand_location_to_spelling_point (src_range.m_start); | |
1377 | expanded_location finish | |
1378 | = expand_location_to_spelling_point (src_range.m_finish); | |
1379 | if (start.file != finish.file) | |
1380 | return "range endpoints are in different files"; | |
1381 | if (start.line != finish.line) | |
1382 | return "range endpoints are on different lines"; | |
1383 | if (start.column > finish.column) | |
1384 | return "range endpoints are reversed"; | |
1385 | ||
1386 | int line_width; | |
1387 | const char *line = location_get_source_line (start.file, start.line, | |
1388 | &line_width); | |
1389 | if (line == NULL) | |
1390 | return "unable to read source line"; | |
1391 | ||
1392 | /* Determine the location of the literal (including quotes | |
1393 | and leading prefix chars, such as the 'u' in a u"" | |
1394 | token). */ | |
1395 | const char *literal = line + start.column - 1; | |
1396 | int literal_length = finish.column - start.column + 1; | |
1397 | ||
44128dbe | 1398 | /* Ensure that we don't crash if we got the wrong location. */ |
1399 | if (line_width < (start.column - 1 + literal_length)) | |
1400 | return "line is not wide enough"; | |
1401 | ||
d4166bdc | 1402 | cpp_string from; |
1403 | from.len = literal_length; | |
1404 | /* Make a copy of the literal, to avoid having to rely on | |
1405 | the lifetime of the copy of the line within the cache. | |
1406 | This will be released by the auto_cpp_string_vec dtor. */ | |
1407 | from.text = XDUPVEC (unsigned char, literal, literal_length); | |
1408 | strs.safe_push (from); | |
1409 | ||
1410 | /* For very long lines, a new linemap could have started | |
1411 | halfway through the token. | |
1412 | Ensure that the loc_reader uses the linemap of the | |
1413 | *end* of the token for its start location. */ | |
1414 | const line_map_ordinary *final_ord_map; | |
1415 | linemap_resolve_location (line_table, src_range.m_finish, | |
1416 | LRK_MACRO_EXPANSION_POINT, &final_ord_map); | |
1417 | location_t start_loc | |
1418 | = linemap_position_for_line_and_column (line_table, final_ord_map, | |
1419 | start.line, start.column); | |
1420 | ||
1421 | cpp_string_location_reader loc_reader (start_loc, line_table); | |
1422 | loc_readers.safe_push (loc_reader); | |
1423 | } | |
1424 | ||
1425 | /* Rerun cpp_interpret_string, or rather, a modified version of it. */ | |
1426 | const char *err = cpp_interpret_string_ranges (pfile, strs.address (), | |
1427 | loc_readers.address (), | |
1428 | num_locs, &ranges, type); | |
1429 | if (err) | |
1430 | return err; | |
1431 | ||
1432 | /* Success: "ranges" should now contain information on the string. */ | |
1433 | return NULL; | |
1434 | } | |
1435 | ||
5927e78e | 1436 | /* Attempt to populate *OUT_LOC with source location information on the |
1437 | given characters within the string literal found at STRLOC. | |
1438 | CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution | |
1439 | character set. | |
1440 | ||
1441 | For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7 | |
1442 | and string literal "012345\n789" | |
1443 | *OUT_LOC is written to with: | |
1444 | "012345\n789" | |
1445 | ~^~~~~ | |
1446 | ||
d4166bdc | 1447 | If CONCATS is non-NULL, then any string literals that the token at |
1448 | STRLOC was concatenated with are also considered. | |
1449 | ||
1450 | This is implemented by re-parsing the relevant source line(s). | |
1451 | ||
1452 | Return NULL if successful, or an error message if any errors occurred. | |
1453 | Error messages are intended for GCC developers (to help debugging) rather | |
1454 | than for end-users. */ | |
1455 | ||
1456 | const char * | |
5927e78e | 1457 | get_source_location_for_substring (cpp_reader *pfile, |
1458 | string_concat_db *concats, | |
1459 | location_t strloc, | |
1460 | enum cpp_ttype type, | |
1461 | int caret_idx, int start_idx, int end_idx, | |
1462 | source_location *out_loc) | |
1463 | { | |
1464 | gcc_checking_assert (caret_idx >= 0); | |
d4166bdc | 1465 | gcc_checking_assert (start_idx >= 0); |
1466 | gcc_checking_assert (end_idx >= 0); | |
5927e78e | 1467 | gcc_assert (out_loc); |
d4166bdc | 1468 | |
1469 | cpp_substring_ranges ranges; | |
1470 | const char *err | |
1471 | = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges); | |
1472 | if (err) | |
1473 | return err; | |
1474 | ||
5927e78e | 1475 | if (caret_idx >= ranges.get_num_ranges ()) |
1476 | return "caret_idx out of range"; | |
d4166bdc | 1477 | if (start_idx >= ranges.get_num_ranges ()) |
1478 | return "start_idx out of range"; | |
1479 | if (end_idx >= ranges.get_num_ranges ()) | |
1480 | return "end_idx out of range"; | |
1481 | ||
5927e78e | 1482 | *out_loc = make_location (ranges.get_range (caret_idx).m_start, |
1483 | ranges.get_range (start_idx).m_start, | |
1484 | ranges.get_range (end_idx).m_finish); | |
1485 | return NULL; | |
1486 | } | |
1487 | ||
45183e4c | 1488 | #if CHECKING_P |
1489 | ||
1490 | namespace selftest { | |
1491 | ||
1492 | /* Selftests of location handling. */ | |
1493 | ||
5927e78e | 1494 | /* Attempt to populate *OUT_RANGE with source location information on the |
1495 | given character within the string literal found at STRLOC. | |
1496 | CHAR_IDX refers to an offset within the execution character set. | |
1497 | If CONCATS is non-NULL, then any string literals that the token at | |
1498 | STRLOC was concatenated with are also considered. | |
1499 | ||
1500 | This is implemented by re-parsing the relevant source line(s). | |
1501 | ||
1502 | Return NULL if successful, or an error message if any errors occurred. | |
1503 | Error messages are intended for GCC developers (to help debugging) rather | |
1504 | than for end-users. */ | |
1505 | ||
1506 | static const char * | |
1507 | get_source_range_for_char (cpp_reader *pfile, | |
1508 | string_concat_db *concats, | |
1509 | location_t strloc, | |
1510 | enum cpp_ttype type, | |
1511 | int char_idx, | |
1512 | source_range *out_range) | |
1513 | { | |
1514 | gcc_checking_assert (char_idx >= 0); | |
1515 | gcc_assert (out_range); | |
1516 | ||
1517 | cpp_substring_ranges ranges; | |
1518 | const char *err | |
1519 | = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges); | |
1520 | if (err) | |
1521 | return err; | |
1522 | ||
1523 | if (char_idx >= ranges.get_num_ranges ()) | |
1524 | return "char_idx out of range"; | |
1525 | ||
1526 | *out_range = ranges.get_range (char_idx); | |
d4166bdc | 1527 | return NULL; |
1528 | } | |
1529 | ||
5927e78e | 1530 | /* As get_source_range_for_char, but write to *OUT the number |
d4166bdc | 1531 | of ranges that are available. */ |
1532 | ||
45183e4c | 1533 | static const char * |
d4166bdc | 1534 | get_num_source_ranges_for_substring (cpp_reader *pfile, |
1535 | string_concat_db *concats, | |
1536 | location_t strloc, | |
1537 | enum cpp_ttype type, | |
1538 | int *out) | |
1539 | { | |
1540 | gcc_assert (out); | |
1541 | ||
1542 | cpp_substring_ranges ranges; | |
1543 | const char *err | |
1544 | = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges); | |
1545 | ||
1546 | if (err) | |
1547 | return err; | |
1548 | ||
1549 | *out = ranges.get_num_ranges (); | |
1550 | return NULL; | |
1551 | } | |
1552 | ||
99b4f3a2 | 1553 | /* Selftests of location handling. */ |
1554 | ||
b73690a4 | 1555 | /* Helper function for verifying location data: when location_t |
1556 | values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated | |
1557 | as having column 0. */ | |
1558 | ||
1559 | static bool | |
1560 | should_have_column_data_p (location_t loc) | |
1561 | { | |
1562 | if (IS_ADHOC_LOC (loc)) | |
1563 | loc = get_location_from_adhoc_loc (line_table, loc); | |
1564 | if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS) | |
1565 | return false; | |
1566 | return true; | |
1567 | } | |
1568 | ||
1569 | /* Selftest for should_have_column_data_p. */ | |
1570 | ||
1571 | static void | |
1572 | test_should_have_column_data_p () | |
1573 | { | |
1574 | ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT)); | |
1575 | ASSERT_TRUE | |
1576 | (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS)); | |
1577 | ASSERT_FALSE | |
1578 | (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1)); | |
1579 | } | |
1580 | ||
99b4f3a2 | 1581 | /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN |
1582 | on LOC. */ | |
1583 | ||
1584 | static void | |
1585 | assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum, | |
1586 | location_t loc) | |
1587 | { | |
1588 | ASSERT_STREQ (exp_filename, LOCATION_FILE (loc)); | |
1589 | ASSERT_EQ (exp_linenum, LOCATION_LINE (loc)); | |
b73690a4 | 1590 | /* If location_t values are sufficiently high, then column numbers |
1591 | will be unavailable and LOCATION_COLUMN (loc) will be 0. | |
1592 | When close to the threshold, column numbers *may* be present: if | |
1593 | the final linemap before the threshold contains a line that straddles | |
1594 | the threshold, locations in that line have column information. */ | |
1595 | if (should_have_column_data_p (loc)) | |
1596 | ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc)); | |
1597 | } | |
1598 | ||
7ec388ed | 1599 | /* Various selftests involve constructing a line table and one or more |
1600 | line maps within it. | |
b73690a4 | 1601 | |
1602 | For maximum test coverage we want to run these tests with a variety | |
1603 | of situations: | |
1604 | - line_table->default_range_bits: some frontends use a non-zero value | |
1605 | and others use zero | |
1606 | - the fallback modes within line-map.c: there are various threshold | |
1607 | values for source_location/location_t beyond which line-map.c changes | |
1608 | behavior (disabling of the range-packing optimization, disabling | |
1609 | of column-tracking). We can exercise these by starting the line_table | |
1610 | at interesting values at or near these thresholds. | |
1611 | ||
1612 | The following struct describes a particular case within our test | |
1613 | matrix. */ | |
1614 | ||
struct line_table_case
{
  /* Record DEFAULT_RANGE_BITS and BASE_LOCATION so that a line_table_test
     can later be constructed from this case.  */
  line_table_case (int default_range_bits, int base_location)
  : m_default_range_bits (default_range_bits),
    m_base_location (base_location)
  {}

  /* The value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If nonzero, the value at which line_table->highest_location and
     line_table->highest_line are started.  */
  int m_base_location;
};
1625 | ||
7ec388ed | 1626 | /* Constructor. Store the old value of line_table, and create a new |
1627 | one, using sane defaults. */ | |
b73690a4 | 1628 | |
7ec388ed | 1629 | line_table_test::line_table_test () |
b73690a4 | 1630 | { |
7ec388ed | 1631 | gcc_assert (saved_line_table == NULL); |
1632 | saved_line_table = line_table; | |
1633 | line_table = ggc_alloc<line_maps> (); | |
1634 | linemap_init (line_table, BUILTINS_LOCATION); | |
1635 | gcc_assert (saved_line_table->reallocator); | |
1636 | line_table->reallocator = saved_line_table->reallocator; | |
1637 | gcc_assert (saved_line_table->round_alloc_size); | |
1638 | line_table->round_alloc_size = saved_line_table->round_alloc_size; | |
1639 | line_table->default_range_bits = 0; | |
1640 | } | |
b73690a4 | 1641 | |
1642 | /* Constructor. Store the old value of line_table, and create a new | |
1643 | one, using the sitation described in CASE_. */ | |
1644 | ||
7ec388ed | 1645 | line_table_test::line_table_test (const line_table_case &case_) |
b73690a4 | 1646 | { |
7ec388ed | 1647 | gcc_assert (saved_line_table == NULL); |
1648 | saved_line_table = line_table; | |
b73690a4 | 1649 | line_table = ggc_alloc<line_maps> (); |
1650 | linemap_init (line_table, BUILTINS_LOCATION); | |
7ec388ed | 1651 | gcc_assert (saved_line_table->reallocator); |
1652 | line_table->reallocator = saved_line_table->reallocator; | |
1653 | gcc_assert (saved_line_table->round_alloc_size); | |
1654 | line_table->round_alloc_size = saved_line_table->round_alloc_size; | |
b73690a4 | 1655 | line_table->default_range_bits = case_.m_default_range_bits; |
1656 | if (case_.m_base_location) | |
1657 | { | |
1658 | line_table->highest_location = case_.m_base_location; | |
1659 | line_table->highest_line = case_.m_base_location; | |
1660 | } | |
1661 | } | |
1662 | ||
/* Destructor.  Restore the old value of line_table, and clear
   saved_line_table so that another line_table_test can be created.  */

line_table_test::~line_table_test ()
{
  /* The constructors always store the previous table here.  */
  gcc_assert (saved_line_table != NULL);
  line_table = saved_line_table;
  saved_line_table = NULL;
}
1671 | ||
/* Verify basic operation of ordinary linemaps, using the line table
   configuration described by CASE_.  */

static void
test_accessing_ordinary_linemaps (const line_table_case &case_)
{
  /* Swap in a temporary line_table for the duration of this test; it is
     restored when LTT goes out of scope.  */
  line_table_test ltt (case_);

  /* Build a simple linemap describing some locations.  */
  linemap_add (line_table, LC_ENTER, false, "foo.c", 0);

  linemap_line_start (line_table, 1, 100);
  location_t loc_a = linemap_position_for_column (line_table, 1);
  location_t loc_b = linemap_position_for_column (line_table, 23);

  linemap_line_start (line_table, 2, 100);
  location_t loc_c = linemap_position_for_column (line_table, 1);
  location_t loc_d = linemap_position_for_column (line_table, 17);

  /* Example of a very long line.  */
  linemap_line_start (line_table, 3, 2000);
  location_t loc_e = linemap_position_for_column (line_table, 700);

  /* Transitioning back to a short line.  */
  linemap_line_start (line_table, 4, 0);
  location_t loc_back_to_short = linemap_position_for_column (line_table, 100);

  /* The check below is only meaningful while columns are still being
     tracked.  */
  if (should_have_column_data_p (loc_back_to_short))
    {
      /* Verify that we switched to short lines in the linemap.  */
      line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
      ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
    }

  /* Example of a line that will eventually be seen to be longer
     than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
     below that.  */
  linemap_line_start (line_table, 5, 2000);

  location_t loc_start_of_very_long_line
    = linemap_position_for_column (line_table, 2000);
  location_t loc_too_wide
    = linemap_position_for_column (line_table, 4097);
  location_t loc_too_wide_2
    = linemap_position_for_column (line_table, 4098);

  /* ...and back to a sane line length.  */
  linemap_line_start (line_table, 6, 100);
  location_t loc_sane_again = linemap_position_for_column (line_table, 10);

  linemap_add (line_table, LC_LEAVE, false, NULL, 0);

  /* Multiple files.  */
  linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
  linemap_line_start (line_table, 1, 200);
  location_t loc_f = linemap_position_for_column (line_table, 150);
  linemap_add (line_table, LC_LEAVE, false, NULL, 0);

  /* Verify that we can recover the location info.  assert_loceq skips
     the column comparison for locations that are too high to carry
     column data.  */
  assert_loceq ("foo.c", 1, 1, loc_a);
  assert_loceq ("foo.c", 1, 23, loc_b);
  assert_loceq ("foo.c", 2, 1, loc_c);
  assert_loceq ("foo.c", 2, 17, loc_d);
  assert_loceq ("foo.c", 3, 700, loc_e);
  assert_loceq ("foo.c", 4, 100, loc_back_to_short);

  /* In the very wide line, the initial location should be fully tracked.  */
  assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
  /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
     be disabled.  */
  assert_loceq ("foo.c", 5, 0, loc_too_wide);
  assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
  /* ...and column-tracking should be re-enabled for subsequent lines.  */
  assert_loceq ("foo.c", 6, 10, loc_sane_again);

  assert_loceq ("bar.c", 1, 150, loc_f);

  ASSERT_FALSE (is_location_from_builtin_token (loc_a));
  ASSERT_TRUE (pure_location_p (line_table, loc_a));

  /* Verify using make_location to build a range, and extracting data
     back from it: LOC_C is the caret, LOC_B the start and LOC_D the
     finish.  */
  location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
  ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
  ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
  source_range src_range = get_range_from_loc (line_table, range_c_b_d);
  ASSERT_EQ (loc_b, src_range.m_start);
  ASSERT_EQ (loc_d, src_range.m_finish);
}
1760 | ||
/* Verify various properties of UNKNOWN_LOCATION: it has no filename,
   and both its line and its column are reported as zero.  */

static void
test_unknown_location ()
{
  ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
  ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
  ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
}
1770 | ||
/* Verify various properties of BUILTINS_LOCATION: it is reported as line
   0, column 0 of the (translated) "<built-in>" pseudo-file, and
   is_location_from_builtin_token holds for it.  */

static void
test_builtins ()
{
  assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
  ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
}
1779 | ||
aca2a315 | 1780 | /* Regression test for make_location. |
1330da90 | 1781 | Ensure that we use pure locations for the start/finish of the range, |
1782 | rather than storing a packed or ad-hoc range as the start/finish. */ | |
aca2a315 | 1783 | |
1784 | static void | |
1785 | test_make_location_nonpure_range_endpoints (const line_table_case &case_) | |
1786 | { | |
1787 | /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c | |
1788 | with C++ frontend. | |
1789 | ....................0000000001111111111222. | |
1790 | ....................1234567890123456789012. */ | |
1791 | const char *content = " r += !aaa == bbb;\n"; | |
1792 | temp_source_file tmp (SELFTEST_LOCATION, ".C", content); | |
1793 | line_table_test ltt (case_); | |
1794 | linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1); | |
1795 | ||
1796 | const location_t c11 = linemap_position_for_column (line_table, 11); | |
1797 | const location_t c12 = linemap_position_for_column (line_table, 12); | |
1798 | const location_t c13 = linemap_position_for_column (line_table, 13); | |
1799 | const location_t c14 = linemap_position_for_column (line_table, 14); | |
1800 | const location_t c21 = linemap_position_for_column (line_table, 21); | |
1801 | ||
1802 | if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS) | |
1803 | return; | |
1804 | ||
1805 | /* Use column 13 for the caret location, arbitrarily, to verify that we | |
1806 | handle start != caret. */ | |
1807 | const location_t aaa = make_location (c13, c12, c14); | |
1808 | ASSERT_EQ (c13, get_pure_location (aaa)); | |
1809 | ASSERT_EQ (c12, get_start (aaa)); | |
1810 | ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa))); | |
1811 | ASSERT_EQ (c14, get_finish (aaa)); | |
1812 | ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa))); | |
1813 | ||
1814 | /* Make a location using a location with a range as the start-point. */ | |
1815 | const location_t not_aaa = make_location (c11, aaa, c14); | |
1816 | ASSERT_EQ (c11, get_pure_location (not_aaa)); | |
1817 | /* It should use the start location of the range, not store the range | |
1818 | itself. */ | |
1819 | ASSERT_EQ (c12, get_start (not_aaa)); | |
1820 | ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa))); | |
1821 | ASSERT_EQ (c14, get_finish (not_aaa)); | |
1822 | ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa))); | |
1823 | ||
1824 | /* Similarly, make a location with a range as the end-point. */ | |
1825 | const location_t aaa_eq_bbb = make_location (c12, c12, c21); | |
1826 | ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb)); | |
1827 | ASSERT_EQ (c12, get_start (aaa_eq_bbb)); | |
1828 | ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb))); | |
1829 | ASSERT_EQ (c21, get_finish (aaa_eq_bbb)); | |
1830 | ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb))); | |
1831 | const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb); | |
1832 | /* It should use the finish location of the range, not store the range | |
1833 | itself. */ | |
1834 | ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb)); | |
1835 | ASSERT_EQ (c12, get_start (not_aaa_eq_bbb)); | |
1836 | ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb))); | |
1837 | ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb)); | |
1838 | ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb))); | |
1839 | } | |
1840 | ||
/* Verify reading of input files (e.g. for caret-based diagnostics).  */

static void
test_reading_source_line ()
{
  /* Create a tempfile and write some text to it.  Note that the final
     line has no trailing newline.  */
  temp_source_file tmp (SELFTEST_LOCATION, ".txt",
                        "01234567890123456789\n"
                        "This is the test text\n"
                        "This is the 3rd line");

  /* Read back a specific line from the tempfile: first the last line,
     which is not newline-terminated.  */
  int line_size;
  const char *source_line = location_get_source_line (tmp.get_filename (),
                                                      3, &line_size);
  ASSERT_TRUE (source_line != NULL);
  ASSERT_EQ (20, line_size);
  ASSERT_TRUE (!strncmp ("This is the 3rd line",
                         source_line, line_size));

  /* An earlier line; the reported size does not count the newline.  */
  source_line = location_get_source_line (tmp.get_filename (),
                                          2, &line_size);
  ASSERT_TRUE (source_line != NULL);
  ASSERT_EQ (21, line_size);
  ASSERT_TRUE (!strncmp ("This is the test text",
                         source_line, line_size));

  /* Asking for a line beyond the end of the file yields NULL.  */
  source_line = location_get_source_line (tmp.get_filename (),
                                          4, &line_size);
  ASSERT_TRUE (source_line == NULL);
}
1872 | ||
b73690a4 | 1873 | /* Tests of lexing. */ |
1874 | ||
/* Verify that token TOK from PARSER has cpp_token_as_text
   equal to EXPECTED_TEXT.  The text returned by cpp_token_as_text is
   compared as a plain C string.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));    \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);           \
  SELFTEST_END_STMT
1883 | ||
1884 | /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM, | |
1885 | and ranges from EXP_START_COL to EXP_FINISH_COL. | |
1886 | Use LOC as the effective location of the selftest. */ | |
1887 | ||
1888 | static void | |
1889 | assert_token_loc_eq (const location &loc, | |
1890 | const cpp_token *tok, | |
1891 | const char *exp_filename, int exp_linenum, | |
1892 | int exp_start_col, int exp_finish_col) | |
1893 | { | |
1894 | location_t tok_loc = tok->src_loc; | |
1895 | ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc)); | |
1896 | ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc)); | |
1897 | ||
1898 | /* If location_t values are sufficiently high, then column numbers | |
1899 | will be unavailable. */ | |
1900 | if (!should_have_column_data_p (tok_loc)) | |
1901 | return; | |
1902 | ||
1903 | ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc)); | |
1904 | source_range tok_range = get_range_from_loc (line_table, tok_loc); | |
1905 | ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start)); | |
1906 | ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish)); | |
1907 | } | |
1908 | ||
/* Use assert_token_loc_eq to verify the TOK->src_loc, using
   SELFTEST_LOCATION as the effective location of the selftest, so that
   a failure is reported at the site where this macro is used.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,            \
                            EXP_START_COL, EXP_FINISH_COL)             \
  assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME),       \
                       (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1916 | ||
1917 | /* Test of lexing a file using libcpp, verifying tokens and their | |
1918 | location information. */ | |
1919 | ||
1920 | static void | |
1921 | test_lexer (const line_table_case &case_) | |
1922 | { | |
1923 | /* Create a tempfile and write some text to it. */ | |
1924 | const char *content = | |
1925 | /*00000000011111111112222222222333333.3333444444444.455555555556 | |
1926 | 12345678901234567890123456789012345.6789012345678.901234567890. */ | |
1927 | ("test_name /* c-style comment */\n" | |
1928 | " \"test literal\"\n" | |
1929 | " // test c++-style comment\n" | |
1930 | " 42\n"); | |
1931 | temp_source_file tmp (SELFTEST_LOCATION, ".txt", content); | |
1932 | ||
7ec388ed | 1933 | line_table_test ltt (case_); |
b73690a4 | 1934 | |
1935 | cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table); | |
1936 | ||
1937 | const char *fname = cpp_read_main_file (parser, tmp.get_filename ()); | |
1938 | ASSERT_NE (fname, NULL); | |
1939 | ||
1940 | /* Verify that we get the expected tokens back, with the correct | |
1941 | location information. */ | |
1942 | ||
1943 | location_t loc; | |
1944 | const cpp_token *tok; | |
1945 | tok = cpp_get_token_with_location (parser, &loc); | |
1946 | ASSERT_NE (tok, NULL); | |
1947 | ASSERT_EQ (tok->type, CPP_NAME); | |
1948 | ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name"); | |
1949 | ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9); | |
1950 | ||
1951 | tok = cpp_get_token_with_location (parser, &loc); | |
1952 | ASSERT_NE (tok, NULL); | |
1953 | ASSERT_EQ (tok->type, CPP_STRING); | |
1954 | ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\""); | |
1955 | ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48); | |
1956 | ||
1957 | tok = cpp_get_token_with_location (parser, &loc); | |
1958 | ASSERT_NE (tok, NULL); | |
1959 | ASSERT_EQ (tok->type, CPP_NUMBER); | |
1960 | ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42"); | |
1961 | ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5); | |
1962 | ||
1963 | tok = cpp_get_token_with_location (parser, &loc); | |
1964 | ASSERT_NE (tok, NULL); | |
1965 | ASSERT_EQ (tok->type, CPP_EOF); | |
1966 | ||
1967 | cpp_finish (parser, NULL); | |
1968 | cpp_destroy (parser); | |
1969 | } | |
1970 | ||
d4166bdc | 1971 | /* Forward decls. */ |
1972 | ||
1973 | struct lexer_test; | |
1974 | class lexer_test_options; | |
1975 | ||
1976 | /* A class for specifying options of a lexer_test. | |
1977 | The "apply" vfunc is called during the lexer_test constructor. */ | |
1978 | ||
1979 | class lexer_test_options | |
1980 | { | |
1981 | public: | |
1982 | virtual void apply (lexer_test &) = 0; | |
1983 | }; | |
1984 | ||
c6a7d9e9 | 1985 | /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy |
1986 | in its dtor. | |
1987 | ||
1988 | This is needed by struct lexer_test to ensure that the cleanup of the | |
1989 | cpp_reader happens *after* the cleanup of the temp_source_file. */ | |
1990 | ||
1991 | class cpp_reader_ptr | |
1992 | { | |
1993 | public: | |
1994 | cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {} | |
1995 | ||
1996 | ~cpp_reader_ptr () | |
1997 | { | |
1998 | cpp_finish (m_ptr, NULL); | |
1999 | cpp_destroy (m_ptr); | |
2000 | } | |
2001 | ||
2002 | operator cpp_reader * () const { return m_ptr; } | |
2003 | ||
2004 | private: | |
2005 | cpp_reader *m_ptr; | |
2006 | }; | |
2007 | ||
/* A struct for writing lexer tests.  It bundles a temporary line table,
   a cpp_reader, the temporary source file being lexed, and a string
   concatenation database.  */

struct lexer_test
{
  /* Set up the test for CASE_, lexing CONTENT; OPTIONS may be NULL.  */
  lexer_test (const line_table_case &case_, const char *content,
              lexer_test_options *options);
  ~lexer_test ();

  /* Fetch the next token from m_parser.  */
  const cpp_token *get_token ();

  /* The ordering of these fields matters.
     The line_table_test must be first, since the cpp_reader_ptr
     uses it.
     The cpp_reader must be cleaned up *after* the temp_source_file
     since the filenames in input.c's input cache are owned by the
     cpp_reader; in particular, when ~temp_source_file evicts the
     filename the filenames must still be alive.  */
  line_table_test m_ltt;
  cpp_reader_ptr m_parser;
  temp_source_file m_tempfile;
  string_concat_db m_concats;

  /* If true (the default set by the constructor), the destructor checks
     that the next token is CPP_EOF.  */
  bool m_implicitly_expect_EOF;
};
2031 | ||
2032 | /* Use an EBCDIC encoding for the execution charset, specifically | |
2033 | IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047"). | |
2034 | ||
2035 | This exercises iconv integration within libcpp. | |
2036 | Not every build of iconv supports the given charset, | |
2037 | so we need to flag this error and handle it gracefully. */ | |
2038 | ||
2039 | class ebcdic_execution_charset : public lexer_test_options | |
2040 | { | |
2041 | public: | |
2042 | ebcdic_execution_charset () : m_num_iconv_errors (0) | |
2043 | { | |
2044 | gcc_assert (s_singleton == NULL); | |
2045 | s_singleton = this; | |
2046 | } | |
2047 | ~ebcdic_execution_charset () | |
2048 | { | |
2049 | gcc_assert (s_singleton == this); | |
2050 | s_singleton = NULL; | |
2051 | } | |
2052 | ||
2053 | void apply (lexer_test &test) FINAL OVERRIDE | |
2054 | { | |
2055 | cpp_options *cpp_opts = cpp_get_options (test.m_parser); | |
2056 | cpp_opts->narrow_charset = "IBM1047"; | |
2057 | ||
2058 | cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser); | |
2059 | callbacks->error = on_error; | |
2060 | } | |
2061 | ||
2062 | static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED, | |
2063 | int level ATTRIBUTE_UNUSED, | |
2064 | int reason ATTRIBUTE_UNUSED, | |
2065 | rich_location *richloc ATTRIBUTE_UNUSED, | |
2066 | const char *msgid, va_list *ap ATTRIBUTE_UNUSED) | |
2067 | ATTRIBUTE_FPTR_PRINTF(5,0) | |
2068 | { | |
2069 | gcc_assert (s_singleton); | |
9a784cf5 | 2070 | /* Avoid exgettext from picking this up, it is translated in libcpp. */ |
2071 | const char *msg = "conversion from %s to %s not supported by iconv"; | |
2072 | #ifdef ENABLE_NLS | |
2073 | msg = dgettext ("cpplib", msg); | |
2074 | #endif | |
d4166bdc | 2075 | /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc |
2076 | when the local iconv build doesn't support the conversion. */ | |
9a784cf5 | 2077 | if (strcmp (msgid, msg) == 0) |
d4166bdc | 2078 | { |
2079 | s_singleton->m_num_iconv_errors++; | |
2080 | return true; | |
2081 | } | |
2082 | ||
2083 | /* Otherwise, we have an unexpected error. */ | |
2084 | abort (); | |
2085 | } | |
2086 | ||
2087 | bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; } | |
2088 | ||
2089 | private: | |
2090 | static ebcdic_execution_charset *s_singleton; | |
2091 | int m_num_iconv_errors; | |
2092 | }; | |
2093 | ||
2094 | ebcdic_execution_charset *ebcdic_execution_charset::s_singleton; | |
2095 | ||
0ccd6e7a | 2096 | /* A lexer_test_options subclass that records a list of error |
2097 | messages emitted by the lexer. */ | |
2098 | ||
2099 | class lexer_error_sink : public lexer_test_options | |
2100 | { | |
2101 | public: | |
2102 | lexer_error_sink () | |
2103 | { | |
2104 | gcc_assert (s_singleton == NULL); | |
2105 | s_singleton = this; | |
2106 | } | |
2107 | ~lexer_error_sink () | |
2108 | { | |
2109 | gcc_assert (s_singleton == this); | |
2110 | s_singleton = NULL; | |
2111 | ||
2112 | int i; | |
2113 | char *str; | |
2114 | FOR_EACH_VEC_ELT (m_errors, i, str) | |
2115 | free (str); | |
2116 | } | |
2117 | ||
2118 | void apply (lexer_test &test) FINAL OVERRIDE | |
2119 | { | |
2120 | cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser); | |
2121 | callbacks->error = on_error; | |
2122 | } | |
2123 | ||
2124 | static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED, | |
2125 | int level ATTRIBUTE_UNUSED, | |
2126 | int reason ATTRIBUTE_UNUSED, | |
2127 | rich_location *richloc ATTRIBUTE_UNUSED, | |
2128 | const char *msgid, va_list *ap) | |
2129 | ATTRIBUTE_FPTR_PRINTF(5,0) | |
2130 | { | |
2131 | char *msg = xvasprintf (msgid, *ap); | |
2132 | s_singleton->m_errors.safe_push (msg); | |
2133 | return true; | |
2134 | } | |
2135 | ||
2136 | auto_vec<char *> m_errors; | |
2137 | ||
2138 | private: | |
2139 | static lexer_error_sink *s_singleton; | |
2140 | }; | |
2141 | ||
2142 | lexer_error_sink *lexer_error_sink::s_singleton; | |
2143 | ||
d4166bdc | 2144 | /* Constructor. Override line_table with a new instance based on CASE_, |
2145 | and write CONTENT to a tempfile. Create a cpp_reader, and use it to | |
2146 | start parsing the tempfile. */ | |
2147 | ||
2148 | lexer_test::lexer_test (const line_table_case &case_, const char *content, | |
c6a7d9e9 | 2149 | lexer_test_options *options) |
2150 | : m_ltt (case_), | |
2151 | m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)), | |
d4166bdc | 2152 | /* Create a tempfile and write the text to it. */ |
2153 | m_tempfile (SELFTEST_LOCATION, ".c", content), | |
0ccd6e7a | 2154 | m_concats (), |
2155 | m_implicitly_expect_EOF (true) | |
d4166bdc | 2156 | { |
2157 | if (options) | |
2158 | options->apply (*this); | |
2159 | ||
2160 | cpp_init_iconv (m_parser); | |
2161 | ||
2162 | /* Parse the file. */ | |
2163 | const char *fname = cpp_read_main_file (m_parser, | |
2164 | m_tempfile.get_filename ()); | |
2165 | ASSERT_NE (fname, NULL); | |
2166 | } | |
2167 | ||
0ccd6e7a | 2168 | /* Destructor. By default, verify that the next token in m_parser is EOF. */ |
d4166bdc | 2169 | |
2170 | lexer_test::~lexer_test () | |
2171 | { | |
2172 | location_t loc; | |
2173 | const cpp_token *tok; | |
2174 | ||
0ccd6e7a | 2175 | if (m_implicitly_expect_EOF) |
2176 | { | |
2177 | tok = cpp_get_token_with_location (m_parser, &loc); | |
2178 | ASSERT_NE (tok, NULL); | |
2179 | ASSERT_EQ (tok->type, CPP_EOF); | |
2180 | } | |
d4166bdc | 2181 | } |
2182 | ||
2183 | /* Get the next token from m_parser. */ | |
2184 | ||
2185 | const cpp_token * | |
2186 | lexer_test::get_token () | |
2187 | { | |
2188 | location_t loc; | |
2189 | const cpp_token *tok; | |
2190 | ||
2191 | tok = cpp_get_token_with_location (m_parser, &loc); | |
2192 | ASSERT_NE (tok, NULL); | |
2193 | return tok; | |
2194 | } | |
2195 | ||
2196 | /* Verify that locations within string literals are correctly handled. */ | |
2197 | ||
2198 | /* Verify get_source_range_for_substring for token(s) at STRLOC, | |
2199 | using the string concatenation database for TEST. | |
2200 | ||
2201 | Assert that the character at index IDX is on EXPECTED_LINE, | |
2202 | and that it begins at column EXPECTED_START_COL and ends at | |
2203 | EXPECTED_FINISH_COL (unless the locations are beyond | |
2204 | LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their | |
2205 | columns). */ | |
2206 | ||
2207 | static void | |
2208 | assert_char_at_range (const location &loc, | |
2209 | lexer_test& test, | |
2210 | location_t strloc, enum cpp_ttype type, int idx, | |
2211 | int expected_line, int expected_start_col, | |
2212 | int expected_finish_col) | |
2213 | { | |
2214 | cpp_reader *pfile = test.m_parser; | |
2215 | string_concat_db *concats = &test.m_concats; | |
2216 | ||
be516c70 | 2217 | source_range actual_range = source_range(); |
d4166bdc | 2218 | const char *err |
5927e78e | 2219 | = get_source_range_for_char (pfile, concats, strloc, type, idx, |
2220 | &actual_range); | |
d4166bdc | 2221 | if (should_have_column_data_p (strloc)) |
2222 | ASSERT_EQ_AT (loc, NULL, err); | |
2223 | else | |
2224 | { | |
2225 | ASSERT_STREQ_AT (loc, | |
2226 | "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", | |
2227 | err); | |
2228 | return; | |
2229 | } | |
2230 | ||
2231 | int actual_start_line = LOCATION_LINE (actual_range.m_start); | |
2232 | ASSERT_EQ_AT (loc, expected_line, actual_start_line); | |
2233 | int actual_finish_line = LOCATION_LINE (actual_range.m_finish); | |
2234 | ASSERT_EQ_AT (loc, expected_line, actual_finish_line); | |
2235 | ||
2236 | if (should_have_column_data_p (actual_range.m_start)) | |
2237 | { | |
2238 | int actual_start_col = LOCATION_COLUMN (actual_range.m_start); | |
2239 | ASSERT_EQ_AT (loc, expected_start_col, actual_start_col); | |
2240 | } | |
2241 | if (should_have_column_data_p (actual_range.m_finish)) | |
2242 | { | |
2243 | int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish); | |
2244 | ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col); | |
2245 | } | |
2246 | } | |
2247 | ||
/* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
   the effective location of any errors, so that a failure is reported at
   the site where this macro is used.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
                             EXPECTED_START_COL, EXPECTED_FINISH_COL)      \
  assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
                        (IDX), (EXPECTED_LINE), (EXPECTED_START_COL),      \
                        (EXPECTED_FINISH_COL))
2256 | ||
2257 | /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC, | |
2258 | using the string concatenation database for TEST. | |
2259 | ||
2260 | Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */ | |
2261 | ||
2262 | static void | |
2263 | assert_num_substring_ranges (const location &loc, | |
2264 | lexer_test& test, | |
2265 | location_t strloc, | |
2266 | enum cpp_ttype type, | |
2267 | int expected_num_ranges) | |
2268 | { | |
2269 | cpp_reader *pfile = test.m_parser; | |
2270 | string_concat_db *concats = &test.m_concats; | |
2271 | ||
45183e4c | 2272 | int actual_num_ranges = -1; |
d4166bdc | 2273 | const char *err |
2274 | = get_num_source_ranges_for_substring (pfile, concats, strloc, type, | |
2275 | &actual_num_ranges); | |
2276 | if (should_have_column_data_p (strloc)) | |
2277 | ASSERT_EQ_AT (loc, NULL, err); | |
2278 | else | |
2279 | { | |
2280 | ASSERT_STREQ_AT (loc, | |
2281 | "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", | |
2282 | err); | |
2283 | return; | |
2284 | } | |
2285 | ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges); | |
2286 | } | |
2287 | ||
/* Macro for calling assert_num_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors, so that a
   failure is reported at the site where this macro is used.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,              \
                                    EXPECTED_NUM_RANGES)                   \
  assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC),  \
                               (TYPE), (EXPECTED_NUM_RANGES))
2295 | ||
2296 | ||
2297 | /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC | |
2298 | returns an error (using the string concatenation database for TEST). */ | |
2299 | ||
2300 | static void | |
2301 | assert_has_no_substring_ranges (const location &loc, | |
2302 | lexer_test& test, | |
2303 | location_t strloc, | |
2304 | enum cpp_ttype type, | |
2305 | const char *expected_err) | |
2306 | { | |
2307 | cpp_reader *pfile = test.m_parser; | |
2308 | string_concat_db *concats = &test.m_concats; | |
2309 | cpp_substring_ranges ranges; | |
2310 | const char *actual_err | |
2311 | = get_substring_ranges_for_loc (pfile, concats, strloc, | |
2312 | type, ranges); | |
2313 | if (should_have_column_data_p (strloc)) | |
2314 | ASSERT_STREQ_AT (loc, expected_err, actual_err); | |
2315 | else | |
2316 | ASSERT_STREQ_AT (loc, | |
2317 | "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", | |
2318 | actual_err); | |
2319 | } | |
2320 | ||
/* Macro for calling assert_has_no_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)    \
    assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST),     \
                                    (STRLOC), (TYPE), (ERR))
2324 | ||
2325 | /* Lex a simple string literal. Verify the token location before, and the | |
2326 | substring location data after, running cpp_interpret_string on it. CASE_ is passed to the lexer_test constructor. */ | |
2327 | |
2328 | static void | |
2329 | test_lexer_string_locations_simple (const line_table_case &case_) | |
2330 | { | |
2331 | /* Digits 0-9 (with 0 at column 10), the simple way. | |
2332 | ....................000000000.11111111112.2222222223333333333 | |
2333 | ....................123456789.01234567890.1234567890123456789 | |
2334 | We add a trailing comment to ensure that we correctly locate | |
2335 | the end of the string literal token. */ | |
2336 | const char *content = " \"0123456789\" /* not a string */\n"; | |
2337 | lexer_test test (case_, content, NULL); | |
2338 | |
2339 | /* Verify that we get the expected token back, with the correct | |
2340 | location information. */ | |
2341 | const cpp_token *tok = test.get_token (); | |
2342 | ASSERT_EQ (tok->type, CPP_STRING); | |
2343 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\""); | |
2344 | ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20); | |
2345 | |
2346 | /* At this point in lexing, the quote characters are treated as part of | |
2347 | the string (they are stripped off by cpp_interpret_string), so the length is the 10 digits plus the 2 quotes. */ | |
2348 | |
2349 | ASSERT_EQ (tok->val.str.len, 12); | |
2350 | |
2351 | /* Verify that cpp_interpret_string works. */ | |
2352 | cpp_string dst_string; | |
2353 | const enum cpp_ttype type = CPP_STRING; | |
2354 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2355 | &dst_string, type); | |
2356 | ASSERT_TRUE (result); | |
2357 | ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
2358 | free (const_cast <unsigned char *> (dst_string.text)); | |
2359 | |
2360 | /* Verify ranges of individual characters. This no longer includes the | |
7413e757 | 2361 | opening quote, but does include the closing quote (index 10, at column 20), giving 11 ranges in total. */ |
2362 | for (int i = 0; i <= 10; i++) | |
d4166bdc | 2363 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, |
2364 | 10 + i, 10 + i); | |
2365 | |
7413e757 | 2366 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); |
d4166bdc | 2367 | } |
2368 | ||
2369 | /* As test_lexer_string_locations_simple, but use an EBCDIC execution | |
2370 | encoding. Returns early, without failing, if HAVE_ICONV is 0, or (after the token-level checks) if use_ebcdic.iconv_errors_occurred_p () is true. */ | |
2371 | |
2372 | static void | |
2373 | test_lexer_string_locations_ebcdic (const line_table_case &case_) | |
2374 | { | |
2375 | /* EBCDIC support requires iconv. */ | |
2376 | if (!HAVE_ICONV) | |
2377 | return; | |
2378 | |
2379 | /* Digits 0-9 (with 0 at column 10), the simple way. | |
2380 | ....................000000000.11111111112.2222222223333333333 | |
2381 | ....................123456789.01234567890.1234567890123456789 | |
2382 | We add a trailing comment to ensure that we correctly locate | |
2383 | the end of the string literal token. */ | |
2384 | const char *content = " \"0123456789\" /* not a string */\n"; | |
2385 | ebcdic_execution_charset use_ebcdic; | |
2386 | lexer_test test (case_, content, &use_ebcdic); | |
2387 | |
2388 | /* Verify that we get the expected token back, with the correct | |
2389 | location information. */ | |
2390 | const cpp_token *tok = test.get_token (); | |
2391 | ASSERT_EQ (tok->type, CPP_STRING); | |
2392 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\""); | |
2393 | ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20); | |
2394 | |
2395 | /* At this point in lexing, the quote characters are treated as part of | |
2396 | the string (they are stripped off by cpp_interpret_string). */ | |
2397 | |
2398 | ASSERT_EQ (tok->val.str.len, 12); | |
2399 | |
2400 | /* The remainder of the test requires an iconv implementation that | |
2401 | can convert from UTF-8 to the EBCDIC encoding requested above. */ | |
2402 | if (use_ebcdic.iconv_errors_occurred_p ()) | |
2403 | return; | |
2404 | |
2405 | /* Verify that cpp_interpret_string works. */ | |
2406 | cpp_string dst_string; | |
2407 | const enum cpp_ttype type = CPP_STRING; | |
2408 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2409 | &dst_string, type); | |
2410 | ASSERT_TRUE (result); | |
2411 | /* We should now have EBCDIC-encoded text, specifically | |
2412 | IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047"). | |
2413 | The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */ | |
2414 | ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9", | |
2415 | (const char *)dst_string.text); | |
2416 | free (const_cast <unsigned char *> (dst_string.text)); | |
2417 | |
2418 | /* Verify that we don't attempt to record substring location information | |
2419 | for such cases, and that the expected error message is reported instead. */ | |
2420 | ASSERT_HAS_NO_SUBSTRING_RANGES | |
2421 | (test, tok->src_loc, type, | |
2422 | "execution character set != source character set"); | |
2423 | } | |
2424 | ||
2425 | /* Lex a string literal containing a hex-escaped character. | |
2426 | Verify the token location before, and the substring location data after, running | |
2427 | cpp_interpret_string on it. */ | |
2428 | |
2429 | static void | |
2430 | test_lexer_string_locations_hex (const line_table_case &case_) | |
2431 | { | |
2432 | /* Digits 0-9, expressing digit 5 in ASCII as "\x35" | |
2433 | and with a space in place of digit 6, to terminate the escaped | |
2434 | hex code. | |
2435 | ....................000000000.111111.11112222. | |
2436 | ....................123456789.012345.67890123. */ | |
2437 | const char *content = " \"01234\\x35 789\"\n"; | |
2438 | lexer_test test (case_, content, NULL); | |
2439 | |
2440 | /* Verify that we get the expected token back, with the correct | |
2441 | location information. */ | |
2442 | const cpp_token *tok = test.get_token (); | |
2443 | ASSERT_EQ (tok->type, CPP_STRING); | |
2444 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\""); | |
2445 | ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23); | |
2446 | |
2447 | /* At this point in lexing, the quote characters are treated as part of | |
2448 | the string (they are stripped off by cpp_interpret_string). */ | |
2449 | ASSERT_EQ (tok->val.str.len, 15); | |
2450 | |
2451 | /* Verify that cpp_interpret_string works. */ | |
2452 | cpp_string dst_string; | |
2453 | const enum cpp_ttype type = CPP_STRING; | |
2454 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2455 | &dst_string, type); | |
2456 | ASSERT_TRUE (result); | |
2457 | ASSERT_STREQ ("012345 789", (const char *)dst_string.text); | |
2458 | free (const_cast <unsigned char *> (dst_string.text)); | |
2459 | |
2460 | /* Verify ranges of individual characters. This no longer includes the | |
7413e757 | 2461 | opening quote, but does include the closing quote. Index 5 is the "\x35" escape, which spans columns 15-18; index 10 is the closing quote at column 23. */ |
d4166bdc | 2462 | for (int i = 0; i <= 4; i++) |
2463 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); | |
2464 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18); | |
7413e757 | 2465 | for (int i = 6; i <= 10; i++) |
d4166bdc | 2466 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i); |
2467 | |
7413e757 | 2468 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); |
d4166bdc | 2469 | } |
2470 | ||
2471 | /* Lex a string literal containing an octal-escaped character. | |
2472 | Verify the substring location data after running cpp_interpret_string | |
2473 | on it. */ | |
2474 | |
2475 | static void | |
2476 | test_lexer_string_locations_oct (const line_table_case &case_) | |
2477 | { | |
2478 | /* Digits 0-9, expressing digit 5 in ASCII as "\065" | |
2479 | and with a space in place of digit 6, to terminate the escaped | |
2480 | octal code. | |
2481 | ....................000000000.111111.11112222.2222223333333333444 | |
2482 | ....................123456789.012345.67890123.4567890123456789012 */ | |
2483 | const char *content = " \"01234\\065 789\" /* not a string */\n"; | |
2484 | lexer_test test (case_, content, NULL); | |
2485 | |
2486 | /* Verify that we get the expected token back (type and spelling only; the token location is not checked in this test). */ | |
2487 | |
2488 | const cpp_token *tok = test.get_token (); | |
2489 | ASSERT_EQ (tok->type, CPP_STRING); | |
2490 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\""); | |
2491 | |
2492 | /* Verify that cpp_interpret_string works. */ | |
2493 | cpp_string dst_string; | |
2494 | const enum cpp_ttype type = CPP_STRING; | |
2495 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2496 | &dst_string, type); | |
2497 | ASSERT_TRUE (result); | |
2498 | ASSERT_STREQ ("012345 789", (const char *)dst_string.text); | |
2499 | free (const_cast <unsigned char *> (dst_string.text)); | |
2500 | |
2501 | /* Verify ranges of individual characters. This no longer includes the | |
7413e757 | 2502 | opening quote, but does include the closing quote. Index 5 is the "\065" escape, which spans columns 15-18; index 10 is the closing quote at column 23. */ |
d4166bdc | 2503 | for (int i = 0; i < 5; i++) |
2504 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); | |
2505 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18); | |
7413e757 | 2506 | for (int i = 6; i <= 10; i++) |
d4166bdc | 2507 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i); |
2508 | |
7413e757 | 2509 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); |
d4166bdc | 2510 | } |
2511 | ||
2512 | /* Test of string literal containing letter escapes ("\t", "\\" and "\n"); each escape is expected to map to a two-column source range. */ | |
2513 | |
2514 | static void | |
2515 | test_lexer_string_locations_letter_escape_1 (const line_table_case &case_) | |
2516 | { | |
2517 | /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar. | |
2518 | .....................000000000.1.11111.1.1.11222.22222223333333 | |
2519 | .....................123456789.0.12345.6.7.89012.34567890123456. */ | |
2520 | const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n"); | |
2521 | lexer_test test (case_, content, NULL); | |
2522 | |
2523 | /* Verify that we get the expected tokens back. */ | |
2524 | const cpp_token *tok = test.get_token (); | |
2525 | ASSERT_EQ (tok->type, CPP_STRING); | |
2526 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\""); | |
2527 | |
2528 | /* Verify ranges of individual characters. */ | |
2529 | /* "\t" at columns 10-11. */ | |
2530 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2531 | 0, 1, 10, 11); | |
2532 | /* "foo" at columns 12-14. */ | |
2533 | for (int i = 1; i <= 3; i++) | |
2534 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2535 | i, 1, 11 + i, 11 + i); | |
2536 | /* "\\" at columns 15-16 and "\n" at columns 17-18. */ | |
2537 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2538 | 4, 1, 15, 16); | |
2539 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2540 | 5, 1, 17, 18); | |
2541 | |
7413e757 | 2542 | /* "bar" at columns 19-21 and closing quote (column 22) for nul-terminator. */ |
2543 | for (int i = 6; i <= 9; i++) | |
d4166bdc | 2544 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, |
2545 | i, 1, 13 + i, 13 + i); | |
2546 | |
7413e757 | 2547 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10); |
d4166bdc | 2548 | } |
2549 | ||
2550 | /* Another test of a string literal containing a letter escape. | |
2551 | Based on string seen in | |
2552 | printf ("%-%\n"); | |
2553 | in gcc.dg/format/c90-printf-1.c. */ | |
2554 | |
2555 | static void | |
2556 | test_lexer_string_locations_letter_escape_2 (const line_table_case &case_) | |
2557 | { | |
2558 | /* .....................000000000.1111.11.1111.22222222223. | |
2559 | .....................123456789.0123.45.6789.01234567890. */ | |
2560 | const char *content = (" \"%-%\\n\" /* non-str */\n"); | |
2561 | lexer_test test (case_, content, NULL); | |
2562 | |
2563 | /* Verify that we get the expected tokens back. */ | |
2564 | const cpp_token *tok = test.get_token (); | |
2565 | ASSERT_EQ (tok->type, CPP_STRING); | |
2566 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\""); | |
2567 | |
2568 | /* Verify ranges of individual characters. */ | |
2569 | /* "%-%" at columns 10-12. */ | |
2570 | for (int i = 0; i < 3; i++) | |
2571 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2572 | i, 1, 10 + i, 10 + i); | |
2573 | /* "\n": one character in the string, two columns (13-14) in the source. */ | |
2574 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2575 | 3, 1, 13, 14); | |
2576 | |
7413e757 | 2577 | /* Closing quote (column 15) for nul-terminator. */ |
2578 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2579 | 4, 1, 15, 15); | |
2580 | |
2581 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5); | |
d4166bdc | 2582 | } |
2583 | ||
2584 | /* Lex a string literal containing UCN 4 characters. | |
2585 | Verify the substring location data after running cpp_interpret_string | |
2586 | on it. */ | |
2587 | |
2588 | static void | |
2589 | test_lexer_string_locations_ucn4 (const line_table_case &case_) | |
2590 | { | |
2591 | /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed | |
2592 | as UCN 4. | |
2593 | ....................000000000.111111.111122.222222223.33333333344444 | |
2594 | ....................123456789.012345.678901.234567890.12345678901234 */ | |
2595 | const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n"; | |
2596 | lexer_test test (case_, content, NULL); | |
2597 | |
2598 | /* Verify that we get the expected token back (type and spelling). */ | |
2599 | |
2600 | const cpp_token *tok = test.get_token (); | |
2601 | ASSERT_EQ (tok->type, CPP_STRING); | |
2602 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\""); | |
2603 | |
2604 | /* Verify that cpp_interpret_string works. | |
2605 | The string should be encoded in the execution character | |
2606 | set. Assuming that that is UTF-8, we should have the following: | |
2607 | ----------- ---- ----- ------- ---------------- | |
2608 | Byte offset Byte Octal Unicode Source Column(s) | |
2609 | ----------- ---- ----- ------- ---------------- | |
2610 | 0 0x30 '0' 10 | |
2611 | 1 0x31 '1' 11 | |
2612 | 2 0x32 '2' 12 | |
2613 | 3 0x33 '3' 13 | |
2614 | 4 0x34 '4' 14 | |
2615 | 5 0xE2 \342 U+2174 15-20 | |
2616 | 6 0x85 \205 (cont) 15-20 | |
2617 | 7 0xB4 \264 (cont) 15-20 | |
2618 | 8 0xE2 \342 U+2175 21-26 | |
2619 | 9 0x85 \205 (cont) 21-26 | |
2620 | 10 0xB5 \265 (cont) 21-26 | |
2621 | 11 0x37 '7' 27 | |
2622 | 12 0x38 '8' 28 | |
2623 | 13 0x39 '9' 29 | |
7413e757 | 2624 | 14 0x00 30 (closing quote) |
d4166bdc | 2625 | ----------- ---- ----- ------- ---------------. */ |
2626 | |
2627 | cpp_string dst_string; | |
2628 | const enum cpp_ttype type = CPP_STRING; | |
2629 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2630 | &dst_string, type); | |
2631 | ASSERT_TRUE (result); | |
2632 | ASSERT_STREQ ("01234\342\205\264\342\205\265789", | |
2633 | (const char *)dst_string.text); | |
2634 | free (const_cast <unsigned char *> (dst_string.text)); | |
2635 | |
2636 | /* Verify ranges of individual characters. This no longer includes the | |
7413e757 | 2637 | opening quote, but does include the closing quote. |
d4166bdc | 2638 | '01234'. */ |
2639 | for (int i = 0; i <= 4; i++) | |
2640 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); | |
2641 | /* U+2174: each of its three bytes maps to the whole escape at columns 15-20. */ | |
2642 | for (int i = 5; i <= 7; i++) | |
2643 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20); | |
2644 | /* U+2175: each of its three bytes maps to the whole escape at columns 21-26. */ | |
2645 | for (int i = 8; i <= 10; i++) | |
2646 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26); | |
7413e757 | 2647 | /* '789' at columns 27-29, then the nul terminator at the closing quote (column 30). */ |
2648 | for (int i = 11; i <= 14; i++) | |
d4166bdc | 2649 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i); |
2650 | |
7413e757 | 2651 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15); |
d4166bdc | 2652 | } |
2653 | ||
2654 | /* Lex a string literal containing UCN 8 characters. | |
2655 | Verify the substring location data after running cpp_interpret_string | |
2656 | on it. */ | |
2657 | |
2658 | static void | |
2659 | test_lexer_string_locations_ucn8 (const line_table_case &case_) | |
2660 | { | |
2661 | /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8. | |
2662 | ....................000000000.111111.1111222222.2222333333333.344444 | |
2663 | ....................123456789.012345.6789012345.6789012345678.901234 */ | |
2664 | const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n"; | |
2665 | lexer_test test (case_, content, NULL); | |
2666 | |
2667 | /* Verify that we get the expected token back (type and spelling). */ | |
2668 | |
2669 | const cpp_token *tok = test.get_token (); | |
2670 | ASSERT_EQ (tok->type, CPP_STRING); | |
2671 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, | |
2672 | "\"01234\\U00002174\\U00002175789\""); | |
2673 | |
2674 | /* Verify that cpp_interpret_string works. | |
2675 | The UTF-8 encoding of the string is identical to that from | |
2676 | the ucn4 testcase above; the only difference is the column | |
2677 | locations. */ | |
2678 | cpp_string dst_string; | |
2679 | const enum cpp_ttype type = CPP_STRING; | |
2680 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2681 | &dst_string, type); | |
2682 | ASSERT_TRUE (result); | |
2683 | ASSERT_STREQ ("01234\342\205\264\342\205\265789", | |
2684 | (const char *)dst_string.text); | |
2685 | free (const_cast <unsigned char *> (dst_string.text)); | |
2686 | |
2687 | /* Verify ranges of individual characters. This no longer includes the | |
7413e757 | 2688 | opening quote, but does include the closing quote. |
d4166bdc | 2689 | '01234'. */ |
2690 | for (int i = 0; i <= 4; i++) | |
2691 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); | |
2692 | /* U+2174: each of its three bytes maps to the whole 10-column escape at columns 15-24. */ | |
2693 | for (int i = 5; i <= 7; i++) | |
2694 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24); | |
2695 | /* U+2175: each of its three bytes maps to the whole 10-column escape at columns 25-34. */ | |
2696 | for (int i = 8; i <= 10; i++) | |
2697 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34); | |
2698 | /* '789' at columns 35-37. */ | |
2699 | for (int i = 11; i <= 13; i++) | |
2700 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i); | |
7413e757 | 2701 | /* Closing quote/nul-terminator at column 38. */ |
2702 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38); | |
d4166bdc | 2703 | |
7413e757 | 2704 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15); |
d4166bdc | 2705 | } |
2706 | ||
/* Decode the four bytes stored at PTR_BE_VALUE as a big-endian
   (most-significant byte first) 32-bit quantity, and return the
   result as a host-endian value.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t value = 0;

  /* Accumulate from the most significant byte down to the least.  */
  for (int idx = 0; idx < 4; idx++)
    value = (value << 8) | (uint32_t) bytes[idx];

  return value;
}
2718 | ||
2719 | /* Lex a wide string literal and verify that attempts to read substring | |
2720 | location data from it fail gracefully. */ | |
2721 | |
2722 | static void | |
2723 | test_lexer_string_locations_wide_string (const line_table_case &case_) | |
2724 | { | |
2725 | /* Digits 0-9. | |
2726 | ....................000000000.11111111112.22222222233333 | |
2727 | ....................123456789.01234567890.12345678901234 */ | |
2728 | const char *content = " L\"0123456789\" /* non-str */\n"; | |
2729 | lexer_test test (case_, content, NULL); | |
2730 | |
2731 | /* Verify that we get the expected token back (type and spelling). */ | |
2732 | |
2733 | const cpp_token *tok = test.get_token (); | |
2734 | ASSERT_EQ (tok->type, CPP_WSTRING); | |
2735 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\""); | |
2736 | |
2737 | /* Verify that cpp_interpret_string works, using CPP_WSTRING. */ | |
2738 | cpp_string dst_string; | |
2739 | const enum cpp_ttype type = CPP_WSTRING; | |
2740 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2741 | &dst_string, type); | |
2742 | ASSERT_TRUE (result); | |
2743 | /* The cpp_reader defaults to big-endian with | |
2744 | CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should | |
2745 | now be encoded as UTF-32BE. Spot-check elements 0, 5 and 9, and the zero terminator at index 10. */ | |
2746 | const uint32_t *be32_chars = (const uint32_t *)dst_string.text; | |
2747 | ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0])); | |
2748 | ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5])); | |
2749 | ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9])); | |
2750 | ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10])); | |
2751 | free (const_cast <unsigned char *> (dst_string.text)); | |
2752 | |
2753 | /* We don't yet support generating substring location information | |
2754 | for L"" strings; verify the error message that is reported instead. */ | |
2755 | ASSERT_HAS_NO_SUBSTRING_RANGES | |
2756 | (test, tok->src_loc, type, | |
2757 | "execution character set != source character set"); | |
2758 | } | |
2759 | ||
/* Decode the two bytes stored at PTR_BE_VALUE as a big-endian
   (most-significant byte first) 16-bit quantity, and return the
   result as a host-endian value.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high = bytes[0];
  const unsigned int low = bytes[1];

  return (uint16_t) ((high << 8) | low);
}
2768 | ||
2769 | /* Lex a u"" string literal and verify that attempts to read substring | |
2770 | location data from it fail gracefully. */ | |
2771 | |
2772 | static void | |
2773 | test_lexer_string_locations_string16 (const line_table_case &case_) | |
2774 | { | |
2775 | /* Digits 0-9. | |
2776 | ....................000000000.11111111112.22222222233333 | |
2777 | ....................123456789.01234567890.12345678901234 */ | |
2778 | const char *content = " u\"0123456789\" /* non-str */\n"; | |
2779 | lexer_test test (case_, content, NULL); | |
2780 | |
2781 | /* Verify that we get the expected token back (type and spelling). */ | |
2782 | |
2783 | const cpp_token *tok = test.get_token (); | |
2784 | ASSERT_EQ (tok->type, CPP_STRING16); | |
2785 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\""); | |
2786 | |
2787 | /* Verify that cpp_interpret_string works, using CPP_STRING16. */ | |
2788 | cpp_string dst_string; | |
2789 | const enum cpp_ttype type = CPP_STRING16; | |
2790 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2791 | &dst_string, type); | |
2792 | ASSERT_TRUE (result); | |
2793 | |
2794 | /* The cpp_reader defaults to big-endian, so dst_string should | |
2795 | now be encoded as UTF-16BE. Spot-check elements 0, 5 and 9, and the zero terminator at index 10. */ | |
2796 | const uint16_t *be16_chars = (const uint16_t *)dst_string.text; | |
2797 | ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0])); | |
2798 | ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5])); | |
2799 | ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9])); | |
2800 | ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10])); | |
2801 | free (const_cast <unsigned char *> (dst_string.text)); | |
2802 | |
2803 | /* We don't yet support generating substring location information | |
2804 | for u"" strings. */ | |
2805 | ASSERT_HAS_NO_SUBSTRING_RANGES | |
2806 | (test, tok->src_loc, type, | |
2807 | "execution character set != source character set"); | |
2808 | } | |
2809 | ||
2810 | /* Lex a U"" string literal and verify that attempts to read substring | |
2811 | location data from it fail gracefully. */ | |
2812 | |
2813 | static void | |
2814 | test_lexer_string_locations_string32 (const line_table_case &case_) | |
2815 | { | |
2816 | /* Digits 0-9. | |
2817 | ....................000000000.11111111112.22222222233333 | |
2818 | ....................123456789.01234567890.12345678901234 */ | |
2819 | const char *content = " U\"0123456789\" /* non-str */\n"; | |
2820 | lexer_test test (case_, content, NULL); | |
2821 | |
2822 | /* Verify that we get the expected token back (type and spelling). */ | |
2823 | |
2824 | const cpp_token *tok = test.get_token (); | |
2825 | ASSERT_EQ (tok->type, CPP_STRING32); | |
2826 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\""); | |
2827 | |
2828 | /* Verify that cpp_interpret_string works, using CPP_STRING32. */ | |
2829 | cpp_string dst_string; | |
2830 | const enum cpp_ttype type = CPP_STRING32; | |
2831 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2832 | &dst_string, type); | |
2833 | ASSERT_TRUE (result); | |
2834 | |
2835 | /* The cpp_reader defaults to big-endian, so dst_string should | |
2836 | now be encoded as UTF-32BE. Spot-check elements 0, 5 and 9, and the zero terminator at index 10. */ | |
2837 | const uint32_t *be32_chars = (const uint32_t *)dst_string.text; | |
2838 | ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0])); | |
2839 | ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5])); | |
2840 | ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9])); | |
2841 | ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10])); | |
2842 | free (const_cast <unsigned char *> (dst_string.text)); | |
2843 | |
2844 | /* We don't yet support generating substring location information | |
2845 | for U"" strings. */ | |
2846 | ASSERT_HAS_NO_SUBSTRING_RANGES | |
2847 | (test, tok->src_loc, type, | |
2848 | "execution character set != source character set"); | |
2849 | } | |
2850 | ||
2851 | /* Lex a u8-string literal. | |
2852 | Verify the substring location data after running cpp_interpret_string | |
2853 | on it. */ | |
2854 | |
2855 | static void | |
2856 | test_lexer_string_locations_u8 (const line_table_case &case_) | |
2857 | { | |
2858 | /* Digits 0-9. | |
2859 | ....................000000000.11111111112.22222222233333 | |
2860 | ....................123456789.01234567890.12345678901234 */ | |
2861 | const char *content = " u8\"0123456789\" /* non-str */\n"; | |
2862 | lexer_test test (case_, content, NULL); | |
2863 | |
2864 | /* Verify that we get the expected token back (type and spelling). */ | |
2865 | |
2866 | const cpp_token *tok = test.get_token (); | |
2867 | ASSERT_EQ (tok->type, CPP_UTF8STRING); | |
2868 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\""); | |
2869 | |
2870 | /* Verify that cpp_interpret_string works. Note that although the token is a CPP_UTF8STRING, it is interpreted, and its ranges are queried, as CPP_STRING. */ | |
2871 | cpp_string dst_string; | |
2872 | const enum cpp_ttype type = CPP_STRING; | |
2873 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2874 | &dst_string, type); | |
2875 | ASSERT_TRUE (result); | |
2876 | ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
2877 | free (const_cast <unsigned char *> (dst_string.text)); | |
2878 | |
2879 | /* Verify ranges of individual characters. This no longer includes the | |
7413e757 | 2880 | opening quote, but does include the closing quote (index 10, at column 20). NOTE(review): unlike the sibling tests, the total number of ranges is not asserted here -- confirm whether that is intentional. */ |
2881 | for (int i = 0; i <= 10; i++) | |
d4166bdc | 2882 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); |
2883 | } | |
2884 | ||
2885 | /* Lex a string literal containing UTF-8 source characters. | |
2886 | Verify the substring location data after running cpp_interpret_string | |
2887 | on it. */ | |
2888 | |
2889 | static void | |
2890 | test_lexer_string_locations_utf8_source (const line_table_case &case_) | |
2891 | { | |
2892 | /* This string literal is written out to the source file as UTF-8, | |
2893 | and is of the form "before mojibake after", where "mojibake" | |
2894 | is written as the following four unicode code points: | |
2895 | U+6587 CJK UNIFIED IDEOGRAPH-6587 | |
2896 | U+5B57 CJK UNIFIED IDEOGRAPH-5B57 | |
2897 | U+5316 CJK UNIFIED IDEOGRAPH-5316 | |
2898 | U+3051 HIRAGANA LETTER KE. | |
2899 | Each of these is 3 bytes wide when encoded in UTF-8, whereas the | |
2900 | "before" and "after" are 1 byte per unicode character. | |
2901 | |
2902 | The numbers shown are "columns", which are *byte* numbers within | |
2903 | the line, rather than unicode character numbers. | |
2904 | |
2905 | .................... 000000000.1111111. | |
2906 | .................... 123456789.0123456. */ | |
2907 | const char *content = (" \"before " | |
2908 | /* U+6587 CJK UNIFIED IDEOGRAPH-6587 | |
2909 | UTF-8: 0xE6 0x96 0x87 | |
2910 | C octal escaped UTF-8: \346\226\207 | |
2911 | "column" numbers: 17-19. */ | |
2912 | "\346\226\207" | |
2913 | |
2914 | /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57 | |
2915 | UTF-8: 0xE5 0xAD 0x97 | |
2916 | C octal escaped UTF-8: \345\255\227 | |
2917 | "column" numbers: 20-22. */ | |
2918 | "\345\255\227" | |
2919 | |
2920 | /* U+5316 CJK UNIFIED IDEOGRAPH-5316 | |
2921 | UTF-8: 0xE5 0x8C 0x96 | |
2922 | C octal escaped UTF-8: \345\214\226 | |
2923 | "column" numbers: 23-25. */ | |
2924 | "\345\214\226" | |
2925 | |
2926 | /* U+3051 HIRAGANA LETTER KE | |
2927 | UTF-8: 0xE3 0x81 0x91 | |
2928 | C octal escaped UTF-8: \343\201\221 | |
2929 | "column" numbers: 26-28. */ | |
2930 | "\343\201\221" | |
2931 | |
2932 | /* column numbers 29 onwards | |
2933 | 2333333.33334444444444 | |
2934 | 9012345.67890123456789. */ | |
2935 | " after\" /* non-str */\n"); | |
2936 | lexer_test test (case_, content, NULL); | |
2937 | |
2938 | /* Verify that we get the expected token back (type and spelling). */ | |
2939 | |
2940 | const cpp_token *tok = test.get_token (); | |
2941 | ASSERT_EQ (tok->type, CPP_STRING); | |
2942 | ASSERT_TOKEN_AS_TEXT_EQ | |
2943 | (test.m_parser, tok, | |
2944 | "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\""); | |
2945 | |
2946 | /* Verify that cpp_interpret_string works. */ | |
2947 | cpp_string dst_string; | |
2948 | const enum cpp_ttype type = CPP_STRING; | |
2949 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2950 | &dst_string, type); | |
2951 | ASSERT_TRUE (result); | |
2952 | ASSERT_STREQ | |
2953 | ("before \346\226\207\345\255\227\345\214\226\343\201\221 after", | |
2954 | (const char *)dst_string.text); | |
2955 | free (const_cast <unsigned char *> (dst_string.text)); | |
2956 | |
2957 | /* Verify ranges of individual characters. This no longer includes the | |
7413e757 | 2958 | opening quote, but does include the closing quote. |
d4166bdc | 2959 | Assuming that both source and execution encodings are UTF-8, we have |
7413e757 | 2960 | a run of 25 octets in each, plus the NUL terminator. Each octet is expected to map to its own single byte column. */ |
d4166bdc | 2961 | for (int i = 0; i < 25; i++) |
2962 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); | |
7413e757 | 2963 | /* NUL-terminator should use the closing quote at column 35. */ |
2964 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35); | |
d4166bdc | 2965 | |
7413e757 | 2966 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26); |
d4166bdc | 2967 | } |
2968 | ||
2969 | /* Test of string literal concatenation: two string literals on consecutive source lines are interpreted together, and the ranges are then queried via the location of the first literal. */ | |
2970 | |
2971 | static void | |
2972 | test_lexer_string_locations_concatenation_1 (const line_table_case &case_) | |
2973 | { | |
2974 | /* Digits 0-9. | |
2975 | .....................000000000.111111.11112222222222 | |
2976 | .....................123456789.012345.67890123456789. */ | |
2977 | const char *content = (" \"01234\" /* non-str */\n" | |
2978 | " \"56789\" /* non-str */\n"); | |
2979 | lexer_test test (case_, content, NULL); | |
2980 | |
2981 | location_t input_locs[2]; | |
2982 | |
2983 | /* Verify that we get the expected tokens back, saving each token's string and source location for use below. */ | |
2984 | auto_vec <cpp_string> input_strings; | |
2985 | const cpp_token *tok_a = test.get_token (); | |
2986 | ASSERT_EQ (tok_a->type, CPP_STRING); | |
2987 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\""); | |
2988 | input_strings.safe_push (tok_a->val.str); | |
2989 | input_locs[0] = tok_a->src_loc; | |
2990 | |
2991 | const cpp_token *tok_b = test.get_token (); | |
2992 | ASSERT_EQ (tok_b->type, CPP_STRING); | |
2993 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\""); | |
2994 | input_strings.safe_push (tok_b->val.str); | |
2995 | input_locs[1] = tok_b->src_loc; | |
2996 | |
2997 | /* Verify that cpp_interpret_string works when given both strings at once. */ | |
2998 | cpp_string dst_string; | |
2999 | const enum cpp_ttype type = CPP_STRING; | |
3000 | bool result = cpp_interpret_string (test.m_parser, | |
3001 | input_strings.address (), 2, | |
3002 | &dst_string, type); | |
3003 | ASSERT_TRUE (result); | |
3004 | ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
3005 | free (const_cast <unsigned char *> (dst_string.text)); | |
3006 | |
3007 | /* Simulate c-lex.c's lex_string in order to record concatenation. */ | |
3008 | test.m_concats.record_string_concatenation (2, input_locs); | |
3009 | |
3010 | location_t initial_loc = input_locs[0]; | |
3011 | |
7413e757 | 3012 | /* "01234" on line 1, at columns 10-14. */ |
d4166bdc | 3013 | for (int i = 0; i <= 4; i++) |
3014 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i); | |
7413e757 | 3015 | /* "56789" in line 2 (columns 10-14), plus its closing quote (column 15) for the nul terminator. */ |
3016 | for (int i = 5; i <= 10; i++) | |
d4166bdc | 3017 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i); |
3018 | |
7413e757 | 3019 | ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11); |
d4166bdc | 3020 | } |
3021 | ||
3022 | /* Another test of string literal concatenation: five two-character literals, one per source line, each followed by a non-string comment. */ | |
3023 | ||
3024 | static void | |
3025 | test_lexer_string_locations_concatenation_2 (const line_table_case &case_) | |
3026 | { | |
3027 | /* Digits 0-9. | |
3028 | .....................000000000.111.11111112222222 | |
3029 | .....................123456789.012.34567890123456. */ | |
3030 | const char *content = (" \"01\" /* non-str */\n" | |
3031 | " \"23\" /* non-str */\n" | |
3032 | " \"45\" /* non-str */\n" | |
3033 | " \"67\" /* non-str */\n" | |
3034 | " \"89\" /* non-str */\n"); | |
3035 | lexer_test test (case_, content, NULL); | |
3036 | ||
3037 | auto_vec <cpp_string> input_strings; | |
3038 | location_t input_locs[5]; /* src_loc of each string token, in order */ | |
3039 | ||
3040 | /* Verify that we get the expected tokens back. */ | |
3041 | for (int i = 0; i < 5; i++) | |
3042 | { | |
3043 | const cpp_token *tok = test.get_token (); | |
3044 | ASSERT_EQ (tok->type, CPP_STRING); | |
3045 | input_strings.safe_push (tok->val.str); | |
3046 | input_locs[i] = tok->src_loc; | |
3047 | } | |
3048 | ||
3049 | /* Verify that cpp_interpret_string works. */ | |
3050 | cpp_string dst_string; | |
3051 | const enum cpp_ttype type = CPP_STRING; | |
3052 | bool result = cpp_interpret_string (test.m_parser, | |
3053 | input_strings.address (), 5, | |
3054 | &dst_string, type); | |
3055 | ASSERT_TRUE (result); | |
3056 | ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
3057 | free (const_cast <unsigned char *> (dst_string.text)); | |
3058 | ||
3059 | /* Simulate c-lex.c's lex_string in order to record concatenation. */ | |
3060 | test.m_concats.record_string_concatenation (5, input_locs); | |
3061 | ||
3062 | location_t initial_loc = input_locs[0]; | |
3063 | ||
3064 | /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can | |
3065 | detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS | |
3066 | and expect get_source_range_for_substring to fail. | |
3067 | However, for a string concatenation test, we can have a case | |
3068 | where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS, | |
3069 | but subsequent strings can be after it. | |
3070 | Attempting to detect this within assert_char_at_range | |
3071 | would overcomplicate the logic for the common test cases, so | |
3072 | we detect it here. */ | |
3073 | if (should_have_column_data_p (input_locs[0]) | |
3074 | && !should_have_column_data_p (input_locs[4])) | |
3075 | { | |
3076 | /* Verify that get_source_range_for_substring gracefully rejects | |
3077 | this case: the call below must return the expected error string. */ | |
3078 | source_range actual_range; | |
3079 | const char *err | |
5927e78e | 3080 | = get_source_range_for_char (test.m_parser, &test.m_concats, |
3081 | initial_loc, type, 0, &actual_range); | |
d4166bdc | 3082 | ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err); |
3083 | return; | |
3084 | } | |
3085 | ||
3086 | for (int i = 0; i < 5; i++) /* literal I is on source line I + 1 */ | |
3087 | for (int j = 0; j < 2; j++) /* character J of that literal is expected at column 10 + J */ | |
3088 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j, | |
3089 | i + 1, 10 + j, 10 + j); | |
3090 | ||
7413e757 | 3091 | /* NUL-terminator should use the final closing quote at line 5 column 12. */ |
3092 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12); | |
3093 | ||
3094 | ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11); | |
d4166bdc | 3095 | } |
3096 | ||
3097 | /* Another test of string literal concatenation, this time combined with | |
3098 | various kinds of escaped characters. */ | |
3099 | ||
3100 | static void | |
3101 | test_lexer_string_locations_concatenation_3 (const line_table_case &case_) | |
3102 | { | |
3103 | /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35" and | |
3104 | digit 6 in ASCII as octal "\066", concatenating multiple strings. */ | |
3105 | const char *content | |
3106 | /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555 | |
3107 | .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */ | |
3108 | = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n"); | |
3109 | lexer_test test (case_, content, NULL); | |
3110 | ||
3111 | auto_vec <cpp_string> input_strings; | |
3112 | location_t input_locs[4]; /* src_loc of each string token, in order */ | |
3113 | ||
3114 | /* Verify that we get the expected tokens back. */ | |
3115 | for (int i = 0; i < 4; i++) | |
3116 | { | |
3117 | const cpp_token *tok = test.get_token (); | |
3118 | ASSERT_EQ (tok->type, CPP_STRING); | |
3119 | input_strings.safe_push (tok->val.str); | |
3120 | input_locs[i] = tok->src_loc; | |
3121 | } | |
3122 | ||
3123 | /* Verify that cpp_interpret_string works. */ | |
3124 | cpp_string dst_string; | |
3125 | const enum cpp_ttype type = CPP_STRING; | |
3126 | bool result = cpp_interpret_string (test.m_parser, | |
3127 | input_strings.address (), 4, | |
3128 | &dst_string, type); | |
3129 | ASSERT_TRUE (result); | |
3130 | ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
3131 | free (const_cast <unsigned char *> (dst_string.text)); | |
3132 | ||
3133 | /* Simulate c-lex.c's lex_string in order to record concatenation. */ | |
3134 | test.m_concats.record_string_concatenation (4, input_locs); | |
3135 | ||
3136 | location_t initial_loc = input_locs[0]; | |
3137 | ||
3138 | for (int i = 0; i <= 4; i++) | |
3139 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i); | |
3140 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22); /* digit 5: the hex escape, expected at columns 19-22 */ | |
3141 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30); /* digit 6: the octal escape, expected at columns 27-30 */ | |
3142 | for (int i = 7; i <= 9; i++) | |
3143 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i); | |
3144 | ||
7413e757 | 3145 | /* NUL-terminator should use the location of the final closing quote. */ |
3146 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38); | |
3147 | ||
3148 | ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11); | |
d4166bdc | 3149 | } |
3150 | ||
3151 | /* Test of string literal in a macro: the literal is defined on line 1 and the macro is used on line 2. */ | |
3152 | ||
3153 | static void | |
3154 | test_lexer_string_locations_macro (const line_table_case &case_) | |
3155 | { | |
3156 | /* Digits 0-9. | |
3157 | .....................0000000001111111111.22222222223. | |
3158 | .....................1234567890123456789.01234567890. */ | |
3159 | const char *content = ("#define MACRO \"0123456789\" /* non-str */\n" | |
3160 | " MACRO"); | |
3161 | lexer_test test (case_, content, NULL); | |
3162 | ||
3163 | /* Verify that we get the expected tokens back. */ | |
3164 | const cpp_token *tok = test.get_token (); | |
3165 | ASSERT_EQ (tok->type, CPP_PADDING); | |
3166 | ||
3167 | tok = test.get_token (); | |
3168 | ASSERT_EQ (tok->type, CPP_STRING); | |
3169 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\""); | |
3170 | ||
3171 | /* Verify ranges of individual characters. We ought to | |
3172 | see columns within the macro definition (line 1). Indices 0-9 are | |
3172 | the digits and index 10 is the NUL terminator, 11 ranges in total. */ | |
7413e757 | 3173 | for (int i = 0; i <= 10; i++) |
d4166bdc | 3174 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, |
3175 | i, 1, 20 + i, 20 + i); | |
3176 | ||
7413e757 | 3177 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11); |
d4166bdc | 3178 | |
3179 | tok = test.get_token (); | |
3180 | ASSERT_EQ (tok->type, CPP_PADDING); | |
3181 | } | |
3182 | ||
3183 | /* Test of stringification of a macro argument: substring locations are not available for the resulting string token, and the lookup must fail cleanly. */ | |
3184 | ||
3185 | static void | |
3186 | test_lexer_string_locations_stringified_macro_argument | |
3187 | (const line_table_case &case_) | |
3188 | { | |
3189 | /* .....................000000000111111111122222222223. | |
3190 | .....................123456789012345678901234567890. */ | |
3191 | const char *content = ("#define MACRO(X) #X /* non-str */\n" | |
3192 | "MACRO(foo)\n"); | |
3193 | lexer_test test (case_, content, NULL); | |
3194 | ||
3195 | /* Verify that we get the expected tokens back: a padding token, then | |
3195 | the stringified argument. */ | |
3196 | const cpp_token *tok = test.get_token (); | |
3197 | ASSERT_EQ (tok->type, CPP_PADDING); | |
3198 | ||
3199 | tok = test.get_token (); | |
3200 | ASSERT_EQ (tok->type, CPP_STRING); | |
3201 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\""); | |
3202 | ||
3203 | /* We don't support getting the location of a stringified macro | |
3204 | argument. Verify that it fails gracefully. */ | |
3205 | ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, | |
3206 | "cpp_interpret_string_1 failed"); | |
3207 | ||
3208 | tok = test.get_token (); /* two further padding tokens are expected after the string */ | |
3209 | ASSERT_EQ (tok->type, CPP_PADDING); | |
3210 | ||
3211 | tok = test.get_token (); | |
3212 | ASSERT_EQ (tok->type, CPP_PADDING); | |
3213 | } | |
3214 | ||
3215 | /* Ensure that we fail gracefully if something attempts to pass | |
3216 | in a location that isn't a string literal token. Seen on this code: | |
3217 | ||
3218 | const char a[] = " %d "; | |
3219 | __builtin_printf (a, 0.5); | |
3220 | ^ | |
3221 | ||
3222 | when c-format.c erroneously used the indicated one-character | |
3223 | location as the format string location, leading to a read past the | |
3224 | end of a string buffer in cpp_interpret_string_1. */ | |
3225 | ||
3226 | static void | |
3227 | test_lexer_string_locations_non_string (const line_table_case &case_) | |
3228 | { | |
3229 | /* .....................000000000111111111122222222223. | |
3230 | .....................123456789012345678901234567890. */ | |
3231 | const char *content = (" a\n"); | |
3232 | lexer_test test (case_, content, NULL); | |
3233 | ||
3234 | /* Verify that we get the expected token back. */ | |
3235 | const cpp_token *tok = test.get_token (); | |
3236 | ASSERT_EQ (tok->type, CPP_NAME); | |
3237 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a"); | |
3238 | ||
3239 | /* At this point, libcpp is attempting to interpret the name as a | |
3240 | string literal, despite it not starting with a quote. We don't detect | |
3241 | that, but we should at least fail gracefully. Note that the | |
3241 | CPP_NAME token is deliberately queried as if it were a CPP_STRING. */ | |
3242 | ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, | |
3243 | "cpp_interpret_string_1 failed"); | |
3244 | } | |
3245 | ||
3246 | /* Ensure that we can read substring information for a token which | |
3247 | starts in one linemap and ends in another. Adapted from | |
3248 | gcc.dg/cpp/pr69985.c. */ | |
3249 | ||
3250 | static void | |
3251 | test_lexer_string_locations_long_line (const line_table_case &case_) | |
3252 | { | |
3253 | /* .....................000000.000111111111 | |
3254 | .....................123456.789012345678. */ | |
3255 | const char *content = ("/* A very long line, so that we start a new line map. */\n" | |
3256 | " \"0123456789012345678901234567890123456789" | |
3257 | "0123456789012345678901234567890123456789" | |
3258 | "0123456789012345678901234567890123456789" | |
3259 | "0123456789\"\n"); | |
3260 | ||
3261 | lexer_test test (case_, content, NULL); | |
3262 | ||
3263 | /* Verify that we get the expected token back. */ | |
3264 | const cpp_token *tok = test.get_token (); | |
3265 | ASSERT_EQ (tok->type, CPP_STRING); | |
3266 | ||
3267 | if (!should_have_column_data_p (line_table->highest_location)) /* skip the range checks when column data is not expected */ | |
3268 | return; | |
3269 | ||
3270 | /* Verify ranges of individual characters: the literal holds 130 digits | |
3270 | (40 + 40 + 40 + 10), and one more range covers the NUL terminator. */ | |
7413e757 | 3271 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131); |
3272 | for (int i = 0; i < 131; i++) | |
d4166bdc | 3273 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, |
3274 | i, 2, 7 + i, 7 + i); | |
3275 | } | |
3276 | ||
f9f26759 | 3277 | /* Test of locations within a raw string that doesn't contain a newline. */ |
3278 | ||
3279 | static void | |
3280 | test_lexer_string_locations_raw_string_one_line (const line_table_case &case_) | |
3281 | { | |
3282 | /* .....................00.0000000111111111122. | |
3283 | .....................12.3456789012345678901. */ | |
3284 | const char *content = ("R\"foo(0123456789)foo\"\n"); | |
3285 | lexer_test test (case_, content, NULL); | |
3286 | ||
3287 | /* Verify that we get the expected token back. */ | |
3288 | const cpp_token *tok = test.get_token (); | |
3289 | ASSERT_EQ (tok->type, CPP_STRING); | |
3290 | ||
3291 | /* Verify that cpp_interpret_string works: the delimiters are dropped | |
3291 | and only the digits remain. */ | |
3292 | cpp_string dst_string; | |
3293 | const enum cpp_ttype type = CPP_STRING; | |
3294 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
3295 | &dst_string, type); | |
3296 | ASSERT_TRUE (result); | |
3297 | ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
3298 | free (const_cast <unsigned char *> (dst_string.text)); | |
3299 | ||
3300 | if (!should_have_column_data_p (line_table->highest_location)) /* skip the range checks when column data is not expected */ | |
3301 | return; | |
3302 | ||
3303 | /* 0-9, plus the NUL terminator. The first digit is expected at | |
3303 | column 7, after the 6-character raw-string prefix. */ | |
3304 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11); | |
3305 | for (int i = 0; i < 11; i++) | |
3306 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
3307 | i, 1, 7 + i, 7 + i); | |
3308 | } | |
3309 | ||
3310 | /* Test of locations within a raw string that contains a newline. */ | |
3311 | ||
3312 | static void | |
3313 | test_lexer_string_locations_raw_string_multiline (const line_table_case &case_) | |
3314 | { | |
3315 | /* .....................00.0000. | |
3316 | .....................12.3456. */ | |
3317 | const char *content = ("R\"foo(\n" | |
3318 | /* .....................00000. | |
3319 | .....................12345. */ | |
3320 | "hello\n" | |
3321 | "world\n" | |
3322 | /* .....................00000. | |
3323 | .....................12345. */ | |
3324 | ")foo\"\n"); | |
3325 | lexer_test test (case_, content, NULL); | |
3326 | ||
3327 | /* Verify that we get the expected token back: the whole raw string | |
3327 | is lexed as a single CPP_STRING token. */ | |
3328 | const cpp_token *tok = test.get_token (); | |
3329 | ASSERT_EQ (tok->type, CPP_STRING); | |
3330 | ||
3331 | /* Verify that cpp_interpret_string works, and that the newlines inside | |
3331 | the raw string are kept in the interpreted text. */ | |
3332 | cpp_string dst_string; | |
3333 | const enum cpp_ttype type = CPP_STRING; | |
3334 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
3335 | &dst_string, type); | |
3336 | ASSERT_TRUE (result); | |
3337 | ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text); | |
3338 | free (const_cast <unsigned char *> (dst_string.text)); | |
3339 | ||
3340 | if (!should_have_column_data_p (line_table->highest_location)) /* skip the range check when column data is not expected */ | |
3341 | return; | |
3342 | ||
3343 | /* Currently we don't support locations within raw strings that | |
3344 | contain newlines. Verify the specific error that is reported. */ | |
3345 | ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type, | |
3346 | "range endpoints are on different lines"); | |
3347 | } | |
3348 | ||
0ccd6e7a | 3349 | /* Test of parsing an unterminated raw string: the closing delimiter (ouCh) differs in case from the opening delimiter (ouch). */ |
3350 | ||
3351 | static void | |
3352 | test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_) | |
3353 | { | |
3354 | const char *content = "R\"ouch()ouCh\" /* etc */"; | |
3355 | ||
3356 | lexer_error_sink errors; /* passed to the lexer_test below; its m_errors is inspected afterwards */ | |
3357 | lexer_test test (case_, content, &errors); | |
3358 | test.m_implicitly_expect_EOF = false; | |
3359 | ||
3360 | /* Attempt to parse the raw string. We expect to get CPP_EOF back | |
3360 | rather than a string token, with exactly one error recorded. */ | |
3361 | const cpp_token *tok = test.get_token (); | |
3362 | ASSERT_EQ (tok->type, CPP_EOF); | |
3363 | ||
3364 | ASSERT_EQ (1, errors.m_errors.length ()); | |
3365 | /* We expect the message "unterminated raw string" | |
3366 | in the "cpplib" translation domain. | |
3367 | It's not clear that dgettext is available on all supported hosts, | |
3368 | so this assertion is commented-out for now. | |
3369 | ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"), | |
3370 | errors.m_errors[0]); | |
3371 | */ | |
3372 | } | |
3373 | ||
d4166bdc | 3374 | /* Test of lexing char constants: plain, u-, U- and L-prefixed, and a multi-character constant. */ |
3375 | ||
3376 | static void | |
3377 | test_lexer_char_constants (const line_table_case &case_) | |
3378 | { | |
3379 | /* Various char constants. | |
3380 | .....................0000000001111111111.22222222223. | |
3381 | .....................1234567890123456789.01234567890. */ | |
3382 | const char *content = (" 'a'\n" | |
3383 | " u'a'\n" | |
3384 | " U'a'\n" | |
3385 | " L'a'\n" | |
3386 | " 'abc'\n"); | |
3387 | lexer_test test (case_, content, NULL); | |
3388 | ||
3389 | /* Verify that we get the expected tokens back. */ | |
3390 | /* 'a'. */ | |
3391 | const cpp_token *tok = test.get_token (); | |
3392 | ASSERT_EQ (tok->type, CPP_CHAR); | |
3393 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'"); | |
3394 | ||
3395 | unsigned int chars_seen; /* filled in by cpp_interpret_charconst and checked below */ | |
3396 | int unsignedp; /* filled in by cpp_interpret_charconst; not checked by this test */ | |
3397 | cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok, | |
3398 | &chars_seen, &unsignedp); | |
3399 | ASSERT_EQ (cc, 'a'); | |
3400 | ASSERT_EQ (chars_seen, 1); | |
3401 | ||
3402 | /* u'a'. The remaining constants are checked by token type and spelling only. */ | |
3403 | tok = test.get_token (); | |
3404 | ASSERT_EQ (tok->type, CPP_CHAR16); | |
3405 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'"); | |
3406 | ||
3407 | /* U'a'. */ | |
3408 | tok = test.get_token (); | |
3409 | ASSERT_EQ (tok->type, CPP_CHAR32); | |
3410 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'"); | |
3411 | ||
3412 | /* L'a'. */ | |
3413 | tok = test.get_token (); | |
3414 | ASSERT_EQ (tok->type, CPP_WCHAR); | |
3415 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'"); | |
3416 | ||
3417 | /* 'abc' (c-char-sequence). */ | |
3418 | tok = test.get_token (); | |
3419 | ASSERT_EQ (tok->type, CPP_CHAR); | |
3420 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'"); | |
3421 | } | |
b73690a4 | 3422 | /* A table of interesting location_t values, giving one axis of our test |
3423 | matrix. The table has 12 entries; for_each_line_table_case hard-codes | |
3423 | that count in its final assertion, so keep the two in sync. */ | |
3424 | ||
3425 | static const location_t boundary_locations[] = { | |
3426 | /* Zero means "don't override the default values for a new line_table". */ | |
3427 | 0, | |
3428 | ||
3429 | /* An arbitrary non-zero value that isn't close to one of | |
3430 | the boundary values below. */ | |
3431 | 0x10000, | |
3432 | ||
3433 | /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES: below, just | |
3433 | below, exactly at, just above and above the limit. */ | |
3434 | LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100, | |
3435 | LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1, | |
3436 | LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES, | |
3437 | LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1, | |
3438 | LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100, | |
3439 | ||
3440 | /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS, following the same | |
3440 | pattern as the group above. */ | |
3441 | LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100, | |
3442 | LINE_MAP_MAX_LOCATION_WITH_COLS - 1, | |
3443 | LINE_MAP_MAX_LOCATION_WITH_COLS, | |
3444 | LINE_MAP_MAX_LOCATION_WITH_COLS + 1, | |
3445 | LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100, | |
3446 | }; | |
3447 | ||
7ec388ed | 3448 | /* Run TESTCASE multiple times, once for each case in our test matrix. TESTCASE receives the line_table_case describing the current cell. */ |
99b4f3a2 | 3449 | |
3450 | void | |
7ec388ed | 3451 | for_each_line_table_case (void (*testcase) (const line_table_case &)) |
99b4f3a2 | 3452 | { |
b73690a4 | 3453 | /* As noted above in the description of struct line_table_case, |
3454 | we want to explore a test matrix of interesting line_table | |
3455 | situations, running various selftests for each case within the | |
3456 | matrix. */ | |
3457 | ||
3458 | /* Run all tests with: | |
3459 | (a) line_table->default_range_bits == 0, and | |
3460 | (b) line_table->default_range_bits == 5. */ | |
3461 | int num_cases_tested = 0; | |
3462 | for (int default_range_bits = 0; default_range_bits <= 5; | |
3463 | default_range_bits += 5) | |
3464 | { | |
3465 | /* ...and use each of the "interesting" location values as | |
3466 | the starting location within line_table. */ | |
3467 | const int num_boundary_locations | |
3468 | = sizeof (boundary_locations) / sizeof (boundary_locations[0]); | |
3469 | for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++) | |
3470 | { | |
3471 | line_table_case c (default_range_bits, boundary_locations[loc_idx]); | |
3472 | ||
7ec388ed | 3473 | testcase (c); |
b73690a4 | 3474 | |
3475 | num_cases_tested++; | |
3476 | } | |
3477 | } | |
3478 | ||
3479 | /* Verify that we fully covered the test matrix: 2 values of | |
3479 | default_range_bits times the 12 entries of boundary_locations. | |
3479 | The 12 is hard-coded, so it must be updated if the table changes. */ | |
3480 | ASSERT_EQ (num_cases_tested, 2 * 12); | |
7ec388ed | 3481 | } |
3482 | ||
3483 | /* Run all of the selftests within this file: the tests that take no arguments are called directly, and the tests that take a line_table_case are run through for_each_line_table_case. */ | |
3484 | ||
3485 | void | |
3486 | input_c_tests () | |
3487 | { | |
3488 | test_should_have_column_data_p (); | |
3489 | test_unknown_location (); | |
3490 | test_builtins (); | |
aca2a315 | 3491 | for_each_line_table_case (test_make_location_nonpure_range_endpoints); |
7ec388ed | 3492 | |
3493 | for_each_line_table_case (test_accessing_ordinary_linemaps); | |
3494 | for_each_line_table_case (test_lexer); | |
3495 | for_each_line_table_case (test_lexer_string_locations_simple); | |
3496 | for_each_line_table_case (test_lexer_string_locations_ebcdic); | |
3497 | for_each_line_table_case (test_lexer_string_locations_hex); | |
3498 | for_each_line_table_case (test_lexer_string_locations_oct); | |
3499 | for_each_line_table_case (test_lexer_string_locations_letter_escape_1); | |
3500 | for_each_line_table_case (test_lexer_string_locations_letter_escape_2); | |
3501 | for_each_line_table_case (test_lexer_string_locations_ucn4); | |
3502 | for_each_line_table_case (test_lexer_string_locations_ucn8); | |
3503 | for_each_line_table_case (test_lexer_string_locations_wide_string); | |
3504 | for_each_line_table_case (test_lexer_string_locations_string16); | |
3505 | for_each_line_table_case (test_lexer_string_locations_string32); | |
3506 | for_each_line_table_case (test_lexer_string_locations_u8); | |
3507 | for_each_line_table_case (test_lexer_string_locations_utf8_source); | |
3508 | for_each_line_table_case (test_lexer_string_locations_concatenation_1); | |
3509 | for_each_line_table_case (test_lexer_string_locations_concatenation_2); | |
3510 | for_each_line_table_case (test_lexer_string_locations_concatenation_3); | |
3511 | for_each_line_table_case (test_lexer_string_locations_macro); | |
3512 | for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument); | |
3513 | for_each_line_table_case (test_lexer_string_locations_non_string); | |
3514 | for_each_line_table_case (test_lexer_string_locations_long_line); | |
f9f26759 | 3515 | for_each_line_table_case (test_lexer_string_locations_raw_string_one_line); |
3516 | for_each_line_table_case (test_lexer_string_locations_raw_string_multiline); | |
0ccd6e7a | 3517 | for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated); |
7ec388ed | 3518 | for_each_line_table_case (test_lexer_char_constants); |
b73690a4 | 3519 | |
99b4f3a2 | 3520 | test_reading_source_line (); |
3521 | } | |
3522 | ||
3523 | } // namespace selftest | |
3524 | ||
3525 | #endif /* CHECKING_P */ |