]>
Commit | Line | Data |
---|---|---|
447924ef | 1 | /* Data and functions related to line maps and input files. |
a945c346 | 2 | Copyright (C) 2004-2024 Free Software Foundation, Inc. |
447924ef JM |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "intl.h" | |
bc65bad2 | 24 | #include "diagnostic.h" |
d9b950dd | 25 | #include "selftest.h" |
741d3be5 | 26 | #include "cpplib.h" |
7ecc3eb9 | 27 | |
a7d79e5c DM |
28 | #ifndef HAVE_ICONV |
29 | #define HAVE_ICONV 0 | |
30 | #endif | |
31 | ||
15d31555 LH |
32 | const char * |
33 | special_fname_builtin () | |
34 | { | |
35 | return _("<built-in>"); | |
36 | } | |
37 | ||
3ac6b5cf LH |
/* Input charset configuration.  */

/* Charset callback installed when the client does not provide one.
   The file path argument is ignored and the result is always nullptr;
   file_cache_slot::create only performs a charset conversion when the
   callback returns a non-null charset name.  */

static const char *
default_charset_callback (const char * /* file_path */)
{
  return nullptr;
}
43 | ||
44 | void | |
45 | file_cache::initialize_input_context (diagnostic_input_charset_callback ccb, | |
46 | bool should_skip_bom) | |
47 | { | |
48 | in_context.ccb = (ccb ? ccb : default_charset_callback); | |
49 | in_context.should_skip_bom = should_skip_bom; | |
50 | } | |
51 | ||
7ecc3eb9 DS |
52 | /* This is a cache used by get_next_line to store the content of a |
53 | file to be searched for file lines. */ | |
b544c348 | 54 | class file_cache_slot |
7ecc3eb9 | 55 | { |
6c1dae73 | 56 | public: |
b544c348 DM |
57 | file_cache_slot (); |
58 | ~file_cache_slot (); | |
59 | ||
60 | bool read_line_num (size_t line_num, | |
61 | char ** line, ssize_t *line_len); | |
62 | ||
63 | /* Accessors. */ | |
64 | const char *get_file_path () const { return m_file_path; } | |
65 | unsigned get_use_count () const { return m_use_count; } | |
66 | bool missing_trailing_newline_p () const | |
67 | { | |
68 | return m_missing_trailing_newline; | |
69 | } | |
d495ea2b | 70 | char_span get_full_file_content (); |
b544c348 DM |
71 | |
72 | void inc_use_count () { m_use_count++; } | |
73 | ||
3ac6b5cf LH |
74 | bool create (const file_cache::input_context &in_context, |
75 | const char *file_path, FILE *fp, unsigned highest_use_count); | |
b544c348 DM |
76 | void evict (); |
77 | ||
78 | private: | |
7ecc3eb9 | 79 | /* These are information used to store a line boundary. */ |
6c1dae73 | 80 | class line_info |
7ecc3eb9 | 81 | { |
6c1dae73 | 82 | public: |
7ecc3eb9 DS |
83 | /* The line number. It starts from 1. */ |
84 | size_t line_num; | |
85 | ||
86 | /* The position (byte count) of the beginning of the line, | |
87 | relative to the file data pointer. This starts at zero. */ | |
88 | size_t start_pos; | |
89 | ||
90 | /* The position (byte count) of the last byte of the line. This | |
91 | normally points to the '\n' character, or to one byte after the | |
92 | last byte of the file, if the file doesn't contain a '\n' | |
93 | character. */ | |
94 | size_t end_pos; | |
95 | ||
96 | line_info (size_t l, size_t s, size_t e) | |
97 | : line_num (l), start_pos (s), end_pos (e) | |
98 | {} | |
99 | ||
100 | line_info () | |
101 | :line_num (0), start_pos (0), end_pos (0) | |
102 | {} | |
103 | }; | |
104 | ||
b544c348 DM |
105 | bool needs_read_p () const; |
106 | bool needs_grow_p () const; | |
107 | void maybe_grow (); | |
108 | bool read_data (); | |
109 | bool maybe_read_data (); | |
110 | bool get_next_line (char **line, ssize_t *line_len); | |
111 | bool read_next_line (char ** line, ssize_t *line_len); | |
112 | bool goto_next_line (); | |
113 | ||
114 | static const size_t buffer_size = 4 * 1024; | |
115 | static const size_t line_record_size = 100; | |
116 | ||
7ecc3eb9 DS |
117 | /* The number of time this file has been accessed. This is used |
118 | to designate which file cache to evict from the cache | |
119 | array. */ | |
b544c348 | 120 | unsigned m_use_count; |
7ecc3eb9 | 121 | |
f5ea989d DM |
122 | /* The file_path is the key for identifying a particular file in |
123 | the cache. | |
124 | For libcpp-using code, the underlying buffer for this field is | |
125 | owned by the corresponding _cpp_file within the cpp_reader. */ | |
b544c348 | 126 | const char *m_file_path; |
7ecc3eb9 | 127 | |
b544c348 | 128 | FILE *m_fp; |
7ecc3eb9 DS |
129 | |
130 | /* This points to the content of the file that we've read so | |
131 | far. */ | |
b544c348 | 132 | char *m_data; |
7ecc3eb9 | 133 | |
3ac6b5cf LH |
134 | /* The allocated buffer to be freed may start a little earlier than DATA, |
135 | e.g. if a UTF8 BOM was skipped at the beginning. */ | |
136 | int m_alloc_offset; | |
137 | ||
7ecc3eb9 | 138 | /* The size of the DATA array above.*/ |
b544c348 | 139 | size_t m_size; |
7ecc3eb9 DS |
140 | |
141 | /* The number of bytes read from the underlying file so far. This | |
142 | must be less (or equal) than SIZE above. */ | |
b544c348 | 143 | size_t m_nb_read; |
7ecc3eb9 DS |
144 | |
145 | /* The index of the beginning of the current line. */ | |
b544c348 | 146 | size_t m_line_start_idx; |
7ecc3eb9 DS |
147 | |
148 | /* The number of the previous line read. This starts at 1. Zero | |
149 | means we've read no line so far. */ | |
b544c348 | 150 | size_t m_line_num; |
7ecc3eb9 DS |
151 | |
152 | /* This is the total number of lines of the current file. At the | |
153 | moment, we try to get this information from the line map | |
154 | subsystem. Note that this is just a hint. When using the C++ | |
155 | front-end, this hint is correct because the input file is then | |
156 | completely tokenized before parsing starts; so the line map knows | |
157 | the number of lines before compilation really starts. For e.g, | |
158 | the C front-end, it can happen that we start emitting diagnostics | |
159 | before the line map has seen the end of the file. */ | |
b544c348 | 160 | size_t m_total_lines; |
7ecc3eb9 | 161 | |
c65236d6 DM |
162 | /* Could this file be missing a trailing newline on its final line? |
163 | Initially true (to cope with empty files), set to true/false | |
164 | as each line is read. */ | |
b544c348 | 165 | bool m_missing_trailing_newline; |
c65236d6 | 166 | |
7ecc3eb9 DS |
167 | /* This is a record of the beginning and end of the lines we've seen |
168 | while reading the file. This is useful to avoid walking the data | |
169 | from the beginning when we are asked to read a line that is | |
170 | before LINE_START_IDX above. Note that the maximum size of this | |
b544c348 | 171 | record is line_record_size, so that the memory consumption |
7ecc3eb9 | 172 | doesn't explode. We thus scale total_lines down to |
b544c348 DM |
173 | line_record_size. */ |
174 | vec<line_info, va_heap> m_line_record; | |
3ac6b5cf LH |
175 | |
176 | void offset_buffer (int offset) | |
177 | { | |
178 | gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0 | |
179 | : (size_t) offset <= m_size); | |
180 | gcc_assert (m_data); | |
181 | m_alloc_offset += offset; | |
182 | m_data += offset; | |
183 | m_size -= offset; | |
184 | } | |
185 | ||
7ecc3eb9 | 186 | }; |
447924ef JM |
187 | |
188 | /* Current position in real source file. */ | |
189 | ||
3edf64aa | 190 | location_t input_location = UNKNOWN_LOCATION; |
447924ef | 191 | |
99b1c316 | 192 | class line_maps *line_table; |
447924ef | 193 | |
f87e22c5 DM |
194 | /* A stashed copy of "line_table" for use by selftest::line_table_test. |
195 | This needs to be a global so that it can be a GC root, and thus | |
196 | prevent the stashed copy from being garbage-collected if the GC runs | |
197 | during a line_table_test. */ | |
198 | ||
99b1c316 | 199 | class line_maps *saved_line_table; |
f87e22c5 | 200 | |
84756fd4 DS |
201 | /* Expand the source location LOC into a human readable location. If |
202 | LOC resolves to a builtin location, the file name of the readable | |
7eb918cc DS |
203 | location is set to the string "<built-in>". If EXPANSION_POINT_P is |
204 | TRUE and LOC is virtual, then it is resolved to the expansion | |
205 | point of the involved macro. Otherwise, it is resolved to the | |
c4ca1a09 DS |
206 | spelling location of the token. |
207 | ||
208 | When resolving to the spelling location of the token, if the | |
209 | resulting location is for a built-in location (that is, it has no | |
210 | associated line/column) in the context of a macro expansion, the | |
211 | returned location is the first one (while unwinding the macro | |
212 | location towards its expansion point) that is in real source | |
c471c6ed DM |
213 | code. |
214 | ||
215 | ASPECT controls which part of the location to use. */ | |
7eb918cc DS |
216 | |
217 | static expanded_location | |
8625aa24 DM |
218 | expand_location_1 (const line_maps *set, |
219 | location_t loc, | |
c471c6ed DM |
220 | bool expansion_point_p, |
221 | enum location_aspect aspect) | |
447924ef JM |
222 | { |
223 | expanded_location xloc; | |
0e50b624 | 224 | const line_map_ordinary *map; |
c4ca1a09 | 225 | enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT; |
5368224f DC |
226 | tree block = NULL; |
227 | ||
228 | if (IS_ADHOC_LOC (loc)) | |
229 | { | |
230 | block = LOCATION_BLOCK (loc); | |
231 | loc = LOCATION_LOCUS (loc); | |
232 | } | |
c4ca1a09 DS |
233 | |
234 | memset (&xloc, 0, sizeof (xloc)); | |
84756fd4 | 235 | |
c4ca1a09 DS |
236 | if (loc >= RESERVED_LOCATION_COUNT) |
237 | { | |
238 | if (!expansion_point_p) | |
239 | { | |
240 | /* We want to resolve LOC to its spelling location. | |
241 | ||
242 | But if that spelling location is a reserved location that | |
243 | appears in the context of a macro expansion (like for a | |
244 | location for a built-in token), let's consider the first | |
245 | location (toward the expansion point) that is not reserved; | |
246 | that is, the first location that is in real source code. */ | |
8625aa24 | 247 | loc = linemap_unwind_to_first_non_reserved_loc (set, |
0e50b624 | 248 | loc, NULL); |
c4ca1a09 DS |
249 | lrk = LRK_SPELLING_LOCATION; |
250 | } | |
8625aa24 | 251 | loc = linemap_resolve_location (set, loc, lrk, &map); |
c471c6ed DM |
252 | |
253 | /* loc is now either in an ordinary map, or is a reserved location. | |
254 | If it is a compound location, the caret is in a spelling location, | |
255 | but the start/finish might still be a virtual location. | |
256 | Depending of what the caller asked for, we may need to recurse | |
257 | one level in order to resolve any virtual locations in the | |
258 | end-points. */ | |
259 | switch (aspect) | |
260 | { | |
261 | default: | |
262 | gcc_unreachable (); | |
263 | /* Fall through. */ | |
264 | case LOCATION_ASPECT_CARET: | |
265 | break; | |
266 | case LOCATION_ASPECT_START: | |
267 | { | |
620e594b | 268 | location_t start = get_start (loc); |
c471c6ed | 269 | if (start != loc) |
8625aa24 | 270 | return expand_location_1 (set, start, expansion_point_p, aspect); |
c471c6ed DM |
271 | } |
272 | break; | |
273 | case LOCATION_ASPECT_FINISH: | |
274 | { | |
620e594b | 275 | location_t finish = get_finish (loc); |
c471c6ed | 276 | if (finish != loc) |
8625aa24 | 277 | return expand_location_1 (set, finish, expansion_point_p, aspect); |
c471c6ed DM |
278 | } |
279 | break; | |
280 | } | |
8625aa24 | 281 | xloc = linemap_expand_location (set, map, loc); |
c4ca1a09 | 282 | } |
84756fd4 | 283 | |
5368224f | 284 | xloc.data = block; |
447924ef | 285 | if (loc <= BUILTINS_LOCATION) |
15d31555 | 286 | xloc.file = loc == UNKNOWN_LOCATION ? NULL : special_fname_builtin (); |
84756fd4 | 287 | |
447924ef JM |
288 | return xloc; |
289 | } | |
64a1a422 | 290 | |
7ecc3eb9 DS |
291 | /* Return the total lines number that have been read so far by the |
292 | line map (in the preprocessor) so far. For languages like C++ that | |
293 | entirely preprocess the input file before starting to parse, this | |
294 | equals the actual number of lines of the file. */ | |
295 | ||
296 | static size_t | |
297 | total_lines_num (const char *file_path) | |
298 | { | |
299 | size_t r = 0; | |
620e594b | 300 | location_t l = 0; |
7ecc3eb9 DS |
301 | if (linemap_get_file_highest_location (line_table, file_path, &l)) |
302 | { | |
303 | gcc_assert (l >= RESERVED_LOCATION_COUNT); | |
304 | expanded_location xloc = expand_location (l); | |
305 | r = xloc.line; | |
306 | } | |
307 | return r; | |
308 | } | |
309 | ||
310 | /* Lookup the cache used for the content of a given file accessed by | |
311 | caret diagnostic. Return the found cached file, or NULL if no | |
312 | cached file was found. */ | |
313 | ||
b544c348 DM |
314 | file_cache_slot * |
315 | file_cache::lookup_file (const char *file_path) | |
7ecc3eb9 | 316 | { |
b544c348 | 317 | gcc_assert (file_path); |
7ecc3eb9 DS |
318 | |
319 | /* This will contain the found cached file. */ | |
b544c348 DM |
320 | file_cache_slot *r = NULL; |
321 | for (unsigned i = 0; i < num_file_slots; ++i) | |
7ecc3eb9 | 322 | { |
b544c348 DM |
323 | file_cache_slot *c = &m_file_slots[i]; |
324 | if (c->get_file_path () && !strcmp (c->get_file_path (), file_path)) | |
7ecc3eb9 | 325 | { |
b544c348 | 326 | c->inc_use_count (); |
7ecc3eb9 DS |
327 | r = c; |
328 | } | |
329 | } | |
330 | ||
331 | if (r) | |
b544c348 | 332 | r->inc_use_count (); |
7ecc3eb9 DS |
333 | |
334 | return r; | |
335 | } | |
336 | ||
f89b03b6 DM |
337 | /* Purge any mention of FILENAME from the cache of files used for |
338 | printing source code. For use in selftests when working | |
339 | with tempfiles. */ | |
340 | ||
b544c348 DM |
341 | void |
342 | file_cache::forcibly_evict_file (const char *file_path) | |
343 | { | |
344 | gcc_assert (file_path); | |
345 | ||
346 | file_cache_slot *r = lookup_file (file_path); | |
f89b03b6 DM |
347 | if (!r) |
348 | /* Not found. */ | |
349 | return; | |
350 | ||
b544c348 DM |
351 | r->evict (); |
352 | } | |
353 | ||
1bdd665a DM |
354 | /* Determine if FILE_PATH missing a trailing newline on its final line. |
355 | Only valid to call once all of the file has been loaded, by | |
356 | requesting a line number beyond the end of the file. */ | |
357 | ||
358 | bool | |
359 | file_cache::missing_trailing_newline_p (const char *file_path) | |
360 | { | |
361 | gcc_assert (file_path); | |
362 | ||
363 | file_cache_slot *r = lookup_or_add_file (file_path); | |
364 | return r->missing_trailing_newline_p (); | |
365 | } | |
366 | ||
b544c348 DM |
367 | void |
368 | file_cache_slot::evict () | |
369 | { | |
370 | m_file_path = NULL; | |
371 | if (m_fp) | |
372 | fclose (m_fp); | |
373 | m_fp = NULL; | |
374 | m_nb_read = 0; | |
375 | m_line_start_idx = 0; | |
376 | m_line_num = 0; | |
377 | m_line_record.truncate (0); | |
378 | m_use_count = 0; | |
379 | m_total_lines = 0; | |
380 | m_missing_trailing_newline = true; | |
f89b03b6 DM |
381 | } |
382 | ||
7ecc3eb9 DS |
383 | /* Return the file cache that has been less used, recently, or the |
384 | first empty one. If HIGHEST_USE_COUNT is non-null, | |
385 | *HIGHEST_USE_COUNT is set to the highest use count of the entries | |
386 | in the cache table. */ | |
387 | ||
b544c348 DM |
388 | file_cache_slot* |
389 | file_cache::evicted_cache_tab_entry (unsigned *highest_use_count) | |
7ecc3eb9 | 390 | { |
b544c348 DM |
391 | file_cache_slot *to_evict = &m_file_slots[0]; |
392 | unsigned huc = to_evict->get_use_count (); | |
393 | for (unsigned i = 1; i < num_file_slots; ++i) | |
7ecc3eb9 | 394 | { |
b544c348 DM |
395 | file_cache_slot *c = &m_file_slots[i]; |
396 | bool c_is_empty = (c->get_file_path () == NULL); | |
7ecc3eb9 | 397 | |
b544c348 DM |
398 | if (c->get_use_count () < to_evict->get_use_count () |
399 | || (to_evict->get_file_path () && c_is_empty)) | |
7ecc3eb9 DS |
400 | /* We evict C because it's either an entry with a lower use |
401 | count or one that is empty. */ | |
402 | to_evict = c; | |
403 | ||
b544c348 DM |
404 | if (huc < c->get_use_count ()) |
405 | huc = c->get_use_count (); | |
7ecc3eb9 DS |
406 | |
407 | if (c_is_empty) | |
408 | /* We've reached the end of the cache; subsequent elements are | |
409 | all empty. */ | |
410 | break; | |
411 | } | |
412 | ||
413 | if (highest_use_count) | |
414 | *highest_use_count = huc; | |
415 | ||
416 | return to_evict; | |
417 | } | |
418 | ||
419 | /* Create the cache used for the content of a given file to be | |
420 | accessed by caret diagnostic. This cache is added to an array of | |
421 | cache and can be retrieved by lookup_file_in_cache_tab. This | |
422 | function returns the created cache. Note that only the last | |
94caa6a6 DM |
423 | num_file_slots files are cached. |
424 | ||
425 | This can return nullptr if the FILE_PATH can't be opened for | |
426 | reading, or if the content can't be converted to the input_charset. */ | |
7ecc3eb9 | 427 | |
b544c348 DM |
428 | file_cache_slot* |
429 | file_cache::add_file (const char *file_path) | |
7ecc3eb9 DS |
430 | { |
431 | ||
432 | FILE *fp = fopen (file_path, "r"); | |
317363b4 DS |
433 | if (fp == NULL) |
434 | return NULL; | |
7ecc3eb9 DS |
435 | |
436 | unsigned highest_use_count = 0; | |
b544c348 | 437 | file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count); |
3ac6b5cf LH |
438 | if (!r->create (in_context, file_path, fp, highest_use_count)) |
439 | return NULL; | |
b544c348 DM |
440 | return r; |
441 | } | |
442 | ||
d495ea2b DM |
443 | /* Get a borrowed char_span to the full content of this file |
444 | as decoded according to the input charset, encoded as UTF-8. */ | |
445 | ||
446 | char_span | |
447 | file_cache_slot::get_full_file_content () | |
448 | { | |
449 | char *line; | |
450 | ssize_t line_len; | |
451 | while (get_next_line (&line, &line_len)) | |
452 | { | |
453 | } | |
454 | return char_span (m_data, m_nb_read); | |
455 | } | |
456 | ||
b544c348 DM |
457 | /* Populate this slot for use on FILE_PATH and FP, dropping any |
458 | existing cached content within it. */ | |
459 | ||
3ac6b5cf LH |
460 | bool |
461 | file_cache_slot::create (const file_cache::input_context &in_context, | |
462 | const char *file_path, FILE *fp, | |
b544c348 DM |
463 | unsigned highest_use_count) |
464 | { | |
465 | m_file_path = file_path; | |
466 | if (m_fp) | |
467 | fclose (m_fp); | |
468 | m_fp = fp; | |
3ac6b5cf LH |
469 | if (m_alloc_offset) |
470 | offset_buffer (-m_alloc_offset); | |
b544c348 DM |
471 | m_nb_read = 0; |
472 | m_line_start_idx = 0; | |
473 | m_line_num = 0; | |
474 | m_line_record.truncate (0); | |
7ecc3eb9 DS |
475 | /* Ensure that this cache entry doesn't get evicted next time |
476 | add_file_to_cache_tab is called. */ | |
b544c348 DM |
477 | m_use_count = ++highest_use_count; |
478 | m_total_lines = total_lines_num (file_path); | |
479 | m_missing_trailing_newline = true; | |
3ac6b5cf LH |
480 | |
481 | ||
482 | /* Check the input configuration to determine if we need to do any | |
483 | transformations, such as charset conversion or BOM skipping. */ | |
484 | if (const char *input_charset = in_context.ccb (file_path)) | |
485 | { | |
486 | /* Need a full-blown conversion of the input charset. */ | |
487 | fclose (m_fp); | |
488 | m_fp = NULL; | |
489 | const cpp_converted_source cs | |
490 | = cpp_get_converted_source (file_path, input_charset); | |
491 | if (!cs.data) | |
492 | return false; | |
493 | if (m_data) | |
494 | XDELETEVEC (m_data); | |
495 | m_data = cs.data; | |
496 | m_nb_read = m_size = cs.len; | |
497 | m_alloc_offset = cs.data - cs.to_free; | |
498 | } | |
499 | else if (in_context.should_skip_bom) | |
500 | { | |
501 | if (read_data ()) | |
502 | { | |
503 | const int offset = cpp_check_utf8_bom (m_data, m_nb_read); | |
504 | offset_buffer (offset); | |
505 | m_nb_read -= offset; | |
506 | } | |
507 | } | |
508 | ||
509 | return true; | |
b544c348 | 510 | } |
7ecc3eb9 | 511 | |
b544c348 DM |
512 | /* file_cache's ctor. */ |
513 | ||
514 | file_cache::file_cache () | |
515 | : m_file_slots (new file_cache_slot[num_file_slots]) | |
516 | { | |
3ac6b5cf | 517 | initialize_input_context (nullptr, false); |
b544c348 DM |
518 | } |
519 | ||
520 | /* file_cache's dtor. */ | |
521 | ||
522 | file_cache::~file_cache () | |
523 | { | |
524 | delete[] m_file_slots; | |
7ecc3eb9 DS |
525 | } |
526 | ||
527 | /* Lookup the cache used for the content of a given file accessed by | |
528 | caret diagnostic. If no cached file was found, create a new cache | |
529 | for this file, add it to the array of cached file and return | |
94caa6a6 DM |
530 | it. |
531 | ||
532 | This can return nullptr on a cache miss if FILE_PATH can't be opened for | |
533 | reading, or if the content can't be converted to the input_charset. */ | |
7ecc3eb9 | 534 | |
b544c348 DM |
535 | file_cache_slot* |
536 | file_cache::lookup_or_add_file (const char *file_path) | |
7ecc3eb9 | 537 | { |
b544c348 | 538 | file_cache_slot *r = lookup_file (file_path); |
7ecc3eb9 | 539 | if (r == NULL) |
b544c348 | 540 | r = add_file (file_path); |
7ecc3eb9 DS |
541 | return r; |
542 | } | |
543 | ||
544 | /* Default constructor for a cache of file used by caret | |
545 | diagnostic. */ | |
546 | ||
b544c348 DM |
547 | file_cache_slot::file_cache_slot () |
548 | : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0), | |
3ac6b5cf LH |
549 | m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0), |
550 | m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true) | |
7ecc3eb9 | 551 | { |
b544c348 | 552 | m_line_record.create (0); |
7ecc3eb9 DS |
553 | } |
554 | ||
555 | /* Destructor for a cache of file used by caret diagnostic. */ | |
556 | ||
b544c348 | 557 | file_cache_slot::~file_cache_slot () |
7ecc3eb9 | 558 | { |
b544c348 | 559 | if (m_fp) |
7ecc3eb9 | 560 | { |
b544c348 DM |
561 | fclose (m_fp); |
562 | m_fp = NULL; | |
7ecc3eb9 | 563 | } |
b544c348 | 564 | if (m_data) |
7ecc3eb9 | 565 | { |
3ac6b5cf | 566 | offset_buffer (-m_alloc_offset); |
b544c348 DM |
567 | XDELETEVEC (m_data); |
568 | m_data = 0; | |
7ecc3eb9 | 569 | } |
b544c348 | 570 | m_line_record.release (); |
7ecc3eb9 DS |
571 | } |
572 | ||
573 | /* Returns TRUE iff the cache would need to be filled with data coming | |
574 | from the file. That is, either the cache is empty or full or the | |
575 | current line is empty. Note that if the cache is full, it would | |
576 | need to be extended and filled again. */ | |
577 | ||
b544c348 DM |
578 | bool |
579 | file_cache_slot::needs_read_p () const | |
7ecc3eb9 | 580 | { |
3ac6b5cf | 581 | return m_fp && (m_nb_read == 0 |
b544c348 DM |
582 | || m_nb_read == m_size |
583 | || (m_line_start_idx >= m_nb_read - 1)); | |
7ecc3eb9 DS |
584 | } |
585 | ||
586 | /* Return TRUE iff the cache is full and thus needs to be | |
587 | extended. */ | |
588 | ||
b544c348 DM |
589 | bool |
590 | file_cache_slot::needs_grow_p () const | |
7ecc3eb9 | 591 | { |
b544c348 | 592 | return m_nb_read == m_size; |
7ecc3eb9 DS |
593 | } |
594 | ||
595 | /* Grow the cache if it needs to be extended. */ | |
596 | ||
b544c348 DM |
597 | void |
598 | file_cache_slot::maybe_grow () | |
9fec0042 | 599 | { |
b544c348 | 600 | if (!needs_grow_p ()) |
7ecc3eb9 DS |
601 | return; |
602 | ||
3ac6b5cf LH |
603 | if (!m_data) |
604 | { | |
605 | gcc_assert (m_size == 0 && m_alloc_offset == 0); | |
606 | m_size = buffer_size; | |
607 | m_data = XNEWVEC (char, m_size); | |
608 | } | |
609 | else | |
610 | { | |
611 | const int offset = m_alloc_offset; | |
612 | offset_buffer (-offset); | |
613 | m_size *= 2; | |
614 | m_data = XRESIZEVEC (char, m_data, m_size); | |
615 | offset_buffer (offset); | |
616 | } | |
7ecc3eb9 | 617 | } |
9fec0042 | 618 | |
7ecc3eb9 DS |
619 | /* Read more data into the cache. Extends the cache if need be. |
620 | Returns TRUE iff new data could be read. */ | |
621 | ||
b544c348 DM |
622 | bool |
623 | file_cache_slot::read_data () | |
7ecc3eb9 | 624 | { |
b544c348 | 625 | if (feof (m_fp) || ferror (m_fp)) |
7ecc3eb9 DS |
626 | return false; |
627 | ||
b544c348 | 628 | maybe_grow (); |
7ecc3eb9 | 629 | |
b544c348 DM |
630 | char * from = m_data + m_nb_read; |
631 | size_t to_read = m_size - m_nb_read; | |
632 | size_t nb_read = fread (from, 1, to_read, m_fp); | |
7ecc3eb9 | 633 | |
b544c348 | 634 | if (ferror (m_fp)) |
7ecc3eb9 DS |
635 | return false; |
636 | ||
b544c348 | 637 | m_nb_read += nb_read; |
7ecc3eb9 DS |
638 | return !!nb_read; |
639 | } | |
640 | ||
641 | /* Read new data iff the cache needs to be filled with more data | |
642 | coming from the file FP. Return TRUE iff the cache was filled with | |
643 | mode data. */ | |
644 | ||
b544c348 DM |
645 | bool |
646 | file_cache_slot::maybe_read_data () | |
7ecc3eb9 | 647 | { |
b544c348 | 648 | if (!needs_read_p ()) |
7ecc3eb9 | 649 | return false; |
b544c348 | 650 | return read_data (); |
7ecc3eb9 DS |
651 | } |
652 | ||
2bd15617 LH |
/* Helper function for file_cache_slot::get_next_line (), to find the
   end of the next line in the LEN bytes starting at S.  Returns with
   the memchr convention, i.e. a pointer to the last byte of the line
   terminator, or nullptr if a line terminator was not found.  We need
   to determine line endings in the same manner that libcpp does: any
   of \n, \r\n, or \r is a line ending.  */

static char *
find_end_of_line (char *s, size_t len)
{
  char *const limit = s + len;
  for (char *p = s; p != limit; ++p)
    switch (*p)
      {
      case '\n':
        return p;

      case '\r':
        /* A \r that is the very last byte is not reported: we cannot
           tell whether it ends the file or is merely the end of what
           has been read so far, and must not split what is actually a
           \r\n sequence.  A file ending in \r is handled by the
           caller.  */
        if (p + 1 == limit)
          return nullptr;
        return p[1] == '\n' ? p + 1 : p;

      default:
        break;
      }

  return nullptr;
}
683 | ||
7ecc3eb9 DS |
684 | /* Read a new line from file FP, using C as a cache for the data |
685 | coming from the file. Upon successful completion, *LINE is set to | |
1adae327 BE |
686 | the beginning of the line found. *LINE points directly in the |
687 | line cache and is only valid until the next call of get_next_line. | |
7ecc3eb9 DS |
688 | *LINE_LEN is set to the length of the line. Note that the line |
689 | does not contain any terminal delimiter. This function returns | |
690 | true if some data was read or process from the cache, false | |
1adae327 BE |
691 | otherwise. Note that subsequent calls to get_next_line might |
692 | make the content of *LINE invalid. */ | |
7ecc3eb9 | 693 | |
b544c348 DM |
694 | bool |
695 | file_cache_slot::get_next_line (char **line, ssize_t *line_len) | |
7ecc3eb9 DS |
696 | { |
697 | /* Fill the cache with data to process. */ | |
b544c348 | 698 | maybe_read_data (); |
7ecc3eb9 | 699 | |
b544c348 | 700 | size_t remaining_size = m_nb_read - m_line_start_idx; |
7ecc3eb9 DS |
701 | if (remaining_size == 0) |
702 | /* There is no more data to process. */ | |
703 | return false; | |
704 | ||
b544c348 | 705 | char *line_start = m_data + m_line_start_idx; |
7ecc3eb9 DS |
706 | |
707 | char *next_line_start = NULL; | |
708 | size_t len = 0; | |
2bd15617 | 709 | char *line_end = find_end_of_line (line_start, remaining_size); |
7ecc3eb9 | 710 | if (line_end == NULL) |
9fec0042 | 711 | { |
2bd15617 LH |
712 | /* We haven't found an end-of-line delimiter in the cache. |
713 | Fill the cache with more data from the file and look again. */ | |
b544c348 | 714 | while (maybe_read_data ()) |
7ecc3eb9 | 715 | { |
b544c348 DM |
716 | line_start = m_data + m_line_start_idx; |
717 | remaining_size = m_nb_read - m_line_start_idx; | |
2bd15617 | 718 | line_end = find_end_of_line (line_start, remaining_size); |
7ecc3eb9 DS |
719 | if (line_end != NULL) |
720 | { | |
721 | next_line_start = line_end + 1; | |
722 | break; | |
723 | } | |
724 | } | |
725 | if (line_end == NULL) | |
c65236d6 | 726 | { |
2bd15617 LH |
727 | /* We've loaded all the file into the cache and still no |
728 | terminator. Let's say the line ends up at one byte past the | |
c65236d6 | 729 | end of the file. This is to stay consistent with the case |
2bd15617 LH |
730 | of when the line ends up with a terminator and line_end points to |
731 | that. That consistency is useful below in the len calculation. | |
732 | ||
733 | If the file ends in a \r, we didn't identify it as a line | |
734 | terminator above, so do that now instead. */ | |
735 | line_end = m_data + m_nb_read; | |
736 | if (m_nb_read && line_end[-1] == '\r') | |
737 | { | |
738 | --line_end; | |
739 | m_missing_trailing_newline = false; | |
740 | } | |
741 | else | |
742 | m_missing_trailing_newline = true; | |
c65236d6 DM |
743 | } |
744 | else | |
b544c348 | 745 | m_missing_trailing_newline = false; |
9fec0042 | 746 | } |
7ecc3eb9 | 747 | else |
c65236d6 DM |
748 | { |
749 | next_line_start = line_end + 1; | |
b544c348 | 750 | m_missing_trailing_newline = false; |
c65236d6 | 751 | } |
7ecc3eb9 | 752 | |
3ac6b5cf | 753 | if (m_fp && ferror (m_fp)) |
1adae327 | 754 | return false; |
7ecc3eb9 | 755 | |
2bd15617 LH |
756 | /* At this point, we've found the end of the of line. It either points to |
757 | the line terminator or to one byte after the last byte of the file. */ | |
7ecc3eb9 | 758 | gcc_assert (line_end != NULL); |
9fec0042 | 759 | |
7ecc3eb9 DS |
760 | len = line_end - line_start; |
761 | ||
b544c348 | 762 | if (m_line_start_idx < m_nb_read) |
7ecc3eb9 DS |
763 | *line = line_start; |
764 | ||
b544c348 | 765 | ++m_line_num; |
7ecc3eb9 DS |
766 | |
767 | /* Before we update our line record, make sure the hint about the | |
768 | total number of lines of the file is correct. If it's not, then | |
769 | we give up recording line boundaries from now on. */ | |
770 | bool update_line_record = true; | |
b544c348 | 771 | if (m_line_num > m_total_lines) |
7ecc3eb9 DS |
772 | update_line_record = false; |
773 | ||
774 | /* Now update our line record so that re-reading lines from the | |
b544c348 | 775 | before m_line_start_idx is faster. */ |
7ecc3eb9 | 776 | if (update_line_record |
b544c348 | 777 | && m_line_record.length () < line_record_size) |
7ecc3eb9 DS |
778 | { |
779 | /* If the file lines fits in the line record, we just record all | |
780 | its lines ...*/ | |
b544c348 DM |
781 | if (m_total_lines <= line_record_size |
782 | && m_line_num > m_line_record.length ()) | |
783 | m_line_record.safe_push | |
784 | (file_cache_slot::line_info (m_line_num, | |
785 | m_line_start_idx, | |
786 | line_end - m_data)); | |
787 | else if (m_total_lines > line_record_size) | |
7ecc3eb9 DS |
788 | { |
789 | /* ... otherwise, we just scale total_lines down to | |
b544c348 DM |
790 | (line_record_size lines. */ |
791 | size_t n = (m_line_num * line_record_size) / m_total_lines; | |
792 | if (m_line_record.length () == 0 | |
793 | || n >= m_line_record.length ()) | |
794 | m_line_record.safe_push | |
795 | (file_cache_slot::line_info (m_line_num, | |
796 | m_line_start_idx, | |
797 | line_end - m_data)); | |
7ecc3eb9 DS |
798 | } |
799 | } | |
800 | ||
b544c348 | 801 | /* Update m_line_start_idx so that it points to the next line to be |
7ecc3eb9 DS |
802 | read. */ |
803 | if (next_line_start) | |
b544c348 | 804 | m_line_start_idx = next_line_start - m_data; |
7ecc3eb9 DS |
805 | else |
806 | /* We didn't find any terminal '\n'. Let's consider that the end | |
807 | of line is the end of the data in the cache. The next | |
808 | invocation of get_next_line will either read more data from the | |
809 | underlying file or return false early because we've reached the | |
810 | end of the file. */ | |
b544c348 | 811 | m_line_start_idx = m_nb_read; |
7ecc3eb9 DS |
812 | |
813 | *line_len = len; | |
814 | ||
815 | return true; | |
816 | } | |
817 | ||
7ecc3eb9 DS |
818 | /* Consume the next bytes coming from the cache (or from its |
819 | underlying file if there are remaining unread bytes in the file) | |
820 | until we reach the next end-of-line (or end-of-file). There is no | |
821 | copying from the cache involved. Return TRUE upon successful | |
822 | completion. */ | |
823 | ||
b544c348 DM |
824 | bool |
825 | file_cache_slot::goto_next_line () | |
7ecc3eb9 DS |
826 | { |
827 | char *l; | |
828 | ssize_t len; | |
829 | ||
b544c348 | 830 | return get_next_line (&l, &len); |
7ecc3eb9 DS |
831 | } |
832 | ||
833 | /* Read an arbitrary line number LINE_NUM from the file cached in C. | |
1adae327 BE |
834 | If the line was read successfully, *LINE points to the beginning |
835 | of the line in the file cache and *LINE_LEN is the length of the | |
836 | line. *LINE is not nul-terminated, but may contain zero bytes. | |
837 | *LINE is only valid until the next call of read_line_num. | |
7ecc3eb9 DS |
838 | This function returns bool if a line was read. */ |
839 | ||
b544c348 DM |
840 | bool |
841 | file_cache_slot::read_line_num (size_t line_num, | |
842 | char ** line, ssize_t *line_len) | |
7ecc3eb9 DS |
843 | { |
844 | gcc_assert (line_num > 0); | |
845 | ||
b544c348 | 846 | if (line_num <= m_line_num) |
9789a912 | 847 | { |
b544c348 | 848 | /* We've been asked to read lines that are before m_line_num. |
7ecc3eb9 DS |
849 | So lets use our line record (if it's not empty) to try to |
850 | avoid re-reading the file from the beginning again. */ | |
7f4d640c | 851 | |
b544c348 | 852 | if (m_line_record.is_empty ()) |
9fec0042 | 853 | { |
b544c348 DM |
854 | m_line_start_idx = 0; |
855 | m_line_num = 0; | |
7ecc3eb9 DS |
856 | } |
857 | else | |
858 | { | |
b544c348 DM |
859 | file_cache_slot::line_info *i = NULL; |
860 | if (m_total_lines <= line_record_size) | |
7ecc3eb9 DS |
861 | { |
862 | /* In languages where the input file is not totally | |
b544c348 | 863 | preprocessed up front, the m_total_lines hint |
7ecc3eb9 DS |
864 | can be smaller than the number of lines of the |
865 | file. In that case, only the first | |
b544c348 | 866 | m_total_lines have been recorded. |
7ecc3eb9 | 867 | |
b544c348 | 868 | Otherwise, the first m_total_lines we've read have |
7ecc3eb9 | 869 | their start/end recorded here. */ |
b544c348 DM |
870 | i = (line_num <= m_total_lines) |
871 | ? &m_line_record[line_num - 1] | |
872 | : &m_line_record[m_total_lines - 1]; | |
7ecc3eb9 DS |
873 | gcc_assert (i->line_num <= line_num); |
874 | } | |
875 | else | |
876 | { | |
877 | /* So the file had more lines than our line record | |
878 | size. Thus the number of lines we've recorded has | |
b544c348 | 879 | been scaled down to line_record_size. Let's |
7ecc3eb9 DS |
880 | pick the start/end of the recorded line that is |
881 | closest to line_num. */ | |
b544c348 DM |
882 | size_t n = (line_num <= m_total_lines) |
883 | ? line_num * line_record_size / m_total_lines | |
884 | : m_line_record.length () - 1; | |
885 | if (n < m_line_record.length ()) | |
7ecc3eb9 | 886 | { |
b544c348 | 887 | i = &m_line_record[n]; |
7ecc3eb9 DS |
888 | gcc_assert (i->line_num <= line_num); |
889 | } | |
890 | } | |
891 | ||
892 | if (i && i->line_num == line_num) | |
893 | { | |
1adae327 | 894 | /* We have the start/end of the line. */ |
b544c348 | 895 | *line = m_data + i->start_pos; |
1adae327 | 896 | *line_len = i->end_pos - i->start_pos; |
7ecc3eb9 DS |
897 | return true; |
898 | } | |
899 | ||
900 | if (i) | |
901 | { | |
b544c348 DM |
902 | m_line_start_idx = i->start_pos; |
903 | m_line_num = i->line_num - 1; | |
7ecc3eb9 DS |
904 | } |
905 | else | |
906 | { | |
b544c348 DM |
907 | m_line_start_idx = 0; |
908 | m_line_num = 0; | |
7ecc3eb9 | 909 | } |
9fec0042 | 910 | } |
9fec0042 | 911 | } |
7ecc3eb9 | 912 | |
b544c348 | 913 | /* Let's walk from line m_line_num up to line_num - 1, without |
7ecc3eb9 | 914 | copying any line. */ |
b544c348 DM |
915 | while (m_line_num < line_num - 1) |
916 | if (!goto_next_line ()) | |
7ecc3eb9 DS |
917 | return false; |
918 | ||
919 | /* The line we want is the next one. Let's read and copy it back to | |
920 | the caller. */ | |
b544c348 | 921 | return get_next_line (line, line_len); |
9fec0042 MLI |
922 | } |
923 | ||
1adae327 BE |
924 | /* Return the physical source line that corresponds to FILE_PATH/LINE. |
925 | The line is not nul-terminated. The returned pointer is only | |
926 | valid until the next call of location_get_source_line. | |
927 | Note that the line can contain several null characters, | |
7761dfbe DM |
928 | so the returned value's length has the actual length of the line. |
929 | If the function fails, a NULL char_span is returned. */ | |
9fec0042 | 930 | |
7761dfbe | 931 | char_span |
14082026 | 932 | file_cache::get_source_line (const char *file_path, int line) |
9fec0042 | 933 | { |
ac2a97db | 934 | char *buffer = NULL; |
1adae327 | 935 | ssize_t len; |
7ecc3eb9 | 936 | |
31bdd08a | 937 | if (line == 0) |
7761dfbe | 938 | return char_span (NULL, 0); |
367c8286 | 939 | |
b544c348 DM |
940 | if (file_path == NULL) |
941 | return char_span (NULL, 0); | |
942 | ||
14082026 | 943 | file_cache_slot *c = lookup_or_add_file (file_path); |
367c8286 | 944 | if (c == NULL) |
7761dfbe | 945 | return char_span (NULL, 0); |
367c8286 | 946 | |
b544c348 | 947 | bool read = c->read_line_num (line, &buffer, &len); |
7761dfbe DM |
948 | if (!read) |
949 | return char_span (NULL, 0); | |
9fec0042 | 950 | |
7761dfbe | 951 | return char_span (buffer, len); |
9fec0042 MLI |
952 | } |
953 | ||
0386c40e JCI |
954 | /* Return a NUL-terminated copy of the source text between two locations, or |
955 | NULL if the arguments are invalid. The caller is responsible for freeing | |
956 | the return value. */ | |
957 | ||
958 | char * | |
1bdd665a | 959 | get_source_text_between (file_cache &fc, location_t start, location_t end) |
0386c40e JCI |
960 | { |
961 | expanded_location expstart = | |
962 | expand_location_to_spelling_point (start, LOCATION_ASPECT_START); | |
963 | expanded_location expend = | |
964 | expand_location_to_spelling_point (end, LOCATION_ASPECT_FINISH); | |
965 | ||
966 | /* If the locations are in different files or the end comes before the | |
967 | start, give up and return nothing. */ | |
968 | if (!expstart.file || !expend.file) | |
969 | return NULL; | |
970 | if (strcmp (expstart.file, expend.file) != 0) | |
971 | return NULL; | |
972 | if (expstart.line > expend.line) | |
973 | return NULL; | |
974 | if (expstart.line == expend.line | |
975 | && expstart.column > expend.column) | |
976 | return NULL; | |
977 | /* These aren't real column numbers, give up. */ | |
978 | if (expstart.column == 0 || expend.column == 0) | |
979 | return NULL; | |
980 | ||
981 | /* For a single line we need to trim both edges. */ | |
982 | if (expstart.line == expend.line) | |
983 | { | |
1bdd665a | 984 | char_span line = fc.get_source_line (expstart.file, expstart.line); |
0386c40e JCI |
985 | if (line.length () < 1) |
986 | return NULL; | |
987 | int s = expstart.column - 1; | |
988 | int len = expend.column - s; | |
989 | if (line.length () < (size_t)expend.column) | |
990 | return NULL; | |
991 | return line.subspan (s, len).xstrdup (); | |
992 | } | |
993 | ||
994 | struct obstack buf_obstack; | |
995 | obstack_init (&buf_obstack); | |
996 | ||
997 | /* Loop through all lines in the range and append each to buf; may trim | |
998 | parts of the start and end lines off depending on column values. */ | |
999 | for (int lnum = expstart.line; lnum <= expend.line; ++lnum) | |
1000 | { | |
1bdd665a | 1001 | char_span line = fc.get_source_line (expstart.file, lnum); |
0386c40e JCI |
1002 | if (line.length () < 1 && (lnum != expstart.line && lnum != expend.line)) |
1003 | continue; | |
1004 | ||
1005 | /* For the first line in the range, only start at expstart.column */ | |
1006 | if (lnum == expstart.line) | |
1007 | { | |
1008 | unsigned off = expstart.column - 1; | |
1009 | if (line.length () < off) | |
1010 | return NULL; | |
1011 | line = line.subspan (off, line.length() - off); | |
1012 | } | |
1013 | /* For the last line, don't go past expend.column */ | |
1014 | else if (lnum == expend.line) | |
1015 | { | |
1016 | if (line.length () < (size_t)expend.column) | |
1017 | return NULL; | |
1018 | line = line.subspan (0, expend.column); | |
1019 | } | |
1020 | ||
1021 | /* Combine spaces at the beginning of later lines. */ | |
1022 | if (lnum > expstart.line) | |
1023 | { | |
1024 | unsigned off; | |
1025 | for (off = 0; off < line.length(); ++off) | |
1026 | if (line[off] != ' ' && line[off] != '\t') | |
1027 | break; | |
1028 | if (off > 0) | |
1029 | { | |
1030 | obstack_1grow (&buf_obstack, ' '); | |
1031 | line = line.subspan (off, line.length() - off); | |
1032 | } | |
1033 | } | |
1034 | ||
1035 | /* This does not include any trailing newlines. */ | |
1036 | obstack_grow (&buf_obstack, line.get_buffer (), line.length ()); | |
1037 | } | |
1038 | ||
1039 | /* NUL-terminate and finish the buf obstack. */ | |
1040 | obstack_1grow (&buf_obstack, 0); | |
1041 | const char *buf = (const char *) obstack_finish (&buf_obstack); | |
1042 | ||
1043 | return xstrdup (buf); | |
1044 | } | |
1045 | ||
14082026 DM |
1046 | |
1047 | char_span | |
1048 | file_cache::get_source_file_content (const char *file_path) | |
1049 | { | |
1050 | file_cache_slot *c = lookup_or_add_file (file_path); | |
94caa6a6 DM |
1051 | if (c == nullptr) |
1052 | return char_span (nullptr, 0); | |
14082026 DM |
1053 | return c->get_full_file_content (); |
1054 | } | |
1055 | ||
c468587a DS |
1056 | /* Test if the location originates from the spelling location of a |
1057 | builtin-tokens. That is, return TRUE if LOC is a (possibly | |
1058 | virtual) location of a built-in token that appears in the expansion | |
1059 | list of a macro. Please note that this function also works on | |
1060 | tokens that result from built-in tokens. For instance, the | |
1061 | function would return true if passed a token "4" that is the result | |
1062 | of the expansion of the built-in __LINE__ macro. */ | |
1063 | bool | |
620e594b | 1064 | is_location_from_builtin_token (location_t loc) |
c468587a | 1065 | { |
0e50b624 | 1066 | const line_map_ordinary *map = NULL; |
c468587a DS |
1067 | loc = linemap_resolve_location (line_table, loc, |
1068 | LRK_SPELLING_LOCATION, &map); | |
1069 | return loc == BUILTINS_LOCATION; | |
1070 | } | |
1071 | ||
7eb918cc DS |
1072 | /* Expand the source location LOC into a human readable location. If |
1073 | LOC is virtual, it resolves to the expansion point of the involved | |
1074 | macro. If LOC resolves to a builtin location, the file name of the | |
1075 | readable location is set to the string "<built-in>". */ | |
1076 | ||
1077 | expanded_location | |
620e594b | 1078 | expand_location (location_t loc) |
7eb918cc | 1079 | { |
8625aa24 | 1080 | return expand_location_1 (line_table, loc, /*expansion_point_p=*/true, |
c471c6ed | 1081 | LOCATION_ASPECT_CARET); |
7eb918cc DS |
1082 | } |
1083 | ||
1084 | /* Expand the source location LOC into a human readable location. If | |
1085 | LOC is virtual, it resolves to the expansion location of the | |
1086 | relevant macro. If LOC resolves to a builtin location, the file | |
1087 | name of the readable location is set to the string | |
1088 | "<built-in>". */ | |
1089 | ||
1090 | expanded_location | |
620e594b | 1091 | expand_location_to_spelling_point (location_t loc, |
0d48e877 | 1092 | enum location_aspect aspect) |
7eb918cc | 1093 | { |
8625aa24 DM |
1094 | return expand_location_1 (line_table, loc, /*expansion_point_p=*/false, |
1095 | aspect); | |
7eb918cc DS |
1096 | } |
1097 | ||
8a645150 | 1098 | /* The rich_location class within libcpp requires a way to expand |
620e594b | 1099 | location_t instances, and relies on the client code |
8a645150 DM |
1100 | providing a symbol named |
1101 | linemap_client_expand_location_to_spelling_point | |
1102 | to do this. | |
1103 | ||
1104 | This is the implementation for libcommon.a (all host binaries), | |
c471c6ed | 1105 | which simply calls into expand_location_1. */ |
8a645150 DM |
1106 | |
1107 | expanded_location | |
8625aa24 DM |
1108 | linemap_client_expand_location_to_spelling_point (const line_maps *set, |
1109 | location_t loc, | |
c471c6ed | 1110 | enum location_aspect aspect) |
8a645150 | 1111 | { |
8625aa24 | 1112 | return expand_location_1 (set, loc, /*expansion_point_p=*/false, aspect); |
8a645150 DM |
1113 | } |
1114 | ||
1115 | ||
5a431b60 MS |
1116 | /* If LOCATION is in a system header and if it is a virtual location |
1117 | for a token coming from the expansion of a macro, unwind it to | |
1118 | the location of the expansion point of the macro. If the expansion | |
1119 | point is also in a system header return the original LOCATION. | |
1120 | Otherwise, return the location of the expansion point. | |
70dc395a DS |
1121 | |
1122 | This is used for instance when we want to emit diagnostics about a | |
e1f0c178 MLI |
1123 | token that may be located in a macro that is itself defined in a |
1124 | system header, for example, for the NULL macro. In such a case, if | |
1125 | LOCATION were passed directly to diagnostic functions such as | |
1126 | warning_at, the diagnostic would be suppressed (unless | |
1127 | -Wsystem-headers). */ | |
70dc395a | 1128 | |
620e594b DM |
1129 | location_t |
1130 | expansion_point_location_if_in_system_header (location_t location) | |
70dc395a | 1131 | { |
5a431b60 MS |
1132 | if (!in_system_header_at (location)) |
1133 | return location; | |
1134 | ||
1135 | location_t xloc = linemap_resolve_location (line_table, location, | |
1136 | LRK_MACRO_EXPANSION_POINT, | |
1137 | NULL); | |
1138 | return in_system_header_at (xloc) ? location : xloc; | |
70dc395a | 1139 | } |
7eb918cc | 1140 | |
79ce98bc MP |
1141 | /* If LOCATION is a virtual location for a token coming from the expansion |
1142 | of a macro, unwind to the location of the expansion point of the macro. */ | |
1143 | ||
620e594b DM |
1144 | location_t |
1145 | expansion_point_location (location_t location) | |
79ce98bc MP |
1146 | { |
1147 | return linemap_resolve_location (line_table, location, | |
1148 | LRK_MACRO_EXPANSION_POINT, NULL); | |
1149 | } | |
1150 | ||
a01fc549 | 1151 | /* Construct a location with caret at CARET, ranging from START to |
25af7c1a DM |
1152 | FINISH. |
1153 | ||
1154 | For example, consider: | |
a01fc549 DM |
1155 | |
1156 | 11111111112 | |
1157 | 12345678901234567890 | |
1158 | 522 | |
1159 | 523 return foo + bar; | |
1160 | ~~~~^~~~~ | |
1161 | 524 | |
1162 | ||
1163 | The location's caret is at the "+", line 523 column 15, but starts | |
1164 | earlier, at the "f" of "foo" at column 11. The finish is at the "r" | |
1165 | of "bar" at column 19. */ | |
1166 | ||
1167 | location_t | |
1168 | make_location (location_t caret, location_t start, location_t finish) | |
1169 | { | |
25af7c1a | 1170 | return line_table->make_location (caret, start, finish); |
a01fc549 DM |
1171 | } |
1172 | ||
a32c8316 MP |
1173 | /* Same as above, but taking a source range rather than two locations. */ |
1174 | ||
1175 | location_t | |
1176 | make_location (location_t caret, source_range src_range) | |
1177 | { | |
1178 | location_t pure_loc = get_pure_location (caret); | |
1f68a3e8 DM |
1179 | return line_table->get_or_create_combined_loc (pure_loc, src_range, |
1180 | nullptr, 0); | |
a32c8316 MP |
1181 | } |
1182 | ||
ee925640 LH |
1183 | /* An expanded_location stores the column in byte units. This function |
1184 | converts that column to display units. That requires reading the associated | |
1185 | source line in order to calculate the display width. If that cannot be done | |
1186 | for any reason, then returns the byte column as a fallback. */ | |
1187 | int | |
1bdd665a DM |
1188 | location_compute_display_column (file_cache &fc, |
1189 | expanded_location exploc, | |
bd5e882c | 1190 | const cpp_char_column_policy &policy) |
ee925640 LH |
1191 | { |
1192 | if (!(exploc.file && *exploc.file && exploc.line && exploc.column)) | |
1193 | return exploc.column; | |
1bdd665a | 1194 | char_span line = fc.get_source_line (exploc.file, exploc.line); |
ee925640 LH |
1195 | /* If line is NULL, this function returns exploc.column which is the |
1196 | desired fallback. */ | |
1197 | return cpp_byte_column_to_display_column (line.get_buffer (), line.length (), | |
bd5e882c | 1198 | exploc.column, policy); |
ee925640 LH |
1199 | } |
1200 | ||
64a1a422 TT |
1201 | /* Dump statistics to stderr about the memory usage of the line_table |
1202 | set of line maps. This also displays some statistics about macro | |
1203 | expansion. */ | |
1204 | ||
1205 | void | |
1206 | dump_line_table_statistics (void) | |
1207 | { | |
1208 | struct linemap_stats s; | |
d17687f6 | 1209 | long total_used_map_size, |
64a1a422 TT |
1210 | macro_maps_size, |
1211 | total_allocated_map_size; | |
1212 | ||
1213 | memset (&s, 0, sizeof (s)); | |
1214 | ||
1215 | linemap_get_statistics (line_table, &s); | |
1216 | ||
1217 | macro_maps_size = s.macro_maps_used_size | |
1218 | + s.macro_maps_locations_size; | |
1219 | ||
1220 | total_allocated_map_size = s.ordinary_maps_allocated_size | |
1221 | + s.macro_maps_allocated_size | |
1222 | + s.macro_maps_locations_size; | |
1223 | ||
1224 | total_used_map_size = s.ordinary_maps_used_size | |
1225 | + s.macro_maps_used_size | |
1226 | + s.macro_maps_locations_size; | |
1227 | ||
d17687f6 | 1228 | fprintf (stderr, "Number of expanded macros: %5ld\n", |
64a1a422 TT |
1229 | s.num_expanded_macros); |
1230 | if (s.num_expanded_macros != 0) | |
d17687f6 | 1231 | fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n", |
64a1a422 TT |
1232 | s.num_macro_tokens / s.num_expanded_macros); |
1233 | fprintf (stderr, | |
1234 | "\nLine Table allocations during the " | |
40ce7fa6 | 1235 | "compilation process\n"); |
a0b48080 | 1236 | fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n", |
40ce7fa6 | 1237 | SIZE_AMOUNT (s.num_ordinary_maps_used)); |
a0b48080 | 1238 | fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n", |
40ce7fa6 | 1239 | SIZE_AMOUNT (s.ordinary_maps_used_size)); |
a0b48080 | 1240 | fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n", |
40ce7fa6 | 1241 | SIZE_AMOUNT (s.num_ordinary_maps_allocated)); |
a0b48080 | 1242 | fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n", |
40ce7fa6 | 1243 | SIZE_AMOUNT (s.ordinary_maps_allocated_size)); |
a0b48080 | 1244 | fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n", |
40ce7fa6 | 1245 | SIZE_AMOUNT (s.num_macro_maps_used)); |
a0b48080 | 1246 | fprintf (stderr, "Macro maps used size: " PRsa (5) "\n", |
40ce7fa6 | 1247 | SIZE_AMOUNT (s.macro_maps_used_size)); |
a0b48080 | 1248 | fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n", |
40ce7fa6 | 1249 | SIZE_AMOUNT (s.macro_maps_locations_size)); |
a0b48080 | 1250 | fprintf (stderr, "Macro maps size: " PRsa (5) "\n", |
40ce7fa6 | 1251 | SIZE_AMOUNT (macro_maps_size)); |
a0b48080 | 1252 | fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n", |
40ce7fa6 | 1253 | SIZE_AMOUNT (s.duplicated_macro_maps_locations_size)); |
a0b48080 | 1254 | fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n", |
40ce7fa6 | 1255 | SIZE_AMOUNT (total_allocated_map_size)); |
a0b48080 | 1256 | fprintf (stderr, "Total used maps size: " PRsa (5) "\n", |
40ce7fa6 | 1257 | SIZE_AMOUNT (total_used_map_size)); |
a0b48080 | 1258 | fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n", |
40ce7fa6 | 1259 | SIZE_AMOUNT (s.adhoc_table_size)); |
a0b48080 | 1260 | fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n", |
40ce7fa6 | 1261 | SIZE_AMOUNT (s.adhoc_table_entries_used)); |
a0b48080 | 1262 | fprintf (stderr, "optimized_ranges: " PRsa (5) "\n", |
1f68a3e8 | 1263 | SIZE_AMOUNT (line_table->m_num_optimized_ranges)); |
a0b48080 | 1264 | fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n", |
1f68a3e8 | 1265 | SIZE_AMOUNT (line_table->m_num_unoptimized_ranges)); |
ee015909 | 1266 | |
64a1a422 TT |
1267 | fprintf (stderr, "\n"); |
1268 | } | |
ba4ad400 DM |
1269 | |
1270 | /* Get location one beyond the final location in ordinary map IDX. */ | |
1271 | ||
620e594b | 1272 | static location_t |
99b1c316 | 1273 | get_end_location (class line_maps *set, unsigned int idx) |
ba4ad400 DM |
1274 | { |
1275 | if (idx == LINEMAPS_ORDINARY_USED (set) - 1) | |
1276 | return set->highest_location; | |
1277 | ||
1278 | struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1); | |
1279 | return MAP_START_LOCATION (next_map); | |
1280 | } | |
1281 | ||
/* Helper function for write_digit_row.
   Write the last decimal digit of DIGIT to STREAM as one character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1289 | ||
1290 | /* Helper function for dump_location_info. | |
1291 | Write a row of numbers to STREAM, numbering a source line, | |
1292 | giving the units, tens, hundreds etc of the column number. */ | |
1293 | ||
1294 | static void | |
1295 | write_digit_row (FILE *stream, int indent, | |
ebedc9a3 | 1296 | const line_map_ordinary *map, |
620e594b | 1297 | location_t loc, int max_col, int divisor) |
ba4ad400 DM |
1298 | { |
1299 | fprintf (stream, "%*c", indent, ' '); | |
1300 | fprintf (stream, "|"); | |
1301 | for (int column = 1; column < max_col; column++) | |
1302 | { | |
620e594b | 1303 | location_t column_loc = loc + (column << map->m_range_bits); |
ba4ad400 DM |
1304 | write_digit (stream, column_loc / divisor); |
1305 | } | |
1306 | fprintf (stream, "\n"); | |
1307 | } | |
1308 | ||
1309 | /* Write a half-closed (START) / half-open (END) interval of | |
620e594b | 1310 | location_t to STREAM. */ |
ba4ad400 DM |
1311 | |
1312 | static void | |
1313 | dump_location_range (FILE *stream, | |
620e594b | 1314 | location_t start, location_t end) |
ba4ad400 DM |
1315 | { |
1316 | fprintf (stream, | |
620e594b | 1317 | " location_t interval: %u <= loc < %u\n", |
ba4ad400 DM |
1318 | start, end); |
1319 | } | |
1320 | ||
1321 | /* Write a labelled description of a half-closed (START) / half-open (END) | |
620e594b | 1322 | interval of location_t to STREAM. */ |
ba4ad400 DM |
1323 | |
1324 | static void | |
1325 | dump_labelled_location_range (FILE *stream, | |
1326 | const char *name, | |
620e594b | 1327 | location_t start, location_t end) |
ba4ad400 DM |
1328 | { |
1329 | fprintf (stream, "%s\n", name); | |
1330 | dump_location_range (stream, start, end); | |
1331 | fprintf (stream, "\n"); | |
1332 | } | |
1333 | ||
1334 | /* Write a visualization of the locations in the line_table to STREAM. */ | |
1335 | ||
1336 | void | |
1337 | dump_location_info (FILE *stream) | |
1338 | { | |
1bdd665a DM |
1339 | file_cache fc; |
1340 | ||
ba4ad400 DM |
1341 | /* Visualize the reserved locations. */ |
1342 | dump_labelled_location_range (stream, "RESERVED LOCATIONS", | |
1343 | 0, RESERVED_LOCATION_COUNT); | |
1344 | ||
1345 | /* Visualize the ordinary line_map instances, rendering the sources. */ | |
1346 | for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++) | |
1347 | { | |
620e594b | 1348 | location_t end_location = get_end_location (line_table, idx); |
ba4ad400 DM |
1349 | /* half-closed: doesn't include this one. */ |
1350 | ||
0e50b624 DM |
1351 | const line_map_ordinary *map |
1352 | = LINEMAPS_ORDINARY_MAP_AT (line_table, idx); | |
ba4ad400 DM |
1353 | fprintf (stream, "ORDINARY MAP: %i\n", idx); |
1354 | dump_location_range (stream, | |
1355 | MAP_START_LOCATION (map), end_location); | |
1356 | fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map)); | |
1357 | fprintf (stream, " starting at line: %i\n", | |
1358 | ORDINARY_MAP_STARTING_LINE_NUMBER (map)); | |
ebedc9a3 DM |
1359 | fprintf (stream, " column and range bits: %i\n", |
1360 | map->m_column_and_range_bits); | |
ba4ad400 | 1361 | fprintf (stream, " column bits: %i\n", |
ebedc9a3 DM |
1362 | map->m_column_and_range_bits - map->m_range_bits); |
1363 | fprintf (stream, " range bits: %i\n", | |
1364 | map->m_range_bits); | |
bc65bad2 MG |
1365 | const char * reason; |
1366 | switch (map->reason) { | |
1367 | case LC_ENTER: | |
1368 | reason = "LC_ENTER"; | |
1369 | break; | |
1370 | case LC_LEAVE: | |
1371 | reason = "LC_LEAVE"; | |
1372 | break; | |
1373 | case LC_RENAME: | |
1374 | reason = "LC_RENAME"; | |
1375 | break; | |
1376 | case LC_RENAME_VERBATIM: | |
1377 | reason = "LC_RENAME_VERBATIM"; | |
1378 | break; | |
1379 | case LC_ENTER_MACRO: | |
1380 | reason = "LC_RENAME_MACRO"; | |
1381 | break; | |
1382 | default: | |
1383 | reason = "Unknown"; | |
1384 | } | |
1385 | fprintf (stream, " reason: %d (%s)\n", map->reason, reason); | |
1386 | ||
1387 | const line_map_ordinary *includer_map | |
1388 | = linemap_included_from_linemap (line_table, map); | |
1389 | fprintf (stream, " included from location: %d", | |
1390 | linemap_included_from (map)); | |
1391 | if (includer_map) { | |
1392 | fprintf (stream, " (in ordinary map %d)", | |
1393 | int (includer_map - line_table->info_ordinary.maps)); | |
1394 | } | |
1395 | fprintf (stream, "\n"); | |
ba4ad400 DM |
1396 | |
1397 | /* Render the span of source lines that this "map" covers. */ | |
620e594b | 1398 | for (location_t loc = MAP_START_LOCATION (map); |
ba4ad400 | 1399 | loc < end_location; |
ebedc9a3 | 1400 | loc += (1 << map->m_range_bits) ) |
ba4ad400 | 1401 | { |
ebedc9a3 DM |
1402 | gcc_assert (pure_location_p (line_table, loc) ); |
1403 | ||
ba4ad400 DM |
1404 | expanded_location exploc |
1405 | = linemap_expand_location (line_table, map, loc); | |
1406 | ||
01512446 | 1407 | if (exploc.column == 0) |
ba4ad400 DM |
1408 | { |
1409 | /* Beginning of a new source line: draw the line. */ | |
1410 | ||
1bdd665a DM |
1411 | char_span line_text = fc.get_source_line (exploc.file, |
1412 | exploc.line); | |
ba4ad400 DM |
1413 | if (!line_text) |
1414 | break; | |
1415 | fprintf (stream, | |
1416 | "%s:%3i|loc:%5i|%.*s\n", | |
1417 | exploc.file, exploc.line, | |
1418 | loc, | |
7761dfbe | 1419 | (int)line_text.length (), line_text.get_buffer ()); |
ba4ad400 DM |
1420 | |
1421 | /* "loc" is at column 0, which means "the whole line". | |
1422 | Render the locations *within* the line, by underlining | |
620e594b | 1423 | it, showing the location_t numeric values |
ba4ad400 | 1424 | at each column. */ |
7761dfbe DM |
1425 | size_t max_col = (1 << map->m_column_and_range_bits) - 1; |
1426 | if (max_col > line_text.length ()) | |
1427 | max_col = line_text.length () + 1; | |
ba4ad400 | 1428 | |
bc65bad2 MG |
1429 | int len_lnum = num_digits (exploc.line); |
1430 | if (len_lnum < 3) | |
1431 | len_lnum = 3; | |
1432 | int len_loc = num_digits (loc); | |
1433 | if (len_loc < 5) | |
1434 | len_loc = 5; | |
1435 | ||
1436 | int indent = 6 + strlen (exploc.file) + len_lnum + len_loc; | |
ba4ad400 DM |
1437 | |
1438 | /* Thousands. */ | |
1439 | if (end_location > 999) | |
ebedc9a3 | 1440 | write_digit_row (stream, indent, map, loc, max_col, 1000); |
ba4ad400 DM |
1441 | |
1442 | /* Hundreds. */ | |
1443 | if (end_location > 99) | |
ebedc9a3 | 1444 | write_digit_row (stream, indent, map, loc, max_col, 100); |
ba4ad400 DM |
1445 | |
1446 | /* Tens. */ | |
ebedc9a3 | 1447 | write_digit_row (stream, indent, map, loc, max_col, 10); |
ba4ad400 DM |
1448 | |
1449 | /* Units. */ | |
ebedc9a3 | 1450 | write_digit_row (stream, indent, map, loc, max_col, 1); |
ba4ad400 DM |
1451 | } |
1452 | } | |
1453 | fprintf (stream, "\n"); | |
1454 | } | |
1455 | ||
1456 | /* Visualize unallocated values. */ | |
1457 | dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS", | |
1458 | line_table->highest_location, | |
1459 | LINEMAPS_MACRO_LOWEST_LOCATION (line_table)); | |
1460 | ||
1461 | /* Visualize the macro line_map instances, rendering the sources. */ | |
1462 | for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++) | |
1463 | { | |
620e594b | 1464 | /* Each macro map that is allocated owns location_t values |
ba4ad400 DM |
1465 | that are *lower* that the one before them. |
1466 | Hence it's meaningful to view them either in order of ascending | |
1467 | source locations, or in order of ascending macro map index. */ | |
620e594b DM |
1468 | const bool ascending_location_ts = true; |
1469 | unsigned int idx = (ascending_location_ts | |
ba4ad400 DM |
1470 | ? (LINEMAPS_MACRO_USED (line_table) - (i + 1)) |
1471 | : i); | |
0e50b624 | 1472 | const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx); |
ba4ad400 DM |
1473 | fprintf (stream, "MACRO %i: %s (%u tokens)\n", |
1474 | idx, | |
1475 | linemap_map_get_macro_name (map), | |
1476 | MACRO_MAP_NUM_MACRO_TOKENS (map)); | |
1477 | dump_location_range (stream, | |
1478 | map->start_location, | |
1479 | (map->start_location | |
1480 | + MACRO_MAP_NUM_MACRO_TOKENS (map))); | |
b0f19336 | 1481 | inform (map->get_expansion_point_location (), |
ba4ad400 | 1482 | "expansion point is location %i", |
b0f19336 | 1483 | map->get_expansion_point_location ()); |
ba4ad400 DM |
1484 | fprintf (stream, " map->start_location: %u\n", |
1485 | map->start_location); | |
1486 | ||
1487 | fprintf (stream, " macro_locations:\n"); | |
1488 | for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++) | |
1489 | { | |
620e594b DM |
1490 | location_t x = MACRO_MAP_LOCATIONS (map)[2 * i]; |
1491 | location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1]; | |
ba4ad400 DM |
1492 | |
1493 | /* linemap_add_macro_token encodes token numbers in an expansion | |
1494 | by putting them after MAP_START_LOCATION. */ | |
1495 | ||
1496 | /* I'm typically seeing 4 uninitialized entries at the end of | |
1497 | 0xafafafaf. | |
e53b6e56 | 1498 | This appears to be due to macro.cc:replace_args |
ba4ad400 DM |
1499 | adding 2 extra args for padding tokens; presumably there may |
1500 | be a leading and/or trailing padding token injected, | |
1501 | each for 2 more location slots. | |
620e594b | 1502 | This would explain there being up to 4 location_ts slots |
ba4ad400 DM |
1503 | that may be uninitialized. */ |
1504 | ||
1505 | fprintf (stream, " %u: %u, %u\n", | |
1506 | i, | |
1507 | x, | |
1508 | y); | |
1509 | if (x == y) | |
1510 | { | |
1511 | if (x < MAP_START_LOCATION (map)) | |
a9c697b8 MS |
1512 | inform (x, "token %u has %<x-location == y-location == %u%>", |
1513 | i, x); | |
ba4ad400 DM |
1514 | else |
1515 | fprintf (stream, | |
1516 | "x-location == y-location == %u encodes token # %u\n", | |
1517 | x, x - MAP_START_LOCATION (map)); | |
1518 | } | |
1519 | else | |
1520 | { | |
a9c697b8 MS |
1521 | inform (x, "token %u has %<x-location == %u%>", i, x); |
1522 | inform (x, "token %u has %<y-location == %u%>", i, y); | |
ba4ad400 DM |
1523 | } |
1524 | } | |
1525 | fprintf (stream, "\n"); | |
1526 | } | |
1527 | ||
620e594b | 1528 | /* It appears that MAX_LOCATION_T itself is never assigned to a |
ba4ad400 DM |
1529 | macro map, presumably due to an off-by-one error somewhere |
1530 | between the logic in linemap_enter_macro and | |
1531 | LINEMAPS_MACRO_LOWEST_LOCATION. */ | |
620e594b DM |
1532 | dump_labelled_location_range (stream, "MAX_LOCATION_T", |
1533 | MAX_LOCATION_T, | |
1534 | MAX_LOCATION_T + 1); | |
ba4ad400 DM |
1535 | |
1536 | /* Visualize ad-hoc values. */ | |
1537 | dump_labelled_location_range (stream, "AD-HOC LOCATIONS", | |
620e594b | 1538 | MAX_LOCATION_T + 1, UINT_MAX); |
ba4ad400 | 1539 | } |
d9b950dd | 1540 | |
88fa5555 DM |
1541 | /* string_concat's constructor. */ |
1542 | ||
1543 | string_concat::string_concat (int num, location_t *locs) | |
1544 | : m_num (num) | |
1545 | { | |
1546 | m_locs = ggc_vec_alloc <location_t> (num); | |
1547 | for (int i = 0; i < num; i++) | |
1548 | m_locs[i] = locs[i]; | |
1549 | } | |
1550 | ||
1551 | /* string_concat_db's constructor. */ | |
1552 | ||
1553 | string_concat_db::string_concat_db () | |
1554 | { | |
1555 | m_table = hash_map <location_hash, string_concat *>::create_ggc (64); | |
1556 | } | |
1557 | ||
1558 | /* Record that a string concatenation occurred, covering NUM | |
1559 | string literal tokens. LOCS is an array of size NUM, containing the | |
1560 | locations of the tokens. A copy of LOCS is taken. */ | |
1561 | ||
1562 | void | |
1563 | string_concat_db::record_string_concatenation (int num, location_t *locs) | |
1564 | { | |
1565 | gcc_assert (num > 1); | |
1566 | gcc_assert (locs); | |
1567 | ||
1568 | location_t key_loc = get_key_loc (locs[0]); | |
7d79c3eb TS |
1569 | /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values: |
1570 | any data now recorded under key 'key_loc' would be overwritten by a | |
1571 | subsequent call with the same key 'key_loc'. */ | |
1572 | if (RESERVED_LOCATION_P (key_loc)) | |
1573 | return; | |
88fa5555 DM |
1574 | |
1575 | string_concat *concat | |
1576 | = new (ggc_alloc <string_concat> ()) string_concat (num, locs); | |
1577 | m_table->put (key_loc, concat); | |
1578 | } | |
1579 | ||
700d4cb0 | 1580 | /* Determine if LOC was the location of the initial token of a |
88fa5555 DM |
1581 | concatenation of string literal tokens. |
1582 | If so, *OUT_NUM is written to with the number of tokens, and | |
1583 | *OUT_LOCS with the location of an array of locations of the | |
1584 | tokens, and return true. *OUT_LOCS is a borrowed pointer to | |
1585 | storage owned by the string_concat_db. | |
1586 | Otherwise, return false. */ | |
1587 | ||
1588 | bool | |
1589 | string_concat_db::get_string_concatenation (location_t loc, | |
1590 | int *out_num, | |
1591 | location_t **out_locs) | |
1592 | { | |
1593 | gcc_assert (out_num); | |
1594 | gcc_assert (out_locs); | |
1595 | ||
1596 | location_t key_loc = get_key_loc (loc); | |
7d79c3eb TS |
1597 | /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see |
1598 | discussion in 'string_concat_db::record_string_concatenation'. */ | |
1599 | if (RESERVED_LOCATION_P (key_loc)) | |
1600 | return false; | |
88fa5555 DM |
1601 | |
1602 | string_concat **concat = m_table->get (key_loc); | |
1603 | if (!concat) | |
1604 | return false; | |
1605 | ||
1606 | *out_num = (*concat)->m_num; | |
1607 | *out_locs =(*concat)->m_locs; | |
1608 | return true; | |
1609 | } | |
1610 | ||
1611 | /* Internal function. Canonicalize LOC into a form suitable for | |
1612 | use as a key within the database, stripping away macro expansion, | |
1613 | ad-hoc information, and range information, using the location of | |
1614 | the start of LOC within an ordinary linemap. */ | |
1615 | ||
1616 | location_t | |
1617 | string_concat_db::get_key_loc (location_t loc) | |
1618 | { | |
1619 | loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION, | |
1620 | NULL); | |
1621 | ||
1622 | loc = get_range_from_loc (line_table, loc).m_start; | |
1623 | ||
1624 | return loc; | |
1625 | } | |
1626 | ||
1627 | /* Helper class for use within get_substring_ranges_for_loc. | |
1628 | An vec of cpp_string with responsibility for releasing all of the | |
1629 | str->text for each str in the vector. */ | |
1630 | ||
1631 | class auto_cpp_string_vec : public auto_vec <cpp_string> | |
1632 | { | |
1633 | public: | |
1634 | auto_cpp_string_vec (int alloc) | |
1635 | : auto_vec <cpp_string> (alloc) {} | |
1636 | ||
1637 | ~auto_cpp_string_vec () | |
1638 | { | |
1639 | /* Clean up the copies within this vec. */ | |
1640 | int i; | |
1641 | cpp_string *str; | |
1642 | FOR_EACH_VEC_ELT (*this, i, str) | |
1643 | free (const_cast <unsigned char *> (str->text)); | |
1644 | } | |
1645 | }; | |
1646 | ||
1647 | /* Attempt to populate RANGES with source location information on the | |
1648 | individual characters within the string literal found at STRLOC. | |
1649 | If CONCATS is non-NULL, then any string literals that the token at | |
1650 | STRLOC was concatenated with are also added to RANGES. | |
1651 | ||
1652 | Return NULL if successful, or an error message if any errors occurred (in | |
1653 | which case RANGES may be only partially populated and should not | |
1654 | be used). | |
1655 | ||
1656 | This is implemented by re-parsing the relevant source line(s). */ | |
1657 | ||
1658 | static const char * | |
1659 | get_substring_ranges_for_loc (cpp_reader *pfile, | |
1bdd665a | 1660 | file_cache &fc, |
88fa5555 DM |
1661 | string_concat_db *concats, |
1662 | location_t strloc, | |
1663 | enum cpp_ttype type, | |
1664 | cpp_substring_ranges &ranges) | |
1665 | { | |
1666 | gcc_assert (pfile); | |
1667 | ||
1668 | if (strloc == UNKNOWN_LOCATION) | |
1669 | return "unknown location"; | |
1670 | ||
67b5d0b2 DM |
1671 | /* Reparsing the strings requires accurate location information. |
1672 | If -ftrack-macro-expansion has been overridden from its default | |
1673 | of 2, then we might have a location of a macro expansion point, | |
1674 | rather than the location of the literal itself. | |
1675 | Avoid this by requiring that we have full macro expansion tracking | |
1676 | for substring locations to be available. */ | |
1677 | if (cpp_get_options (pfile)->track_macro_expansion != 2) | |
1678 | return "track_macro_expansion != 2"; | |
1679 | ||
94f597df DM |
1680 | /* If #line or # 44 "file"-style directives are present, then there's |
1681 | no guarantee that the line numbers we have can be used to locate | |
1682 | the strings. For example, we might have a .i file with # directives | |
1683 | pointing back to lines within a .c file, but the .c file might | |
1684 | have been edited since the .i file was created. | |
1685 | In such a case, the safest course is to disable on-demand substring | |
1686 | locations. */ | |
1687 | if (line_table->seen_line_directive) | |
1688 | return "seen line directive"; | |
1689 | ||
88fa5555 DM |
1690 | /* If string concatenation has occurred at STRLOC, get the locations |
1691 | of all of the literal tokens making up the compound string. | |
1692 | Otherwise, just use STRLOC. */ | |
1693 | int num_locs = 1; | |
1694 | location_t *strlocs = &strloc; | |
1695 | if (concats) | |
1696 | concats->get_string_concatenation (strloc, &num_locs, &strlocs); | |
1697 | ||
1698 | auto_cpp_string_vec strs (num_locs); | |
1699 | auto_vec <cpp_string_location_reader> loc_readers (num_locs); | |
1700 | for (int i = 0; i < num_locs; i++) | |
1701 | { | |
1702 | /* Get range of strloc. We will use it to locate the start and finish | |
1703 | of the literal token within the line. */ | |
1704 | source_range src_range = get_range_from_loc (line_table, strlocs[i]); | |
1705 | ||
1706 | if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table)) | |
0d48e877 DM |
1707 | { |
1708 | /* If the string token was within a macro expansion, then we can | |
1709 | cope with it for the simple case where we have a single token. | |
1710 | Otherwise, bail out. */ | |
1711 | if (src_range.m_start != src_range.m_finish) | |
1712 | return "macro expansion"; | |
1713 | } | |
1714 | else | |
1715 | { | |
1716 | if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS) | |
1717 | /* If so, we can't reliably determine where the token started within | |
1718 | its line. */ | |
1719 | return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS"; | |
1720 | ||
1721 | if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS) | |
1722 | /* If so, we can't reliably determine where the token finished | |
1723 | within its line. */ | |
1724 | return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS"; | |
1725 | } | |
88fa5555 DM |
1726 | |
1727 | expanded_location start | |
0d48e877 DM |
1728 | = expand_location_to_spelling_point (src_range.m_start, |
1729 | LOCATION_ASPECT_START); | |
88fa5555 | 1730 | expanded_location finish |
0d48e877 DM |
1731 | = expand_location_to_spelling_point (src_range.m_finish, |
1732 | LOCATION_ASPECT_FINISH); | |
88fa5555 DM |
1733 | if (start.file != finish.file) |
1734 | return "range endpoints are in different files"; | |
1735 | if (start.line != finish.line) | |
1736 | return "range endpoints are on different lines"; | |
1737 | if (start.column > finish.column) | |
1738 | return "range endpoints are reversed"; | |
1739 | ||
1bdd665a | 1740 | char_span line = fc.get_source_line (start.file, start.line); |
7761dfbe | 1741 | if (!line) |
88fa5555 DM |
1742 | return "unable to read source line"; |
1743 | ||
1744 | /* Determine the location of the literal (including quotes | |
1745 | and leading prefix chars, such as the 'u' in a u"" | |
1746 | token). */ | |
7761dfbe | 1747 | size_t literal_length = finish.column - start.column + 1; |
88fa5555 | 1748 | |
7cfa044d | 1749 | /* Ensure that we don't crash if we got the wrong location. */ |
31dd5cd6 MP |
1750 | if (start.column < 1) |
1751 | return "zero start column"; | |
7761dfbe | 1752 | if (line.length () < (start.column - 1 + literal_length)) |
7cfa044d DM |
1753 | return "line is not wide enough"; |
1754 | ||
7761dfbe DM |
1755 | char_span literal = line.subspan (start.column - 1, literal_length); |
1756 | ||
88fa5555 DM |
1757 | cpp_string from; |
1758 | from.len = literal_length; | |
1759 | /* Make a copy of the literal, to avoid having to rely on | |
1760 | the lifetime of the copy of the line within the cache. | |
1761 | This will be released by the auto_cpp_string_vec dtor. */ | |
7761dfbe | 1762 | from.text = (unsigned char *)literal.xstrdup (); |
88fa5555 DM |
1763 | strs.safe_push (from); |
1764 | ||
1765 | /* For very long lines, a new linemap could have started | |
1766 | halfway through the token. | |
1767 | Ensure that the loc_reader uses the linemap of the | |
1768 | *end* of the token for its start location. */ | |
05d57d65 DM |
1769 | const line_map_ordinary *start_ord_map; |
1770 | linemap_resolve_location (line_table, src_range.m_start, | |
1771 | LRK_SPELLING_LOCATION, &start_ord_map); | |
88fa5555 DM |
1772 | const line_map_ordinary *final_ord_map; |
1773 | linemap_resolve_location (line_table, src_range.m_finish, | |
05d57d65 | 1774 | LRK_SPELLING_LOCATION, &final_ord_map); |
3d0a5393 DM |
1775 | if (start_ord_map == NULL || final_ord_map == NULL) |
1776 | return "failed to get ordinary maps"; | |
05d57d65 DM |
1777 | /* Bulletproofing. We ought to only have different ordinary maps |
1778 | for start vs finish due to line-length jumps. */ | |
1779 | if (start_ord_map != final_ord_map | |
1780 | && start_ord_map->to_file != final_ord_map->to_file) | |
ef33afeb DM |
1781 | return "start and finish are spelled in different ordinary maps"; |
1782 | /* The file from linemap_resolve_location ought to match that from | |
1783 | expand_location_to_spelling_point. */ | |
1784 | if (start_ord_map->to_file != start.file) | |
1785 | return "mismatching file after resolving linemap"; | |
1786 | ||
88fa5555 DM |
1787 | location_t start_loc |
1788 | = linemap_position_for_line_and_column (line_table, final_ord_map, | |
1789 | start.line, start.column); | |
1790 | ||
1791 | cpp_string_location_reader loc_reader (start_loc, line_table); | |
1792 | loc_readers.safe_push (loc_reader); | |
1793 | } | |
1794 | ||
1795 | /* Rerun cpp_interpret_string, or rather, a modified version of it. */ | |
1796 | const char *err = cpp_interpret_string_ranges (pfile, strs.address (), | |
1797 | loc_readers.address (), | |
1798 | num_locs, &ranges, type); | |
1799 | if (err) | |
1800 | return err; | |
1801 | ||
1802 | /* Success: "ranges" should now contain information on the string. */ | |
1803 | return NULL; | |
1804 | } | |
1805 | ||
65e736c0 DM |
1806 | /* Attempt to populate *OUT_LOC with source location information on the |
1807 | given characters within the string literal found at STRLOC. | |
1808 | CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution | |
1809 | character set. | |
1810 | ||
1811 | For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7 | |
1812 | and string literal "012345\n789" | |
1813 | *OUT_LOC is written to with: | |
1814 | "012345\n789" | |
1815 | ~^~~~~ | |
1816 | ||
88fa5555 DM |
1817 | If CONCATS is non-NULL, then any string literals that the token at |
1818 | STRLOC was concatenated with are also considered. | |
1819 | ||
1820 | This is implemented by re-parsing the relevant source line(s). | |
1821 | ||
1822 | Return NULL if successful, or an error message if any errors occurred. | |
1823 | Error messages are intended for GCC developers (to help debugging) rather | |
1824 | than for end-users. */ | |
1825 | ||
1826 | const char * | |
620e594b | 1827 | get_location_within_string (cpp_reader *pfile, |
1bdd665a | 1828 | file_cache &fc, |
620e594b DM |
1829 | string_concat_db *concats, |
1830 | location_t strloc, | |
1831 | enum cpp_ttype type, | |
1832 | int caret_idx, int start_idx, int end_idx, | |
1833 | location_t *out_loc) | |
65e736c0 DM |
1834 | { |
1835 | gcc_checking_assert (caret_idx >= 0); | |
88fa5555 DM |
1836 | gcc_checking_assert (start_idx >= 0); |
1837 | gcc_checking_assert (end_idx >= 0); | |
65e736c0 | 1838 | gcc_assert (out_loc); |
88fa5555 DM |
1839 | |
1840 | cpp_substring_ranges ranges; | |
1841 | const char *err | |
1bdd665a | 1842 | = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges); |
88fa5555 DM |
1843 | if (err) |
1844 | return err; | |
1845 | ||
65e736c0 DM |
1846 | if (caret_idx >= ranges.get_num_ranges ()) |
1847 | return "caret_idx out of range"; | |
88fa5555 DM |
1848 | if (start_idx >= ranges.get_num_ranges ()) |
1849 | return "start_idx out of range"; | |
1850 | if (end_idx >= ranges.get_num_ranges ()) | |
1851 | return "end_idx out of range"; | |
1852 | ||
65e736c0 DM |
1853 | *out_loc = make_location (ranges.get_range (caret_idx).m_start, |
1854 | ranges.get_range (start_idx).m_start, | |
1855 | ranges.get_range (end_idx).m_finish); | |
1856 | return NULL; | |
1857 | } | |
1858 | ||
f1adf45b ER |
1859 | /* Associate the DISCRIMINATOR with LOCUS, and return a new locus. */ |
1860 | ||
1861 | location_t | |
1862 | location_with_discriminator (location_t locus, int discriminator) | |
1863 | { | |
1864 | tree block = LOCATION_BLOCK (locus); | |
1865 | source_range src_range = get_range_from_loc (line_table, locus); | |
1866 | locus = get_pure_location (locus); | |
1867 | ||
1868 | if (locus == UNKNOWN_LOCATION) | |
1869 | return locus; | |
1870 | ||
1f68a3e8 DM |
1871 | return line_table->get_or_create_combined_loc (locus, src_range, block, |
1872 | discriminator); | |
f1adf45b ER |
1873 | } |
1874 | ||
1875 | /* Return TRUE if LOCUS represents a location with a discriminator. */ | |
1876 | ||
1877 | bool | |
1878 | has_discriminator (location_t locus) | |
1879 | { | |
1880 | return get_discriminator_from_loc (locus) != 0; | |
1881 | } | |
1882 | ||
1883 | /* Return the discriminator for LOCUS. */ | |
1884 | ||
1885 | int | |
1886 | get_discriminator_from_loc (location_t locus) | |
1887 | { | |
1888 | return get_discriminator_from_loc (line_table, locus); | |
1889 | } | |
1890 | ||
0e06d2b3 DM |
1891 | #if CHECKING_P |
1892 | ||
1893 | namespace selftest { | |
1894 | ||
1895 | /* Selftests of location handling. */ | |
1896 | ||
65e736c0 DM |
1897 | /* Attempt to populate *OUT_RANGE with source location information on the |
1898 | given character within the string literal found at STRLOC. | |
1899 | CHAR_IDX refers to an offset within the execution character set. | |
1900 | If CONCATS is non-NULL, then any string literals that the token at | |
1901 | STRLOC was concatenated with are also considered. | |
1902 | ||
1903 | This is implemented by re-parsing the relevant source line(s). | |
1904 | ||
1905 | Return NULL if successful, or an error message if any errors occurred. | |
1906 | Error messages are intended for GCC developers (to help debugging) rather | |
1907 | than for end-users. */ | |
1908 | ||
1909 | static const char * | |
1910 | get_source_range_for_char (cpp_reader *pfile, | |
1bdd665a | 1911 | file_cache &fc, |
65e736c0 DM |
1912 | string_concat_db *concats, |
1913 | location_t strloc, | |
1914 | enum cpp_ttype type, | |
1915 | int char_idx, | |
1916 | source_range *out_range) | |
1917 | { | |
1918 | gcc_checking_assert (char_idx >= 0); | |
1919 | gcc_assert (out_range); | |
1920 | ||
1921 | cpp_substring_ranges ranges; | |
1922 | const char *err | |
1bdd665a | 1923 | = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges); |
65e736c0 DM |
1924 | if (err) |
1925 | return err; | |
1926 | ||
1927 | if (char_idx >= ranges.get_num_ranges ()) | |
1928 | return "char_idx out of range"; | |
1929 | ||
1930 | *out_range = ranges.get_range (char_idx); | |
88fa5555 DM |
1931 | return NULL; |
1932 | } | |
1933 | ||
65e736c0 | 1934 | /* As get_source_range_for_char, but write to *OUT the number |
88fa5555 DM |
1935 | of ranges that are available. */ |
1936 | ||
0e06d2b3 | 1937 | static const char * |
88fa5555 | 1938 | get_num_source_ranges_for_substring (cpp_reader *pfile, |
1bdd665a | 1939 | file_cache &fc, |
88fa5555 DM |
1940 | string_concat_db *concats, |
1941 | location_t strloc, | |
1942 | enum cpp_ttype type, | |
1943 | int *out) | |
1944 | { | |
1945 | gcc_assert (out); | |
1946 | ||
1947 | cpp_substring_ranges ranges; | |
1948 | const char *err | |
1bdd665a | 1949 | = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges); |
88fa5555 DM |
1950 | |
1951 | if (err) | |
1952 | return err; | |
1953 | ||
1954 | *out = ranges.get_num_ranges (); | |
1955 | return NULL; | |
1956 | } | |
1957 | ||
d9b950dd DM |
1958 | /* Selftests of location handling. */ |
1959 | ||
082284da DM |
1960 | /* Verify that compare() on linenum_type handles comparisons over the full |
1961 | range of the type. */ | |
1962 | ||
1963 | static void | |
1964 | test_linenum_comparisons () | |
1965 | { | |
1966 | linenum_type min_line (0); | |
1967 | linenum_type max_line (0xffffffff); | |
1968 | ASSERT_EQ (0, compare (min_line, min_line)); | |
1969 | ASSERT_EQ (0, compare (max_line, max_line)); | |
1970 | ||
1971 | ASSERT_GT (compare (max_line, min_line), 0); | |
1972 | ASSERT_LT (compare (min_line, max_line), 0); | |
1973 | } | |
1974 | ||
741d3be5 DM |
1975 | /* Helper function for verifying location data: when location_t |
1976 | values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated | |
1977 | as having column 0. */ | |
1978 | ||
1979 | static bool | |
1980 | should_have_column_data_p (location_t loc) | |
1981 | { | |
1982 | if (IS_ADHOC_LOC (loc)) | |
1983 | loc = get_location_from_adhoc_loc (line_table, loc); | |
1984 | if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS) | |
1985 | return false; | |
1986 | return true; | |
1987 | } | |
1988 | ||
1989 | /* Selftest for should_have_column_data_p. */ | |
1990 | ||
1991 | static void | |
1992 | test_should_have_column_data_p () | |
1993 | { | |
1994 | ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT)); | |
1995 | ASSERT_TRUE | |
1996 | (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS)); | |
1997 | ASSERT_FALSE | |
1998 | (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1)); | |
1999 | } | |
2000 | ||
d9b950dd DM |
2001 | /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN |
2002 | on LOC. */ | |
2003 | ||
2004 | static void | |
2005 | assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum, | |
2006 | location_t loc) | |
2007 | { | |
2008 | ASSERT_STREQ (exp_filename, LOCATION_FILE (loc)); | |
2009 | ASSERT_EQ (exp_linenum, LOCATION_LINE (loc)); | |
741d3be5 DM |
2010 | /* If location_t values are sufficiently high, then column numbers |
2011 | will be unavailable and LOCATION_COLUMN (loc) will be 0. | |
2012 | When close to the threshold, column numbers *may* be present: if | |
2013 | the final linemap before the threshold contains a line that straddles | |
2014 | the threshold, locations in that line have column information. */ | |
2015 | if (should_have_column_data_p (loc)) | |
2016 | ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc)); | |
2017 | } | |
2018 | ||
f87e22c5 DM |
/* Many selftests build a line table and one or more line maps in it.

   To maximize coverage, such tests are run in a variety of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.cc: there are various threshold
     values for location_t beyond which line-map.cc changes behavior
     (disabling of the range-packing optimization, disabling of
     column-tracking).  These are exercised by starting the line_table
     at interesting values at or near the thresholds.

   An instance of this class describes one case of that test matrix.  */

class line_table_case
{
public:
  int m_default_range_bits;
  int m_base_location;

  line_table_case (int default_range_bits, int base_location)
  : m_default_range_bits (default_range_bits),
    m_base_location (base_location)
  {
  }
};
2046 | ||
f87e22c5 DM |
2047 | /* Constructor. Store the old value of line_table, and create a new |
2048 | one, using sane defaults. */ | |
741d3be5 | 2049 | |
f87e22c5 | 2050 | line_table_test::line_table_test () |
741d3be5 | 2051 | { |
f87e22c5 DM |
2052 | gcc_assert (saved_line_table == NULL); |
2053 | saved_line_table = line_table; | |
2054 | line_table = ggc_alloc<line_maps> (); | |
2055 | linemap_init (line_table, BUILTINS_LOCATION); | |
1f68a3e8 DM |
2056 | gcc_assert (saved_line_table->m_reallocator); |
2057 | line_table->m_reallocator = saved_line_table->m_reallocator; | |
2058 | gcc_assert (saved_line_table->m_round_alloc_size); | |
2059 | line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size; | |
f87e22c5 DM |
2060 | line_table->default_range_bits = 0; |
2061 | } | |
741d3be5 DM |
2062 | |
2063 | /* Constructor. Store the old value of line_table, and create a new | |
2064 | one, using the sitation described in CASE_. */ | |
2065 | ||
f87e22c5 | 2066 | line_table_test::line_table_test (const line_table_case &case_) |
741d3be5 | 2067 | { |
f87e22c5 DM |
2068 | gcc_assert (saved_line_table == NULL); |
2069 | saved_line_table = line_table; | |
741d3be5 DM |
2070 | line_table = ggc_alloc<line_maps> (); |
2071 | linemap_init (line_table, BUILTINS_LOCATION); | |
1f68a3e8 DM |
2072 | gcc_assert (saved_line_table->m_reallocator); |
2073 | line_table->m_reallocator = saved_line_table->m_reallocator; | |
2074 | gcc_assert (saved_line_table->m_round_alloc_size); | |
2075 | line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size; | |
741d3be5 DM |
2076 | line_table->default_range_bits = case_.m_default_range_bits; |
2077 | if (case_.m_base_location) | |
2078 | { | |
2079 | line_table->highest_location = case_.m_base_location; | |
2080 | line_table->highest_line = case_.m_base_location; | |
2081 | } | |
2082 | } | |
2083 | ||
2084 | /* Destructor. Restore the old value of line_table. */ | |
2085 | ||
f87e22c5 | 2086 | line_table_test::~line_table_test () |
741d3be5 | 2087 | { |
f87e22c5 DM |
2088 | gcc_assert (saved_line_table != NULL); |
2089 | line_table = saved_line_table; | |
2090 | saved_line_table = NULL; | |
d9b950dd DM |
2091 | } |
2092 | ||
2093 | /* Verify basic operation of ordinary linemaps. */ | |
2094 | ||
2095 | static void | |
741d3be5 | 2096 | test_accessing_ordinary_linemaps (const line_table_case &case_) |
d9b950dd | 2097 | { |
f87e22c5 | 2098 | line_table_test ltt (case_); |
741d3be5 | 2099 | |
d9b950dd DM |
2100 | /* Build a simple linemap describing some locations. */ |
2101 | linemap_add (line_table, LC_ENTER, false, "foo.c", 0); | |
2102 | ||
2103 | linemap_line_start (line_table, 1, 100); | |
2104 | location_t loc_a = linemap_position_for_column (line_table, 1); | |
2105 | location_t loc_b = linemap_position_for_column (line_table, 23); | |
2106 | ||
2107 | linemap_line_start (line_table, 2, 100); | |
2108 | location_t loc_c = linemap_position_for_column (line_table, 1); | |
2109 | location_t loc_d = linemap_position_for_column (line_table, 17); | |
2110 | ||
2111 | /* Example of a very long line. */ | |
2112 | linemap_line_start (line_table, 3, 2000); | |
2113 | location_t loc_e = linemap_position_for_column (line_table, 700); | |
2114 | ||
5ccf1d8d DM |
2115 | /* Transitioning back to a short line. */ |
2116 | linemap_line_start (line_table, 4, 0); | |
2117 | location_t loc_back_to_short = linemap_position_for_column (line_table, 100); | |
2118 | ||
2119 | if (should_have_column_data_p (loc_back_to_short)) | |
2120 | { | |
2121 | /* Verify that we switched to short lines in the linemap. */ | |
2122 | line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table); | |
2123 | ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits); | |
2124 | } | |
2125 | ||
b9f4757f DM |
2126 | /* Example of a line that will eventually be seen to be longer |
2127 | than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is | |
2128 | below that. */ | |
2129 | linemap_line_start (line_table, 5, 2000); | |
2130 | ||
2131 | location_t loc_start_of_very_long_line | |
2132 | = linemap_position_for_column (line_table, 2000); | |
2133 | location_t loc_too_wide | |
2134 | = linemap_position_for_column (line_table, 4097); | |
2135 | location_t loc_too_wide_2 | |
2136 | = linemap_position_for_column (line_table, 4098); | |
2137 | ||
2138 | /* ...and back to a sane line length. */ | |
2139 | linemap_line_start (line_table, 6, 100); | |
2140 | location_t loc_sane_again = linemap_position_for_column (line_table, 10); | |
2141 | ||
d9b950dd DM |
2142 | linemap_add (line_table, LC_LEAVE, false, NULL, 0); |
2143 | ||
2144 | /* Multiple files. */ | |
2145 | linemap_add (line_table, LC_ENTER, false, "bar.c", 0); | |
2146 | linemap_line_start (line_table, 1, 200); | |
2147 | location_t loc_f = linemap_position_for_column (line_table, 150); | |
2148 | linemap_add (line_table, LC_LEAVE, false, NULL, 0); | |
2149 | ||
2150 | /* Verify that we can recover the location info. */ | |
2151 | assert_loceq ("foo.c", 1, 1, loc_a); | |
2152 | assert_loceq ("foo.c", 1, 23, loc_b); | |
2153 | assert_loceq ("foo.c", 2, 1, loc_c); | |
2154 | assert_loceq ("foo.c", 2, 17, loc_d); | |
2155 | assert_loceq ("foo.c", 3, 700, loc_e); | |
5ccf1d8d | 2156 | assert_loceq ("foo.c", 4, 100, loc_back_to_short); |
b9f4757f DM |
2157 | |
2158 | /* In the very wide line, the initial location should be fully tracked. */ | |
2159 | assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line); | |
2160 | /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should | |
2161 | be disabled. */ | |
2162 | assert_loceq ("foo.c", 5, 0, loc_too_wide); | |
2163 | assert_loceq ("foo.c", 5, 0, loc_too_wide_2); | |
2164 | /*...and column-tracking should be re-enabled for subsequent lines. */ | |
2165 | assert_loceq ("foo.c", 6, 10, loc_sane_again); | |
2166 | ||
d9b950dd DM |
2167 | assert_loceq ("bar.c", 1, 150, loc_f); |
2168 | ||
2169 | ASSERT_FALSE (is_location_from_builtin_token (loc_a)); | |
a01fc549 DM |
2170 | ASSERT_TRUE (pure_location_p (line_table, loc_a)); |
2171 | ||
2172 | /* Verify using make_location to build a range, and extracting data | |
2173 | back from it. */ | |
2174 | location_t range_c_b_d = make_location (loc_c, loc_b, loc_d); | |
2175 | ASSERT_FALSE (pure_location_p (line_table, range_c_b_d)); | |
2176 | ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d)); | |
2177 | source_range src_range = get_range_from_loc (line_table, range_c_b_d); | |
2178 | ASSERT_EQ (loc_b, src_range.m_start); | |
2179 | ASSERT_EQ (loc_d, src_range.m_finish); | |
d9b950dd DM |
2180 | } |
2181 | ||
2182 | /* Verify various properties of UNKNOWN_LOCATION. */ | |
2183 | ||
2184 | static void | |
2185 | test_unknown_location () | |
2186 | { | |
2187 | ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION)); | |
2188 | ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION)); | |
2189 | ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION)); | |
2190 | } | |
2191 | ||
2192 | /* Verify various properties of BUILTINS_LOCATION. */ | |
2193 | ||
2194 | static void | |
2195 | test_builtins () | |
2196 | { | |
15d31555 | 2197 | assert_loceq (special_fname_builtin (), 0, 0, BUILTINS_LOCATION); |
d9b950dd DM |
2198 | ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION); |
2199 | } | |
2200 | ||
9144eabb | 2201 | /* Regression test for make_location. |
cfa435e1 DM |
2202 | Ensure that we use pure locations for the start/finish of the range, |
2203 | rather than storing a packed or ad-hoc range as the start/finish. */ | |
9144eabb DM |
2204 | |
2205 | static void | |
2206 | test_make_location_nonpure_range_endpoints (const line_table_case &case_) | |
2207 | { | |
2208 | /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c | |
2209 | with C++ frontend. | |
2210 | ....................0000000001111111111222. | |
2211 | ....................1234567890123456789012. */ | |
2212 | const char *content = " r += !aaa == bbb;\n"; | |
2213 | temp_source_file tmp (SELFTEST_LOCATION, ".C", content); | |
2214 | line_table_test ltt (case_); | |
2215 | linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1); | |
2216 | ||
2217 | const location_t c11 = linemap_position_for_column (line_table, 11); | |
2218 | const location_t c12 = linemap_position_for_column (line_table, 12); | |
2219 | const location_t c13 = linemap_position_for_column (line_table, 13); | |
2220 | const location_t c14 = linemap_position_for_column (line_table, 14); | |
2221 | const location_t c21 = linemap_position_for_column (line_table, 21); | |
2222 | ||
2223 | if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS) | |
2224 | return; | |
2225 | ||
2226 | /* Use column 13 for the caret location, arbitrarily, to verify that we | |
2227 | handle start != caret. */ | |
2228 | const location_t aaa = make_location (c13, c12, c14); | |
2229 | ASSERT_EQ (c13, get_pure_location (aaa)); | |
2230 | ASSERT_EQ (c12, get_start (aaa)); | |
2231 | ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa))); | |
2232 | ASSERT_EQ (c14, get_finish (aaa)); | |
2233 | ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa))); | |
2234 | ||
2235 | /* Make a location using a location with a range as the start-point. */ | |
2236 | const location_t not_aaa = make_location (c11, aaa, c14); | |
2237 | ASSERT_EQ (c11, get_pure_location (not_aaa)); | |
2238 | /* It should use the start location of the range, not store the range | |
2239 | itself. */ | |
2240 | ASSERT_EQ (c12, get_start (not_aaa)); | |
2241 | ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa))); | |
2242 | ASSERT_EQ (c14, get_finish (not_aaa)); | |
2243 | ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa))); | |
2244 | ||
2245 | /* Similarly, make a location with a range as the end-point. */ | |
2246 | const location_t aaa_eq_bbb = make_location (c12, c12, c21); | |
2247 | ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb)); | |
2248 | ASSERT_EQ (c12, get_start (aaa_eq_bbb)); | |
2249 | ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb))); | |
2250 | ASSERT_EQ (c21, get_finish (aaa_eq_bbb)); | |
2251 | ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb))); | |
2252 | const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb); | |
2253 | /* It should use the finish location of the range, not store the range | |
2254 | itself. */ | |
2255 | ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb)); | |
2256 | ASSERT_EQ (c12, get_start (not_aaa_eq_bbb)); | |
2257 | ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb))); | |
2258 | ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb)); | |
2259 | ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb))); | |
2260 | } | |
2261 | ||
d9b950dd DM |
2262 | /* Verify reading of input files (e.g. for caret-based diagnostics). */ |
2263 | ||
2264 | static void | |
2265 | test_reading_source_line () | |
2266 | { | |
85ecd05c | 2267 | /* Create a tempfile and write some text to it. */ |
741d3be5 DM |
2268 | temp_source_file tmp (SELFTEST_LOCATION, ".txt", |
2269 | "01234567890123456789\n" | |
2270 | "This is the test text\n" | |
1adae327 | 2271 | "This is the 3rd line"); |
1bdd665a | 2272 | file_cache fc; |
85ecd05c DM |
2273 | |
2274 | /* Read back a specific line from the tempfile. */ | |
1bdd665a | 2275 | char_span source_line = fc.get_source_line (tmp.get_filename (), 3); |
7761dfbe DM |
2276 | ASSERT_TRUE (source_line); |
2277 | ASSERT_TRUE (source_line.get_buffer () != NULL); | |
2278 | ASSERT_EQ (20, source_line.length ()); | |
1adae327 | 2279 | ASSERT_TRUE (!strncmp ("This is the 3rd line", |
7761dfbe | 2280 | source_line.get_buffer (), source_line.length ())); |
1adae327 | 2281 | |
1bdd665a | 2282 | source_line = fc.get_source_line (tmp.get_filename (), 2); |
7761dfbe DM |
2283 | ASSERT_TRUE (source_line); |
2284 | ASSERT_TRUE (source_line.get_buffer () != NULL); | |
2285 | ASSERT_EQ (21, source_line.length ()); | |
1adae327 | 2286 | ASSERT_TRUE (!strncmp ("This is the test text", |
7761dfbe | 2287 | source_line.get_buffer (), source_line.length ())); |
85ecd05c | 2288 | |
1bdd665a | 2289 | source_line = fc.get_source_line (tmp.get_filename (), 4); |
7761dfbe DM |
2290 | ASSERT_FALSE (source_line); |
2291 | ASSERT_TRUE (source_line.get_buffer () == NULL); | |
d9b950dd DM |
2292 | } |
2293 | ||
741d3be5 DM |
2294 | /* Tests of lexing. */ |
2295 | ||
/* Check that cpp_token_as_text, applied to token TOK of PARSER, gives
   a string equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    const char *spelling_                                               \
      = (const char *) cpp_token_as_text ((PARSER), (TOK));             \
    ASSERT_STREQ ((EXPECTED_TEXT), spelling_);                          \
  SELFTEST_END_STMT
2304 | ||
2305 | /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM, | |
2306 | and ranges from EXP_START_COL to EXP_FINISH_COL. | |
2307 | Use LOC as the effective location of the selftest. */ | |
2308 | ||
2309 | static void | |
2310 | assert_token_loc_eq (const location &loc, | |
2311 | const cpp_token *tok, | |
2312 | const char *exp_filename, int exp_linenum, | |
2313 | int exp_start_col, int exp_finish_col) | |
2314 | { | |
2315 | location_t tok_loc = tok->src_loc; | |
2316 | ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc)); | |
2317 | ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc)); | |
2318 | ||
2319 | /* If location_t values are sufficiently high, then column numbers | |
2320 | will be unavailable. */ | |
2321 | if (!should_have_column_data_p (tok_loc)) | |
2322 | return; | |
2323 | ||
2324 | ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc)); | |
2325 | source_range tok_range = get_range_from_loc (line_table, tok_loc); | |
2326 | ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start)); | |
2327 | ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish)); | |
2328 | } | |
2329 | ||
2330 | /* Use assert_token_loc_eq to verify the TOK->src_loc, using | |
2331 | SELFTEST_LOCATION as the effective location of the selftest. */ | |
2332 | ||
2333 | #define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \ | |
2334 | EXP_START_COL, EXP_FINISH_COL) \ | |
2335 | assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \ | |
2336 | (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL)) | |
2337 | ||
2338 | /* Test of lexing a file using libcpp, verifying tokens and their | |
2339 | location information. */ | |
2340 | ||
2341 | static void | |
2342 | test_lexer (const line_table_case &case_) | |
2343 | { | |
2344 | /* Create a tempfile and write some text to it. */ | |
2345 | const char *content = | |
2346 | /*00000000011111111112222222222333333.3333444444444.455555555556 | |
2347 | 12345678901234567890123456789012345.6789012345678.901234567890. */ | |
2348 | ("test_name /* c-style comment */\n" | |
2349 | " \"test literal\"\n" | |
2350 | " // test c++-style comment\n" | |
2351 | " 42\n"); | |
2352 | temp_source_file tmp (SELFTEST_LOCATION, ".txt", content); | |
2353 | ||
f87e22c5 | 2354 | line_table_test ltt (case_); |
741d3be5 DM |
2355 | |
2356 | cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table); | |
2357 | ||
2358 | const char *fname = cpp_read_main_file (parser, tmp.get_filename ()); | |
2359 | ASSERT_NE (fname, NULL); | |
2360 | ||
2361 | /* Verify that we get the expected tokens back, with the correct | |
2362 | location information. */ | |
2363 | ||
2364 | location_t loc; | |
2365 | const cpp_token *tok; | |
2366 | tok = cpp_get_token_with_location (parser, &loc); | |
2367 | ASSERT_NE (tok, NULL); | |
2368 | ASSERT_EQ (tok->type, CPP_NAME); | |
2369 | ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name"); | |
2370 | ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9); | |
2371 | ||
2372 | tok = cpp_get_token_with_location (parser, &loc); | |
2373 | ASSERT_NE (tok, NULL); | |
2374 | ASSERT_EQ (tok->type, CPP_STRING); | |
2375 | ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\""); | |
2376 | ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48); | |
2377 | ||
2378 | tok = cpp_get_token_with_location (parser, &loc); | |
2379 | ASSERT_NE (tok, NULL); | |
2380 | ASSERT_EQ (tok->type, CPP_NUMBER); | |
2381 | ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42"); | |
2382 | ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5); | |
2383 | ||
2384 | tok = cpp_get_token_with_location (parser, &loc); | |
2385 | ASSERT_NE (tok, NULL); | |
2386 | ASSERT_EQ (tok->type, CPP_EOF); | |
2387 | ||
2388 | cpp_finish (parser, NULL); | |
2389 | cpp_destroy (parser); | |
2390 | } | |
2391 | ||
/* Forward decls.  */

class lexer_test;
class lexer_test_options;

/* A class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor.

   This is a polymorphic base class, so it has a virtual destructor:
   destroying a derived options object through a lexer_test_options
   pointer is then well-defined.  */

class lexer_test_options
{
 public:
  virtual ~lexer_test_options () {}

  /* Hook giving the options object a chance to adjust the given
     lexer_test.  */
  virtual void apply (lexer_test &) = 0;
};
2405 | ||
f5ea989d DM |
2406 | /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy |
2407 | in its dtor. | |
2408 | ||
2409 | This is needed by struct lexer_test to ensure that the cleanup of the | |
2410 | cpp_reader happens *after* the cleanup of the temp_source_file. */ | |
2411 | ||
2412 | class cpp_reader_ptr | |
2413 | { | |
2414 | public: | |
2415 | cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {} | |
2416 | ||
2417 | ~cpp_reader_ptr () | |
2418 | { | |
2419 | cpp_finish (m_ptr, NULL); | |
2420 | cpp_destroy (m_ptr); | |
2421 | } | |
2422 | ||
2423 | operator cpp_reader * () const { return m_ptr; } | |
2424 | ||
2425 | private: | |
2426 | cpp_reader *m_ptr; | |
2427 | }; | |
2428 | ||
88fa5555 DM |
2429 | /* A struct for writing lexer tests. */ |
2430 | ||
6c1dae73 | 2431 | class lexer_test |
88fa5555 | 2432 | { |
6c1dae73 | 2433 | public: |
88fa5555 DM |
2434 | lexer_test (const line_table_case &case_, const char *content, |
2435 | lexer_test_options *options); | |
2436 | ~lexer_test (); | |
2437 | ||
2438 | const cpp_token *get_token (); | |
2439 | ||
f5ea989d DM |
2440 | /* The ordering of these fields matters. |
2441 | The line_table_test must be first, since the cpp_reader_ptr | |
2442 | uses it. | |
2443 | The cpp_reader must be cleaned up *after* the temp_source_file | |
e53b6e56 | 2444 | since the filenames in input.cc's input cache are owned by the |
f5ea989d DM |
2445 | cpp_reader; in particular, when ~temp_source_file evicts the |
2446 | filename the filenames must still be alive. */ | |
f87e22c5 | 2447 | line_table_test m_ltt; |
f5ea989d DM |
2448 | cpp_reader_ptr m_parser; |
2449 | temp_source_file m_tempfile; | |
1bdd665a | 2450 | file_cache m_file_cache; |
88fa5555 | 2451 | string_concat_db m_concats; |
a3998c2f | 2452 | bool m_implicitly_expect_EOF; |
88fa5555 DM |
2453 | }; |
2454 | ||
2455 | /* Use an EBCDIC encoding for the execution charset, specifically | |
2456 | IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047"). | |
2457 | ||
2458 | This exercises iconv integration within libcpp. | |
2459 | Not every build of iconv supports the given charset, | |
2460 | so we need to flag this error and handle it gracefully. */ | |
2461 | ||
2462 | class ebcdic_execution_charset : public lexer_test_options | |
2463 | { | |
2464 | public: | |
2465 | ebcdic_execution_charset () : m_num_iconv_errors (0) | |
2466 | { | |
2467 | gcc_assert (s_singleton == NULL); | |
2468 | s_singleton = this; | |
2469 | } | |
2470 | ~ebcdic_execution_charset () | |
2471 | { | |
2472 | gcc_assert (s_singleton == this); | |
2473 | s_singleton = NULL; | |
2474 | } | |
2475 | ||
ff171cb1 | 2476 | void apply (lexer_test &test) final override |
88fa5555 DM |
2477 | { |
2478 | cpp_options *cpp_opts = cpp_get_options (test.m_parser); | |
2479 | cpp_opts->narrow_charset = "IBM1047"; | |
2480 | ||
2481 | cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser); | |
c24300ba | 2482 | callbacks->diagnostic = on_diagnostic; |
88fa5555 DM |
2483 | } |
2484 | ||
c24300ba DM |
2485 | static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED, |
2486 | enum cpp_diagnostic_level level ATTRIBUTE_UNUSED, | |
2487 | enum cpp_warning_reason reason ATTRIBUTE_UNUSED, | |
2488 | rich_location *richloc ATTRIBUTE_UNUSED, | |
2489 | const char *msgid, va_list *ap ATTRIBUTE_UNUSED) | |
88fa5555 DM |
2490 | ATTRIBUTE_FPTR_PRINTF(5,0) |
2491 | { | |
2492 | gcc_assert (s_singleton); | |
a7085816 JJ |
2493 | /* Avoid exgettext from picking this up, it is translated in libcpp. */ |
2494 | const char *msg = "conversion from %s to %s not supported by iconv"; | |
2495 | #ifdef ENABLE_NLS | |
2496 | msg = dgettext ("cpplib", msg); | |
2497 | #endif | |
e53b6e56 | 2498 | /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc |
88fa5555 | 2499 | when the local iconv build doesn't support the conversion. */ |
a7085816 | 2500 | if (strcmp (msgid, msg) == 0) |
88fa5555 DM |
2501 | { |
2502 | s_singleton->m_num_iconv_errors++; | |
2503 | return true; | |
2504 | } | |
2505 | ||
2506 | /* Otherwise, we have an unexpected error. */ | |
2507 | abort (); | |
2508 | } | |
2509 | ||
2510 | bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; } | |
2511 | ||
2512 | private: | |
2513 | static ebcdic_execution_charset *s_singleton; | |
2514 | int m_num_iconv_errors; | |
2515 | }; | |
2516 | ||
2517 | ebcdic_execution_charset *ebcdic_execution_charset::s_singleton; | |
2518 | ||
c24300ba | 2519 | /* A lexer_test_options subclass that records a list of diagnostic |
a3998c2f DM |
2520 | messages emitted by the lexer. */ |
2521 | ||
c24300ba | 2522 | class lexer_diagnostic_sink : public lexer_test_options |
a3998c2f DM |
2523 | { |
2524 | public: | |
c24300ba | 2525 | lexer_diagnostic_sink () |
a3998c2f DM |
2526 | { |
2527 | gcc_assert (s_singleton == NULL); | |
2528 | s_singleton = this; | |
2529 | } | |
c24300ba | 2530 | ~lexer_diagnostic_sink () |
a3998c2f DM |
2531 | { |
2532 | gcc_assert (s_singleton == this); | |
2533 | s_singleton = NULL; | |
2534 | ||
2535 | int i; | |
2536 | char *str; | |
c24300ba | 2537 | FOR_EACH_VEC_ELT (m_diagnostics, i, str) |
a3998c2f DM |
2538 | free (str); |
2539 | } | |
2540 | ||
ff171cb1 | 2541 | void apply (lexer_test &test) final override |
a3998c2f DM |
2542 | { |
2543 | cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser); | |
c24300ba | 2544 | callbacks->diagnostic = on_diagnostic; |
a3998c2f DM |
2545 | } |
2546 | ||
c24300ba DM |
2547 | static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED, |
2548 | enum cpp_diagnostic_level level ATTRIBUTE_UNUSED, | |
2549 | enum cpp_warning_reason reason ATTRIBUTE_UNUSED, | |
2550 | rich_location *richloc ATTRIBUTE_UNUSED, | |
2551 | const char *msgid, va_list *ap) | |
a3998c2f DM |
2552 | ATTRIBUTE_FPTR_PRINTF(5,0) |
2553 | { | |
2554 | char *msg = xvasprintf (msgid, *ap); | |
c24300ba | 2555 | s_singleton->m_diagnostics.safe_push (msg); |
a3998c2f DM |
2556 | return true; |
2557 | } | |
2558 | ||
c24300ba | 2559 | auto_vec<char *> m_diagnostics; |
a3998c2f DM |
2560 | |
2561 | private: | |
c24300ba | 2562 | static lexer_diagnostic_sink *s_singleton; |
a3998c2f DM |
2563 | }; |
2564 | ||
c24300ba | 2565 | lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton; |
a3998c2f | 2566 | |
88fa5555 DM |
2567 | /* Constructor. Override line_table with a new instance based on CASE_, |
2568 | and write CONTENT to a tempfile. Create a cpp_reader, and use it to | |
2569 | start parsing the tempfile. */ | |
2570 | ||
2571 | lexer_test::lexer_test (const line_table_case &case_, const char *content, | |
f5ea989d DM |
2572 | lexer_test_options *options) |
2573 | : m_ltt (case_), | |
2574 | m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)), | |
88fa5555 DM |
2575 | /* Create a tempfile and write the text to it. */ |
2576 | m_tempfile (SELFTEST_LOCATION, ".c", content), | |
a3998c2f DM |
2577 | m_concats (), |
2578 | m_implicitly_expect_EOF (true) | |
88fa5555 DM |
2579 | { |
2580 | if (options) | |
2581 | options->apply (*this); | |
2582 | ||
2583 | cpp_init_iconv (m_parser); | |
2584 | ||
2585 | /* Parse the file. */ | |
2586 | const char *fname = cpp_read_main_file (m_parser, | |
2587 | m_tempfile.get_filename ()); | |
2588 | ASSERT_NE (fname, NULL); | |
2589 | } | |
2590 | ||
a3998c2f | 2591 | /* Destructor. By default, verify that the next token in m_parser is EOF. */ |
88fa5555 DM |
2592 | |
2593 | lexer_test::~lexer_test () | |
2594 | { | |
2595 | location_t loc; | |
2596 | const cpp_token *tok; | |
2597 | ||
a3998c2f DM |
2598 | if (m_implicitly_expect_EOF) |
2599 | { | |
2600 | tok = cpp_get_token_with_location (m_parser, &loc); | |
2601 | ASSERT_NE (tok, NULL); | |
2602 | ASSERT_EQ (tok->type, CPP_EOF); | |
2603 | } | |
88fa5555 DM |
2604 | } |
2605 | ||
2606 | /* Get the next token from m_parser. */ | |
2607 | ||
2608 | const cpp_token * | |
2609 | lexer_test::get_token () | |
2610 | { | |
2611 | location_t loc; | |
2612 | const cpp_token *tok; | |
2613 | ||
2614 | tok = cpp_get_token_with_location (m_parser, &loc); | |
2615 | ASSERT_NE (tok, NULL); | |
2616 | return tok; | |
2617 | } | |
2618 | ||
2619 | /* Verify that locations within string literals are correctly handled. */ | |
2620 | ||
2621 | /* Verify get_source_range_for_substring for token(s) at STRLOC, | |
2622 | using the string concatenation database for TEST. | |
2623 | ||
2624 | Assert that the character at index IDX is on EXPECTED_LINE, | |
2625 | and that it begins at column EXPECTED_START_COL and ends at | |
2626 | EXPECTED_FINISH_COL (unless the locations are beyond | |
2627 | LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their | |
2628 | columns). */ | |
2629 | ||
2630 | static void | |
2631 | assert_char_at_range (const location &loc, | |
2632 | lexer_test& test, | |
2633 | location_t strloc, enum cpp_ttype type, int idx, | |
2634 | int expected_line, int expected_start_col, | |
2635 | int expected_finish_col) | |
2636 | { | |
2637 | cpp_reader *pfile = test.m_parser; | |
2638 | string_concat_db *concats = &test.m_concats; | |
2639 | ||
a954833d | 2640 | source_range actual_range = source_range(); |
88fa5555 | 2641 | const char *err |
1bdd665a DM |
2642 | = get_source_range_for_char (pfile, test.m_file_cache, |
2643 | concats, strloc, type, idx, | |
65e736c0 | 2644 | &actual_range); |
88fa5555 DM |
2645 | if (should_have_column_data_p (strloc)) |
2646 | ASSERT_EQ_AT (loc, NULL, err); | |
2647 | else | |
2648 | { | |
2649 | ASSERT_STREQ_AT (loc, | |
2650 | "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", | |
2651 | err); | |
2652 | return; | |
2653 | } | |
2654 | ||
2655 | int actual_start_line = LOCATION_LINE (actual_range.m_start); | |
2656 | ASSERT_EQ_AT (loc, expected_line, actual_start_line); | |
2657 | int actual_finish_line = LOCATION_LINE (actual_range.m_finish); | |
2658 | ASSERT_EQ_AT (loc, expected_line, actual_finish_line); | |
2659 | ||
2660 | if (should_have_column_data_p (actual_range.m_start)) | |
2661 | { | |
2662 | int actual_start_col = LOCATION_COLUMN (actual_range.m_start); | |
2663 | ASSERT_EQ_AT (loc, expected_start_col, actual_start_col); | |
2664 | } | |
2665 | if (should_have_column_data_p (actual_range.m_finish)) | |
2666 | { | |
2667 | int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish); | |
2668 | ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col); | |
2669 | } | |
2670 | } | |
2671 | ||
2672 | /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for | |
2673 | the effective location of any errors. */ | |
2674 | ||
2675 | #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \ | |
2676 | EXPECTED_START_COL, EXPECTED_FINISH_COL) \ | |
2677 | assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \ | |
2678 | (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \ | |
2679 | (EXPECTED_FINISH_COL)) | |
2680 | ||
2681 | /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC, | |
2682 | using the string concatenation database for TEST. | |
2683 | ||
2684 | Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */ | |
2685 | ||
2686 | static void | |
2687 | assert_num_substring_ranges (const location &loc, | |
2688 | lexer_test& test, | |
2689 | location_t strloc, | |
2690 | enum cpp_ttype type, | |
2691 | int expected_num_ranges) | |
2692 | { | |
2693 | cpp_reader *pfile = test.m_parser; | |
2694 | string_concat_db *concats = &test.m_concats; | |
2695 | ||
0e06d2b3 | 2696 | int actual_num_ranges = -1; |
88fa5555 | 2697 | const char *err |
1bdd665a DM |
2698 | = get_num_source_ranges_for_substring (pfile, test.m_file_cache, |
2699 | concats, strloc, type, | |
88fa5555 DM |
2700 | &actual_num_ranges); |
2701 | if (should_have_column_data_p (strloc)) | |
2702 | ASSERT_EQ_AT (loc, NULL, err); | |
2703 | else | |
2704 | { | |
2705 | ASSERT_STREQ_AT (loc, | |
2706 | "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", | |
2707 | err); | |
2708 | return; | |
2709 | } | |
2710 | ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges); | |
2711 | } | |
2712 | ||
2713 | /* Macro for calling assert_num_substring_ranges, supplying | |
2714 | SELFTEST_LOCATION for the effective location of any errors. */ | |
2715 | ||
2716 | #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \ | |
2717 | EXPECTED_NUM_RANGES) \ | |
2718 | assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \ | |
2719 | (TYPE), (EXPECTED_NUM_RANGES)) | |
2720 | ||
2721 | ||
2722 | /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC | |
2723 | returns an error (using the string concatenation database for TEST). */ | |
2724 | ||
2725 | static void | |
2726 | assert_has_no_substring_ranges (const location &loc, | |
2727 | lexer_test& test, | |
2728 | location_t strloc, | |
2729 | enum cpp_ttype type, | |
2730 | const char *expected_err) | |
2731 | { | |
2732 | cpp_reader *pfile = test.m_parser; | |
2733 | string_concat_db *concats = &test.m_concats; | |
2734 | cpp_substring_ranges ranges; | |
2735 | const char *actual_err | |
1bdd665a | 2736 | = get_substring_ranges_for_loc (pfile, test.m_file_cache, concats, strloc, |
88fa5555 DM |
2737 | type, ranges); |
2738 | if (should_have_column_data_p (strloc)) | |
2739 | ASSERT_STREQ_AT (loc, expected_err, actual_err); | |
2740 | else | |
2741 | ASSERT_STREQ_AT (loc, | |
2742 | "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", | |
2743 | actual_err); | |
2744 | } | |
2745 | ||
2746 | #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \ | |
2747 | assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \ | |
2748 | (STRLOC), (TYPE), (ERR)) | |
2749 | ||
2750 | /* Lex a simple string literal. Verify the substring location data, before | |
2751 | and after running cpp_interpret_string on it. */ | |
2752 | ||
2753 | static void | |
2754 | test_lexer_string_locations_simple (const line_table_case &case_) | |
2755 | { | |
2756 | /* Digits 0-9 (with 0 at column 10), the simple way. | |
2757 | ....................000000000.11111111112.2222222223333333333 | |
2758 | ....................123456789.01234567890.1234567890123456789 | |
2759 | We add a trailing comment to ensure that we correctly locate | |
2760 | the end of the string literal token. */ | |
2761 | const char *content = " \"0123456789\" /* not a string */\n"; | |
2762 | lexer_test test (case_, content, NULL); | |
2763 | ||
2764 | /* Verify that we get the expected token back, with the correct | |
2765 | location information. */ | |
2766 | const cpp_token *tok = test.get_token (); | |
2767 | ASSERT_EQ (tok->type, CPP_STRING); | |
2768 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\""); | |
2769 | ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20); | |
2770 | ||
2771 | /* At this point in lexing, the quote characters are treated as part of | |
2772 | the string (they are stripped off by cpp_interpret_string). */ | |
2773 | ||
2774 | ASSERT_EQ (tok->val.str.len, 12); | |
2775 | ||
2776 | /* Verify that cpp_interpret_string works. */ | |
2777 | cpp_string dst_string; | |
2778 | const enum cpp_ttype type = CPP_STRING; | |
2779 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2780 | &dst_string, type); | |
2781 | ASSERT_TRUE (result); | |
2782 | ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
2783 | free (const_cast <unsigned char *> (dst_string.text)); | |
2784 | ||
2785 | /* Verify ranges of individual characters. This no longer includes the | |
bbd6fcf3 DM |
2786 | opening quote, but does include the closing quote. */ |
2787 | for (int i = 0; i <= 10; i++) | |
88fa5555 DM |
2788 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, |
2789 | 10 + i, 10 + i); | |
2790 | ||
bbd6fcf3 | 2791 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); |
88fa5555 DM |
2792 | } |
2793 | ||
2794 | /* As test_lexer_string_locations_simple, but use an EBCDIC execution | |
2795 | encoding. */ | |
2796 | ||
2797 | static void | |
2798 | test_lexer_string_locations_ebcdic (const line_table_case &case_) | |
2799 | { | |
2800 | /* EBCDIC support requires iconv. */ | |
2801 | if (!HAVE_ICONV) | |
2802 | return; | |
2803 | ||
2804 | /* Digits 0-9 (with 0 at column 10), the simple way. | |
2805 | ....................000000000.11111111112.2222222223333333333 | |
2806 | ....................123456789.01234567890.1234567890123456789 | |
2807 | We add a trailing comment to ensure that we correctly locate | |
2808 | the end of the string literal token. */ | |
2809 | const char *content = " \"0123456789\" /* not a string */\n"; | |
2810 | ebcdic_execution_charset use_ebcdic; | |
2811 | lexer_test test (case_, content, &use_ebcdic); | |
2812 | ||
2813 | /* Verify that we get the expected token back, with the correct | |
2814 | location information. */ | |
2815 | const cpp_token *tok = test.get_token (); | |
2816 | ASSERT_EQ (tok->type, CPP_STRING); | |
2817 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\""); | |
2818 | ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20); | |
2819 | ||
2820 | /* At this point in lexing, the quote characters are treated as part of | |
2821 | the string (they are stripped off by cpp_interpret_string). */ | |
2822 | ||
2823 | ASSERT_EQ (tok->val.str.len, 12); | |
2824 | ||
2825 | /* The remainder of the test requires an iconv implementation that | |
2826 | can convert from UTF-8 to the EBCDIC encoding requested above. */ | |
2827 | if (use_ebcdic.iconv_errors_occurred_p ()) | |
2828 | return; | |
2829 | ||
2830 | /* Verify that cpp_interpret_string works. */ | |
2831 | cpp_string dst_string; | |
2832 | const enum cpp_ttype type = CPP_STRING; | |
2833 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2834 | &dst_string, type); | |
2835 | ASSERT_TRUE (result); | |
2836 | /* We should now have EBCDIC-encoded text, specifically | |
2837 | IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047"). | |
2838 | The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */ | |
2839 | ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9", | |
2840 | (const char *)dst_string.text); | |
2841 | free (const_cast <unsigned char *> (dst_string.text)); | |
2842 | ||
2843 | /* Verify that we don't attempt to record substring location information | |
2844 | for such cases. */ | |
2845 | ASSERT_HAS_NO_SUBSTRING_RANGES | |
2846 | (test, tok->src_loc, type, | |
2847 | "execution character set != source character set"); | |
2848 | } | |
2849 | ||
2850 | /* Lex a string literal containing a hex-escaped character. | |
2851 | Verify the substring location data, before and after running | |
2852 | cpp_interpret_string on it. */ | |
2853 | ||
2854 | static void | |
2855 | test_lexer_string_locations_hex (const line_table_case &case_) | |
2856 | { | |
2857 | /* Digits 0-9, expressing digit 5 in ASCII as "\x35" | |
2858 | and with a space in place of digit 6, to terminate the escaped | |
2859 | hex code. | |
2860 | ....................000000000.111111.11112222. | |
2861 | ....................123456789.012345.67890123. */ | |
2862 | const char *content = " \"01234\\x35 789\"\n"; | |
2863 | lexer_test test (case_, content, NULL); | |
2864 | ||
2865 | /* Verify that we get the expected token back, with the correct | |
2866 | location information. */ | |
2867 | const cpp_token *tok = test.get_token (); | |
2868 | ASSERT_EQ (tok->type, CPP_STRING); | |
2869 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\""); | |
2870 | ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23); | |
2871 | ||
2872 | /* At this point in lexing, the quote characters are treated as part of | |
2873 | the string (they are stripped off by cpp_interpret_string). */ | |
2874 | ASSERT_EQ (tok->val.str.len, 15); | |
2875 | ||
2876 | /* Verify that cpp_interpret_string works. */ | |
2877 | cpp_string dst_string; | |
2878 | const enum cpp_ttype type = CPP_STRING; | |
2879 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2880 | &dst_string, type); | |
2881 | ASSERT_TRUE (result); | |
2882 | ASSERT_STREQ ("012345 789", (const char *)dst_string.text); | |
2883 | free (const_cast <unsigned char *> (dst_string.text)); | |
2884 | ||
2885 | /* Verify ranges of individual characters. This no longer includes the | |
bbd6fcf3 | 2886 | opening quote, but does include the closing quote. */ |
88fa5555 DM |
2887 | for (int i = 0; i <= 4; i++) |
2888 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); | |
2889 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18); | |
bbd6fcf3 | 2890 | for (int i = 6; i <= 10; i++) |
88fa5555 DM |
2891 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i); |
2892 | ||
bbd6fcf3 | 2893 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); |
88fa5555 DM |
2894 | } |
2895 | ||
2896 | /* Lex a string literal containing an octal-escaped character. | |
2897 | Verify the substring location data after running cpp_interpret_string | |
2898 | on it. */ | |
2899 | ||
2900 | static void | |
2901 | test_lexer_string_locations_oct (const line_table_case &case_) | |
2902 | { | |
2903 | /* Digits 0-9, expressing digit 5 in ASCII as "\065" | |
2904 | and with a space in place of digit 6, to terminate the escaped | |
2905 | octal code. | |
2906 | ....................000000000.111111.11112222.2222223333333333444 | |
2907 | ....................123456789.012345.67890123.4567890123456789012 */ | |
2908 | const char *content = " \"01234\\065 789\" /* not a string */\n"; | |
2909 | lexer_test test (case_, content, NULL); | |
2910 | ||
2911 | /* Verify that we get the expected token back, with the correct | |
2912 | location information. */ | |
2913 | const cpp_token *tok = test.get_token (); | |
2914 | ASSERT_EQ (tok->type, CPP_STRING); | |
2915 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\""); | |
2916 | ||
2917 | /* Verify that cpp_interpret_string works. */ | |
2918 | cpp_string dst_string; | |
2919 | const enum cpp_ttype type = CPP_STRING; | |
2920 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
2921 | &dst_string, type); | |
2922 | ASSERT_TRUE (result); | |
2923 | ASSERT_STREQ ("012345 789", (const char *)dst_string.text); | |
2924 | free (const_cast <unsigned char *> (dst_string.text)); | |
2925 | ||
2926 | /* Verify ranges of individual characters. This no longer includes the | |
bbd6fcf3 | 2927 | opening quote, but does include the closing quote. */ |
88fa5555 DM |
2928 | for (int i = 0; i < 5; i++) |
2929 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); | |
2930 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18); | |
bbd6fcf3 | 2931 | for (int i = 6; i <= 10; i++) |
88fa5555 DM |
2932 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i); |
2933 | ||
bbd6fcf3 | 2934 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); |
88fa5555 DM |
2935 | } |
2936 | ||
2937 | /* Test of string literal containing letter escapes. */ | |
2938 | ||
2939 | static void | |
2940 | test_lexer_string_locations_letter_escape_1 (const line_table_case &case_) | |
2941 | { | |
2942 | /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar. | |
2943 | .....................000000000.1.11111.1.1.11222.22222223333333 | |
2944 | .....................123456789.0.12345.6.7.89012.34567890123456. */ | |
2945 | const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n"); | |
2946 | lexer_test test (case_, content, NULL); | |
2947 | ||
2948 | /* Verify that we get the expected tokens back. */ | |
2949 | const cpp_token *tok = test.get_token (); | |
2950 | ASSERT_EQ (tok->type, CPP_STRING); | |
2951 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\""); | |
2952 | ||
2953 | /* Verify ranges of individual characters. */ | |
2954 | /* "\t". */ | |
2955 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2956 | 0, 1, 10, 11); | |
2957 | /* "foo". */ | |
2958 | for (int i = 1; i <= 3; i++) | |
2959 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2960 | i, 1, 11 + i, 11 + i); | |
2961 | /* "\\" and "\n". */ | |
2962 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2963 | 4, 1, 15, 16); | |
2964 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2965 | 5, 1, 17, 18); | |
2966 | ||
bbd6fcf3 DM |
2967 | /* "bar" and closing quote for nul-terminator. */ |
2968 | for (int i = 6; i <= 9; i++) | |
88fa5555 DM |
2969 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, |
2970 | i, 1, 13 + i, 13 + i); | |
2971 | ||
bbd6fcf3 | 2972 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10); |
88fa5555 DM |
2973 | } |
2974 | ||
2975 | /* Another test of a string literal containing a letter escape. | |
2976 | Based on string seen in | |
2977 | printf ("%-%\n"); | |
2978 | in gcc.dg/format/c90-printf-1.c. */ | |
2979 | ||
2980 | static void | |
2981 | test_lexer_string_locations_letter_escape_2 (const line_table_case &case_) | |
2982 | { | |
2983 | /* .....................000000000.1111.11.1111.22222222223. | |
2984 | .....................123456789.0123.45.6789.01234567890. */ | |
2985 | const char *content = (" \"%-%\\n\" /* non-str */\n"); | |
2986 | lexer_test test (case_, content, NULL); | |
2987 | ||
2988 | /* Verify that we get the expected tokens back. */ | |
2989 | const cpp_token *tok = test.get_token (); | |
2990 | ASSERT_EQ (tok->type, CPP_STRING); | |
2991 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\""); | |
2992 | ||
2993 | /* Verify ranges of individual characters. */ | |
2994 | /* "%-%". */ | |
2995 | for (int i = 0; i < 3; i++) | |
2996 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
2997 | i, 1, 10 + i, 10 + i); | |
2998 | /* "\n". */ | |
2999 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
3000 | 3, 1, 13, 14); | |
3001 | ||
bbd6fcf3 DM |
3002 | /* Closing quote for nul-terminator. */ |
3003 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
3004 | 4, 1, 15, 15); | |
3005 | ||
3006 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5); | |
88fa5555 DM |
3007 | } |
3008 | ||
3009 | /* Lex a string literal containing UCN 4 characters. | |
3010 | Verify the substring location data after running cpp_interpret_string | |
3011 | on it. */ | |
3012 | ||
3013 | static void | |
3014 | test_lexer_string_locations_ucn4 (const line_table_case &case_) | |
3015 | { | |
3016 | /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed | |
3017 | as UCN 4. | |
3018 | ....................000000000.111111.111122.222222223.33333333344444 | |
3019 | ....................123456789.012345.678901.234567890.12345678901234 */ | |
3020 | const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n"; | |
3021 | lexer_test test (case_, content, NULL); | |
3022 | ||
3023 | /* Verify that we get the expected token back, with the correct | |
3024 | location information. */ | |
3025 | const cpp_token *tok = test.get_token (); | |
3026 | ASSERT_EQ (tok->type, CPP_STRING); | |
3027 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\""); | |
3028 | ||
3029 | /* Verify that cpp_interpret_string works. | |
3030 | The string should be encoded in the execution character | |
700d4cb0 | 3031 | set. Assuming that is UTF-8, we should have the following: |
88fa5555 DM |
3032 | ----------- ---- ----- ------- ---------------- |
3033 | Byte offset Byte Octal Unicode Source Column(s) | |
3034 | ----------- ---- ----- ------- ---------------- | |
3035 | 0 0x30 '0' 10 | |
3036 | 1 0x31 '1' 11 | |
3037 | 2 0x32 '2' 12 | |
3038 | 3 0x33 '3' 13 | |
3039 | 4 0x34 '4' 14 | |
3040 | 5 0xE2 \342 U+2174 15-20 | |
3041 | 6 0x85 \205 (cont) 15-20 | |
3042 | 7 0xB4 \264 (cont) 15-20 | |
3043 | 8 0xE2 \342 U+2175 21-26 | |
3044 | 9 0x85 \205 (cont) 21-26 | |
3045 | 10 0xB5 \265 (cont) 21-26 | |
3046 | 11 0x37 '7' 27 | |
3047 | 12 0x38 '8' 28 | |
3048 | 13 0x39 '9' 29 | |
bbd6fcf3 | 3049 | 14 0x00 30 (closing quote) |
88fa5555 DM |
3050 | ----------- ---- ----- ------- ---------------. */ |
3051 | ||
3052 | cpp_string dst_string; | |
3053 | const enum cpp_ttype type = CPP_STRING; | |
3054 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
3055 | &dst_string, type); | |
3056 | ASSERT_TRUE (result); | |
3057 | ASSERT_STREQ ("01234\342\205\264\342\205\265789", | |
3058 | (const char *)dst_string.text); | |
3059 | free (const_cast <unsigned char *> (dst_string.text)); | |
3060 | ||
3061 | /* Verify ranges of individual characters. This no longer includes the | |
bbd6fcf3 | 3062 | opening quote, but does include the closing quote. |
88fa5555 DM |
3063 | '01234'. */ |
3064 | for (int i = 0; i <= 4; i++) | |
3065 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); | |
3066 | /* U+2174. */ | |
3067 | for (int i = 5; i <= 7; i++) | |
3068 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20); | |
3069 | /* U+2175. */ | |
3070 | for (int i = 8; i <= 10; i++) | |
3071 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26); | |
bbd6fcf3 DM |
3072 | /* '789' and nul terminator */ |
3073 | for (int i = 11; i <= 14; i++) | |
88fa5555 DM |
3074 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i); |
3075 | ||
bbd6fcf3 | 3076 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15); |
88fa5555 DM |
3077 | } |
3078 | ||
3079 | /* Lex a string literal containing UCN 8 characters. | |
3080 | Verify the substring location data after running cpp_interpret_string | |
3081 | on it. */ | |
3082 | ||
3083 | static void | |
3084 | test_lexer_string_locations_ucn8 (const line_table_case &case_) | |
3085 | { | |
3086 | /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8. | |
3087 | ....................000000000.111111.1111222222.2222333333333.344444 | |
3088 | ....................123456789.012345.6789012345.6789012345678.901234 */ | |
3089 | const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n"; | |
3090 | lexer_test test (case_, content, NULL); | |
3091 | ||
3092 | /* Verify that we get the expected token back, with the correct | |
3093 | location information. */ | |
3094 | const cpp_token *tok = test.get_token (); | |
3095 | ASSERT_EQ (tok->type, CPP_STRING); | |
3096 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, | |
3097 | "\"01234\\U00002174\\U00002175789\""); | |
3098 | ||
3099 | /* Verify that cpp_interpret_string works. | |
3100 | The UTF-8 encoding of the string is identical to that from | |
3101 | the ucn4 testcase above; the only difference is the column | |
3102 | locations. */ | |
3103 | cpp_string dst_string; | |
3104 | const enum cpp_ttype type = CPP_STRING; | |
3105 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
3106 | &dst_string, type); | |
3107 | ASSERT_TRUE (result); | |
3108 | ASSERT_STREQ ("01234\342\205\264\342\205\265789", | |
3109 | (const char *)dst_string.text); | |
3110 | free (const_cast <unsigned char *> (dst_string.text)); | |
3111 | ||
3112 | /* Verify ranges of individual characters. This no longer includes the | |
bbd6fcf3 | 3113 | opening quote, but does include the closing quote. |
88fa5555 DM |
3114 | '01234'. */ |
3115 | for (int i = 0; i <= 4; i++) | |
3116 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); | |
3117 | /* U+2174. */ | |
3118 | for (int i = 5; i <= 7; i++) | |
3119 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24); | |
3120 | /* U+2175. */ | |
3121 | for (int i = 8; i <= 10; i++) | |
3122 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34); | |
3123 | /* '789' at columns 35-37 */ | |
3124 | for (int i = 11; i <= 13; i++) | |
3125 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i); | |
bbd6fcf3 DM |
3126 | /* Closing quote/nul-terminator at column 38. */ |
3127 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38); | |
88fa5555 | 3128 | |
bbd6fcf3 | 3129 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15); |
88fa5555 DM |
3130 | } |
3131 | ||
3132 | /* Fetch a big-endian 32-bit value and convert to host endianness. */ | |
3133 | ||
3134 | static uint32_t | |
3135 | uint32_from_big_endian (const uint32_t *ptr_be_value) | |
3136 | { | |
3137 | const unsigned char *buf = (const unsigned char *)ptr_be_value; | |
3138 | return (((uint32_t) buf[0] << 24) | |
3139 | | ((uint32_t) buf[1] << 16) | |
3140 | | ((uint32_t) buf[2] << 8) | |
3141 | | (uint32_t) buf[3]); | |
3142 | } | |
3143 | ||
3144 | /* Lex a wide string literal and verify that attempts to read substring | |
3145 | location data from it fail gracefully. */ | |
3146 | ||
3147 | static void | |
3148 | test_lexer_string_locations_wide_string (const line_table_case &case_) | |
3149 | { | |
3150 | /* Digits 0-9. | |
3151 | ....................000000000.11111111112.22222222233333 | |
3152 | ....................123456789.01234567890.12345678901234 */ | |
3153 | const char *content = " L\"0123456789\" /* non-str */\n"; | |
3154 | lexer_test test (case_, content, NULL); | |
3155 | ||
3156 | /* Verify that we get the expected token back, with the correct | |
3157 | location information. */ | |
3158 | const cpp_token *tok = test.get_token (); | |
3159 | ASSERT_EQ (tok->type, CPP_WSTRING); | |
3160 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\""); | |
3161 | ||
3162 | /* Verify that cpp_interpret_string works, using CPP_WSTRING. */ | |
3163 | cpp_string dst_string; | |
3164 | const enum cpp_ttype type = CPP_WSTRING; | |
3165 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
3166 | &dst_string, type); | |
3167 | ASSERT_TRUE (result); | |
3168 | /* The cpp_reader defaults to big-endian with | |
3169 | CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should | |
3170 | now be encoded as UTF-32BE. */ | |
3171 | const uint32_t *be32_chars = (const uint32_t *)dst_string.text; | |
3172 | ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0])); | |
3173 | ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5])); | |
3174 | ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9])); | |
3175 | ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10])); | |
3176 | free (const_cast <unsigned char *> (dst_string.text)); | |
3177 | ||
3178 | /* We don't yet support generating substring location information | |
3179 | for L"" strings. */ | |
3180 | ASSERT_HAS_NO_SUBSTRING_RANGES | |
3181 | (test, tok->src_loc, type, | |
3182 | "execution character set != source character set"); | |
3183 | } | |
3184 | ||
3185 | /* Fetch a big-endian 16-bit value and convert to host endianness. */ | |
3186 | ||
3187 | static uint16_t | |
3188 | uint16_from_big_endian (const uint16_t *ptr_be_value) | |
3189 | { | |
3190 | const unsigned char *buf = (const unsigned char *)ptr_be_value; | |
3191 | return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1]; | |
3192 | } | |
3193 | ||
3194 | /* Lex a u"" string literal and verify that attempts to read substring | |
3195 | location data from it fail gracefully. */ | |
3196 | ||
3197 | static void | |
3198 | test_lexer_string_locations_string16 (const line_table_case &case_) | |
3199 | { | |
3200 | /* Digits 0-9. | |
3201 | ....................000000000.11111111112.22222222233333 | |
3202 | ....................123456789.01234567890.12345678901234 */ | |
3203 | const char *content = " u\"0123456789\" /* non-str */\n"; | |
3204 | lexer_test test (case_, content, NULL); | |
3205 | ||
3206 | /* Verify that we get the expected token back, with the correct | |
3207 | location information. */ | |
3208 | const cpp_token *tok = test.get_token (); | |
3209 | ASSERT_EQ (tok->type, CPP_STRING16); | |
3210 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\""); | |
3211 | ||
3212 | /* Verify that cpp_interpret_string works, using CPP_STRING16. */ | |
3213 | cpp_string dst_string; | |
3214 | const enum cpp_ttype type = CPP_STRING16; | |
3215 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
3216 | &dst_string, type); | |
3217 | ASSERT_TRUE (result); | |
3218 | ||
3219 | /* The cpp_reader defaults to big-endian, so dst_string should | |
3220 | now be encoded as UTF-16BE. */ | |
3221 | const uint16_t *be16_chars = (const uint16_t *)dst_string.text; | |
3222 | ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0])); | |
3223 | ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5])); | |
3224 | ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9])); | |
3225 | ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10])); | |
3226 | free (const_cast <unsigned char *> (dst_string.text)); | |
3227 | ||
3228 | /* We don't yet support generating substring location information | |
3229 | for u"" strings. */ | |
3230 | ASSERT_HAS_NO_SUBSTRING_RANGES | |
3231 | (test, tok->src_loc, type, | |
3232 | "execution character set != source character set"); | |
3233 | } | |
3234 | ||
3235 | /* Lex a U"" string literal and verify that attempts to read substring | |
3236 | location data from it fail gracefully. */ | |
3237 | ||
3238 | static void | |
3239 | test_lexer_string_locations_string32 (const line_table_case &case_) | |
3240 | { | |
3241 | /* Digits 0-9. | |
3242 | ....................000000000.11111111112.22222222233333 | |
3243 | ....................123456789.01234567890.12345678901234 */ | |
3244 | const char *content = " U\"0123456789\" /* non-str */\n"; | |
3245 | lexer_test test (case_, content, NULL); | |
3246 | ||
3247 | /* Verify that we get the expected token back, with the correct | |
3248 | location information. */ | |
3249 | const cpp_token *tok = test.get_token (); | |
3250 | ASSERT_EQ (tok->type, CPP_STRING32); | |
3251 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\""); | |
3252 | ||
3253 | /* Verify that cpp_interpret_string works, using CPP_STRING32. */ | |
3254 | cpp_string dst_string; | |
3255 | const enum cpp_ttype type = CPP_STRING32; | |
3256 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
3257 | &dst_string, type); | |
3258 | ASSERT_TRUE (result); | |
3259 | ||
3260 | /* The cpp_reader defaults to big-endian, so dst_string should | |
3261 | now be encoded as UTF-32BE. */ | |
3262 | const uint32_t *be32_chars = (const uint32_t *)dst_string.text; | |
3263 | ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0])); | |
3264 | ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5])); | |
3265 | ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9])); | |
3266 | ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10])); | |
3267 | free (const_cast <unsigned char *> (dst_string.text)); | |
3268 | ||
3269 | /* We don't yet support generating substring location information | |
3270 | for U"" strings. */ | |
3271 | ASSERT_HAS_NO_SUBSTRING_RANGES | |
3272 | (test, tok->src_loc, type, | |
3273 | "execution character set != source character set"); | |
3274 | } | |
3275 | ||
3276 | /* Lex a u8-string literal. | |
3277 | Verify the substring location data after running cpp_interpret_string | |
3278 | on it. */ | |
3279 | ||
3280 | static void | |
3281 | test_lexer_string_locations_u8 (const line_table_case &case_) | |
3282 | { | |
3283 | /* Digits 0-9. | |
3284 | ....................000000000.11111111112.22222222233333 | |
3285 | ....................123456789.01234567890.12345678901234 */ | |
3286 | const char *content = " u8\"0123456789\" /* non-str */\n"; | |
3287 | lexer_test test (case_, content, NULL); | |
3288 | ||
3289 | /* Verify that we get the expected token back, with the correct | |
3290 | location information. */ | |
3291 | const cpp_token *tok = test.get_token (); | |
3292 | ASSERT_EQ (tok->type, CPP_UTF8STRING); | |
3293 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\""); | |
3294 | ||
3295 | /* Verify that cpp_interpret_string works. */ | |
3296 | cpp_string dst_string; | |
3297 | const enum cpp_ttype type = CPP_STRING; | |
3298 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
3299 | &dst_string, type); | |
3300 | ASSERT_TRUE (result); | |
3301 | ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
3302 | free (const_cast <unsigned char *> (dst_string.text)); | |
3303 | ||
3304 | /* Verify ranges of individual characters. This no longer includes the | |
bbd6fcf3 DM |
3305 | opening quote, but does include the closing quote. */ |
3306 | for (int i = 0; i <= 10; i++) | |
88fa5555 DM |
3307 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); |
3308 | } | |
3309 | ||
3310 | /* Lex a string literal containing UTF-8 source characters. | |
3311 | Verify the substring location data after running cpp_interpret_string | |
3312 | on it. */ | |
3313 | ||
3314 | static void | |
3315 | test_lexer_string_locations_utf8_source (const line_table_case &case_) | |
3316 | { | |
3317 | /* This string literal is written out to the source file as UTF-8, | |
3318 | and is of the form "before mojibake after", where "mojibake" | |
3319 | is written as the following four unicode code points: | |
3320 | U+6587 CJK UNIFIED IDEOGRAPH-6587 | |
3321 | U+5B57 CJK UNIFIED IDEOGRAPH-5B57 | |
3322 | U+5316 CJK UNIFIED IDEOGRAPH-5316 | |
3323 | U+3051 HIRAGANA LETTER KE. | |
3324 | Each of these is 3 bytes wide when encoded in UTF-8, whereas the | |
3325 | "before" and "after" are 1 byte per unicode character. | |
3326 | ||
3327 | The numbering shown are "columns", which are *byte* numbers within | |
3328 | the line, rather than unicode character numbers. | |
3329 | ||
3330 | .................... 000000000.1111111. | |
3331 | .................... 123456789.0123456. */ | |
3332 | const char *content = (" \"before " | |
3333 | /* U+6587 CJK UNIFIED IDEOGRAPH-6587 | |
3334 | UTF-8: 0xE6 0x96 0x87 | |
3335 | C octal escaped UTF-8: \346\226\207 | |
3336 | "column" numbers: 17-19. */ | |
3337 | "\346\226\207" | |
3338 | ||
3339 | /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57 | |
3340 | UTF-8: 0xE5 0xAD 0x97 | |
3341 | C octal escaped UTF-8: \345\255\227 | |
3342 | "column" numbers: 20-22. */ | |
3343 | "\345\255\227" | |
3344 | ||
3345 | /* U+5316 CJK UNIFIED IDEOGRAPH-5316 | |
3346 | UTF-8: 0xE5 0x8C 0x96 | |
3347 | C octal escaped UTF-8: \345\214\226 | |
3348 | "column" numbers: 23-25. */ | |
3349 | "\345\214\226" | |
3350 | ||
3351 | /* U+3051 HIRAGANA LETTER KE | |
3352 | UTF-8: 0xE3 0x81 0x91 | |
3353 | C octal escaped UTF-8: \343\201\221 | |
3354 | "column" numbers: 26-28. */ | |
3355 | "\343\201\221" | |
3356 | ||
3357 | /* column numbers 29 onwards | |
3358 | 2333333.33334444444444 | |
3359 | 9012345.67890123456789. */ | |
3360 | " after\" /* non-str */\n"); | |
3361 | lexer_test test (case_, content, NULL); | |
3362 | ||
3363 | /* Verify that we get the expected token back, with the correct | |
3364 | location information. */ | |
3365 | const cpp_token *tok = test.get_token (); | |
3366 | ASSERT_EQ (tok->type, CPP_STRING); | |
3367 | ASSERT_TOKEN_AS_TEXT_EQ | |
3368 | (test.m_parser, tok, | |
3369 | "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\""); | |
3370 | ||
3371 | /* Verify that cpp_interpret_string works. */ | |
3372 | cpp_string dst_string; | |
3373 | const enum cpp_ttype type = CPP_STRING; | |
3374 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
3375 | &dst_string, type); | |
3376 | ASSERT_TRUE (result); | |
3377 | ASSERT_STREQ | |
3378 | ("before \346\226\207\345\255\227\345\214\226\343\201\221 after", | |
3379 | (const char *)dst_string.text); | |
3380 | free (const_cast <unsigned char *> (dst_string.text)); | |
3381 | ||
3382 | /* Verify ranges of individual characters. This no longer includes the | |
bbd6fcf3 | 3383 | opening quote, but does include the closing quote. |
88fa5555 | 3384 | Assuming that both source and execution encodings are UTF-8, we have |
bbd6fcf3 | 3385 | a run of 25 octets in each, plus the NUL terminator. */ |
88fa5555 DM |
3386 | for (int i = 0; i < 25; i++) |
3387 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); | |
bbd6fcf3 DM |
3388 | /* NUL-terminator should use the closing quote at column 35. */ |
3389 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35); | |
88fa5555 | 3390 | |
bbd6fcf3 | 3391 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26); |
88fa5555 DM |
3392 | } |
3393 | ||
3394 | /* Test of string literal concatenation. */ | |
3395 | ||
3396 | static void | |
3397 | test_lexer_string_locations_concatenation_1 (const line_table_case &case_) | |
3398 | { | |
3399 | /* Digits 0-9. | |
3400 | .....................000000000.111111.11112222222222 | |
3401 | .....................123456789.012345.67890123456789. */ | |
3402 | const char *content = (" \"01234\" /* non-str */\n" | |
3403 | " \"56789\" /* non-str */\n"); | |
3404 | lexer_test test (case_, content, NULL); | |
3405 | ||
3406 | location_t input_locs[2]; | |
3407 | ||
3408 | /* Verify that we get the expected tokens back. */ | |
3409 | auto_vec <cpp_string> input_strings; | |
3410 | const cpp_token *tok_a = test.get_token (); | |
3411 | ASSERT_EQ (tok_a->type, CPP_STRING); | |
3412 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\""); | |
3413 | input_strings.safe_push (tok_a->val.str); | |
3414 | input_locs[0] = tok_a->src_loc; | |
3415 | ||
3416 | const cpp_token *tok_b = test.get_token (); | |
3417 | ASSERT_EQ (tok_b->type, CPP_STRING); | |
3418 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\""); | |
3419 | input_strings.safe_push (tok_b->val.str); | |
3420 | input_locs[1] = tok_b->src_loc; | |
3421 | ||
3422 | /* Verify that cpp_interpret_string works. */ | |
3423 | cpp_string dst_string; | |
3424 | const enum cpp_ttype type = CPP_STRING; | |
3425 | bool result = cpp_interpret_string (test.m_parser, | |
3426 | input_strings.address (), 2, | |
3427 | &dst_string, type); | |
3428 | ASSERT_TRUE (result); | |
3429 | ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
3430 | free (const_cast <unsigned char *> (dst_string.text)); | |
3431 | ||
e53b6e56 | 3432 | /* Simulate c-lex.cc's lex_string in order to record concatenation. */ |
88fa5555 DM |
3433 | test.m_concats.record_string_concatenation (2, input_locs); |
3434 | ||
3435 | location_t initial_loc = input_locs[0]; | |
3436 | ||
bbd6fcf3 | 3437 | /* "01234" on line 1. */ |
88fa5555 DM |
3438 | for (int i = 0; i <= 4; i++) |
3439 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i); | |
bbd6fcf3 DM |
3440 | /* "56789" in line 2, plus its closing quote for the nul terminator. */ |
3441 | for (int i = 5; i <= 10; i++) | |
88fa5555 DM |
3442 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i); |
3443 | ||
bbd6fcf3 | 3444 | ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11); |
88fa5555 DM |
3445 | } |
3446 | ||
3447 | /* Another test of string literal concatenation. */ | |
3448 | ||
3449 | static void | |
3450 | test_lexer_string_locations_concatenation_2 (const line_table_case &case_) | |
3451 | { | |
3452 | /* Digits 0-9. | |
3453 | .....................000000000.111.11111112222222 | |
3454 | .....................123456789.012.34567890123456. */ | |
3455 | const char *content = (" \"01\" /* non-str */\n" | |
3456 | " \"23\" /* non-str */\n" | |
3457 | " \"45\" /* non-str */\n" | |
3458 | " \"67\" /* non-str */\n" | |
3459 | " \"89\" /* non-str */\n"); | |
3460 | lexer_test test (case_, content, NULL); | |
3461 | ||
3462 | auto_vec <cpp_string> input_strings; | |
3463 | location_t input_locs[5]; | |
3464 | ||
3465 | /* Verify that we get the expected tokens back. */ | |
3466 | for (int i = 0; i < 5; i++) | |
3467 | { | |
3468 | const cpp_token *tok = test.get_token (); | |
3469 | ASSERT_EQ (tok->type, CPP_STRING); | |
3470 | input_strings.safe_push (tok->val.str); | |
3471 | input_locs[i] = tok->src_loc; | |
3472 | } | |
3473 | ||
3474 | /* Verify that cpp_interpret_string works. */ | |
3475 | cpp_string dst_string; | |
3476 | const enum cpp_ttype type = CPP_STRING; | |
3477 | bool result = cpp_interpret_string (test.m_parser, | |
3478 | input_strings.address (), 5, | |
3479 | &dst_string, type); | |
3480 | ASSERT_TRUE (result); | |
3481 | ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
3482 | free (const_cast <unsigned char *> (dst_string.text)); | |
3483 | ||
e53b6e56 | 3484 | /* Simulate c-lex.cc's lex_string in order to record concatenation. */ |
88fa5555 DM |
3485 | test.m_concats.record_string_concatenation (5, input_locs); |
3486 | ||
3487 | location_t initial_loc = input_locs[0]; | |
3488 | ||
3489 | /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can | |
3490 | detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS | |
3491 | and expect get_source_range_for_substring to fail. | |
3492 | However, for a string concatenation test, we can have a case | |
3493 | where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS, | |
3494 | but subsequent strings can be after it. | |
3495 | Attempting to detect this within assert_char_at_range | |
3496 | would overcomplicate the logic for the common test cases, so | |
3497 | we detect it here. */ | |
3498 | if (should_have_column_data_p (input_locs[0]) | |
3499 | && !should_have_column_data_p (input_locs[4])) | |
3500 | { | |
3501 | /* Verify that get_source_range_for_substring gracefully rejects | |
3502 | this case. */ | |
3503 | source_range actual_range; | |
3504 | const char *err | |
1bdd665a DM |
3505 | = get_source_range_for_char (test.m_parser, test.m_file_cache, |
3506 | &test.m_concats, | |
65e736c0 | 3507 | initial_loc, type, 0, &actual_range); |
88fa5555 DM |
3508 | ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err); |
3509 | return; | |
3510 | } | |
3511 | ||
3512 | for (int i = 0; i < 5; i++) | |
3513 | for (int j = 0; j < 2; j++) | |
3514 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j, | |
3515 | i + 1, 10 + j, 10 + j); | |
3516 | ||
bbd6fcf3 DM |
3517 | /* NUL-terminator should use the final closing quote at line 5 column 12. */ |
3518 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12); | |
3519 | ||
3520 | ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11); | |
88fa5555 DM |
3521 | } |
3522 | ||
3523 | /* Another test of string literal concatenation, this time combined with | |
3524 | various kinds of escaped characters. */ | |
3525 | ||
3526 | static void | |
3527 | test_lexer_string_locations_concatenation_3 (const line_table_case &case_) | |
3528 | { | |
3529 | /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35" | |
3530 | digit 6 in ASCII as octal "\066", concatenating multiple strings. */ | |
3531 | const char *content | |
3532 | /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555 | |
3533 | .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */ | |
3534 | = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n"); | |
3535 | lexer_test test (case_, content, NULL); | |
3536 | ||
3537 | auto_vec <cpp_string> input_strings; | |
3538 | location_t input_locs[4]; | |
3539 | ||
3540 | /* Verify that we get the expected tokens back. */ | |
3541 | for (int i = 0; i < 4; i++) | |
3542 | { | |
3543 | const cpp_token *tok = test.get_token (); | |
3544 | ASSERT_EQ (tok->type, CPP_STRING); | |
3545 | input_strings.safe_push (tok->val.str); | |
3546 | input_locs[i] = tok->src_loc; | |
3547 | } | |
3548 | ||
3549 | /* Verify that cpp_interpret_string works. */ | |
3550 | cpp_string dst_string; | |
3551 | const enum cpp_ttype type = CPP_STRING; | |
3552 | bool result = cpp_interpret_string (test.m_parser, | |
3553 | input_strings.address (), 4, | |
3554 | &dst_string, type); | |
3555 | ASSERT_TRUE (result); | |
3556 | ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
3557 | free (const_cast <unsigned char *> (dst_string.text)); | |
3558 | ||
e53b6e56 | 3559 | /* Simulate c-lex.cc's lex_string in order to record concatenation. */ |
88fa5555 DM |
3560 | test.m_concats.record_string_concatenation (4, input_locs); |
3561 | ||
3562 | location_t initial_loc = input_locs[0]; | |
3563 | ||
3564 | for (int i = 0; i <= 4; i++) | |
3565 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i); | |
3566 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22); | |
3567 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30); | |
3568 | for (int i = 7; i <= 9; i++) | |
3569 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i); | |
3570 | ||
bbd6fcf3 DM |
3571 | /* NUL-terminator should use the location of the final closing quote. */ |
3572 | ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38); | |
3573 | ||
3574 | ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11); | |
88fa5555 DM |
3575 | } |
3576 | ||
3577 | /* Test of string literal in a macro. */ | |
3578 | ||
3579 | static void | |
3580 | test_lexer_string_locations_macro (const line_table_case &case_) | |
3581 | { | |
3582 | /* Digits 0-9. | |
3583 | .....................0000000001111111111.22222222223. | |
3584 | .....................1234567890123456789.01234567890. */ | |
3585 | const char *content = ("#define MACRO \"0123456789\" /* non-str */\n" | |
3586 | " MACRO"); | |
3587 | lexer_test test (case_, content, NULL); | |
3588 | ||
3589 | /* Verify that we get the expected tokens back. */ | |
3590 | const cpp_token *tok = test.get_token (); | |
3591 | ASSERT_EQ (tok->type, CPP_PADDING); | |
3592 | ||
3593 | tok = test.get_token (); | |
3594 | ASSERT_EQ (tok->type, CPP_STRING); | |
3595 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\""); | |
3596 | ||
3597 | /* Verify ranges of individual characters. We ought to | |
3598 | see columns within the macro definition. */ | |
bbd6fcf3 | 3599 | for (int i = 0; i <= 10; i++) |
88fa5555 DM |
3600 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, |
3601 | i, 1, 20 + i, 20 + i); | |
3602 | ||
bbd6fcf3 | 3603 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11); |
88fa5555 DM |
3604 | |
3605 | tok = test.get_token (); | |
3606 | ASSERT_EQ (tok->type, CPP_PADDING); | |
3607 | } | |
3608 | ||
3609 | /* Test of stringification of a macro argument. */ | |
3610 | ||
3611 | static void | |
3612 | test_lexer_string_locations_stringified_macro_argument | |
3613 | (const line_table_case &case_) | |
3614 | { | |
3615 | /* .....................000000000111111111122222222223. | |
3616 | .....................123456789012345678901234567890. */ | |
3617 | const char *content = ("#define MACRO(X) #X /* non-str */\n" | |
3618 | "MACRO(foo)\n"); | |
3619 | lexer_test test (case_, content, NULL); | |
3620 | ||
3621 | /* Verify that we get the expected token back. */ | |
3622 | const cpp_token *tok = test.get_token (); | |
3623 | ASSERT_EQ (tok->type, CPP_PADDING); | |
3624 | ||
3625 | tok = test.get_token (); | |
3626 | ASSERT_EQ (tok->type, CPP_STRING); | |
3627 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\""); | |
3628 | ||
3629 | /* We don't support getting the location of a stringified macro | |
3630 | argument. Verify that it fails gracefully. */ | |
3631 | ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, | |
3632 | "cpp_interpret_string_1 failed"); | |
3633 | ||
3634 | tok = test.get_token (); | |
3635 | ASSERT_EQ (tok->type, CPP_PADDING); | |
3636 | ||
3637 | tok = test.get_token (); | |
3638 | ASSERT_EQ (tok->type, CPP_PADDING); | |
3639 | } | |
3640 | ||
3641 | /* Ensure that we fail gracefully if something attempts to pass | |
3642 | in a location that isn't a string literal token. Seen on this code: | |
3643 | ||
3644 | const char a[] = " %d "; | |
3645 | __builtin_printf (a, 0.5); | |
3646 | ^ | |
3647 | ||
e53b6e56 | 3648 | when c-format.cc erroneously used the indicated one-character |
88fa5555 DM |
3649 | location as the format string location, leading to a read past the |
3650 | end of a string buffer in cpp_interpret_string_1. */ | |
3651 | ||
3652 | static void | |
3653 | test_lexer_string_locations_non_string (const line_table_case &case_) | |
3654 | { | |
3655 | /* .....................000000000111111111122222222223. | |
3656 | .....................123456789012345678901234567890. */ | |
3657 | const char *content = (" a\n"); | |
3658 | lexer_test test (case_, content, NULL); | |
3659 | ||
3660 | /* Verify that we get the expected token back. */ | |
3661 | const cpp_token *tok = test.get_token (); | |
3662 | ASSERT_EQ (tok->type, CPP_NAME); | |
3663 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a"); | |
3664 | ||
3665 | /* At this point, libcpp is attempting to interpret the name as a | |
3666 | string literal, despite it not starting with a quote. We don't detect | |
3667 | that, but we should at least fail gracefully. */ | |
3668 | ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, | |
3669 | "cpp_interpret_string_1 failed"); | |
3670 | } | |
3671 | ||
3672 | /* Ensure that we can read substring information for a token which | |
3673 | starts in one linemap and ends in another. Adapted from | |
3674 | gcc.dg/cpp/pr69985.c. */ | |
3675 | ||
3676 | static void | |
3677 | test_lexer_string_locations_long_line (const line_table_case &case_) | |
3678 | { | |
3679 | /* .....................000000.000111111111 | |
3680 | .....................123456.789012345678. */ | |
3681 | const char *content = ("/* A very long line, so that we start a new line map. */\n" | |
3682 | " \"0123456789012345678901234567890123456789" | |
3683 | "0123456789012345678901234567890123456789" | |
3684 | "0123456789012345678901234567890123456789" | |
3685 | "0123456789\"\n"); | |
3686 | ||
3687 | lexer_test test (case_, content, NULL); | |
3688 | ||
3689 | /* Verify that we get the expected token back. */ | |
3690 | const cpp_token *tok = test.get_token (); | |
3691 | ASSERT_EQ (tok->type, CPP_STRING); | |
3692 | ||
3693 | if (!should_have_column_data_p (line_table->highest_location)) | |
3694 | return; | |
3695 | ||
3696 | /* Verify ranges of individual characters. */ | |
bbd6fcf3 DM |
3697 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131); |
3698 | for (int i = 0; i < 131; i++) | |
88fa5555 DM |
3699 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, |
3700 | i, 2, 7 + i, 7 + i); | |
3701 | } | |
3702 | ||
b8f56412 DM |
3703 | /* Test of locations within a raw string that doesn't contain a newline. */ |
3704 | ||
3705 | static void | |
3706 | test_lexer_string_locations_raw_string_one_line (const line_table_case &case_) | |
3707 | { | |
3708 | /* .....................00.0000000111111111122. | |
3709 | .....................12.3456789012345678901. */ | |
3710 | const char *content = ("R\"foo(0123456789)foo\"\n"); | |
3711 | lexer_test test (case_, content, NULL); | |
3712 | ||
3713 | /* Verify that we get the expected token back. */ | |
3714 | const cpp_token *tok = test.get_token (); | |
3715 | ASSERT_EQ (tok->type, CPP_STRING); | |
3716 | ||
3717 | /* Verify that cpp_interpret_string works. */ | |
3718 | cpp_string dst_string; | |
3719 | const enum cpp_ttype type = CPP_STRING; | |
3720 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
3721 | &dst_string, type); | |
3722 | ASSERT_TRUE (result); | |
3723 | ASSERT_STREQ ("0123456789", (const char *)dst_string.text); | |
3724 | free (const_cast <unsigned char *> (dst_string.text)); | |
3725 | ||
3726 | if (!should_have_column_data_p (line_table->highest_location)) | |
3727 | return; | |
3728 | ||
3729 | /* 0-9, plus the nil terminator. */ | |
3730 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11); | |
3731 | for (int i = 0; i < 11; i++) | |
3732 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, | |
3733 | i, 1, 7 + i, 7 + i); | |
3734 | } | |
3735 | ||
3736 | /* Test of locations within a raw string that contains a newline. */ | |
3737 | ||
3738 | static void | |
3739 | test_lexer_string_locations_raw_string_multiline (const line_table_case &case_) | |
3740 | { | |
3741 | /* .....................00.0000. | |
3742 | .....................12.3456. */ | |
3743 | const char *content = ("R\"foo(\n" | |
3744 | /* .....................00000. | |
3745 | .....................12345. */ | |
3746 | "hello\n" | |
3747 | "world\n" | |
3748 | /* .....................00000. | |
3749 | .....................12345. */ | |
3750 | ")foo\"\n"); | |
3751 | lexer_test test (case_, content, NULL); | |
3752 | ||
3753 | /* Verify that we get the expected token back. */ | |
3754 | const cpp_token *tok = test.get_token (); | |
3755 | ASSERT_EQ (tok->type, CPP_STRING); | |
3756 | ||
3757 | /* Verify that cpp_interpret_string works. */ | |
3758 | cpp_string dst_string; | |
3759 | const enum cpp_ttype type = CPP_STRING; | |
3760 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, | |
3761 | &dst_string, type); | |
3762 | ASSERT_TRUE (result); | |
3763 | ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text); | |
3764 | free (const_cast <unsigned char *> (dst_string.text)); | |
3765 | ||
3766 | if (!should_have_column_data_p (line_table->highest_location)) | |
3767 | return; | |
3768 | ||
3769 | /* Currently we don't support locations within raw strings that | |
3770 | contain newlines. */ | |
3771 | ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type, | |
3772 | "range endpoints are on different lines"); | |
3773 | } | |
3774 | ||
a3998c2f DM |
3775 | /* Test of parsing an unterminated raw string. */ |
3776 | ||
3777 | static void | |
3778 | test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_) | |
3779 | { | |
3780 | const char *content = "R\"ouch()ouCh\" /* etc */"; | |
3781 | ||
c24300ba DM |
3782 | lexer_diagnostic_sink diagnostics; |
3783 | lexer_test test (case_, content, &diagnostics); | |
a3998c2f DM |
3784 | test.m_implicitly_expect_EOF = false; |
3785 | ||
3786 | /* Attempt to parse the raw string. */ | |
3787 | const cpp_token *tok = test.get_token (); | |
3788 | ASSERT_EQ (tok->type, CPP_EOF); | |
3789 | ||
c24300ba | 3790 | ASSERT_EQ (1, diagnostics.m_diagnostics.length ()); |
a3998c2f DM |
3791 | /* We expect the message "unterminated raw string" |
3792 | in the "cpplib" translation domain. | |
3793 | It's not clear that dgettext is available on all supported hosts, | |
3794 | so this assertion is commented-out for now. | |
3795 | ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"), | |
c24300ba | 3796 | diagnostics.m_diagnostics[0]); |
a3998c2f DM |
3797 | */ |
3798 | } | |
3799 | ||
88fa5555 DM |
3800 | /* Test of lexing char constants. */ |
3801 | ||
3802 | static void | |
3803 | test_lexer_char_constants (const line_table_case &case_) | |
3804 | { | |
3805 | /* Various char constants. | |
3806 | .....................0000000001111111111.22222222223. | |
3807 | .....................1234567890123456789.01234567890. */ | |
3808 | const char *content = (" 'a'\n" | |
3809 | " u'a'\n" | |
3810 | " U'a'\n" | |
3811 | " L'a'\n" | |
3812 | " 'abc'\n"); | |
3813 | lexer_test test (case_, content, NULL); | |
3814 | ||
3815 | /* Verify that we get the expected tokens back. */ | |
3816 | /* 'a'. */ | |
3817 | const cpp_token *tok = test.get_token (); | |
3818 | ASSERT_EQ (tok->type, CPP_CHAR); | |
3819 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'"); | |
3820 | ||
3821 | unsigned int chars_seen; | |
3822 | int unsignedp; | |
3823 | cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok, | |
3824 | &chars_seen, &unsignedp); | |
3825 | ASSERT_EQ (cc, 'a'); | |
3826 | ASSERT_EQ (chars_seen, 1); | |
3827 | ||
3828 | /* u'a'. */ | |
3829 | tok = test.get_token (); | |
3830 | ASSERT_EQ (tok->type, CPP_CHAR16); | |
3831 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'"); | |
3832 | ||
3833 | /* U'a'. */ | |
3834 | tok = test.get_token (); | |
3835 | ASSERT_EQ (tok->type, CPP_CHAR32); | |
3836 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'"); | |
3837 | ||
3838 | /* L'a'. */ | |
3839 | tok = test.get_token (); | |
3840 | ASSERT_EQ (tok->type, CPP_WCHAR); | |
3841 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'"); | |
3842 | ||
3843 | /* 'abc' (c-char-sequence). */ | |
3844 | tok = test.get_token (); | |
3845 | ASSERT_EQ (tok->type, CPP_CHAR); | |
3846 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'"); | |
3847 | } | |
741d3be5 DM |
3848 | /* A table of interesting location_t values, giving one axis of our test |
3849 | matrix. */ | |
3850 | ||
3851 | static const location_t boundary_locations[] = { | |
3852 | /* Zero means "don't override the default values for a new line_table". */ | |
3853 | 0, | |
3854 | ||
3855 | /* An arbitrary non-zero value that isn't close to one of | |
3856 | the boundary values below. */ | |
3857 | 0x10000, | |
3858 | ||
3859 | /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */ | |
3860 | LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100, | |
3861 | LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1, | |
3862 | LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES, | |
3863 | LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1, | |
3864 | LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100, | |
3865 | ||
3866 | /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */ | |
3867 | LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100, | |
3868 | LINE_MAP_MAX_LOCATION_WITH_COLS - 1, | |
3869 | LINE_MAP_MAX_LOCATION_WITH_COLS, | |
3870 | LINE_MAP_MAX_LOCATION_WITH_COLS + 1, | |
3871 | LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100, | |
3872 | }; | |
3873 | ||
f87e22c5 | 3874 | /* Run TESTCASE multiple times, once for each case in our test matrix. */ |
d9b950dd DM |
3875 | |
3876 | void | |
f87e22c5 | 3877 | for_each_line_table_case (void (*testcase) (const line_table_case &)) |
d9b950dd | 3878 | { |
741d3be5 DM |
3879 | /* As noted above in the description of struct line_table_case, |
3880 | we want to explore a test matrix of interesting line_table | |
3881 | situations, running various selftests for each case within the | |
3882 | matrix. */ | |
3883 | ||
3884 | /* Run all tests with: | |
3885 | (a) line_table->default_range_bits == 0, and | |
3886 | (b) line_table->default_range_bits == 5. */ | |
3887 | int num_cases_tested = 0; | |
3888 | for (int default_range_bits = 0; default_range_bits <= 5; | |
3889 | default_range_bits += 5) | |
3890 | { | |
3891 | /* ...and use each of the "interesting" location values as | |
3892 | the starting location within line_table. */ | |
ca32b29e | 3893 | const int num_boundary_locations = ARRAY_SIZE (boundary_locations); |
741d3be5 DM |
3894 | for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++) |
3895 | { | |
3896 | line_table_case c (default_range_bits, boundary_locations[loc_idx]); | |
3897 | ||
f87e22c5 | 3898 | testcase (c); |
741d3be5 DM |
3899 | |
3900 | num_cases_tested++; | |
3901 | } | |
3902 | } | |
3903 | ||
3904 | /* Verify that we fully covered the test matrix. */ | |
3905 | ASSERT_EQ (num_cases_tested, 2 * 12); | |
f87e22c5 DM |
3906 | } |
3907 | ||
a4553534 DM |
3908 | /* Verify that when presented with a consecutive pair of locations with |
3909 | a very large line offset, we don't attempt to consolidate them into | |
3910 | a single ordinary linemap where the line offsets within the line map | |
3911 | would lead to overflow (PR lto/88147). */ | |
3912 | ||
3913 | static void | |
3914 | test_line_offset_overflow () | |
3915 | { | |
3916 | line_table_test ltt (line_table_case (5, 0)); | |
3917 | ||
3918 | linemap_add (line_table, LC_ENTER, false, "foo.c", 0); | |
3919 | linemap_line_start (line_table, 1, 100); | |
3920 | location_t loc_a = linemap_line_start (line_table, 2578, 255); | |
3921 | assert_loceq ("foo.c", 2578, 0, loc_a); | |
3922 | ||
3923 | const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table); | |
3924 | ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13); | |
3925 | ASSERT_EQ (ordmap_a->m_range_bits, 5); | |
3926 | ||
3927 | location_t loc_b = linemap_line_start (line_table, 404198, 512); | |
3928 | assert_loceq ("foo.c", 404198, 0, loc_b); | |
3929 | ||
3930 | /* We should have started a new linemap, rather than attempting to store | |
3931 | a very large line offset. */ | |
3932 | const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table); | |
3933 | ASSERT_NE (ordmap_a, ordmap_b); | |
3934 | } | |
3935 | ||
ee925640 LH |
3936 | void test_cpp_utf8 () |
3937 | { | |
004bb936 | 3938 | const int def_tabstop = 8; |
bd5e882c DM |
3939 | cpp_char_column_policy policy (def_tabstop, cpp_wcwidth); |
3940 | ||
ee925640 LH |
3941 | /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */ |
3942 | { | |
bd5e882c | 3943 | int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy); |
ee925640 | 3944 | ASSERT_EQ (8, w_bad); |
bd5e882c | 3945 | int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy); |
004bb936 | 3946 | ASSERT_EQ (5, w_ctrl); |
ee925640 LH |
3947 | } |
3948 | ||
3949 | /* Verify that wcwidth of valid UTF-8 is as expected. */ | |
3950 | { | |
bd5e882c | 3951 | const int w_pi = cpp_display_width ("\xcf\x80", 2, policy); |
ee925640 | 3952 | ASSERT_EQ (1, w_pi); |
bd5e882c | 3953 | const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy); |
ee925640 | 3954 | ASSERT_EQ (2, w_emoji); |
004bb936 | 3955 | const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2, |
bd5e882c | 3956 | policy); |
ee925640 | 3957 | ASSERT_EQ (1, w_umlaut_precomposed); |
004bb936 | 3958 | const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3, |
bd5e882c | 3959 | policy); |
ee925640 | 3960 | ASSERT_EQ (1, w_umlaut_combining); |
bd5e882c | 3961 | const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy); |
ee925640 | 3962 | ASSERT_EQ (2, w_han); |
bd5e882c | 3963 | const int w_ascii = cpp_display_width ("GCC", 3, policy); |
ee925640 LH |
3964 | ASSERT_EQ (3, w_ascii); |
3965 | const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82" | |
004bb936 | 3966 | "\x9f! \xe4\xb8\xba y\xcc\x88", |
bd5e882c | 3967 | 24, policy); |
ee925640 LH |
3968 | ASSERT_EQ (18, w_mixed); |
3969 | } | |
3970 | ||
004bb936 LH |
3971 | /* Verify that display width properly expands tabs. */ |
3972 | { | |
3973 | const char *tstr = "\tabc\td"; | |
bd5e882c DM |
3974 | ASSERT_EQ (6, cpp_display_width (tstr, 6, |
3975 | cpp_char_column_policy (1, cpp_wcwidth))); | |
3976 | ASSERT_EQ (10, cpp_display_width (tstr, 6, | |
3977 | cpp_char_column_policy (3, cpp_wcwidth))); | |
3978 | ASSERT_EQ (17, cpp_display_width (tstr, 6, | |
3979 | cpp_char_column_policy (8, cpp_wcwidth))); | |
3980 | ASSERT_EQ (1, | |
3981 | cpp_display_column_to_byte_column | |
3982 | (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth))); | |
004bb936 LH |
3983 | } |
3984 | ||
ee925640 LH |
3985 | /* Verify that cpp_byte_column_to_display_column can go past the end, |
3986 | and similar edge cases. */ | |
3987 | { | |
3988 | const char *str | |
3989 | /* Display columns. | |
3990 | 111111112345 */ | |
3991 | = "\xcf\x80 abc"; | |
3992 | /* 111122223456 | |
3993 | Byte columns. */ | |
3994 | ||
bd5e882c | 3995 | ASSERT_EQ (5, cpp_display_width (str, 6, policy)); |
004bb936 | 3996 | ASSERT_EQ (105, |
bd5e882c | 3997 | cpp_byte_column_to_display_column (str, 6, 106, policy)); |
004bb936 | 3998 | ASSERT_EQ (10000, |
bd5e882c | 3999 | cpp_byte_column_to_display_column (NULL, 0, 10000, policy)); |
004bb936 | 4000 | ASSERT_EQ (0, |
bd5e882c | 4001 | cpp_byte_column_to_display_column (NULL, 10000, 0, policy)); |
ee925640 LH |
4002 | } |
4003 | ||
4004 | /* Verify that cpp_display_column_to_byte_column can go past the end, | |
4005 | and similar edge cases, and check invertibility. */ | |
4006 | { | |
4007 | const char *str | |
4008 | /* Display columns. | |
4009 | 000000000000000000000000000000000000011 | |
4010 | 111111112222222234444444455555555678901 */ | |
4011 | = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello"; | |
4012 | /* 000000000000000000000000000000000111111 | |
4013 | 111122223333444456666777788889999012345 | |
4014 | Byte columns. */ | |
bd5e882c | 4015 | ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy)); |
004bb936 | 4016 | ASSERT_EQ (15, |
bd5e882c | 4017 | cpp_display_column_to_byte_column (str, 15, 11, policy)); |
004bb936 | 4018 | ASSERT_EQ (115, |
bd5e882c | 4019 | cpp_display_column_to_byte_column (str, 15, 111, policy)); |
004bb936 | 4020 | ASSERT_EQ (10000, |
bd5e882c | 4021 | cpp_display_column_to_byte_column (NULL, 0, 10000, policy)); |
004bb936 | 4022 | ASSERT_EQ (0, |
bd5e882c | 4023 | cpp_display_column_to_byte_column (NULL, 10000, 0, policy)); |
ee925640 LH |
4024 | |
4025 | /* Verify that we do not interrupt a UTF-8 sequence. */ | |
bd5e882c | 4026 | ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy)); |
ee925640 LH |
4027 | |
4028 | for (int byte_col = 1; byte_col <= 15; ++byte_col) | |
4029 | { | |
004bb936 | 4030 | const int disp_col |
bd5e882c | 4031 | = cpp_byte_column_to_display_column (str, 15, byte_col, policy); |
004bb936 | 4032 | const int byte_col2 |
bd5e882c | 4033 | = cpp_display_column_to_byte_column (str, 15, disp_col, policy); |
ee925640 LH |
4034 | |
4035 | /* If we ask for the display column in the middle of a UTF-8 | |
4036 | sequence, it will return the length of the partial sequence, | |
4037 | matching the behavior of GCC before display column support. | |
4038 | Otherwise check the round trip was successful. */ | |
4039 | if (byte_col < 4) | |
4040 | ASSERT_EQ (byte_col, disp_col); | |
4041 | else if (byte_col >= 6 && byte_col < 9) | |
4042 | ASSERT_EQ (3 + (byte_col - 5), disp_col); | |
4043 | else | |
4044 | ASSERT_EQ (byte_col2, byte_col); | |
4045 | } | |
4046 | } | |
d495ea2b DM |
4047 | } |
4048 | ||
4049 | static bool | |
4050 | check_cpp_valid_utf8_p (const char *str) | |
4051 | { | |
4052 | return cpp_valid_utf8_p (str, strlen (str)); | |
4053 | } | |
4054 | ||
4055 | /* Check that cpp_valid_utf8_p works as expected. */ | |
4056 | ||
4057 | static void | |
4058 | test_cpp_valid_utf8_p () | |
4059 | { | |
4060 | ASSERT_TRUE (check_cpp_valid_utf8_p ("hello world")); | |
4061 | ||
4062 | /* 2-byte char (pi). */ | |
4063 | ASSERT_TRUE (check_cpp_valid_utf8_p("\xcf\x80")); | |
4064 | ||
4065 | /* 3-byte chars (the Japanese word "mojibake"). */ | |
4066 | ASSERT_TRUE (check_cpp_valid_utf8_p | |
4067 | ( | |
4068 | /* U+6587 CJK UNIFIED IDEOGRAPH-6587 | |
4069 | UTF-8: 0xE6 0x96 0x87 | |
4070 | C octal escaped UTF-8: \346\226\207. */ | |
4071 | "\346\226\207" | |
4072 | /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57 | |
4073 | UTF-8: 0xE5 0xAD 0x97 | |
4074 | C octal escaped UTF-8: \345\255\227. */ | |
4075 | "\345\255\227" | |
4076 | /* U+5316 CJK UNIFIED IDEOGRAPH-5316 | |
4077 | UTF-8: 0xE5 0x8C 0x96 | |
4078 | C octal escaped UTF-8: \345\214\226. */ | |
4079 | "\345\214\226" | |
4080 | /* U+3051 HIRAGANA LETTER KE | |
4081 | UTF-8: 0xE3 0x81 0x91 | |
4082 | C octal escaped UTF-8: \343\201\221. */ | |
4083 | "\343\201\221")); | |
4084 | ||
4085 | /* 4-byte char: an emoji. */ | |
4086 | ASSERT_TRUE (check_cpp_valid_utf8_p ("\xf0\x9f\x98\x82")); | |
4087 | ||
4088 | /* Control codes, including the NUL byte. */ | |
4089 | ASSERT_TRUE (cpp_valid_utf8_p ("\r\n\v\0\1", 5)); | |
4090 | ||
4091 | ASSERT_FALSE (check_cpp_valid_utf8_p ("\xf0!\x9f!\x98!\x82!")); | |
4092 | ||
4093 | /* Unexpected continuation bytes. */ | |
4094 | for (unsigned char continuation_byte = 0x80; | |
4095 | continuation_byte <= 0xbf; | |
4096 | continuation_byte++) | |
4097 | ASSERT_FALSE (cpp_valid_utf8_p ((const char *)&continuation_byte, 1)); | |
4098 | ||
4099 | /* "Lonely start characters" for 2-byte sequences. */ | |
4100 | { | |
4101 | unsigned char buf[2]; | |
4102 | buf[1] = ' '; | |
4103 | for (buf[0] = 0xc0; | |
4104 | buf[0] <= 0xdf; | |
4105 | buf[0]++) | |
4106 | ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2)); | |
4107 | } | |
4108 | ||
4109 | /* "Lonely start characters" for 3-byte sequences. */ | |
4110 | { | |
4111 | unsigned char buf[2]; | |
4112 | buf[1] = ' '; | |
4113 | for (buf[0] = 0xe0; | |
4114 | buf[0] <= 0xef; | |
4115 | buf[0]++) | |
4116 | ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2)); | |
4117 | } | |
4118 | ||
4119 | /* "Lonely start characters" for 4-byte sequences. */ | |
4120 | { | |
4121 | unsigned char buf[2]; | |
4122 | buf[1] = ' '; | |
4123 | for (buf[0] = 0xf0; | |
4124 | buf[0] <= 0xf4; | |
4125 | buf[0]++) | |
4126 | ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2)); | |
4127 | } | |
4128 | ||
4129 | /* Invalid start characters (formerly valid for 5-byte and 6-byte | |
4130 | sequences). */ | |
4131 | { | |
4132 | unsigned char buf[2]; | |
4133 | buf[1] = ' '; | |
4134 | for (buf[0] = 0xf5; | |
4135 | buf[0] <= 0xfd; | |
4136 | buf[0]++) | |
4137 | ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2)); | |
4138 | } | |
ee925640 | 4139 | |
d495ea2b DM |
4140 | /* Impossible bytes. */ |
4141 | ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc0")); | |
4142 | ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc1")); | |
4143 | ASSERT_FALSE (check_cpp_valid_utf8_p ("\xfe")); | |
4144 | ASSERT_FALSE (check_cpp_valid_utf8_p ("\xff")); | |
ee925640 LH |
4145 | } |
4146 | ||
f87e22c5 DM |
4147 | /* Run all of the selftests within this file. */ |
4148 | ||
4149 | void | |
d5148d4f | 4150 | input_cc_tests () |
f87e22c5 | 4151 | { |
082284da | 4152 | test_linenum_comparisons (); |
f87e22c5 DM |
4153 | test_should_have_column_data_p (); |
4154 | test_unknown_location (); | |
4155 | test_builtins (); | |
9144eabb | 4156 | for_each_line_table_case (test_make_location_nonpure_range_endpoints); |
f87e22c5 DM |
4157 | |
4158 | for_each_line_table_case (test_accessing_ordinary_linemaps); | |
4159 | for_each_line_table_case (test_lexer); | |
4160 | for_each_line_table_case (test_lexer_string_locations_simple); | |
4161 | for_each_line_table_case (test_lexer_string_locations_ebcdic); | |
4162 | for_each_line_table_case (test_lexer_string_locations_hex); | |
4163 | for_each_line_table_case (test_lexer_string_locations_oct); | |
4164 | for_each_line_table_case (test_lexer_string_locations_letter_escape_1); | |
4165 | for_each_line_table_case (test_lexer_string_locations_letter_escape_2); | |
4166 | for_each_line_table_case (test_lexer_string_locations_ucn4); | |
4167 | for_each_line_table_case (test_lexer_string_locations_ucn8); | |
4168 | for_each_line_table_case (test_lexer_string_locations_wide_string); | |
4169 | for_each_line_table_case (test_lexer_string_locations_string16); | |
4170 | for_each_line_table_case (test_lexer_string_locations_string32); | |
4171 | for_each_line_table_case (test_lexer_string_locations_u8); | |
4172 | for_each_line_table_case (test_lexer_string_locations_utf8_source); | |
4173 | for_each_line_table_case (test_lexer_string_locations_concatenation_1); | |
4174 | for_each_line_table_case (test_lexer_string_locations_concatenation_2); | |
4175 | for_each_line_table_case (test_lexer_string_locations_concatenation_3); | |
4176 | for_each_line_table_case (test_lexer_string_locations_macro); | |
4177 | for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument); | |
4178 | for_each_line_table_case (test_lexer_string_locations_non_string); | |
4179 | for_each_line_table_case (test_lexer_string_locations_long_line); | |
b8f56412 DM |
4180 | for_each_line_table_case (test_lexer_string_locations_raw_string_one_line); |
4181 | for_each_line_table_case (test_lexer_string_locations_raw_string_multiline); | |
a3998c2f | 4182 | for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated); |
f87e22c5 | 4183 | for_each_line_table_case (test_lexer_char_constants); |
741d3be5 | 4184 | |
d9b950dd | 4185 | test_reading_source_line (); |
a4553534 DM |
4186 | |
4187 | test_line_offset_overflow (); | |
ee925640 LH |
4188 | |
4189 | test_cpp_utf8 (); | |
d495ea2b | 4190 | test_cpp_valid_utf8_p (); |
d9b950dd DM |
4191 | } |
4192 | ||
4193 | } // namespace selftest | |
4194 | ||
4195 | #endif /* CHECKING_P */ |