]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/input.c
Correct a function pre/postcondition [PR102403].
[thirdparty/gcc.git] / gcc / input.c
CommitLineData
447924ef 1/* Data and functions related to line maps and input files.
99dee823 2 Copyright (C) 2004-2021 Free Software Foundation, Inc.
447924ef
JM
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "intl.h"
bc65bad2 24#include "diagnostic.h"
d9b950dd 25#include "selftest.h"
741d3be5 26#include "cpplib.h"
7ecc3eb9 27
a7d79e5c
DM
28#ifndef HAVE_ICONV
29#define HAVE_ICONV 0
30#endif
31
3ac6b5cf
LH
32/* Input charset configuration. */
/* Fallback charset callback, installed by
   file_cache::initialize_input_context when the client supplies none.
   The file name argument is ignored and nullptr is always returned;
   file_cache_slot::create only performs a charset conversion when the
   callback returns a non-null charset name.  */
static const char *
default_charset_callback (const char * /*file_path*/)
{
  return nullptr;
}
37
38void
39file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
40 bool should_skip_bom)
41{
42 in_context.ccb = (ccb ? ccb : default_charset_callback);
43 in_context.should_skip_bom = should_skip_bom;
44}
45
7ecc3eb9
DS
46/* This is a cache used by get_next_line to store the content of a
47 file to be searched for file lines. */
b544c348 48class file_cache_slot
7ecc3eb9 49{
6c1dae73 50public:
b544c348
DM
51 file_cache_slot ();
52 ~file_cache_slot ();
53
54 bool read_line_num (size_t line_num,
55 char ** line, ssize_t *line_len);
56
57 /* Accessors. */
58 const char *get_file_path () const { return m_file_path; }
59 unsigned get_use_count () const { return m_use_count; }
60 bool missing_trailing_newline_p () const
61 {
62 return m_missing_trailing_newline;
63 }
64
65 void inc_use_count () { m_use_count++; }
66
3ac6b5cf
LH
67 bool create (const file_cache::input_context &in_context,
68 const char *file_path, FILE *fp, unsigned highest_use_count);
b544c348
DM
69 void evict ();
70
71 private:
7ecc3eb9 72 /* These are information used to store a line boundary. */
6c1dae73 73 class line_info
7ecc3eb9 74 {
6c1dae73 75 public:
7ecc3eb9
DS
76 /* The line number. It starts from 1. */
77 size_t line_num;
78
79 /* The position (byte count) of the beginning of the line,
80 relative to the file data pointer. This starts at zero. */
81 size_t start_pos;
82
83 /* The position (byte count) of the last byte of the line. This
84 normally points to the '\n' character, or to one byte after the
85 last byte of the file, if the file doesn't contain a '\n'
86 character. */
87 size_t end_pos;
88
89 line_info (size_t l, size_t s, size_t e)
90 : line_num (l), start_pos (s), end_pos (e)
91 {}
92
93 line_info ()
94 :line_num (0), start_pos (0), end_pos (0)
95 {}
96 };
97
b544c348
DM
98 bool needs_read_p () const;
99 bool needs_grow_p () const;
100 void maybe_grow ();
101 bool read_data ();
102 bool maybe_read_data ();
103 bool get_next_line (char **line, ssize_t *line_len);
104 bool read_next_line (char ** line, ssize_t *line_len);
105 bool goto_next_line ();
106
107 static const size_t buffer_size = 4 * 1024;
108 static const size_t line_record_size = 100;
109
7ecc3eb9
DS
110 /* The number of time this file has been accessed. This is used
111 to designate which file cache to evict from the cache
112 array. */
b544c348 113 unsigned m_use_count;
7ecc3eb9 114
f5ea989d
DM
115 /* The file_path is the key for identifying a particular file in
116 the cache.
117 For libcpp-using code, the underlying buffer for this field is
118 owned by the corresponding _cpp_file within the cpp_reader. */
b544c348 119 const char *m_file_path;
7ecc3eb9 120
b544c348 121 FILE *m_fp;
7ecc3eb9
DS
122
123 /* This points to the content of the file that we've read so
124 far. */
b544c348 125 char *m_data;
7ecc3eb9 126
3ac6b5cf
LH
127 /* The allocated buffer to be freed may start a little earlier than DATA,
128 e.g. if a UTF8 BOM was skipped at the beginning. */
129 int m_alloc_offset;
130
7ecc3eb9 131 /* The size of the DATA array above.*/
b544c348 132 size_t m_size;
7ecc3eb9
DS
133
134 /* The number of bytes read from the underlying file so far. This
135 must be less (or equal) than SIZE above. */
b544c348 136 size_t m_nb_read;
7ecc3eb9
DS
137
138 /* The index of the beginning of the current line. */
b544c348 139 size_t m_line_start_idx;
7ecc3eb9
DS
140
141 /* The number of the previous line read. This starts at 1. Zero
142 means we've read no line so far. */
b544c348 143 size_t m_line_num;
7ecc3eb9
DS
144
145 /* This is the total number of lines of the current file. At the
146 moment, we try to get this information from the line map
147 subsystem. Note that this is just a hint. When using the C++
148 front-end, this hint is correct because the input file is then
149 completely tokenized before parsing starts; so the line map knows
150 the number of lines before compilation really starts. For e.g,
151 the C front-end, it can happen that we start emitting diagnostics
152 before the line map has seen the end of the file. */
b544c348 153 size_t m_total_lines;
7ecc3eb9 154
c65236d6
DM
155 /* Could this file be missing a trailing newline on its final line?
156 Initially true (to cope with empty files), set to true/false
157 as each line is read. */
b544c348 158 bool m_missing_trailing_newline;
c65236d6 159
7ecc3eb9
DS
160 /* This is a record of the beginning and end of the lines we've seen
161 while reading the file. This is useful to avoid walking the data
162 from the beginning when we are asked to read a line that is
163 before LINE_START_IDX above. Note that the maximum size of this
b544c348 164 record is line_record_size, so that the memory consumption
7ecc3eb9 165 doesn't explode. We thus scale total_lines down to
b544c348
DM
166 line_record_size. */
167 vec<line_info, va_heap> m_line_record;
3ac6b5cf
LH
168
169 void offset_buffer (int offset)
170 {
171 gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
172 : (size_t) offset <= m_size);
173 gcc_assert (m_data);
174 m_alloc_offset += offset;
175 m_data += offset;
176 m_size -= offset;
177 }
178
7ecc3eb9 179};
447924ef
JM
180
181/* Current position in real source file. */
182
3edf64aa 183location_t input_location = UNKNOWN_LOCATION;
447924ef 184
99b1c316 185class line_maps *line_table;
447924ef 186
f87e22c5
DM
187/* A stashed copy of "line_table" for use by selftest::line_table_test.
188 This needs to be a global so that it can be a GC root, and thus
189 prevent the stashed copy from being garbage-collected if the GC runs
190 during a line_table_test. */
191
99b1c316 192class line_maps *saved_line_table;
f87e22c5 193
84756fd4
DS
194/* Expand the source location LOC into a human readable location. If
195 LOC resolves to a builtin location, the file name of the readable
7eb918cc
DS
196 location is set to the string "<built-in>". If EXPANSION_POINT_P is
197 TRUE and LOC is virtual, then it is resolved to the expansion
198 point of the involved macro. Otherwise, it is resolved to the
c4ca1a09
DS
199 spelling location of the token.
200
201 When resolving to the spelling location of the token, if the
202 resulting location is for a built-in location (that is, it has no
203 associated line/column) in the context of a macro expansion, the
204 returned location is the first one (while unwinding the macro
205 location towards its expansion point) that is in real source
c471c6ed
DM
206 code.
207
208 ASPECT controls which part of the location to use. */
7eb918cc
DS
209
210static expanded_location
620e594b 211expand_location_1 (location_t loc,
c471c6ed
DM
212 bool expansion_point_p,
213 enum location_aspect aspect)
447924ef
JM
214{
215 expanded_location xloc;
0e50b624 216 const line_map_ordinary *map;
c4ca1a09 217 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
5368224f
DC
218 tree block = NULL;
219
220 if (IS_ADHOC_LOC (loc))
221 {
222 block = LOCATION_BLOCK (loc);
223 loc = LOCATION_LOCUS (loc);
224 }
c4ca1a09
DS
225
226 memset (&xloc, 0, sizeof (xloc));
84756fd4 227
c4ca1a09
DS
228 if (loc >= RESERVED_LOCATION_COUNT)
229 {
230 if (!expansion_point_p)
231 {
232 /* We want to resolve LOC to its spelling location.
233
234 But if that spelling location is a reserved location that
235 appears in the context of a macro expansion (like for a
236 location for a built-in token), let's consider the first
237 location (toward the expansion point) that is not reserved;
238 that is, the first location that is in real source code. */
239 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
0e50b624 240 loc, NULL);
c4ca1a09
DS
241 lrk = LRK_SPELLING_LOCATION;
242 }
c471c6ed
DM
243 loc = linemap_resolve_location (line_table, loc, lrk, &map);
244
245 /* loc is now either in an ordinary map, or is a reserved location.
246 If it is a compound location, the caret is in a spelling location,
247 but the start/finish might still be a virtual location.
248 Depending of what the caller asked for, we may need to recurse
249 one level in order to resolve any virtual locations in the
250 end-points. */
251 switch (aspect)
252 {
253 default:
254 gcc_unreachable ();
255 /* Fall through. */
256 case LOCATION_ASPECT_CARET:
257 break;
258 case LOCATION_ASPECT_START:
259 {
620e594b 260 location_t start = get_start (loc);
c471c6ed
DM
261 if (start != loc)
262 return expand_location_1 (start, expansion_point_p, aspect);
263 }
264 break;
265 case LOCATION_ASPECT_FINISH:
266 {
620e594b 267 location_t finish = get_finish (loc);
c471c6ed
DM
268 if (finish != loc)
269 return expand_location_1 (finish, expansion_point_p, aspect);
270 }
271 break;
272 }
c4ca1a09
DS
273 xloc = linemap_expand_location (line_table, map, loc);
274 }
84756fd4 275
5368224f 276 xloc.data = block;
447924ef 277 if (loc <= BUILTINS_LOCATION)
84756fd4
DS
278 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
279
447924ef
JM
280 return xloc;
281}
64a1a422 282
7ecc3eb9
DS
283/* Initialize the set of cache used for files accessed by caret
284 diagnostic. */
285
286static void
287diagnostic_file_cache_init (void)
288{
b544c348
DM
289 gcc_assert (global_dc);
290 if (global_dc->m_file_cache == NULL)
291 global_dc->m_file_cache = new file_cache ();
7ecc3eb9
DS
292}
293
592f32fa 294/* Free the resources used by the set of cache used for files accessed
7ecc3eb9
DS
295 by caret diagnostic. */
296
297void
298diagnostic_file_cache_fini (void)
299{
b544c348 300 if (global_dc->m_file_cache)
7ecc3eb9 301 {
b544c348
DM
302 delete global_dc->m_file_cache;
303 global_dc->m_file_cache = NULL;
7ecc3eb9
DS
304 }
305}
306
307/* Return the total lines number that have been read so far by the
308 line map (in the preprocessor) so far. For languages like C++ that
309 entirely preprocess the input file before starting to parse, this
310 equals the actual number of lines of the file. */
311
312static size_t
313total_lines_num (const char *file_path)
314{
315 size_t r = 0;
620e594b 316 location_t l = 0;
7ecc3eb9
DS
317 if (linemap_get_file_highest_location (line_table, file_path, &l))
318 {
319 gcc_assert (l >= RESERVED_LOCATION_COUNT);
320 expanded_location xloc = expand_location (l);
321 r = xloc.line;
322 }
323 return r;
324}
325
326/* Lookup the cache used for the content of a given file accessed by
327 caret diagnostic. Return the found cached file, or NULL if no
328 cached file was found. */
329
b544c348
DM
330file_cache_slot *
331file_cache::lookup_file (const char *file_path)
7ecc3eb9 332{
b544c348 333 gcc_assert (file_path);
7ecc3eb9
DS
334
335 /* This will contain the found cached file. */
b544c348
DM
336 file_cache_slot *r = NULL;
337 for (unsigned i = 0; i < num_file_slots; ++i)
7ecc3eb9 338 {
b544c348
DM
339 file_cache_slot *c = &m_file_slots[i];
340 if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
7ecc3eb9 341 {
b544c348 342 c->inc_use_count ();
7ecc3eb9
DS
343 r = c;
344 }
345 }
346
347 if (r)
b544c348 348 r->inc_use_count ();
7ecc3eb9
DS
349
350 return r;
351}
352
f89b03b6
DM
353/* Purge any mention of FILENAME from the cache of files used for
354 printing source code. For use in selftests when working
355 with tempfiles. */
356
357void
358diagnostics_file_cache_forcibly_evict_file (const char *file_path)
359{
360 gcc_assert (file_path);
361
b544c348
DM
362 if (!global_dc->m_file_cache)
363 return;
364
365 global_dc->m_file_cache->forcibly_evict_file (file_path);
366}
367
368void
369file_cache::forcibly_evict_file (const char *file_path)
370{
371 gcc_assert (file_path);
372
373 file_cache_slot *r = lookup_file (file_path);
f89b03b6
DM
374 if (!r)
375 /* Not found. */
376 return;
377
b544c348
DM
378 r->evict ();
379}
380
381void
382file_cache_slot::evict ()
383{
384 m_file_path = NULL;
385 if (m_fp)
386 fclose (m_fp);
387 m_fp = NULL;
388 m_nb_read = 0;
389 m_line_start_idx = 0;
390 m_line_num = 0;
391 m_line_record.truncate (0);
392 m_use_count = 0;
393 m_total_lines = 0;
394 m_missing_trailing_newline = true;
f89b03b6
DM
395}
396
7ecc3eb9
DS
397/* Return the file cache that has been less used, recently, or the
398 first empty one. If HIGHEST_USE_COUNT is non-null,
399 *HIGHEST_USE_COUNT is set to the highest use count of the entries
400 in the cache table. */
401
b544c348
DM
402file_cache_slot*
403file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
7ecc3eb9
DS
404{
405 diagnostic_file_cache_init ();
406
b544c348
DM
407 file_cache_slot *to_evict = &m_file_slots[0];
408 unsigned huc = to_evict->get_use_count ();
409 for (unsigned i = 1; i < num_file_slots; ++i)
7ecc3eb9 410 {
b544c348
DM
411 file_cache_slot *c = &m_file_slots[i];
412 bool c_is_empty = (c->get_file_path () == NULL);
7ecc3eb9 413
b544c348
DM
414 if (c->get_use_count () < to_evict->get_use_count ()
415 || (to_evict->get_file_path () && c_is_empty))
7ecc3eb9
DS
416 /* We evict C because it's either an entry with a lower use
417 count or one that is empty. */
418 to_evict = c;
419
b544c348
DM
420 if (huc < c->get_use_count ())
421 huc = c->get_use_count ();
7ecc3eb9
DS
422
423 if (c_is_empty)
424 /* We've reached the end of the cache; subsequent elements are
425 all empty. */
426 break;
427 }
428
429 if (highest_use_count)
430 *highest_use_count = huc;
431
432 return to_evict;
433}
434
435/* Create the cache used for the content of a given file to be
436 accessed by caret diagnostic. This cache is added to an array of
437 cache and can be retrieved by lookup_file_in_cache_tab. This
438 function returns the created cache. Note that only the last
b544c348 439 num_file_slots files are cached. */
7ecc3eb9 440
b544c348
DM
441file_cache_slot*
442file_cache::add_file (const char *file_path)
7ecc3eb9
DS
443{
444
445 FILE *fp = fopen (file_path, "r");
317363b4
DS
446 if (fp == NULL)
447 return NULL;
7ecc3eb9
DS
448
449 unsigned highest_use_count = 0;
b544c348 450 file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
3ac6b5cf
LH
451 if (!r->create (in_context, file_path, fp, highest_use_count))
452 return NULL;
b544c348
DM
453 return r;
454}
455
456/* Populate this slot for use on FILE_PATH and FP, dropping any
457 existing cached content within it. */
458
3ac6b5cf
LH
459bool
460file_cache_slot::create (const file_cache::input_context &in_context,
461 const char *file_path, FILE *fp,
b544c348
DM
462 unsigned highest_use_count)
463{
464 m_file_path = file_path;
465 if (m_fp)
466 fclose (m_fp);
467 m_fp = fp;
3ac6b5cf
LH
468 if (m_alloc_offset)
469 offset_buffer (-m_alloc_offset);
b544c348
DM
470 m_nb_read = 0;
471 m_line_start_idx = 0;
472 m_line_num = 0;
473 m_line_record.truncate (0);
7ecc3eb9
DS
474 /* Ensure that this cache entry doesn't get evicted next time
475 add_file_to_cache_tab is called. */
b544c348
DM
476 m_use_count = ++highest_use_count;
477 m_total_lines = total_lines_num (file_path);
478 m_missing_trailing_newline = true;
3ac6b5cf
LH
479
480
481 /* Check the input configuration to determine if we need to do any
482 transformations, such as charset conversion or BOM skipping. */
483 if (const char *input_charset = in_context.ccb (file_path))
484 {
485 /* Need a full-blown conversion of the input charset. */
486 fclose (m_fp);
487 m_fp = NULL;
488 const cpp_converted_source cs
489 = cpp_get_converted_source (file_path, input_charset);
490 if (!cs.data)
491 return false;
492 if (m_data)
493 XDELETEVEC (m_data);
494 m_data = cs.data;
495 m_nb_read = m_size = cs.len;
496 m_alloc_offset = cs.data - cs.to_free;
497 }
498 else if (in_context.should_skip_bom)
499 {
500 if (read_data ())
501 {
502 const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
503 offset_buffer (offset);
504 m_nb_read -= offset;
505 }
506 }
507
508 return true;
b544c348 509}
7ecc3eb9 510
b544c348
DM
511/* file_cache's ctor. */
512
513file_cache::file_cache ()
514: m_file_slots (new file_cache_slot[num_file_slots])
515{
3ac6b5cf 516 initialize_input_context (nullptr, false);
b544c348
DM
517}
518
519/* file_cache's dtor. */
520
521file_cache::~file_cache ()
522{
523 delete[] m_file_slots;
7ecc3eb9
DS
524}
525
526/* Lookup the cache used for the content of a given file accessed by
527 caret diagnostic. If no cached file was found, create a new cache
528 for this file, add it to the array of cached file and return
529 it. */
530
b544c348
DM
531file_cache_slot*
532file_cache::lookup_or_add_file (const char *file_path)
7ecc3eb9 533{
b544c348 534 file_cache_slot *r = lookup_file (file_path);
7ecc3eb9 535 if (r == NULL)
b544c348 536 r = add_file (file_path);
7ecc3eb9
DS
537 return r;
538}
539
540/* Default constructor for a cache of file used by caret
541 diagnostic. */
542
b544c348
DM
543file_cache_slot::file_cache_slot ()
544: m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
3ac6b5cf
LH
545 m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
546 m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
7ecc3eb9 547{
b544c348 548 m_line_record.create (0);
7ecc3eb9
DS
549}
550
551/* Destructor for a cache of file used by caret diagnostic. */
552
b544c348 553file_cache_slot::~file_cache_slot ()
7ecc3eb9 554{
b544c348 555 if (m_fp)
7ecc3eb9 556 {
b544c348
DM
557 fclose (m_fp);
558 m_fp = NULL;
7ecc3eb9 559 }
b544c348 560 if (m_data)
7ecc3eb9 561 {
3ac6b5cf 562 offset_buffer (-m_alloc_offset);
b544c348
DM
563 XDELETEVEC (m_data);
564 m_data = 0;
7ecc3eb9 565 }
b544c348 566 m_line_record.release ();
7ecc3eb9
DS
567}
568
569/* Returns TRUE iff the cache would need to be filled with data coming
570 from the file. That is, either the cache is empty or full or the
571 current line is empty. Note that if the cache is full, it would
572 need to be extended and filled again. */
573
b544c348
DM
574bool
575file_cache_slot::needs_read_p () const
7ecc3eb9 576{
3ac6b5cf 577 return m_fp && (m_nb_read == 0
b544c348
DM
578 || m_nb_read == m_size
579 || (m_line_start_idx >= m_nb_read - 1));
7ecc3eb9
DS
580}
581
582/* Return TRUE iff the cache is full and thus needs to be
583 extended. */
584
b544c348
DM
585bool
586file_cache_slot::needs_grow_p () const
7ecc3eb9 587{
b544c348 588 return m_nb_read == m_size;
7ecc3eb9
DS
589}
590
591/* Grow the cache if it needs to be extended. */
592
b544c348
DM
593void
594file_cache_slot::maybe_grow ()
9fec0042 595{
b544c348 596 if (!needs_grow_p ())
7ecc3eb9
DS
597 return;
598
3ac6b5cf
LH
599 if (!m_data)
600 {
601 gcc_assert (m_size == 0 && m_alloc_offset == 0);
602 m_size = buffer_size;
603 m_data = XNEWVEC (char, m_size);
604 }
605 else
606 {
607 const int offset = m_alloc_offset;
608 offset_buffer (-offset);
609 m_size *= 2;
610 m_data = XRESIZEVEC (char, m_data, m_size);
611 offset_buffer (offset);
612 }
7ecc3eb9 613}
9fec0042 614
7ecc3eb9
DS
615/* Read more data into the cache. Extends the cache if need be.
616 Returns TRUE iff new data could be read. */
617
b544c348
DM
618bool
619file_cache_slot::read_data ()
7ecc3eb9 620{
b544c348 621 if (feof (m_fp) || ferror (m_fp))
7ecc3eb9
DS
622 return false;
623
b544c348 624 maybe_grow ();
7ecc3eb9 625
b544c348
DM
626 char * from = m_data + m_nb_read;
627 size_t to_read = m_size - m_nb_read;
628 size_t nb_read = fread (from, 1, to_read, m_fp);
7ecc3eb9 629
b544c348 630 if (ferror (m_fp))
7ecc3eb9
DS
631 return false;
632
b544c348 633 m_nb_read += nb_read;
7ecc3eb9
DS
634 return !!nb_read;
635}
636
637/* Read new data iff the cache needs to be filled with more data
638 coming from the file FP. Return TRUE iff the cache was filled with
639 mode data. */
640
b544c348
DM
641bool
642file_cache_slot::maybe_read_data ()
7ecc3eb9 643{
b544c348 644 if (!needs_read_p ())
7ecc3eb9 645 return false;
b544c348 646 return read_data ();
7ecc3eb9
DS
647}
648
649/* Read a new line from file FP, using C as a cache for the data
650 coming from the file. Upon successful completion, *LINE is set to
1adae327
BE
651 the beginning of the line found. *LINE points directly in the
652 line cache and is only valid until the next call of get_next_line.
7ecc3eb9
DS
653 *LINE_LEN is set to the length of the line. Note that the line
654 does not contain any terminal delimiter. This function returns
655 true if some data was read or process from the cache, false
1adae327
BE
656 otherwise. Note that subsequent calls to get_next_line might
657 make the content of *LINE invalid. */
7ecc3eb9 658
b544c348
DM
659bool
660file_cache_slot::get_next_line (char **line, ssize_t *line_len)
7ecc3eb9
DS
661{
662 /* Fill the cache with data to process. */
b544c348 663 maybe_read_data ();
7ecc3eb9 664
b544c348 665 size_t remaining_size = m_nb_read - m_line_start_idx;
7ecc3eb9
DS
666 if (remaining_size == 0)
667 /* There is no more data to process. */
668 return false;
669
b544c348 670 char *line_start = m_data + m_line_start_idx;
7ecc3eb9
DS
671
672 char *next_line_start = NULL;
673 size_t len = 0;
674 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
675 if (line_end == NULL)
9fec0042 676 {
7ecc3eb9
DS
677 /* We haven't found the end-of-line delimiter in the cache.
678 Fill the cache with more data from the file and look for the
679 '\n'. */
b544c348 680 while (maybe_read_data ())
7ecc3eb9 681 {
b544c348
DM
682 line_start = m_data + m_line_start_idx;
683 remaining_size = m_nb_read - m_line_start_idx;
7ecc3eb9
DS
684 line_end = (char *) memchr (line_start, '\n', remaining_size);
685 if (line_end != NULL)
686 {
687 next_line_start = line_end + 1;
688 break;
689 }
690 }
691 if (line_end == NULL)
c65236d6
DM
692 {
693 /* We've loadded all the file into the cache and still no
694 '\n'. Let's say the line ends up at one byte passed the
695 end of the file. This is to stay consistent with the case
696 of when the line ends up with a '\n' and line_end points to
697 that terminal '\n'. That consistency is useful below in
698 the len calculation. */
b544c348
DM
699 line_end = m_data + m_nb_read ;
700 m_missing_trailing_newline = true;
c65236d6
DM
701 }
702 else
b544c348 703 m_missing_trailing_newline = false;
9fec0042 704 }
7ecc3eb9 705 else
c65236d6
DM
706 {
707 next_line_start = line_end + 1;
b544c348 708 m_missing_trailing_newline = false;
c65236d6 709 }
7ecc3eb9 710
3ac6b5cf 711 if (m_fp && ferror (m_fp))
1adae327 712 return false;
7ecc3eb9
DS
713
714 /* At this point, we've found the end of the of line. It either
715 points to the '\n' or to one byte after the last byte of the
716 file. */
717 gcc_assert (line_end != NULL);
9fec0042 718
7ecc3eb9
DS
719 len = line_end - line_start;
720
b544c348 721 if (m_line_start_idx < m_nb_read)
7ecc3eb9
DS
722 *line = line_start;
723
b544c348 724 ++m_line_num;
7ecc3eb9
DS
725
726 /* Before we update our line record, make sure the hint about the
727 total number of lines of the file is correct. If it's not, then
728 we give up recording line boundaries from now on. */
729 bool update_line_record = true;
b544c348 730 if (m_line_num > m_total_lines)
7ecc3eb9
DS
731 update_line_record = false;
732
733 /* Now update our line record so that re-reading lines from the
b544c348 734 before m_line_start_idx is faster. */
7ecc3eb9 735 if (update_line_record
b544c348 736 && m_line_record.length () < line_record_size)
7ecc3eb9
DS
737 {
738 /* If the file lines fits in the line record, we just record all
739 its lines ...*/
b544c348
DM
740 if (m_total_lines <= line_record_size
741 && m_line_num > m_line_record.length ())
742 m_line_record.safe_push
743 (file_cache_slot::line_info (m_line_num,
744 m_line_start_idx,
745 line_end - m_data));
746 else if (m_total_lines > line_record_size)
7ecc3eb9
DS
747 {
748 /* ... otherwise, we just scale total_lines down to
b544c348
DM
749 (line_record_size lines. */
750 size_t n = (m_line_num * line_record_size) / m_total_lines;
751 if (m_line_record.length () == 0
752 || n >= m_line_record.length ())
753 m_line_record.safe_push
754 (file_cache_slot::line_info (m_line_num,
755 m_line_start_idx,
756 line_end - m_data));
7ecc3eb9
DS
757 }
758 }
759
b544c348 760 /* Update m_line_start_idx so that it points to the next line to be
7ecc3eb9
DS
761 read. */
762 if (next_line_start)
b544c348 763 m_line_start_idx = next_line_start - m_data;
7ecc3eb9
DS
764 else
765 /* We didn't find any terminal '\n'. Let's consider that the end
766 of line is the end of the data in the cache. The next
767 invocation of get_next_line will either read more data from the
768 underlying file or return false early because we've reached the
769 end of the file. */
b544c348 770 m_line_start_idx = m_nb_read;
7ecc3eb9
DS
771
772 *line_len = len;
773
774 return true;
775}
776
7ecc3eb9
DS
777/* Consume the next bytes coming from the cache (or from its
778 underlying file if there are remaining unread bytes in the file)
779 until we reach the next end-of-line (or end-of-file). There is no
780 copying from the cache involved. Return TRUE upon successful
781 completion. */
782
b544c348
DM
783bool
784file_cache_slot::goto_next_line ()
7ecc3eb9
DS
785{
786 char *l;
787 ssize_t len;
788
b544c348 789 return get_next_line (&l, &len);
7ecc3eb9
DS
790}
791
792/* Read an arbitrary line number LINE_NUM from the file cached in C.
1adae327
BE
793 If the line was read successfully, *LINE points to the beginning
794 of the line in the file cache and *LINE_LEN is the length of the
795 line. *LINE is not nul-terminated, but may contain zero bytes.
796 *LINE is only valid until the next call of read_line_num.
7ecc3eb9
DS
797 This function returns bool if a line was read. */
798
b544c348
DM
799bool
800file_cache_slot::read_line_num (size_t line_num,
801 char ** line, ssize_t *line_len)
7ecc3eb9
DS
802{
803 gcc_assert (line_num > 0);
804
b544c348 805 if (line_num <= m_line_num)
9789a912 806 {
b544c348 807 /* We've been asked to read lines that are before m_line_num.
7ecc3eb9
DS
808 So lets use our line record (if it's not empty) to try to
809 avoid re-reading the file from the beginning again. */
7f4d640c 810
b544c348 811 if (m_line_record.is_empty ())
9fec0042 812 {
b544c348
DM
813 m_line_start_idx = 0;
814 m_line_num = 0;
7ecc3eb9
DS
815 }
816 else
817 {
b544c348
DM
818 file_cache_slot::line_info *i = NULL;
819 if (m_total_lines <= line_record_size)
7ecc3eb9
DS
820 {
821 /* In languages where the input file is not totally
b544c348 822 preprocessed up front, the m_total_lines hint
7ecc3eb9
DS
823 can be smaller than the number of lines of the
824 file. In that case, only the first
b544c348 825 m_total_lines have been recorded.
7ecc3eb9 826
b544c348 827 Otherwise, the first m_total_lines we've read have
7ecc3eb9 828 their start/end recorded here. */
b544c348
DM
829 i = (line_num <= m_total_lines)
830 ? &m_line_record[line_num - 1]
831 : &m_line_record[m_total_lines - 1];
7ecc3eb9
DS
832 gcc_assert (i->line_num <= line_num);
833 }
834 else
835 {
836 /* So the file had more lines than our line record
837 size. Thus the number of lines we've recorded has
b544c348 838 been scaled down to line_record_size. Let's
7ecc3eb9
DS
839 pick the start/end of the recorded line that is
840 closest to line_num. */
b544c348
DM
841 size_t n = (line_num <= m_total_lines)
842 ? line_num * line_record_size / m_total_lines
843 : m_line_record.length () - 1;
844 if (n < m_line_record.length ())
7ecc3eb9 845 {
b544c348 846 i = &m_line_record[n];
7ecc3eb9
DS
847 gcc_assert (i->line_num <= line_num);
848 }
849 }
850
851 if (i && i->line_num == line_num)
852 {
1adae327 853 /* We have the start/end of the line. */
b544c348 854 *line = m_data + i->start_pos;
1adae327 855 *line_len = i->end_pos - i->start_pos;
7ecc3eb9
DS
856 return true;
857 }
858
859 if (i)
860 {
b544c348
DM
861 m_line_start_idx = i->start_pos;
862 m_line_num = i->line_num - 1;
7ecc3eb9
DS
863 }
864 else
865 {
b544c348
DM
866 m_line_start_idx = 0;
867 m_line_num = 0;
7ecc3eb9 868 }
9fec0042 869 }
9fec0042 870 }
7ecc3eb9 871
b544c348 872 /* Let's walk from line m_line_num up to line_num - 1, without
7ecc3eb9 873 copying any line. */
b544c348
DM
874 while (m_line_num < line_num - 1)
875 if (!goto_next_line ())
7ecc3eb9
DS
876 return false;
877
878 /* The line we want is the next one. Let's read and copy it back to
879 the caller. */
b544c348 880 return get_next_line (line, line_len);
9fec0042
MLI
881}
882
1adae327
BE
883/* Return the physical source line that corresponds to FILE_PATH/LINE.
884 The line is not nul-terminated. The returned pointer is only
885 valid until the next call of location_get_source_line.
886 Note that the line can contain several null characters,
7761dfbe
DM
887 so the returned value's length has the actual length of the line.
888 If the function fails, a NULL char_span is returned. */
9fec0042 889
7761dfbe
DM
890char_span
891location_get_source_line (const char *file_path, int line)
9fec0042 892{
ac2a97db 893 char *buffer = NULL;
1adae327 894 ssize_t len;
7ecc3eb9 895
31bdd08a 896 if (line == 0)
7761dfbe 897 return char_span (NULL, 0);
367c8286 898
b544c348
DM
899 if (file_path == NULL)
900 return char_span (NULL, 0);
901
902 diagnostic_file_cache_init ();
903
904 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
367c8286 905 if (c == NULL)
7761dfbe 906 return char_span (NULL, 0);
367c8286 907
b544c348 908 bool read = c->read_line_num (line, &buffer, &len);
7761dfbe
DM
909 if (!read)
910 return char_span (NULL, 0);
9fec0042 911
7761dfbe 912 return char_span (buffer, len);
9fec0042
MLI
913}
914
c65236d6
DM
915/* Determine if FILE_PATH missing a trailing newline on its final line.
916 Only valid to call once all of the file has been loaded, by
917 requesting a line number beyond the end of the file. */
918
919bool
920location_missing_trailing_newline (const char *file_path)
921{
b544c348
DM
922 diagnostic_file_cache_init ();
923
924 file_cache_slot *c = global_dc->m_file_cache->lookup_or_add_file (file_path);
c65236d6
DM
925 if (c == NULL)
926 return false;
927
b544c348 928 return c->missing_trailing_newline_p ();
c65236d6
DM
929}
930
c468587a
DS
931/* Test if the location originates from the spelling location of a
932 builtin-tokens. That is, return TRUE if LOC is a (possibly
933 virtual) location of a built-in token that appears in the expansion
934 list of a macro. Please note that this function also works on
935 tokens that result from built-in tokens. For instance, the
936 function would return true if passed a token "4" that is the result
937 of the expansion of the built-in __LINE__ macro. */
938bool
620e594b 939is_location_from_builtin_token (location_t loc)
c468587a 940{
0e50b624 941 const line_map_ordinary *map = NULL;
c468587a
DS
942 loc = linemap_resolve_location (line_table, loc,
943 LRK_SPELLING_LOCATION, &map);
944 return loc == BUILTINS_LOCATION;
945}
946
7eb918cc
DS
947/* Expand the source location LOC into a human readable location. If
948 LOC is virtual, it resolves to the expansion point of the involved
949 macro. If LOC resolves to a builtin location, the file name of the
950 readable location is set to the string "<built-in>". */
951
952expanded_location
620e594b 953expand_location (location_t loc)
7eb918cc 954{
c471c6ed
DM
955 return expand_location_1 (loc, /*expansion_point_p=*/true,
956 LOCATION_ASPECT_CARET);
7eb918cc
DS
957}
958
959/* Expand the source location LOC into a human readable location. If
960 LOC is virtual, it resolves to the expansion location of the
961 relevant macro. If LOC resolves to a builtin location, the file
962 name of the readable location is set to the string
963 "<built-in>". */
964
965expanded_location
620e594b 966expand_location_to_spelling_point (location_t loc,
0d48e877 967 enum location_aspect aspect)
7eb918cc 968{
0d48e877 969 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
7eb918cc
DS
970}
971
8a645150 972/* The rich_location class within libcpp requires a way to expand
620e594b 973 location_t instances, and relies on the client code
8a645150
DM
974 providing a symbol named
975 linemap_client_expand_location_to_spelling_point
976 to do this.
977
978 This is the implementation for libcommon.a (all host binaries),
c471c6ed 979 which simply calls into expand_location_1. */
8a645150
DM
980
981expanded_location
620e594b 982linemap_client_expand_location_to_spelling_point (location_t loc,
c471c6ed 983 enum location_aspect aspect)
8a645150 984{
c471c6ed 985 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
8a645150
DM
986}
987
988
e1f0c178
MLI
989/* If LOCATION is in a system header and if it is a virtual location for
990 a token coming from the expansion of a macro, unwind it to the
991 location of the expansion point of the macro. Otherwise, just return
70dc395a
DS
992 LOCATION.
993
994 This is used for instance when we want to emit diagnostics about a
e1f0c178
MLI
995 token that may be located in a macro that is itself defined in a
996 system header, for example, for the NULL macro. In such a case, if
997 LOCATION were passed directly to diagnostic functions such as
998 warning_at, the diagnostic would be suppressed (unless
999 -Wsystem-headers). */
70dc395a 1000
620e594b
DM
1001location_t
1002expansion_point_location_if_in_system_header (location_t location)
70dc395a
DS
1003{
1004 if (in_system_header_at (location))
1005 location = linemap_resolve_location (line_table, location,
1006 LRK_MACRO_EXPANSION_POINT,
1007 NULL);
1008 return location;
1009}
7eb918cc 1010
79ce98bc
MP
1011/* If LOCATION is a virtual location for a token coming from the expansion
1012 of a macro, unwind to the location of the expansion point of the macro. */
1013
620e594b
DM
1014location_t
1015expansion_point_location (location_t location)
79ce98bc
MP
1016{
1017 return linemap_resolve_location (line_table, location,
1018 LRK_MACRO_EXPANSION_POINT, NULL);
1019}
1020
a01fc549
DM
1021/* Construct a location with caret at CARET, ranging from START to
1022 finish e.g.
1023
1024 11111111112
1025 12345678901234567890
1026 522
1027 523 return foo + bar;
1028 ~~~~^~~~~
1029 524
1030
1031 The location's caret is at the "+", line 523 column 15, but starts
1032 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
1033 of "bar" at column 19. */
1034
1035location_t
1036make_location (location_t caret, location_t start, location_t finish)
1037{
1038 location_t pure_loc = get_pure_location (caret);
1039 source_range src_range;
9144eabb
DM
1040 src_range.m_start = get_start (start);
1041 src_range.m_finish = get_finish (finish);
a01fc549
DM
1042 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
1043 pure_loc,
1044 src_range,
1045 NULL);
1046 return combined_loc;
1047}
1048
a32c8316
MP
1049/* Same as above, but taking a source range rather than two locations. */
1050
1051location_t
1052make_location (location_t caret, source_range src_range)
1053{
1054 location_t pure_loc = get_pure_location (caret);
1055 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
1056}
1057
ee925640
LH
1058/* An expanded_location stores the column in byte units. This function
1059 converts that column to display units. That requires reading the associated
1060 source line in order to calculate the display width. If that cannot be done
1061 for any reason, then returns the byte column as a fallback. */
1062int
004bb936 1063location_compute_display_column (expanded_location exploc, int tabstop)
ee925640
LH
1064{
1065 if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
1066 return exploc.column;
1067 char_span line = location_get_source_line (exploc.file, exploc.line);
1068 /* If line is NULL, this function returns exploc.column which is the
1069 desired fallback. */
1070 return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
004bb936 1071 exploc.column, tabstop);
ee925640
LH
1072}
1073
64a1a422
TT
1074/* Dump statistics to stderr about the memory usage of the line_table
1075 set of line maps. This also displays some statistics about macro
1076 expansion. */
1077
1078void
1079dump_line_table_statistics (void)
1080{
1081 struct linemap_stats s;
d17687f6 1082 long total_used_map_size,
64a1a422
TT
1083 macro_maps_size,
1084 total_allocated_map_size;
1085
1086 memset (&s, 0, sizeof (s));
1087
1088 linemap_get_statistics (line_table, &s);
1089
1090 macro_maps_size = s.macro_maps_used_size
1091 + s.macro_maps_locations_size;
1092
1093 total_allocated_map_size = s.ordinary_maps_allocated_size
1094 + s.macro_maps_allocated_size
1095 + s.macro_maps_locations_size;
1096
1097 total_used_map_size = s.ordinary_maps_used_size
1098 + s.macro_maps_used_size
1099 + s.macro_maps_locations_size;
1100
d17687f6 1101 fprintf (stderr, "Number of expanded macros: %5ld\n",
64a1a422
TT
1102 s.num_expanded_macros);
1103 if (s.num_expanded_macros != 0)
d17687f6 1104 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
64a1a422
TT
1105 s.num_macro_tokens / s.num_expanded_macros);
1106 fprintf (stderr,
1107 "\nLine Table allocations during the "
40ce7fa6 1108 "compilation process\n");
a0b48080 1109 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
40ce7fa6 1110 SIZE_AMOUNT (s.num_ordinary_maps_used));
a0b48080 1111 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
40ce7fa6 1112 SIZE_AMOUNT (s.ordinary_maps_used_size));
a0b48080 1113 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
40ce7fa6 1114 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
a0b48080 1115 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
40ce7fa6 1116 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
a0b48080 1117 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
40ce7fa6 1118 SIZE_AMOUNT (s.num_macro_maps_used));
a0b48080 1119 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
40ce7fa6 1120 SIZE_AMOUNT (s.macro_maps_used_size));
a0b48080 1121 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
40ce7fa6 1122 SIZE_AMOUNT (s.macro_maps_locations_size));
a0b48080 1123 fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
40ce7fa6 1124 SIZE_AMOUNT (macro_maps_size));
a0b48080 1125 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
40ce7fa6 1126 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
a0b48080 1127 fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
40ce7fa6 1128 SIZE_AMOUNT (total_allocated_map_size));
a0b48080 1129 fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
40ce7fa6 1130 SIZE_AMOUNT (total_used_map_size));
a0b48080 1131 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
40ce7fa6 1132 SIZE_AMOUNT (s.adhoc_table_size));
a0b48080 1133 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
40ce7fa6 1134 SIZE_AMOUNT (s.adhoc_table_entries_used));
a0b48080 1135 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
40ce7fa6 1136 SIZE_AMOUNT (line_table->num_optimized_ranges));
a0b48080 1137 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
40ce7fa6 1138 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
ee015909 1139
64a1a422
TT
1140 fprintf (stderr, "\n");
1141}
ba4ad400
DM
1142
1143/* Get location one beyond the final location in ordinary map IDX. */
1144
620e594b 1145static location_t
99b1c316 1146get_end_location (class line_maps *set, unsigned int idx)
ba4ad400
DM
1147{
1148 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1149 return set->highest_location;
1150
1151 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1152 return MAP_START_LOCATION (next_map);
1153}
1154
/* Helper function for write_digit_row.
   Write the decimal units digit of DIGIT to STREAM as one character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1162
1163/* Helper function for dump_location_info.
1164 Write a row of numbers to STREAM, numbering a source line,
1165 giving the units, tens, hundreds etc of the column number. */
1166
1167static void
1168write_digit_row (FILE *stream, int indent,
ebedc9a3 1169 const line_map_ordinary *map,
620e594b 1170 location_t loc, int max_col, int divisor)
ba4ad400
DM
1171{
1172 fprintf (stream, "%*c", indent, ' ');
1173 fprintf (stream, "|");
1174 for (int column = 1; column < max_col; column++)
1175 {
620e594b 1176 location_t column_loc = loc + (column << map->m_range_bits);
ba4ad400
DM
1177 write_digit (stream, column_loc / divisor);
1178 }
1179 fprintf (stream, "\n");
1180}
1181
1182/* Write a half-closed (START) / half-open (END) interval of
620e594b 1183 location_t to STREAM. */
ba4ad400
DM
1184
1185static void
1186dump_location_range (FILE *stream,
620e594b 1187 location_t start, location_t end)
ba4ad400
DM
1188{
1189 fprintf (stream,
620e594b 1190 " location_t interval: %u <= loc < %u\n",
ba4ad400
DM
1191 start, end);
1192}
1193
1194/* Write a labelled description of a half-closed (START) / half-open (END)
620e594b 1195 interval of location_t to STREAM. */
ba4ad400
DM
1196
1197static void
1198dump_labelled_location_range (FILE *stream,
1199 const char *name,
620e594b 1200 location_t start, location_t end)
ba4ad400
DM
1201{
1202 fprintf (stream, "%s\n", name);
1203 dump_location_range (stream, start, end);
1204 fprintf (stream, "\n");
1205}
1206
1207/* Write a visualization of the locations in the line_table to STREAM. */
1208
1209void
1210dump_location_info (FILE *stream)
1211{
1212 /* Visualize the reserved locations. */
1213 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1214 0, RESERVED_LOCATION_COUNT);
1215
1216 /* Visualize the ordinary line_map instances, rendering the sources. */
1217 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1218 {
620e594b 1219 location_t end_location = get_end_location (line_table, idx);
ba4ad400
DM
1220 /* half-closed: doesn't include this one. */
1221
0e50b624
DM
1222 const line_map_ordinary *map
1223 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
ba4ad400
DM
1224 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1225 dump_location_range (stream,
1226 MAP_START_LOCATION (map), end_location);
1227 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1228 fprintf (stream, " starting at line: %i\n",
1229 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
ebedc9a3
DM
1230 fprintf (stream, " column and range bits: %i\n",
1231 map->m_column_and_range_bits);
ba4ad400 1232 fprintf (stream, " column bits: %i\n",
ebedc9a3
DM
1233 map->m_column_and_range_bits - map->m_range_bits);
1234 fprintf (stream, " range bits: %i\n",
1235 map->m_range_bits);
bc65bad2
MG
1236 const char * reason;
1237 switch (map->reason) {
1238 case LC_ENTER:
1239 reason = "LC_ENTER";
1240 break;
1241 case LC_LEAVE:
1242 reason = "LC_LEAVE";
1243 break;
1244 case LC_RENAME:
1245 reason = "LC_RENAME";
1246 break;
1247 case LC_RENAME_VERBATIM:
1248 reason = "LC_RENAME_VERBATIM";
1249 break;
1250 case LC_ENTER_MACRO:
1251 reason = "LC_RENAME_MACRO";
1252 break;
1253 default:
1254 reason = "Unknown";
1255 }
1256 fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1257
1258 const line_map_ordinary *includer_map
1259 = linemap_included_from_linemap (line_table, map);
1260 fprintf (stream, " included from location: %d",
1261 linemap_included_from (map));
1262 if (includer_map) {
1263 fprintf (stream, " (in ordinary map %d)",
1264 int (includer_map - line_table->info_ordinary.maps));
1265 }
1266 fprintf (stream, "\n");
ba4ad400
DM
1267
1268 /* Render the span of source lines that this "map" covers. */
620e594b 1269 for (location_t loc = MAP_START_LOCATION (map);
ba4ad400 1270 loc < end_location;
ebedc9a3 1271 loc += (1 << map->m_range_bits) )
ba4ad400 1272 {
ebedc9a3
DM
1273 gcc_assert (pure_location_p (line_table, loc) );
1274
ba4ad400
DM
1275 expanded_location exploc
1276 = linemap_expand_location (line_table, map, loc);
1277
01512446 1278 if (exploc.column == 0)
ba4ad400
DM
1279 {
1280 /* Beginning of a new source line: draw the line. */
1281
7761dfbe
DM
1282 char_span line_text = location_get_source_line (exploc.file,
1283 exploc.line);
ba4ad400
DM
1284 if (!line_text)
1285 break;
1286 fprintf (stream,
1287 "%s:%3i|loc:%5i|%.*s\n",
1288 exploc.file, exploc.line,
1289 loc,
7761dfbe 1290 (int)line_text.length (), line_text.get_buffer ());
ba4ad400
DM
1291
1292 /* "loc" is at column 0, which means "the whole line".
1293 Render the locations *within* the line, by underlining
620e594b 1294 it, showing the location_t numeric values
ba4ad400 1295 at each column. */
7761dfbe
DM
1296 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1297 if (max_col > line_text.length ())
1298 max_col = line_text.length () + 1;
ba4ad400 1299
bc65bad2
MG
1300 int len_lnum = num_digits (exploc.line);
1301 if (len_lnum < 3)
1302 len_lnum = 3;
1303 int len_loc = num_digits (loc);
1304 if (len_loc < 5)
1305 len_loc = 5;
1306
1307 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
ba4ad400
DM
1308
1309 /* Thousands. */
1310 if (end_location > 999)
ebedc9a3 1311 write_digit_row (stream, indent, map, loc, max_col, 1000);
ba4ad400
DM
1312
1313 /* Hundreds. */
1314 if (end_location > 99)
ebedc9a3 1315 write_digit_row (stream, indent, map, loc, max_col, 100);
ba4ad400
DM
1316
1317 /* Tens. */
ebedc9a3 1318 write_digit_row (stream, indent, map, loc, max_col, 10);
ba4ad400
DM
1319
1320 /* Units. */
ebedc9a3 1321 write_digit_row (stream, indent, map, loc, max_col, 1);
ba4ad400
DM
1322 }
1323 }
1324 fprintf (stream, "\n");
1325 }
1326
1327 /* Visualize unallocated values. */
1328 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1329 line_table->highest_location,
1330 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1331
1332 /* Visualize the macro line_map instances, rendering the sources. */
1333 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1334 {
620e594b 1335 /* Each macro map that is allocated owns location_t values
ba4ad400
DM
1336 that are *lower* that the one before them.
1337 Hence it's meaningful to view them either in order of ascending
1338 source locations, or in order of ascending macro map index. */
620e594b
DM
1339 const bool ascending_location_ts = true;
1340 unsigned int idx = (ascending_location_ts
ba4ad400
DM
1341 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1342 : i);
0e50b624 1343 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
ba4ad400
DM
1344 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1345 idx,
1346 linemap_map_get_macro_name (map),
1347 MACRO_MAP_NUM_MACRO_TOKENS (map));
1348 dump_location_range (stream,
1349 map->start_location,
1350 (map->start_location
1351 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1352 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1353 "expansion point is location %i",
1354 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1355 fprintf (stream, " map->start_location: %u\n",
1356 map->start_location);
1357
1358 fprintf (stream, " macro_locations:\n");
1359 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1360 {
620e594b
DM
1361 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1362 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
ba4ad400
DM
1363
1364 /* linemap_add_macro_token encodes token numbers in an expansion
1365 by putting them after MAP_START_LOCATION. */
1366
1367 /* I'm typically seeing 4 uninitialized entries at the end of
1368 0xafafafaf.
1369 This appears to be due to macro.c:replace_args
1370 adding 2 extra args for padding tokens; presumably there may
1371 be a leading and/or trailing padding token injected,
1372 each for 2 more location slots.
620e594b 1373 This would explain there being up to 4 location_ts slots
ba4ad400
DM
1374 that may be uninitialized. */
1375
1376 fprintf (stream, " %u: %u, %u\n",
1377 i,
1378 x,
1379 y);
1380 if (x == y)
1381 {
1382 if (x < MAP_START_LOCATION (map))
a9c697b8
MS
1383 inform (x, "token %u has %<x-location == y-location == %u%>",
1384 i, x);
ba4ad400
DM
1385 else
1386 fprintf (stream,
1387 "x-location == y-location == %u encodes token # %u\n",
1388 x, x - MAP_START_LOCATION (map));
1389 }
1390 else
1391 {
a9c697b8
MS
1392 inform (x, "token %u has %<x-location == %u%>", i, x);
1393 inform (x, "token %u has %<y-location == %u%>", i, y);
ba4ad400
DM
1394 }
1395 }
1396 fprintf (stream, "\n");
1397 }
1398
620e594b 1399 /* It appears that MAX_LOCATION_T itself is never assigned to a
ba4ad400
DM
1400 macro map, presumably due to an off-by-one error somewhere
1401 between the logic in linemap_enter_macro and
1402 LINEMAPS_MACRO_LOWEST_LOCATION. */
620e594b
DM
1403 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1404 MAX_LOCATION_T,
1405 MAX_LOCATION_T + 1);
ba4ad400
DM
1406
1407 /* Visualize ad-hoc values. */
1408 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
620e594b 1409 MAX_LOCATION_T + 1, UINT_MAX);
ba4ad400 1410}
d9b950dd 1411
88fa5555
DM
1412/* string_concat's constructor. */
1413
1414string_concat::string_concat (int num, location_t *locs)
1415 : m_num (num)
1416{
1417 m_locs = ggc_vec_alloc <location_t> (num);
1418 for (int i = 0; i < num; i++)
1419 m_locs[i] = locs[i];
1420}
1421
1422/* string_concat_db's constructor. */
1423
1424string_concat_db::string_concat_db ()
1425{
1426 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1427}
1428
1429/* Record that a string concatenation occurred, covering NUM
1430 string literal tokens. LOCS is an array of size NUM, containing the
1431 locations of the tokens. A copy of LOCS is taken. */
1432
1433void
1434string_concat_db::record_string_concatenation (int num, location_t *locs)
1435{
1436 gcc_assert (num > 1);
1437 gcc_assert (locs);
1438
1439 location_t key_loc = get_key_loc (locs[0]);
1440
1441 string_concat *concat
1442 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1443 m_table->put (key_loc, concat);
1444}
1445
700d4cb0 1446/* Determine if LOC was the location of the initial token of a
88fa5555
DM
1447 concatenation of string literal tokens.
1448 If so, *OUT_NUM is written to with the number of tokens, and
1449 *OUT_LOCS with the location of an array of locations of the
1450 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1451 storage owned by the string_concat_db.
1452 Otherwise, return false. */
1453
1454bool
1455string_concat_db::get_string_concatenation (location_t loc,
1456 int *out_num,
1457 location_t **out_locs)
1458{
1459 gcc_assert (out_num);
1460 gcc_assert (out_locs);
1461
1462 location_t key_loc = get_key_loc (loc);
1463
1464 string_concat **concat = m_table->get (key_loc);
1465 if (!concat)
1466 return false;
1467
1468 *out_num = (*concat)->m_num;
1469 *out_locs =(*concat)->m_locs;
1470 return true;
1471}
1472
1473/* Internal function. Canonicalize LOC into a form suitable for
1474 use as a key within the database, stripping away macro expansion,
1475 ad-hoc information, and range information, using the location of
1476 the start of LOC within an ordinary linemap. */
1477
1478location_t
1479string_concat_db::get_key_loc (location_t loc)
1480{
1481 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1482 NULL);
1483
1484 loc = get_range_from_loc (line_table, loc).m_start;
1485
1486 return loc;
1487}
1488
1489/* Helper class for use within get_substring_ranges_for_loc.
1490 An vec of cpp_string with responsibility for releasing all of the
1491 str->text for each str in the vector. */
1492
1493class auto_cpp_string_vec : public auto_vec <cpp_string>
1494{
1495 public:
1496 auto_cpp_string_vec (int alloc)
1497 : auto_vec <cpp_string> (alloc) {}
1498
1499 ~auto_cpp_string_vec ()
1500 {
1501 /* Clean up the copies within this vec. */
1502 int i;
1503 cpp_string *str;
1504 FOR_EACH_VEC_ELT (*this, i, str)
1505 free (const_cast <unsigned char *> (str->text));
1506 }
1507};
1508
1509/* Attempt to populate RANGES with source location information on the
1510 individual characters within the string literal found at STRLOC.
1511 If CONCATS is non-NULL, then any string literals that the token at
1512 STRLOC was concatenated with are also added to RANGES.
1513
1514 Return NULL if successful, or an error message if any errors occurred (in
1515 which case RANGES may be only partially populated and should not
1516 be used).
1517
1518 This is implemented by re-parsing the relevant source line(s). */
1519
1520static const char *
1521get_substring_ranges_for_loc (cpp_reader *pfile,
1522 string_concat_db *concats,
1523 location_t strloc,
1524 enum cpp_ttype type,
1525 cpp_substring_ranges &ranges)
1526{
1527 gcc_assert (pfile);
1528
1529 if (strloc == UNKNOWN_LOCATION)
1530 return "unknown location";
1531
67b5d0b2
DM
1532 /* Reparsing the strings requires accurate location information.
1533 If -ftrack-macro-expansion has been overridden from its default
1534 of 2, then we might have a location of a macro expansion point,
1535 rather than the location of the literal itself.
1536 Avoid this by requiring that we have full macro expansion tracking
1537 for substring locations to be available. */
1538 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1539 return "track_macro_expansion != 2";
1540
94f597df
DM
1541 /* If #line or # 44 "file"-style directives are present, then there's
1542 no guarantee that the line numbers we have can be used to locate
1543 the strings. For example, we might have a .i file with # directives
1544 pointing back to lines within a .c file, but the .c file might
1545 have been edited since the .i file was created.
1546 In such a case, the safest course is to disable on-demand substring
1547 locations. */
1548 if (line_table->seen_line_directive)
1549 return "seen line directive";
1550
88fa5555
DM
1551 /* If string concatenation has occurred at STRLOC, get the locations
1552 of all of the literal tokens making up the compound string.
1553 Otherwise, just use STRLOC. */
1554 int num_locs = 1;
1555 location_t *strlocs = &strloc;
1556 if (concats)
1557 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1558
1559 auto_cpp_string_vec strs (num_locs);
1560 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1561 for (int i = 0; i < num_locs; i++)
1562 {
1563 /* Get range of strloc. We will use it to locate the start and finish
1564 of the literal token within the line. */
1565 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1566
1567 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
0d48e877
DM
1568 {
1569 /* If the string token was within a macro expansion, then we can
1570 cope with it for the simple case where we have a single token.
1571 Otherwise, bail out. */
1572 if (src_range.m_start != src_range.m_finish)
1573 return "macro expansion";
1574 }
1575 else
1576 {
1577 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1578 /* If so, we can't reliably determine where the token started within
1579 its line. */
1580 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1581
1582 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1583 /* If so, we can't reliably determine where the token finished
1584 within its line. */
1585 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1586 }
88fa5555
DM
1587
1588 expanded_location start
0d48e877
DM
1589 = expand_location_to_spelling_point (src_range.m_start,
1590 LOCATION_ASPECT_START);
88fa5555 1591 expanded_location finish
0d48e877
DM
1592 = expand_location_to_spelling_point (src_range.m_finish,
1593 LOCATION_ASPECT_FINISH);
88fa5555
DM
1594 if (start.file != finish.file)
1595 return "range endpoints are in different files";
1596 if (start.line != finish.line)
1597 return "range endpoints are on different lines";
1598 if (start.column > finish.column)
1599 return "range endpoints are reversed";
1600
7761dfbe
DM
1601 char_span line = location_get_source_line (start.file, start.line);
1602 if (!line)
88fa5555
DM
1603 return "unable to read source line";
1604
1605 /* Determine the location of the literal (including quotes
1606 and leading prefix chars, such as the 'u' in a u""
1607 token). */
7761dfbe 1608 size_t literal_length = finish.column - start.column + 1;
88fa5555 1609
7cfa044d 1610 /* Ensure that we don't crash if we got the wrong location. */
31dd5cd6
MP
1611 if (start.column < 1)
1612 return "zero start column";
7761dfbe 1613 if (line.length () < (start.column - 1 + literal_length))
7cfa044d
DM
1614 return "line is not wide enough";
1615
7761dfbe
DM
1616 char_span literal = line.subspan (start.column - 1, literal_length);
1617
88fa5555
DM
1618 cpp_string from;
1619 from.len = literal_length;
1620 /* Make a copy of the literal, to avoid having to rely on
1621 the lifetime of the copy of the line within the cache.
1622 This will be released by the auto_cpp_string_vec dtor. */
7761dfbe 1623 from.text = (unsigned char *)literal.xstrdup ();
88fa5555
DM
1624 strs.safe_push (from);
1625
1626 /* For very long lines, a new linemap could have started
1627 halfway through the token.
1628 Ensure that the loc_reader uses the linemap of the
1629 *end* of the token for its start location. */
05d57d65
DM
1630 const line_map_ordinary *start_ord_map;
1631 linemap_resolve_location (line_table, src_range.m_start,
1632 LRK_SPELLING_LOCATION, &start_ord_map);
88fa5555
DM
1633 const line_map_ordinary *final_ord_map;
1634 linemap_resolve_location (line_table, src_range.m_finish,
05d57d65 1635 LRK_SPELLING_LOCATION, &final_ord_map);
3d0a5393
DM
1636 if (start_ord_map == NULL || final_ord_map == NULL)
1637 return "failed to get ordinary maps";
05d57d65
DM
1638 /* Bulletproofing. We ought to only have different ordinary maps
1639 for start vs finish due to line-length jumps. */
1640 if (start_ord_map != final_ord_map
1641 && start_ord_map->to_file != final_ord_map->to_file)
ef33afeb
DM
1642 return "start and finish are spelled in different ordinary maps";
1643 /* The file from linemap_resolve_location ought to match that from
1644 expand_location_to_spelling_point. */
1645 if (start_ord_map->to_file != start.file)
1646 return "mismatching file after resolving linemap";
1647
88fa5555
DM
1648 location_t start_loc
1649 = linemap_position_for_line_and_column (line_table, final_ord_map,
1650 start.line, start.column);
1651
1652 cpp_string_location_reader loc_reader (start_loc, line_table);
1653 loc_readers.safe_push (loc_reader);
1654 }
1655
1656 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1657 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1658 loc_readers.address (),
1659 num_locs, &ranges, type);
1660 if (err)
1661 return err;
1662
1663 /* Success: "ranges" should now contain information on the string. */
1664 return NULL;
1665}
1666
65e736c0
DM
1667/* Attempt to populate *OUT_LOC with source location information on the
1668 given characters within the string literal found at STRLOC.
1669 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1670 character set.
1671
1672 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1673 and string literal "012345\n789"
1674 *OUT_LOC is written to with:
1675 "012345\n789"
1676 ~^~~~~
1677
88fa5555
DM
1678 If CONCATS is non-NULL, then any string literals that the token at
1679 STRLOC was concatenated with are also considered.
1680
1681 This is implemented by re-parsing the relevant source line(s).
1682
1683 Return NULL if successful, or an error message if any errors occurred.
1684 Error messages are intended for GCC developers (to help debugging) rather
1685 than for end-users. */
1686
1687const char *
620e594b
DM
1688get_location_within_string (cpp_reader *pfile,
1689 string_concat_db *concats,
1690 location_t strloc,
1691 enum cpp_ttype type,
1692 int caret_idx, int start_idx, int end_idx,
1693 location_t *out_loc)
65e736c0
DM
1694{
1695 gcc_checking_assert (caret_idx >= 0);
88fa5555
DM
1696 gcc_checking_assert (start_idx >= 0);
1697 gcc_checking_assert (end_idx >= 0);
65e736c0 1698 gcc_assert (out_loc);
88fa5555
DM
1699
1700 cpp_substring_ranges ranges;
1701 const char *err
1702 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1703 if (err)
1704 return err;
1705
65e736c0
DM
1706 if (caret_idx >= ranges.get_num_ranges ())
1707 return "caret_idx out of range";
88fa5555
DM
1708 if (start_idx >= ranges.get_num_ranges ())
1709 return "start_idx out of range";
1710 if (end_idx >= ranges.get_num_ranges ())
1711 return "end_idx out of range";
1712
65e736c0
DM
1713 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1714 ranges.get_range (start_idx).m_start,
1715 ranges.get_range (end_idx).m_finish);
1716 return NULL;
1717}
1718
0e06d2b3
DM
1719#if CHECKING_P
1720
1721namespace selftest {
1722
1723/* Selftests of location handling. */
1724
65e736c0
DM
1725/* Attempt to populate *OUT_RANGE with source location information on the
1726 given character within the string literal found at STRLOC.
1727 CHAR_IDX refers to an offset within the execution character set.
1728 If CONCATS is non-NULL, then any string literals that the token at
1729 STRLOC was concatenated with are also considered.
1730
1731 This is implemented by re-parsing the relevant source line(s).
1732
1733 Return NULL if successful, or an error message if any errors occurred.
1734 Error messages are intended for GCC developers (to help debugging) rather
1735 than for end-users. */
1736
1737static const char *
1738get_source_range_for_char (cpp_reader *pfile,
1739 string_concat_db *concats,
1740 location_t strloc,
1741 enum cpp_ttype type,
1742 int char_idx,
1743 source_range *out_range)
1744{
1745 gcc_checking_assert (char_idx >= 0);
1746 gcc_assert (out_range);
1747
1748 cpp_substring_ranges ranges;
1749 const char *err
1750 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1751 if (err)
1752 return err;
1753
1754 if (char_idx >= ranges.get_num_ranges ())
1755 return "char_idx out of range";
1756
1757 *out_range = ranges.get_range (char_idx);
88fa5555
DM
1758 return NULL;
1759}
1760
65e736c0 1761/* As get_source_range_for_char, but write to *OUT the number
88fa5555
DM
1762 of ranges that are available. */
1763
0e06d2b3 1764static const char *
88fa5555
DM
1765get_num_source_ranges_for_substring (cpp_reader *pfile,
1766 string_concat_db *concats,
1767 location_t strloc,
1768 enum cpp_ttype type,
1769 int *out)
1770{
1771 gcc_assert (out);
1772
1773 cpp_substring_ranges ranges;
1774 const char *err
1775 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1776
1777 if (err)
1778 return err;
1779
1780 *out = ranges.get_num_ranges ();
1781 return NULL;
1782}
1783
d9b950dd
DM
1784/* Selftests of location handling. */
1785
082284da
DM
1786/* Verify that compare() on linenum_type handles comparisons over the full
1787 range of the type. */
1788
1789static void
1790test_linenum_comparisons ()
1791{
1792 linenum_type min_line (0);
1793 linenum_type max_line (0xffffffff);
1794 ASSERT_EQ (0, compare (min_line, min_line));
1795 ASSERT_EQ (0, compare (max_line, max_line));
1796
1797 ASSERT_GT (compare (max_line, min_line), 0);
1798 ASSERT_LT (compare (min_line, max_line), 0);
1799}
1800
741d3be5
DM
1801/* Helper function for verifying location data: when location_t
1802 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1803 as having column 0. */
1804
1805static bool
1806should_have_column_data_p (location_t loc)
1807{
1808 if (IS_ADHOC_LOC (loc))
1809 loc = get_location_from_adhoc_loc (line_table, loc);
1810 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1811 return false;
1812 return true;
1813}
1814
1815/* Selftest for should_have_column_data_p. */
1816
1817static void
1818test_should_have_column_data_p ()
1819{
1820 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1821 ASSERT_TRUE
1822 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1823 ASSERT_FALSE
1824 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1825}
1826
d9b950dd
DM
1827/* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1828 on LOC. */
1829
1830static void
1831assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1832 location_t loc)
1833{
1834 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1835 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
741d3be5
DM
1836 /* If location_t values are sufficiently high, then column numbers
1837 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1838 When close to the threshold, column numbers *may* be present: if
1839 the final linemap before the threshold contains a line that straddles
1840 the threshold, locations in that line have column information. */
1841 if (should_have_column_data_p (loc))
1842 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1843}
1844
f87e22c5
DM
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for location_t beyond which line-map.c changes behavior
     (disabling of the range-packing optimization, disabling of
     column-tracking).  We can exercise these by starting the line_table
     at interesting values at or near these thresholds.

   The following class describes a particular case within our test
   matrix.  */

class line_table_case
{
public:
  /* Value to install as line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the location value at which the line_table starts.  */
  int m_base_location;

  line_table_case (int default_range_bits, int base_location)
  : m_default_range_bits (default_range_bits),
    m_base_location (base_location)
  {
  }
};
1872
f87e22c5
DM
1873/* Constructor. Store the old value of line_table, and create a new
1874 one, using sane defaults. */
741d3be5 1875
f87e22c5 1876line_table_test::line_table_test ()
741d3be5 1877{
f87e22c5
DM
1878 gcc_assert (saved_line_table == NULL);
1879 saved_line_table = line_table;
1880 line_table = ggc_alloc<line_maps> ();
1881 linemap_init (line_table, BUILTINS_LOCATION);
1882 gcc_assert (saved_line_table->reallocator);
1883 line_table->reallocator = saved_line_table->reallocator;
1884 gcc_assert (saved_line_table->round_alloc_size);
1885 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1886 line_table->default_range_bits = 0;
1887}
741d3be5
DM
1888
1889/* Constructor. Store the old value of line_table, and create a new
1890 one, using the sitation described in CASE_. */
1891
f87e22c5 1892line_table_test::line_table_test (const line_table_case &case_)
741d3be5 1893{
f87e22c5
DM
1894 gcc_assert (saved_line_table == NULL);
1895 saved_line_table = line_table;
741d3be5
DM
1896 line_table = ggc_alloc<line_maps> ();
1897 linemap_init (line_table, BUILTINS_LOCATION);
f87e22c5
DM
1898 gcc_assert (saved_line_table->reallocator);
1899 line_table->reallocator = saved_line_table->reallocator;
1900 gcc_assert (saved_line_table->round_alloc_size);
1901 line_table->round_alloc_size = saved_line_table->round_alloc_size;
741d3be5
DM
1902 line_table->default_range_bits = case_.m_default_range_bits;
1903 if (case_.m_base_location)
1904 {
1905 line_table->highest_location = case_.m_base_location;
1906 line_table->highest_line = case_.m_base_location;
1907 }
1908}
1909
1910/* Destructor. Restore the old value of line_table. */
1911
f87e22c5 1912line_table_test::~line_table_test ()
741d3be5 1913{
f87e22c5
DM
1914 gcc_assert (saved_line_table != NULL);
1915 line_table = saved_line_table;
1916 saved_line_table = NULL;
d9b950dd
DM
1917}
1918
1919/* Verify basic operation of ordinary linemaps. */
1920
1921static void
741d3be5 1922test_accessing_ordinary_linemaps (const line_table_case &case_)
d9b950dd 1923{
f87e22c5 1924 line_table_test ltt (case_);
741d3be5 1925
d9b950dd
DM
1926 /* Build a simple linemap describing some locations. */
1927 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1928
1929 linemap_line_start (line_table, 1, 100);
1930 location_t loc_a = linemap_position_for_column (line_table, 1);
1931 location_t loc_b = linemap_position_for_column (line_table, 23);
1932
1933 linemap_line_start (line_table, 2, 100);
1934 location_t loc_c = linemap_position_for_column (line_table, 1);
1935 location_t loc_d = linemap_position_for_column (line_table, 17);
1936
1937 /* Example of a very long line. */
1938 linemap_line_start (line_table, 3, 2000);
1939 location_t loc_e = linemap_position_for_column (line_table, 700);
1940
5ccf1d8d
DM
1941 /* Transitioning back to a short line. */
1942 linemap_line_start (line_table, 4, 0);
1943 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1944
1945 if (should_have_column_data_p (loc_back_to_short))
1946 {
1947 /* Verify that we switched to short lines in the linemap. */
1948 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1949 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1950 }
1951
b9f4757f
DM
1952 /* Example of a line that will eventually be seen to be longer
1953 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1954 below that. */
1955 linemap_line_start (line_table, 5, 2000);
1956
1957 location_t loc_start_of_very_long_line
1958 = linemap_position_for_column (line_table, 2000);
1959 location_t loc_too_wide
1960 = linemap_position_for_column (line_table, 4097);
1961 location_t loc_too_wide_2
1962 = linemap_position_for_column (line_table, 4098);
1963
1964 /* ...and back to a sane line length. */
1965 linemap_line_start (line_table, 6, 100);
1966 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1967
d9b950dd
DM
1968 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1969
1970 /* Multiple files. */
1971 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1972 linemap_line_start (line_table, 1, 200);
1973 location_t loc_f = linemap_position_for_column (line_table, 150);
1974 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1975
1976 /* Verify that we can recover the location info. */
1977 assert_loceq ("foo.c", 1, 1, loc_a);
1978 assert_loceq ("foo.c", 1, 23, loc_b);
1979 assert_loceq ("foo.c", 2, 1, loc_c);
1980 assert_loceq ("foo.c", 2, 17, loc_d);
1981 assert_loceq ("foo.c", 3, 700, loc_e);
5ccf1d8d 1982 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
b9f4757f
DM
1983
1984 /* In the very wide line, the initial location should be fully tracked. */
1985 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1986 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1987 be disabled. */
1988 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1989 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1990 /*...and column-tracking should be re-enabled for subsequent lines. */
1991 assert_loceq ("foo.c", 6, 10, loc_sane_again);
1992
d9b950dd
DM
1993 assert_loceq ("bar.c", 1, 150, loc_f);
1994
1995 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
a01fc549
DM
1996 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1997
1998 /* Verify using make_location to build a range, and extracting data
1999 back from it. */
2000 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
2001 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
2002 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
2003 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
2004 ASSERT_EQ (loc_b, src_range.m_start);
2005 ASSERT_EQ (loc_d, src_range.m_finish);
d9b950dd
DM
2006}
2007
2008/* Verify various properties of UNKNOWN_LOCATION. */
2009
2010static void
2011test_unknown_location ()
2012{
2013 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
2014 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
2015 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
2016}
2017
2018/* Verify various properties of BUILTINS_LOCATION. */
2019
2020static void
2021test_builtins ()
2022{
10d2fc23 2023 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
d9b950dd
DM
2024 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
2025}
2026
9144eabb 2027/* Regression test for make_location.
cfa435e1
DM
2028 Ensure that we use pure locations for the start/finish of the range,
2029 rather than storing a packed or ad-hoc range as the start/finish. */
9144eabb
DM
2030
2031static void
2032test_make_location_nonpure_range_endpoints (const line_table_case &case_)
2033{
2034 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
2035 with C++ frontend.
2036 ....................0000000001111111111222.
2037 ....................1234567890123456789012. */
2038 const char *content = " r += !aaa == bbb;\n";
2039 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
2040 line_table_test ltt (case_);
2041 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
2042
2043 const location_t c11 = linemap_position_for_column (line_table, 11);
2044 const location_t c12 = linemap_position_for_column (line_table, 12);
2045 const location_t c13 = linemap_position_for_column (line_table, 13);
2046 const location_t c14 = linemap_position_for_column (line_table, 14);
2047 const location_t c21 = linemap_position_for_column (line_table, 21);
2048
2049 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
2050 return;
2051
2052 /* Use column 13 for the caret location, arbitrarily, to verify that we
2053 handle start != caret. */
2054 const location_t aaa = make_location (c13, c12, c14);
2055 ASSERT_EQ (c13, get_pure_location (aaa));
2056 ASSERT_EQ (c12, get_start (aaa));
2057 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
2058 ASSERT_EQ (c14, get_finish (aaa));
2059 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
2060
2061 /* Make a location using a location with a range as the start-point. */
2062 const location_t not_aaa = make_location (c11, aaa, c14);
2063 ASSERT_EQ (c11, get_pure_location (not_aaa));
2064 /* It should use the start location of the range, not store the range
2065 itself. */
2066 ASSERT_EQ (c12, get_start (not_aaa));
2067 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
2068 ASSERT_EQ (c14, get_finish (not_aaa));
2069 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
2070
2071 /* Similarly, make a location with a range as the end-point. */
2072 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
2073 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
2074 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
2075 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2076 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2077 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2078 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
2079 /* It should use the finish location of the range, not store the range
2080 itself. */
2081 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2082 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2083 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2084 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2085 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2086}
2087
d9b950dd
DM
2088/* Verify reading of input files (e.g. for caret-based diagnostics). */
2089
2090static void
2091test_reading_source_line ()
2092{
85ecd05c 2093 /* Create a tempfile and write some text to it. */
741d3be5
DM
2094 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2095 "01234567890123456789\n"
2096 "This is the test text\n"
1adae327 2097 "This is the 3rd line");
85ecd05c
DM
2098
2099 /* Read back a specific line from the tempfile. */
7761dfbe
DM
2100 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
2101 ASSERT_TRUE (source_line);
2102 ASSERT_TRUE (source_line.get_buffer () != NULL);
2103 ASSERT_EQ (20, source_line.length ());
1adae327 2104 ASSERT_TRUE (!strncmp ("This is the 3rd line",
7761dfbe 2105 source_line.get_buffer (), source_line.length ()));
1adae327 2106
7761dfbe
DM
2107 source_line = location_get_source_line (tmp.get_filename (), 2);
2108 ASSERT_TRUE (source_line);
2109 ASSERT_TRUE (source_line.get_buffer () != NULL);
2110 ASSERT_EQ (21, source_line.length ());
1adae327 2111 ASSERT_TRUE (!strncmp ("This is the test text",
7761dfbe 2112 source_line.get_buffer (), source_line.length ()));
85ecd05c 2113
7761dfbe
DM
2114 source_line = location_get_source_line (tmp.get_filename (), 4);
2115 ASSERT_FALSE (source_line);
2116 ASSERT_TRUE (source_line.get_buffer () == NULL);
d9b950dd
DM
2117}
2118
741d3be5
DM
2119/* Tests of lexing. */
2120
/* Assert that the text of token TOK, as rendered by cpp_token_as_text
   for PARSER, is the string EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *actual_txt                                           \
      = cpp_token_as_text ((PARSER), (TOK));                            \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *) actual_txt);          \
  SELFTEST_END_STMT
2129
2130/* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2131 and ranges from EXP_START_COL to EXP_FINISH_COL.
2132 Use LOC as the effective location of the selftest. */
2133
2134static void
2135assert_token_loc_eq (const location &loc,
2136 const cpp_token *tok,
2137 const char *exp_filename, int exp_linenum,
2138 int exp_start_col, int exp_finish_col)
2139{
2140 location_t tok_loc = tok->src_loc;
2141 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2142 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2143
2144 /* If location_t values are sufficiently high, then column numbers
2145 will be unavailable. */
2146 if (!should_have_column_data_p (tok_loc))
2147 return;
2148
2149 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2150 source_range tok_range = get_range_from_loc (line_table, tok_loc);
2151 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2152 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
2153}
2154
2155/* Use assert_token_loc_eq to verify the TOK->src_loc, using
2156 SELFTEST_LOCATION as the effective location of the selftest. */
2157
2158#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
2159 EXP_START_COL, EXP_FINISH_COL) \
2160 assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
2161 (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
2162
2163/* Test of lexing a file using libcpp, verifying tokens and their
2164 location information. */
2165
2166static void
2167test_lexer (const line_table_case &case_)
2168{
2169 /* Create a tempfile and write some text to it. */
2170 const char *content =
2171 /*00000000011111111112222222222333333.3333444444444.455555555556
2172 12345678901234567890123456789012345.6789012345678.901234567890. */
2173 ("test_name /* c-style comment */\n"
2174 " \"test literal\"\n"
2175 " // test c++-style comment\n"
2176 " 42\n");
2177 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2178
f87e22c5 2179 line_table_test ltt (case_);
741d3be5
DM
2180
2181 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2182
2183 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2184 ASSERT_NE (fname, NULL);
2185
2186 /* Verify that we get the expected tokens back, with the correct
2187 location information. */
2188
2189 location_t loc;
2190 const cpp_token *tok;
2191 tok = cpp_get_token_with_location (parser, &loc);
2192 ASSERT_NE (tok, NULL);
2193 ASSERT_EQ (tok->type, CPP_NAME);
2194 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2195 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2196
2197 tok = cpp_get_token_with_location (parser, &loc);
2198 ASSERT_NE (tok, NULL);
2199 ASSERT_EQ (tok->type, CPP_STRING);
2200 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2201 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2202
2203 tok = cpp_get_token_with_location (parser, &loc);
2204 ASSERT_NE (tok, NULL);
2205 ASSERT_EQ (tok->type, CPP_NUMBER);
2206 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2207 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2208
2209 tok = cpp_get_token_with_location (parser, &loc);
2210 ASSERT_NE (tok, NULL);
2211 ASSERT_EQ (tok->type, CPP_EOF);
2212
2213 cpp_finish (parser, NULL);
2214 cpp_destroy (parser);
2215}
2216
88fa5555
DM
2217/* Forward decls. */
2218
99b1c316 2219class lexer_test;
88fa5555
DM
2220class lexer_test_options;
2221
2222/* A class for specifying options of a lexer_test.
2223 The "apply" vfunc is called during the lexer_test constructor. */
2224
2225class lexer_test_options
2226{
2227 public:
2228 virtual void apply (lexer_test &) = 0;
2229};
2230
f5ea989d
DM
2231/* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2232 in its dtor.
2233
2234 This is needed by struct lexer_test to ensure that the cleanup of the
2235 cpp_reader happens *after* the cleanup of the temp_source_file. */
2236
2237class cpp_reader_ptr
2238{
2239 public:
2240 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2241
2242 ~cpp_reader_ptr ()
2243 {
2244 cpp_finish (m_ptr, NULL);
2245 cpp_destroy (m_ptr);
2246 }
2247
2248 operator cpp_reader * () const { return m_ptr; }
2249
2250 private:
2251 cpp_reader *m_ptr;
2252};
2253
88fa5555
DM
2254/* A struct for writing lexer tests. */
2255
6c1dae73 2256class lexer_test
88fa5555 2257{
6c1dae73 2258public:
88fa5555
DM
2259 lexer_test (const line_table_case &case_, const char *content,
2260 lexer_test_options *options);
2261 ~lexer_test ();
2262
2263 const cpp_token *get_token ();
2264
f5ea989d
DM
2265 /* The ordering of these fields matters.
2266 The line_table_test must be first, since the cpp_reader_ptr
2267 uses it.
2268 The cpp_reader must be cleaned up *after* the temp_source_file
2269 since the filenames in input.c's input cache are owned by the
2270 cpp_reader; in particular, when ~temp_source_file evicts the
2271 filename the filenames must still be alive. */
f87e22c5 2272 line_table_test m_ltt;
f5ea989d
DM
2273 cpp_reader_ptr m_parser;
2274 temp_source_file m_tempfile;
88fa5555 2275 string_concat_db m_concats;
a3998c2f 2276 bool m_implicitly_expect_EOF;
88fa5555
DM
2277};
2278
2279/* Use an EBCDIC encoding for the execution charset, specifically
2280 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2281
2282 This exercises iconv integration within libcpp.
2283 Not every build of iconv supports the given charset,
2284 so we need to flag this error and handle it gracefully. */
2285
2286class ebcdic_execution_charset : public lexer_test_options
2287{
2288 public:
2289 ebcdic_execution_charset () : m_num_iconv_errors (0)
2290 {
2291 gcc_assert (s_singleton == NULL);
2292 s_singleton = this;
2293 }
2294 ~ebcdic_execution_charset ()
2295 {
2296 gcc_assert (s_singleton == this);
2297 s_singleton = NULL;
2298 }
2299
2300 void apply (lexer_test &test) FINAL OVERRIDE
2301 {
2302 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2303 cpp_opts->narrow_charset = "IBM1047";
2304
2305 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
c24300ba 2306 callbacks->diagnostic = on_diagnostic;
88fa5555
DM
2307 }
2308
c24300ba
DM
2309 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2310 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2311 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2312 rich_location *richloc ATTRIBUTE_UNUSED,
2313 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
88fa5555
DM
2314 ATTRIBUTE_FPTR_PRINTF(5,0)
2315 {
2316 gcc_assert (s_singleton);
a7085816
JJ
2317 /* Avoid exgettext from picking this up, it is translated in libcpp. */
2318 const char *msg = "conversion from %s to %s not supported by iconv";
2319#ifdef ENABLE_NLS
2320 msg = dgettext ("cpplib", msg);
2321#endif
88fa5555
DM
2322 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2323 when the local iconv build doesn't support the conversion. */
a7085816 2324 if (strcmp (msgid, msg) == 0)
88fa5555
DM
2325 {
2326 s_singleton->m_num_iconv_errors++;
2327 return true;
2328 }
2329
2330 /* Otherwise, we have an unexpected error. */
2331 abort ();
2332 }
2333
2334 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2335
2336 private:
2337 static ebcdic_execution_charset *s_singleton;
2338 int m_num_iconv_errors;
2339};
2340
2341ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2342
c24300ba 2343/* A lexer_test_options subclass that records a list of diagnostic
a3998c2f
DM
2344 messages emitted by the lexer. */
2345
c24300ba 2346class lexer_diagnostic_sink : public lexer_test_options
a3998c2f
DM
2347{
2348 public:
c24300ba 2349 lexer_diagnostic_sink ()
a3998c2f
DM
2350 {
2351 gcc_assert (s_singleton == NULL);
2352 s_singleton = this;
2353 }
c24300ba 2354 ~lexer_diagnostic_sink ()
a3998c2f
DM
2355 {
2356 gcc_assert (s_singleton == this);
2357 s_singleton = NULL;
2358
2359 int i;
2360 char *str;
c24300ba 2361 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
a3998c2f
DM
2362 free (str);
2363 }
2364
2365 void apply (lexer_test &test) FINAL OVERRIDE
2366 {
2367 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
c24300ba 2368 callbacks->diagnostic = on_diagnostic;
a3998c2f
DM
2369 }
2370
c24300ba
DM
2371 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2372 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2373 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2374 rich_location *richloc ATTRIBUTE_UNUSED,
2375 const char *msgid, va_list *ap)
a3998c2f
DM
2376 ATTRIBUTE_FPTR_PRINTF(5,0)
2377 {
2378 char *msg = xvasprintf (msgid, *ap);
c24300ba 2379 s_singleton->m_diagnostics.safe_push (msg);
a3998c2f
DM
2380 return true;
2381 }
2382
c24300ba 2383 auto_vec<char *> m_diagnostics;
a3998c2f
DM
2384
2385 private:
c24300ba 2386 static lexer_diagnostic_sink *s_singleton;
a3998c2f
DM
2387};
2388
c24300ba 2389lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
a3998c2f 2390
88fa5555
DM
2391/* Constructor. Override line_table with a new instance based on CASE_,
2392 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2393 start parsing the tempfile. */
2394
2395lexer_test::lexer_test (const line_table_case &case_, const char *content,
f5ea989d
DM
2396 lexer_test_options *options)
2397: m_ltt (case_),
2398 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
88fa5555
DM
2399 /* Create a tempfile and write the text to it. */
2400 m_tempfile (SELFTEST_LOCATION, ".c", content),
a3998c2f
DM
2401 m_concats (),
2402 m_implicitly_expect_EOF (true)
88fa5555
DM
2403{
2404 if (options)
2405 options->apply (*this);
2406
2407 cpp_init_iconv (m_parser);
2408
2409 /* Parse the file. */
2410 const char *fname = cpp_read_main_file (m_parser,
2411 m_tempfile.get_filename ());
2412 ASSERT_NE (fname, NULL);
2413}
2414
a3998c2f 2415/* Destructor. By default, verify that the next token in m_parser is EOF. */
88fa5555
DM
2416
2417lexer_test::~lexer_test ()
2418{
2419 location_t loc;
2420 const cpp_token *tok;
2421
a3998c2f
DM
2422 if (m_implicitly_expect_EOF)
2423 {
2424 tok = cpp_get_token_with_location (m_parser, &loc);
2425 ASSERT_NE (tok, NULL);
2426 ASSERT_EQ (tok->type, CPP_EOF);
2427 }
88fa5555
DM
2428}
2429
2430/* Get the next token from m_parser. */
2431
2432const cpp_token *
2433lexer_test::get_token ()
2434{
2435 location_t loc;
2436 const cpp_token *tok;
2437
2438 tok = cpp_get_token_with_location (m_parser, &loc);
2439 ASSERT_NE (tok, NULL);
2440 return tok;
2441}
2442
2443/* Verify that locations within string literals are correctly handled. */
2444
2445/* Verify get_source_range_for_substring for token(s) at STRLOC,
2446 using the string concatenation database for TEST.
2447
2448 Assert that the character at index IDX is on EXPECTED_LINE,
2449 and that it begins at column EXPECTED_START_COL and ends at
2450 EXPECTED_FINISH_COL (unless the locations are beyond
2451 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2452 columns). */
2453
2454static void
2455assert_char_at_range (const location &loc,
2456 lexer_test& test,
2457 location_t strloc, enum cpp_ttype type, int idx,
2458 int expected_line, int expected_start_col,
2459 int expected_finish_col)
2460{
2461 cpp_reader *pfile = test.m_parser;
2462 string_concat_db *concats = &test.m_concats;
2463
a954833d 2464 source_range actual_range = source_range();
88fa5555 2465 const char *err
65e736c0
DM
2466 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2467 &actual_range);
88fa5555
DM
2468 if (should_have_column_data_p (strloc))
2469 ASSERT_EQ_AT (loc, NULL, err);
2470 else
2471 {
2472 ASSERT_STREQ_AT (loc,
2473 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2474 err);
2475 return;
2476 }
2477
2478 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2479 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2480 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2481 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2482
2483 if (should_have_column_data_p (actual_range.m_start))
2484 {
2485 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2486 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2487 }
2488 if (should_have_column_data_p (actual_range.m_finish))
2489 {
2490 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2491 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2492 }
2493}
2494
2495/* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2496 the effective location of any errors. */
2497
2498#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2499 EXPECTED_START_COL, EXPECTED_FINISH_COL) \
2500 assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2501 (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2502 (EXPECTED_FINISH_COL))
2503
2504/* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2505 using the string concatenation database for TEST.
2506
2507 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
2508
2509static void
2510assert_num_substring_ranges (const location &loc,
2511 lexer_test& test,
2512 location_t strloc,
2513 enum cpp_ttype type,
2514 int expected_num_ranges)
2515{
2516 cpp_reader *pfile = test.m_parser;
2517 string_concat_db *concats = &test.m_concats;
2518
0e06d2b3 2519 int actual_num_ranges = -1;
88fa5555
DM
2520 const char *err
2521 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2522 &actual_num_ranges);
2523 if (should_have_column_data_p (strloc))
2524 ASSERT_EQ_AT (loc, NULL, err);
2525 else
2526 {
2527 ASSERT_STREQ_AT (loc,
2528 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2529 err);
2530 return;
2531 }
2532 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2533}
2534
2535/* Macro for calling assert_num_substring_ranges, supplying
2536 SELFTEST_LOCATION for the effective location of any errors. */
2537
2538#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2539 EXPECTED_NUM_RANGES) \
2540 assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2541 (TYPE), (EXPECTED_NUM_RANGES))
2542
2543
2544/* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2545 returns an error (using the string concatenation database for TEST). */
2546
2547static void
2548assert_has_no_substring_ranges (const location &loc,
2549 lexer_test& test,
2550 location_t strloc,
2551 enum cpp_ttype type,
2552 const char *expected_err)
2553{
2554 cpp_reader *pfile = test.m_parser;
2555 string_concat_db *concats = &test.m_concats;
2556 cpp_substring_ranges ranges;
2557 const char *actual_err
2558 = get_substring_ranges_for_loc (pfile, concats, strloc,
2559 type, ranges);
2560 if (should_have_column_data_p (strloc))
2561 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2562 else
2563 ASSERT_STREQ_AT (loc,
2564 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2565 actual_err);
2566}
2567
2568#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
2569 assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2570 (STRLOC), (TYPE), (ERR))
2571
2572/* Lex a simple string literal. Verify the substring location data, before
2573 and after running cpp_interpret_string on it. */
2574
2575static void
2576test_lexer_string_locations_simple (const line_table_case &case_)
2577{
2578 /* Digits 0-9 (with 0 at column 10), the simple way.
2579 ....................000000000.11111111112.2222222223333333333
2580 ....................123456789.01234567890.1234567890123456789
2581 We add a trailing comment to ensure that we correctly locate
2582 the end of the string literal token. */
2583 const char *content = " \"0123456789\" /* not a string */\n";
2584 lexer_test test (case_, content, NULL);
2585
2586 /* Verify that we get the expected token back, with the correct
2587 location information. */
2588 const cpp_token *tok = test.get_token ();
2589 ASSERT_EQ (tok->type, CPP_STRING);
2590 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2591 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2592
2593 /* At this point in lexing, the quote characters are treated as part of
2594 the string (they are stripped off by cpp_interpret_string). */
2595
2596 ASSERT_EQ (tok->val.str.len, 12);
2597
2598 /* Verify that cpp_interpret_string works. */
2599 cpp_string dst_string;
2600 const enum cpp_ttype type = CPP_STRING;
2601 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2602 &dst_string, type);
2603 ASSERT_TRUE (result);
2604 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2605 free (const_cast <unsigned char *> (dst_string.text));
2606
2607 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3
DM
2608 opening quote, but does include the closing quote. */
2609 for (int i = 0; i <= 10; i++)
88fa5555
DM
2610 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2611 10 + i, 10 + i);
2612
bbd6fcf3 2613 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
88fa5555
DM
2614}
2615
2616/* As test_lexer_string_locations_simple, but use an EBCDIC execution
2617 encoding. */
2618
2619static void
2620test_lexer_string_locations_ebcdic (const line_table_case &case_)
2621{
2622 /* EBCDIC support requires iconv. */
2623 if (!HAVE_ICONV)
2624 return;
2625
2626 /* Digits 0-9 (with 0 at column 10), the simple way.
2627 ....................000000000.11111111112.2222222223333333333
2628 ....................123456789.01234567890.1234567890123456789
2629 We add a trailing comment to ensure that we correctly locate
2630 the end of the string literal token. */
2631 const char *content = " \"0123456789\" /* not a string */\n";
2632 ebcdic_execution_charset use_ebcdic;
2633 lexer_test test (case_, content, &use_ebcdic);
2634
2635 /* Verify that we get the expected token back, with the correct
2636 location information. */
2637 const cpp_token *tok = test.get_token ();
2638 ASSERT_EQ (tok->type, CPP_STRING);
2639 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2640 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2641
2642 /* At this point in lexing, the quote characters are treated as part of
2643 the string (they are stripped off by cpp_interpret_string). */
2644
2645 ASSERT_EQ (tok->val.str.len, 12);
2646
2647 /* The remainder of the test requires an iconv implementation that
2648 can convert from UTF-8 to the EBCDIC encoding requested above. */
2649 if (use_ebcdic.iconv_errors_occurred_p ())
2650 return;
2651
2652 /* Verify that cpp_interpret_string works. */
2653 cpp_string dst_string;
2654 const enum cpp_ttype type = CPP_STRING;
2655 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2656 &dst_string, type);
2657 ASSERT_TRUE (result);
2658 /* We should now have EBCDIC-encoded text, specifically
2659 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2660 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2661 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2662 (const char *)dst_string.text);
2663 free (const_cast <unsigned char *> (dst_string.text));
2664
2665 /* Verify that we don't attempt to record substring location information
2666 for such cases. */
2667 ASSERT_HAS_NO_SUBSTRING_RANGES
2668 (test, tok->src_loc, type,
2669 "execution character set != source character set");
2670}
2671
2672/* Lex a string literal containing a hex-escaped character.
2673 Verify the substring location data, before and after running
2674 cpp_interpret_string on it. */
2675
2676static void
2677test_lexer_string_locations_hex (const line_table_case &case_)
2678{
2679 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2680 and with a space in place of digit 6, to terminate the escaped
2681 hex code.
2682 ....................000000000.111111.11112222.
2683 ....................123456789.012345.67890123. */
2684 const char *content = " \"01234\\x35 789\"\n";
2685 lexer_test test (case_, content, NULL);
2686
2687 /* Verify that we get the expected token back, with the correct
2688 location information. */
2689 const cpp_token *tok = test.get_token ();
2690 ASSERT_EQ (tok->type, CPP_STRING);
2691 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2692 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2693
2694 /* At this point in lexing, the quote characters are treated as part of
2695 the string (they are stripped off by cpp_interpret_string). */
2696 ASSERT_EQ (tok->val.str.len, 15);
2697
2698 /* Verify that cpp_interpret_string works. */
2699 cpp_string dst_string;
2700 const enum cpp_ttype type = CPP_STRING;
2701 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2702 &dst_string, type);
2703 ASSERT_TRUE (result);
2704 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2705 free (const_cast <unsigned char *> (dst_string.text));
2706
2707 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2708 opening quote, but does include the closing quote. */
88fa5555
DM
2709 for (int i = 0; i <= 4; i++)
2710 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2711 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
bbd6fcf3 2712 for (int i = 6; i <= 10; i++)
88fa5555
DM
2713 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2714
bbd6fcf3 2715 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
88fa5555
DM
2716}
2717
2718/* Lex a string literal containing an octal-escaped character.
2719 Verify the substring location data after running cpp_interpret_string
2720 on it. */
2721
2722static void
2723test_lexer_string_locations_oct (const line_table_case &case_)
2724{
2725 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2726 and with a space in place of digit 6, to terminate the escaped
2727 octal code.
2728 ....................000000000.111111.11112222.2222223333333333444
2729 ....................123456789.012345.67890123.4567890123456789012 */
2730 const char *content = " \"01234\\065 789\" /* not a string */\n";
2731 lexer_test test (case_, content, NULL);
2732
2733 /* Verify that we get the expected token back, with the correct
2734 location information. */
2735 const cpp_token *tok = test.get_token ();
2736 ASSERT_EQ (tok->type, CPP_STRING);
2737 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2738
2739 /* Verify that cpp_interpret_string works. */
2740 cpp_string dst_string;
2741 const enum cpp_ttype type = CPP_STRING;
2742 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2743 &dst_string, type);
2744 ASSERT_TRUE (result);
2745 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2746 free (const_cast <unsigned char *> (dst_string.text));
2747
2748 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2749 opening quote, but does include the closing quote. */
88fa5555
DM
2750 for (int i = 0; i < 5; i++)
2751 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2752 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
bbd6fcf3 2753 for (int i = 6; i <= 10; i++)
88fa5555
DM
2754 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2755
bbd6fcf3 2756 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
88fa5555
DM
2757}
2758
2759/* Test of string literal containing letter escapes. */
2760
2761static void
2762test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2763{
2764 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2765 .....................000000000.1.11111.1.1.11222.22222223333333
2766 .....................123456789.0.12345.6.7.89012.34567890123456. */
2767 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2768 lexer_test test (case_, content, NULL);
2769
2770 /* Verify that we get the expected tokens back. */
2771 const cpp_token *tok = test.get_token ();
2772 ASSERT_EQ (tok->type, CPP_STRING);
2773 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2774
2775 /* Verify ranges of individual characters. */
2776 /* "\t". */
2777 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2778 0, 1, 10, 11);
2779 /* "foo". */
2780 for (int i = 1; i <= 3; i++)
2781 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2782 i, 1, 11 + i, 11 + i);
2783 /* "\\" and "\n". */
2784 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2785 4, 1, 15, 16);
2786 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2787 5, 1, 17, 18);
2788
bbd6fcf3
DM
2789 /* "bar" and closing quote for nul-terminator. */
2790 for (int i = 6; i <= 9; i++)
88fa5555
DM
2791 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2792 i, 1, 13 + i, 13 + i);
2793
bbd6fcf3 2794 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
88fa5555
DM
2795}
2796
2797/* Another test of a string literal containing a letter escape.
2798 Based on string seen in
2799 printf ("%-%\n");
2800 in gcc.dg/format/c90-printf-1.c. */
2801
2802static void
2803test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2804{
2805 /* .....................000000000.1111.11.1111.22222222223.
2806 .....................123456789.0123.45.6789.01234567890. */
2807 const char *content = (" \"%-%\\n\" /* non-str */\n");
2808 lexer_test test (case_, content, NULL);
2809
2810 /* Verify that we get the expected tokens back. */
2811 const cpp_token *tok = test.get_token ();
2812 ASSERT_EQ (tok->type, CPP_STRING);
2813 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2814
2815 /* Verify ranges of individual characters. */
2816 /* "%-%". */
2817 for (int i = 0; i < 3; i++)
2818 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2819 i, 1, 10 + i, 10 + i);
2820 /* "\n". */
2821 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2822 3, 1, 13, 14);
2823
bbd6fcf3
DM
2824 /* Closing quote for nul-terminator. */
2825 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2826 4, 1, 15, 15);
2827
2828 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
88fa5555
DM
2829}
2830
2831/* Lex a string literal containing UCN 4 characters.
2832 Verify the substring location data after running cpp_interpret_string
2833 on it. */
2834
2835static void
2836test_lexer_string_locations_ucn4 (const line_table_case &case_)
2837{
2838 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2839 as UCN 4.
2840 ....................000000000.111111.111122.222222223.33333333344444
2841 ....................123456789.012345.678901.234567890.12345678901234 */
2842 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2843 lexer_test test (case_, content, NULL);
2844
2845 /* Verify that we get the expected token back, with the correct
2846 location information. */
2847 const cpp_token *tok = test.get_token ();
2848 ASSERT_EQ (tok->type, CPP_STRING);
2849 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2850
2851 /* Verify that cpp_interpret_string works.
2852 The string should be encoded in the execution character
700d4cb0 2853 set. Assuming that is UTF-8, we should have the following:
88fa5555
DM
2854 ----------- ---- ----- ------- ----------------
2855 Byte offset Byte Octal Unicode Source Column(s)
2856 ----------- ---- ----- ------- ----------------
2857 0 0x30 '0' 10
2858 1 0x31 '1' 11
2859 2 0x32 '2' 12
2860 3 0x33 '3' 13
2861 4 0x34 '4' 14
2862 5 0xE2 \342 U+2174 15-20
2863 6 0x85 \205 (cont) 15-20
2864 7 0xB4 \264 (cont) 15-20
2865 8 0xE2 \342 U+2175 21-26
2866 9 0x85 \205 (cont) 21-26
2867 10 0xB5 \265 (cont) 21-26
2868 11 0x37 '7' 27
2869 12 0x38 '8' 28
2870 13 0x39 '9' 29
bbd6fcf3 2871 14 0x00 30 (closing quote)
88fa5555
DM
2872 ----------- ---- ----- ------- ---------------. */
2873
2874 cpp_string dst_string;
2875 const enum cpp_ttype type = CPP_STRING;
2876 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2877 &dst_string, type);
2878 ASSERT_TRUE (result);
2879 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2880 (const char *)dst_string.text);
2881 free (const_cast <unsigned char *> (dst_string.text));
2882
2883 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2884 opening quote, but does include the closing quote.
88fa5555
DM
2885 '01234'. */
2886 for (int i = 0; i <= 4; i++)
2887 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2888 /* U+2174. */
2889 for (int i = 5; i <= 7; i++)
2890 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2891 /* U+2175. */
2892 for (int i = 8; i <= 10; i++)
2893 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
bbd6fcf3
DM
2894 /* '789' and nul terminator */
2895 for (int i = 11; i <= 14; i++)
88fa5555
DM
2896 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2897
bbd6fcf3 2898 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
88fa5555
DM
2899}
2900
2901/* Lex a string literal containing UCN 8 characters.
2902 Verify the substring location data after running cpp_interpret_string
2903 on it. */
2904
2905static void
2906test_lexer_string_locations_ucn8 (const line_table_case &case_)
2907{
2908 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2909 ....................000000000.111111.1111222222.2222333333333.344444
2910 ....................123456789.012345.6789012345.6789012345678.901234 */
2911 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2912 lexer_test test (case_, content, NULL);
2913
2914 /* Verify that we get the expected token back, with the correct
2915 location information. */
2916 const cpp_token *tok = test.get_token ();
2917 ASSERT_EQ (tok->type, CPP_STRING);
2918 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2919 "\"01234\\U00002174\\U00002175789\"");
2920
2921 /* Verify that cpp_interpret_string works.
2922 The UTF-8 encoding of the string is identical to that from
2923 the ucn4 testcase above; the only difference is the column
2924 locations. */
2925 cpp_string dst_string;
2926 const enum cpp_ttype type = CPP_STRING;
2927 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2928 &dst_string, type);
2929 ASSERT_TRUE (result);
2930 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2931 (const char *)dst_string.text);
2932 free (const_cast <unsigned char *> (dst_string.text));
2933
2934 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 2935 opening quote, but does include the closing quote.
88fa5555
DM
2936 '01234'. */
2937 for (int i = 0; i <= 4; i++)
2938 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2939 /* U+2174. */
2940 for (int i = 5; i <= 7; i++)
2941 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2942 /* U+2175. */
2943 for (int i = 8; i <= 10; i++)
2944 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2945 /* '789' at columns 35-37 */
2946 for (int i = 11; i <= 13; i++)
2947 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
bbd6fcf3
DM
2948 /* Closing quote/nul-terminator at column 38. */
2949 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
88fa5555 2950
bbd6fcf3 2951 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
88fa5555
DM
2952}
2953
/* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit quantity
   (most significant byte first) and return it in host byte order.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes
    = reinterpret_cast <const unsigned char *> (ptr_be_value);

  /* Shift in one byte at a time, starting with the most significant.  */
  uint32_t host_value = 0;
  for (int idx = 0; idx < 4; idx++)
    host_value = (host_value << 8) | (uint32_t) bytes[idx];
  return host_value;
}
2965
2966/* Lex a wide string literal and verify that attempts to read substring
2967 location data from it fail gracefully. */
2968
2969static void
2970test_lexer_string_locations_wide_string (const line_table_case &case_)
2971{
2972 /* Digits 0-9.
2973 ....................000000000.11111111112.22222222233333
2974 ....................123456789.01234567890.12345678901234 */
2975 const char *content = " L\"0123456789\" /* non-str */\n";
2976 lexer_test test (case_, content, NULL);
2977
2978 /* Verify that we get the expected token back, with the correct
2979 location information. */
2980 const cpp_token *tok = test.get_token ();
2981 ASSERT_EQ (tok->type, CPP_WSTRING);
2982 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2983
2984 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2985 cpp_string dst_string;
2986 const enum cpp_ttype type = CPP_WSTRING;
2987 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2988 &dst_string, type);
2989 ASSERT_TRUE (result);
2990 /* The cpp_reader defaults to big-endian with
2991 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2992 now be encoded as UTF-32BE. */
2993 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2994 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2995 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2996 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2997 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2998 free (const_cast <unsigned char *> (dst_string.text));
2999
3000 /* We don't yet support generating substring location information
3001 for L"" strings. */
3002 ASSERT_HAS_NO_SUBSTRING_RANGES
3003 (test, tok->src_loc, type,
3004 "execution character set != source character set");
3005}
3006
/* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit quantity
   (most significant byte first) and return it in host byte order.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes
    = reinterpret_cast <const unsigned char *> (ptr_be_value);
  const unsigned int high = bytes[0];
  const unsigned int low = bytes[1];
  return static_cast <uint16_t> ((high << 8) | low);
}
3015
3016/* Lex a u"" string literal and verify that attempts to read substring
3017 location data from it fail gracefully. */
3018
3019static void
3020test_lexer_string_locations_string16 (const line_table_case &case_)
3021{
3022 /* Digits 0-9.
3023 ....................000000000.11111111112.22222222233333
3024 ....................123456789.01234567890.12345678901234 */
3025 const char *content = " u\"0123456789\" /* non-str */\n";
3026 lexer_test test (case_, content, NULL);
3027
3028 /* Verify that we get the expected token back, with the correct
3029 location information. */
3030 const cpp_token *tok = test.get_token ();
3031 ASSERT_EQ (tok->type, CPP_STRING16);
3032 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
3033
3034 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
3035 cpp_string dst_string;
3036 const enum cpp_ttype type = CPP_STRING16;
3037 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3038 &dst_string, type);
3039 ASSERT_TRUE (result);
3040
3041 /* The cpp_reader defaults to big-endian, so dst_string should
3042 now be encoded as UTF-16BE. */
3043 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
3044 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
3045 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
3046 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
3047 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
3048 free (const_cast <unsigned char *> (dst_string.text));
3049
3050 /* We don't yet support generating substring location information
3051 for L"" strings. */
3052 ASSERT_HAS_NO_SUBSTRING_RANGES
3053 (test, tok->src_loc, type,
3054 "execution character set != source character set");
3055}
3056
3057/* Lex a U"" string literal and verify that attempts to read substring
3058 location data from it fail gracefully. */
3059
3060static void
3061test_lexer_string_locations_string32 (const line_table_case &case_)
3062{
3063 /* Digits 0-9.
3064 ....................000000000.11111111112.22222222233333
3065 ....................123456789.01234567890.12345678901234 */
3066 const char *content = " U\"0123456789\" /* non-str */\n";
3067 lexer_test test (case_, content, NULL);
3068
3069 /* Verify that we get the expected token back, with the correct
3070 location information. */
3071 const cpp_token *tok = test.get_token ();
3072 ASSERT_EQ (tok->type, CPP_STRING32);
3073 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
3074
3075 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
3076 cpp_string dst_string;
3077 const enum cpp_ttype type = CPP_STRING32;
3078 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3079 &dst_string, type);
3080 ASSERT_TRUE (result);
3081
3082 /* The cpp_reader defaults to big-endian, so dst_string should
3083 now be encoded as UTF-32BE. */
3084 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3085 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3086 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3087 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3088 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3089 free (const_cast <unsigned char *> (dst_string.text));
3090
3091 /* We don't yet support generating substring location information
3092 for L"" strings. */
3093 ASSERT_HAS_NO_SUBSTRING_RANGES
3094 (test, tok->src_loc, type,
3095 "execution character set != source character set");
3096}
3097
3098/* Lex a u8-string literal.
3099 Verify the substring location data after running cpp_interpret_string
3100 on it. */
3101
3102static void
3103test_lexer_string_locations_u8 (const line_table_case &case_)
3104{
3105 /* Digits 0-9.
3106 ....................000000000.11111111112.22222222233333
3107 ....................123456789.01234567890.12345678901234 */
3108 const char *content = " u8\"0123456789\" /* non-str */\n";
3109 lexer_test test (case_, content, NULL);
3110
3111 /* Verify that we get the expected token back, with the correct
3112 location information. */
3113 const cpp_token *tok = test.get_token ();
3114 ASSERT_EQ (tok->type, CPP_UTF8STRING);
3115 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3116
3117 /* Verify that cpp_interpret_string works. */
3118 cpp_string dst_string;
3119 const enum cpp_ttype type = CPP_STRING;
3120 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3121 &dst_string, type);
3122 ASSERT_TRUE (result);
3123 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3124 free (const_cast <unsigned char *> (dst_string.text));
3125
3126 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3
DM
3127 opening quote, but does include the closing quote. */
3128 for (int i = 0; i <= 10; i++)
88fa5555
DM
3129 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3130}
3131
3132/* Lex a string literal containing UTF-8 source characters.
3133 Verify the substring location data after running cpp_interpret_string
3134 on it. */
3135
3136static void
3137test_lexer_string_locations_utf8_source (const line_table_case &case_)
3138{
3139 /* This string literal is written out to the source file as UTF-8,
3140 and is of the form "before mojibake after", where "mojibake"
3141 is written as the following four unicode code points:
3142 U+6587 CJK UNIFIED IDEOGRAPH-6587
3143 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3144 U+5316 CJK UNIFIED IDEOGRAPH-5316
3145 U+3051 HIRAGANA LETTER KE.
3146 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3147 "before" and "after" are 1 byte per unicode character.
3148
3149 The numbering shown are "columns", which are *byte* numbers within
3150 the line, rather than unicode character numbers.
3151
3152 .................... 000000000.1111111.
3153 .................... 123456789.0123456. */
3154 const char *content = (" \"before "
3155 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3156 UTF-8: 0xE6 0x96 0x87
3157 C octal escaped UTF-8: \346\226\207
3158 "column" numbers: 17-19. */
3159 "\346\226\207"
3160
3161 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3162 UTF-8: 0xE5 0xAD 0x97
3163 C octal escaped UTF-8: \345\255\227
3164 "column" numbers: 20-22. */
3165 "\345\255\227"
3166
3167 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3168 UTF-8: 0xE5 0x8C 0x96
3169 C octal escaped UTF-8: \345\214\226
3170 "column" numbers: 23-25. */
3171 "\345\214\226"
3172
3173 /* U+3051 HIRAGANA LETTER KE
3174 UTF-8: 0xE3 0x81 0x91
3175 C octal escaped UTF-8: \343\201\221
3176 "column" numbers: 26-28. */
3177 "\343\201\221"
3178
3179 /* column numbers 29 onwards
3180 2333333.33334444444444
3181 9012345.67890123456789. */
3182 " after\" /* non-str */\n");
3183 lexer_test test (case_, content, NULL);
3184
3185 /* Verify that we get the expected token back, with the correct
3186 location information. */
3187 const cpp_token *tok = test.get_token ();
3188 ASSERT_EQ (tok->type, CPP_STRING);
3189 ASSERT_TOKEN_AS_TEXT_EQ
3190 (test.m_parser, tok,
3191 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3192
3193 /* Verify that cpp_interpret_string works. */
3194 cpp_string dst_string;
3195 const enum cpp_ttype type = CPP_STRING;
3196 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3197 &dst_string, type);
3198 ASSERT_TRUE (result);
3199 ASSERT_STREQ
3200 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3201 (const char *)dst_string.text);
3202 free (const_cast <unsigned char *> (dst_string.text));
3203
3204 /* Verify ranges of individual characters. This no longer includes the
bbd6fcf3 3205 opening quote, but does include the closing quote.
88fa5555 3206 Assuming that both source and execution encodings are UTF-8, we have
bbd6fcf3 3207 a run of 25 octets in each, plus the NUL terminator. */
88fa5555
DM
3208 for (int i = 0; i < 25; i++)
3209 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
bbd6fcf3
DM
3210 /* NUL-terminator should use the closing quote at column 35. */
3211 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
88fa5555 3212
bbd6fcf3 3213 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
88fa5555
DM
3214}
3215
3216/* Test of string literal concatenation. */
3217
3218static void
3219test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3220{
3221 /* Digits 0-9.
3222 .....................000000000.111111.11112222222222
3223 .....................123456789.012345.67890123456789. */
3224 const char *content = (" \"01234\" /* non-str */\n"
3225 " \"56789\" /* non-str */\n");
3226 lexer_test test (case_, content, NULL);
3227
3228 location_t input_locs[2];
3229
3230 /* Verify that we get the expected tokens back. */
3231 auto_vec <cpp_string> input_strings;
3232 const cpp_token *tok_a = test.get_token ();
3233 ASSERT_EQ (tok_a->type, CPP_STRING);
3234 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3235 input_strings.safe_push (tok_a->val.str);
3236 input_locs[0] = tok_a->src_loc;
3237
3238 const cpp_token *tok_b = test.get_token ();
3239 ASSERT_EQ (tok_b->type, CPP_STRING);
3240 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3241 input_strings.safe_push (tok_b->val.str);
3242 input_locs[1] = tok_b->src_loc;
3243
3244 /* Verify that cpp_interpret_string works. */
3245 cpp_string dst_string;
3246 const enum cpp_ttype type = CPP_STRING;
3247 bool result = cpp_interpret_string (test.m_parser,
3248 input_strings.address (), 2,
3249 &dst_string, type);
3250 ASSERT_TRUE (result);
3251 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3252 free (const_cast <unsigned char *> (dst_string.text));
3253
3254 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3255 test.m_concats.record_string_concatenation (2, input_locs);
3256
3257 location_t initial_loc = input_locs[0];
3258
bbd6fcf3 3259 /* "01234" on line 1. */
88fa5555
DM
3260 for (int i = 0; i <= 4; i++)
3261 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
bbd6fcf3
DM
3262 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3263 for (int i = 5; i <= 10; i++)
88fa5555
DM
3264 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3265
bbd6fcf3 3266 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
88fa5555
DM
3267}
3268
3269/* Another test of string literal concatenation. */
3270
3271static void
3272test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3273{
3274 /* Digits 0-9.
3275 .....................000000000.111.11111112222222
3276 .....................123456789.012.34567890123456. */
3277 const char *content = (" \"01\" /* non-str */\n"
3278 " \"23\" /* non-str */\n"
3279 " \"45\" /* non-str */\n"
3280 " \"67\" /* non-str */\n"
3281 " \"89\" /* non-str */\n");
3282 lexer_test test (case_, content, NULL);
3283
3284 auto_vec <cpp_string> input_strings;
3285 location_t input_locs[5];
3286
3287 /* Verify that we get the expected tokens back. */
3288 for (int i = 0; i < 5; i++)
3289 {
3290 const cpp_token *tok = test.get_token ();
3291 ASSERT_EQ (tok->type, CPP_STRING);
3292 input_strings.safe_push (tok->val.str);
3293 input_locs[i] = tok->src_loc;
3294 }
3295
3296 /* Verify that cpp_interpret_string works. */
3297 cpp_string dst_string;
3298 const enum cpp_ttype type = CPP_STRING;
3299 bool result = cpp_interpret_string (test.m_parser,
3300 input_strings.address (), 5,
3301 &dst_string, type);
3302 ASSERT_TRUE (result);
3303 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3304 free (const_cast <unsigned char *> (dst_string.text));
3305
3306 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3307 test.m_concats.record_string_concatenation (5, input_locs);
3308
3309 location_t initial_loc = input_locs[0];
3310
3311 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3312 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3313 and expect get_source_range_for_substring to fail.
3314 However, for a string concatenation test, we can have a case
3315 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3316 but subsequent strings can be after it.
3317 Attempting to detect this within assert_char_at_range
3318 would overcomplicate the logic for the common test cases, so
3319 we detect it here. */
3320 if (should_have_column_data_p (input_locs[0])
3321 && !should_have_column_data_p (input_locs[4]))
3322 {
3323 /* Verify that get_source_range_for_substring gracefully rejects
3324 this case. */
3325 source_range actual_range;
3326 const char *err
65e736c0
DM
3327 = get_source_range_for_char (test.m_parser, &test.m_concats,
3328 initial_loc, type, 0, &actual_range);
88fa5555
DM
3329 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3330 return;
3331 }
3332
3333 for (int i = 0; i < 5; i++)
3334 for (int j = 0; j < 2; j++)
3335 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3336 i + 1, 10 + j, 10 + j);
3337
bbd6fcf3
DM
3338 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3339 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3340
3341 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
88fa5555
DM
3342}
3343
3344/* Another test of string literal concatenation, this time combined with
3345 various kinds of escaped characters. */
3346
3347static void
3348test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3349{
3350 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3351 digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3352 const char *content
3353 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3354 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3355 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3356 lexer_test test (case_, content, NULL);
3357
3358 auto_vec <cpp_string> input_strings;
3359 location_t input_locs[4];
3360
3361 /* Verify that we get the expected tokens back. */
3362 for (int i = 0; i < 4; i++)
3363 {
3364 const cpp_token *tok = test.get_token ();
3365 ASSERT_EQ (tok->type, CPP_STRING);
3366 input_strings.safe_push (tok->val.str);
3367 input_locs[i] = tok->src_loc;
3368 }
3369
3370 /* Verify that cpp_interpret_string works. */
3371 cpp_string dst_string;
3372 const enum cpp_ttype type = CPP_STRING;
3373 bool result = cpp_interpret_string (test.m_parser,
3374 input_strings.address (), 4,
3375 &dst_string, type);
3376 ASSERT_TRUE (result);
3377 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3378 free (const_cast <unsigned char *> (dst_string.text));
3379
3380 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3381 test.m_concats.record_string_concatenation (4, input_locs);
3382
3383 location_t initial_loc = input_locs[0];
3384
3385 for (int i = 0; i <= 4; i++)
3386 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3387 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3388 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3389 for (int i = 7; i <= 9; i++)
3390 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3391
bbd6fcf3
DM
3392 /* NUL-terminator should use the location of the final closing quote. */
3393 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3394
3395 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
88fa5555
DM
3396}
3397
3398/* Test of string literal in a macro. */
3399
3400static void
3401test_lexer_string_locations_macro (const line_table_case &case_)
3402{
3403 /* Digits 0-9.
3404 .....................0000000001111111111.22222222223.
3405 .....................1234567890123456789.01234567890. */
3406 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3407 " MACRO");
3408 lexer_test test (case_, content, NULL);
3409
3410 /* Verify that we get the expected tokens back. */
3411 const cpp_token *tok = test.get_token ();
3412 ASSERT_EQ (tok->type, CPP_PADDING);
3413
3414 tok = test.get_token ();
3415 ASSERT_EQ (tok->type, CPP_STRING);
3416 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3417
3418 /* Verify ranges of individual characters. We ought to
3419 see columns within the macro definition. */
bbd6fcf3 3420 for (int i = 0; i <= 10; i++)
88fa5555
DM
3421 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3422 i, 1, 20 + i, 20 + i);
3423
bbd6fcf3 3424 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
88fa5555
DM
3425
3426 tok = test.get_token ();
3427 ASSERT_EQ (tok->type, CPP_PADDING);
3428}
3429
3430/* Test of stringification of a macro argument.
 MACRO(X) is defined as #X, so MACRO(foo) yields a CPP_STRING token
 whose spelling does not appear literally in the source; substring
 ranges are therefore expected to be unavailable. */
3431
3432static void
3433test_lexer_string_locations_stringified_macro_argument
3434 (const line_table_case &case_)
3435{
3436 /* .....................000000000111111111122222222223.
3437 .....................123456789012345678901234567890. */
3438 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3439 "MACRO(foo)\n");
3440 lexer_test test (case_, content, NULL);
3441
3442 /* Verify that we get the expected token back. */
3443 const cpp_token *tok = test.get_token ();
3444 ASSERT_EQ (tok->type, CPP_PADDING);
3445
3446 tok = test.get_token ();
3447 ASSERT_EQ (tok->type, CPP_STRING);
3448 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3449
3450 /* We don't support getting the location of a stringified macro
3451 argument. Verify that it fails gracefully. */
3452 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3453 "cpp_interpret_string_1 failed");
3454
 /* Two padding tokens follow the stringified argument. */
3455 tok = test.get_token ();
3456 ASSERT_EQ (tok->type, CPP_PADDING);
3457
3458 tok = test.get_token ();
3459 ASSERT_EQ (tok->type, CPP_PADDING);
3460}
3461
3462/* Ensure that we fail gracefully if something attempts to pass
3463 in a location that isn't a string literal token. Seen on this code:
3464
3465 const char a[] = " %d ";
3466 __builtin_printf (a, 0.5);
3467 ^
3468
3469 when c-format.c erroneously used the indicated one-character
3470 location as the format string location, leading to a read past the
3471 end of a string buffer in cpp_interpret_string_1. */
3472
3473static void
3474test_lexer_string_locations_non_string (const line_table_case &case_)
3475{
3476 /* .....................000000000111111111122222222223.
3477 .....................123456789012345678901234567890. */
3478 const char *content = (" a\n");
3479 lexer_test test (case_, content, NULL);
3480
3481 /* Verify that we get the expected token back. */
3482 const cpp_token *tok = test.get_token ();
3483 ASSERT_EQ (tok->type, CPP_NAME);
3484 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3485
3486 /* At this point, libcpp is attempting to interpret the name as a
3487 string literal, despite it not starting with a quote. We don't detect
3488 that, but we should at least fail gracefully.
 Note that the location of the CPP_NAME token is deliberately queried
 as if it were a CPP_STRING. */
3489 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3490 "cpp_interpret_string_1 failed");
3491}
3492
3493/* Ensure that we can read substring information for a token which
3494 starts in one linemap and ends in another. Adapted from
3495 gcc.dg/cpp/pr69985.c. */
3496
3497static void
3498test_lexer_string_locations_long_line (const line_table_case &case_)
3499{
3500 /* .....................000000.000111111111
3501 .....................123456.789012345678. */
3502 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3503 " \"0123456789012345678901234567890123456789"
3504 "0123456789012345678901234567890123456789"
3505 "0123456789012345678901234567890123456789"
3506 "0123456789\"\n");
3507
3508 lexer_test test (case_, content, NULL);
3509
3510 /* Verify that we get the expected token back. */
3511 const cpp_token *tok = test.get_token ();
3512 ASSERT_EQ (tok->type, CPP_STRING);
3513
 /* Nothing further can be checked when this line_table_case does not
 retain column information. */
3514 if (!should_have_column_data_p (line_table->highest_location))
3515 return;
3516
3517 /* Verify ranges of individual characters.
 The literal holds 130 digits (40 + 40 + 40 + 10); the 131st range
 is presumably the terminator. The string lives on line 2 of the
 source, after the leading comment line. */
bbd6fcf3
DM
3518 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3519 for (int i = 0; i < 131; i++)
88fa5555
DM
3520 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3521 i, 2, 7 + i, 7 + i);
3522}
3523
b8f56412
DM
3524/* Test of locations within a raw string that doesn't contain a newline.
 Checks both the interpreted value of the literal and, when column
 data is available, the range of each character. */
3525
3526static void
3527test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3528{
3529 /* .....................00.0000000111111111122.
3530 .....................12.3456789012345678901. */
3531 const char *content = ("R\"foo(0123456789)foo\"\n");
3532 lexer_test test (case_, content, NULL);
3533
3534 /* Verify that we get the expected token back. */
3535 const cpp_token *tok = test.get_token ();
3536 ASSERT_EQ (tok->type, CPP_STRING);
3537
3538 /* Verify that cpp_interpret_string works.
 The interpreted text excludes the R"foo( and )foo" delimiters. */
3539 cpp_string dst_string;
3540 const enum cpp_ttype type = CPP_STRING;
3541 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3542 &dst_string, type);
3543 ASSERT_TRUE (result);
3544 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
 /* Release the buffer filled in by cpp_interpret_string. */
3545 free (const_cast <unsigned char *> (dst_string.text));
3546
3547 if (!should_have_column_data_p (line_table->highest_location))
3548 return;
3549
3550 /* 0-9, plus the NUL terminator. */
3551 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3552 for (int i = 0; i < 11; i++)
3553 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3554 i, 1, 7 + i, 7 + i);
3555}
3556
3557/* Test of locations within a raw string that contains a newline.
 The literal is still interpreted correctly, but substring ranges
 are expected to be reported as unavailable. */
3558
3559static void
3560test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3561{
3562 /* .....................00.0000.
3563 .....................12.3456. */
3564 const char *content = ("R\"foo(\n"
3565 /* .....................00000.
3566 .....................12345. */
3567 "hello\n"
3568 "world\n"
3569 /* .....................00000.
3570 .....................12345. */
3571 ")foo\"\n");
3572 lexer_test test (case_, content, NULL);
3573
3574 /* Verify that we get the expected token back. */
3575 const cpp_token *tok = test.get_token ();
3576 ASSERT_EQ (tok->type, CPP_STRING);
3577
3578 /* Verify that cpp_interpret_string works.
 The embedded newlines are part of the interpreted value. */
3579 cpp_string dst_string;
3580 const enum cpp_ttype type = CPP_STRING;
3581 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3582 &dst_string, type);
3583 ASSERT_TRUE (result);
3584 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
 /* Release the buffer filled in by cpp_interpret_string. */
3585 free (const_cast <unsigned char *> (dst_string.text));
3586
3587 if (!should_have_column_data_p (line_table->highest_location))
3588 return;
3589
3590 /* Currently we don't support locations within raw strings that
3591 contain newlines. */
3592 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3593 "range endpoints are on different lines");
3594}
3595
a3998c2f
DM
3596/* Test of parsing an unterminated raw string.
 The opening delimiter is ouch but the closing one is spelled ouCh,
 so the raw string never terminates; the lexer is expected to report
 exactly one diagnostic and return CPP_EOF. */
3597
3598static void
3599test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3600{
3601 const char *content = "R\"ouch()ouCh\" /* etc */";
3602
c24300ba
DM
 /* Collect diagnostics issued while lexing so they can be counted. */
3603 lexer_diagnostic_sink diagnostics;
3604 lexer_test test (case_, content, &diagnostics);
a3998c2f
DM
 /* This test consumes the EOF token itself, so turn off the
 fixture's implicit EOF expectation. */
3605 test.m_implicitly_expect_EOF = false;
3606
3607 /* Attempt to parse the raw string. */
3608 const cpp_token *tok = test.get_token ();
3609 ASSERT_EQ (tok->type, CPP_EOF);
3610
c24300ba 3611 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
a3998c2f
DM
3612 /* We expect the message "unterminated raw string"
3613 in the "cpplib" translation domain.
3614 It's not clear that dgettext is available on all supported hosts,
3615 so this assertion is commented-out for now.
3616 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
c24300ba 3617 diagnostics.m_diagnostics[0]);
a3998c2f
DM
3618 */
3619}
3620
88fa5555
DM
3621/* Test of lexing char constants.
 Lexes one constant per source line and checks the token type and
 spelling for the plain, u, U and L prefixes and for a
 multi-character constant. */
3622
3623static void
3624test_lexer_char_constants (const line_table_case &case_)
3625{
3626 /* Various char constants.
3627 .....................0000000001111111111.22222222223.
3628 .....................1234567890123456789.01234567890. */
3629 const char *content = (" 'a'\n"
3630 " u'a'\n"
3631 " U'a'\n"
3632 " L'a'\n"
3633 " 'abc'\n");
3634 lexer_test test (case_, content, NULL);
3635
3636 /* Verify that we get the expected tokens back. */
3637 /* 'a'. */
3638 const cpp_token *tok = test.get_token ();
3639 ASSERT_EQ (tok->type, CPP_CHAR);
3640 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3641
 /* Only the plain constant is also interpreted; unsignedp is
 filled in by the call but not checked here. */
3642 unsigned int chars_seen;
3643 int unsignedp;
3644 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3645 &chars_seen, &unsignedp);
3646 ASSERT_EQ (cc, 'a');
3647 ASSERT_EQ (chars_seen, 1);
3648
3649 /* u'a'. */
3650 tok = test.get_token ();
3651 ASSERT_EQ (tok->type, CPP_CHAR16);
3652 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3653
3654 /* U'a'. */
3655 tok = test.get_token ();
3656 ASSERT_EQ (tok->type, CPP_CHAR32);
3657 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3658
3659 /* L'a'. */
3660 tok = test.get_token ();
3661 ASSERT_EQ (tok->type, CPP_WCHAR);
3662 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3663
3664 /* 'abc' (c-char-sequence). */
3665 tok = test.get_token ();
3666 ASSERT_EQ (tok->type, CPP_CHAR);
3667 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3668}
741d3be5
DM
3669/* A table of interesting location_t values, giving one axis of our test
3670 matrix.
 The table currently has 12 entries: 0, one arbitrary value, and five
 values around each of the two LINE_MAP_MAX_LOCATION_* thresholds.
 for_each_line_table_case asserts on that count, so keep the two in
 sync when adding or removing entries. */
3671
3672static const location_t boundary_locations[] = {
3673 /* Zero means "don't override the default values for a new line_table". */
3674 0,
3675
3676 /* An arbitrary non-zero value that isn't close to one of
3677 the boundary values below. */
3678 0x10000,
3679
3680 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3681 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3682 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3683 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3684 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3685 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3686
3687 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3688 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3689 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3690 LINE_MAP_MAX_LOCATION_WITH_COLS,
3691 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3692 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3693};
3694
f87e22c5 3695/* Run TESTCASE multiple times, once for each case in our test matrix.
 The matrix is the cross product of two default_range_bits settings
 (0 and 5) with every entry of boundary_locations; TESTCASE receives
 a line_table_case describing the current combination. */
d9b950dd
DM
3696
3697void
f87e22c5 3698for_each_line_table_case (void (*testcase) (const line_table_case &))
d9b950dd 3699{
741d3be5
DM
3700 /* As noted above in the description of struct line_table_case,
3701 we want to explore a test matrix of interesting line_table
3702 situations, running various selftests for each case within the
3703 matrix. */
3704
3705 /* Run all tests with:
3706 (a) line_table->default_range_bits == 0, and
3707 (b) line_table->default_range_bits == 5. */
3708 int num_cases_tested = 0;
 /* Stepping by 5 from 0 visits exactly the two values 0 and 5. */
3709 for (int default_range_bits = 0; default_range_bits <= 5;
3710 default_range_bits += 5)
3711 {
3712 /* ...and use each of the "interesting" location values as
3713 the starting location within line_table. */
3714 const int num_boundary_locations
3715 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3716 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3717 {
3718 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3719
f87e22c5 3720 testcase (c);
741d3be5
DM
3721
3722 num_cases_tested++;
3723 }
3724 }
3725
3726 /* Verify that we fully covered the test matrix.
 NOTE(review): the literal 12 duplicates the number of entries in
 boundary_locations and must be updated by hand if that table
 changes. */
3727 ASSERT_EQ (num_cases_tested, 2 * 12);
f87e22c5
DM
3728}
3729
a4553534
DM
3730/* Verify that when presented with a consecutive pair of locations with
3731 a very large line offset, we don't attempt to consolidate them into
3732 a single ordinary linemap where the line offsets within the line map
3733 would lead to overflow (PR lto/88147). */
3734
3735static void
3736test_line_offset_overflow ()
3737{
 /* Use a fresh line_table with default_range_bits 5 and no override
 of the starting location. */
3738 line_table_test ltt (line_table_case (5, 0));
3739
3740 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3741 linemap_line_start (line_table, 1, 100);
3742 location_t loc_a = linemap_line_start (line_table, 2578, 255);
3743 assert_loceq ("foo.c", 2578, 0, loc_a);
3744
 /* Record the map in use for line 2578 and check its bit layout
 before jumping far ahead. */
3745 const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3746 ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3747 ASSERT_EQ (ordmap_a->m_range_bits, 5);
3748
 /* Jump from line 2578 to line 404198 in the same file. */
3749 location_t loc_b = linemap_line_start (line_table, 404198, 512);
3750 assert_loceq ("foo.c", 404198, 0, loc_b);
3751
3752 /* We should have started a new linemap, rather than attempting to store
3753 a very large line offset. */
3754 const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3755 ASSERT_NE (ordmap_a, ordmap_b);
3756}
3757
ee925640
LH
/* Selftest for cpp_display_width, cpp_byte_column_to_display_column and
 cpp_display_column_to_byte_column: invalid and valid UTF-8, control
 bytes, tab expansion, columns past the end of the buffer, and the
 round trip between byte columns and display columns. */
3758void test_cpp_utf8 ()
3759{
004bb936 3760 const int def_tabstop = 8;
ee925640
LH
3761 /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */
3762 {
004bb936 3763 int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, def_tabstop);
ee925640 3764 ASSERT_EQ (8, w_bad);
004bb936
LH
3765 int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, def_tabstop);
3766 ASSERT_EQ (5, w_ctrl);
ee925640
LH
3767 }
3768
3769 /* Verify that wcwidth of valid UTF-8 is as expected. */
3770 {
004bb936 3771 const int w_pi = cpp_display_width ("\xcf\x80", 2, def_tabstop);
ee925640 3772 ASSERT_EQ (1, w_pi);
004bb936 3773 const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, def_tabstop);
ee925640 3774 ASSERT_EQ (2, w_emoji);
004bb936
LH
3775 const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3776 def_tabstop);
ee925640 3777 ASSERT_EQ (1, w_umlaut_precomposed);
004bb936
LH
3778 const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
3779 def_tabstop);
ee925640 3780 ASSERT_EQ (1, w_umlaut_combining);
004bb936 3781 const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, def_tabstop);
ee925640 3782 ASSERT_EQ (2, w_han);
004bb936 3783 const int w_ascii = cpp_display_width ("GCC", 3, def_tabstop);
ee925640
LH
3784 ASSERT_EQ (3, w_ascii);
 /* The mixed string combines the cases above with one stray 0x9f
 byte; the expected total of 18 is the sum of the individual
 widths, with the stray byte counting as 1. */
3785 const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
004bb936
LH
3786 "\x9f! \xe4\xb8\xba y\xcc\x88",
3787 24, def_tabstop);
ee925640
LH
3788 ASSERT_EQ (18, w_mixed);
3789 }
3790
004bb936
LH
3791 /* Verify that display width properly expands tabs.
 The expected values are consistent with each tab advancing to the
 next multiple of the tab stop: for tab stop 8 the columns reached
 are 8, 11, 16, 17; for tab stop 3 they are 3, 6, 9, 10; with tab
 stop 1 a tab is one column wide. Display column 7 lies inside the
 first tab, hence byte column 1. */
3792 {
3793 const char *tstr = "\tabc\td";
3794 ASSERT_EQ (6, cpp_display_width (tstr, 6, 1));
3795 ASSERT_EQ (10, cpp_display_width (tstr, 6, 3));
3796 ASSERT_EQ (17, cpp_display_width (tstr, 6, 8));
3797 ASSERT_EQ (1, cpp_display_column_to_byte_column (tstr, 6, 7, 8));
3798 }
3799
ee925640
LH
3800 /* Verify that cpp_byte_column_to_display_column can go past the end,
3801 and similar edge cases. */
3802 {
3803 const char *str
3804 /* Display columns.
3805 111111112345 */
3806 = "\xcf\x80 abc";
3807 /* 111122223456
3808 Byte columns. */
3809
 /* The string is 6 bytes and 5 display columns wide, so byte column
 106 (100 past the end) is expected to map to display column 105.
 A NULL buffer is accepted when the length or the column is 0. */
004bb936
LH
3810 ASSERT_EQ (5, cpp_display_width (str, 6, def_tabstop));
3811 ASSERT_EQ (105,
3812 cpp_byte_column_to_display_column (str, 6, 106, def_tabstop));
3813 ASSERT_EQ (10000,
3814 cpp_byte_column_to_display_column (NULL, 0, 10000, def_tabstop));
3815 ASSERT_EQ (0,
3816 cpp_byte_column_to_display_column (NULL, 10000, 0, def_tabstop));
ee925640
LH
3817 }
3818
3819 /* Verify that cpp_display_column_to_byte_column can go past the end,
3820 and similar edge cases, and check invertibility. */
3821 {
3822 const char *str
3823 /* Display columns.
3824 000000000000000000000000000000000000011
3825 111111112222222234444444455555555678901 */
3826 = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
3827 /* 000000000000000000000000000000000111111
3828 111122223333444456666777788889999012345
3829 Byte columns. */
 /* The string is 15 bytes and 11 display columns wide, so display
 column 111 (100 past the end) is expected to map to byte column
 115. */
004bb936
LH
3830 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, def_tabstop));
3831 ASSERT_EQ (15,
3832 cpp_display_column_to_byte_column (str, 15, 11, def_tabstop));
3833 ASSERT_EQ (115,
3834 cpp_display_column_to_byte_column (str, 15, 111, def_tabstop));
3835 ASSERT_EQ (10000,
3836 cpp_display_column_to_byte_column (NULL, 0, 10000, def_tabstop));
3837 ASSERT_EQ (0,
3838 cpp_display_column_to_byte_column (NULL, 10000, 0, def_tabstop));
ee925640
LH
3839
3840 /* Verify that we do not interrupt a UTF-8 sequence. */
004bb936 3841 ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, def_tabstop));
ee925640
LH
3842
 /* Round-trip every byte column of the string through the two
 conversions. */
3843 for (int byte_col = 1; byte_col <= 15; ++byte_col)
3844 {
004bb936
LH
3845 const int disp_col
3846 = cpp_byte_column_to_display_column (str, 15, byte_col, def_tabstop);
3847 const int byte_col2
3848 = cpp_display_column_to_byte_column (str, 15, disp_col, def_tabstop);
ee925640
LH
3849
3850 /* If we ask for the display column in the middle of a UTF-8
3851 sequence, it will return the length of the partial sequence,
3852 matching the behavior of GCC before display column support.
3853 Otherwise check the round trip was successful. */
3854 if (byte_col < 4)
3855 ASSERT_EQ (byte_col, disp_col);
3856 else if (byte_col >= 6 && byte_col < 9)
3857 ASSERT_EQ (3 + (byte_col - 5), disp_col);
3858 else
3859 ASSERT_EQ (byte_col2, byte_col);
3860 }
3861 }
3862
3863}
3864
f87e22c5
DM
3865/* Run all of the selftests within this file.
 Tests that take a line_table_case are run once per entry of the
 test matrix via for_each_line_table_case; the others are called
 directly. */
3866
3867void
3868input_c_tests ()
3869{
082284da 3870 test_linenum_comparisons ();
f87e22c5
DM
3871 test_should_have_column_data_p ();
3872 test_unknown_location ();
3873 test_builtins ();
9144eabb 3874 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
f87e22c5
DM
3875
 /* Linemap access and lexer tests, including the string-literal
 substring-location tests. */
3876 for_each_line_table_case (test_accessing_ordinary_linemaps);
3877 for_each_line_table_case (test_lexer);
3878 for_each_line_table_case (test_lexer_string_locations_simple);
3879 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3880 for_each_line_table_case (test_lexer_string_locations_hex);
3881 for_each_line_table_case (test_lexer_string_locations_oct);
3882 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3883 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3884 for_each_line_table_case (test_lexer_string_locations_ucn4);
3885 for_each_line_table_case (test_lexer_string_locations_ucn8);
3886 for_each_line_table_case (test_lexer_string_locations_wide_string);
3887 for_each_line_table_case (test_lexer_string_locations_string16);
3888 for_each_line_table_case (test_lexer_string_locations_string32);
3889 for_each_line_table_case (test_lexer_string_locations_u8);
3890 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3891 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3892 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3893 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3894 for_each_line_table_case (test_lexer_string_locations_macro);
3895 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3896 for_each_line_table_case (test_lexer_string_locations_non_string);
3897 for_each_line_table_case (test_lexer_string_locations_long_line);
b8f56412
DM
3898 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3899 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
a3998c2f 3900 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
f87e22c5 3901 for_each_line_table_case (test_lexer_char_constants);
741d3be5 3902
 /* Remaining tests that do not iterate over the test matrix. */
d9b950dd 3903 test_reading_source_line ();
a4553534
DM
3904
3905 test_line_offset_overflow ();
ee925640
LH
3906
3907 test_cpp_utf8 ();
d9b950dd
DM
3908}
3909
3910} // namespace selftest
3911
3912#endif /* CHECKING_P */