gcc/input.c
1 /* Data and functions related to line maps and input files.
2 Copyright (C) 2004-2019 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "intl.h"
24 #include "diagnostic.h"
25 #include "diagnostic-core.h"
26 #include "selftest.h"
27 #include "cpplib.h"
28
29 #ifndef HAVE_ICONV
30 #define HAVE_ICONV 0
31 #endif
32
33 /* This is a cache used by get_next_line to store the content of a
34 file, so that individual lines can be looked up in it. */
35 struct fcache
36 {
37 /* Information about the boundaries of a line within the file. */
38 struct line_info
39 {
40 /* The line number. It starts from 1. */
41 size_t line_num;
42
43 /* The position (byte count) of the beginning of the line,
44 relative to the file data pointer. This starts at zero. */
45 size_t start_pos;
46
47 /* The position (byte count) of the last byte of the line. This
48 normally points to the '\n' character, or to one byte after the
49 last byte of the file, if the file doesn't contain a '\n'
50 character. */
51 size_t end_pos;
52
53 line_info (size_t l, size_t s, size_t e)
54 : line_num (l), start_pos (s), end_pos (e)
55 {}
56
57 line_info ()
58 :line_num (0), start_pos (0), end_pos (0)
59 {}
60 };
61
62 /* The number of times this file has been accessed. This is used
63 to decide which entry to evict from the cache
64 array. */
65 unsigned use_count;
66
67 /* The file_path is the key for identifying a particular file in
68 the cache.
69 For libcpp-using code, the underlying buffer for this field is
70 owned by the corresponding _cpp_file within the cpp_reader. */
71 const char *file_path;
72
73 FILE *fp;
74
75 /* This points to the content of the file that we've read so
76 far. */
77 char *data;
78
79 /* The size of the DATA array above. */
80 size_t size;
81
82 /* The number of bytes read from the underlying file so far. This
83 must be less than or equal to SIZE above. */
84 size_t nb_read;
85
86 /* The index of the beginning of the current line. */
87 size_t line_start_idx;
88
89 /* The number of the previous line read. This starts at 1. Zero
90 means we've read no line so far. */
91 size_t line_num;
92
93 /* This is the total number of lines of the current file. At the
94 moment, we try to get this information from the line map
95 subsystem. Note that this is just a hint. When using the C++
96 front-end, this hint is correct because the input file is then
97 completely tokenized before parsing starts; so the line map knows
98 the number of lines before compilation really starts. For the C
99 front-end, for example, it can happen that we start emitting diagnostics
100 before the line map has seen the end of the file. */
101 size_t total_lines;
102
103 /* Could this file be missing a trailing newline on its final line?
104 Initially true (to cope with empty files), set to true/false
105 as each line is read. */
106 bool missing_trailing_newline;
107
108 /* This is a record of the beginning and end of the lines we've seen
109 while reading the file. This is useful to avoid walking the data
110 from the beginning when we are asked to read a line that is
111 before LINE_START_IDX above. Note that the maximum size of this
112 record is fcache_line_record_size, so that the memory consumption
113 doesn't explode. We thus scale total_lines down to
114 fcache_line_record_size. */
115 vec<line_info, va_heap> line_record;
116
117 fcache ();
118 ~fcache ();
119 };
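/* Example (illustrative sketch): with total_lines == 10000 and
   fcache_line_record_size == 100, a given line number maps to a slot in
   LINE_RECORD as

     slot = (line_num * fcache_line_record_size) / total_lines;

   so line 5000 lands near slot 50, and at most about
   fcache_line_record_size line boundaries are recorded regardless of
   how large the file is.  */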
120
121 /* Current position in real source file. */
122
123 location_t input_location = UNKNOWN_LOCATION;
124
125 struct line_maps *line_table;
126
127 /* A stashed copy of "line_table" for use by selftest::line_table_test.
128 This needs to be a global so that it can be a GC root, and thus
129 prevent the stashed copy from being garbage-collected if the GC runs
130 during a line_table_test. */
131
132 struct line_maps *saved_line_table;
133
134 static fcache *fcache_tab;
135 static const size_t fcache_tab_size = 16;
136 static const size_t fcache_buffer_size = 4 * 1024;
137 static const size_t fcache_line_record_size = 100;
138
139 /* Expand the source location LOC into a human readable location. If
140 LOC resolves to a builtin location, the file name of the readable
141 location is set to the string "<built-in>". If EXPANSION_POINT_P is
142 TRUE and LOC is virtual, then it is resolved to the expansion
143 point of the involved macro. Otherwise, it is resolved to the
144 spelling location of the token.
145
146 When resolving to the spelling location of the token, if the
147 resulting location is for a built-in location (that is, it has no
148 associated line/column) in the context of a macro expansion, the
149 returned location is the first one (while unwinding the macro
150 location towards its expansion point) that is in real source
151 code.
152
153 ASPECT controls which part of the location to use. */
154
155 static expanded_location
156 expand_location_1 (location_t loc,
157 bool expansion_point_p,
158 enum location_aspect aspect)
159 {
160 expanded_location xloc;
161 const line_map_ordinary *map;
162 enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
163 tree block = NULL;
164
165 if (IS_ADHOC_LOC (loc))
166 {
167 block = LOCATION_BLOCK (loc);
168 loc = LOCATION_LOCUS (loc);
169 }
170
171 memset (&xloc, 0, sizeof (xloc));
172
173 if (loc >= RESERVED_LOCATION_COUNT)
174 {
175 if (!expansion_point_p)
176 {
177 /* We want to resolve LOC to its spelling location.
178
179 But if that spelling location is a reserved location that
180 appears in the context of a macro expansion (like for a
181 location for a built-in token), let's consider the first
182 location (toward the expansion point) that is not reserved;
183 that is, the first location that is in real source code. */
184 loc = linemap_unwind_to_first_non_reserved_loc (line_table,
185 loc, NULL);
186 lrk = LRK_SPELLING_LOCATION;
187 }
188 loc = linemap_resolve_location (line_table, loc, lrk, &map);
189
190 /* loc is now either in an ordinary map, or is a reserved location.
191 If it is a compound location, the caret is in a spelling location,
192 but the start/finish might still be a virtual location.
193 Depending on what the caller asked for, we may need to recurse
194 one level in order to resolve any virtual locations in the
195 end-points. */
196 switch (aspect)
197 {
198 default:
199 gcc_unreachable ();
200 /* Fall through. */
201 case LOCATION_ASPECT_CARET:
202 break;
203 case LOCATION_ASPECT_START:
204 {
205 location_t start = get_start (loc);
206 if (start != loc)
207 return expand_location_1 (start, expansion_point_p, aspect);
208 }
209 break;
210 case LOCATION_ASPECT_FINISH:
211 {
212 location_t finish = get_finish (loc);
213 if (finish != loc)
214 return expand_location_1 (finish, expansion_point_p, aspect);
215 }
216 break;
217 }
218 xloc = linemap_expand_location (line_table, map, loc);
219 }
220
221 xloc.data = block;
222 if (loc <= BUILTINS_LOCATION)
223 xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
224
225 return xloc;
226 }
227
228 /* Initialize the set of caches used for files accessed by caret
229 diagnostics. */
230
231 static void
232 diagnostic_file_cache_init (void)
233 {
234 if (fcache_tab == NULL)
235 fcache_tab = new fcache[fcache_tab_size];
236 }
237
238 /* Free the resources used by the set of caches for files accessed
239 by caret diagnostics. */
240
241 void
242 diagnostic_file_cache_fini (void)
243 {
244 if (fcache_tab)
245 {
246 delete [] (fcache_tab);
247 fcache_tab = NULL;
248 }
249 }
250
251 /* Return the total number of lines that have been read so far by the
252 line map (in the preprocessor). For languages like C++ that
253 entirely preprocess the input file before starting to parse, this
254 equals the actual number of lines of the file. */
255
256 static size_t
257 total_lines_num (const char *file_path)
258 {
259 size_t r = 0;
260 location_t l = 0;
261 if (linemap_get_file_highest_location (line_table, file_path, &l))
262 {
263 gcc_assert (l >= RESERVED_LOCATION_COUNT);
264 expanded_location xloc = expand_location (l);
265 r = xloc.line;
266 }
267 return r;
268 }
269
270 /* Look up the cache used for the content of a given file accessed by
271 caret diagnostics. Return the cached file that was found, or NULL
272 if the file has not been cached. */
273
274 static fcache*
275 lookup_file_in_cache_tab (const char *file_path)
276 {
277 if (file_path == NULL)
278 return NULL;
279
280 diagnostic_file_cache_init ();
281
282 /* This will contain the found cached file. */
283 fcache *r = NULL;
284 for (unsigned i = 0; i < fcache_tab_size; ++i)
285 {
286 fcache *c = &fcache_tab[i];
287 if (c->file_path && !strcmp (c->file_path, file_path))
288 {
289 ++c->use_count;
290 r = c;
291 }
292 }
293
294 if (r)
295 ++r->use_count;
296
297 return r;
298 }
299
300 /* Purge any mention of FILE_PATH from the cache of files used for
301 printing source code. For use in selftests when working
302 with tempfiles. */
303
304 void
305 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
306 {
307 gcc_assert (file_path);
308
309 fcache *r = lookup_file_in_cache_tab (file_path);
310 if (!r)
311 /* Not found. */
312 return;
313
314 r->file_path = NULL;
315 if (r->fp)
316 fclose (r->fp);
317 r->fp = NULL;
318 r->nb_read = 0;
319 r->line_start_idx = 0;
320 r->line_num = 0;
321 r->line_record.truncate (0);
322 r->use_count = 0;
323 r->total_lines = 0;
324 r->missing_trailing_newline = true;
325 }
326
327 /* Return the file cache entry that has been used the least, or the
328 first empty one. If HIGHEST_USE_COUNT is non-null,
329 *HIGHEST_USE_COUNT is set to the highest use count of the entries
330 in the cache table. */
331
332 static fcache*
333 evicted_cache_tab_entry (unsigned *highest_use_count)
334 {
335 diagnostic_file_cache_init ();
336
337 fcache *to_evict = &fcache_tab[0];
338 unsigned huc = to_evict->use_count;
339 for (unsigned i = 1; i < fcache_tab_size; ++i)
340 {
341 fcache *c = &fcache_tab[i];
342 bool c_is_empty = (c->file_path == NULL);
343
344 if (c->use_count < to_evict->use_count
345 || (to_evict->file_path && c_is_empty))
346 /* We evict C because it's either an entry with a lower use
347 count or one that is empty. */
348 to_evict = c;
349
350 if (huc < c->use_count)
351 huc = c->use_count;
352
353 if (c_is_empty)
354 /* We've reached the end of the cache; subsequent elements are
355 all empty. */
356 break;
357 }
358
359 if (highest_use_count)
360 *highest_use_count = huc;
361
362 return to_evict;
363 }
364
365 /* Create the cache used for the content of a given file to be
366 accessed by caret diagnostics. This cache is added to an array of
367 caches and can be retrieved by lookup_file_in_cache_tab. This
368 function returns the created cache. Note that only the last
369 fcache_tab_size files are cached. */
370
371 static fcache*
372 add_file_to_cache_tab (const char *file_path)
373 {
374
375 FILE *fp = fopen (file_path, "r");
376 if (fp == NULL)
377 return NULL;
378
379 unsigned highest_use_count = 0;
380 fcache *r = evicted_cache_tab_entry (&highest_use_count);
381 r->file_path = file_path;
382 if (r->fp)
383 fclose (r->fp);
384 r->fp = fp;
385 r->nb_read = 0;
386 r->line_start_idx = 0;
387 r->line_num = 0;
388 r->line_record.truncate (0);
389 /* Ensure that this cache entry doesn't get evicted next time
390 add_file_to_cache_tab is called. */
391 r->use_count = ++highest_use_count;
392 r->total_lines = total_lines_num (file_path);
393 r->missing_trailing_newline = true;
394
395 return r;
396 }
397
398 /* Look up the cache used for the content of a given file accessed by
399 caret diagnostics. If no cached file was found, create a new cache
400 for this file, add it to the array of cached files and return
401 it. */
402
403 static fcache*
404 lookup_or_add_file_to_cache_tab (const char *file_path)
405 {
406 fcache *r = lookup_file_in_cache_tab (file_path);
407 if (r == NULL)
408 r = add_file_to_cache_tab (file_path);
409 return r;
410 }
411
412 /* Default constructor for a file cache used by caret
413 diagnostics. */
414
415 fcache::fcache ()
416 : use_count (0), file_path (NULL), fp (NULL), data (0),
417 size (0), nb_read (0), line_start_idx (0), line_num (0),
418 total_lines (0), missing_trailing_newline (true)
419 {
420 line_record.create (0);
421 }
422
423 /* Destructor for a file cache used by caret diagnostics. */
424
425 fcache::~fcache ()
426 {
427 if (fp)
428 {
429 fclose (fp);
430 fp = NULL;
431 }
432 if (data)
433 {
434 XDELETEVEC (data);
435 data = 0;
436 }
437 line_record.release ();
438 }
439
440 /* Returns TRUE iff the cache would need to be filled with data coming
441 from the file. That is, either the cache is empty or full, or the
442 start of the current line is at the end of the data read so far.
443 Note that if the cache is full, it would need to be extended and
444 filled again. */
444
445 static bool
446 needs_read (fcache *c)
447 {
448 return (c->nb_read == 0
449 || c->nb_read == c->size
450 || (c->line_start_idx >= c->nb_read - 1));
451 }
452
453 /* Return TRUE iff the cache is full and thus needs to be
454 extended. */
455
456 static bool
457 needs_grow (fcache *c)
458 {
459 return c->nb_read == c->size;
460 }
461
462 /* Grow the cache if it needs to be extended. */
463
464 static void
465 maybe_grow (fcache *c)
466 {
467 if (!needs_grow (c))
468 return;
469
470 size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
471 c->data = XRESIZEVEC (char, c->data, size);
472 c->size = size;
473 }
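/* Example (illustrative sketch): starting from an empty cache the buffer
   grows geometrically as more of the file is read, i.e.

     size: 0 -> 4096 -> 8192 -> 16384 -> ...

   so caching an N-byte file needs only O(log N) calls to XRESIZEVEC.  */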
474
475 /* Read more data into the cache. Extends the cache if need be.
476 Returns TRUE iff new data could be read. */
477
478 static bool
479 read_data (fcache *c)
480 {
481 if (feof (c->fp) || ferror (c->fp))
482 return false;
483
484 maybe_grow (c);
485
486 char * from = c->data + c->nb_read;
487 size_t to_read = c->size - c->nb_read;
488 size_t nb_read = fread (from, 1, to_read, c->fp);
489
490 if (ferror (c->fp))
491 return false;
492
493 c->nb_read += nb_read;
494 return !!nb_read;
495 }
496
497 /* Read new data iff the cache needs to be filled with more data
498 coming from the file FP. Return TRUE iff the cache was filled with
499 more data. */
500
501 static bool
502 maybe_read_data (fcache *c)
503 {
504 if (!needs_read (c))
505 return false;
506 return read_data (c);
507 }
508
509 /* Read a new line from file FP, using C as a cache for the data
510 coming from the file. Upon successful completion, *LINE is set to
511 the beginning of the line found. *LINE points directly in the
512 line cache and is only valid until the next call of get_next_line.
513 *LINE_LEN is set to the length of the line. Note that the line
514 does not contain any terminal delimiter. This function returns
515 true if some data was read or processed from the cache, false
516 otherwise. Note that subsequent calls to get_next_line might
517 make the content of *LINE invalid. */
518
519 static bool
520 get_next_line (fcache *c, char **line, ssize_t *line_len)
521 {
522 /* Fill the cache with data to process. */
523 maybe_read_data (c);
524
525 size_t remaining_size = c->nb_read - c->line_start_idx;
526 if (remaining_size == 0)
527 /* There is no more data to process. */
528 return false;
529
530 char *line_start = c->data + c->line_start_idx;
531
532 char *next_line_start = NULL;
533 size_t len = 0;
534 char *line_end = (char *) memchr (line_start, '\n', remaining_size);
535 if (line_end == NULL)
536 {
537 /* We haven't found the end-of-line delimiter in the cache.
538 Fill the cache with more data from the file and look for the
539 '\n'. */
540 while (maybe_read_data (c))
541 {
542 line_start = c->data + c->line_start_idx;
543 remaining_size = c->nb_read - c->line_start_idx;
544 line_end = (char *) memchr (line_start, '\n', remaining_size);
545 if (line_end != NULL)
546 {
547 next_line_start = line_end + 1;
548 break;
549 }
550 }
551 if (line_end == NULL)
552 {
553 /* We've loaded the whole file into the cache and still found no
554 '\n'. Let's say the line ends one byte past the
555 end of the file. This is to stay consistent with the case
556 where the line ends with a '\n' and line_end points to
557 that terminal '\n'. That consistency is useful below in
558 the len calculation. */
559 line_end = c->data + c->nb_read;
560 c->missing_trailing_newline = true;
561 }
562 else
563 c->missing_trailing_newline = false;
564 }
565 else
566 {
567 next_line_start = line_end + 1;
568 c->missing_trailing_newline = false;
569 }
570
571 if (ferror (c->fp))
572 return false;
573
574 /* At this point, we've found the end of the line. It either
575 points to the '\n' or to one byte after the last byte of the
576 file. */
577 gcc_assert (line_end != NULL);
578
579 len = line_end - line_start;
580
581 if (c->line_start_idx < c->nb_read)
582 *line = line_start;
583
584 ++c->line_num;
585
586 /* Before we update our line record, make sure the hint about the
587 total number of lines of the file is correct. If it's not, then
588 we give up recording line boundaries from now on. */
589 bool update_line_record = true;
590 if (c->line_num > c->total_lines)
591 update_line_record = false;
592
593 /* Now update our line record so that re-reading lines from
594 before c->line_start_idx is faster. */
595 if (update_line_record
596 && c->line_record.length () < fcache_line_record_size)
597 {
598 /* If the file's lines fit in the line record, we just record all
599 of its lines ... */
600 if (c->total_lines <= fcache_line_record_size
601 && c->line_num > c->line_record.length ())
602 c->line_record.safe_push (fcache::line_info (c->line_num,
603 c->line_start_idx,
604 line_end - c->data));
605 else if (c->total_lines > fcache_line_record_size)
606 {
607 /* ... otherwise, we scale the line numbers down so that at most
608 fcache_line_record_size lines are recorded. */
609 size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
610 if (c->line_record.length () == 0
611 || n >= c->line_record.length ())
612 c->line_record.safe_push (fcache::line_info (c->line_num,
613 c->line_start_idx,
614 line_end - c->data));
615 }
616 }
617
618 /* Update c->line_start_idx so that it points to the next line to be
619 read. */
620 if (next_line_start)
621 c->line_start_idx = next_line_start - c->data;
622 else
623 /* We didn't find any terminal '\n'. Let's consider that the end
624 of line is the end of the data in the cache. The next
625 invocation of get_next_line will either read more data from the
626 underlying file or return false early because we've reached the
627 end of the file. */
628 c->line_start_idx = c->nb_read;
629
630 *line_len = len;
631
632 return true;
633 }
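/* Example (illustrative sketch): walking every line of a cached file with
   get_next_line above, assuming C was obtained from
   lookup_or_add_file_to_cache_tab:

     char *line;
     ssize_t len;
     while (get_next_line (c, &line, &len))
       fprintf (stderr, "%u: %.*s\n",
                (unsigned) c->line_num, (int) len, line);

   Each returned LINE points into C->data and is invalidated by the
   next call.  */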
634
635 /* Consume the next bytes coming from the cache (or from its
636 underlying file if there are remaining unread bytes in the file)
637 until we reach the next end-of-line (or end-of-file). There is no
638 copying from the cache involved. Return TRUE upon successful
639 completion. */
640
641 static bool
642 goto_next_line (fcache *cache)
643 {
644 char *l;
645 ssize_t len;
646
647 return get_next_line (cache, &l, &len);
648 }
649
650 /* Read an arbitrary line number LINE_NUM from the file cached in C.
651 If the line was read successfully, *LINE points to the beginning
652 of the line in the file cache and *LINE_LEN is the length of the
653 line. *LINE is not nul-terminated, but may contain zero bytes.
654 *LINE is only valid until the next call of read_line_num.
655 This function returns true if a line was read. */
656
657 static bool
658 read_line_num (fcache *c, size_t line_num,
659 char **line, ssize_t *line_len)
660 {
661 gcc_assert (line_num > 0);
662
663 if (line_num <= c->line_num)
664 {
665 /* We've been asked to read lines that are before c->line_num.
666 So let's use our line record (if it's not empty) to try to
667 avoid re-reading the file from the beginning again. */
668
669 if (c->line_record.is_empty ())
670 {
671 c->line_start_idx = 0;
672 c->line_num = 0;
673 }
674 else
675 {
676 fcache::line_info *i = NULL;
677 if (c->total_lines <= fcache_line_record_size)
678 {
679 /* In languages where the input file is not totally
680 preprocessed up front, the c->total_lines hint
681 can be smaller than the number of lines of the
682 file. In that case, only the first
683 c->total_lines have been recorded.
684
685 Otherwise, the first c->total_lines we've read have
686 their start/end recorded here. */
687 i = (line_num <= c->total_lines)
688 ? &c->line_record[line_num - 1]
689 : &c->line_record[c->total_lines - 1];
690 gcc_assert (i->line_num <= line_num);
691 }
692 else
693 {
694 /* So the file had more lines than our line record
695 size. Thus the number of lines we've recorded has
696 been scaled down to fcache_line_record_size. Let's
697 pick the start/end of the recorded line that is
698 closest to line_num. */
699 size_t n = (line_num <= c->total_lines)
700 ? line_num * fcache_line_record_size / c->total_lines
701 : c->line_record.length () - 1;
702 if (n < c->line_record.length ())
703 {
704 i = &c->line_record[n];
705 gcc_assert (i->line_num <= line_num);
706 }
707 }
708
709 if (i && i->line_num == line_num)
710 {
711 /* We have the start/end of the line. */
712 *line = c->data + i->start_pos;
713 *line_len = i->end_pos - i->start_pos;
714 return true;
715 }
716
717 if (i)
718 {
719 c->line_start_idx = i->start_pos;
720 c->line_num = i->line_num - 1;
721 }
722 else
723 {
724 c->line_start_idx = 0;
725 c->line_num = 0;
726 }
727 }
728 }
729
730 /* Let's walk from line c->line_num up to line_num - 1, without
731 copying any line. */
732 while (c->line_num < line_num - 1)
733 if (!goto_next_line (c))
734 return false;
735
736 /* The line we want is the next one. Let's read and copy it back to
737 the caller. */
738 return get_next_line (c, line, line_len);
739 }
740
741 /* Return the physical source line that corresponds to FILE_PATH/LINE.
742 The line is not nul-terminated. The returned pointer is only
743 valid until the next call of location_get_source_line.
744 Note that the line can contain several null characters,
745 so the returned value's length gives the actual length of the line.
746 If the function fails, a NULL char_span is returned. */
747
748 char_span
749 location_get_source_line (const char *file_path, int line)
750 {
751 char *buffer = NULL;
752 ssize_t len;
753
754 if (line == 0)
755 return char_span (NULL, 0);
756
757 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
758 if (c == NULL)
759 return char_span (NULL, 0);
760
761 bool read = read_line_num (c, line, &buffer, &len);
762 if (!read)
763 return char_span (NULL, 0);
764
765 return char_span (buffer, len);
766 }
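/* Example (illustrative sketch): how diagnostic code typically uses
   location_get_source_line, assuming EXPLOC is an expanded_location for
   the location being printed:

     char_span line = location_get_source_line (exploc.file, exploc.line);
     if (line)
       fprintf (stderr, "%.*s\n",
                (int) line.length (), line.get_buffer ());

   The span is not nul-terminated and is only valid until the next call
   to location_get_source_line.  */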
767
768 /* Determine if FILE_PATH is missing a trailing newline on its final line.
769 Only valid to call once all of the file has been loaded, by
770 requesting a line number beyond the end of the file. */
771
772 bool
773 location_missing_trailing_newline (const char *file_path)
774 {
775 fcache *c = lookup_or_add_file_to_cache_tab (file_path);
776 if (c == NULL)
777 return false;
778
779 return c->missing_trailing_newline;
780 }
781
782 /* Test if the location originates from the spelling location of a
783 built-in token. That is, return TRUE if LOC is a (possibly
784 virtual) location of a built-in token that appears in the expansion
785 list of a macro. Please note that this function also works on
786 tokens that result from built-in tokens. For instance, the
787 function would return true if passed a token "4" that is the result
788 of the expansion of the built-in __LINE__ macro. */
789 bool
790 is_location_from_builtin_token (location_t loc)
791 {
792 const line_map_ordinary *map = NULL;
793 loc = linemap_resolve_location (line_table, loc,
794 LRK_SPELLING_LOCATION, &map);
795 return loc == BUILTINS_LOCATION;
796 }
797
798 /* Expand the source location LOC into a human readable location. If
799 LOC is virtual, it resolves to the expansion point of the involved
800 macro. If LOC resolves to a builtin location, the file name of the
801 readable location is set to the string "<built-in>". */
802
803 expanded_location
804 expand_location (location_t loc)
805 {
806 return expand_location_1 (loc, /*expansion_point_p=*/true,
807 LOCATION_ASPECT_CARET);
808 }
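/* Example (illustrative sketch): for a location LOC of a token spelled
   inside a macro expansion, the expander above and the one below resolve
   to different places:

     expanded_location use_site = expand_location (loc);
     // File/line/column of the macro expansion point (the use site).

     expanded_location spelling
       = expand_location_to_spelling_point (loc, LOCATION_ASPECT_CARET);
     // File/line/column where the token is spelled, typically inside
     // the macro definition.  */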
809
810 /* Expand the source location LOC into a human readable location. If
811 LOC is virtual, it resolves to the expansion location of the
812 relevant macro. If LOC resolves to a builtin location, the file
813 name of the readable location is set to the string
814 "<built-in>". */
815
816 expanded_location
817 expand_location_to_spelling_point (location_t loc,
818 enum location_aspect aspect)
819 {
820 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
821 }
822
823 /* The rich_location class within libcpp requires a way to expand
824 location_t instances, and relies on the client code
825 providing a symbol named
826 linemap_client_expand_location_to_spelling_point
827 to do this.
828
829 This is the implementation for libcommon.a (all host binaries),
830 which simply calls into expand_location_1. */
831
832 expanded_location
833 linemap_client_expand_location_to_spelling_point (location_t loc,
834 enum location_aspect aspect)
835 {
836 return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
837 }
838
839
840 /* If LOCATION is in a system header and if it is a virtual location for
841 a token coming from the expansion of a macro, unwind it to the
842 location of the expansion point of the macro. Otherwise, just return
843 LOCATION.
844
845 This is used for instance when we want to emit diagnostics about a
846 token that may be located in a macro that is itself defined in a
847 system header, for example, for the NULL macro. In such a case, if
848 LOCATION were passed directly to diagnostic functions such as
849 warning_at, the diagnostic would be suppressed (unless
850 -Wsystem-headers). */
851
852 location_t
853 expansion_point_location_if_in_system_header (location_t location)
854 {
855 if (in_system_header_at (location))
856 location = linemap_resolve_location (line_table, location,
857 LRK_MACRO_EXPANSION_POINT,
858 NULL);
859 return location;
860 }
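/* Example (illustrative sketch): how a caller might combine the function
   above with warning_at when diagnosing a token that may come from a
   macro defined in a system header; OPT_W... stands for whichever warning
   option applies:

     location_t loc
       = expansion_point_location_if_in_system_header (token_loc);
     warning_at (loc, OPT_W..., "suspicious use of NULL");

   The diagnostic then points at the user code that expanded the macro
   instead of being suppressed as coming from a system header.  */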
861
862 /* If LOCATION is a virtual location for a token coming from the expansion
863 of a macro, unwind to the location of the expansion point of the macro. */
864
865 location_t
866 expansion_point_location (location_t location)
867 {
868 return linemap_resolve_location (line_table, location,
869 LRK_MACRO_EXPANSION_POINT, NULL);
870 }
871
872 /* Construct a location with caret at CARET, ranging from START to
873 FINISH, e.g.
874
875 11111111112
876 12345678901234567890
877 522
878 523 return foo + bar;
879 ~~~~^~~~~
880 524
881
882 The location's caret is at the "+", line 523 column 15, but starts
883 earlier, at the "f" of "foo" at column 11. The finish is at the "r"
884 of "bar" at column 19. */
885
886 location_t
887 make_location (location_t caret, location_t start, location_t finish)
888 {
889 location_t pure_loc = get_pure_location (caret);
890 source_range src_range;
891 src_range.m_start = get_start (start);
892 src_range.m_finish = get_finish (finish);
893 location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
894 pure_loc,
895 src_range,
896 NULL);
897 return combined_loc;
898 }
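/* Example (illustrative sketch): building the location pictured above,
   assuming CARET_LOC, START_LOC and FINISH_LOC are the locations of the
   "+", of the "f" in "foo" and of the "r" in "bar" respectively:

     location_t combined = make_location (caret_loc, start_loc, finish_loc);
     warning_at (combined, 0, "this diagnostic underlines %<foo + bar%>");

   The caret is printed at column 15 and the underline covers columns 11
   to 19 of line 523.  */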
899
900 /* Same as above, but taking a source range rather than two locations. */
901
902 location_t
903 make_location (location_t caret, source_range src_range)
904 {
905 location_t pure_loc = get_pure_location (caret);
906 return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
907 }
908
909 /* Dump statistics to stderr about the memory usage of the line_table
910 set of line maps. This also displays some statistics about macro
911 expansion. */
912
913 void
914 dump_line_table_statistics (void)
915 {
916 struct linemap_stats s;
917 long total_used_map_size,
918 macro_maps_size,
919 total_allocated_map_size;
920
921 memset (&s, 0, sizeof (s));
922
923 linemap_get_statistics (line_table, &s);
924
925 macro_maps_size = s.macro_maps_used_size
926 + s.macro_maps_locations_size;
927
928 total_allocated_map_size = s.ordinary_maps_allocated_size
929 + s.macro_maps_allocated_size
930 + s.macro_maps_locations_size;
931
932 total_used_map_size = s.ordinary_maps_used_size
933 + s.macro_maps_used_size
934 + s.macro_maps_locations_size;
935
936 fprintf (stderr, "Number of expanded macros: %5ld\n",
937 s.num_expanded_macros);
938 if (s.num_expanded_macros != 0)
939 fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
940 s.num_macro_tokens / s.num_expanded_macros);
941 fprintf (stderr,
942 "\nLine Table allocations during the "
943 "compilation process\n");
944 fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
945 SIZE_AMOUNT (s.num_ordinary_maps_used));
946 fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
947 SIZE_AMOUNT (s.ordinary_maps_used_size));
948 fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
949 SIZE_AMOUNT (s.num_ordinary_maps_allocated));
950 fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
951 SIZE_AMOUNT (s.ordinary_maps_allocated_size));
952 fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
953 SIZE_AMOUNT (s.num_macro_maps_used));
954 fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
955 SIZE_AMOUNT (s.macro_maps_used_size));
956 fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
957 SIZE_AMOUNT (s.macro_maps_locations_size));
958 fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
959 SIZE_AMOUNT (macro_maps_size));
960 fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
961 SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
962 fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
963 SIZE_AMOUNT (total_allocated_map_size));
964 fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
965 SIZE_AMOUNT (total_used_map_size));
966 fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
967 SIZE_AMOUNT (s.adhoc_table_size));
968 fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
969 SIZE_AMOUNT (s.adhoc_table_entries_used));
970 fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
971 SIZE_AMOUNT (line_table->num_optimized_ranges));
972 fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
973 SIZE_AMOUNT (line_table->num_unoptimized_ranges));
974
975 fprintf (stderr, "\n");
976 }
977
978 /* Get location one beyond the final location in ordinary map IDX. */
979
980 static location_t
981 get_end_location (struct line_maps *set, unsigned int idx)
982 {
983 if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
984 return set->highest_location;
985
986 struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
987 return MAP_START_LOCATION (next_map);
988 }
989
990 /* Helper function for write_digit_row. */
991
992 static void
993 write_digit (FILE *stream, int digit)
994 {
995 fputc ('0' + (digit % 10), stream);
996 }
997
998 /* Helper function for dump_location_info.
999 Write a row of numbers to STREAM, numbering a source line,
1000 giving the units, tens, hundreds etc of the column number. */
1001
1002 static void
1003 write_digit_row (FILE *stream, int indent,
1004 const line_map_ordinary *map,
1005 location_t loc, int max_col, int divisor)
1006 {
1007 fprintf (stream, "%*c", indent, ' ');
1008 fprintf (stream, "|");
1009 for (int column = 1; column < max_col; column++)
1010 {
1011 location_t column_loc = loc + (column << map->m_range_bits);
1012 write_digit (stream, column_loc / divisor);
1013 }
1014 fprintf (stream, "\n");
1015 }
1016
1017 /* Write a half-closed (START) / half-open (END) interval of
1018 location_t to STREAM. */
1019
1020 static void
1021 dump_location_range (FILE *stream,
1022 location_t start, location_t end)
1023 {
1024 fprintf (stream,
1025 " location_t interval: %u <= loc < %u\n",
1026 start, end);
1027 }
1028
1029 /* Write a labelled description of a half-closed (START) / half-open (END)
1030 interval of location_t to STREAM. */
1031
1032 static void
1033 dump_labelled_location_range (FILE *stream,
1034 const char *name,
1035 location_t start, location_t end)
1036 {
1037 fprintf (stream, "%s\n", name);
1038 dump_location_range (stream, start, end);
1039 fprintf (stream, "\n");
1040 }
1041
1042 /* Write a visualization of the locations in the line_table to STREAM. */
1043
1044 void
1045 dump_location_info (FILE *stream)
1046 {
1047 /* Visualize the reserved locations. */
1048 dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1049 0, RESERVED_LOCATION_COUNT);
1050
1051 /* Visualize the ordinary line_map instances, rendering the sources. */
1052 for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1053 {
1054 location_t end_location = get_end_location (line_table, idx);
1055 /* half-closed: doesn't include this one. */
1056
1057 const line_map_ordinary *map
1058 = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1059 fprintf (stream, "ORDINARY MAP: %i\n", idx);
1060 dump_location_range (stream,
1061 MAP_START_LOCATION (map), end_location);
1062 fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1063 fprintf (stream, " starting at line: %i\n",
1064 ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1065 fprintf (stream, " column and range bits: %i\n",
1066 map->m_column_and_range_bits);
1067 fprintf (stream, " column bits: %i\n",
1068 map->m_column_and_range_bits - map->m_range_bits);
1069 fprintf (stream, " range bits: %i\n",
1070 map->m_range_bits);
1071 const char * reason;
1072 switch (map->reason) {
1073 case LC_ENTER:
1074 reason = "LC_ENTER";
1075 break;
1076 case LC_LEAVE:
1077 reason = "LC_LEAVE";
1078 break;
1079 case LC_RENAME:
1080 reason = "LC_RENAME";
1081 break;
1082 case LC_RENAME_VERBATIM:
1083 reason = "LC_RENAME_VERBATIM";
1084 break;
1085 case LC_ENTER_MACRO:
1086 reason = "LC_ENTER_MACRO";
1087 break;
1088 default:
1089 reason = "Unknown";
1090 }
1091 fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1092
1093 const line_map_ordinary *includer_map
1094 = linemap_included_from_linemap (line_table, map);
1095 fprintf (stream, " included from location: %d",
1096 linemap_included_from (map));
1097 if (includer_map) {
1098 fprintf (stream, " (in ordinary map %d)",
1099 int (includer_map - line_table->info_ordinary.maps));
1100 }
1101 fprintf (stream, "\n");
1102
1103 /* Render the span of source lines that this "map" covers. */
1104 for (location_t loc = MAP_START_LOCATION (map);
1105 loc < end_location;
1106 loc += (1 << map->m_range_bits) )
1107 {
1108 gcc_assert (pure_location_p (line_table, loc) );
1109
1110 expanded_location exploc
1111 = linemap_expand_location (line_table, map, loc);
1112
1113 if (exploc.column == 0)
1114 {
1115 /* Beginning of a new source line: draw the line. */
1116
1117 char_span line_text = location_get_source_line (exploc.file,
1118 exploc.line);
1119 if (!line_text)
1120 break;
1121 fprintf (stream,
1122 "%s:%3i|loc:%5i|%.*s\n",
1123 exploc.file, exploc.line,
1124 loc,
1125 (int)line_text.length (), line_text.get_buffer ());
1126
1127 /* "loc" is at column 0, which means "the whole line".
1128 Render the locations *within* the line, by underlining
1129 it, showing the location_t numeric values
1130 at each column. */
1131 size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1132 if (max_col > line_text.length ())
1133 max_col = line_text.length () + 1;
1134
1135 int len_lnum = num_digits (exploc.line);
1136 if (len_lnum < 3)
1137 len_lnum = 3;
1138 int len_loc = num_digits (loc);
1139 if (len_loc < 5)
1140 len_loc = 5;
1141
1142 int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1143
1144 /* Thousands. */
1145 if (end_location > 999)
1146 write_digit_row (stream, indent, map, loc, max_col, 1000);
1147
1148 /* Hundreds. */
1149 if (end_location > 99)
1150 write_digit_row (stream, indent, map, loc, max_col, 100);
1151
1152 /* Tens. */
1153 write_digit_row (stream, indent, map, loc, max_col, 10);
1154
1155 /* Units. */
1156 write_digit_row (stream, indent, map, loc, max_col, 1);
1157 }
1158 }
1159 fprintf (stream, "\n");
1160 }
1161
1162 /* Visualize unallocated values. */
1163 dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1164 line_table->highest_location,
1165 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1166
1167 /* Visualize the macro line_map instances, rendering the sources. */
1168 for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1169 {
1170 /* Each macro map that is allocated owns location_t values
1171 that are *lower* than the ones before them.
1172 Hence it's meaningful to view them either in order of ascending
1173 source locations, or in order of ascending macro map index. */
1174 const bool ascending_location_ts = true;
1175 unsigned int idx = (ascending_location_ts
1176 ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1177 : i);
1178 const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1179 fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1180 idx,
1181 linemap_map_get_macro_name (map),
1182 MACRO_MAP_NUM_MACRO_TOKENS (map));
1183 dump_location_range (stream,
1184 map->start_location,
1185 (map->start_location
1186 + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1187 inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1188 "expansion point is location %i",
1189 MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1190 fprintf (stream, " map->start_location: %u\n",
1191 map->start_location);
1192
1193 fprintf (stream, " macro_locations:\n");
1194 for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1195 {
1196 location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1197 location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1198
1199 /* linemap_add_macro_token encodes token numbers in an expansion
1200 by putting them after MAP_START_LOCATION. */
1201
1202 /* I'm typically seeing 4 uninitialized entries at the end, each
1203 with the value 0xafafafaf.
1204 This appears to be due to macro.c:replace_args
1205 adding 2 extra args for padding tokens; presumably there may
1206 be a leading and/or trailing padding token injected,
1207 each for 2 more location slots.
1208 This would explain there being up to 4 location_t slots
1209 that may be uninitialized. */
1210
1211 fprintf (stream, " %u: %u, %u\n",
1212 i,
1213 x,
1214 y);
1215 if (x == y)
1216 {
1217 if (x < MAP_START_LOCATION (map))
1218 inform (x, "token %u has %<x-location == y-location == %u%>",
1219 i, x);
1220 else
1221 fprintf (stream,
1222 "x-location == y-location == %u encodes token # %u\n",
1223 x, x - MAP_START_LOCATION (map));
1224 }
1225 else
1226 {
1227 inform (x, "token %u has %<x-location == %u%>", i, x);
1228 inform (x, "token %u has %<y-location == %u%>", i, y);
1229 }
1230 }
1231 fprintf (stream, "\n");
1232 }
1233
1234 /* It appears that MAX_LOCATION_T itself is never assigned to a
1235 macro map, presumably due to an off-by-one error somewhere
1236 between the logic in linemap_enter_macro and
1237 LINEMAPS_MACRO_LOWEST_LOCATION. */
1238 dump_labelled_location_range (stream, "MAX_LOCATION_T",
1239 MAX_LOCATION_T,
1240 MAX_LOCATION_T + 1);
1241
1242 /* Visualize ad-hoc values. */
1243 dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1244 MAX_LOCATION_T + 1, UINT_MAX);
1245 }
1246
1247 /* string_concat's constructor. */
1248
1249 string_concat::string_concat (int num, location_t *locs)
1250 : m_num (num)
1251 {
1252 m_locs = ggc_vec_alloc <location_t> (num);
1253 for (int i = 0; i < num; i++)
1254 m_locs[i] = locs[i];
1255 }
1256
1257 /* string_concat_db's constructor. */
1258
1259 string_concat_db::string_concat_db ()
1260 {
1261 m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1262 }
1263
1264 /* Record that a string concatenation occurred, covering NUM
1265 string literal tokens. LOCS is an array of size NUM, containing the
1266 locations of the tokens. A copy of LOCS is taken. */
1267
1268 void
1269 string_concat_db::record_string_concatenation (int num, location_t *locs)
1270 {
1271 gcc_assert (num > 1);
1272 gcc_assert (locs);
1273
1274 location_t key_loc = get_key_loc (locs[0]);
1275
1276 string_concat *concat
1277 = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1278 m_table->put (key_loc, concat);
1279 }
1280
1281 /* Determine if LOC was the location of the initial token of a
1282 concatenation of string literal tokens.
1283 If so, *OUT_NUM is written to with the number of tokens, and
1284 *OUT_LOCS with the location of an array of locations of the
1285 tokens, and return true. *OUT_LOCS is a borrowed pointer to
1286 storage owned by the string_concat_db.
1287 Otherwise, return false. */
1288
1289 bool
1290 string_concat_db::get_string_concatenation (location_t loc,
1291 int *out_num,
1292 location_t **out_locs)
1293 {
1294 gcc_assert (out_num);
1295 gcc_assert (out_locs);
1296
1297 location_t key_loc = get_key_loc (loc);
1298
1299 string_concat **concat = m_table->get (key_loc);
1300 if (!concat)
1301 return false;
1302
1303 *out_num = (*concat)->m_num;
1304 *out_locs = (*concat)->m_locs;
1305 return true;
1306 }
1307
1308 /* Internal function. Canonicalize LOC into a form suitable for
1309 use as a key within the database, stripping away macro expansion,
1310 ad-hoc information, and range information, using the location of
1311 the start of LOC within an ordinary linemap. */
1312
1313 location_t
1314 string_concat_db::get_key_loc (location_t loc)
1315 {
1316 loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1317 NULL);
1318
1319 loc = get_range_from_loc (line_table, loc).m_start;
1320
1321 return loc;
1322 }
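/* Example (illustrative sketch): how a front end might use the database
   above, assuming DB points to its string_concat_db and LOCS holds the
   locations of two literal tokens "foo" "bar" that were concatenated
   during lexing:

     // When the concatenation is seen:
     db->record_string_concatenation (2, locs);

     // Later, when a diagnostic needs the individual pieces:
     int num;
     location_t *piece_locs;
     if (db->get_string_concatenation (locs[0], &num, &piece_locs))
       {
         // NUM is 2; PIECE_LOCS borrows storage owned by the database.
       }
*/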
1323
1324 /* Helper class for use within get_substring_ranges_for_loc.
1325 A vec of cpp_string with responsibility for releasing all of the
1326 str->text for each str in the vector. */
1327
1328 class auto_cpp_string_vec : public auto_vec <cpp_string>
1329 {
1330 public:
1331 auto_cpp_string_vec (int alloc)
1332 : auto_vec <cpp_string> (alloc) {}
1333
1334 ~auto_cpp_string_vec ()
1335 {
1336 /* Clean up the copies within this vec. */
1337 int i;
1338 cpp_string *str;
1339 FOR_EACH_VEC_ELT (*this, i, str)
1340 free (const_cast <unsigned char *> (str->text));
1341 }
1342 };
1343
1344 /* Attempt to populate RANGES with source location information on the
1345 individual characters within the string literal found at STRLOC.
1346 If CONCATS is non-NULL, then any string literals that the token at
1347 STRLOC was concatenated with are also added to RANGES.
1348
1349 Return NULL if successful, or an error message if any errors occurred (in
1350 which case RANGES may be only partially populated and should not
1351 be used).
1352
1353 This is implemented by re-parsing the relevant source line(s). */
1354
1355 static const char *
1356 get_substring_ranges_for_loc (cpp_reader *pfile,
1357 string_concat_db *concats,
1358 location_t strloc,
1359 enum cpp_ttype type,
1360 cpp_substring_ranges &ranges)
1361 {
1362 gcc_assert (pfile);
1363
1364 if (strloc == UNKNOWN_LOCATION)
1365 return "unknown location";
1366
1367 /* Reparsing the strings requires accurate location information.
1368 If -ftrack-macro-expansion has been overridden from its default
1369 of 2, then we might have a location of a macro expansion point,
1370 rather than the location of the literal itself.
1371 Avoid this by requiring that we have full macro expansion tracking
1372 for substring locations to be available. */
1373 if (cpp_get_options (pfile)->track_macro_expansion != 2)
1374 return "track_macro_expansion != 2";
1375
1376 /* If #line or # 44 "file"-style directives are present, then there's
1377 no guarantee that the line numbers we have can be used to locate
1378 the strings. For example, we might have a .i file with # directives
1379 pointing back to lines within a .c file, but the .c file might
1380 have been edited since the .i file was created.
1381 In such a case, the safest course is to disable on-demand substring
1382 locations. */
1383 if (line_table->seen_line_directive)
1384 return "seen line directive";
1385
1386 /* If string concatenation has occurred at STRLOC, get the locations
1387 of all of the literal tokens making up the compound string.
1388 Otherwise, just use STRLOC. */
1389 int num_locs = 1;
1390 location_t *strlocs = &strloc;
1391 if (concats)
1392 concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1393
1394 auto_cpp_string_vec strs (num_locs);
1395 auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1396 for (int i = 0; i < num_locs; i++)
1397 {
1398 /* Get range of strloc. We will use it to locate the start and finish
1399 of the literal token within the line. */
1400 source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1401
1402 if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1403 {
1404 /* If the string token was within a macro expansion, then we can
1405 cope with it for the simple case where we have a single token.
1406 Otherwise, bail out. */
1407 if (src_range.m_start != src_range.m_finish)
1408 return "macro expansion";
1409 }
1410 else
1411 {
1412 if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1413 /* If so, we can't reliably determine where the token started within
1414 its line. */
1415 return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1416
1417 if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1418 /* If so, we can't reliably determine where the token finished
1419 within its line. */
1420 return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1421 }
1422
1423 expanded_location start
1424 = expand_location_to_spelling_point (src_range.m_start,
1425 LOCATION_ASPECT_START);
1426 expanded_location finish
1427 = expand_location_to_spelling_point (src_range.m_finish,
1428 LOCATION_ASPECT_FINISH);
1429 if (start.file != finish.file)
1430 return "range endpoints are in different files";
1431 if (start.line != finish.line)
1432 return "range endpoints are on different lines";
1433 if (start.column > finish.column)
1434 return "range endpoints are reversed";
1435
1436 char_span line = location_get_source_line (start.file, start.line);
1437 if (!line)
1438 return "unable to read source line";
1439
1440 /* Determine the location of the literal (including quotes
1441 and leading prefix chars, such as the 'u' in a u""
1442 token). */
1443 size_t literal_length = finish.column - start.column + 1;
1444
1445 /* Ensure that we don't crash if we got the wrong location. */
1446 if (line.length () < (start.column - 1 + literal_length))
1447 return "line is not wide enough";
1448
1449 char_span literal = line.subspan (start.column - 1, literal_length);
1450
1451 cpp_string from;
1452 from.len = literal_length;
1453 /* Make a copy of the literal, to avoid having to rely on
1454 the lifetime of the copy of the line within the cache.
1455 This will be released by the auto_cpp_string_vec dtor. */
1456 from.text = (unsigned char *)literal.xstrdup ();
1457 strs.safe_push (from);
1458
1459 /* For very long lines, a new linemap could have started
1460 halfway through the token.
1461 Ensure that the loc_reader uses the linemap of the
1462 *end* of the token for its start location. */
1463 const line_map_ordinary *start_ord_map;
1464 linemap_resolve_location (line_table, src_range.m_start,
1465 LRK_SPELLING_LOCATION, &start_ord_map);
1466 const line_map_ordinary *final_ord_map;
1467 linemap_resolve_location (line_table, src_range.m_finish,
1468 LRK_SPELLING_LOCATION, &final_ord_map);
1469 if (start_ord_map == NULL || final_ord_map == NULL)
1470 return "failed to get ordinary maps";
1471 /* Bulletproofing. We ought to only have different ordinary maps
1472 for start vs finish due to line-length jumps. */
1473 if (start_ord_map != final_ord_map
1474 && start_ord_map->to_file != final_ord_map->to_file)
1475 return "start and finish are spelled in different ordinary maps";
1476 /* The file from linemap_resolve_location ought to match that from
1477 expand_location_to_spelling_point. */
1478 if (start_ord_map->to_file != start.file)
1479 return "mismatching file after resolving linemap";
1480
1481 location_t start_loc
1482 = linemap_position_for_line_and_column (line_table, final_ord_map,
1483 start.line, start.column);
1484
1485 cpp_string_location_reader loc_reader (start_loc, line_table);
1486 loc_readers.safe_push (loc_reader);
1487 }
1488
1489 /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1490 const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1491 loc_readers.address (),
1492 num_locs, &ranges, type);
1493 if (err)
1494 return err;
1495
1496 /* Success: "ranges" should now contain information on the string. */
1497 return NULL;
1498 }
1499
1500 /* Attempt to populate *OUT_LOC with source location information on the
1501 given characters within the string literal found at STRLOC.
1502 CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1503 character set.
1504
1505 For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1506 and string literal "012345\n789"
1507 *OUT_LOC is written to with:
1508 "012345\n789"
1509 ~^~~~~
1510
1511 If CONCATS is non-NULL, then any string literals that the token at
1512 STRLOC was concatenated with are also considered.
1513
1514 This is implemented by re-parsing the relevant source line(s).
1515
1516 Return NULL if successful, or an error message if any errors occurred.
1517 Error messages are intended for GCC developers (to help debugging) rather
1518 than for end-users. */
1519
1520 const char *
1521 get_location_within_string (cpp_reader *pfile,
1522 string_concat_db *concats,
1523 location_t strloc,
1524 enum cpp_ttype type,
1525 int caret_idx, int start_idx, int end_idx,
1526 location_t *out_loc)
1527 {
1528 gcc_checking_assert (caret_idx >= 0);
1529 gcc_checking_assert (start_idx >= 0);
1530 gcc_checking_assert (end_idx >= 0);
1531 gcc_assert (out_loc);
1532
1533 cpp_substring_ranges ranges;
1534 const char *err
1535 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1536 if (err)
1537 return err;
1538
1539 if (caret_idx >= ranges.get_num_ranges ())
1540 return "caret_idx out of range";
1541 if (start_idx >= ranges.get_num_ranges ())
1542 return "start_idx out of range";
1543 if (end_idx >= ranges.get_num_ranges ())
1544 return "end_idx out of range";
1545
1546 *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1547 ranges.get_range (start_idx).m_start,
1548 ranges.get_range (end_idx).m_finish);
1549 return NULL;
1550 }
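/* Example (illustrative sketch): using get_location_within_string to
   highlight characters 3-7 of a string literal with the caret on
   character 4, as in the worked example above.  PFILE, CONCATS and
   STRLOC are assumed to describe the string token being diagnosed:

     location_t substring_loc;
     const char *err
       = get_location_within_string (pfile, concats, strloc, CPP_STRING,
                                     4, 3, 7, &substring_loc);
     if (!err)
       warning_at (substring_loc, 0, "problem inside the string literal");
*/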
1551
1552 #if CHECKING_P
1553
1554 namespace selftest {
1555
1556 /* Selftests of location handling. */
1557
1558 /* Attempt to populate *OUT_RANGE with source location information on the
1559 given character within the string literal found at STRLOC.
1560 CHAR_IDX refers to an offset within the execution character set.
1561 If CONCATS is non-NULL, then any string literals that the token at
1562 STRLOC was concatenated with are also considered.
1563
1564 This is implemented by re-parsing the relevant source line(s).
1565
1566 Return NULL if successful, or an error message if any errors occurred.
1567 Error messages are intended for GCC developers (to help debugging) rather
1568 than for end-users. */
1569
1570 static const char *
1571 get_source_range_for_char (cpp_reader *pfile,
1572 string_concat_db *concats,
1573 location_t strloc,
1574 enum cpp_ttype type,
1575 int char_idx,
1576 source_range *out_range)
1577 {
1578 gcc_checking_assert (char_idx >= 0);
1579 gcc_assert (out_range);
1580
1581 cpp_substring_ranges ranges;
1582 const char *err
1583 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1584 if (err)
1585 return err;
1586
1587 if (char_idx >= ranges.get_num_ranges ())
1588 return "char_idx out of range";
1589
1590 *out_range = ranges.get_range (char_idx);
1591 return NULL;
1592 }
1593
1594 /* As get_source_range_for_char, but write to *OUT the number
1595 of ranges that are available. */
1596
1597 static const char *
1598 get_num_source_ranges_for_substring (cpp_reader *pfile,
1599 string_concat_db *concats,
1600 location_t strloc,
1601 enum cpp_ttype type,
1602 int *out)
1603 {
1604 gcc_assert (out);
1605
1606 cpp_substring_ranges ranges;
1607 const char *err
1608 = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1609
1610 if (err)
1611 return err;
1612
1613 *out = ranges.get_num_ranges ();
1614 return NULL;
1615 }
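/* Example (illustrative sketch): how a selftest might exercise the helpers
   above, assuming PFILE and STRLOC describe a string literal lexed from a
   temporary test file:

     int num_ranges = 0;
     const char *err
       = get_num_source_ranges_for_substring (pfile, concats, strloc,
                                              CPP_STRING, &num_ranges);
     ASSERT_TRUE (err == NULL);
     ASSERT_TRUE (num_ranges > 0);

   source_range objects for individual characters can then be fetched
   with get_source_range_for_char.  */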
1616
1617 /* Selftests of location handling. */
1618
1619 /* Verify that compare() on linenum_type handles comparisons over the full
1620 range of the type. */
1621
1622 static void
1623 test_linenum_comparisons ()
1624 {
1625 linenum_type min_line (0);
1626 linenum_type max_line (0xffffffff);
1627 ASSERT_EQ (0, compare (min_line, min_line));
1628 ASSERT_EQ (0, compare (max_line, max_line));
1629
1630 ASSERT_GT (compare (max_line, min_line), 0);
1631 ASSERT_LT (compare (min_line, max_line), 0);
1632 }
1633
1634 /* Helper function for verifying location data: when location_t
1635 values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1636 as having column 0. */
1637
1638 static bool
1639 should_have_column_data_p (location_t loc)
1640 {
1641 if (IS_ADHOC_LOC (loc))
1642 loc = get_location_from_adhoc_loc (line_table, loc);
1643 if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1644 return false;
1645 return true;
1646 }
1647
1648 /* Selftest for should_have_column_data_p. */
1649
1650 static void
1651 test_should_have_column_data_p ()
1652 {
1653 ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1654 ASSERT_TRUE
1655 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1656 ASSERT_FALSE
1657 (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1658 }
1659
1660 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1661 on LOC. */
1662
1663 static void
1664 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1665 location_t loc)
1666 {
1667 ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1668 ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1669 /* If location_t values are sufficiently high, then column numbers
1670 will be unavailable and LOCATION_COLUMN (loc) will be 0.
1671 When close to the threshold, column numbers *may* be present: if
1672 the final linemap before the threshold contains a line that straddles
1673 the threshold, locations in that line have column information. */
1674 if (should_have_column_data_p (loc))
1675 ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1676 }
1677
1678 /* Various selftests involve constructing a line table and one or more
1679 line maps within it.
1680
1681 For maximum test coverage we want to run these tests with a variety
1682 of situations:
1683 - line_table->default_range_bits: some frontends use a non-zero value
1684 and others use zero
1685 - the fallback modes within line-map.c: there are various threshold
1686 values for location_t beyond which line-map.c changes
1687 behavior (disabling of the range-packing optimization, disabling
1688 of column-tracking). We can exercise these by starting the line_table
1689 at interesting values at or near these thresholds.
1690
1691 The following struct describes a particular case within our test
1692 matrix. */
1693
1694 struct line_table_case
1695 {
1696 line_table_case (int default_range_bits, int base_location)
1697 : m_default_range_bits (default_range_bits),
1698 m_base_location (base_location)
1699 {}
1700
1701 int m_default_range_bits;
1702 int m_base_location;
1703 };
1704
1705 /* Constructor. Store the old value of line_table, and create a new
1706 one, using sane defaults. */
1707
1708 line_table_test::line_table_test ()
1709 {
1710 gcc_assert (saved_line_table == NULL);
1711 saved_line_table = line_table;
1712 line_table = ggc_alloc<line_maps> ();
1713 linemap_init (line_table, BUILTINS_LOCATION);
1714 gcc_assert (saved_line_table->reallocator);
1715 line_table->reallocator = saved_line_table->reallocator;
1716 gcc_assert (saved_line_table->round_alloc_size);
1717 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1718 line_table->default_range_bits = 0;
1719 }
1720
1721 /* Constructor. Store the old value of line_table, and create a new
1722 one, using the situation described in CASE_. */
1723
1724 line_table_test::line_table_test (const line_table_case &case_)
1725 {
1726 gcc_assert (saved_line_table == NULL);
1727 saved_line_table = line_table;
1728 line_table = ggc_alloc<line_maps> ();
1729 linemap_init (line_table, BUILTINS_LOCATION);
1730 gcc_assert (saved_line_table->reallocator);
1731 line_table->reallocator = saved_line_table->reallocator;
1732 gcc_assert (saved_line_table->round_alloc_size);
1733 line_table->round_alloc_size = saved_line_table->round_alloc_size;
1734 line_table->default_range_bits = case_.m_default_range_bits;
1735 if (case_.m_base_location)
1736 {
1737 line_table->highest_location = case_.m_base_location;
1738 line_table->highest_line = case_.m_base_location;
1739 }
1740 }
1741
1742 /* Destructor. Restore the old value of line_table. */
1743
1744 line_table_test::~line_table_test ()
1745 {
1746 gcc_assert (saved_line_table != NULL);
1747 line_table = saved_line_table;
1748 saved_line_table = NULL;
1749 }
1750
1751 /* Verify basic operation of ordinary linemaps. */
1752
1753 static void
1754 test_accessing_ordinary_linemaps (const line_table_case &case_)
1755 {
1756 line_table_test ltt (case_);
1757
1758 /* Build a simple linemap describing some locations. */
1759 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1760
1761 linemap_line_start (line_table, 1, 100);
1762 location_t loc_a = linemap_position_for_column (line_table, 1);
1763 location_t loc_b = linemap_position_for_column (line_table, 23);
1764
1765 linemap_line_start (line_table, 2, 100);
1766 location_t loc_c = linemap_position_for_column (line_table, 1);
1767 location_t loc_d = linemap_position_for_column (line_table, 17);
1768
1769 /* Example of a very long line. */
1770 linemap_line_start (line_table, 3, 2000);
1771 location_t loc_e = linemap_position_for_column (line_table, 700);
1772
1773 /* Transitioning back to a short line. */
1774 linemap_line_start (line_table, 4, 0);
1775 location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1776
1777 if (should_have_column_data_p (loc_back_to_short))
1778 {
1779 /* Verify that we switched to short lines in the linemap. */
1780 line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1781 ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1782 }
1783
1784 /* Example of a line that will eventually be seen to be longer
1785 than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1786 below that. */
1787 linemap_line_start (line_table, 5, 2000);
1788
1789 location_t loc_start_of_very_long_line
1790 = linemap_position_for_column (line_table, 2000);
1791 location_t loc_too_wide
1792 = linemap_position_for_column (line_table, 4097);
1793 location_t loc_too_wide_2
1794 = linemap_position_for_column (line_table, 4098);
1795
1796 /* ...and back to a sane line length. */
1797 linemap_line_start (line_table, 6, 100);
1798 location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1799
1800 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1801
1802 /* Multiple files. */
1803 linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1804 linemap_line_start (line_table, 1, 200);
1805 location_t loc_f = linemap_position_for_column (line_table, 150);
1806 linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1807
1808 /* Verify that we can recover the location info. */
1809 assert_loceq ("foo.c", 1, 1, loc_a);
1810 assert_loceq ("foo.c", 1, 23, loc_b);
1811 assert_loceq ("foo.c", 2, 1, loc_c);
1812 assert_loceq ("foo.c", 2, 17, loc_d);
1813 assert_loceq ("foo.c", 3, 700, loc_e);
1814 assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1815
1816 /* In the very wide line, the initial location should be fully tracked. */
1817 assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1818 /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1819 be disabled. */
1820 assert_loceq ("foo.c", 5, 0, loc_too_wide);
1821 assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1822 /* ...and column-tracking should be re-enabled for subsequent lines. */
1823 assert_loceq ("foo.c", 6, 10, loc_sane_again);
1824
1825 assert_loceq ("bar.c", 1, 150, loc_f);
1826
1827 ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1828 ASSERT_TRUE (pure_location_p (line_table, loc_a));
1829
1830 /* Verify using make_location to build a range, and extracting data
1831 back from it. */
1832 location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1833 ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1834 ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1835 source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1836 ASSERT_EQ (loc_b, src_range.m_start);
1837 ASSERT_EQ (loc_d, src_range.m_finish);
1838 }
1839
1840 /* Verify various properties of UNKNOWN_LOCATION. */
1841
1842 static void
1843 test_unknown_location ()
1844 {
1845 ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1846 ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1847 ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1848 }
1849
1850 /* Verify various properties of BUILTINS_LOCATION. */
1851
1852 static void
1853 test_builtins ()
1854 {
1855 assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1856 ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1857 }
1858
1859 /* Regression test for make_location.
1860 Ensure that we use pure locations for the start/finish of the range,
1861 rather than storing a packed or ad-hoc range as the start/finish. */
1862
1863 static void
1864 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1865 {
1866 /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1867 with C++ frontend.
1868 ....................0000000001111111111222.
1869 ....................1234567890123456789012. */
1870 const char *content = " r += !aaa == bbb;\n";
1871 temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1872 line_table_test ltt (case_);
1873 linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1874
1875 const location_t c11 = linemap_position_for_column (line_table, 11);
1876 const location_t c12 = linemap_position_for_column (line_table, 12);
1877 const location_t c13 = linemap_position_for_column (line_table, 13);
1878 const location_t c14 = linemap_position_for_column (line_table, 14);
1879 const location_t c21 = linemap_position_for_column (line_table, 21);
1880
1881 if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1882 return;
1883
1884 /* Use column 13 for the caret location, arbitrarily, to verify that we
1885 handle start != caret. */
1886 const location_t aaa = make_location (c13, c12, c14);
1887 ASSERT_EQ (c13, get_pure_location (aaa));
1888 ASSERT_EQ (c12, get_start (aaa));
1889 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1890 ASSERT_EQ (c14, get_finish (aaa));
1891 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1892
1893 /* Make a location using a location with a range as the start-point. */
1894 const location_t not_aaa = make_location (c11, aaa, c14);
1895 ASSERT_EQ (c11, get_pure_location (not_aaa));
1896 /* It should use the start location of the range, not store the range
1897 itself. */
1898 ASSERT_EQ (c12, get_start (not_aaa));
1899 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1900 ASSERT_EQ (c14, get_finish (not_aaa));
1901 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1902
1903 /* Similarly, make a location with a range as the end-point. */
1904 const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1905 ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1906 ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1907 ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1908 ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1909 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1910 const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1911 /* It should use the finish location of the range, not store the range
1912 itself. */
1913 ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1914 ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1915 ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1916 ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1917 ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1918 }
1919
1920 /* Verify reading of input files (e.g. for caret-based diagnostics). */
1921
1922 static void
1923 test_reading_source_line ()
1924 {
1925 /* Create a tempfile and write some text to it. */
1926 temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1927 "01234567890123456789\n"
1928 "This is the test text\n"
1929 "This is the 3rd line");
1930
1931 /* Read back a specific line from the tempfile. */
1932 char_span source_line = location_get_source_line (tmp.get_filename (), 3);
1933 ASSERT_TRUE (source_line);
1934 ASSERT_TRUE (source_line.get_buffer () != NULL);
1935 ASSERT_EQ (20, source_line.length ());
1936 ASSERT_TRUE (!strncmp ("This is the 3rd line",
1937 source_line.get_buffer (), source_line.length ()));
1938
1939 source_line = location_get_source_line (tmp.get_filename (), 2);
1940 ASSERT_TRUE (source_line);
1941 ASSERT_TRUE (source_line.get_buffer () != NULL);
1942 ASSERT_EQ (21, source_line.length ());
1943 ASSERT_TRUE (!strncmp ("This is the test text",
1944 source_line.get_buffer (), source_line.length ()));
1945
1946 source_line = location_get_source_line (tmp.get_filename (), 4);
1947 ASSERT_FALSE (source_line);
1948 ASSERT_TRUE (source_line.get_buffer () == NULL);
1949 }
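
/* Illustrative sketch of the kind of consumer that the test above
   exercises: fetch one line of source via location_get_source_line and
   print it with a caret under a given 1-based column.  The helper name
   and the direct use of stderr are assumptions for exposition only.  */

static void
sketch_print_caret_line (const char *filename, int line, int column)
{
  char_span span = location_get_source_line (filename, line);
  if (!span)
    /* The line is unavailable (e.g. past EOF), so print nothing.  */
    return;
  /* Echo the source line, then a caret under COLUMN.  */
  fwrite (span.get_buffer (), 1, span.length (), stderr);
  fputc ('\n', stderr);
  for (int i = 1; i < column; i++)
    fputc (' ', stderr);
  fputs ("^\n", stderr);
}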
1950
1951 /* Tests of lexing. */
1952
1953 /* Verify that token TOK from PARSER has cpp_token_as_text
1954 equal to EXPECTED_TEXT. */
1955
1956 #define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT) \
1957 SELFTEST_BEGIN_STMT \
1958 unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK)); \
1959 ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt); \
1960 SELFTEST_END_STMT
1961
1962 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1963 and ranges from EXP_START_COL to EXP_FINISH_COL.
1964 Use LOC as the effective location of the selftest. */
1965
1966 static void
1967 assert_token_loc_eq (const location &loc,
1968 const cpp_token *tok,
1969 const char *exp_filename, int exp_linenum,
1970 int exp_start_col, int exp_finish_col)
1971 {
1972 location_t tok_loc = tok->src_loc;
1973 ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1974 ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1975
1976 /* If location_t values are sufficiently high, then column numbers
1977 will be unavailable. */
1978 if (!should_have_column_data_p (tok_loc))
1979 return;
1980
1981 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1982 source_range tok_range = get_range_from_loc (line_table, tok_loc);
1983 ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1984 ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1985 }
1986
1987 /* Use assert_token_loc_eq to verify the TOK->src_loc, using
1988 SELFTEST_LOCATION as the effective location of the selftest. */
1989
1990 #define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
1991 EXP_START_COL, EXP_FINISH_COL) \
1992 assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
1993 (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1994
1995 /* Test of lexing a file using libcpp, verifying tokens and their
1996 location information. */
1997
1998 static void
1999 test_lexer (const line_table_case &case_)
2000 {
2001 /* Create a tempfile and write some text to it. */
2002 const char *content =
2003 /*00000000011111111112222222222333333.3333444444444.455555555556
2004 12345678901234567890123456789012345.6789012345678.901234567890. */
2005 ("test_name /* c-style comment */\n"
2006 " \"test literal\"\n"
2007 " // test c++-style comment\n"
2008 " 42\n");
2009 temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2010
2011 line_table_test ltt (case_);
2012
2013 cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2014
2015 const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2016 ASSERT_NE (fname, NULL);
2017
2018 /* Verify that we get the expected tokens back, with the correct
2019 location information. */
2020
2021 location_t loc;
2022 const cpp_token *tok;
2023 tok = cpp_get_token_with_location (parser, &loc);
2024 ASSERT_NE (tok, NULL);
2025 ASSERT_EQ (tok->type, CPP_NAME);
2026 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2027 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2028
2029 tok = cpp_get_token_with_location (parser, &loc);
2030 ASSERT_NE (tok, NULL);
2031 ASSERT_EQ (tok->type, CPP_STRING);
2032 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2033 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2034
2035 tok = cpp_get_token_with_location (parser, &loc);
2036 ASSERT_NE (tok, NULL);
2037 ASSERT_EQ (tok->type, CPP_NUMBER);
2038 ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2039 ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2040
2041 tok = cpp_get_token_with_location (parser, &loc);
2042 ASSERT_NE (tok, NULL);
2043 ASSERT_EQ (tok->type, CPP_EOF);
2044
2045 cpp_finish (parser, NULL);
2046 cpp_destroy (parser);
2047 }
2048
2049 /* Forward decls. */
2050
2051 struct lexer_test;
2052 class lexer_test_options;
2053
2054 /* A class for specifying options of a lexer_test.
2055 The "apply" vfunc is called during the lexer_test constructor. */
2056
2057 class lexer_test_options
2058 {
2059 public:
2060 virtual void apply (lexer_test &) = 0;
2061 };
2062
2063 /* Wrapper around a cpp_reader *, which calls cpp_finish and cpp_destroy
2064 in its dtor.
2065
2066 This is needed by struct lexer_test to ensure that the cleanup of the
2067 cpp_reader happens *after* the cleanup of the temp_source_file. */
2068
2069 class cpp_reader_ptr
2070 {
2071 public:
2072 cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2073
2074 ~cpp_reader_ptr ()
2075 {
2076 cpp_finish (m_ptr, NULL);
2077 cpp_destroy (m_ptr);
2078 }
2079
2080 operator cpp_reader * () const { return m_ptr; }
2081
2082 private:
2083 cpp_reader *m_ptr;
2084 };
2085
2086 /* A struct for writing lexer tests. */
2087
2088 struct lexer_test
2089 {
2090 lexer_test (const line_table_case &case_, const char *content,
2091 lexer_test_options *options);
2092 ~lexer_test ();
2093
2094 const cpp_token *get_token ();
2095
2096 /* The ordering of these fields matters.
2097 The line_table_test must be first, since the cpp_reader_ptr
2098 uses it.
2099 The cpp_reader must be cleaned up *after* the temp_source_file
2100 since the filenames in input.c's input cache are owned by the
2101 cpp_reader; in particular, when ~temp_source_file evicts the
2102 file from the cache, the filename must still be alive. */
2103 line_table_test m_ltt;
2104 cpp_reader_ptr m_parser;
2105 temp_source_file m_tempfile;
2106 string_concat_db m_concats;
2107 bool m_implicitly_expect_EOF;
2108 };
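
/* An illustrative usage sketch for struct lexer_test, assuming no special
   options are needed: construct it with the source text, pull tokens via
   get_token, and let the destructor verify that only EOF remains.  This
   helper is hypothetical; the real uses of lexer_test follow below.  */

static void
sketch_lex_single_number (const line_table_case &case_)
{
  lexer_test test (case_, "42\n", NULL);
  const cpp_token *tok = test.get_token ();
  ASSERT_EQ (tok->type, CPP_NUMBER);
  ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "42");
  /* The lexer_test dtor implicitly checks that the next token is EOF.  */
}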
2109
2110 /* Use an EBCDIC encoding for the execution charset, specifically
2111 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2112
2113 This exercises iconv integration within libcpp.
2114 Not every build of iconv supports the given charset,
2115 so we need to flag this error and handle it gracefully. */
2116
2117 class ebcdic_execution_charset : public lexer_test_options
2118 {
2119 public:
2120 ebcdic_execution_charset () : m_num_iconv_errors (0)
2121 {
2122 gcc_assert (s_singleton == NULL);
2123 s_singleton = this;
2124 }
2125 ~ebcdic_execution_charset ()
2126 {
2127 gcc_assert (s_singleton == this);
2128 s_singleton = NULL;
2129 }
2130
2131 void apply (lexer_test &test) FINAL OVERRIDE
2132 {
2133 cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2134 cpp_opts->narrow_charset = "IBM1047";
2135
2136 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2137 callbacks->diagnostic = on_diagnostic;
2138 }
2139
2140 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2141 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2142 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2143 rich_location *richloc ATTRIBUTE_UNUSED,
2144 const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2145 ATTRIBUTE_FPTR_PRINTF(5,0)
2146 {
2147 gcc_assert (s_singleton);
2148 /* Prevent exgettext from picking this up; it is translated in libcpp. */
2149 const char *msg = "conversion from %s to %s not supported by iconv";
2150 #ifdef ENABLE_NLS
2151 msg = dgettext ("cpplib", msg);
2152 #endif
2153 /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2154 when the local iconv build doesn't support the conversion. */
2155 if (strcmp (msgid, msg) == 0)
2156 {
2157 s_singleton->m_num_iconv_errors++;
2158 return true;
2159 }
2160
2161 /* Otherwise, we have an unexpected error. */
2162 abort ();
2163 }
2164
2165 bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2166
2167 private:
2168 static ebcdic_execution_charset *s_singleton;
2169 int m_num_iconv_errors;
2170 };
2171
2172 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2173
2174 /* A lexer_test_options subclass that records a list of diagnostic
2175 messages emitted by the lexer. */
2176
2177 class lexer_diagnostic_sink : public lexer_test_options
2178 {
2179 public:
2180 lexer_diagnostic_sink ()
2181 {
2182 gcc_assert (s_singleton == NULL);
2183 s_singleton = this;
2184 }
2185 ~lexer_diagnostic_sink ()
2186 {
2187 gcc_assert (s_singleton == this);
2188 s_singleton = NULL;
2189
2190 int i;
2191 char *str;
2192 FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2193 free (str);
2194 }
2195
2196 void apply (lexer_test &test) FINAL OVERRIDE
2197 {
2198 cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2199 callbacks->diagnostic = on_diagnostic;
2200 }
2201
2202 static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2203 enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2204 enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2205 rich_location *richloc ATTRIBUTE_UNUSED,
2206 const char *msgid, va_list *ap)
2207 ATTRIBUTE_FPTR_PRINTF(5,0)
2208 {
2209 char *msg = xvasprintf (msgid, *ap);
2210 s_singleton->m_diagnostics.safe_push (msg);
2211 return true;
2212 }
2213
2214 auto_vec<char *> m_diagnostics;
2215
2216 private:
2217 static lexer_diagnostic_sink *s_singleton;
2218 };
2219
2220 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
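
/* Illustrative sketch (hypothetical; real uses of lexer_diagnostic_sink
   appear in tests elsewhere in this file) of the intended pattern: pass
   the sink as the lexer_test's options so that its "apply" vfunc installs
   the diagnostic callback, lex the input, then inspect the recorded
   messages.  The choice of input here is an assumption.  */

static void
sketch_capture_lexer_diagnostics (const line_table_case &case_)
{
  lexer_diagnostic_sink sink;
  /* An unterminated string literal is expected to provoke a diagnostic.  */
  lexer_test test (case_, "\"unterminated\n", &sink);
  test.m_implicitly_expect_EOF = false;

  /* Drain the tokens; any diagnostics are recorded in sink.m_diagnostics.  */
  while (test.get_token ()->type != CPP_EOF)
    ;

  int i;
  char *str;
  FOR_EACH_VEC_ELT (sink.m_diagnostics, i, str)
    ASSERT_NE (str, NULL);
}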
2221
2222 /* Constructor. Override line_table with a new instance based on CASE_,
2223 and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2224 start parsing the tempfile. */
2225
2226 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2227 lexer_test_options *options)
2228 : m_ltt (case_),
2229 m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2230 /* Create a tempfile and write the text to it. */
2231 m_tempfile (SELFTEST_LOCATION, ".c", content),
2232 m_concats (),
2233 m_implicitly_expect_EOF (true)
2234 {
2235 if (options)
2236 options->apply (*this);
2237
2238 cpp_init_iconv (m_parser);
2239
2240 /* Parse the file. */
2241 const char *fname = cpp_read_main_file (m_parser,
2242 m_tempfile.get_filename ());
2243 ASSERT_NE (fname, NULL);
2244 }
2245
2246 /* Destructor. By default, verify that the next token in m_parser is EOF. */
2247
2248 lexer_test::~lexer_test ()
2249 {
2250 location_t loc;
2251 const cpp_token *tok;
2252
2253 if (m_implicitly_expect_EOF)
2254 {
2255 tok = cpp_get_token_with_location (m_parser, &loc);
2256 ASSERT_NE (tok, NULL);
2257 ASSERT_EQ (tok->type, CPP_EOF);
2258 }
2259 }
2260
2261 /* Get the next token from m_parser. */
2262
2263 const cpp_token *
2264 lexer_test::get_token ()
2265 {
2266 location_t loc;
2267 const cpp_token *tok;
2268
2269 tok = cpp_get_token_with_location (m_parser, &loc);
2270 ASSERT_NE (tok, NULL);
2271 return tok;
2272 }
2273
2274 /* Verify that locations within string literals are correctly handled. */
2275
2276 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2277 using the string concatenation database for TEST.
2278
2279 Assert that the character at index IDX is on EXPECTED_LINE,
2280 and that it begins at column EXPECTED_START_COL and ends at
2281 EXPECTED_FINISH_COL (unless the locations are beyond
2282 LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2283 columns). */
2284
2285 static void
2286 assert_char_at_range (const location &loc,
2287 lexer_test& test,
2288 location_t strloc, enum cpp_ttype type, int idx,
2289 int expected_line, int expected_start_col,
2290 int expected_finish_col)
2291 {
2292 cpp_reader *pfile = test.m_parser;
2293 string_concat_db *concats = &test.m_concats;
2294
2295 source_range actual_range = source_range();
2296 const char *err
2297 = get_source_range_for_char (pfile, concats, strloc, type, idx,
2298 &actual_range);
2299 if (should_have_column_data_p (strloc))
2300 ASSERT_EQ_AT (loc, NULL, err);
2301 else
2302 {
2303 ASSERT_STREQ_AT (loc,
2304 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2305 err);
2306 return;
2307 }
2308
2309 int actual_start_line = LOCATION_LINE (actual_range.m_start);
2310 ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2311 int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2312 ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2313
2314 if (should_have_column_data_p (actual_range.m_start))
2315 {
2316 int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2317 ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2318 }
2319 if (should_have_column_data_p (actual_range.m_finish))
2320 {
2321 int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2322 ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2323 }
2324 }
2325
2326 /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
2327 the effective location of any errors. */
2328
2329 #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2330 EXPECTED_START_COL, EXPECTED_FINISH_COL) \
2331 assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2332 (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2333 (EXPECTED_FINISH_COL))
2334
2335 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2336 using the string concatenation database for TEST.
2337
2338 Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES ranges. */
2339
2340 static void
2341 assert_num_substring_ranges (const location &loc,
2342 lexer_test& test,
2343 location_t strloc,
2344 enum cpp_ttype type,
2345 int expected_num_ranges)
2346 {
2347 cpp_reader *pfile = test.m_parser;
2348 string_concat_db *concats = &test.m_concats;
2349
2350 int actual_num_ranges = -1;
2351 const char *err
2352 = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2353 &actual_num_ranges);
2354 if (should_have_column_data_p (strloc))
2355 ASSERT_EQ_AT (loc, NULL, err);
2356 else
2357 {
2358 ASSERT_STREQ_AT (loc,
2359 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2360 err);
2361 return;
2362 }
2363 ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2364 }
2365
2366 /* Macro for calling assert_num_substring_ranges, supplying
2367 SELFTEST_LOCATION for the effective location of any errors. */
2368
2369 #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2370 EXPECTED_NUM_RANGES) \
2371 assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2372 (TYPE), (EXPECTED_NUM_RANGES))
2373
2374
2375 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2376 returns an error (using the string concatenation database for TEST). */
2377
2378 static void
2379 assert_has_no_substring_ranges (const location &loc,
2380 lexer_test& test,
2381 location_t strloc,
2382 enum cpp_ttype type,
2383 const char *expected_err)
2384 {
2385 cpp_reader *pfile = test.m_parser;
2386 string_concat_db *concats = &test.m_concats;
2387 cpp_substring_ranges ranges;
2388 const char *actual_err
2389 = get_substring_ranges_for_loc (pfile, concats, strloc,
2390 type, ranges);
2391 if (should_have_column_data_p (strloc))
2392 ASSERT_STREQ_AT (loc, expected_err, actual_err);
2393 else
2394 ASSERT_STREQ_AT (loc,
2395 "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2396 actual_err);
2397 }
2398
2399 #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
2400 assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2401 (STRLOC), (TYPE), (ERR))
2402
2403 /* Lex a simple string literal. Verify the substring location data, before
2404 and after running cpp_interpret_string on it. */
2405
2406 static void
2407 test_lexer_string_locations_simple (const line_table_case &case_)
2408 {
2409 /* Digits 0-9 (with 0 at column 10), the simple way.
2410 ....................000000000.11111111112.2222222223333333333
2411 ....................123456789.01234567890.1234567890123456789
2412 We add a trailing comment to ensure that we correctly locate
2413 the end of the string literal token. */
2414 const char *content = " \"0123456789\" /* not a string */\n";
2415 lexer_test test (case_, content, NULL);
2416
2417 /* Verify that we get the expected token back, with the correct
2418 location information. */
2419 const cpp_token *tok = test.get_token ();
2420 ASSERT_EQ (tok->type, CPP_STRING);
2421 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2422 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2423
2424 /* At this point in lexing, the quote characters are treated as part of
2425 the string (they are stripped off by cpp_interpret_string). */
2426
2427 ASSERT_EQ (tok->val.str.len, 12);
2428
2429 /* Verify that cpp_interpret_string works. */
2430 cpp_string dst_string;
2431 const enum cpp_ttype type = CPP_STRING;
2432 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2433 &dst_string, type);
2434 ASSERT_TRUE (result);
2435 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2436 free (const_cast <unsigned char *> (dst_string.text));
2437
2438 /* Verify ranges of individual characters. This no longer includes the
2439 opening quote, but does include the closing quote. */
2440 for (int i = 0; i <= 10; i++)
2441 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2442 10 + i, 10 + i);
2443
2444 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2445 }
2446
2447 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2448 encoding. */
2449
2450 static void
2451 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2452 {
2453 /* EBCDIC support requires iconv. */
2454 if (!HAVE_ICONV)
2455 return;
2456
2457 /* Digits 0-9 (with 0 at column 10), the simple way.
2458 ....................000000000.11111111112.2222222223333333333
2459 ....................123456789.01234567890.1234567890123456789
2460 We add a trailing comment to ensure that we correctly locate
2461 the end of the string literal token. */
2462 const char *content = " \"0123456789\" /* not a string */\n";
2463 ebcdic_execution_charset use_ebcdic;
2464 lexer_test test (case_, content, &use_ebcdic);
2465
2466 /* Verify that we get the expected token back, with the correct
2467 location information. */
2468 const cpp_token *tok = test.get_token ();
2469 ASSERT_EQ (tok->type, CPP_STRING);
2470 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2471 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2472
2473 /* At this point in lexing, the quote characters are treated as part of
2474 the string (they are stripped off by cpp_interpret_string). */
2475
2476 ASSERT_EQ (tok->val.str.len, 12);
2477
2478 /* The remainder of the test requires an iconv implementation that
2479 can convert from UTF-8 to the EBCDIC encoding requested above. */
2480 if (use_ebcdic.iconv_errors_occurred_p ())
2481 return;
2482
2483 /* Verify that cpp_interpret_string works. */
2484 cpp_string dst_string;
2485 const enum cpp_ttype type = CPP_STRING;
2486 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2487 &dst_string, type);
2488 ASSERT_TRUE (result);
2489 /* We should now have EBCDIC-encoded text, specifically
2490 IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2491 The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2492 ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2493 (const char *)dst_string.text);
2494 free (const_cast <unsigned char *> (dst_string.text));
2495
2496 /* Verify that we don't attempt to record substring location information
2497 for such cases. */
2498 ASSERT_HAS_NO_SUBSTRING_RANGES
2499 (test, tok->src_loc, type,
2500 "execution character set != source character set");
2501 }
2502
2503 /* Lex a string literal containing a hex-escaped character.
2504 Verify the substring location data, before and after running
2505 cpp_interpret_string on it. */
2506
2507 static void
2508 test_lexer_string_locations_hex (const line_table_case &case_)
2509 {
2510 /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2511 and with a space in place of digit 6, to terminate the escaped
2512 hex code.
2513 ....................000000000.111111.11112222.
2514 ....................123456789.012345.67890123. */
2515 const char *content = " \"01234\\x35 789\"\n";
2516 lexer_test test (case_, content, NULL);
2517
2518 /* Verify that we get the expected token back, with the correct
2519 location information. */
2520 const cpp_token *tok = test.get_token ();
2521 ASSERT_EQ (tok->type, CPP_STRING);
2522 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2523 ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2524
2525 /* At this point in lexing, the quote characters are treated as part of
2526 the string (they are stripped off by cpp_interpret_string). */
2527 ASSERT_EQ (tok->val.str.len, 15);
2528
2529 /* Verify that cpp_interpret_string works. */
2530 cpp_string dst_string;
2531 const enum cpp_ttype type = CPP_STRING;
2532 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2533 &dst_string, type);
2534 ASSERT_TRUE (result);
2535 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2536 free (const_cast <unsigned char *> (dst_string.text));
2537
2538 /* Verify ranges of individual characters. This no longer includes the
2539 opening quote, but does include the closing quote. */
2540 for (int i = 0; i <= 4; i++)
2541 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2542 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2543 for (int i = 6; i <= 10; i++)
2544 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2545
2546 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2547 }
2548
2549 /* Lex a string literal containing an octal-escaped character.
2550 Verify the substring location data after running cpp_interpret_string
2551 on it. */
2552
2553 static void
2554 test_lexer_string_locations_oct (const line_table_case &case_)
2555 {
2556 /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2557 and with a space in place of digit 6, to terminate the escaped
2558 octal code.
2559 ....................000000000.111111.11112222.2222223333333333444
2560 ....................123456789.012345.67890123.4567890123456789012 */
2561 const char *content = " \"01234\\065 789\" /* not a string */\n";
2562 lexer_test test (case_, content, NULL);
2563
2564 /* Verify that we get the expected token back, with the correct
2565 location information. */
2566 const cpp_token *tok = test.get_token ();
2567 ASSERT_EQ (tok->type, CPP_STRING);
2568 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2569
2570 /* Verify that cpp_interpret_string works. */
2571 cpp_string dst_string;
2572 const enum cpp_ttype type = CPP_STRING;
2573 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2574 &dst_string, type);
2575 ASSERT_TRUE (result);
2576 ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2577 free (const_cast <unsigned char *> (dst_string.text));
2578
2579 /* Verify ranges of individual characters. This no longer includes the
2580 opening quote, but does include the closing quote. */
2581 for (int i = 0; i < 5; i++)
2582 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2583 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2584 for (int i = 6; i <= 10; i++)
2585 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2586
2587 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2588 }
2589
2590 /* Test of string literal containing letter escapes. */
2591
2592 static void
2593 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2594 {
2595 /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2596 .....................000000000.1.11111.1.1.11222.22222223333333
2597 .....................123456789.0.12345.6.7.89012.34567890123456. */
2598 const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2599 lexer_test test (case_, content, NULL);
2600
2601 /* Verify that we get the expected tokens back. */
2602 const cpp_token *tok = test.get_token ();
2603 ASSERT_EQ (tok->type, CPP_STRING);
2604 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2605
2606 /* Verify ranges of individual characters. */
2607 /* "\t". */
2608 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2609 0, 1, 10, 11);
2610 /* "foo". */
2611 for (int i = 1; i <= 3; i++)
2612 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2613 i, 1, 11 + i, 11 + i);
2614 /* "\\" and "\n". */
2615 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2616 4, 1, 15, 16);
2617 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2618 5, 1, 17, 18);
2619
2620 /* "bar" and closing quote for nul-terminator. */
2621 for (int i = 6; i <= 9; i++)
2622 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2623 i, 1, 13 + i, 13 + i);
2624
2625 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2626 }
2627
2628 /* Another test of a string literal containing a letter escape.
2629 Based on string seen in
2630 printf ("%-%\n");
2631 in gcc.dg/format/c90-printf-1.c. */
2632
2633 static void
2634 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2635 {
2636 /* .....................000000000.1111.11.1111.22222222223.
2637 .....................123456789.0123.45.6789.01234567890. */
2638 const char *content = (" \"%-%\\n\" /* non-str */\n");
2639 lexer_test test (case_, content, NULL);
2640
2641 /* Verify that we get the expected tokens back. */
2642 const cpp_token *tok = test.get_token ();
2643 ASSERT_EQ (tok->type, CPP_STRING);
2644 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2645
2646 /* Verify ranges of individual characters. */
2647 /* "%-%". */
2648 for (int i = 0; i < 3; i++)
2649 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2650 i, 1, 10 + i, 10 + i);
2651 /* "\n". */
2652 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2653 3, 1, 13, 14);
2654
2655 /* Closing quote for nul-terminator. */
2656 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2657 4, 1, 15, 15);
2658
2659 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2660 }
2661
2662 /* Lex a string literal containing UCN 4 characters.
2663 Verify the substring location data after running cpp_interpret_string
2664 on it. */
2665
2666 static void
2667 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2668 {
2669 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals written
2670 as UCN 4.
2671 ....................000000000.111111.111122.222222223.33333333344444
2672 ....................123456789.012345.678901.234567890.12345678901234 */
2673 const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2674 lexer_test test (case_, content, NULL);
2675
2676 /* Verify that we get the expected token back, with the correct
2677 location information. */
2678 const cpp_token *tok = test.get_token ();
2679 ASSERT_EQ (tok->type, CPP_STRING);
2680 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2681
2682 /* Verify that cpp_interpret_string works.
2683 The string should be encoded in the execution character
2684 set. Assuming that that is UTF-8, we should have the following:
2685 ----------- ---- ----- ------- ----------------
2686 Byte offset Byte Octal Unicode Source Column(s)
2687 ----------- ---- ----- ------- ----------------
2688 0 0x30 '0' 10
2689 1 0x31 '1' 11
2690 2 0x32 '2' 12
2691 3 0x33 '3' 13
2692 4 0x34 '4' 14
2693 5 0xE2 \342 U+2174 15-20
2694 6 0x85 \205 (cont) 15-20
2695 7 0xB4 \264 (cont) 15-20
2696 8 0xE2 \342 U+2175 21-26
2697 9 0x85 \205 (cont) 21-26
2698 10 0xB5 \265 (cont) 21-26
2699 11 0x37 '7' 27
2700 12 0x38 '8' 28
2701 13 0x39 '9' 29
2702 14 0x00 30 (closing quote)
2703 ----------- ---- ----- ------- ---------------. */
2704
2705 cpp_string dst_string;
2706 const enum cpp_ttype type = CPP_STRING;
2707 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2708 &dst_string, type);
2709 ASSERT_TRUE (result);
2710 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2711 (const char *)dst_string.text);
2712 free (const_cast <unsigned char *> (dst_string.text));
2713
2714 /* Verify ranges of individual characters. This no longer includes the
2715 opening quote, but does include the closing quote.
2716 '01234'. */
2717 for (int i = 0; i <= 4; i++)
2718 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2719 /* U+2174. */
2720 for (int i = 5; i <= 7; i++)
2721 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2722 /* U+2175. */
2723 for (int i = 8; i <= 10; i++)
2724 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2725 /* '789' and nul terminator */
2726 for (int i = 11; i <= 14; i++)
2727 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2728
2729 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2730 }
2731
2732 /* Lex a string literal containing UCN 8 characters.
2733 Verify the substring location data after running cpp_interpret_string
2734 on it. */
2735
2736 static void
2737 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2738 {
2739 /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2740 ....................000000000.111111.1111222222.2222333333333.344444
2741 ....................123456789.012345.6789012345.6789012345678.901234 */
2742 const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2743 lexer_test test (case_, content, NULL);
2744
2745 /* Verify that we get the expected token back, with the correct
2746 location information. */
2747 const cpp_token *tok = test.get_token ();
2748 ASSERT_EQ (tok->type, CPP_STRING);
2749 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2750 "\"01234\\U00002174\\U00002175789\"");
2751
2752 /* Verify that cpp_interpret_string works.
2753 The UTF-8 encoding of the string is identical to that from
2754 the ucn4 testcase above; the only difference is the column
2755 locations. */
2756 cpp_string dst_string;
2757 const enum cpp_ttype type = CPP_STRING;
2758 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2759 &dst_string, type);
2760 ASSERT_TRUE (result);
2761 ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2762 (const char *)dst_string.text);
2763 free (const_cast <unsigned char *> (dst_string.text));
2764
2765 /* Verify ranges of individual characters. This no longer includes the
2766 opening quote, but does include the closing quote.
2767 '01234'. */
2768 for (int i = 0; i <= 4; i++)
2769 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2770 /* U+2174. */
2771 for (int i = 5; i <= 7; i++)
2772 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2773 /* U+2175. */
2774 for (int i = 8; i <= 10; i++)
2775 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2776 /* '789' at columns 35-37 */
2777 for (int i = 11; i <= 13; i++)
2778 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2779 /* Closing quote/nul-terminator at column 38. */
2780 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2781
2782 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2783 }
2784
2785 /* Fetch a big-endian 32-bit value and convert to host endianness. */
2786
2787 static uint32_t
2788 uint32_from_big_endian (const uint32_t *ptr_be_value)
2789 {
2790 const unsigned char *buf = (const unsigned char *)ptr_be_value;
2791 return (((uint32_t) buf[0] << 24)
2792 | ((uint32_t) buf[1] << 16)
2793 | ((uint32_t) buf[2] << 8)
2794 | (uint32_t) buf[3]);
2795 }
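
/* Worked example for uint32_from_big_endian (a sketch, not exercised by
   the tests below): the big-endian bytes 00 00 00 41 decode to 0x41,
   i.e. 'A', regardless of the host's endianness.  */

static void
sketch_check_uint32_from_big_endian ()
{
  const unsigned char buf[4] = { 0x00, 0x00, 0x00, 0x41 };
  uint32_t be_value;
  memcpy (&be_value, buf, sizeof be_value);
  ASSERT_EQ ('A', uint32_from_big_endian (&be_value));
}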
2796
2797 /* Lex a wide string literal and verify that attempts to read substring
2798 location data from it fail gracefully. */
2799
2800 static void
2801 test_lexer_string_locations_wide_string (const line_table_case &case_)
2802 {
2803 /* Digits 0-9.
2804 ....................000000000.11111111112.22222222233333
2805 ....................123456789.01234567890.12345678901234 */
2806 const char *content = " L\"0123456789\" /* non-str */\n";
2807 lexer_test test (case_, content, NULL);
2808
2809 /* Verify that we get the expected token back, with the correct
2810 location information. */
2811 const cpp_token *tok = test.get_token ();
2812 ASSERT_EQ (tok->type, CPP_WSTRING);
2813 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2814
2815 /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2816 cpp_string dst_string;
2817 const enum cpp_ttype type = CPP_WSTRING;
2818 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2819 &dst_string, type);
2820 ASSERT_TRUE (result);
2821 /* The cpp_reader defaults to big-endian with
2822 CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2823 now be encoded as UTF-32BE. */
2824 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2825 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2826 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2827 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2828 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2829 free (const_cast <unsigned char *> (dst_string.text));
2830
2831 /* We don't yet support generating substring location information
2832 for L"" strings. */
2833 ASSERT_HAS_NO_SUBSTRING_RANGES
2834 (test, tok->src_loc, type,
2835 "execution character set != source character set");
2836 }
2837
2838 /* Fetch a big-endian 16-bit value and convert to host endianness. */
2839
2840 static uint16_t
2841 uint16_from_big_endian (const uint16_t *ptr_be_value)
2842 {
2843 const unsigned char *buf = (const unsigned char *)ptr_be_value;
2844 return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1];
2845 }
2846
2847 /* Lex a u"" string literal and verify that attempts to read substring
2848 location data from it fail gracefully. */
2849
2850 static void
2851 test_lexer_string_locations_string16 (const line_table_case &case_)
2852 {
2853 /* Digits 0-9.
2854 ....................000000000.11111111112.22222222233333
2855 ....................123456789.01234567890.12345678901234 */
2856 const char *content = " u\"0123456789\" /* non-str */\n";
2857 lexer_test test (case_, content, NULL);
2858
2859 /* Verify that we get the expected token back, with the correct
2860 location information. */
2861 const cpp_token *tok = test.get_token ();
2862 ASSERT_EQ (tok->type, CPP_STRING16);
2863 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2864
2865 /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2866 cpp_string dst_string;
2867 const enum cpp_ttype type = CPP_STRING16;
2868 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2869 &dst_string, type);
2870 ASSERT_TRUE (result);
2871
2872 /* The cpp_reader defaults to big-endian, so dst_string should
2873 now be encoded as UTF-16BE. */
2874 const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2875 ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2876 ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2877 ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2878 ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2879 free (const_cast <unsigned char *> (dst_string.text));
2880
2881 /* We don't yet support generating substring location information
2882 for L"" strings. */
2883 ASSERT_HAS_NO_SUBSTRING_RANGES
2884 (test, tok->src_loc, type,
2885 "execution character set != source character set");
2886 }
2887
2888 /* Lex a U"" string literal and verify that attempts to read substring
2889 location data from it fail gracefully. */
2890
2891 static void
2892 test_lexer_string_locations_string32 (const line_table_case &case_)
2893 {
2894 /* Digits 0-9.
2895 ....................000000000.11111111112.22222222233333
2896 ....................123456789.01234567890.12345678901234 */
2897 const char *content = " U\"0123456789\" /* non-str */\n";
2898 lexer_test test (case_, content, NULL);
2899
2900 /* Verify that we get the expected token back, with the correct
2901 location information. */
2902 const cpp_token *tok = test.get_token ();
2903 ASSERT_EQ (tok->type, CPP_STRING32);
2904 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2905
2906 /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2907 cpp_string dst_string;
2908 const enum cpp_ttype type = CPP_STRING32;
2909 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2910 &dst_string, type);
2911 ASSERT_TRUE (result);
2912
2913 /* The cpp_reader defaults to big-endian, so dst_string should
2914 now be encoded as UTF-32BE. */
2915 const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2916 ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2917 ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2918 ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2919 ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2920 free (const_cast <unsigned char *> (dst_string.text));
2921
2922 /* We don't yet support generating substring location information
2923 for L"" strings. */
2924 ASSERT_HAS_NO_SUBSTRING_RANGES
2925 (test, tok->src_loc, type,
2926 "execution character set != source character set");
2927 }
2928
2929 /* Lex a u8-string literal.
2930 Verify the substring location data after running cpp_interpret_string
2931 on it. */
2932
2933 static void
2934 test_lexer_string_locations_u8 (const line_table_case &case_)
2935 {
2936 /* Digits 0-9.
2937 ....................000000000.11111111112.22222222233333
2938 ....................123456789.01234567890.12345678901234 */
2939 const char *content = " u8\"0123456789\" /* non-str */\n";
2940 lexer_test test (case_, content, NULL);
2941
2942 /* Verify that we get the expected token back, with the correct
2943 location information. */
2944 const cpp_token *tok = test.get_token ();
2945 ASSERT_EQ (tok->type, CPP_UTF8STRING);
2946 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2947
2948 /* Verify that cpp_interpret_string works. */
2949 cpp_string dst_string;
2950 const enum cpp_ttype type = CPP_STRING;
2951 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2952 &dst_string, type);
2953 ASSERT_TRUE (result);
2954 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2955 free (const_cast <unsigned char *> (dst_string.text));
2956
2957 /* Verify ranges of individual characters. This no longer includes the
2958 opening quote, but does include the closing quote. */
2959 for (int i = 0; i <= 10; i++)
2960 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2961 }
2962
2963 /* Lex a string literal containing UTF-8 source characters.
2964 Verify the substring location data after running cpp_interpret_string
2965 on it. */
2966
2967 static void
2968 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2969 {
2970 /* This string literal is written out to the source file as UTF-8,
2971 and is of the form "before mojibake after", where "mojibake"
2972 is written as the following four unicode code points:
2973 U+6587 CJK UNIFIED IDEOGRAPH-6587
2974 U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2975 U+5316 CJK UNIFIED IDEOGRAPH-5316
2976 U+3051 HIRAGANA LETTER KE.
2977 Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2978 "before" and "after" are 1 byte per unicode character.
2979
2980 The numbers shown are "columns", which are *byte* numbers within
2981 the line, rather than unicode character numbers.
2982
2983 .................... 000000000.1111111.
2984 .................... 123456789.0123456. */
2985 const char *content = (" \"before "
2986 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2987 UTF-8: 0xE6 0x96 0x87
2988 C octal escaped UTF-8: \346\226\207
2989 "column" numbers: 17-19. */
2990 "\346\226\207"
2991
2992 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2993 UTF-8: 0xE5 0xAD 0x97
2994 C octal escaped UTF-8: \345\255\227
2995 "column" numbers: 20-22. */
2996 "\345\255\227"
2997
2998 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2999 UTF-8: 0xE5 0x8C 0x96
3000 C octal escaped UTF-8: \345\214\226
3001 "column" numbers: 23-25. */
3002 "\345\214\226"
3003
3004 /* U+3051 HIRAGANA LETTER KE
3005 UTF-8: 0xE3 0x81 0x91
3006 C octal escaped UTF-8: \343\201\221
3007 "column" numbers: 26-28. */
3008 "\343\201\221"
3009
3010 /* column numbers 29 onwards
3011 2333333.33334444444444
3012 9012345.67890123456789. */
3013 " after\" /* non-str */\n");
3014 lexer_test test (case_, content, NULL);
3015
3016 /* Verify that we get the expected token back, with the correct
3017 location information. */
3018 const cpp_token *tok = test.get_token ();
3019 ASSERT_EQ (tok->type, CPP_STRING);
3020 ASSERT_TOKEN_AS_TEXT_EQ
3021 (test.m_parser, tok,
3022 "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3023
3024 /* Verify that cpp_interpret_string works. */
3025 cpp_string dst_string;
3026 const enum cpp_ttype type = CPP_STRING;
3027 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3028 &dst_string, type);
3029 ASSERT_TRUE (result);
3030 ASSERT_STREQ
3031 ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3032 (const char *)dst_string.text);
3033 free (const_cast <unsigned char *> (dst_string.text));
3034
3035 /* Verify ranges of individual characters. This no longer includes the
3036 opening quote, but does include the closing quote.
3037 Assuming that both source and execution encodings are UTF-8, we have
3038 a run of 25 octets in each, plus the NUL terminator. */
3039 for (int i = 0; i < 25; i++)
3040 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3041 /* NUL-terminator should use the closing quote at column 35. */
3042 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3043
3044 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3045 }
3046
3047 /* Test of string literal concatenation. */
3048
3049 static void
3050 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3051 {
3052 /* Digits 0-9.
3053 .....................000000000.111111.11112222222222
3054 .....................123456789.012345.67890123456789. */
3055 const char *content = (" \"01234\" /* non-str */\n"
3056 " \"56789\" /* non-str */\n");
3057 lexer_test test (case_, content, NULL);
3058
3059 location_t input_locs[2];
3060
3061 /* Verify that we get the expected tokens back. */
3062 auto_vec <cpp_string> input_strings;
3063 const cpp_token *tok_a = test.get_token ();
3064 ASSERT_EQ (tok_a->type, CPP_STRING);
3065 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3066 input_strings.safe_push (tok_a->val.str);
3067 input_locs[0] = tok_a->src_loc;
3068
3069 const cpp_token *tok_b = test.get_token ();
3070 ASSERT_EQ (tok_b->type, CPP_STRING);
3071 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3072 input_strings.safe_push (tok_b->val.str);
3073 input_locs[1] = tok_b->src_loc;
3074
3075 /* Verify that cpp_interpret_string works. */
3076 cpp_string dst_string;
3077 const enum cpp_ttype type = CPP_STRING;
3078 bool result = cpp_interpret_string (test.m_parser,
3079 input_strings.address (), 2,
3080 &dst_string, type);
3081 ASSERT_TRUE (result);
3082 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3083 free (const_cast <unsigned char *> (dst_string.text));
3084
3085 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3086 test.m_concats.record_string_concatenation (2, input_locs);
3087
3088 location_t initial_loc = input_locs[0];
3089
3090 /* "01234" on line 1. */
3091 for (int i = 0; i <= 4; i++)
3092 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3093 /* "56789" in line 2, plus its closing quote for the nul terminator. */
3094 for (int i = 5; i <= 10; i++)
3095 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3096
3097 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3098 }
3099
3100 /* Another test of string literal concatenation. */
3101
3102 static void
3103 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3104 {
3105 /* Digits 0-9.
3106 .....................000000000.111.11111112222222
3107 .....................123456789.012.34567890123456. */
3108 const char *content = (" \"01\" /* non-str */\n"
3109 " \"23\" /* non-str */\n"
3110 " \"45\" /* non-str */\n"
3111 " \"67\" /* non-str */\n"
3112 " \"89\" /* non-str */\n");
3113 lexer_test test (case_, content, NULL);
3114
3115 auto_vec <cpp_string> input_strings;
3116 location_t input_locs[5];
3117
3118 /* Verify that we get the expected tokens back. */
3119 for (int i = 0; i < 5; i++)
3120 {
3121 const cpp_token *tok = test.get_token ();
3122 ASSERT_EQ (tok->type, CPP_STRING);
3123 input_strings.safe_push (tok->val.str);
3124 input_locs[i] = tok->src_loc;
3125 }
3126
3127 /* Verify that cpp_interpret_string works. */
3128 cpp_string dst_string;
3129 const enum cpp_ttype type = CPP_STRING;
3130 bool result = cpp_interpret_string (test.m_parser,
3131 input_strings.address (), 5,
3132 &dst_string, type);
3133 ASSERT_TRUE (result);
3134 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3135 free (const_cast <unsigned char *> (dst_string.text));
3136
3137 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3138 test.m_concats.record_string_concatenation (5, input_locs);
3139
3140 location_t initial_loc = input_locs[0];
3141
3142 /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3143 detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3144 and expect get_source_range_for_substring to fail.
3145 However, for a string concatenation test, we can have a case
3146 where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3147 but subsequent strings can be after it.
3148 Attempting to detect this within assert_char_at_range
3149 would overcomplicate the logic for the common test cases, so
3150 we detect it here. */
3151 if (should_have_column_data_p (input_locs[0])
3152 && !should_have_column_data_p (input_locs[4]))
3153 {
3154 /* Verify that get_source_range_for_substring gracefully rejects
3155 this case. */
3156 source_range actual_range;
3157 const char *err
3158 = get_source_range_for_char (test.m_parser, &test.m_concats,
3159 initial_loc, type, 0, &actual_range);
3160 ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3161 return;
3162 }
3163
3164 for (int i = 0; i < 5; i++)
3165 for (int j = 0; j < 2; j++)
3166 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3167 i + 1, 10 + j, 10 + j);
3168
3169 /* NUL-terminator should use the final closing quote at line 5 column 12. */
3170 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3171
3172 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3173 }
3174
3175 /* Another test of string literal concatenation, this time combined with
3176 various kinds of escaped characters. */
3177
3178 static void
3179 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3180 {
3181 /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3182 and digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3183 const char *content
3184 /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3185 .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3186 = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3187 lexer_test test (case_, content, NULL);
3188
3189 auto_vec <cpp_string> input_strings;
3190 location_t input_locs[4];
3191
3192 /* Verify that we get the expected tokens back. */
3193 for (int i = 0; i < 4; i++)
3194 {
3195 const cpp_token *tok = test.get_token ();
3196 ASSERT_EQ (tok->type, CPP_STRING);
3197 input_strings.safe_push (tok->val.str);
3198 input_locs[i] = tok->src_loc;
3199 }
3200
3201 /* Verify that cpp_interpret_string works. */
3202 cpp_string dst_string;
3203 const enum cpp_ttype type = CPP_STRING;
3204 bool result = cpp_interpret_string (test.m_parser,
3205 input_strings.address (), 4,
3206 &dst_string, type);
3207 ASSERT_TRUE (result);
3208 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3209 free (const_cast <unsigned char *> (dst_string.text));
3210
3211 /* Simulate c-lex.c's lex_string in order to record concatenation. */
3212 test.m_concats.record_string_concatenation (4, input_locs);
3213
3214 location_t initial_loc = input_locs[0];
3215
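/* "01234" supplies characters 0-4 (one column each); the escapes "\x35"
and "\066" supply characters 5 and 6, each spanning the four columns of
its escape sequence; "789" supplies characters 7-9. */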
3216 for (int i = 0; i <= 4; i++)
3217 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3218 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3219 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3220 for (int i = 7; i <= 9; i++)
3221 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3222
3223 /* NUL-terminator should use the location of the final closing quote. */
3224 ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3225
3226 ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3227 }
3228
3229 /* Test of string literal in a macro. */
3230
3231 static void
3232 test_lexer_string_locations_macro (const line_table_case &case_)
3233 {
3234 /* Digits 0-9.
3235 .....................0000000001111111111.22222222223.
3236 .....................1234567890123456789.01234567890. */
3237 const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3238 " MACRO");
3239 lexer_test test (case_, content, NULL);
3240
3241 /* Verify that we get the expected tokens back. */
3242 const cpp_token *tok = test.get_token ();
3243 ASSERT_EQ (tok->type, CPP_PADDING);
3244
3245 tok = test.get_token ();
3246 ASSERT_EQ (tok->type, CPP_STRING);
3247 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3248
3249 /* Verify ranges of individual characters. We ought to
3250 see columns within the macro definition. */
3251 for (int i = 0; i <= 10; i++)
3252 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3253 i, 1, 20 + i, 20 + i);
3254
3255 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3256
3257 tok = test.get_token ();
3258 ASSERT_EQ (tok->type, CPP_PADDING);
3259 }
3260
3261 /* Test of stringification of a macro argument. */
3262
3263 static void
3264 test_lexer_string_locations_stringified_macro_argument
3265 (const line_table_case &case_)
3266 {
3267 /* .....................000000000111111111122222222223.
3268 .....................123456789012345678901234567890. */
3269 const char *content = ("#define MACRO(X) #X /* non-str */\n"
3270 "MACRO(foo)\n");
3271 lexer_test test (case_, content, NULL);
3272
3273 /* Verify that we get the expected token back. */
3274 const cpp_token *tok = test.get_token ();
3275 ASSERT_EQ (tok->type, CPP_PADDING);
3276
3277 tok = test.get_token ();
3278 ASSERT_EQ (tok->type, CPP_STRING);
3279 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3280
3281 /* We don't support getting the location of a stringified macro
3282 argument. Verify that it fails gracefully. */
3283 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3284 "cpp_interpret_string_1 failed");
3285
3286 tok = test.get_token ();
3287 ASSERT_EQ (tok->type, CPP_PADDING);
3288
3289 tok = test.get_token ();
3290 ASSERT_EQ (tok->type, CPP_PADDING);
3291 }
3292
3293 /* Ensure that we fail gracefully if something attempts to pass
3294 in a location that isn't a string literal token. Seen on this code:
3295
3296 const char a[] = " %d ";
3297 __builtin_printf (a, 0.5);
3298 ^
3299
3300 when c-format.c erroneously used the indicated one-character
3301 location as the format string location, leading to a read past the
3302 end of a string buffer in cpp_interpret_string_1. */
3303
3304 static void
3305 test_lexer_string_locations_non_string (const line_table_case &case_)
3306 {
3307 /* .....................000000000111111111122222222223.
3308 .....................123456789012345678901234567890. */
3309 const char *content = (" a\n");
3310 lexer_test test (case_, content, NULL);
3311
3312 /* Verify that we get the expected token back. */
3313 const cpp_token *tok = test.get_token ();
3314 ASSERT_EQ (tok->type, CPP_NAME);
3315 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3316
3317 /* At this point, libcpp is attempting to interpret the name as a
3318 string literal, despite it not starting with a quote. We don't detect
3319 that, but we should at least fail gracefully. */
3320 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3321 "cpp_interpret_string_1 failed");
3322 }
3323
3324 /* Ensure that we can read substring information for a token which
3325 starts in one linemap and ends in another. Adapted from
3326 gcc.dg/cpp/pr69985.c. */
3327
3328 static void
3329 test_lexer_string_locations_long_line (const line_table_case &case_)
3330 {
3331 /* .....................000000.000111111111
3332 .....................123456.789012345678. */
3333 const char *content = ("/* A very long line, so that we start a new line map. */\n"
3334 " \"0123456789012345678901234567890123456789"
3335 "0123456789012345678901234567890123456789"
3336 "0123456789012345678901234567890123456789"
3337 "0123456789\"\n");
3338
3339 lexer_test test (case_, content, NULL);
3340
3341 /* Verify that we get the expected token back. */
3342 const cpp_token *tok = test.get_token ();
3343 ASSERT_EQ (tok->type, CPP_STRING);
3344
3345 if (!should_have_column_data_p (line_table->highest_location))
3346 return;
3347
3348 /* Verify ranges of individual characters. */
3349 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
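/* (131 = three 40-character chunks plus a final 10 characters, i.e. 130
payload characters, plus the NUL terminator.) */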
3350 for (int i = 0; i < 131; i++)
3351 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3352 i, 2, 7 + i, 7 + i);
3353 }
3354
3355 /* Test of locations within a raw string that doesn't contain a newline. */
3356
3357 static void
3358 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3359 {
3360 /* .....................00.0000000111111111122.
3361 .....................12.3456789012345678901. */
3362 const char *content = ("R\"foo(0123456789)foo\"\n");
3363 lexer_test test (case_, content, NULL);
3364
3365 /* Verify that we get the expected token back. */
3366 const cpp_token *tok = test.get_token ();
3367 ASSERT_EQ (tok->type, CPP_STRING);
3368
3369 /* Verify that cpp_interpret_string works. */
3370 cpp_string dst_string;
3371 const enum cpp_ttype type = CPP_STRING;
3372 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3373 &dst_string, type);
3374 ASSERT_TRUE (result);
3375 ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3376 free (const_cast <unsigned char *> (dst_string.text));
3377
3378 if (!should_have_column_data_p (line_table->highest_location))
3379 return;
3380
3381 /* 0-9, plus the NUL terminator. */
3382 ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
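/* The digits begin at column 7, immediately after the R"foo( prefix. */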
3383 for (int i = 0; i < 11; i++)
3384 ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3385 i, 1, 7 + i, 7 + i);
3386 }
3387
3388 /* Test of locations within a raw string that contains a newline. */
3389
3390 static void
3391 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3392 {
3393 /* .....................00.0000.
3394 .....................12.3456. */
3395 const char *content = ("R\"foo(\n"
3396 /* .....................00000.
3397 .....................12345. */
3398 "hello\n"
3399 "world\n"
3400 /* .....................00000.
3401 .....................12345. */
3402 ")foo\"\n");
3403 lexer_test test (case_, content, NULL);
3404
3405 /* Verify that we get the expected token back. */
3406 const cpp_token *tok = test.get_token ();
3407 ASSERT_EQ (tok->type, CPP_STRING);
3408
3409 /* Verify that cpp_interpret_string works. */
3410 cpp_string dst_string;
3411 const enum cpp_ttype type = CPP_STRING;
3412 bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3413 &dst_string, type);
3414 ASSERT_TRUE (result);
3415 ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3416 free (const_cast <unsigned char *> (dst_string.text));
3417
3418 if (!should_have_column_data_p (line_table->highest_location))
3419 return;
3420
3421 /* Currently we don't support locations within raw strings that
3422 contain newlines. */
3423 ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3424 "range endpoints are on different lines");
3425 }
3426
3427 /* Test of parsing an unterminated raw string. */
3428
3429 static void
3430 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3431 {
3432 const char *content = "R\"ouch()ouCh\" /* etc */";
3433
3434 lexer_diagnostic_sink diagnostics;
3435 lexer_test test (case_, content, &diagnostics);
3436 test.m_implicitly_expect_EOF = false;
3437
3438 /* Attempt to parse the raw string. */
3439 const cpp_token *tok = test.get_token ();
3440 ASSERT_EQ (tok->type, CPP_EOF);
3441
3442 ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3443 /* We expect the message "unterminated raw string"
3444 in the "cpplib" translation domain.
3445 It's not clear that dgettext is available on all supported hosts,
3446 so this assertion is commented-out for now.
3447 ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3448 diagnostics.m_diagnostics[0]);
3449 */
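/* A possible alternative (just a sketch; it assumes the ENABLE_NLS macro
from configure is a reliable indicator that dgettext is usable on the
host) would be to guard the assertion:

#ifdef ENABLE_NLS
ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
diagnostics.m_diagnostics[0]);
#endif
*/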
3450 }
3451
3452 /* Test of lexing char constants. */
3453
3454 static void
3455 test_lexer_char_constants (const line_table_case &case_)
3456 {
3457 /* Various char constants.
3458 .....................0000000001111111111.22222222223.
3459 .....................1234567890123456789.01234567890. */
3460 const char *content = (" 'a'\n"
3461 " u'a'\n"
3462 " U'a'\n"
3463 " L'a'\n"
3464 " 'abc'\n");
3465 lexer_test test (case_, content, NULL);
3466
3467 /* Verify that we get the expected tokens back. */
3468 /* 'a'. */
3469 const cpp_token *tok = test.get_token ();
3470 ASSERT_EQ (tok->type, CPP_CHAR);
3471 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3472
3473 unsigned int chars_seen;
3474 int unsignedp;
3475 cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3476 &chars_seen, &unsignedp);
3477 ASSERT_EQ (cc, 'a');
3478 ASSERT_EQ (chars_seen, 1);
3479
3480 /* u'a'. */
3481 tok = test.get_token ();
3482 ASSERT_EQ (tok->type, CPP_CHAR16);
3483 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3484
3485 /* U'a'. */
3486 tok = test.get_token ();
3487 ASSERT_EQ (tok->type, CPP_CHAR32);
3488 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3489
3490 /* L'a'. */
3491 tok = test.get_token ();
3492 ASSERT_EQ (tok->type, CPP_WCHAR);
3493 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3494
3495 /* 'abc' (c-char-sequence). */
3496 tok = test.get_token ();
3497 ASSERT_EQ (tok->type, CPP_CHAR);
3498 ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3499 }
3500 /* A table of interesting location_t values, giving one axis of our test
3501 matrix. */
3502
3503 static const location_t boundary_locations[] = {
3504 /* Zero means "don't override the default values for a new line_table". */
3505 0,
3506
3507 /* An arbitrary non-zero value that isn't close to one of
3508 the boundary values below. */
3509 0x10000,
3510
3511 /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3512 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3513 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3514 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3515 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3516 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3517
3518 /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3519 LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3520 LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3521 LINE_MAP_MAX_LOCATION_WITH_COLS,
3522 LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3523 LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3524 };
3525
3526 /* Run TESTCASE multiple times, once for each case in our test matrix. */
3527
3528 void
3529 for_each_line_table_case (void (*testcase) (const line_table_case &))
3530 {
3531 /* As noted above in the description of struct line_table_case,
3532 we want to explore a test matrix of interesting line_table
3533 situations, running various selftests for each case within the
3534 matrix. */
3535
3536 /* Run all tests with:
3537 (a) line_table->default_range_bits == 0, and
3538 (b) line_table->default_range_bits == 5. */
3539 int num_cases_tested = 0;
3540 for (int default_range_bits = 0; default_range_bits <= 5;
3541 default_range_bits += 5)
3542 {
3543 /* ...and use each of the "interesting" location values as
3544 the starting location within line_table. */
3545 const int num_boundary_locations
3546 = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3547 for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3548 {
3549 line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3550
3551 testcase (c);
3552
3553 num_cases_tested++;
3554 }
3555 }
3556
3557 /* Verify that we fully covered the test matrix. */
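/* (2 * 12: two settings of default_range_bits times the twelve entries
in boundary_locations above.) */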
3558 ASSERT_EQ (num_cases_tested, 2 * 12);
3559 }
3560
3561 /* Verify that when presented with a consecutive pair of locations with
3562 a very large line offset, we don't attempt to consolidate them into
3563 a single ordinary linemap where the line offsets within the line map
3564 would lead to overflow (PR lto/88147). */
3565
3566 static void
3567 test_line_offset_overflow ()
3568 {
3569 line_table_test ltt (line_table_case (5, 0));
3570
3571 linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3572 linemap_line_start (line_table, 1, 100);
3573 location_t loc_a = linemap_line_start (line_table, 2578, 255);
3574 assert_loceq ("foo.c", 2578, 0, loc_a);
3575
3576 const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3577 ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3578 ASSERT_EQ (ordmap_a->m_range_bits, 5);
3579
3580 location_t loc_b = linemap_line_start (line_table, 404198, 512);
3581 assert_loceq ("foo.c", 404198, 0, loc_b);
3582
3583 /* We should have started a new linemap, rather than attempting to store
3584 a very large line offset. */
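/* (Reusing ordmap_a would mean encoding a line delta of
404198 - 2578 = 401620, shifted left by the map's 13 column-and-range
bits -- an offset far too large for the ordinary location space.) */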
3585 const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3586 ASSERT_NE (ordmap_a, ordmap_b);
3587 }
3588
3589 /* Run all of the selftests within this file. */
3590
3591 void
3592 input_c_tests ()
3593 {
3594 test_linenum_comparisons ();
3595 test_should_have_column_data_p ();
3596 test_unknown_location ();
3597 test_builtins ();
3598 for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3599
3600 for_each_line_table_case (test_accessing_ordinary_linemaps);
3601 for_each_line_table_case (test_lexer);
3602 for_each_line_table_case (test_lexer_string_locations_simple);
3603 for_each_line_table_case (test_lexer_string_locations_ebcdic);
3604 for_each_line_table_case (test_lexer_string_locations_hex);
3605 for_each_line_table_case (test_lexer_string_locations_oct);
3606 for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3607 for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3608 for_each_line_table_case (test_lexer_string_locations_ucn4);
3609 for_each_line_table_case (test_lexer_string_locations_ucn8);
3610 for_each_line_table_case (test_lexer_string_locations_wide_string);
3611 for_each_line_table_case (test_lexer_string_locations_string16);
3612 for_each_line_table_case (test_lexer_string_locations_string32);
3613 for_each_line_table_case (test_lexer_string_locations_u8);
3614 for_each_line_table_case (test_lexer_string_locations_utf8_source);
3615 for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3616 for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3617 for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3618 for_each_line_table_case (test_lexer_string_locations_macro);
3619 for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3620 for_each_line_table_case (test_lexer_string_locations_non_string);
3621 for_each_line_table_case (test_lexer_string_locations_long_line);
3622 for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3623 for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3624 for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3625 for_each_line_table_case (test_lexer_char_constants);
3626
3627 test_reading_source_line ();
3628
3629 test_line_offset_overflow ();
3630 }
3631
3632 } // namespace selftest
3633
3634 #endif /* CHECKING_P */