gcc/input.c

   1 /* Data and functions related to line maps and input files.
   2    Copyright (C) 2004-2021 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "intl.h"
  24 #include "diagnostic.h"
  25 #include "diagnostic-core.h"
  26 #include "selftest.h"
  27 #include "cpplib.h"
  28
  29 #ifndef HAVE_ICONV
  30 #define HAVE_ICONV 0
  31 #endif
  32
  33 /* One slot of the source-file cache: it holds the content read so far
  34    from a single file, from which lines are looked up by number.  */
  35 class file_cache_slot
  36 {
  37 public:
  38   file_cache_slot ();
  39   ~file_cache_slot ();
  40
  41   bool read_line_num (size_t line_num,
  42                       char ** line, ssize_t *line_len);
  43
  44   /* Accessors.  */
  45   const char *get_file_path () const { return m_file_path; }
  46   unsigned get_use_count () const { return m_use_count; }
  47   bool missing_trailing_newline_p () const
  48   {
  49     return m_missing_trailing_newline;
  50   }
  51
  52   void inc_use_count () { m_use_count++; }
  53
  54   void create (const char *file_path, FILE *fp, unsigned highest_use_count);
  55   void evict ();
  56
  57  private:
  58   /* The boundaries of one line within the file data.  */
  59   class line_info
  60   {
  61   public:
  62     /* The line number.  It starts from 1.  */
  63     size_t line_num;
  64
  65     /* The position (byte count) of the beginning of the line,
  66        relative to the file data pointer.  This starts at zero.  */
  67     size_t start_pos;
  68
  69     /* The position (byte count) of the end of the line.  This
  70        normally points to the '\n' character, or to one byte after the
  71        last byte of the file, if the line isn't terminated by a '\n'
  72        character.  */
  73     size_t end_pos;
  74
  75     line_info (size_t l, size_t s, size_t e)
  76       : line_num (l), start_pos (s), end_pos (e)
  77     {}
  78
  79     line_info ()
  80       :line_num (0), start_pos (0), end_pos (0)
  81     {}
  82   };
  83
  84   bool needs_read_p () const;
  85   bool needs_grow_p () const;
  86   void maybe_grow ();
  87   bool read_data ();
  88   bool maybe_read_data ();
  89   bool get_next_line (char **line, ssize_t *line_len);
  90   bool read_next_line (char ** line, ssize_t *line_len);
  91   bool goto_next_line ();
  92
  93   static const size_t buffer_size = 4 * 1024;  /* First size of m_data.  */
  94   static const size_t line_record_size = 100;  /* Cap on m_line_record.  */
  95
  96   /* The number of times this file has been accessed.  This is used
  97      to designate which file cache to evict from the cache
  98      array.  */
  99   unsigned m_use_count;
 100
 101   /* The file_path is the key for identifying a particular file in
 102      the cache.  It is NULL while the slot is empty.
 103      For libcpp-using code, the underlying buffer for this field is
 104      owned by the corresponding _cpp_file within the cpp_reader.  */
 105   const char *m_file_path;
 106
 107   FILE *m_fp;  /* Stream the file content is read from, or NULL.  */
 108
 109   /* This points to the content of the file that we've read so
 110      far.  */
 111   char *m_data;
 112
 113   /* The allocated size, in bytes, of the m_data array above.  */
 114   size_t m_size;
 115
 116   /* The number of bytes read from the underlying file so far.  This
 117      must be less than or equal to m_size above.  */
 118   size_t m_nb_read;
 119
 120   /* The index in m_data of the beginning of the next line to read.  */
 121   size_t m_line_start_idx;
 122
 123   /* The number of the previous line read.  This starts at 1.  Zero
 124      means we've read no line so far.  */
 125   size_t m_line_num;
 126
 127   /* This is the total number of lines of the current file.  At the
 128      moment, we try to get this information from the line map
 129      subsystem.  Note that this is just a hint.  When using the C++
 130      front-end, this hint is correct because the input file is then
 131      completely tokenized before parsing starts; so the line map knows
 132      the number of lines before compilation really starts.  For e.g.
 133      the C front-end, it can happen that we start emitting diagnostics
 134      before the line map has seen the end of the file.  */
 135   size_t m_total_lines;
 136
 137   /* Could this file be missing a trailing newline on its final line?
 138      Initially true (to cope with empty files), set to true/false
 139      as each line is read.  */
 140   bool m_missing_trailing_newline;
 141
 142   /* This is a record of the beginning and end of the lines we've seen
 143      while reading the file.  This is useful to avoid walking the data
 144      from the beginning when we are asked to read a line that is
 145      before m_line_start_idx above.  Note that the maximum size of
 146      this record is line_record_size, so that the memory consumption
 147      doesn't explode.  We thus scale m_total_lines down to
 148      line_record_size.  */
 149   vec<line_info, va_heap> m_line_record;
 150 };
 151
 152 /* Current position in real source file.  */
 153
 154 location_t input_location = UNKNOWN_LOCATION;
 155
 156 class line_maps *line_table;  /* Passed to the linemap_* calls below.  */
 157
 158 /* A stashed copy of "line_table" for use by selftest::line_table_test.
 159    This needs to be a global so that it can be a GC root, and thus
 160    prevent the stashed copy from being garbage-collected if the GC runs
 161    during a line_table_test.  */
 162
 163 class line_maps *saved_line_table;
 164
 165 /* Turn the source location LOC into a human readable
 166    expanded_location.  The file name of the result is "<built-in>"
 167    when LOC resolves to a builtin location.
 168
 169    When EXPANSION_POINT_P is true, a virtual LOC is resolved to the
 170    expansion point of the macro involved; when it is false, LOC is
 171    resolved to the spelling location of the token.
 172
 173    In the spelling-location case, if the result would be a built-in
 174    location (one without line/column) reached through a macro
 175    expansion, the location used instead is the first one in real
 176    source code found while unwinding the macro location towards its
 177    expansion point.
 178
 179    ASPECT selects which part of the location to use.  */
 180
 181 static expanded_location
 182 expand_location_1 (location_t loc,
 183                    bool expansion_point_p,
 184                    enum location_aspect aspect)
 185 {
 186   /* Split an ad-hoc location into its block, which is remembered for
 187      the result, and its locus, which is what gets expanded.  */
 188   tree enclosing_block = NULL;
 189   if (IS_ADHOC_LOC (loc))
 190     {
 191       enclosing_block = LOCATION_BLOCK (loc);
 192       loc = LOCATION_LOCUS (loc);
 193     }
 194
 195   expanded_location result;
 196   memset (&result, 0, sizeof (result));
 197
 198   if (loc >= RESERVED_LOCATION_COUNT)
 199     {
 200       const line_map_ordinary *ord_map;
 201       enum location_resolution_kind kind;
 202
 203       if (expansion_point_p)
 204         kind = LRK_MACRO_EXPANSION_POINT;
 205       else
 206         {
 207           /* The caller wants the spelling location of LOC.
 208
 209              That spelling location may however be a reserved one
 210              reached through a macro expansion (e.g. the location of
 211              a built-in token).  In that case use the first location,
 212              going toward the expansion point, that is not reserved,
 213              i.e. the first one that lies in real source code.  */
 214           loc = linemap_unwind_to_first_non_reserved_loc (line_table,
 215                                                           loc, NULL);
 216           kind = LRK_SPELLING_LOCATION;
 217         }
 218       loc = linemap_resolve_location (line_table, loc, kind, &ord_map);
 219
 220       /* LOC now lies in an ordinary map or is a reserved location.
 221          For a compound location the caret is a spelling location,
 222          yet its start/finish may still be virtual; when the caller
 223          asked for one of those end-points, recurse one level on it
 224          so that it gets resolved too.  */
 225       location_t end_point = loc;
 226       switch (aspect)
 227         {
 228         case LOCATION_ASPECT_CARET:
 229           break;
 230         case LOCATION_ASPECT_START:
 231           end_point = get_start (loc);
 232           break;
 233         case LOCATION_ASPECT_FINISH:
 234           end_point = get_finish (loc);
 235           break;
 236         default:
 237           gcc_unreachable ();
 238         }
 239       if (end_point != loc)
 240         return expand_location_1 (end_point, expansion_point_p, aspect);
 241
 242       result = linemap_expand_location (line_table, ord_map, loc);
 243     }
 244
 245   /* Locations up to BUILTINS_LOCATION get their file name set here:
 246      none for UNKNOWN_LOCATION, "<built-in>" for the others.  */
 247   result.data = enclosing_block;
 248   if (loc <= BUILTINS_LOCATION)
 249     result.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
 250
 251   return result;
 252 }
 253
 254 /* Make sure the cache of files accessed by caret diagnostics exists,
 255    creating it if global_dc has none yet.  */
 256
 257 static void
 258 diagnostic_file_cache_init (void)
 259 {
 260   gcc_assert (global_dc);
 261   if (!global_dc->m_file_cache)
 262     global_dc->m_file_cache = new file_cache ();
 263 }
 264
 265 /* Destroy the cache of files accessed by caret diagnostics, if there
 266    is one, releasing the resources it uses.  */
 267
 268 void
 269 diagnostic_file_cache_fini (void)
 270 {
 271   if (global_dc->m_file_cache == NULL)
 272     return;
 273
 274   delete global_dc->m_file_cache;
 275   global_dc->m_file_cache = NULL;
 276 }
 277
 278 /* Return the number of lines of FILE_PATH that the line map (in the
 279    preprocessor) has read so far, or zero if the line map has no
 280    highest location for that file.  For languages like C++ that
 281    preprocess the whole input before parsing, this is the line count.  */
 282
 283 static size_t
 284 total_lines_num (const char *file_path)
 285 {
 286   location_t highest = 0;
 287   if (!linemap_get_file_highest_location (line_table, file_path,
 288                                           &highest))
 289     return 0;
 290
 291   /* Such a location is expected not to be a reserved one.  */
 292   gcc_assert (highest >= RESERVED_LOCATION_COUNT);
 293   expanded_location xloc = expand_location (highest);
 294   return xloc.line;
 295 }
 296
 297 /* Look up the slot caching the content of FILE_PATH, a file accessed
 298    by caret diagnostics.  Return that slot, with its use count bumped
 299    once, or NULL if FILE_PATH is not cached.  */
 300
 301 file_cache_slot *
 302 file_cache::lookup_file (const char *file_path)
 303 {
 304   gcc_assert (file_path);
 305
 306   for (unsigned i = 0; i < num_file_slots; ++i)
 307     {
 308       file_cache_slot *c = &m_file_slots[i];
 309       /* Empty slots have a NULL path and never match.  */
 310       if (c->get_file_path () == NULL
 311           || strcmp (c->get_file_path (), file_path) != 0)
 312         continue;
 313
 314       /* Count this access exactly once: the use count drives the
 315          choice of the slot to recycle, and a hit must weigh the same
 316          as the single step a newly created slot is put ahead by.  */
 317       c->inc_use_count ();
 318       return c;
 319     }
 320
 321   return NULL;
 322 }
 323
 324 /* Drop FILE_PATH from the cache of files used for printing source
 325    code, if such a cache exists.  For use in selftests when working
 326    with tempfiles.  */
 327
 328 void
 329 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
 330 {
 331   gcc_assert (file_path);
 332
 333   /* Without a cache there is nothing to evict.  */
 334   file_cache *cache = global_dc->m_file_cache;
 335   if (cache)
 336     cache->forcibly_evict_file (file_path);
 337 }
 338
 339 void
 340 file_cache::forcibly_evict_file (const char *file_path)
 341 {
 342   /* Purge FILE_PATH from this cache.  */
 343   gcc_assert (file_path);
 344
 345   file_cache_slot *slot = lookup_file (file_path);
 346
 347   /* Only a file that is actually cached has a slot to evict.  */
 348   if (slot)
 349     slot->evict ();
 350 }
 351
 352 void
 353 file_cache_slot::evict ()
 354 {
 355   /* Close the file; m_data and m_size are left untouched.  */
 356   if (m_fp)
 357     {
 358       fclose (m_fp);
 359       m_fp = NULL;
 360     }
 361   m_file_path = NULL;
 362   m_use_count = 0;
 363   m_nb_read = m_line_start_idx = m_line_num = m_total_lines = 0;
 364   m_line_record.truncate (0);
 365   m_missing_trailing_newline = true;
 366 }
 367
 368 /* Return the slot to (re)use for a new file: an empty slot if there
 369    is one, otherwise the slot with the lowest use count.  If
 370    HIGHEST_USE_COUNT is non-null, *HIGHEST_USE_COUNT is set to the
 371    highest use count found among all the slots of the cache.  */
 372
 373 file_cache_slot*
 374 file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
 375 {
 376   diagnostic_file_cache_init ();
 377
 378   file_cache_slot *to_evict = &m_file_slots[0];
 379   unsigned huc = to_evict->get_use_count ();
 380   for (unsigned i = 1; i < num_file_slots; ++i)
 381     {
 382       file_cache_slot *c = &m_file_slots[i];
 383       bool c_is_empty = (c->get_file_path () == NULL);
 384
 385       if (c->get_use_count () < to_evict->get_use_count ()
 386           || (to_evict->get_file_path () && c_is_empty))
 387         /* We evict C because it's either an entry with a lower use
 388            count or one that is empty.  */
 389         to_evict = c;
 390
 391       if (huc < c->get_use_count ())
 392         huc = c->get_use_count ();
 393
 394       /* Do not stop at the first empty slot: evict () can empty a
 395          slot located before slots that are still in use, and
 396          those must contribute to the highest use count, otherwise
 397          the slot created next could get too low a use count.  */
 398     }
 399
 400   if (highest_use_count)
 401     *highest_use_count = huc;
 402
 403   return to_evict;
 404 }
 405
 406 /* Start caching the content of FILE_PATH, a file to be accessed by
 407    caret diagnostics.  The file is opened and bound to the slot
 408    picked by evicted_cache_tab_entry, so that at most num_file_slots
 409    files are cached at a time.  Return that slot, or NULL if the
 410    file could not be opened.  */
 411
 412 file_cache_slot*
 413 file_cache::add_file (const char *file_path)
 414 {
 415   FILE *stream = fopen (file_path, "r");
 416   if (!stream)
 417     return NULL;
 418
 419   /* The slot is handed STREAM and the highest use count found.  */
 420   unsigned huc = 0;
 421   file_cache_slot *slot = evicted_cache_tab_entry (&huc);
 422   slot->create (file_path, stream, huc);
 423   return slot;
 424 }
 425
 426 /* Make this slot cache FILE_PATH, read through FP.  Whatever the slot
 427    was caching before is dropped.  */
 428
 429 void
 430 file_cache_slot::create (const char *file_path, FILE *fp,
 431                          unsigned highest_use_count)
 432 {
 433   /* Release the stream of the file cached so far, if any.  */
 434   if (m_fp)
 435     fclose (m_fp);
 436   m_fp = fp;
 437   m_file_path = file_path;
 438
 439   /* Nothing of the new file has been read yet.  */
 440   m_nb_read = m_line_start_idx = m_line_num = 0;
 441   m_line_record.truncate (0);
 442   m_missing_trailing_newline = true;
 443   m_total_lines = total_lines_num (file_path);
 444   /* Rank above every other slot so as not to be evicted next.  */
 445   m_use_count = highest_use_count + 1;
 446 }
 447
 448 /* file_cache's ctor: allocate the array of num_file_slots slots.  */
 449
 450 file_cache::file_cache ()
 451 : m_file_slots (new file_cache_slot[num_file_slots])
 452 {
 453 }
 454
 455 /* file_cache's dtor: destroy and free the array of slots.  */
 456
 457 file_cache::~file_cache ()
 458 {
 459   delete[] m_file_slots;
 460 }
 461
 462 /* Return the slot caching the content of FILE_PATH, a file accessed
 463    by caret diagnostics.  When the file is not in the cache yet, it
 464    is added to it first; the result is then NULL if adding it
 465    failed.  */
 466
 467 file_cache_slot*
 468 file_cache::lookup_or_add_file (const char *file_path)
 469 {
 470   if (file_cache_slot *cached = lookup_file (file_path))
 471     return cached;
 472
 473   return add_file (file_path);
 474 }
 475
 476 /* Default constructor: build an empty slot, bound to no file and
 477    holding no data.  */
 478
 479 file_cache_slot::file_cache_slot ()
 480 : m_use_count (0), m_file_path (NULL), m_fp (NULL),
 481   m_data (NULL), m_size (0), m_nb_read (0), m_line_start_idx (0),
 482   m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
 483 {
 484   m_line_record.create (0);
 485 }
 486
 487 /* Destructor: release the stream, buffer and line record of the slot.  */
 488
 489 file_cache_slot::~file_cache_slot ()
 490 {
 491   /* Close the underlying file if it is still open.  */
 492   if (m_fp != NULL)
 493     fclose (m_fp);
 494   m_fp = NULL;
 495
 496   /* Free the buffer holding the file content, if any.  */
 497   if (m_data != NULL)
 498     XDELETEVEC (m_data);
 499   m_data = NULL;
 500
 501   m_line_record.release ();
 502 }
 503
 504 /* Return TRUE iff more data should be fetched from the file before
 505    going on: nothing has been read yet, the buffer is full (and will
 506    have to be extended first), or at most one byte of what was read
 507    lies at or after the start of the current line.  */
 508
 509 bool
 510 file_cache_slot::needs_read_p () const
 511 {
 512   if (m_nb_read == 0 || m_nb_read == m_size)
 513     return true;
 514   return m_line_start_idx >= m_nb_read - 1;
 515 }
 516
 517 /* Return TRUE iff the bytes read so far fill the buffer completely,
 518    in which case it must be extended before reading more.  */
 519
 520 bool
 521 file_cache_slot::needs_grow_p () const
 522 {
 523   return m_size == m_nb_read;
 524 }
 525
 526 /* Extend the buffer if it is full; do nothing otherwise.  */
 527
 528 void
 529 file_cache_slot::maybe_grow ()
 530 {
 531   if (needs_grow_p ())
 532     {
 533       /* Start at buffer_size bytes, then double at each extension.  */
 534       m_size = m_size ? m_size * 2 : buffer_size;
 535       m_data = XRESIZEVEC (char, m_data, m_size);
 536     }
 537 }
 538
 539 /* Fetch more bytes from the file into the buffer, extending the
 540    buffer first if it is full.  Return TRUE iff new data was read.  */
 541
 542 bool
 543 file_cache_slot::read_data ()
 544 {
 545   /* Do not read from a stream that is at EOF or in error.  */
 546   if (feof (m_fp) || ferror (m_fp))
 547     return false;
 548
 549   maybe_grow ();
 550
 551   /* Fill as much as possible of the free room at the end of m_data.  */
 552   size_t room = m_size - m_nb_read;
 553   size_t got = fread (m_data + m_nb_read, 1, room, m_fp);
 554   if (ferror (m_fp))
 555     return false;
 556
 557   m_nb_read += got;
 558   return got != 0;
 559 }
 560
 561 /* Fetch more data from the file, but only if needs_read_p says the
 562    buffer has to be refilled.  Return TRUE iff the buffer was filled
 563    with more data.  */
 564
 565 bool
 566 file_cache_slot::maybe_read_data ()
 567 {
 568   if (needs_read_p ())
 569     return read_data ();
 570   return false;
 571 }
 572
 573 /* Read the next line of the cached file, fetching more data from
 574    the underlying file as needed.  Upon successful completion, *LINE
 575    is set to the beginning of the line found.  *LINE points directly
 576    into the cache buffer and is only valid until the next call of
 577    get_next_line.  *LINE_LEN is set to the length of the line.  Note
 578    that the line does not contain any terminal delimiter.  This
 579    function returns true if a line was read or processed from the
 580    cache, false otherwise.  Note that subsequent calls to
 581    get_next_line might make the content of *LINE invalid.  */
 582
 583 bool
 584 file_cache_slot::get_next_line (char **line, ssize_t *line_len)
 585 {
 586   /* Fill the cache with data to process.  */
 587   maybe_read_data ();
 588
 589   size_t remaining_size = m_nb_read - m_line_start_idx;
 590   if (remaining_size == 0)
 591     /* There is no more data to process.  */
 592     return false;
 593
 594   char *line_start = m_data + m_line_start_idx;
 595
 596   char *next_line_start = NULL;
 597   size_t len = 0;
 598   char *line_end = (char *) memchr (line_start, '\n', remaining_size);
 599   if (line_end == NULL)
 600     {
 601       /* We haven't found the end-of-line delimiter in the cache.
 602          Read more data and look for the '\n' again; line_start is
 603          recomputed because growing the cache may move m_data.  */
 604       while (maybe_read_data ())
 605         {
 606           line_start = m_data + m_line_start_idx;
 607           remaining_size = m_nb_read - m_line_start_idx;
 608           line_end = (char *) memchr (line_start, '\n', remaining_size);
 609           if (line_end != NULL)
 610             {
 611               next_line_start = line_end + 1;
 612               break;
 613             }
 614         }
 615       if (line_end == NULL)
 616         {
 617           /* No more data could be read into the cache and there is
 618              still no '\n'.  Let's say the line ends one byte past
 619              the end of the data.  This is to stay consistent with the
 620              case where the line ends with a '\n' and line_end points
 621              to that terminal '\n'.  That consistency is useful below
 622              in the len calculation.  */
 623           line_end = m_data + m_nb_read ;
 624           m_missing_trailing_newline = true;
 625         }
 626       else
 627         m_missing_trailing_newline = false;
 628     }
 629   else
 630     {
 631       next_line_start = line_end + 1;
 632       m_missing_trailing_newline = false;
 633     }
 634
 635   if (ferror (m_fp))
 636     return false;
 637
 638   /* At this point, we've found the end of the line.  It either
 639      points to the '\n' or to one byte after the last byte of the
 640      data read.  */
 641   gcc_assert (line_end != NULL);
 642
 643   len = line_end - line_start;
 644
 645   if (m_line_start_idx < m_nb_read)
 646     *line = line_start;
 647
 648   ++m_line_num;
 649
 650   /* Before we update our line record, make sure the hint about the
 651      total number of lines of the file is correct.  If it's not, then
 652      we do not record the boundaries of this line.  */
 653   bool update_line_record = true;
 654   if (m_line_num > m_total_lines)
 655     update_line_record = false;
 656
 657   /* Now update our line record so that re-reading lines located
 658      before m_line_start_idx is faster.  */
 659   if (update_line_record
 660       && m_line_record.length () < line_record_size)
 661     {
 662       /* If the lines of the file fit in the line record, we just
 663          record all of them ...  */
 664       if (m_total_lines <= line_record_size
 665           && m_line_num > m_line_record.length ())
 666         m_line_record.safe_push
 667           (file_cache_slot::line_info (m_line_num,
 668                                        m_line_start_idx,
 669                                        line_end - m_data));
 670       else if (m_total_lines > line_record_size)
 671         {
 672           /* ... otherwise, we just scale m_total_lines down to
 673              line_record_size lines.  */
 674           size_t n = (m_line_num * line_record_size) / m_total_lines;
 675           if (m_line_record.length () == 0
 676               || n >= m_line_record.length ())
 677             m_line_record.safe_push
 678               (file_cache_slot::line_info (m_line_num,
 679                                            m_line_start_idx,
 680                                            line_end - m_data));
 681         }
 682     }
 683
 684   /* Update m_line_start_idx so that it points to the next line to be
 685      read.  */
 686   if (next_line_start)
 687     m_line_start_idx = next_line_start - m_data;
 688   else
 689     /* We didn't find any terminal '\n'.  Let's consider that the end
 690        of line is the end of the data in the cache.  The next
 691        invocation of get_next_line will either read more data from the
 692        underlying file or return false early because we've reached the
 693        end of the file.  */
 694     m_line_start_idx = m_nb_read;
 695
 696   *line_len = len;
 697
 698   return true;
 699 }
 700
 701 /* Skip to the next line: consume bytes from the cache (reading more
 702    from the underlying file when unread bytes remain there) up to
 703    the next end-of-line or end-of-file.  Nothing is copied out of
 704    the cache.  Return TRUE upon successful
 705    completion.  */
 706
 707 bool
 708 file_cache_slot::goto_next_line ()
 709 {
 710   /* The line itself is of no interest here, only the move past it.  */
 711   char *ignored_line;
 712   ssize_t ignored_len;
 713   return get_next_line (&ignored_line, &ignored_len);
 714 }
 715
 716 /* Read an arbitrary line number LINE_NUM from the file cached in
 717    this slot.  If the line was read successfully, *LINE points to the
 718    beginning of the line in the file cache and *LINE_LEN is the length
 719    of the line.  *LINE is not nul-terminated, but may contain zero
 720    bytes.  *LINE is only valid until the next call of read_line_num.
 721    This function returns true iff a line was read.  */
 722
 723 bool
 724 file_cache_slot::read_line_num (size_t line_num,
 725                        char ** line, ssize_t *line_len)
 726 {
 727   gcc_assert (line_num > 0);
 728
 729   if (line_num <= m_line_num)
 730     {
 731       /* We've been asked to read a line that is not after m_line_num.
 732          So let's use our line record (if it's not empty) to try to
 733          avoid re-reading the file from the beginning again.  */
 734
 735       if (m_line_record.is_empty ())
 736         {
 737           m_line_start_idx = 0;
 738           m_line_num = 0;
 739         }
 740       else
 741         {
 742           file_cache_slot::line_info *i = NULL;
 743           if (m_total_lines <= line_record_size)
 744             {
 745               /* In languages where the input file is not totally
 746                  preprocessed up front, the m_total_lines hint
 747                  can be smaller than the number of lines of the
 748                  file.  In that case, only the first
 749                  m_total_lines have been recorded.
 750
 751                  Otherwise, the first m_total_lines we've read have
 752                  their start/end recorded here.  */
 753               i = (line_num <= m_total_lines)
 754                 ? &m_line_record[line_num - 1]
 755                 : &m_line_record[m_total_lines - 1];
 756               gcc_assert (i->line_num <= line_num);
 757             }
 758           else
 759             {
 760               /* So the file had more lines than our line record
 761                  size.  Thus the number of lines we've recorded has
 762                  been scaled down to line_record_size.  Let's
 763                  pick the start/end of the recorded line that is
 764                  closest to line_num.  */
 765               size_t n = (line_num <= m_total_lines)
 766                 ? line_num * line_record_size / m_total_lines
 767                 : m_line_record.length () - 1;
 768               if (n < m_line_record.length ())
 769                 {
 770                   i = &m_line_record[n];
 771                   gcc_assert (i->line_num <= line_num);
 772                 }
 773             }
 774
 775           if (i && i->line_num == line_num)
 776             {
 777               /* We have the start/end of the line.  */
 778               *line = m_data + i->start_pos;
 779               *line_len = i->end_pos - i->start_pos;
 780               return true;
 781             }
 782
 783           if (i)
 784             {
 785               m_line_start_idx = i->start_pos;
 786               m_line_num = i->line_num - 1;
 787             }
 788           else
 789             {
 790               m_line_start_idx = 0;
 791               m_line_num = 0;
 792             }
 793         }
 794     }
 795
 796   /* Let's walk from line m_line_num up to line_num - 1, without
 797      copying any line.  */
 798   while (m_line_num < line_num - 1)
 799     if (!goto_next_line ())
 800       return false;
 801
 802   /* The line we want is the next one.  Let's read it and hand it back
 803      to the caller.  */
 804   return get_next_line (line, line_len);
 805 }
 806
 807 /* Return the physical source line numbered LINE of file FILE_PATH.
 808    The line is not nul-terminated and may itself contain nul
 809    characters, so its actual length is carried by the returned
 810    char_span.  The returned pointer is only valid until the next
 811    call of location_get_source_line.  A NULL char_span is returned
 812    if the function fails.  */
 813
 814 char_span
 815 location_get_source_line (const char *file_path, int line)
 816 {
 817   /* Line zero and a missing file name both yield a NULL span.  */
 818   if (line == 0 || file_path == NULL)
 819     return char_span (NULL, 0);
 820
 821   /* Make sure the cache exists before using it.  */
 822   diagnostic_file_cache_init ();
 823
 824   file_cache_slot *slot
 825     = global_dc->m_file_cache->lookup_or_add_file (file_path);
 826   if (slot == NULL)
 827     /* The file is not cached and could not be added.  */
 828     return char_span (NULL, 0);
 829
 830   char *text = NULL;
 831   ssize_t text_len;
 832   if (slot->read_line_num (line, &text, &text_len))
 833     return char_span (text, text_len);
 834
 835   /* No such line could be read.  */
 836   return char_span (NULL, 0);
 837 }
 838
 839 /* Tell whether the final line of FILE_PATH lacks a trailing newline.
 840    The answer is only meaningful once the whole file has been loaded,
 841    which a request for a line number beyond its end achieves.  */
 842
 843 bool
 844 location_missing_trailing_newline (const char *file_path)
 845 {
 846   diagnostic_file_cache_init ();
 847
 848   file_cache_slot *slot
 849     = global_dc->m_file_cache->lookup_or_add_file (file_path);
 850
 851   /* A file that cannot be cached is reported as not missing it.  */
 852   return slot ? slot->missing_trailing_newline_p () : false;
 853 }
 854
 855 /* Return TRUE if LOC, once resolved to its spelling location, is the
 856    location of built-in tokens.  LOC may be a virtual location: it
 857    can be the location of a built-in token that appears in the
 858    expansion list of a macro.  Note that this also works on tokens
 859    that result from built-in tokens; e.g. the function would return
 860    true if passed a token "4" that is the result of the expansion of
 861    the built-in __LINE__ macro.  */
 862 bool
 863 is_location_from_builtin_token (location_t loc)
 864 {
 865   const line_map_ordinary *ord_map = NULL;
 866   return (linemap_resolve_location (line_table, loc,
 867                                     LRK_SPELLING_LOCATION, &ord_map)
 868           == BUILTINS_LOCATION);
 869 }
 870
 871 /* Turn the source location LOC into a human readable
 872    expanded_location describing its caret.  A virtual LOC resolves
 873    to the expansion point of the macro involved.  The file name of
 874    the result is "<built-in>" if LOC resolves to a builtin location.  */
 875
 876 expanded_location
 877 expand_location (location_t loc)
 878 {
 879   const bool expansion_point_p = true;
 880   return expand_location_1 (loc, expansion_point_p, LOCATION_ASPECT_CARET);
 881 }
 882
 883 /* Turn LOC into a human readable expanded_location for the part of
 884    it selected by ASPECT.  A virtual LOC resolves to the spelling
 885    point of the token rather than to the macro expansion point.  The
 886    file name is "<built-in>" if LOC resolves to a builtin location.  */
 887
 888 expanded_location
 889 expand_location_to_spelling_point (location_t loc,
 890                                    enum location_aspect aspect)
 891 {
 892   const bool expansion_point_p = false;
 893   return expand_location_1 (loc, expansion_point_p, aspect);
 894 }
 895
 896 /* libcpp's rich_location class needs a way to expand location_t
 897    instances, and expects the client code to provide it as a symbol
 898    named
 899      linemap_client_expand_location_to_spelling_point
 900
 901    This is the implementation for libcommon.a (all host binaries);
 902    it merely forwards to expand_location_1.  */
 903
 904 expanded_location
 905 linemap_client_expand_location_to_spelling_point (location_t loc,
 906                                                   enum location_aspect aspect)
 907 {
 908   const bool expansion_point_p = false;
 909   return expand_location_1 (loc, expansion_point_p, aspect);
 910 }
 911
 912
 913 /* If LOCATION is in a system header and if it is a virtual location for
 914    a token coming from the expansion of a macro, unwind it to the
 915    location of the expansion point of the macro.  Otherwise, just return
 916    LOCATION.
 917
 918    This is used for instance when we want to emit diagnostics about a
 919    token that may be located in a macro that is itself defined in a
 920    system header, for example, for the NULL macro.  In such a case, if
 921    LOCATION were passed directly to diagnostic functions such as
 922    warning_at, the diagnostic would be suppressed (unless
 923    -Wsystem-headers).  */
 924
 925 location_t
 926 expansion_point_location_if_in_system_header (location_t location)
 927 {
 928   if (in_system_header_at (location))
 929     location = linemap_resolve_location (line_table, location,
 930                                          LRK_MACRO_EXPANSION_POINT,
 931                                          NULL);
 932   return location;
 933 }
 934
 935 /* If LOCATION is a virtual location for a token coming from the expansion
 936    of a macro, unwind to the location of the expansion point of the macro.  */
 937
 938 location_t
 939 expansion_point_location (location_t location)
 940 {
 941   return linemap_resolve_location (line_table, location,
 942                                    LRK_MACRO_EXPANSION_POINT, NULL);
 943 }
 944
 945 /* Construct a location with caret at CARET, ranging from START to
 946    finish e.g.
 947
 948                  11111111112
 949         12345678901234567890
 950      522
 951      523   return foo + bar;
 952                   ~~~~^~~~~
 953      524
 954
 955    The location's caret is at the "+", line 523 column 15, but starts
 956    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
 957    of "bar" at column 19.  */
 958
 959 location_t
 960 make_location (location_t caret, location_t start, location_t finish)
 961 {
 962   location_t pure_loc = get_pure_location (caret);
 963   source_range src_range;
 964   src_range.m_start = get_start (start);
 965   src_range.m_finish = get_finish (finish);
 966   location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
 967                                                    pure_loc,
 968                                                    src_range,
 969                                                    NULL);
 970   return combined_loc;
 971 }
 972
 973 /* Same as above, but taking a source range rather than two locations.  */
 974
 975 location_t
 976 make_location (location_t caret, source_range src_range)
 977 {
 978   location_t pure_loc = get_pure_location (caret);
 979   return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
 980 }
 981
 982 /* An expanded_location stores the column in byte units.  This function
 983    converts that column to display units.  That requires reading the associated
 984    source line in order to calculate the display width.  If that cannot be done
 985    for any reason, then returns the byte column as a fallback.  */
 986 int
 987 location_compute_display_column (expanded_location exploc, int tabstop)
 988 {
 989   if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
 990     return exploc.column;
 991   char_span line = location_get_source_line (exploc.file, exploc.line);
 992   /* If line is NULL, this function returns exploc.column which is the
 993      desired fallback.  */
 994   return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
 995                                             exploc.column, tabstop);
 996 }
 997
 998 /* Dump statistics to stderr about the memory usage of the line_table
 999    set of line maps.  This also displays some statistics about macro
1000    expansion.  */
1001
1002 void
1003 dump_line_table_statistics (void)
1004 {
1005   struct linemap_stats s;
1006   long total_used_map_size,
1007     macro_maps_size,
1008     total_allocated_map_size;
1009
1010   memset (&s, 0, sizeof (s));
1011
1012   linemap_get_statistics (line_table, &s);
1013
1014   macro_maps_size = s.macro_maps_used_size
1015     + s.macro_maps_locations_size;
1016
1017   total_allocated_map_size = s.ordinary_maps_allocated_size
1018     + s.macro_maps_allocated_size
1019     + s.macro_maps_locations_size;
1020
1021   total_used_map_size = s.ordinary_maps_used_size
1022     + s.macro_maps_used_size
1023     + s.macro_maps_locations_size;
1024
1025   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
1026            s.num_expanded_macros);
1027   if (s.num_expanded_macros != 0)
1028     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
1029              s.num_macro_tokens / s.num_expanded_macros);
1030   fprintf (stderr,
1031            "\nLine Table allocations during the "
1032            "compilation process\n");
1033   fprintf (stderr, "Number of ordinary maps used:        " PRsa (5) "\n",
1034            SIZE_AMOUNT (s.num_ordinary_maps_used));
1035   fprintf (stderr, "Ordinary map used size:              " PRsa (5) "\n",
1036            SIZE_AMOUNT (s.ordinary_maps_used_size));
1037   fprintf (stderr, "Number of ordinary maps allocated:   " PRsa (5) "\n",
1038            SIZE_AMOUNT (s.num_ordinary_maps_allocated));
1039   fprintf (stderr, "Ordinary maps allocated size:        " PRsa (5) "\n",
1040            SIZE_AMOUNT (s.ordinary_maps_allocated_size));
1041   fprintf (stderr, "Number of macro maps used:           " PRsa (5) "\n",
1042            SIZE_AMOUNT (s.num_macro_maps_used));
1043   fprintf (stderr, "Macro maps used size:                " PRsa (5) "\n",
1044            SIZE_AMOUNT (s.macro_maps_used_size));
1045   fprintf (stderr, "Macro maps locations size:           " PRsa (5) "\n",
1046            SIZE_AMOUNT (s.macro_maps_locations_size));
1047   fprintf (stderr, "Macro maps size:                     " PRsa (5) "\n",
1048            SIZE_AMOUNT (macro_maps_size));
1049   fprintf (stderr, "Duplicated maps locations size:      " PRsa (5) "\n",
1050            SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
1051   fprintf (stderr, "Total allocated maps size:           " PRsa (5) "\n",
1052            SIZE_AMOUNT (total_allocated_map_size));
1053   fprintf (stderr, "Total used maps size:                " PRsa (5) "\n",
1054            SIZE_AMOUNT (total_used_map_size));
1055   fprintf (stderr, "Ad-hoc table size:                   " PRsa (5) "\n",
1056            SIZE_AMOUNT (s.adhoc_table_size));
1057   fprintf (stderr, "Ad-hoc table entries used:           " PRsa (5) "\n",
1058            SIZE_AMOUNT (s.adhoc_table_entries_used));
1059   fprintf (stderr, "optimized_ranges:                    " PRsa (5) "\n",
1060            SIZE_AMOUNT (line_table->num_optimized_ranges));
1061   fprintf (stderr, "unoptimized_ranges:                  " PRsa (5) "\n",
1062            SIZE_AMOUNT (line_table->num_unoptimized_ranges));
1063
1064   fprintf (stderr, "\n");
1065 }
1066
1067 /* Get location one beyond the final location in ordinary map IDX.  */
1068
1069 static location_t
1070 get_end_location (class line_maps *set, unsigned int idx)
1071 {
1072   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1073     return set->highest_location;
1074
1075   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1076   return MAP_START_LOCATION (next_map);
1077 }
1078
/* Helper function for write_digit_row.
   Write the least significant decimal digit of DIGIT to STREAM as a
   single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1086
1087 /* Helper function for dump_location_info.
1088    Write a row of numbers to STREAM, numbering a source line,
1089    giving the units, tens, hundreds etc of the column number.  */
1090
1091 static void
1092 write_digit_row (FILE *stream, int indent,
1093                  const line_map_ordinary *map,
1094                  location_t loc, int max_col, int divisor)
1095 {
1096   fprintf (stream, "%*c", indent, ' ');
1097   fprintf (stream, "|");
1098   for (int column = 1; column < max_col; column++)
1099     {
1100       location_t column_loc = loc + (column << map->m_range_bits);
1101       write_digit (stream, column_loc / divisor);
1102     }
1103   fprintf (stream, "\n");
1104 }
1105
1106 /* Write a half-closed (START) / half-open (END) interval of
1107    location_t to STREAM.  */
1108
1109 static void
1110 dump_location_range (FILE *stream,
1111                      location_t start, location_t end)
1112 {
1113   fprintf (stream,
1114            "  location_t interval: %u <= loc < %u\n",
1115            start, end);
1116 }
1117
1118 /* Write a labelled description of a half-closed (START) / half-open (END)
1119    interval of location_t to STREAM.  */
1120
1121 static void
1122 dump_labelled_location_range (FILE *stream,
1123                               const char *name,
1124                               location_t start, location_t end)
1125 {
1126   fprintf (stream, "%s\n", name);
1127   dump_location_range (stream, start, end);
1128   fprintf (stream, "\n");
1129 }
1130
1131 /* Write a visualization of the locations in the line_table to STREAM.  */
1132
1133 void
1134 dump_location_info (FILE *stream)
1135 {
1136   /* Visualize the reserved locations.  */
1137   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1138                                 0, RESERVED_LOCATION_COUNT);
1139
1140   /* Visualize the ordinary line_map instances, rendering the sources. */
1141   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1142     {
1143       location_t end_location = get_end_location (line_table, idx);
1144       /* half-closed: doesn't include this one. */
1145
1146       const line_map_ordinary *map
1147         = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1148       fprintf (stream, "ORDINARY MAP: %i\n", idx);
1149       dump_location_range (stream,
1150                            MAP_START_LOCATION (map), end_location);
1151       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1152       fprintf (stream, "  starting at line: %i\n",
1153                ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1154       fprintf (stream, "  column and range bits: %i\n",
1155                map->m_column_and_range_bits);
1156       fprintf (stream, "  column bits: %i\n",
1157                map->m_column_and_range_bits - map->m_range_bits);
1158       fprintf (stream, "  range bits: %i\n",
1159                map->m_range_bits);
1160       const char * reason;
1161       switch (map->reason) {
1162       case LC_ENTER:
1163         reason = "LC_ENTER";
1164         break;
1165       case LC_LEAVE:
1166         reason = "LC_LEAVE";
1167         break;
1168       case LC_RENAME:
1169         reason = "LC_RENAME";
1170         break;
1171       case LC_RENAME_VERBATIM:
1172         reason = "LC_RENAME_VERBATIM";
1173         break;
1174       case LC_ENTER_MACRO:
1175         reason = "LC_RENAME_MACRO";
1176         break;
1177       default:
1178         reason = "Unknown";
1179       }
1180       fprintf (stream, "  reason: %d (%s)\n", map->reason, reason);
1181
1182       const line_map_ordinary *includer_map
1183         = linemap_included_from_linemap (line_table, map);
1184       fprintf (stream, "  included from location: %d",
1185                linemap_included_from (map));
1186       if (includer_map) {
1187         fprintf (stream, " (in ordinary map %d)",
1188                  int (includer_map - line_table->info_ordinary.maps));
1189       }
1190       fprintf (stream, "\n");
1191
1192       /* Render the span of source lines that this "map" covers.  */
1193       for (location_t loc = MAP_START_LOCATION (map);
1194            loc < end_location;
1195            loc += (1 << map->m_range_bits) )
1196         {
1197           gcc_assert (pure_location_p (line_table, loc) );
1198
1199           expanded_location exploc
1200             = linemap_expand_location (line_table, map, loc);
1201
1202           if (exploc.column == 0)
1203             {
1204               /* Beginning of a new source line: draw the line.  */
1205
1206               char_span line_text = location_get_source_line (exploc.file,
1207                                                               exploc.line);
1208               if (!line_text)
1209                 break;
1210               fprintf (stream,
1211                        "%s:%3i|loc:%5i|%.*s\n",
1212                        exploc.file, exploc.line,
1213                        loc,
1214                        (int)line_text.length (), line_text.get_buffer ());
1215
1216               /* "loc" is at column 0, which means "the whole line".
1217                  Render the locations *within* the line, by underlining
1218                  it, showing the location_t numeric values
1219                  at each column.  */
1220               size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1221               if (max_col > line_text.length ())
1222                 max_col = line_text.length () + 1;
1223
1224               int len_lnum = num_digits (exploc.line);
1225               if (len_lnum < 3)
1226                 len_lnum = 3;
1227               int len_loc = num_digits (loc);
1228               if (len_loc < 5)
1229                 len_loc = 5;
1230
1231               int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1232
1233               /* Thousands.  */
1234               if (end_location > 999)
1235                 write_digit_row (stream, indent, map, loc, max_col, 1000);
1236
1237               /* Hundreds.  */
1238               if (end_location > 99)
1239                 write_digit_row (stream, indent, map, loc, max_col, 100);
1240
1241               /* Tens.  */
1242               write_digit_row (stream, indent, map, loc, max_col, 10);
1243
1244               /* Units.  */
1245               write_digit_row (stream, indent, map, loc, max_col, 1);
1246             }
1247         }
1248       fprintf (stream, "\n");
1249     }
1250
1251   /* Visualize unallocated values.  */
1252   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1253                                 line_table->highest_location,
1254                                 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1255
1256   /* Visualize the macro line_map instances, rendering the sources. */
1257   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1258     {
1259       /* Each macro map that is allocated owns location_t values
1260          that are *lower* that the one before them.
1261          Hence it's meaningful to view them either in order of ascending
1262          source locations, or in order of ascending macro map index.  */
1263       const bool ascending_location_ts = true;
1264       unsigned int idx = (ascending_location_ts
1265                           ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1266                           : i);
1267       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1268       fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1269                idx,
1270                linemap_map_get_macro_name (map),
1271                MACRO_MAP_NUM_MACRO_TOKENS (map));
1272       dump_location_range (stream,
1273                            map->start_location,
1274                            (map->start_location
1275                             + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1276       inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1277               "expansion point is location %i",
1278               MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1279       fprintf (stream, "  map->start_location: %u\n",
1280                map->start_location);
1281
1282       fprintf (stream, "  macro_locations:\n");
1283       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1284         {
1285           location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1286           location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1287
1288           /* linemap_add_macro_token encodes token numbers in an expansion
1289              by putting them after MAP_START_LOCATION. */
1290
1291           /* I'm typically seeing 4 uninitialized entries at the end of
1292              0xafafafaf.
1293              This appears to be due to macro.c:replace_args
1294              adding 2 extra args for padding tokens; presumably there may
1295              be a leading and/or trailing padding token injected,
1296              each for 2 more location slots.
1297              This would explain there being up to 4 location_ts slots
1298              that may be uninitialized.  */
1299
1300           fprintf (stream, "    %u: %u, %u\n",
1301                    i,
1302                    x,
1303                    y);
1304           if (x == y)
1305             {
1306               if (x < MAP_START_LOCATION (map))
1307                 inform (x, "token %u has %<x-location == y-location == %u%>",
1308                         i, x);
1309               else
1310                 fprintf (stream,
1311                          "x-location == y-location == %u encodes token # %u\n",
1312                          x, x - MAP_START_LOCATION (map));
1313                 }
1314           else
1315             {
1316               inform (x, "token %u has %<x-location == %u%>", i, x);
1317               inform (x, "token %u has %<y-location == %u%>", i, y);
1318             }
1319         }
1320       fprintf (stream, "\n");
1321     }
1322
1323   /* It appears that MAX_LOCATION_T itself is never assigned to a
1324      macro map, presumably due to an off-by-one error somewhere
1325      between the logic in linemap_enter_macro and
1326      LINEMAPS_MACRO_LOWEST_LOCATION.  */
1327   dump_labelled_location_range (stream, "MAX_LOCATION_T",
1328                                 MAX_LOCATION_T,
1329                                 MAX_LOCATION_T + 1);
1330
1331   /* Visualize ad-hoc values.  */
1332   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1333                                 MAX_LOCATION_T + 1, UINT_MAX);
1334 }
1335
1336 /* string_concat's constructor.  */
1337
1338 string_concat::string_concat (int num, location_t *locs)
1339   : m_num (num)
1340 {
1341   m_locs = ggc_vec_alloc <location_t> (num);
1342   for (int i = 0; i < num; i++)
1343     m_locs[i] = locs[i];
1344 }
1345
1346 /* string_concat_db's constructor.  */
1347
1348 string_concat_db::string_concat_db ()
1349 {
1350   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1351 }
1352
1353 /* Record that a string concatenation occurred, covering NUM
1354    string literal tokens.  LOCS is an array of size NUM, containing the
1355    locations of the tokens.  A copy of LOCS is taken.  */
1356
1357 void
1358 string_concat_db::record_string_concatenation (int num, location_t *locs)
1359 {
1360   gcc_assert (num > 1);
1361   gcc_assert (locs);
1362
1363   location_t key_loc = get_key_loc (locs[0]);
1364
1365   string_concat *concat
1366     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1367   m_table->put (key_loc, concat);
1368 }
1369
1370 /* Determine if LOC was the location of the initial token of a
1371    concatenation of string literal tokens.
1372    If so, *OUT_NUM is written to with the number of tokens, and
1373    *OUT_LOCS with the location of an array of locations of the
1374    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
1375    storage owned by the string_concat_db.
1376    Otherwise, return false.  */
1377
1378 bool
1379 string_concat_db::get_string_concatenation (location_t loc,
1380                                             int *out_num,
1381                                             location_t **out_locs)
1382 {
1383   gcc_assert (out_num);
1384   gcc_assert (out_locs);
1385
1386   location_t key_loc = get_key_loc (loc);
1387
1388   string_concat **concat = m_table->get (key_loc);
1389   if (!concat)
1390     return false;
1391
1392   *out_num = (*concat)->m_num;
1393   *out_locs =(*concat)->m_locs;
1394   return true;
1395 }
1396
1397 /* Internal function.  Canonicalize LOC into a form suitable for
1398    use as a key within the database, stripping away macro expansion,
1399    ad-hoc information, and range information, using the location of
1400    the start of LOC within an ordinary linemap.  */
1401
1402 location_t
1403 string_concat_db::get_key_loc (location_t loc)
1404 {
1405   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1406                                   NULL);
1407
1408   loc = get_range_from_loc (line_table, loc).m_start;
1409
1410   return loc;
1411 }
1412
1413 /* Helper class for use within get_substring_ranges_for_loc.
1414    An vec of cpp_string with responsibility for releasing all of the
1415    str->text for each str in the vector.  */
1416
1417 class auto_cpp_string_vec :  public auto_vec <cpp_string>
1418 {
1419  public:
1420   auto_cpp_string_vec (int alloc)
1421     : auto_vec <cpp_string> (alloc) {}
1422
1423   ~auto_cpp_string_vec ()
1424   {
1425     /* Clean up the copies within this vec.  */
1426     int i;
1427     cpp_string *str;
1428     FOR_EACH_VEC_ELT (*this, i, str)
1429       free (const_cast <unsigned char *> (str->text));
1430   }
1431 };
1432
/* Attempt to populate RANGES with source location information on the
   individual characters within the string literal found at STRLOC.
   If CONCATS is non-NULL, then any string literals that the token at
   STRLOC  was concatenated with are also added to RANGES.

   PFILE must be non-NULL; it supplies the preprocessor options and is
   handed, together with TYPE, to cpp_interpret_string_ranges.

   Return NULL if successful, or an error message if any errors occurred (in
   which case RANGES may be only partially populated and should not
   be used).  The messages returned directly by this function are string
   literals, so the caller must not free them.

   This is implemented by re-parsing the relevant source line(s).  */

static const char *
get_substring_ranges_for_loc (cpp_reader *pfile,
                              string_concat_db *concats,
                              location_t strloc,
                              enum cpp_ttype type,
                              cpp_substring_ranges &ranges)
{
  gcc_assert (pfile);

  if (strloc == UNKNOWN_LOCATION)
    return "unknown location";

  /* Reparsing the strings requires accurate location information.
     If -ftrack-macro-expansion has been overridden from its default
     of 2, then we might have a location of a macro expansion point,
     rather than the location of the literal itself.
     Avoid this by requiring that we have full macro expansion tracking
     for substring locations to be available.  */
  if (cpp_get_options (pfile)->track_macro_expansion != 2)
    return "track_macro_expansion != 2";

  /* If #line or # 44 "file"-style directives are present, then there's
     no guarantee that the line numbers we have can be used to locate
     the strings.  For example, we might have a .i file with # directives
     pointing back to lines within a .c file, but the .c file might
     have been edited since the .i file was created.
     In such a case, the safest course is to disable on-demand substring
     locations.  */
  if (line_table->seen_line_directive)
    return "seen line directive";

  /* If string concatenation has occurred at STRLOC, get the locations
     of all of the literal tokens making up the compound string.
     Otherwise, just use STRLOC.
     The result of the lookup is deliberately not checked: when it fails,
     NUM_LOCS and STRLOCS are left at the single-token defaults set up
     here.  */
  int num_locs = 1;
  location_t *strlocs = &strloc;
  if (concats)
    concats->get_string_concatenation (strloc, &num_locs, &strlocs);

  /* Parallel vectors with one entry per literal token: the text of the
     token, and a reader positioned at its start.  STRS owns the copies of
     the text, so they are released on every return path below.  */
  auto_cpp_string_vec strs (num_locs);
  auto_vec <cpp_string_location_reader> loc_readers (num_locs);
  for (int i = 0; i < num_locs; i++)
    {
      /* Get range of strloc.  We will use it to locate the start and finish
         of the literal token within the line.  */
      source_range src_range = get_range_from_loc (line_table, strlocs[i]);

      if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
        {
          /* If the string token was within a macro expansion, then we can
             cope with it for the simple case where we have a single token.
             Otherwise, bail out.  */
          if (src_range.m_start != src_range.m_finish)
            return "macro expansion";
        }
      else
        {
          if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
            /* If so, we can't reliably determine where the token started within
               its line.  */
            return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";

          if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
            /* If so, we can't reliably determine where the token finished
               within its line.  */
            return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
        }

      /* Expand both ends of the range to file/line/column form, and
         require them to describe a span within a single source line.  */
      expanded_location start
        = expand_location_to_spelling_point (src_range.m_start,
                                             LOCATION_ASPECT_START);
      expanded_location finish
        = expand_location_to_spelling_point (src_range.m_finish,
                                             LOCATION_ASPECT_FINISH);
      if (start.file != finish.file)
        return "range endpoints are in different files";
      if (start.line != finish.line)
        return "range endpoints are on different lines";
      if (start.column > finish.column)
        return "range endpoints are reversed";

      char_span line = location_get_source_line (start.file, start.line);
      if (!line)
        return "unable to read source line";

      /* Determine the location of the literal (including quotes
         and leading prefix chars, such as the 'u' in a u""
         token).  Both columns are inclusive, hence the "+ 1".  */
      size_t literal_length = finish.column - start.column + 1;

      /* Ensure that we don't crash if we got the wrong location.  */
      if (start.column < 1)
        return "zero start column";
      if (line.length () < (start.column - 1 + literal_length))
        return "line is not wide enough";

      /* Columns are 1-based, whereas the offset into the line is
         0-based.  */
      char_span literal = line.subspan (start.column - 1, literal_length);

      cpp_string from;
      from.len = literal_length;
      /* Make a copy of the literal, to avoid having to rely on
         the lifetime of the copy of the line within the cache.
         This will be released by the auto_cpp_string_vec dtor.  */
      from.text = (unsigned char *)literal.xstrdup ();
      strs.safe_push (from);

      /* For very long lines, a new linemap could have started
         halfway through the token.
         Ensure that the loc_reader uses the linemap of the
         *end* of the token for its start location.  */
      const line_map_ordinary *start_ord_map;
      linemap_resolve_location (line_table, src_range.m_start,
                                LRK_SPELLING_LOCATION, &start_ord_map);
      const line_map_ordinary *final_ord_map;
      linemap_resolve_location (line_table, src_range.m_finish,
                                LRK_SPELLING_LOCATION, &final_ord_map);
      if (start_ord_map == NULL || final_ord_map == NULL)
        return "failed to get ordinary maps";
      /* Bulletproofing.  We ought to only have different ordinary maps
         for start vs finish due to line-length jumps.  */
      if (start_ord_map != final_ord_map
          && start_ord_map->to_file != final_ord_map->to_file)
        return "start and finish are spelled in different ordinary maps";
      /* The file from linemap_resolve_location ought to match that from
         expand_location_to_spelling_point.  */
      if (start_ord_map->to_file != start.file)
        return "mismatching file after resolving linemap";

      /* Re-encode the start of the token relative to the map of its
         end.  */
      location_t start_loc
        = linemap_position_for_line_and_column (line_table, final_ord_map,
                                                start.line, start.column);

      cpp_string_location_reader loc_reader (start_loc, line_table);
      loc_readers.safe_push (loc_reader);
    }

  /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
  const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
                                                 loc_readers.address (),
                                                 num_locs, &ranges, type);
  if (err)
    return err;

  /* Success: "ranges" should now contain information on the string.  */
  return NULL;
}
1590
1591 /* Attempt to populate *OUT_LOC with source location information on the
1592    given characters within the string literal found at STRLOC.
1593    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1594    character set.
1595
1596    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX  = 7
1597    and string literal "012345\n789"
1598    *OUT_LOC is written to with:
1599      "012345\n789"
1600          ~^~~~~
1601
1602    If CONCATS is non-NULL, then any string literals that the token at
1603    STRLOC was concatenated with are also considered.
1604
1605    This is implemented by re-parsing the relevant source line(s).
1606
1607    Return NULL if successful, or an error message if any errors occurred.
1608    Error messages are intended for GCC developers (to help debugging) rather
1609    than for end-users.  */
1610
1611 const char *
1612 get_location_within_string (cpp_reader *pfile,
1613                             string_concat_db *concats,
1614                             location_t strloc,
1615                             enum cpp_ttype type,
1616                             int caret_idx, int start_idx, int end_idx,
1617                             location_t *out_loc)
1618 {
1619   gcc_checking_assert (caret_idx >= 0);
1620   gcc_checking_assert (start_idx >= 0);
1621   gcc_checking_assert (end_idx >= 0);
1622   gcc_assert (out_loc);
1623
1624   cpp_substring_ranges ranges;
1625   const char *err
1626     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1627   if (err)
1628     return err;
1629
1630   if (caret_idx >= ranges.get_num_ranges ())
1631     return "caret_idx out of range";
1632   if (start_idx >= ranges.get_num_ranges ())
1633     return "start_idx out of range";
1634   if (end_idx >= ranges.get_num_ranges ())
1635     return "end_idx out of range";
1636
1637   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1638                             ranges.get_range (start_idx).m_start,
1639                             ranges.get_range (end_idx).m_finish);
1640   return NULL;
1641 }
1642
1643 #if CHECKING_P
1644
1645 namespace selftest {
1646
1647 /* Selftests of location handling.  */
1648
1649 /* Attempt to populate *OUT_RANGE with source location information on the
1650    given character within the string literal found at STRLOC.
1651    CHAR_IDX refers to an offset within the execution character set.
1652    If CONCATS is non-NULL, then any string literals that the token at
1653    STRLOC was concatenated with are also considered.
1654
1655    This is implemented by re-parsing the relevant source line(s).
1656
1657    Return NULL if successful, or an error message if any errors occurred.
1658    Error messages are intended for GCC developers (to help debugging) rather
1659    than for end-users.  */
1660
1661 static const char *
1662 get_source_range_for_char (cpp_reader *pfile,
1663                            string_concat_db *concats,
1664                            location_t strloc,
1665                            enum cpp_ttype type,
1666                            int char_idx,
1667                            source_range *out_range)
1668 {
1669   gcc_checking_assert (char_idx >= 0);
1670   gcc_assert (out_range);
1671
1672   cpp_substring_ranges ranges;
1673   const char *err
1674     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1675   if (err)
1676     return err;
1677
1678   if (char_idx >= ranges.get_num_ranges ())
1679     return "char_idx out of range";
1680
1681   *out_range = ranges.get_range (char_idx);
1682   return NULL;
1683 }
1684
1685 /* As get_source_range_for_char, but write to *OUT the number
1686    of ranges that are available.  */
1687
1688 static const char *
1689 get_num_source_ranges_for_substring (cpp_reader *pfile,
1690                                      string_concat_db *concats,
1691                                      location_t strloc,
1692                                      enum cpp_ttype type,
1693                                      int *out)
1694 {
1695   gcc_assert (out);
1696
1697   cpp_substring_ranges ranges;
1698   const char *err
1699     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1700
1701   if (err)
1702     return err;
1703
1704   *out = ranges.get_num_ranges ();
1705   return NULL;
1706 }
1707
1708 /* Selftests of location handling.  */
1709
1710 /* Verify that compare() on linenum_type handles comparisons over the full
1711    range of the type.  */
1712
1713 static void
1714 test_linenum_comparisons ()
1715 {
1716   linenum_type min_line (0);
1717   linenum_type max_line (0xffffffff);
1718   ASSERT_EQ (0, compare (min_line, min_line));
1719   ASSERT_EQ (0, compare (max_line, max_line));
1720
1721   ASSERT_GT (compare (max_line, min_line), 0);
1722   ASSERT_LT (compare (min_line, max_line), 0);
1723 }
1724
1725 /* Helper function for verifying location data: when location_t
1726    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1727    as having column 0.  */
1728
1729 static bool
1730 should_have_column_data_p (location_t loc)
1731 {
1732   if (IS_ADHOC_LOC (loc))
1733     loc = get_location_from_adhoc_loc (line_table, loc);
1734   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1735     return false;
1736   return true;
1737 }
1738
1739 /* Selftest for should_have_column_data_p.  */
1740
1741 static void
1742 test_should_have_column_data_p ()
1743 {
1744   ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1745   ASSERT_TRUE
1746     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1747   ASSERT_FALSE
1748     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1749 }
1750
1751 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1752    on LOC.  */
1753
1754 static void
1755 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1756               location_t loc)
1757 {
1758   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1759   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1760   /* If location_t values are sufficiently high, then column numbers
1761      will be unavailable and LOCATION_COLUMN (loc) will be 0.
1762      When close to the threshold, column numbers *may* be present: if
1763      the final linemap before the threshold contains a line that straddles
1764      the threshold, locations in that line have column information.  */
1765   if (should_have_column_data_p (loc))
1766     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1767 }
1768
1769 /* Various selftests involve constructing a line table and one or more
1770    line maps within it.
1771
1772    For maximum test coverage we want to run these tests with a variety
1773    of situations:
1774    - line_table->default_range_bits: some frontends use a non-zero value
1775    and others use zero
1776    - the fallback modes within line-map.c: there are various threshold
1777    values for location_t beyond line-map.c changes
1778    behavior (disabling of the range-packing optimization, disabling
1779    of column-tracking).  We can exercise these by starting the line_table
1780    at interesting values at or near these thresholds.
1781
1782    The following struct describes a particular case within our test
1783    matrix.  */
1784
1785 class line_table_case
1786 {
1787 public:
1788   line_table_case (int default_range_bits, int base_location)
1789   : m_default_range_bits (default_range_bits),
1790     m_base_location (base_location)
1791   {}
1792
1793   int m_default_range_bits;
1794   int m_base_location;
1795 };
1796
1797 /* Constructor.  Store the old value of line_table, and create a new
1798    one, using sane defaults.  */
1799
1800 line_table_test::line_table_test ()
1801 {
1802   gcc_assert (saved_line_table == NULL);
1803   saved_line_table = line_table;
1804   line_table = ggc_alloc<line_maps> ();
1805   linemap_init (line_table, BUILTINS_LOCATION);
1806   gcc_assert (saved_line_table->reallocator);
1807   line_table->reallocator = saved_line_table->reallocator;
1808   gcc_assert (saved_line_table->round_alloc_size);
1809   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1810   line_table->default_range_bits = 0;
1811 }
1812
1813 /* Constructor.  Store the old value of line_table, and create a new
1814    one, using the sitation described in CASE_.  */
1815
1816 line_table_test::line_table_test (const line_table_case &case_)
1817 {
1818   gcc_assert (saved_line_table == NULL);
1819   saved_line_table = line_table;
1820   line_table = ggc_alloc<line_maps> ();
1821   linemap_init (line_table, BUILTINS_LOCATION);
1822   gcc_assert (saved_line_table->reallocator);
1823   line_table->reallocator = saved_line_table->reallocator;
1824   gcc_assert (saved_line_table->round_alloc_size);
1825   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1826   line_table->default_range_bits = case_.m_default_range_bits;
1827   if (case_.m_base_location)
1828     {
1829       line_table->highest_location = case_.m_base_location;
1830       line_table->highest_line = case_.m_base_location;
1831     }
1832 }
1833
1834 /* Destructor.  Restore the old value of line_table.  */
1835
1836 line_table_test::~line_table_test ()
1837 {
1838   gcc_assert (saved_line_table != NULL);
1839   line_table = saved_line_table;
1840   saved_line_table = NULL;
1841 }
1842
1843 /* Verify basic operation of ordinary linemaps.  */
1844
1845 static void
1846 test_accessing_ordinary_linemaps (const line_table_case &case_)
1847 {
1848   line_table_test ltt (case_);
1849
1850   /* Build a simple linemap describing some locations. */
1851   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1852
1853   linemap_line_start (line_table, 1, 100);
1854   location_t loc_a = linemap_position_for_column (line_table, 1);
1855   location_t loc_b = linemap_position_for_column (line_table, 23);
1856
1857   linemap_line_start (line_table, 2, 100);
1858   location_t loc_c = linemap_position_for_column (line_table, 1);
1859   location_t loc_d = linemap_position_for_column (line_table, 17);
1860
1861   /* Example of a very long line.  */
1862   linemap_line_start (line_table, 3, 2000);
1863   location_t loc_e = linemap_position_for_column (line_table, 700);
1864
1865   /* Transitioning back to a short line.  */
1866   linemap_line_start (line_table, 4, 0);
1867   location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1868
1869   if (should_have_column_data_p (loc_back_to_short))
1870     {
1871       /* Verify that we switched to short lines in the linemap.  */
1872       line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1873       ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1874     }
1875
1876   /* Example of a line that will eventually be seen to be longer
1877      than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1878      below that.  */
1879   linemap_line_start (line_table, 5, 2000);
1880
1881   location_t loc_start_of_very_long_line
1882     = linemap_position_for_column (line_table, 2000);
1883   location_t loc_too_wide
1884     = linemap_position_for_column (line_table, 4097);
1885   location_t loc_too_wide_2
1886     = linemap_position_for_column (line_table, 4098);
1887
1888   /* ...and back to a sane line length.  */
1889   linemap_line_start (line_table, 6, 100);
1890   location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1891
1892   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1893
1894   /* Multiple files.  */
1895   linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1896   linemap_line_start (line_table, 1, 200);
1897   location_t loc_f = linemap_position_for_column (line_table, 150);
1898   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1899
1900   /* Verify that we can recover the location info.  */
1901   assert_loceq ("foo.c", 1, 1, loc_a);
1902   assert_loceq ("foo.c", 1, 23, loc_b);
1903   assert_loceq ("foo.c", 2, 1, loc_c);
1904   assert_loceq ("foo.c", 2, 17, loc_d);
1905   assert_loceq ("foo.c", 3, 700, loc_e);
1906   assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1907
1908   /* In the very wide line, the initial location should be fully tracked.  */
1909   assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1910   /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1911      be disabled.  */
1912   assert_loceq ("foo.c", 5, 0, loc_too_wide);
1913   assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1914   /*...and column-tracking should be re-enabled for subsequent lines.  */
1915   assert_loceq ("foo.c", 6, 10, loc_sane_again);
1916
1917   assert_loceq ("bar.c", 1, 150, loc_f);
1918
1919   ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1920   ASSERT_TRUE (pure_location_p (line_table, loc_a));
1921
1922   /* Verify using make_location to build a range, and extracting data
1923      back from it.  */
1924   location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1925   ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1926   ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1927   source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1928   ASSERT_EQ (loc_b, src_range.m_start);
1929   ASSERT_EQ (loc_d, src_range.m_finish);
1930 }
1931
1932 /* Verify various properties of UNKNOWN_LOCATION.  */
1933
1934 static void
1935 test_unknown_location ()
1936 {
1937   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1938   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1939   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1940 }
1941
1942 /* Verify various properties of BUILTINS_LOCATION.  */
1943
1944 static void
1945 test_builtins ()
1946 {
1947   assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1948   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1949 }
1950
1951 /* Regression test for make_location.
1952    Ensure that we use pure locations for the start/finish of the range,
1953    rather than storing a packed or ad-hoc range as the start/finish.  */
1954
1955 static void
1956 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1957 {
1958   /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1959      with C++ frontend.
1960      ....................0000000001111111111222.
1961      ....................1234567890123456789012.  */
1962   const char *content = "     r += !aaa == bbb;\n";
1963   temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1964   line_table_test ltt (case_);
1965   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1966
1967   const location_t c11 = linemap_position_for_column (line_table, 11);
1968   const location_t c12 = linemap_position_for_column (line_table, 12);
1969   const location_t c13 = linemap_position_for_column (line_table, 13);
1970   const location_t c14 = linemap_position_for_column (line_table, 14);
1971   const location_t c21 = linemap_position_for_column (line_table, 21);
1972
1973   if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1974     return;
1975
1976   /* Use column 13 for the caret location, arbitrarily, to verify that we
1977      handle start != caret.  */
1978   const location_t aaa = make_location (c13, c12, c14);
1979   ASSERT_EQ (c13, get_pure_location (aaa));
1980   ASSERT_EQ (c12, get_start (aaa));
1981   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1982   ASSERT_EQ (c14, get_finish (aaa));
1983   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1984
1985   /* Make a location using a location with a range as the start-point.  */
1986   const location_t not_aaa = make_location (c11, aaa, c14);
1987   ASSERT_EQ (c11, get_pure_location (not_aaa));
1988   /* It should use the start location of the range, not store the range
1989      itself.  */
1990   ASSERT_EQ (c12, get_start (not_aaa));
1991   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1992   ASSERT_EQ (c14, get_finish (not_aaa));
1993   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1994
1995   /* Similarly, make a location with a range as the end-point.  */
1996   const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1997   ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1998   ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1999   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2000   ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2001   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2002   const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
2003   /* It should use the finish location of the range, not store the range
2004      itself.  */
2005   ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2006   ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2007   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2008   ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2009   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2010 }
2011
2012 /* Verify reading of input files (e.g. for caret-based diagnostics).  */
2013
2014 static void
2015 test_reading_source_line ()
2016 {
2017   /* Create a tempfile and write some text to it.  */
2018   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2019                         "01234567890123456789\n"
2020                         "This is the test text\n"
2021                         "This is the 3rd line");
2022
2023   /* Read back a specific line from the tempfile.  */
2024   char_span source_line = location_get_source_line (tmp.get_filename (), 3);
2025   ASSERT_TRUE (source_line);
2026   ASSERT_TRUE (source_line.get_buffer () != NULL);
2027   ASSERT_EQ (20, source_line.length ());
2028   ASSERT_TRUE (!strncmp ("This is the 3rd line",
2029                          source_line.get_buffer (), source_line.length ()));
2030
2031   source_line = location_get_source_line (tmp.get_filename (), 2);
2032   ASSERT_TRUE (source_line);
2033   ASSERT_TRUE (source_line.get_buffer () != NULL);
2034   ASSERT_EQ (21, source_line.length ());
2035   ASSERT_TRUE (!strncmp ("This is the test text",
2036                          source_line.get_buffer (), source_line.length ()));
2037
2038   source_line = location_get_source_line (tmp.get_filename (), 4);
2039   ASSERT_FALSE (source_line);
2040   ASSERT_TRUE (source_line.get_buffer () == NULL);
2041 }
2042
2043 /* Tests of lexing.  */
2044
/* Assert that calling cpp_token_as_text on token TOK from PARSER gives
   a string equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    const char *tok_text_                                               \
      = (const char *) cpp_token_as_text ((PARSER), (TOK));             \
    ASSERT_STREQ ((EXPECTED_TEXT), tok_text_);                          \
  SELFTEST_END_STMT
2053
2054 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2055    and ranges from EXP_START_COL to EXP_FINISH_COL.
2056    Use LOC as the effective location of the selftest.  */
2057
2058 static void
2059 assert_token_loc_eq (const location &loc,
2060                      const cpp_token *tok,
2061                      const char *exp_filename, int exp_linenum,
2062                      int exp_start_col, int exp_finish_col)
2063 {
2064   location_t tok_loc = tok->src_loc;
2065   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2066   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2067
2068   /* If location_t values are sufficiently high, then column numbers
2069      will be unavailable.  */
2070   if (!should_have_column_data_p (tok_loc))
2071     return;
2072
2073   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2074   source_range tok_range = get_range_from_loc (line_table, tok_loc);
2075   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2076   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
2077 }
2078
/* Check TOK->src_loc via assert_token_loc_eq, passing SELFTEST_LOCATION
   as the effective location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,             \
                            EXP_START_COL, EXP_FINISH_COL)              \
  assert_token_loc_eq (SELFTEST_LOCATION,                               \
                       (TOK),                                           \
                       (EXP_FILENAME),                                  \
                       (EXP_LINENUM),                                   \
                       (EXP_START_COL),                                 \
                       (EXP_FINISH_COL))
2086
2087 /* Test of lexing a file using libcpp, verifying tokens and their
2088    location information.  */
2089
2090 static void
2091 test_lexer (const line_table_case &case_)
2092 {
2093   /* Create a tempfile and write some text to it.  */
2094   const char *content =
2095     /*00000000011111111112222222222333333.3333444444444.455555555556
2096       12345678901234567890123456789012345.6789012345678.901234567890.  */
2097     ("test_name /* c-style comment */\n"
2098      "                                  \"test literal\"\n"
2099      " // test c++-style comment\n"
2100      "   42\n");
2101   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2102
2103   line_table_test ltt (case_);
2104
2105   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2106
2107   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2108   ASSERT_NE (fname, NULL);
2109
2110   /* Verify that we get the expected tokens back, with the correct
2111      location information.  */
2112
2113   location_t loc;
2114   const cpp_token *tok;
2115   tok = cpp_get_token_with_location (parser, &loc);
2116   ASSERT_NE (tok, NULL);
2117   ASSERT_EQ (tok->type, CPP_NAME);
2118   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2119   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2120
2121   tok = cpp_get_token_with_location (parser, &loc);
2122   ASSERT_NE (tok, NULL);
2123   ASSERT_EQ (tok->type, CPP_STRING);
2124   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2125   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2126
2127   tok = cpp_get_token_with_location (parser, &loc);
2128   ASSERT_NE (tok, NULL);
2129   ASSERT_EQ (tok->type, CPP_NUMBER);
2130   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2131   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2132
2133   tok = cpp_get_token_with_location (parser, &loc);
2134   ASSERT_NE (tok, NULL);
2135   ASSERT_EQ (tok->type, CPP_EOF);
2136
2137   cpp_finish (parser, NULL);
2138   cpp_destroy (parser);
2139 }
2140
2141 /* Forward decls.  */
2142
2143 class lexer_test;
2144 class lexer_test_options;
2145
2146 /* A class for specifying options of a lexer_test.
2147    The "apply" vfunc is called during the lexer_test constructor.  */
2148
2149 class lexer_test_options
2150 {
2151  public:
2152   virtual void apply (lexer_test &) = 0;
2153 };
2154
2155 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2156    in its dtor.
2157
2158    This is needed by struct lexer_test to ensure that the cleanup of the
2159    cpp_reader happens *after* the cleanup of the temp_source_file.  */
2160
2161 class cpp_reader_ptr
2162 {
2163  public:
2164   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2165
2166   ~cpp_reader_ptr ()
2167   {
2168     cpp_finish (m_ptr, NULL);
2169     cpp_destroy (m_ptr);
2170   }
2171
2172   operator cpp_reader * () const { return m_ptr; }
2173
2174  private:
2175   cpp_reader *m_ptr;
2176 };
2177
2178 /* A struct for writing lexer tests.  */
2179
2180 class lexer_test
2181 {
2182 public:
2183   lexer_test (const line_table_case &case_, const char *content,
2184               lexer_test_options *options);
2185   ~lexer_test ();
2186
2187   const cpp_token *get_token ();
2188
2189   /* The ordering of these fields matters.
2190      The line_table_test must be first, since the cpp_reader_ptr
2191      uses it.
2192      The cpp_reader must be cleaned up *after* the temp_source_file
2193      since the filenames in input.c's input cache are owned by the
2194      cpp_reader; in particular, when ~temp_source_file evicts the
2195      filename the filenames must still be alive.  */
2196   line_table_test m_ltt;
2197   cpp_reader_ptr m_parser;
2198   temp_source_file m_tempfile;
2199   string_concat_db m_concats;
2200   bool m_implicitly_expect_EOF;
2201 };
2202
2203 /* Use an EBCDIC encoding for the execution charset, specifically
2204    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2205
2206    This exercises iconv integration within libcpp.
2207    Not every build of iconv supports the given charset,
2208    so we need to flag this error and handle it gracefully.  */
2209
2210 class ebcdic_execution_charset : public lexer_test_options
2211 {
2212  public:
2213   ebcdic_execution_charset () : m_num_iconv_errors (0)
2214     {
2215       gcc_assert (s_singleton == NULL);
2216       s_singleton = this;
2217     }
2218   ~ebcdic_execution_charset ()
2219     {
2220       gcc_assert (s_singleton == this);
2221       s_singleton = NULL;
2222     }
2223
2224   void apply (lexer_test &test) FINAL OVERRIDE
2225   {
2226     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2227     cpp_opts->narrow_charset = "IBM1047";
2228
2229     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2230     callbacks->diagnostic = on_diagnostic;
2231   }
2232
2233   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2234                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2235                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2236                              rich_location *richloc ATTRIBUTE_UNUSED,
2237                              const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2238     ATTRIBUTE_FPTR_PRINTF(5,0)
2239   {
2240     gcc_assert (s_singleton);
2241     /* Avoid exgettext from picking this up, it is translated in libcpp.  */
2242     const char *msg = "conversion from %s to %s not supported by iconv";
2243 #ifdef ENABLE_NLS
2244     msg = dgettext ("cpplib", msg);
2245 #endif
2246     /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2247        when the local iconv build doesn't support the conversion.  */
2248     if (strcmp (msgid, msg) == 0)
2249       {
2250         s_singleton->m_num_iconv_errors++;
2251         return true;
2252       }
2253
2254     /* Otherwise, we have an unexpected error.  */
2255     abort ();
2256   }
2257
2258   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2259
2260  private:
2261   static ebcdic_execution_charset *s_singleton;
2262   int m_num_iconv_errors;
2263 };
2264
2265 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2266
2267 /* A lexer_test_options subclass that records a list of diagnostic
2268    messages emitted by the lexer.  */
2269
2270 class lexer_diagnostic_sink : public lexer_test_options
2271 {
2272  public:
2273   lexer_diagnostic_sink ()
2274   {
2275     gcc_assert (s_singleton == NULL);
2276     s_singleton = this;
2277   }
2278   ~lexer_diagnostic_sink ()
2279   {
2280     gcc_assert (s_singleton == this);
2281     s_singleton = NULL;
2282
2283     int i;
2284     char *str;
2285     FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2286       free (str);
2287   }
2288
2289   void apply (lexer_test &test) FINAL OVERRIDE
2290   {
2291     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2292     callbacks->diagnostic = on_diagnostic;
2293   }
2294
2295   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2296                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2297                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2298                              rich_location *richloc ATTRIBUTE_UNUSED,
2299                              const char *msgid, va_list *ap)
2300     ATTRIBUTE_FPTR_PRINTF(5,0)
2301   {
2302     char *msg = xvasprintf (msgid, *ap);
2303     s_singleton->m_diagnostics.safe_push (msg);
2304     return true;
2305   }
2306
2307   auto_vec<char *> m_diagnostics;
2308
2309  private:
2310   static lexer_diagnostic_sink *s_singleton;
2311 };
2312
2313 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2314
2315 /* Constructor.  Override line_table with a new instance based on CASE_,
2316    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
2317    start parsing the tempfile.  */
2318
2319 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2320                         lexer_test_options *options)
2321 : m_ltt (case_),
2322   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2323   /* Create a tempfile and write the text to it.  */
2324   m_tempfile (SELFTEST_LOCATION, ".c", content),
2325   m_concats (),
2326   m_implicitly_expect_EOF (true)
2327 {
2328   if (options)
2329     options->apply (*this);
2330
2331   cpp_init_iconv (m_parser);
2332
2333   /* Parse the file.  */
2334   const char *fname = cpp_read_main_file (m_parser,
2335                                           m_tempfile.get_filename ());
2336   ASSERT_NE (fname, NULL);
2337 }
2338
2339 /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
2340
2341 lexer_test::~lexer_test ()
2342 {
2343   location_t loc;
2344   const cpp_token *tok;
2345
2346   if (m_implicitly_expect_EOF)
2347     {
2348       tok = cpp_get_token_with_location (m_parser, &loc);
2349       ASSERT_NE (tok, NULL);
2350       ASSERT_EQ (tok->type, CPP_EOF);
2351     }
2352 }
2353
2354 /* Get the next token from m_parser.  */
2355
2356 const cpp_token *
2357 lexer_test::get_token ()
2358 {
2359   location_t loc;
2360   const cpp_token *tok;
2361
2362   tok = cpp_get_token_with_location (m_parser, &loc);
2363   ASSERT_NE (tok, NULL);
2364   return tok;
2365 }
2366
2367 /* Verify that locations within string literals are correctly handled.  */
2368
2369 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2370    using the string concatenation database for TEST.
2371
2372    Assert that the character at index IDX is on EXPECTED_LINE,
2373    and that it begins at column EXPECTED_START_COL and ends at
2374    EXPECTED_FINISH_COL (unless the locations are beyond
2375    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2376    columns).  */
2377
2378 static void
2379 assert_char_at_range (const location &loc,
2380                       lexer_test& test,
2381                       location_t strloc, enum cpp_ttype type, int idx,
2382                       int expected_line, int expected_start_col,
2383                       int expected_finish_col)
2384 {
2385   cpp_reader *pfile = test.m_parser;
2386   string_concat_db *concats = &test.m_concats;
2387
2388   source_range actual_range = source_range();
2389   const char *err
2390     = get_source_range_for_char (pfile, concats, strloc, type, idx,
2391                                  &actual_range);
2392   if (should_have_column_data_p (strloc))
2393     ASSERT_EQ_AT (loc, NULL, err);
2394   else
2395     {
2396       ASSERT_STREQ_AT (loc,
2397                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2398                        err);
2399       return;
2400     }
2401
2402   int actual_start_line = LOCATION_LINE (actual_range.m_start);
2403   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2404   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2405   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2406
2407   if (should_have_column_data_p (actual_range.m_start))
2408     {
2409       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2410       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2411     }
2412   if (should_have_column_data_p (actual_range.m_finish))
2413     {
2414       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2415       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2416     }
2417 }
2418
/* Call assert_char_at_range, passing SELFTEST_LOCATION as the effective
   location of any errors.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX,             \
                             EXPECTED_LINE, EXPECTED_START_COL,         \
                             EXPECTED_FINISH_COL)                       \
  assert_char_at_range (SELFTEST_LOCATION,                              \
                        (LEXER_TEST),                                   \
                        (STRLOC),                                       \
                        (TYPE),                                         \
                        (IDX),                                          \
                        (EXPECTED_LINE),                                \
                        (EXPECTED_START_COL),                           \
                        (EXPECTED_FINISH_COL))
2427
2428 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2429    using the string concatenation database for TEST.
2430
2431    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
2432
2433 static void
2434 assert_num_substring_ranges (const location &loc,
2435                              lexer_test& test,
2436                              location_t strloc,
2437                              enum cpp_ttype type,
2438                              int expected_num_ranges)
2439 {
2440   cpp_reader *pfile = test.m_parser;
2441   string_concat_db *concats = &test.m_concats;
2442
2443   int actual_num_ranges = -1;
2444   const char *err
2445     = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2446                                            &actual_num_ranges);
2447   if (should_have_column_data_p (strloc))
2448     ASSERT_EQ_AT (loc, NULL, err);
2449   else
2450     {
2451       ASSERT_STREQ_AT (loc,
2452                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2453                        err);
2454       return;
2455     }
2456   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2457 }
2458
/* Call assert_num_substring_ranges, passing SELFTEST_LOCATION as the
   effective location of any errors.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,           \
                                    EXPECTED_NUM_RANGES)                \
  assert_num_substring_ranges (SELFTEST_LOCATION,                       \
                               (LEXER_TEST),                            \
                               (STRLOC),                                \
                               (TYPE),                                  \
                               (EXPECTED_NUM_RANGES))
2466
2467
2468 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2469    returns an error (using the string concatenation database for TEST).  */
2470
2471 static void
2472 assert_has_no_substring_ranges (const location &loc,
2473                                 lexer_test& test,
2474                                 location_t strloc,
2475                                 enum cpp_ttype type,
2476                                 const char *expected_err)
2477 {
2478   cpp_reader *pfile = test.m_parser;
2479   string_concat_db *concats = &test.m_concats;
2480   cpp_substring_ranges ranges;
2481   const char *actual_err
2482     = get_substring_ranges_for_loc (pfile, concats, strloc,
2483                                     type, ranges);
2484   if (should_have_column_data_p (strloc))
2485     ASSERT_STREQ_AT (loc, expected_err, actual_err);
2486   else
2487     ASSERT_STREQ_AT (loc,
2488                      "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2489                      actual_err);
2490 }
2491
/* Convenience wrapper around assert_has_no_substring_ranges which passes
   SELFTEST_LOCATION as the location argument.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(FIXTURE, STR_LOC, TOK_TYPE, MSG) \
  assert_has_no_substring_ranges (SELFTEST_LOCATION, (FIXTURE),        \
                                  (STR_LOC), (TOK_TYPE), (MSG))
2495
2496 /* Lex a simple string literal.  Verify the substring location data, before
2497    and after running cpp_interpret_string on it.  */
2498
2499 static void
2500 test_lexer_string_locations_simple (const line_table_case &case_)
2501 {
2502   /* Digits 0-9 (with 0 at column 10), the simple way.
2503      ....................000000000.11111111112.2222222223333333333
2504      ....................123456789.01234567890.1234567890123456789
2505      We add a trailing comment to ensure that we correctly locate
2506      the end of the string literal token.  */
2507   const char *content = "        \"0123456789\" /* not a string */\n";
2508   lexer_test test (case_, content, NULL);
2509
2510   /* Verify that we get the expected token back, with the correct
2511      location information.  */
2512   const cpp_token *tok = test.get_token ();
2513   ASSERT_EQ (tok->type, CPP_STRING);
2514   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2515   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2516
2517   /* At this point in lexing, the quote characters are treated as part of
2518      the string (they are stripped off by cpp_interpret_string).  */
2519
2520   ASSERT_EQ (tok->val.str.len, 12);
2521
2522   /* Verify that cpp_interpret_string works.  */
2523   cpp_string dst_string;
2524   const enum cpp_ttype type = CPP_STRING;
2525   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2526                                       &dst_string, type);
2527   ASSERT_TRUE (result);
2528   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2529   free (const_cast <unsigned char *> (dst_string.text));
2530
2531   /* Verify ranges of individual characters.  This no longer includes the
2532      opening quote, but does include the closing quote.  */
2533   for (int i = 0; i <= 10; i++)
2534     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2535                           10 + i, 10 + i);
2536
2537   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2538 }
2539
2540 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2541    encoding.  */
2542
2543 static void
2544 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2545 {
2546   /* EBCDIC support requires iconv.  */
2547   if (!HAVE_ICONV)
2548     return;
2549
2550   /* Digits 0-9 (with 0 at column 10), the simple way.
2551      ....................000000000.11111111112.2222222223333333333
2552      ....................123456789.01234567890.1234567890123456789
2553      We add a trailing comment to ensure that we correctly locate
2554      the end of the string literal token.  */
2555   const char *content = "        \"0123456789\" /* not a string */\n";
2556   ebcdic_execution_charset use_ebcdic;
2557   lexer_test test (case_, content, &use_ebcdic);
2558
2559   /* Verify that we get the expected token back, with the correct
2560      location information.  */
2561   const cpp_token *tok = test.get_token ();
2562   ASSERT_EQ (tok->type, CPP_STRING);
2563   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2564   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2565
2566   /* At this point in lexing, the quote characters are treated as part of
2567      the string (they are stripped off by cpp_interpret_string).  */
2568
2569   ASSERT_EQ (tok->val.str.len, 12);
2570
2571   /* The remainder of the test requires an iconv implementation that
2572      can convert from UTF-8 to the EBCDIC encoding requested above.  */
2573   if (use_ebcdic.iconv_errors_occurred_p ())
2574     return;
2575
2576   /* Verify that cpp_interpret_string works.  */
2577   cpp_string dst_string;
2578   const enum cpp_ttype type = CPP_STRING;
2579   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2580                                       &dst_string, type);
2581   ASSERT_TRUE (result);
2582   /* We should now have EBCDIC-encoded text, specifically
2583      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2584      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
2585   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2586                 (const char *)dst_string.text);
2587   free (const_cast <unsigned char *> (dst_string.text));
2588
2589   /* Verify that we don't attempt to record substring location information
2590      for such cases.  */
2591   ASSERT_HAS_NO_SUBSTRING_RANGES
2592     (test, tok->src_loc, type,
2593      "execution character set != source character set");
2594 }
2595
2596 /* Lex a string literal containing a hex-escaped character.
2597    Verify the substring location data, before and after running
2598    cpp_interpret_string on it.  */
2599
2600 static void
2601 test_lexer_string_locations_hex (const line_table_case &case_)
2602 {
2603   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2604      and with a space in place of digit 6, to terminate the escaped
2605      hex code.
2606      ....................000000000.111111.11112222.
2607      ....................123456789.012345.67890123.  */
2608   const char *content = "        \"01234\\x35 789\"\n";
2609   lexer_test test (case_, content, NULL);
2610
2611   /* Verify that we get the expected token back, with the correct
2612      location information.  */
2613   const cpp_token *tok = test.get_token ();
2614   ASSERT_EQ (tok->type, CPP_STRING);
2615   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2616   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2617
2618   /* At this point in lexing, the quote characters are treated as part of
2619      the string (they are stripped off by cpp_interpret_string).  */
2620   ASSERT_EQ (tok->val.str.len, 15);
2621
2622   /* Verify that cpp_interpret_string works.  */
2623   cpp_string dst_string;
2624   const enum cpp_ttype type = CPP_STRING;
2625   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2626                                       &dst_string, type);
2627   ASSERT_TRUE (result);
2628   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2629   free (const_cast <unsigned char *> (dst_string.text));
2630
2631   /* Verify ranges of individual characters.  This no longer includes the
2632      opening quote, but does include the closing quote.  */
2633   for (int i = 0; i <= 4; i++)
2634     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2635   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2636   for (int i = 6; i <= 10; i++)
2637     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2638
2639   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2640 }
2641
2642 /* Lex a string literal containing an octal-escaped character.
2643    Verify the substring location data after running cpp_interpret_string
2644    on it.  */
2645
2646 static void
2647 test_lexer_string_locations_oct (const line_table_case &case_)
2648 {
2649   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2650      and with a space in place of digit 6, to terminate the escaped
2651      octal code.
2652      ....................000000000.111111.11112222.2222223333333333444
2653      ....................123456789.012345.67890123.4567890123456789012  */
2654   const char *content = "        \"01234\\065 789\" /* not a string */\n";
2655   lexer_test test (case_, content, NULL);
2656
2657   /* Verify that we get the expected token back, with the correct
2658      location information.  */
2659   const cpp_token *tok = test.get_token ();
2660   ASSERT_EQ (tok->type, CPP_STRING);
2661   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2662
2663   /* Verify that cpp_interpret_string works.  */
2664   cpp_string dst_string;
2665   const enum cpp_ttype type = CPP_STRING;
2666   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2667                                       &dst_string, type);
2668   ASSERT_TRUE (result);
2669   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2670   free (const_cast <unsigned char *> (dst_string.text));
2671
2672   /* Verify ranges of individual characters.  This no longer includes the
2673      opening quote, but does include the closing quote.  */
2674   for (int i = 0; i < 5; i++)
2675     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2676   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2677   for (int i = 6; i <= 10; i++)
2678     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2679
2680   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2681 }
2682
2683 /* Test of string literal containing letter escapes.  */
2684
2685 static void
2686 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2687 {
2688   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2689      .....................000000000.1.11111.1.1.11222.22222223333333
2690      .....................123456789.0.12345.6.7.89012.34567890123456.  */
2691   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2692   lexer_test test (case_, content, NULL);
2693
2694   /* Verify that we get the expected tokens back.  */
2695   const cpp_token *tok = test.get_token ();
2696   ASSERT_EQ (tok->type, CPP_STRING);
2697   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2698
2699   /* Verify ranges of individual characters. */
2700   /* "\t".  */
2701   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2702                         0, 1, 10, 11);
2703   /* "foo". */
2704   for (int i = 1; i <= 3; i++)
2705     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2706                           i, 1, 11 + i, 11 + i);
2707   /* "\\" and "\n".  */
2708   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2709                         4, 1, 15, 16);
2710   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2711                         5, 1, 17, 18);
2712
2713   /* "bar" and closing quote for nul-terminator.  */
2714   for (int i = 6; i <= 9; i++)
2715     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2716                           i, 1, 13 + i, 13 + i);
2717
2718   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2719 }
2720
2721 /* Another test of a string literal containing a letter escape.
2722    Based on string seen in
2723      printf ("%-%\n");
2724    in gcc.dg/format/c90-printf-1.c.  */
2725
2726 static void
2727 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2728 {
2729   /* .....................000000000.1111.11.1111.22222222223.
2730      .....................123456789.0123.45.6789.01234567890.  */
2731   const char *content = ("        \"%-%\\n\" /* non-str */\n");
2732   lexer_test test (case_, content, NULL);
2733
2734   /* Verify that we get the expected tokens back.  */
2735   const cpp_token *tok = test.get_token ();
2736   ASSERT_EQ (tok->type, CPP_STRING);
2737   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2738
2739   /* Verify ranges of individual characters. */
2740   /* "%-%".  */
2741   for (int i = 0; i < 3; i++)
2742     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2743                           i, 1, 10 + i, 10 + i);
2744   /* "\n".  */
2745   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2746                         3, 1, 13, 14);
2747
2748   /* Closing quote for nul-terminator.  */
2749   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2750                         4, 1, 15, 15);
2751
2752   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2753 }
2754
2755 /* Lex a string literal containing UCN 4 characters.
2756    Verify the substring location data after running cpp_interpret_string
2757    on it.  */
2758
2759 static void
2760 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2761 {
2762   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2763      as UCN 4.
2764      ....................000000000.111111.111122.222222223.33333333344444
2765      ....................123456789.012345.678901.234567890.12345678901234  */
2766   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
2767   lexer_test test (case_, content, NULL);
2768
2769   /* Verify that we get the expected token back, with the correct
2770      location information.  */
2771   const cpp_token *tok = test.get_token ();
2772   ASSERT_EQ (tok->type, CPP_STRING);
2773   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2774
2775   /* Verify that cpp_interpret_string works.
2776      The string should be encoded in the execution character
2777      set.  Assuming that is UTF-8, we should have the following:
2778      -----------  ----  -----  -------  ----------------
2779      Byte offset  Byte  Octal  Unicode  Source Column(s)
2780      -----------  ----  -----  -------  ----------------
2781      0            0x30         '0'      10
2782      1            0x31         '1'      11
2783      2            0x32         '2'      12
2784      3            0x33         '3'      13
2785      4            0x34         '4'      14
2786      5            0xE2  \342   U+2174   15-20
2787      6            0x85  \205    (cont)  15-20
2788      7            0xB4  \264    (cont)  15-20
2789      8            0xE2  \342   U+2175   21-26
2790      9            0x85  \205    (cont)  21-26
2791      10           0xB5  \265    (cont)  21-26
2792      11           0x37         '7'      27
2793      12           0x38         '8'      28
2794      13           0x39         '9'      29
2795      14           0x00                  30 (closing quote)
2796      -----------  ----  -----  -------  ---------------.  */
2797
2798   cpp_string dst_string;
2799   const enum cpp_ttype type = CPP_STRING;
2800   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2801                                       &dst_string, type);
2802   ASSERT_TRUE (result);
2803   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2804                 (const char *)dst_string.text);
2805   free (const_cast <unsigned char *> (dst_string.text));
2806
2807   /* Verify ranges of individual characters.  This no longer includes the
2808      opening quote, but does include the closing quote.
2809      '01234'.  */
2810   for (int i = 0; i <= 4; i++)
2811     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2812   /* U+2174.  */
2813   for (int i = 5; i <= 7; i++)
2814     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2815   /* U+2175.  */
2816   for (int i = 8; i <= 10; i++)
2817     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2818   /* '789' and nul terminator  */
2819   for (int i = 11; i <= 14; i++)
2820     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2821
2822   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2823 }
2824
2825 /* Lex a string literal containing UCN 8 characters.
2826    Verify the substring location data after running cpp_interpret_string
2827    on it.  */
2828
2829 static void
2830 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2831 {
2832   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2833      ....................000000000.111111.1111222222.2222333333333.344444
2834      ....................123456789.012345.6789012345.6789012345678.901234  */
2835   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
2836   lexer_test test (case_, content, NULL);
2837
2838   /* Verify that we get the expected token back, with the correct
2839      location information.  */
2840   const cpp_token *tok = test.get_token ();
2841   ASSERT_EQ (tok->type, CPP_STRING);
2842   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2843                            "\"01234\\U00002174\\U00002175789\"");
2844
2845   /* Verify that cpp_interpret_string works.
2846      The UTF-8 encoding of the string is identical to that from
2847      the ucn4 testcase above; the only difference is the column
2848      locations.  */
2849   cpp_string dst_string;
2850   const enum cpp_ttype type = CPP_STRING;
2851   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2852                                       &dst_string, type);
2853   ASSERT_TRUE (result);
2854   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2855                 (const char *)dst_string.text);
2856   free (const_cast <unsigned char *> (dst_string.text));
2857
2858   /* Verify ranges of individual characters.  This no longer includes the
2859      opening quote, but does include the closing quote.
2860      '01234'.  */
2861   for (int i = 0; i <= 4; i++)
2862     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2863   /* U+2174.  */
2864   for (int i = 5; i <= 7; i++)
2865     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2866   /* U+2175.  */
2867   for (int i = 8; i <= 10; i++)
2868     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2869   /* '789' at columns 35-37  */
2870   for (int i = 11; i <= 13; i++)
2871     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2872   /* Closing quote/nul-terminator at column 38.  */
2873   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2874
2875   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2876 }
2877
/* Read the 32-bit value stored in big-endian byte order at PTR_BE_VALUE
   and return it in host byte order.  The storage is read one byte at a
   time, most significant byte first.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t value = 0;
  for (int i = 0; i < 4; i++)
    value = (value << 8) | bytes[i];
  return value;
}
2889
2890 /* Lex a wide string literal and verify that attempts to read substring
2891    location data from it fail gracefully.  */
2892
2893 static void
2894 test_lexer_string_locations_wide_string (const line_table_case &case_)
2895 {
2896   /* Digits 0-9.
2897      ....................000000000.11111111112.22222222233333
2898      ....................123456789.01234567890.12345678901234  */
2899   const char *content = "       L\"0123456789\" /* non-str */\n";
2900   lexer_test test (case_, content, NULL);
2901
2902   /* Verify that we get the expected token back, with the correct
2903      location information.  */
2904   const cpp_token *tok = test.get_token ();
2905   ASSERT_EQ (tok->type, CPP_WSTRING);
2906   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2907
2908   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
2909   cpp_string dst_string;
2910   const enum cpp_ttype type = CPP_WSTRING;
2911   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2912                                       &dst_string, type);
2913   ASSERT_TRUE (result);
2914   /* The cpp_reader defaults to big-endian with
2915      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2916      now be encoded as UTF-32BE.  */
2917   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2918   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2919   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2920   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2921   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2922   free (const_cast <unsigned char *> (dst_string.text));
2923
2924   /* We don't yet support generating substring location information
2925      for L"" strings.  */
2926   ASSERT_HAS_NO_SUBSTRING_RANGES
2927     (test, tok->src_loc, type,
2928      "execution character set != source character set");
2929 }
2930
/* Read the 16-bit value stored in big-endian byte order at PTR_BE_VALUE
   and return it in host byte order.  The storage is read one byte at a
   time, most significant byte first.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high = bytes[0];
  const unsigned int low = bytes[1];
  return (uint16_t) ((high << 8) | low);
}
2939
2940 /* Lex a u"" string literal and verify that attempts to read substring
2941    location data from it fail gracefully.  */
2942
2943 static void
2944 test_lexer_string_locations_string16 (const line_table_case &case_)
2945 {
2946   /* Digits 0-9.
2947      ....................000000000.11111111112.22222222233333
2948      ....................123456789.01234567890.12345678901234  */
2949   const char *content = "       u\"0123456789\" /* non-str */\n";
2950   lexer_test test (case_, content, NULL);
2951
2952   /* Verify that we get the expected token back, with the correct
2953      location information.  */
2954   const cpp_token *tok = test.get_token ();
2955   ASSERT_EQ (tok->type, CPP_STRING16);
2956   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2957
2958   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
2959   cpp_string dst_string;
2960   const enum cpp_ttype type = CPP_STRING16;
2961   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2962                                       &dst_string, type);
2963   ASSERT_TRUE (result);
2964
2965   /* The cpp_reader defaults to big-endian, so dst_string should
2966      now be encoded as UTF-16BE.  */
2967   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2968   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2969   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2970   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2971   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2972   free (const_cast <unsigned char *> (dst_string.text));
2973
2974   /* We don't yet support generating substring location information
2975      for L"" strings.  */
2976   ASSERT_HAS_NO_SUBSTRING_RANGES
2977     (test, tok->src_loc, type,
2978      "execution character set != source character set");
2979 }
2980
2981 /* Lex a U"" string literal and verify that attempts to read substring
2982    location data from it fail gracefully.  */
2983
2984 static void
2985 test_lexer_string_locations_string32 (const line_table_case &case_)
2986 {
2987   /* Digits 0-9.
2988      ....................000000000.11111111112.22222222233333
2989      ....................123456789.01234567890.12345678901234  */
2990   const char *content = "       U\"0123456789\" /* non-str */\n";
2991   lexer_test test (case_, content, NULL);
2992
2993   /* Verify that we get the expected token back, with the correct
2994      location information.  */
2995   const cpp_token *tok = test.get_token ();
2996   ASSERT_EQ (tok->type, CPP_STRING32);
2997   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2998
2999   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
3000   cpp_string dst_string;
3001   const enum cpp_ttype type = CPP_STRING32;
3002   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3003                                       &dst_string, type);
3004   ASSERT_TRUE (result);
3005
3006   /* The cpp_reader defaults to big-endian, so dst_string should
3007      now be encoded as UTF-32BE.  */
3008   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3009   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3010   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3011   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3012   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3013   free (const_cast <unsigned char *> (dst_string.text));
3014
3015   /* We don't yet support generating substring location information
3016      for L"" strings.  */
3017   ASSERT_HAS_NO_SUBSTRING_RANGES
3018     (test, tok->src_loc, type,
3019      "execution character set != source character set");
3020 }
3021
3022 /* Lex a u8-string literal.
3023    Verify the substring location data after running cpp_interpret_string
3024    on it.  */
3025
3026 static void
3027 test_lexer_string_locations_u8 (const line_table_case &case_)
3028 {
3029   /* Digits 0-9.
3030      ....................000000000.11111111112.22222222233333
3031      ....................123456789.01234567890.12345678901234  */
3032   const char *content = "      u8\"0123456789\" /* non-str */\n";
3033   lexer_test test (case_, content, NULL);
3034
3035   /* Verify that we get the expected token back, with the correct
3036      location information.  */
3037   const cpp_token *tok = test.get_token ();
3038   ASSERT_EQ (tok->type, CPP_UTF8STRING);
3039   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3040
3041   /* Verify that cpp_interpret_string works.  */
3042   cpp_string dst_string;
3043   const enum cpp_ttype type = CPP_STRING;
3044   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3045                                       &dst_string, type);
3046   ASSERT_TRUE (result);
3047   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3048   free (const_cast <unsigned char *> (dst_string.text));
3049
3050   /* Verify ranges of individual characters.  This no longer includes the
3051      opening quote, but does include the closing quote.  */
3052   for (int i = 0; i <= 10; i++)
3053     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3054 }
3055
3056 /* Lex a string literal containing UTF-8 source characters.
3057    Verify the substring location data after running cpp_interpret_string
3058    on it.  */
3059
3060 static void
3061 test_lexer_string_locations_utf8_source (const line_table_case &case_)
3062 {
3063  /* This string literal is written out to the source file as UTF-8,
3064     and is of the form "before mojibake after", where "mojibake"
3065     is written as the following four unicode code points:
3066        U+6587 CJK UNIFIED IDEOGRAPH-6587
3067        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3068        U+5316 CJK UNIFIED IDEOGRAPH-5316
3069        U+3051 HIRAGANA LETTER KE.
3070      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3071      "before" and "after" are 1 byte per unicode character.
3072
3073      The numbering shown are "columns", which are *byte* numbers within
3074      the line, rather than unicode character numbers.
3075
3076      .................... 000000000.1111111.
3077      .................... 123456789.0123456.  */
3078   const char *content = ("        \"before "
3079                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3080                               UTF-8: 0xE6 0x96 0x87
3081                               C octal escaped UTF-8: \346\226\207
3082                             "column" numbers: 17-19.  */
3083                          "\346\226\207"
3084
3085                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3086                               UTF-8: 0xE5 0xAD 0x97
3087                               C octal escaped UTF-8: \345\255\227
3088                             "column" numbers: 20-22.  */
3089                          "\345\255\227"
3090
3091                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3092                               UTF-8: 0xE5 0x8C 0x96
3093                               C octal escaped UTF-8: \345\214\226
3094                             "column" numbers: 23-25.  */
3095                          "\345\214\226"
3096
3097                          /* U+3051 HIRAGANA LETTER KE
3098                               UTF-8: 0xE3 0x81 0x91
3099                               C octal escaped UTF-8: \343\201\221
3100                             "column" numbers: 26-28.  */
3101                          "\343\201\221"
3102
3103                          /* column numbers 29 onwards
3104                           2333333.33334444444444
3105                           9012345.67890123456789. */
3106                          " after\" /* non-str */\n");
3107   lexer_test test (case_, content, NULL);
3108
3109   /* Verify that we get the expected token back, with the correct
3110      location information.  */
3111   const cpp_token *tok = test.get_token ();
3112   ASSERT_EQ (tok->type, CPP_STRING);
3113   ASSERT_TOKEN_AS_TEXT_EQ
3114     (test.m_parser, tok,
3115      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3116
3117   /* Verify that cpp_interpret_string works.  */
3118   cpp_string dst_string;
3119   const enum cpp_ttype type = CPP_STRING;
3120   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3121                                       &dst_string, type);
3122   ASSERT_TRUE (result);
3123   ASSERT_STREQ
3124     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3125      (const char *)dst_string.text);
3126   free (const_cast <unsigned char *> (dst_string.text));
3127
3128   /* Verify ranges of individual characters.  This no longer includes the
3129      opening quote, but does include the closing quote.
3130      Assuming that both source and execution encodings are UTF-8, we have
3131      a run of 25 octets in each, plus the NUL terminator.  */
3132   for (int i = 0; i < 25; i++)
3133     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3134   /* NUL-terminator should use the closing quote at column 35.  */
3135   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3136
3137   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3138 }
3139
3140 /* Test of string literal concatenation.  */
3141
3142 static void
3143 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3144 {
3145   /* Digits 0-9.
3146      .....................000000000.111111.11112222222222
3147      .....................123456789.012345.67890123456789.  */
3148   const char *content = ("        \"01234\" /* non-str */\n"
3149                          "        \"56789\" /* non-str */\n");
3150   lexer_test test (case_, content, NULL);
3151
3152   location_t input_locs[2];
3153
3154   /* Verify that we get the expected tokens back.  */
3155   auto_vec <cpp_string> input_strings;
3156   const cpp_token *tok_a = test.get_token ();
3157   ASSERT_EQ (tok_a->type, CPP_STRING);
3158   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3159   input_strings.safe_push (tok_a->val.str);
3160   input_locs[0] = tok_a->src_loc;
3161
3162   const cpp_token *tok_b = test.get_token ();
3163   ASSERT_EQ (tok_b->type, CPP_STRING);
3164   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3165   input_strings.safe_push (tok_b->val.str);
3166   input_locs[1] = tok_b->src_loc;
3167
3168   /* Verify that cpp_interpret_string works.  */
3169   cpp_string dst_string;
3170   const enum cpp_ttype type = CPP_STRING;
3171   bool result = cpp_interpret_string (test.m_parser,
3172                                       input_strings.address (), 2,
3173                                       &dst_string, type);
3174   ASSERT_TRUE (result);
3175   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3176   free (const_cast <unsigned char *> (dst_string.text));
3177
3178   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3179   test.m_concats.record_string_concatenation (2, input_locs);
3180
3181   location_t initial_loc = input_locs[0];
3182
3183   /* "01234" on line 1.  */
3184   for (int i = 0; i <= 4; i++)
3185     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3186   /* "56789" in line 2, plus its closing quote for the nul terminator.  */
3187   for (int i = 5; i <= 10; i++)
3188     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3189
3190   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3191 }
3192
3193 /* Another test of string literal concatenation.  */
3194
3195 static void
3196 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3197 {
3198   /* Digits 0-9.
3199      .....................000000000.111.11111112222222
3200      .....................123456789.012.34567890123456.  */
3201   const char *content = ("        \"01\" /* non-str */\n"
3202                          "        \"23\" /* non-str */\n"
3203                          "        \"45\" /* non-str */\n"
3204                          "        \"67\" /* non-str */\n"
3205                          "        \"89\" /* non-str */\n");
3206   lexer_test test (case_, content, NULL);
3207
3208   auto_vec <cpp_string> input_strings;
3209   location_t input_locs[5];
3210
3211   /* Verify that we get the expected tokens back.  */
3212   for (int i = 0; i < 5; i++)
3213     {
3214       const cpp_token *tok = test.get_token ();
3215       ASSERT_EQ (tok->type, CPP_STRING);
3216       input_strings.safe_push (tok->val.str);
3217       input_locs[i] = tok->src_loc;
3218     }
3219
3220   /* Verify that cpp_interpret_string works.  */
3221   cpp_string dst_string;
3222   const enum cpp_ttype type = CPP_STRING;
3223   bool result = cpp_interpret_string (test.m_parser,
3224                                       input_strings.address (), 5,
3225                                       &dst_string, type);
3226   ASSERT_TRUE (result);
3227   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3228   free (const_cast <unsigned char *> (dst_string.text));
3229
3230   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3231   test.m_concats.record_string_concatenation (5, input_locs);
3232
3233   location_t initial_loc = input_locs[0];
3234
3235   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3236      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3237      and expect get_source_range_for_substring to fail.
3238      However, for a string concatenation test, we can have a case
3239      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3240      but subsequent strings can be after it.
3241      Attempting to detect this within assert_char_at_range
3242      would overcomplicate the logic for the common test cases, so
3243      we detect it here.  */
3244   if (should_have_column_data_p (input_locs[0])
3245       && !should_have_column_data_p (input_locs[4]))
3246     {
3247       /* Verify that get_source_range_for_substring gracefully rejects
3248          this case.  */
3249       source_range actual_range;
3250       const char *err
3251         = get_source_range_for_char (test.m_parser, &test.m_concats,
3252                                      initial_loc, type, 0, &actual_range);
3253       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3254       return;
3255     }
3256
3257   for (int i = 0; i < 5; i++)
3258     for (int j = 0; j < 2; j++)
3259       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3260                             i + 1, 10 + j, 10 + j);
3261
3262   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
3263   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3264
3265   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3266 }
3267
3268 /* Another test of string literal concatenation, this time combined with
3269    various kinds of escaped characters.  */
3270
3271 static void
3272 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3273 {
3274   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3275      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
3276   const char *content
3277     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3278        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3279     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
3280   lexer_test test (case_, content, NULL);
3281
3282   auto_vec <cpp_string> input_strings;
3283   location_t input_locs[4];
3284
3285   /* Verify that we get the expected tokens back.  */
3286   for (int i = 0; i < 4; i++)
3287     {
3288       const cpp_token *tok = test.get_token ();
3289       ASSERT_EQ (tok->type, CPP_STRING);
3290       input_strings.safe_push (tok->val.str);
3291       input_locs[i] = tok->src_loc;
3292     }
3293
3294   /* Verify that cpp_interpret_string works.  */
3295   cpp_string dst_string;
3296   const enum cpp_ttype type = CPP_STRING;
3297   bool result = cpp_interpret_string (test.m_parser,
3298                                       input_strings.address (), 4,
3299                                       &dst_string, type);
3300   ASSERT_TRUE (result);
3301   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3302   free (const_cast <unsigned char *> (dst_string.text));
3303
3304   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3305   test.m_concats.record_string_concatenation (4, input_locs);
3306
3307   location_t initial_loc = input_locs[0];
3308
3309   for (int i = 0; i <= 4; i++)
3310     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3311   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3312   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3313   for (int i = 7; i <= 9; i++)
3314     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3315
3316   /* NUL-terminator should use the location of the final closing quote.  */
3317   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3318
3319   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3320 }
3321
3322 /* Test of string literal in a macro.  */
3323
3324 static void
3325 test_lexer_string_locations_macro (const line_table_case &case_)
3326 {
3327   /* Digits 0-9.
3328      .....................0000000001111111111.22222222223.
3329      .....................1234567890123456789.01234567890.  */
3330   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
3331                          "  MACRO");
3332   lexer_test test (case_, content, NULL);
3333
3334   /* Verify that we get the expected tokens back.  */
3335   const cpp_token *tok = test.get_token ();
3336   ASSERT_EQ (tok->type, CPP_PADDING);
3337
3338   tok = test.get_token ();
3339   ASSERT_EQ (tok->type, CPP_STRING);
3340   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3341
3342   /* Verify ranges of individual characters.  We ought to
3343      see columns within the macro definition.  */
3344   for (int i = 0; i <= 10; i++)
3345     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3346                           i, 1, 20 + i, 20 + i);
3347
3348   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3349
3350   tok = test.get_token ();
3351   ASSERT_EQ (tok->type, CPP_PADDING);
3352 }
3353
3354 /* Test of stringification of a macro argument.  */
3355
3356 static void
3357 test_lexer_string_locations_stringified_macro_argument
3358   (const line_table_case &case_)
3359 {
3360   /* .....................000000000111111111122222222223.
3361      .....................123456789012345678901234567890.  */
3362   const char *content = ("#define MACRO(X) #X /* non-str */\n"
3363                          "MACRO(foo)\n");
3364   lexer_test test (case_, content, NULL);
3365
3366   /* Verify that we get the expected token back.  */
3367   const cpp_token *tok = test.get_token ();
3368   ASSERT_EQ (tok->type, CPP_PADDING);
3369
3370   tok = test.get_token ();
3371   ASSERT_EQ (tok->type, CPP_STRING);
3372   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3373
3374   /* We don't support getting the location of a stringified macro
3375      argument.  Verify that it fails gracefully.  */
3376   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3377                                   "cpp_interpret_string_1 failed");
3378
3379   tok = test.get_token ();
3380   ASSERT_EQ (tok->type, CPP_PADDING);
3381
3382   tok = test.get_token ();
3383   ASSERT_EQ (tok->type, CPP_PADDING);
3384 }
3385
3386 /* Ensure that we are fail gracefully if something attempts to pass
3387    in a location that isn't a string literal token.  Seen on this code:
3388
3389      const char a[] = " %d ";
3390      __builtin_printf (a, 0.5);
3391                        ^
3392
3393    when c-format.c erroneously used the indicated one-character
3394    location as the format string location, leading to a read past the
3395    end of a string buffer in cpp_interpret_string_1.  */
3396
3397 static void
3398 test_lexer_string_locations_non_string (const line_table_case &case_)
3399 {
3400   /* .....................000000000111111111122222222223.
3401      .....................123456789012345678901234567890.  */
3402   const char *content = ("         a\n");
3403   lexer_test test (case_, content, NULL);
3404
3405   /* Verify that we get the expected token back.  */
3406   const cpp_token *tok = test.get_token ();
3407   ASSERT_EQ (tok->type, CPP_NAME);
3408   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3409
3410   /* At this point, libcpp is attempting to interpret the name as a
3411      string literal, despite it not starting with a quote.  We don't detect
3412      that, but we should at least fail gracefully.  */
3413   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3414                                   "cpp_interpret_string_1 failed");
3415 }
3416
3417 /* Ensure that we can read substring information for a token which
3418    starts in one linemap and ends in another .  Adapted from
3419    gcc.dg/cpp/pr69985.c.  */
3420
3421 static void
3422 test_lexer_string_locations_long_line (const line_table_case &case_)
3423 {
3424   /* .....................000000.000111111111
3425      .....................123456.789012346789.  */
3426   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
3427                          "     \"0123456789012345678901234567890123456789"
3428                          "0123456789012345678901234567890123456789"
3429                          "0123456789012345678901234567890123456789"
3430                          "0123456789\"\n");
3431
3432   lexer_test test (case_, content, NULL);
3433
3434   /* Verify that we get the expected token back.  */
3435   const cpp_token *tok = test.get_token ();
3436   ASSERT_EQ (tok->type, CPP_STRING);
3437
3438   if (!should_have_column_data_p (line_table->highest_location))
3439     return;
3440
3441   /* Verify ranges of individual characters.  */
3442   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3443   for (int i = 0; i < 131; i++)
3444     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3445                           i, 2, 7 + i, 7 + i);
3446 }
3447
3448 /* Test of locations within a raw string that doesn't contain a newline.  */
3449
3450 static void
3451 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3452 {
3453   /* .....................00.0000000111111111122.
3454      .....................12.3456789012345678901.  */
3455   const char *content = ("R\"foo(0123456789)foo\"\n");
3456   lexer_test test (case_, content, NULL);
3457
3458   /* Verify that we get the expected token back.  */
3459   const cpp_token *tok = test.get_token ();
3460   ASSERT_EQ (tok->type, CPP_STRING);
3461
3462   /* Verify that cpp_interpret_string works.  */
3463   cpp_string dst_string;
3464   const enum cpp_ttype type = CPP_STRING;
3465   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3466                                       &dst_string, type);
3467   ASSERT_TRUE (result);
3468   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3469   free (const_cast <unsigned char *> (dst_string.text));
3470
3471   if (!should_have_column_data_p (line_table->highest_location))
3472     return;
3473
3474   /* 0-9, plus the nil terminator.  */
3475   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3476   for (int i = 0; i < 11; i++)
3477     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3478                           i, 1, 7 + i, 7 + i);
3479 }
3480
3481 /* Test of locations within a raw string that contains a newline.  */
3482
3483 static void
3484 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3485 {
3486   /* .....................00.0000.
3487      .....................12.3456.  */
3488   const char *content = ("R\"foo(\n"
3489   /* .....................00000.
3490      .....................12345.  */
3491                          "hello\n"
3492                          "world\n"
3493   /* .....................00000.
3494      .....................12345.  */
3495                          ")foo\"\n");
3496   lexer_test test (case_, content, NULL);
3497
3498   /* Verify that we get the expected token back.  */
3499   const cpp_token *tok = test.get_token ();
3500   ASSERT_EQ (tok->type, CPP_STRING);
3501
3502   /* Verify that cpp_interpret_string works.  */
3503   cpp_string dst_string;
3504   const enum cpp_ttype type = CPP_STRING;
3505   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3506                                       &dst_string, type);
3507   ASSERT_TRUE (result);
3508   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3509   free (const_cast <unsigned char *> (dst_string.text));
3510
3511   if (!should_have_column_data_p (line_table->highest_location))
3512     return;
3513
3514   /* Currently we don't support locations within raw strings that
3515      contain newlines.  */
3516   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3517                                   "range endpoints are on different lines");
3518 }
3519
3520 /* Test of parsing an unterminated raw string.  */
3521
3522 static void
3523 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3524 {
3525   const char *content = "R\"ouch()ouCh\" /* etc */";
3526
3527   lexer_diagnostic_sink diagnostics;
3528   lexer_test test (case_, content, &diagnostics);
3529   test.m_implicitly_expect_EOF = false;
3530
3531   /* Attempt to parse the raw string.  */
3532   const cpp_token *tok = test.get_token ();
3533   ASSERT_EQ (tok->type, CPP_EOF);
3534
3535   ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3536   /* We expect the message "unterminated raw string"
3537      in the "cpplib" translation domain.
3538      It's not clear that dgettext is available on all supported hosts,
3539      so this assertion is commented-out for now.
3540        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3541                      diagnostics.m_diagnostics[0]);
3542   */
3543 }
3544
3545 /* Test of lexing char constants.  */
3546
3547 static void
3548 test_lexer_char_constants (const line_table_case &case_)
3549 {
3550   /* Various char constants.
3551      .....................0000000001111111111.22222222223.
3552      .....................1234567890123456789.01234567890.  */
3553   const char *content = ("         'a'\n"
3554                          "        u'a'\n"
3555                          "        U'a'\n"
3556                          "        L'a'\n"
3557                          "         'abc'\n");
3558   lexer_test test (case_, content, NULL);
3559
3560   /* Verify that we get the expected tokens back.  */
3561   /* 'a'.  */
3562   const cpp_token *tok = test.get_token ();
3563   ASSERT_EQ (tok->type, CPP_CHAR);
3564   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3565
3566   unsigned int chars_seen;
3567   int unsignedp;
3568   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3569                                           &chars_seen, &unsignedp);
3570   ASSERT_EQ (cc, 'a');
3571   ASSERT_EQ (chars_seen, 1);
3572
3573   /* u'a'.  */
3574   tok = test.get_token ();
3575   ASSERT_EQ (tok->type, CPP_CHAR16);
3576   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3577
3578   /* U'a'.  */
3579   tok = test.get_token ();
3580   ASSERT_EQ (tok->type, CPP_CHAR32);
3581   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3582
3583   /* L'a'.  */
3584   tok = test.get_token ();
3585   ASSERT_EQ (tok->type, CPP_WCHAR);
3586   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3587
3588   /* 'abc' (c-char-sequence).  */
3589   tok = test.get_token ();
3590   ASSERT_EQ (tok->type, CPP_CHAR);
3591   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3592 }
3593 /* A table of interesting location_t values, giving one axis of our test
3594    matrix.  */
3595
3596 static const location_t boundary_locations[] = {
3597   /* Zero means "don't override the default values for a new line_table".  */
3598   0,
3599
3600   /* An arbitrary non-zero value that isn't close to one of
3601      the boundary values below.  */
3602   0x10000,
3603
3604   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
3605   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3606   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3607   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3608   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3609   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3610
3611   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
3612   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3613   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3614   LINE_MAP_MAX_LOCATION_WITH_COLS,
3615   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3616   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3617 };
3618
3619 /* Run TESTCASE multiple times, once for each case in our test matrix.  */
3620
3621 void
3622 for_each_line_table_case (void (*testcase) (const line_table_case &))
3623 {
3624   /* As noted above in the description of struct line_table_case,
3625      we want to explore a test matrix of interesting line_table
3626      situations, running various selftests for each case within the
3627      matrix.  */
3628
3629   /* Run all tests with:
3630      (a) line_table->default_range_bits == 0, and
3631      (b) line_table->default_range_bits == 5.  */
3632   int num_cases_tested = 0;
3633   for (int default_range_bits = 0; default_range_bits <= 5;
3634        default_range_bits += 5)
3635     {
3636       /* ...and use each of the "interesting" location values as
3637          the starting location within line_table.  */
3638       const int num_boundary_locations
3639         = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3640       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3641         {
3642           line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3643
3644           testcase (c);
3645
3646           num_cases_tested++;
3647         }
3648     }
3649
3650   /* Verify that we fully covered the test matrix.  */
3651   ASSERT_EQ (num_cases_tested, 2 * 12);
3652 }
3653
3654 /* Verify that when presented with a consecutive pair of locations with
3655    a very large line offset, we don't attempt to consolidate them into
3656    a single ordinary linemap where the line offsets within the line map
3657    would lead to overflow (PR lto/88147).  */
3658
3659 static void
3660 test_line_offset_overflow ()
3661 {
3662   line_table_test ltt (line_table_case (5, 0));
3663
3664   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3665   linemap_line_start (line_table, 1, 100);
3666   location_t loc_a = linemap_line_start (line_table, 2578, 255);
3667   assert_loceq ("foo.c", 2578, 0, loc_a);
3668
3669   const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3670   ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3671   ASSERT_EQ (ordmap_a->m_range_bits, 5);
3672
3673   location_t loc_b = linemap_line_start (line_table, 404198, 512);
3674   assert_loceq ("foo.c", 404198, 0, loc_b);
3675
3676   /* We should have started a new linemap, rather than attempting to store
3677      a very large line offset.  */
3678   const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3679   ASSERT_NE (ordmap_a, ordmap_b);
3680 }
3681
3682 void test_cpp_utf8 ()
3683 {
3684   const int def_tabstop = 8;
3685   /* Verify that wcwidth of invalid UTF-8 or control bytes is 1.  */
3686   {
3687     int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, def_tabstop);
3688     ASSERT_EQ (8, w_bad);
3689     int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, def_tabstop);
3690     ASSERT_EQ (5, w_ctrl);
3691   }
3692
3693   /* Verify that wcwidth of valid UTF-8 is as expected.  */
3694   {
3695     const int w_pi = cpp_display_width ("\xcf\x80", 2, def_tabstop);
3696     ASSERT_EQ (1, w_pi);
3697     const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, def_tabstop);
3698     ASSERT_EQ (2, w_emoji);
3699     const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3700                                                         def_tabstop);
3701     ASSERT_EQ (1, w_umlaut_precomposed);
3702     const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
3703                                                       def_tabstop);
3704     ASSERT_EQ (1, w_umlaut_combining);
3705     const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, def_tabstop);
3706     ASSERT_EQ (2, w_han);
3707     const int w_ascii = cpp_display_width ("GCC", 3, def_tabstop);
3708     ASSERT_EQ (3, w_ascii);
3709     const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
3710                                            "\x9f! \xe4\xb8\xba y\xcc\x88",
3711                                            24, def_tabstop);
3712     ASSERT_EQ (18, w_mixed);
3713   }
3714
3715   /* Verify that display width properly expands tabs.  */
3716   {
3717     const char *tstr = "\tabc\td";
3718     ASSERT_EQ (6, cpp_display_width (tstr, 6, 1));
3719     ASSERT_EQ (10, cpp_display_width (tstr, 6, 3));
3720     ASSERT_EQ (17, cpp_display_width (tstr, 6, 8));
3721     ASSERT_EQ (1, cpp_display_column_to_byte_column (tstr, 6, 7, 8));
3722   }
3723
3724   /* Verify that cpp_byte_column_to_display_column can go past the end,
3725      and similar edge cases.  */
3726   {
3727     const char *str
3728       /* Display columns.
3729          111111112345  */
3730       = "\xcf\x80 abc";
3731       /* 111122223456
3732          Byte columns.  */
3733
3734     ASSERT_EQ (5, cpp_display_width (str, 6, def_tabstop));
3735     ASSERT_EQ (105,
3736                cpp_byte_column_to_display_column (str, 6, 106, def_tabstop));
3737     ASSERT_EQ (10000,
3738                cpp_byte_column_to_display_column (NULL, 0, 10000, def_tabstop));
3739     ASSERT_EQ (0,
3740                cpp_byte_column_to_display_column (NULL, 10000, 0, def_tabstop));
3741   }
3742
3743   /* Verify that cpp_display_column_to_byte_column can go past the end,
3744      and similar edge cases, and check invertibility.  */
3745   {
3746     const char *str
3747       /* Display columns.
3748          000000000000000000000000000000000000011
3749          111111112222222234444444455555555678901  */
3750       = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
3751       /* 000000000000000000000000000000000111111
3752          111122223333444456666777788889999012345
3753          Byte columns.  */
3754     ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, def_tabstop));
3755     ASSERT_EQ (15,
3756                cpp_display_column_to_byte_column (str, 15, 11, def_tabstop));
3757     ASSERT_EQ (115,
3758                cpp_display_column_to_byte_column (str, 15, 111, def_tabstop));
3759     ASSERT_EQ (10000,
3760                cpp_display_column_to_byte_column (NULL, 0, 10000, def_tabstop));
3761     ASSERT_EQ (0,
3762                cpp_display_column_to_byte_column (NULL, 10000, 0, def_tabstop));
3763
3764     /* Verify that we do not interrupt a UTF-8 sequence.  */
3765     ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, def_tabstop));
3766
3767     for (int byte_col = 1; byte_col <= 15; ++byte_col)
3768       {
3769         const int disp_col
3770           = cpp_byte_column_to_display_column (str, 15, byte_col, def_tabstop);
3771         const int byte_col2
3772           = cpp_display_column_to_byte_column (str, 15, disp_col, def_tabstop);
3773
3774         /* If we ask for the display column in the middle of a UTF-8
3775            sequence, it will return the length of the partial sequence,
3776            matching the behavior of GCC before display column support.
3777            Otherwise check the round trip was successful.  */
3778         if (byte_col < 4)
3779           ASSERT_EQ (byte_col, disp_col);
3780         else if (byte_col >= 6 && byte_col < 9)
3781           ASSERT_EQ (3 + (byte_col - 5), disp_col);
3782         else
3783           ASSERT_EQ (byte_col2, byte_col);
3784       }
3785   }
3786
3787 }
3788
3789 /* Run all of the selftests within this file.  */
3790
3791 void
3792 input_c_tests ()
3793 {
3794   test_linenum_comparisons ();
3795   test_should_have_column_data_p ();
3796   test_unknown_location ();
3797   test_builtins ();
3798   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3799
3800   for_each_line_table_case (test_accessing_ordinary_linemaps);
3801   for_each_line_table_case (test_lexer);
3802   for_each_line_table_case (test_lexer_string_locations_simple);
3803   for_each_line_table_case (test_lexer_string_locations_ebcdic);
3804   for_each_line_table_case (test_lexer_string_locations_hex);
3805   for_each_line_table_case (test_lexer_string_locations_oct);
3806   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3807   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3808   for_each_line_table_case (test_lexer_string_locations_ucn4);
3809   for_each_line_table_case (test_lexer_string_locations_ucn8);
3810   for_each_line_table_case (test_lexer_string_locations_wide_string);
3811   for_each_line_table_case (test_lexer_string_locations_string16);
3812   for_each_line_table_case (test_lexer_string_locations_string32);
3813   for_each_line_table_case (test_lexer_string_locations_u8);
3814   for_each_line_table_case (test_lexer_string_locations_utf8_source);
3815   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3816   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3817   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3818   for_each_line_table_case (test_lexer_string_locations_macro);
3819   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3820   for_each_line_table_case (test_lexer_string_locations_non_string);
3821   for_each_line_table_case (test_lexer_string_locations_long_line);
3822   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3823   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3824   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3825   for_each_line_table_case (test_lexer_char_constants);
3826
3827   test_reading_source_line ();
3828
3829   test_line_offset_overflow ();
3830
3831   test_cpp_utf8 ();
3832 }
3833
3834 } // namespace selftest
3835
3836 #endif /* CHECKING_P */