gcc/input.c

   1 /* Data and functions related to line maps and input files.
   2    Copyright (C) 2004-2016 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "intl.h"
  24 #include "diagnostic-core.h"
  25 #include "selftest.h"
  26 #include "cpplib.h"
  27
  28 /* This is a cache used by get_next_line to store the content of a
  29    file to be searched for file lines.  */
  30 struct fcache
  31 {
  32   /* These are information used to store a line boundary.  */
  33   struct line_info
  34   {
  35     /* The line number.  It starts from 1.  */
  36     size_t line_num;
  37
  38     /* The position (byte count) of the beginning of the line,
  39        relative to the file data pointer.  This starts at zero.  */
  40     size_t start_pos;
  41
  42     /* The position (byte count) of the last byte of the line.  This
  43        normally points to the '\n' character, or to one byte after the
  44        last byte of the file, if the file doesn't contain a '\n'
  45        character.  */
  46     size_t end_pos;
  47
  48     line_info (size_t l, size_t s, size_t e)
  49       : line_num (l), start_pos (s), end_pos (e)
  50     {}
  51
  52     line_info ()
  53       :line_num (0), start_pos (0), end_pos (0)
  54     {}
  55   };
  56
  57   /* The number of time this file has been accessed.  This is used
  58      to designate which file cache to evict from the cache
  59      array.  */
  60   unsigned use_count;
  61
  62   const char *file_path;
  63
  64   FILE *fp;
  65
  66   /* This points to the content of the file that we've read so
  67      far.  */
  68   char *data;
  69
  70   /*  The size of the DATA array above.*/
  71   size_t size;
  72
  73   /* The number of bytes read from the underlying file so far.  This
  74      must be less (or equal) than SIZE above.  */
  75   size_t nb_read;
  76
  77   /* The index of the beginning of the current line.  */
  78   size_t line_start_idx;
  79
  80   /* The number of the previous line read.  This starts at 1.  Zero
  81      means we've read no line so far.  */
  82   size_t line_num;
  83
  84   /* This is the total number of lines of the current file.  At the
  85      moment, we try to get this information from the line map
  86      subsystem.  Note that this is just a hint.  When using the C++
  87      front-end, this hint is correct because the input file is then
  88      completely tokenized before parsing starts; so the line map knows
  89      the number of lines before compilation really starts.  For e.g,
  90      the C front-end, it can happen that we start emitting diagnostics
  91      before the line map has seen the end of the file.  */
  92   size_t total_lines;
  93
  94   /* This is a record of the beginning and end of the lines we've seen
  95      while reading the file.  This is useful to avoid walking the data
  96      from the beginning when we are asked to read a line that is
  97      before LINE_START_IDX above.  Note that the maximum size of this
  98      record is fcache_line_record_size, so that the memory consumption
  99      doesn't explode.  We thus scale total_lines down to
 100      fcache_line_record_size.  */
 101   vec<line_info, va_heap> line_record;
 102
 103   fcache ();
 104   ~fcache ();
 105 };
 106
 107 /* Current position in real source file.  */
 108
 109 location_t input_location = UNKNOWN_LOCATION;
 110
 111 struct line_maps *line_table;
 112
 113 static fcache *fcache_tab;
 114 static const size_t fcache_tab_size = 16;
 115 static const size_t fcache_buffer_size = 4 * 1024;
 116 static const size_t fcache_line_record_size = 100;
 117
 118 /* Expand the source location LOC into a human readable location.  If
 119    LOC resolves to a builtin location, the file name of the readable
 120    location is set to the string "<built-in>". If EXPANSION_POINT_P is
 121    TRUE and LOC is virtual, then it is resolved to the expansion
 122    point of the involved macro.  Otherwise, it is resolved to the
 123    spelling location of the token.
 124
 125    When resolving to the spelling location of the token, if the
 126    resulting location is for a built-in location (that is, it has no
 127    associated line/column) in the context of a macro expansion, the
 128    returned location is the first one (while unwinding the macro
 129    location towards its expansion point) that is in real source
 130    code.  */
 131
 132 static expanded_location
 133 expand_location_1 (source_location loc,
 134                    bool expansion_point_p)
 135 {
 136   expanded_location xloc;
 137   const line_map_ordinary *map;
 138   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
 139   tree block = NULL;
 140
 141   if (IS_ADHOC_LOC (loc))
 142     {
 143       block = LOCATION_BLOCK (loc);
 144       loc = LOCATION_LOCUS (loc);
 145     }
 146
 147   memset (&xloc, 0, sizeof (xloc));
 148
 149   if (loc >= RESERVED_LOCATION_COUNT)
 150     {
 151       if (!expansion_point_p)
 152         {
 153           /* We want to resolve LOC to its spelling location.
 154
 155              But if that spelling location is a reserved location that
 156              appears in the context of a macro expansion (like for a
 157              location for a built-in token), let's consider the first
 158              location (toward the expansion point) that is not reserved;
 159              that is, the first location that is in real source code.  */
 160           loc = linemap_unwind_to_first_non_reserved_loc (line_table,
 161                                                           loc, NULL);
 162           lrk = LRK_SPELLING_LOCATION;
 163         }
 164       loc = linemap_resolve_location (line_table, loc,
 165                                       lrk, &map);
 166       xloc = linemap_expand_location (line_table, map, loc);
 167     }
 168
 169   xloc.data = block;
 170   if (loc <= BUILTINS_LOCATION)
 171     xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
 172
 173   return xloc;
 174 }
 175
 176 /* Initialize the set of cache used for files accessed by caret
 177    diagnostic.  */
 178
 179 static void
 180 diagnostic_file_cache_init (void)
 181 {
 182   if (fcache_tab == NULL)
 183     fcache_tab = new fcache[fcache_tab_size];
 184 }
 185
 186 /* Free the resources used by the set of cache used for files accessed
 187    by caret diagnostic.  */
 188
 189 void
 190 diagnostic_file_cache_fini (void)
 191 {
 192   if (fcache_tab)
 193     {
 194       delete [] (fcache_tab);
 195       fcache_tab = NULL;
 196     }
 197 }
 198
 199 /* Return the total lines number that have been read so far by the
 200    line map (in the preprocessor) so far.  For languages like C++ that
 201    entirely preprocess the input file before starting to parse, this
 202    equals the actual number of lines of the file.  */
 203
 204 static size_t
 205 total_lines_num (const char *file_path)
 206 {
 207   size_t r = 0;
 208   source_location l = 0;
 209   if (linemap_get_file_highest_location (line_table, file_path, &l))
 210     {
 211       gcc_assert (l >= RESERVED_LOCATION_COUNT);
 212       expanded_location xloc = expand_location (l);
 213       r = xloc.line;
 214     }
 215   return r;
 216 }
 217
 218 /* Lookup the cache used for the content of a given file accessed by
 219    caret diagnostic.  Return the found cached file, or NULL if no
 220    cached file was found.  */
 221
 222 static fcache*
 223 lookup_file_in_cache_tab (const char *file_path)
 224 {
 225   if (file_path == NULL)
 226     return NULL;
 227
 228   diagnostic_file_cache_init ();
 229
 230   /* This will contain the found cached file.  */
 231   fcache *r = NULL;
 232   for (unsigned i = 0; i < fcache_tab_size; ++i)
 233     {
 234       fcache *c = &fcache_tab[i];
 235       if (c->file_path && !strcmp (c->file_path, file_path))
 236         {
 237           ++c->use_count;
 238           r = c;
 239         }
 240     }
 241
 242   if (r)
 243     ++r->use_count;
 244
 245   return r;
 246 }
 247
 248 /* Return the file cache that has been less used, recently, or the
 249    first empty one.  If HIGHEST_USE_COUNT is non-null,
 250    *HIGHEST_USE_COUNT is set to the highest use count of the entries
 251    in the cache table.  */
 252
 253 static fcache*
 254 evicted_cache_tab_entry (unsigned *highest_use_count)
 255 {
 256   diagnostic_file_cache_init ();
 257
 258   fcache *to_evict = &fcache_tab[0];
 259   unsigned huc = to_evict->use_count;
 260   for (unsigned i = 1; i < fcache_tab_size; ++i)
 261     {
 262       fcache *c = &fcache_tab[i];
 263       bool c_is_empty = (c->file_path == NULL);
 264
 265       if (c->use_count < to_evict->use_count
 266           || (to_evict->file_path && c_is_empty))
 267         /* We evict C because it's either an entry with a lower use
 268            count or one that is empty.  */
 269         to_evict = c;
 270
 271       if (huc < c->use_count)
 272         huc = c->use_count;
 273
 274       if (c_is_empty)
 275         /* We've reached the end of the cache; subsequent elements are
 276            all empty.  */
 277         break;
 278     }
 279
 280   if (highest_use_count)
 281     *highest_use_count = huc;
 282
 283   return to_evict;
 284 }
 285
 286 /* Create the cache used for the content of a given file to be
 287    accessed by caret diagnostic.  This cache is added to an array of
 288    cache and can be retrieved by lookup_file_in_cache_tab.  This
 289    function returns the created cache.  Note that only the last
 290    fcache_tab_size files are cached.  */
 291
 292 static fcache*
 293 add_file_to_cache_tab (const char *file_path)
 294 {
 295
 296   FILE *fp = fopen (file_path, "r");
 297   if (fp == NULL)
 298     return NULL;
 299
 300   unsigned highest_use_count = 0;
 301   fcache *r = evicted_cache_tab_entry (&highest_use_count);
 302   r->file_path = file_path;
 303   if (r->fp)
 304     fclose (r->fp);
 305   r->fp = fp;
 306   r->nb_read = 0;
 307   r->line_start_idx = 0;
 308   r->line_num = 0;
 309   r->line_record.truncate (0);
 310   /* Ensure that this cache entry doesn't get evicted next time
 311      add_file_to_cache_tab is called.  */
 312   r->use_count = ++highest_use_count;
 313   r->total_lines = total_lines_num (file_path);
 314
 315   return r;
 316 }
 317
 318 /* Lookup the cache used for the content of a given file accessed by
 319    caret diagnostic.  If no cached file was found, create a new cache
 320    for this file, add it to the array of cached file and return
 321    it.  */
 322
 323 static fcache*
 324 lookup_or_add_file_to_cache_tab (const char *file_path)
 325 {
 326   fcache *r = lookup_file_in_cache_tab (file_path);
 327   if (r == NULL)
 328     r = add_file_to_cache_tab (file_path);
 329   return r;
 330 }
 331
 332 /* Default constructor for a cache of file used by caret
 333    diagnostic.  */
 334
 335 fcache::fcache ()
 336 : use_count (0), file_path (NULL), fp (NULL), data (0),
 337   size (0), nb_read (0), line_start_idx (0), line_num (0),
 338   total_lines (0)
 339 {
 340   line_record.create (0);
 341 }
 342
 343 /* Destructor for a cache of file used by caret diagnostic.  */
 344
 345 fcache::~fcache ()
 346 {
 347   if (fp)
 348     {
 349       fclose (fp);
 350       fp = NULL;
 351     }
 352   if (data)
 353     {
 354       XDELETEVEC (data);
 355       data = 0;
 356     }
 357   line_record.release ();
 358 }
 359
 360 /* Returns TRUE iff the cache would need to be filled with data coming
 361    from the file.  That is, either the cache is empty or full or the
 362    current line is empty.  Note that if the cache is full, it would
 363    need to be extended and filled again.  */
 364
 365 static bool
 366 needs_read (fcache *c)
 367 {
 368   return (c->nb_read == 0
 369           || c->nb_read == c->size
 370           || (c->line_start_idx >= c->nb_read - 1));
 371 }
 372
 373 /*  Return TRUE iff the cache is full and thus needs to be
 374     extended.  */
 375
 376 static bool
 377 needs_grow (fcache *c)
 378 {
 379   return c->nb_read == c->size;
 380 }
 381
 382 /* Grow the cache if it needs to be extended.  */
 383
 384 static void
 385 maybe_grow (fcache *c)
 386 {
 387   if (!needs_grow (c))
 388     return;
 389
 390   size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
 391   c->data = XRESIZEVEC (char, c->data, size + 1);
 392   c->size = size;
 393 }
 394
 395 /*  Read more data into the cache.  Extends the cache if need be.
 396     Returns TRUE iff new data could be read.  */
 397
 398 static bool
 399 read_data (fcache *c)
 400 {
 401   if (feof (c->fp) || ferror (c->fp))
 402     return false;
 403
 404   maybe_grow (c);
 405
 406   char * from = c->data + c->nb_read;
 407   size_t to_read = c->size - c->nb_read;
 408   size_t nb_read = fread (from, 1, to_read, c->fp);
 409
 410   if (ferror (c->fp))
 411     return false;
 412
 413   c->nb_read += nb_read;
 414   return !!nb_read;
 415 }
 416
 417 /* Read new data iff the cache needs to be filled with more data
 418    coming from the file FP.  Return TRUE iff the cache was filled with
 419    mode data.  */
 420
 421 static bool
 422 maybe_read_data (fcache *c)
 423 {
 424   if (!needs_read (c))
 425     return false;
 426   return read_data (c);
 427 }
 428
 429 /* Read a new line from file FP, using C as a cache for the data
 430    coming from the file.  Upon successful completion, *LINE is set to
 431    the beginning of the line found.  Space for that line has been
 432    allocated in the cache thus *LINE has the same life time as C.
 433    *LINE_LEN is set to the length of the line.  Note that the line
 434    does not contain any terminal delimiter.  This function returns
 435    true if some data was read or process from the cache, false
 436    otherwise.  Note that subsequent calls to get_next_line return the
 437    next lines of the file and might overwrite the content of
 438    *LINE.  */
 439
 440 static bool
 441 get_next_line (fcache *c, char **line, ssize_t *line_len)
 442 {
 443   /* Fill the cache with data to process.  */
 444   maybe_read_data (c);
 445
 446   size_t remaining_size = c->nb_read - c->line_start_idx;
 447   if (remaining_size == 0)
 448     /* There is no more data to process.  */
 449     return false;
 450
 451   char *line_start = c->data + c->line_start_idx;
 452
 453   char *next_line_start = NULL;
 454   size_t len = 0;
 455   char *line_end = (char *) memchr (line_start, '\n', remaining_size);
 456   if (line_end == NULL)
 457     {
 458       /* We haven't found the end-of-line delimiter in the cache.
 459          Fill the cache with more data from the file and look for the
 460          '\n'.  */
 461       while (maybe_read_data (c))
 462         {
 463           line_start = c->data + c->line_start_idx;
 464           remaining_size = c->nb_read - c->line_start_idx;
 465           line_end = (char *) memchr (line_start, '\n', remaining_size);
 466           if (line_end != NULL)
 467             {
 468               next_line_start = line_end + 1;
 469               break;
 470             }
 471         }
 472       if (line_end == NULL)
 473         /* We've loadded all the file into the cache and still no
 474            '\n'.  Let's say the line ends up at one byte passed the
 475            end of the file.  This is to stay consistent with the case
 476            of when the line ends up with a '\n' and line_end points to
 477            that terminal '\n'.  That consistency is useful below in
 478            the len calculation.  */
 479         line_end = c->data + c->nb_read ;
 480     }
 481   else
 482     next_line_start = line_end + 1;
 483
 484   if (ferror (c->fp))
 485     return -1;
 486
 487   /* At this point, we've found the end of the of line.  It either
 488      points to the '\n' or to one byte after the last byte of the
 489      file.  */
 490   gcc_assert (line_end != NULL);
 491
 492   len = line_end - line_start;
 493
 494   if (c->line_start_idx < c->nb_read)
 495     *line = line_start;
 496
 497   ++c->line_num;
 498
 499   /* Before we update our line record, make sure the hint about the
 500      total number of lines of the file is correct.  If it's not, then
 501      we give up recording line boundaries from now on.  */
 502   bool update_line_record = true;
 503   if (c->line_num > c->total_lines)
 504     update_line_record = false;
 505
 506     /* Now update our line record so that re-reading lines from the
 507      before c->line_start_idx is faster.  */
 508   if (update_line_record
 509       && c->line_record.length () < fcache_line_record_size)
 510     {
 511       /* If the file lines fits in the line record, we just record all
 512          its lines ...*/
 513       if (c->total_lines <= fcache_line_record_size
 514           && c->line_num > c->line_record.length ())
 515         c->line_record.safe_push (fcache::line_info (c->line_num,
 516                                                  c->line_start_idx,
 517                                                  line_end - c->data));
 518       else if (c->total_lines > fcache_line_record_size)
 519         {
 520           /* ... otherwise, we just scale total_lines down to
 521              (fcache_line_record_size lines.  */
 522           size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
 523           if (c->line_record.length () == 0
 524               || n >= c->line_record.length ())
 525             c->line_record.safe_push (fcache::line_info (c->line_num,
 526                                                      c->line_start_idx,
 527                                                      line_end - c->data));
 528         }
 529     }
 530
 531   /* Update c->line_start_idx so that it points to the next line to be
 532      read.  */
 533   if (next_line_start)
 534     c->line_start_idx = next_line_start - c->data;
 535   else
 536     /* We didn't find any terminal '\n'.  Let's consider that the end
 537        of line is the end of the data in the cache.  The next
 538        invocation of get_next_line will either read more data from the
 539        underlying file or return false early because we've reached the
 540        end of the file.  */
 541     c->line_start_idx = c->nb_read;
 542
 543   *line_len = len;
 544
 545   return true;
 546 }
 547
 548 /* Reads the next line from FILE into *LINE.  If *LINE is too small
 549    (or NULL) it is allocated (or extended) to have enough space to
 550    containe the line.  *LINE_LENGTH must contain the size of the
 551    initial*LINE buffer.  It's then updated by this function to the
 552    actual length of the returned line.  Note that the returned line
 553    can contain several zero bytes.  Also note that the returned string
 554    is allocated in static storage that is going to be re-used by
 555    subsequent invocations of read_line.  */
 556
 557 static bool
 558 read_next_line (fcache *cache, char ** line, ssize_t *line_len)
 559 {
 560   char *l = NULL;
 561   ssize_t len = 0;
 562
 563   if (!get_next_line (cache, &l, &len))
 564     return false;
 565
 566   if (*line == NULL)
 567     *line = XNEWVEC (char, len);
 568   else
 569     if (*line_len < len)
 570         *line = XRESIZEVEC (char, *line, len);
 571
 572   memcpy (*line, l, len);
 573   *line_len = len;
 574
 575   return true;
 576 }
 577
 578 /* Consume the next bytes coming from the cache (or from its
 579    underlying file if there are remaining unread bytes in the file)
 580    until we reach the next end-of-line (or end-of-file).  There is no
 581    copying from the cache involved.  Return TRUE upon successful
 582    completion.  */
 583
 584 static bool
 585 goto_next_line (fcache *cache)
 586 {
 587   char *l;
 588   ssize_t len;
 589
 590   return get_next_line (cache, &l, &len);
 591 }
 592
 593 /* Read an arbitrary line number LINE_NUM from the file cached in C.
 594    The line is copied into *LINE.  *LINE_LEN must have been set to the
 595    length of *LINE.  If *LINE is too small (or NULL) it's extended (or
 596    allocated) and *LINE_LEN is adjusted accordingly.  *LINE ends up
 597    with a terminal zero byte and can contain additional zero bytes.
 598    This function returns bool if a line was read.  */
 599
 600 static bool
 601 read_line_num (fcache *c, size_t line_num,
 602                char ** line, ssize_t *line_len)
 603 {
 604   gcc_assert (line_num > 0);
 605
 606   if (line_num <= c->line_num)
 607     {
 608       /* We've been asked to read lines that are before c->line_num.
 609          So lets use our line record (if it's not empty) to try to
 610          avoid re-reading the file from the beginning again.  */
 611
 612       if (c->line_record.is_empty ())
 613         {
 614           c->line_start_idx = 0;
 615           c->line_num = 0;
 616         }
 617       else
 618         {
 619           fcache::line_info *i = NULL;
 620           if (c->total_lines <= fcache_line_record_size)
 621             {
 622               /* In languages where the input file is not totally
 623                  preprocessed up front, the c->total_lines hint
 624                  can be smaller than the number of lines of the
 625                  file.  In that case, only the first
 626                  c->total_lines have been recorded.
 627
 628                  Otherwise, the first c->total_lines we've read have
 629                  their start/end recorded here.  */
 630               i = (line_num <= c->total_lines)
 631                 ? &c->line_record[line_num - 1]
 632                 : &c->line_record[c->total_lines - 1];
 633               gcc_assert (i->line_num <= line_num);
 634             }
 635           else
 636             {
 637               /*  So the file had more lines than our line record
 638                   size.  Thus the number of lines we've recorded has
 639                   been scaled down to fcache_line_reacord_size.  Let's
 640                   pick the start/end of the recorded line that is
 641                   closest to line_num.  */
 642               size_t n = (line_num <= c->total_lines)
 643                 ? line_num * fcache_line_record_size / c->total_lines
 644                 : c ->line_record.length () - 1;
 645               if (n < c->line_record.length ())
 646                 {
 647                   i = &c->line_record[n];
 648                   gcc_assert (i->line_num <= line_num);
 649                 }
 650             }
 651
 652           if (i && i->line_num == line_num)
 653             {
 654               /* We have the start/end of the line.  Let's just copy
 655                  it again and we are done.  */
 656               ssize_t len = i->end_pos - i->start_pos + 1;
 657               if (*line_len < len)
 658                 *line = XRESIZEVEC (char, *line, len);
 659               memmove (*line, c->data + i->start_pos, len);
 660               (*line)[len - 1] = '\0';
 661               *line_len = --len;
 662               return true;
 663             }
 664
 665           if (i)
 666             {
 667               c->line_start_idx = i->start_pos;
 668               c->line_num = i->line_num - 1;
 669             }
 670           else
 671             {
 672               c->line_start_idx = 0;
 673               c->line_num = 0;
 674             }
 675         }
 676     }
 677
 678   /*  Let's walk from line c->line_num up to line_num - 1, without
 679       copying any line.  */
 680   while (c->line_num < line_num - 1)
 681     if (!goto_next_line (c))
 682       return false;
 683
 684   /* The line we want is the next one.  Let's read and copy it back to
 685      the caller.  */
 686   return read_next_line (c, line, line_len);
 687 }
 688
 689 /* Return the physical source line that corresponds to FILE_PATH/LINE in a
 690    buffer that is statically allocated.  The newline is replaced by
 691    the null character.  Note that the line can contain several null
 692    characters, so LINE_LEN, if non-null, points to the actual length
 693    of the line.  */
 694
 695 const char *
 696 location_get_source_line (const char *file_path, int line,
 697                           int *line_len)
 698 {
 699   static char *buffer;
 700   static ssize_t len;
 701
 702   if (line == 0)
 703     return NULL;
 704
 705   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
 706   if (c == NULL)
 707     return NULL;
 708
 709   bool read = read_line_num (c, line, &buffer, &len);
 710
 711   if (read && line_len)
 712     *line_len = len;
 713
 714   return read ? buffer : NULL;
 715 }
 716
 717 /* Test if the location originates from the spelling location of a
 718    builtin-tokens.  That is, return TRUE if LOC is a (possibly
 719    virtual) location of a built-in token that appears in the expansion
 720    list of a macro.  Please note that this function also works on
 721    tokens that result from built-in tokens.  For instance, the
 722    function would return true if passed a token "4" that is the result
 723    of the expansion of the built-in __LINE__ macro.  */
 724 bool
 725 is_location_from_builtin_token (source_location loc)
 726 {
 727   const line_map_ordinary *map = NULL;
 728   loc = linemap_resolve_location (line_table, loc,
 729                                   LRK_SPELLING_LOCATION, &map);
 730   return loc == BUILTINS_LOCATION;
 731 }
 732
 733 /* Expand the source location LOC into a human readable location.  If
 734    LOC is virtual, it resolves to the expansion point of the involved
 735    macro.  If LOC resolves to a builtin location, the file name of the
 736    readable location is set to the string "<built-in>".  */
 737
 738 expanded_location
 739 expand_location (source_location loc)
 740 {
 741   return expand_location_1 (loc, /*expansion_point_p=*/true);
 742 }
 743
 744 /* Expand the source location LOC into a human readable location.  If
 745    LOC is virtual, it resolves to the expansion location of the
 746    relevant macro.  If LOC resolves to a builtin location, the file
 747    name of the readable location is set to the string
 748    "<built-in>".  */
 749
 750 expanded_location
 751 expand_location_to_spelling_point (source_location loc)
 752 {
 753   return expand_location_1 (loc, /*expansion_point_p=*/false);
 754 }
 755
 756 /* The rich_location class within libcpp requires a way to expand
 757    source_location instances, and relies on the client code
 758    providing a symbol named
 759      linemap_client_expand_location_to_spelling_point
 760    to do this.
 761
 762    This is the implementation for libcommon.a (all host binaries),
 763    which simply calls into expand_location_to_spelling_point.  */
 764
 765 expanded_location
 766 linemap_client_expand_location_to_spelling_point (source_location loc)
 767 {
 768   return expand_location_to_spelling_point (loc);
 769 }
 770
 771
 772 /* If LOCATION is in a system header and if it is a virtual location for
 773    a token coming from the expansion of a macro, unwind it to the
 774    location of the expansion point of the macro.  Otherwise, just return
 775    LOCATION.
 776
 777    This is used for instance when we want to emit diagnostics about a
 778    token that may be located in a macro that is itself defined in a
 779    system header, for example, for the NULL macro.  In such a case, if
 780    LOCATION were passed directly to diagnostic functions such as
 781    warning_at, the diagnostic would be suppressed (unless
 782    -Wsystem-headers).  */
 783
 784 source_location
 785 expansion_point_location_if_in_system_header (source_location location)
 786 {
 787   if (in_system_header_at (location))
 788     location = linemap_resolve_location (line_table, location,
 789                                          LRK_MACRO_EXPANSION_POINT,
 790                                          NULL);
 791   return location;
 792 }
 793
 794 /* If LOCATION is a virtual location for a token coming from the expansion
 795    of a macro, unwind to the location of the expansion point of the macro.  */
 796
 797 source_location
 798 expansion_point_location (source_location location)
 799 {
 800   return linemap_resolve_location (line_table, location,
 801                                    LRK_MACRO_EXPANSION_POINT, NULL);
 802 }
 803
 804 /* Given location LOC, strip away any packed range information
 805    or ad-hoc information.  */
 806
 807 location_t
 808 get_pure_location (location_t loc)
 809 {
 810   if (IS_ADHOC_LOC (loc))
 811     loc
 812       = line_table->location_adhoc_data_map.data[loc & MAX_SOURCE_LOCATION].locus;
 813
 814   if (loc >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
 815     return loc;
 816
 817   if (loc < RESERVED_LOCATION_COUNT)
 818     return loc;
 819
 820   const line_map *map = linemap_lookup (line_table, loc);
 821   const line_map_ordinary *ordmap = linemap_check_ordinary (map);
 822
 823   return loc & ~((1 << ordmap->m_range_bits) - 1);
 824 }
 825
 826 /* Construct a location with caret at CARET, ranging from START to
 827    finish e.g.
 828
 829                  11111111112
 830         12345678901234567890
 831      522
 832      523   return foo + bar;
 833                   ~~~~^~~~~
 834      524
 835
 836    The location's caret is at the "+", line 523 column 15, but starts
 837    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
 838    of "bar" at column 19.  */
 839
 840 location_t
 841 make_location (location_t caret, location_t start, location_t finish)
 842 {
 843   location_t pure_loc = get_pure_location (caret);
 844   source_range src_range;
 845   src_range.m_start = start;
 846   src_range.m_finish = finish;
 847   location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
 848                                                    pure_loc,
 849                                                    src_range,
 850                                                    NULL);
 851   return combined_loc;
 852 }
 853
 854 #define ONE_K 1024
 855 #define ONE_M (ONE_K * ONE_K)
 856
 857 /* Display a number as an integer multiple of either:
 858    - 1024, if said integer is >= to 10 K (in base 2)
 859    - 1024 * 1024, if said integer is >= 10 M in (base 2)
 860  */
 861 #define SCALE(x) ((unsigned long) ((x) < 10 * ONE_K \
 862                   ? (x) \
 863                   : ((x) < 10 * ONE_M \
 864                      ? (x) / ONE_K \
 865                      : (x) / ONE_M)))
 866
 867 /* For a given integer, display either:
 868    - the character 'k', if the number is higher than 10 K (in base 2)
 869      but strictly lower than 10 M (in base 2)
 870    - the character 'M' if the number is higher than 10 M (in base2)
 871    - the charcter ' ' if the number is strictly lower  than 10 K  */
 872 #define STAT_LABEL(x) ((x) < 10 * ONE_K ? ' ' : ((x) < 10 * ONE_M ? 'k' : 'M'))
 873
 874 /* Display an integer amount as multiple of 1K or 1M (in base 2).
 875    Display the correct unit (either k, M, or ' ') after the amout, as
 876    well.  */
 877 #define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
 878
 879 /* Dump statistics to stderr about the memory usage of the line_table
 880    set of line maps.  This also displays some statistics about macro
 881    expansion.  */
 882
 883 void
 884 dump_line_table_statistics (void)
 885 {
 886   struct linemap_stats s;
 887   long total_used_map_size,
 888     macro_maps_size,
 889     total_allocated_map_size;
 890
 891   memset (&s, 0, sizeof (s));
 892
 893   linemap_get_statistics (line_table, &s);
 894
 895   macro_maps_size = s.macro_maps_used_size
 896     + s.macro_maps_locations_size;
 897
 898   total_allocated_map_size = s.ordinary_maps_allocated_size
 899     + s.macro_maps_allocated_size
 900     + s.macro_maps_locations_size;
 901
 902   total_used_map_size = s.ordinary_maps_used_size
 903     + s.macro_maps_used_size
 904     + s.macro_maps_locations_size;
 905
 906   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
 907            s.num_expanded_macros);
 908   if (s.num_expanded_macros != 0)
 909     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
 910              s.num_macro_tokens / s.num_expanded_macros);
 911   fprintf (stderr,
 912            "\nLine Table allocations during the "
 913            "compilation process\n");
 914   fprintf (stderr, "Number of ordinary maps used:        %5ld%c\n",
 915            SCALE (s.num_ordinary_maps_used),
 916            STAT_LABEL (s.num_ordinary_maps_used));
 917   fprintf (stderr, "Ordinary map used size:              %5ld%c\n",
 918            SCALE (s.ordinary_maps_used_size),
 919            STAT_LABEL (s.ordinary_maps_used_size));
 920   fprintf (stderr, "Number of ordinary maps allocated:   %5ld%c\n",
 921            SCALE (s.num_ordinary_maps_allocated),
 922            STAT_LABEL (s.num_ordinary_maps_allocated));
 923   fprintf (stderr, "Ordinary maps allocated size:        %5ld%c\n",
 924            SCALE (s.ordinary_maps_allocated_size),
 925            STAT_LABEL (s.ordinary_maps_allocated_size));
 926   fprintf (stderr, "Number of macro maps used:           %5ld%c\n",
 927            SCALE (s.num_macro_maps_used),
 928            STAT_LABEL (s.num_macro_maps_used));
 929   fprintf (stderr, "Macro maps used size:                %5ld%c\n",
 930            SCALE (s.macro_maps_used_size),
 931            STAT_LABEL (s.macro_maps_used_size));
 932   fprintf (stderr, "Macro maps locations size:           %5ld%c\n",
 933            SCALE (s.macro_maps_locations_size),
 934            STAT_LABEL (s.macro_maps_locations_size));
 935   fprintf (stderr, "Macro maps size:                     %5ld%c\n",
 936            SCALE (macro_maps_size),
 937            STAT_LABEL (macro_maps_size));
 938   fprintf (stderr, "Duplicated maps locations size:      %5ld%c\n",
 939            SCALE (s.duplicated_macro_maps_locations_size),
 940            STAT_LABEL (s.duplicated_macro_maps_locations_size));
 941   fprintf (stderr, "Total allocated maps size:           %5ld%c\n",
 942            SCALE (total_allocated_map_size),
 943            STAT_LABEL (total_allocated_map_size));
 944   fprintf (stderr, "Total used maps size:                %5ld%c\n",
 945            SCALE (total_used_map_size),
 946            STAT_LABEL (total_used_map_size));
 947   fprintf (stderr, "Ad-hoc table size:                   %5ld%c\n",
 948            SCALE (s.adhoc_table_size),
 949            STAT_LABEL (s.adhoc_table_size));
 950   fprintf (stderr, "Ad-hoc table entries used:           %5ld\n",
 951            s.adhoc_table_entries_used);
 952   fprintf (stderr, "optimized_ranges: %i\n",
 953            line_table->num_optimized_ranges);
 954   fprintf (stderr, "unoptimized_ranges: %i\n",
 955            line_table->num_unoptimized_ranges);
 956
 957   fprintf (stderr, "\n");
 958 }
 959
 960 /* Get location one beyond the final location in ordinary map IDX.  */
 961
 962 static source_location
 963 get_end_location (struct line_maps *set, unsigned int idx)
 964 {
 965   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
 966     return set->highest_location;
 967
 968   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
 969   return MAP_START_LOCATION (next_map);
 970 }
 971
 972 /* Helper function for write_digit_row.  */
 973
 974 static void
 975 write_digit (FILE *stream, int digit)
 976 {
 977   fputc ('0' + (digit % 10), stream);
 978 }
 979
 980 /* Helper function for dump_location_info.
 981    Write a row of numbers to STREAM, numbering a source line,
 982    giving the units, tens, hundreds etc of the column number.  */
 983
 984 static void
 985 write_digit_row (FILE *stream, int indent,
 986                  const line_map_ordinary *map,
 987                  source_location loc, int max_col, int divisor)
 988 {
 989   fprintf (stream, "%*c", indent, ' ');
 990   fprintf (stream, "|");
 991   for (int column = 1; column < max_col; column++)
 992     {
 993       source_location column_loc = loc + (column << map->m_range_bits);
 994       write_digit (stream, column_loc / divisor);
 995     }
 996   fprintf (stream, "\n");
 997 }
 998
 999 /* Write a half-closed (START) / half-open (END) interval of
1000    source_location to STREAM.  */
1001
1002 static void
1003 dump_location_range (FILE *stream,
1004                      source_location start, source_location end)
1005 {
1006   fprintf (stream,
1007            "  source_location interval: %u <= loc < %u\n",
1008            start, end);
1009 }
1010
1011 /* Write a labelled description of a half-closed (START) / half-open (END)
1012    interval of source_location to STREAM.  */
1013
1014 static void
1015 dump_labelled_location_range (FILE *stream,
1016                               const char *name,
1017                               source_location start, source_location end)
1018 {
1019   fprintf (stream, "%s\n", name);
1020   dump_location_range (stream, start, end);
1021   fprintf (stream, "\n");
1022 }
1023
1024 /* Write a visualization of the locations in the line_table to STREAM.  */
1025
1026 void
1027 dump_location_info (FILE *stream)
1028 {
1029   /* Visualize the reserved locations.  */
1030   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1031                                 0, RESERVED_LOCATION_COUNT);
1032
1033   /* Visualize the ordinary line_map instances, rendering the sources. */
1034   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1035     {
1036       source_location end_location = get_end_location (line_table, idx);
1037       /* half-closed: doesn't include this one. */
1038
1039       const line_map_ordinary *map
1040         = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1041       fprintf (stream, "ORDINARY MAP: %i\n", idx);
1042       dump_location_range (stream,
1043                            MAP_START_LOCATION (map), end_location);
1044       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1045       fprintf (stream, "  starting at line: %i\n",
1046                ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1047       fprintf (stream, "  column and range bits: %i\n",
1048                map->m_column_and_range_bits);
1049       fprintf (stream, "  column bits: %i\n",
1050                map->m_column_and_range_bits - map->m_range_bits);
1051       fprintf (stream, "  range bits: %i\n",
1052                map->m_range_bits);
1053
1054       /* Render the span of source lines that this "map" covers.  */
1055       for (source_location loc = MAP_START_LOCATION (map);
1056            loc < end_location;
1057            loc += (1 << map->m_range_bits) )
1058         {
1059           gcc_assert (pure_location_p (line_table, loc) );
1060
1061           expanded_location exploc
1062             = linemap_expand_location (line_table, map, loc);
1063
1064           if (0 == exploc.column)
1065             {
1066               /* Beginning of a new source line: draw the line.  */
1067
1068               int line_size;
1069               const char *line_text = location_get_source_line (exploc.file,
1070                                                                 exploc.line,
1071                                                                 &line_size);
1072               if (!line_text)
1073                 break;
1074               fprintf (stream,
1075                        "%s:%3i|loc:%5i|%.*s\n",
1076                        exploc.file, exploc.line,
1077                        loc,
1078                        line_size, line_text);
1079
1080               /* "loc" is at column 0, which means "the whole line".
1081                  Render the locations *within* the line, by underlining
1082                  it, showing the source_location numeric values
1083                  at each column.  */
1084               int max_col = (1 << map->m_column_and_range_bits) - 1;
1085               if (max_col > line_size)
1086                 max_col = line_size + 1;
1087
1088               int indent = 14 + strlen (exploc.file);
1089
1090               /* Thousands.  */
1091               if (end_location > 999)
1092                 write_digit_row (stream, indent, map, loc, max_col, 1000);
1093
1094               /* Hundreds.  */
1095               if (end_location > 99)
1096                 write_digit_row (stream, indent, map, loc, max_col, 100);
1097
1098               /* Tens.  */
1099               write_digit_row (stream, indent, map, loc, max_col, 10);
1100
1101               /* Units.  */
1102               write_digit_row (stream, indent, map, loc, max_col, 1);
1103             }
1104         }
1105       fprintf (stream, "\n");
1106     }
1107
1108   /* Visualize unallocated values.  */
1109   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1110                                 line_table->highest_location,
1111                                 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1112
1113   /* Visualize the macro line_map instances, rendering the sources. */
1114   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1115     {
1116       /* Each macro map that is allocated owns source_location values
1117          that are *lower* that the one before them.
1118          Hence it's meaningful to view them either in order of ascending
1119          source locations, or in order of ascending macro map index.  */
1120       const bool ascending_source_locations = true;
1121       unsigned int idx = (ascending_source_locations
1122                           ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1123                           : i);
1124       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1125       fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1126                idx,
1127                linemap_map_get_macro_name (map),
1128                MACRO_MAP_NUM_MACRO_TOKENS (map));
1129       dump_location_range (stream,
1130                            map->start_location,
1131                            (map->start_location
1132                             + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1133       inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1134               "expansion point is location %i",
1135               MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1136       fprintf (stream, "  map->start_location: %u\n",
1137                map->start_location);
1138
1139       fprintf (stream, "  macro_locations:\n");
1140       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1141         {
1142           source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1143           source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1144
1145           /* linemap_add_macro_token encodes token numbers in an expansion
1146              by putting them after MAP_START_LOCATION. */
1147
1148           /* I'm typically seeing 4 uninitialized entries at the end of
1149              0xafafafaf.
1150              This appears to be due to macro.c:replace_args
1151              adding 2 extra args for padding tokens; presumably there may
1152              be a leading and/or trailing padding token injected,
1153              each for 2 more location slots.
1154              This would explain there being up to 4 source_locations slots
1155              that may be uninitialized.  */
1156
1157           fprintf (stream, "    %u: %u, %u\n",
1158                    i,
1159                    x,
1160                    y);
1161           if (x == y)
1162             {
1163               if (x < MAP_START_LOCATION (map))
1164                 inform (x, "token %u has x-location == y-location == %u", i, x);
1165               else
1166                 fprintf (stream,
1167                          "x-location == y-location == %u encodes token # %u\n",
1168                          x, x - MAP_START_LOCATION (map));
1169                 }
1170           else
1171             {
1172               inform (x, "token %u has x-location == %u", i, x);
1173               inform (x, "token %u has y-location == %u", i, y);
1174             }
1175         }
1176       fprintf (stream, "\n");
1177     }
1178
1179   /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1180      macro map, presumably due to an off-by-one error somewhere
1181      between the logic in linemap_enter_macro and
1182      LINEMAPS_MACRO_LOWEST_LOCATION.  */
1183   dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1184                                 MAX_SOURCE_LOCATION,
1185                                 MAX_SOURCE_LOCATION + 1);
1186
1187   /* Visualize ad-hoc values.  */
1188   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1189                                 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1190 }
1191
1192 /* string_concat's constructor.  */
1193
1194 string_concat::string_concat (int num, location_t *locs)
1195   : m_num (num)
1196 {
1197   m_locs = ggc_vec_alloc <location_t> (num);
1198   for (int i = 0; i < num; i++)
1199     m_locs[i] = locs[i];
1200 }
1201
1202 /* string_concat_db's constructor.  */
1203
1204 string_concat_db::string_concat_db ()
1205 {
1206   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1207 }
1208
1209 /* Record that a string concatenation occurred, covering NUM
1210    string literal tokens.  LOCS is an array of size NUM, containing the
1211    locations of the tokens.  A copy of LOCS is taken.  */
1212
1213 void
1214 string_concat_db::record_string_concatenation (int num, location_t *locs)
1215 {
1216   gcc_assert (num > 1);
1217   gcc_assert (locs);
1218
1219   location_t key_loc = get_key_loc (locs[0]);
1220
1221   string_concat *concat
1222     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1223   m_table->put (key_loc, concat);
1224 }
1225
1226 /* Determine if LOC was the location of the the initial token of a
1227    concatenation of string literal tokens.
1228    If so, *OUT_NUM is written to with the number of tokens, and
1229    *OUT_LOCS with the location of an array of locations of the
1230    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
1231    storage owned by the string_concat_db.
1232    Otherwise, return false.  */
1233
1234 bool
1235 string_concat_db::get_string_concatenation (location_t loc,
1236                                             int *out_num,
1237                                             location_t **out_locs)
1238 {
1239   gcc_assert (out_num);
1240   gcc_assert (out_locs);
1241
1242   location_t key_loc = get_key_loc (loc);
1243
1244   string_concat **concat = m_table->get (key_loc);
1245   if (!concat)
1246     return false;
1247
1248   *out_num = (*concat)->m_num;
1249   *out_locs =(*concat)->m_locs;
1250   return true;
1251 }
1252
1253 /* Internal function.  Canonicalize LOC into a form suitable for
1254    use as a key within the database, stripping away macro expansion,
1255    ad-hoc information, and range information, using the location of
1256    the start of LOC within an ordinary linemap.  */
1257
1258 location_t
1259 string_concat_db::get_key_loc (location_t loc)
1260 {
1261   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1262                                   NULL);
1263
1264   loc = get_range_from_loc (line_table, loc).m_start;
1265
1266   return loc;
1267 }
1268
1269 /* Helper class for use within get_substring_ranges_for_loc.
1270    An vec of cpp_string with responsibility for releasing all of the
1271    str->text for each str in the vector.  */
1272
1273 class auto_cpp_string_vec :  public auto_vec <cpp_string>
1274 {
1275  public:
1276   auto_cpp_string_vec (int alloc)
1277     : auto_vec <cpp_string> (alloc) {}
1278
1279   ~auto_cpp_string_vec ()
1280   {
1281     /* Clean up the copies within this vec.  */
1282     int i;
1283     cpp_string *str;
1284     FOR_EACH_VEC_ELT (*this, i, str)
1285       free (const_cast <unsigned char *> (str->text));
1286   }
1287 };
1288
1289 /* Attempt to populate RANGES with source location information on the
1290    individual characters within the string literal found at STRLOC.
1291    If CONCATS is non-NULL, then any string literals that the token at
1292    STRLOC  was concatenated with are also added to RANGES.
1293
1294    Return NULL if successful, or an error message if any errors occurred (in
1295    which case RANGES may be only partially populated and should not
1296    be used).
1297
1298    This is implemented by re-parsing the relevant source line(s).  */
1299
1300 static const char *
1301 get_substring_ranges_for_loc (cpp_reader *pfile,
1302                               string_concat_db *concats,
1303                               location_t strloc,
1304                               enum cpp_ttype type,
1305                               cpp_substring_ranges &ranges)
1306 {
1307   gcc_assert (pfile);
1308
1309   if (strloc == UNKNOWN_LOCATION)
1310     return "unknown location";
1311
1312   /* If string concatenation has occurred at STRLOC, get the locations
1313      of all of the literal tokens making up the compound string.
1314      Otherwise, just use STRLOC.  */
1315   int num_locs = 1;
1316   location_t *strlocs = &strloc;
1317   if (concats)
1318     concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1319
1320   auto_cpp_string_vec strs (num_locs);
1321   auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1322   for (int i = 0; i < num_locs; i++)
1323     {
1324       /* Get range of strloc.  We will use it to locate the start and finish
1325          of the literal token within the line.  */
1326       source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1327
1328       if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1329         /* If the string is within a macro expansion, we can't get at the
1330            end location.  */
1331         return "macro expansion";
1332
1333       if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1334         /* If so, we can't reliably determine where the token started within
1335            its line.  */
1336         return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1337
1338       if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1339         /* If so, we can't reliably determine where the token finished within
1340            its line.  */
1341         return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1342
1343       expanded_location start
1344         = expand_location_to_spelling_point (src_range.m_start);
1345       expanded_location finish
1346         = expand_location_to_spelling_point (src_range.m_finish);
1347       if (start.file != finish.file)
1348         return "range endpoints are in different files";
1349       if (start.line != finish.line)
1350         return "range endpoints are on different lines";
1351       if (start.column > finish.column)
1352         return "range endpoints are reversed";
1353
1354       int line_width;
1355       const char *line = location_get_source_line (start.file, start.line,
1356                                                    &line_width);
1357       if (line == NULL)
1358         return "unable to read source line";
1359
1360       /* Determine the location of the literal (including quotes
1361          and leading prefix chars, such as the 'u' in a u""
1362          token).  */
1363       const char *literal = line + start.column - 1;
1364       int literal_length = finish.column - start.column + 1;
1365
1366       gcc_assert (line_width >= (start.column - 1 + literal_length));
1367       cpp_string from;
1368       from.len = literal_length;
1369       /* Make a copy of the literal, to avoid having to rely on
1370          the lifetime of the copy of the line within the cache.
1371          This will be released by the auto_cpp_string_vec dtor.  */
1372       from.text = XDUPVEC (unsigned char, literal, literal_length);
1373       strs.safe_push (from);
1374
1375       /* For very long lines, a new linemap could have started
1376          halfway through the token.
1377          Ensure that the loc_reader uses the linemap of the
1378          *end* of the token for its start location.  */
1379       const line_map_ordinary *final_ord_map;
1380       linemap_resolve_location (line_table, src_range.m_finish,
1381                                 LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1382       location_t start_loc
1383         = linemap_position_for_line_and_column (line_table, final_ord_map,
1384                                                 start.line, start.column);
1385
1386       cpp_string_location_reader loc_reader (start_loc, line_table);
1387       loc_readers.safe_push (loc_reader);
1388     }
1389
1390   /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
1391   const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1392                                                  loc_readers.address (),
1393                                                  num_locs, &ranges, type);
1394   if (err)
1395     return err;
1396
1397   /* Success: "ranges" should now contain information on the string.  */
1398   return NULL;
1399 }
1400
1401 /* Attempt to populate *OUT_RANGE with source location information on the
1402    range of given characters within the string literal found at STRLOC.
1403    START_IDX and END_IDX refer to offsets within the execution character
1404    set.
1405    If CONCATS is non-NULL, then any string literals that the token at
1406    STRLOC was concatenated with are also considered.
1407
1408    This is implemented by re-parsing the relevant source line(s).
1409
1410    Return NULL if successful, or an error message if any errors occurred.
1411    Error messages are intended for GCC developers (to help debugging) rather
1412    than for end-users.  */
1413
1414 const char *
1415 get_source_range_for_substring (cpp_reader *pfile,
1416                                 string_concat_db *concats,
1417                                 location_t strloc,
1418                                 enum cpp_ttype type,
1419                                 int start_idx, int end_idx,
1420                                 source_range *out_range)
1421 {
1422   gcc_checking_assert (start_idx >= 0);
1423   gcc_checking_assert (end_idx >= 0);
1424   gcc_assert (out_range);
1425
1426   cpp_substring_ranges ranges;
1427   const char *err
1428     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1429   if (err)
1430     return err;
1431
1432   if (start_idx >= ranges.get_num_ranges ())
1433     return "start_idx out of range";
1434   if (end_idx >= ranges.get_num_ranges ())
1435     return "end_idx out of range";
1436
1437   out_range->m_start = ranges.get_range (start_idx).m_start;
1438   out_range->m_finish = ranges.get_range (end_idx).m_finish;
1439   return NULL;
1440 }
1441
1442 /* As get_source_range_for_substring, but write to *OUT the number
1443    of ranges that are available.  */
1444
1445 const char *
1446 get_num_source_ranges_for_substring (cpp_reader *pfile,
1447                                      string_concat_db *concats,
1448                                      location_t strloc,
1449                                      enum cpp_ttype type,
1450                                      int *out)
1451 {
1452   gcc_assert (out);
1453
1454   cpp_substring_ranges ranges;
1455   const char *err
1456     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1457
1458   if (err)
1459     return err;
1460
1461   *out = ranges.get_num_ranges ();
1462   return NULL;
1463 }
1464
1465 #if CHECKING_P
1466
1467 namespace selftest {
1468
1469 /* Selftests of location handling.  */
1470
1471 /* A class for writing out a temporary sourcefile for use in selftests
1472    of input handling.  */
1473
1474 class temp_source_file
1475 {
1476  public:
1477   temp_source_file (const location &loc, const char *suffix,
1478                     const char *content);
1479   ~temp_source_file ();
1480
1481   const char *get_filename () const { return m_filename; }
1482
1483  private:
1484   char *m_filename;
1485 };
1486
1487 /* Constructor.  Create a tempfile using SUFFIX, and write CONTENT to
1488    it.  Abort if anything goes wrong, using LOC as the effective
1489    location in the problem report.  */
1490
1491 temp_source_file::temp_source_file (const location &loc, const char *suffix,
1492                                     const char *content)
1493 {
1494   m_filename = make_temp_file (suffix);
1495   ASSERT_NE (m_filename, NULL);
1496
1497   FILE *out = fopen (m_filename, "w");
1498   if (!out)
1499     ::selftest::fail_formatted (loc, "unable to open tempfile: %s",
1500                                 m_filename);
1501   fprintf (out, "%s", content);
1502   fclose (out);
1503 }
1504
1505 /* Destructor.  Delete the tempfile.  */
1506
1507 temp_source_file::~temp_source_file ()
1508 {
1509   unlink (m_filename);
1510   free (m_filename);
1511 }
1512
1513 /* Helper function for verifying location data: when location_t
1514    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1515    as having column 0.  */
1516
1517 static bool
1518 should_have_column_data_p (location_t loc)
1519 {
1520   if (IS_ADHOC_LOC (loc))
1521     loc = get_location_from_adhoc_loc (line_table, loc);
1522   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1523     return false;
1524   return true;
1525 }
1526
/* Selftest for should_have_column_data_p.  Check a low location and
   both sides of the LINE_MAP_MAX_LOCATION_WITH_COLS threshold.  */

static void
test_should_have_column_data_p ()
{
  /* A location well below the threshold.  */
  ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
  /* The threshold itself still counts as having column data...  */
  ASSERT_TRUE
    (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
  /* ...but the first value above it does not.  */
  ASSERT_FALSE
    (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
}
1538
1539 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1540    on LOC.  */
1541
1542 static void
1543 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1544               location_t loc)
1545 {
1546   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1547   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1548   /* If location_t values are sufficiently high, then column numbers
1549      will be unavailable and LOCATION_COLUMN (loc) will be 0.
1550      When close to the threshold, column numbers *may* be present: if
1551      the final linemap before the threshold contains a line that straddles
1552      the threshold, locations in that line have column information.  */
1553   if (should_have_column_data_p (loc))
1554     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1555 }
1556
/* Various selftests in this file involve constructing a line table
   and one or more line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
   and others use zero
   - the fallback modes within line-map.c: there are various threshold
   values for source_location/location_t beyond which line-map.c changes
   behavior (disabling of the range-packing optimization, disabling
   of column-tracking).  We can exercise these by starting the line_table
   at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */

struct line_table_case
{
  /* The value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* The location value at which to start the line_table; zero means
     the table's starting values are left alone.  */
  int m_base_location;

  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }
};
1583
1584 /* A class for overriding the global "line_table" within a selftest,
1585    restoring its value afterwards.  */
1586
1587 class temp_line_table
1588 {
1589  public:
1590   temp_line_table (const line_table_case &);
1591   ~temp_line_table ();
1592
1593  private:
1594   line_maps *m_old_line_table;
1595 };
1596
1597 /* Constructor.  Store the old value of line_table, and create a new
1598    one, using the sitation described in CASE_.  */
1599
1600 temp_line_table::temp_line_table (const line_table_case &case_)
1601   : m_old_line_table (line_table)
1602 {
1603   line_table = ggc_alloc<line_maps> ();
1604   linemap_init (line_table, BUILTINS_LOCATION);
1605   line_table->reallocator = m_old_line_table->reallocator;
1606   line_table->round_alloc_size = m_old_line_table->round_alloc_size;
1607   line_table->default_range_bits = case_.m_default_range_bits;
1608   if (case_.m_base_location)
1609     {
1610       line_table->highest_location = case_.m_base_location;
1611       line_table->highest_line = case_.m_base_location;
1612     }
1613 }
1614
/* Destructor.  Restore the old value of line_table, i.e. the one
   saved by the constructor in m_old_line_table.  */

temp_line_table::~temp_line_table ()
{
  line_table = m_old_line_table;
}
1621
/* Verify basic operation of ordinary linemaps, using the line_table
   configuration described by CASE_: build locations in two files,
   check that file/line/column can be recovered from them, then check
   that a range built by make_location can be taken apart again.  */

static void
test_accessing_ordinary_linemaps (const line_table_case &case_)
{
  /* Replace the global line_table for the duration of this test.  */
  temp_line_table tmp_lt (case_);

  /* Build a simple linemap describing some locations. */
  linemap_add (line_table, LC_ENTER, false, "foo.c", 0);

  /* Line 1 of foo.c: columns 1 and 23.  */
  linemap_line_start (line_table, 1, 100);
  location_t loc_a = linemap_position_for_column (line_table, 1);
  location_t loc_b = linemap_position_for_column (line_table, 23);

  /* Line 2 of foo.c: columns 1 and 17.  */
  linemap_line_start (line_table, 2, 100);
  location_t loc_c = linemap_position_for_column (line_table, 1);
  location_t loc_d = linemap_position_for_column (line_table, 17);

  /* Example of a very long line.  */
  linemap_line_start (line_table, 3, 2000);
  location_t loc_e = linemap_position_for_column (line_table, 700);

  linemap_add (line_table, LC_LEAVE, false, NULL, 0);

  /* Multiple files.  */
  linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
  linemap_line_start (line_table, 1, 200);
  location_t loc_f = linemap_position_for_column (line_table, 150);
  linemap_add (line_table, LC_LEAVE, false, NULL, 0);

  /* Verify that we can recover the location info.  */
  assert_loceq ("foo.c", 1, 1, loc_a);
  assert_loceq ("foo.c", 1, 23, loc_b);
  assert_loceq ("foo.c", 2, 1, loc_c);
  assert_loceq ("foo.c", 2, 17, loc_d);
  assert_loceq ("foo.c", 3, 700, loc_e);
  assert_loceq ("bar.c", 1, 150, loc_f);

  /* A location made by linemap_position_for_column is neither from a
     builtin token nor carries anything beyond the pure location.  */
  ASSERT_FALSE (is_location_from_builtin_token (loc_a));
  ASSERT_TRUE (pure_location_p (line_table, loc_a));

  /* Verify using make_location to build a range, and extracting data
     back from it.  The caret is loc_c, the range runs from loc_b to
     loc_d.  */
  location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
  ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
  ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
  source_range src_range = get_range_from_loc (line_table, range_c_b_d);
  ASSERT_EQ (loc_b, src_range.m_start);
  ASSERT_EQ (loc_d, src_range.m_finish);
}
1672
/* Verify various properties of UNKNOWN_LOCATION: it has no associated
   file (LOCATION_FILE yields NULL), and both its line and its column
   are reported as 0.  */

static void
test_unknown_location ()
{
  ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
  ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
  ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
}
1682
/* Verify various properties of BUILTINS_LOCATION: it resolves to the
   (translated) "<built-in>" filename at line 0, column 0, and
   is_location_from_builtin_token holds for it.  */

static void
test_builtins ()
{
  /* The expected filename goes through _() so that it is compared
     against the translated form of the string.  */
  assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
  ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
}
1691
1692 /* Verify reading of input files (e.g. for caret-based diagnostics).  */
1693
1694 static void
1695 test_reading_source_line ()
1696 {
1697   /* Create a tempfile and write some text to it.  */
1698   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1699                         "01234567890123456789\n"
1700                         "This is the test text\n"
1701                         "This is the 3rd line\n");
1702
1703   /* Read back a specific line from the tempfile.  */
1704   int line_size;
1705   const char *source_line = location_get_source_line (tmp.get_filename (),
1706                                                       2, &line_size);
1707   ASSERT_TRUE (source_line != NULL);
1708   ASSERT_EQ (21, line_size);
1709   if (!strncmp ("This is the test text",
1710                 source_line, line_size))
1711     ::selftest::pass (SELFTEST_LOCATION,
1712                       "source_line matched expected value");
1713   else
1714     ::selftest::fail (SELFTEST_LOCATION,
1715                       "source_line did not match expected value");
1716
1717 }
1718
1719 /* Tests of lexing.  */
1720
/* Verify that token TOK from PARSER has cpp_token_as_text
   equal to EXPECTED_TEXT.

   PARSER and TOK are passed straight to cpp_token_as_text; its result
   is cast to const char * and compared against EXPECTED_TEXT using
   ASSERT_STREQ.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));    \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);           \
  SELFTEST_END_STMT
1729
1730 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1731    and ranges from EXP_START_COL to EXP_FINISH_COL.
1732    Use LOC as the effective location of the selftest.  */
1733
1734 static void
1735 assert_token_loc_eq (const location &loc,
1736                      const cpp_token *tok,
1737                      const char *exp_filename, int exp_linenum,
1738                      int exp_start_col, int exp_finish_col)
1739 {
1740   location_t tok_loc = tok->src_loc;
1741   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1742   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1743
1744   /* If location_t values are sufficiently high, then column numbers
1745      will be unavailable.  */
1746   if (!should_have_column_data_p (tok_loc))
1747     return;
1748
1749   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1750   source_range tok_range = get_range_from_loc (line_table, tok_loc);
1751   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1752   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1753 }
1754
/* Use assert_token_loc_eq to verify the TOK->src_loc, using
   SELFTEST_LOCATION as the effective location of the selftest.

   TOK must be in EXP_FILENAME at line EXP_LINENUM, spanning columns
   EXP_START_COL to EXP_FINISH_COL.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
                            EXP_START_COL, EXP_FINISH_COL) \
  assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
                       (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1762
1763 /* Test of lexing a file using libcpp, verifying tokens and their
1764    location information.  */
1765
1766 static void
1767 test_lexer (const line_table_case &case_)
1768 {
1769   /* Create a tempfile and write some text to it.  */
1770   const char *content =
1771     /*00000000011111111112222222222333333.3333444444444.455555555556
1772       12345678901234567890123456789012345.6789012345678.901234567890.  */
1773     ("test_name /* c-style comment */\n"
1774      "                                  \"test literal\"\n"
1775      " // test c++-style comment\n"
1776      "   42\n");
1777   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1778
1779   temp_line_table tmp_lt (case_);
1780
1781   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1782
1783   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1784   ASSERT_NE (fname, NULL);
1785
1786   /* Verify that we get the expected tokens back, with the correct
1787      location information.  */
1788
1789   location_t loc;
1790   const cpp_token *tok;
1791   tok = cpp_get_token_with_location (parser, &loc);
1792   ASSERT_NE (tok, NULL);
1793   ASSERT_EQ (tok->type, CPP_NAME);
1794   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1795   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1796
1797   tok = cpp_get_token_with_location (parser, &loc);
1798   ASSERT_NE (tok, NULL);
1799   ASSERT_EQ (tok->type, CPP_STRING);
1800   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1801   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1802
1803   tok = cpp_get_token_with_location (parser, &loc);
1804   ASSERT_NE (tok, NULL);
1805   ASSERT_EQ (tok->type, CPP_NUMBER);
1806   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
1807   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
1808
1809   tok = cpp_get_token_with_location (parser, &loc);
1810   ASSERT_NE (tok, NULL);
1811   ASSERT_EQ (tok->type, CPP_EOF);
1812
1813   cpp_finish (parser, NULL);
1814   cpp_destroy (parser);
1815 }
1816
/* Forward decls.  lexer_test_options::apply takes a lexer_test &, while
   the lexer_test constructor takes a lexer_test_options *, so both
   names have to be declared before either type is defined.  */

struct lexer_test;
class lexer_test_options;
1821
1822 /* A class for specifying options of a lexer_test.
1823    The "apply" vfunc is called during the lexer_test constructor.  */
1824
1825 class lexer_test_options
1826 {
1827  public:
1828   virtual void apply (lexer_test &) = 0;
1829 };
1830
/* A struct for writing lexer tests.

   The constructor writes CONTENT to a temporary source file, creates a
   cpp_reader, lets OPTIONS (if non-NULL) adjust the test, and starts
   reading the tempfile.  Tokens are then fetched via get_token.  The
   destructor asserts that the next token is CPP_EOF before finishing
   and destroying the reader.  */

struct lexer_test
{
  lexer_test (const line_table_case &case_, const char *content,
              lexer_test_options *options);
  ~lexer_test ();

  /* Get the next token from m_parser; asserts that it is non-NULL.  */
  const cpp_token *get_token ();

  /* NOTE: members are initialized in declaration order.  The
     constructor initializes m_tmp_lt before it passes the global
     line_table to cpp_create_reader for m_parser, so m_tmp_lt must
     stay ahead of m_parser.  */

  /* The temporary file holding the text being lexed.  */
  temp_source_file m_tempfile;
  /* The line table in force for the duration of the test.  */
  temp_line_table m_tmp_lt;
  /* The libcpp reader; created in the ctor, destroyed in the dtor.  */
  cpp_reader *m_parser;
  /* String concatenation database, handed to the substring-location
     queries made by the assert_* helpers.  */
  string_concat_db m_concats;
};
1846
1847 /* Use an EBCDIC encoding for the execution charset, specifically
1848    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
1849
1850    This exercises iconv integration within libcpp.
1851    Not every build of iconv supports the given charset,
1852    so we need to flag this error and handle it gracefully.  */
1853
1854 class ebcdic_execution_charset : public lexer_test_options
1855 {
1856  public:
1857   ebcdic_execution_charset () : m_num_iconv_errors (0)
1858     {
1859       gcc_assert (s_singleton == NULL);
1860       s_singleton = this;
1861     }
1862   ~ebcdic_execution_charset ()
1863     {
1864       gcc_assert (s_singleton == this);
1865       s_singleton = NULL;
1866     }
1867
1868   void apply (lexer_test &test) FINAL OVERRIDE
1869   {
1870     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
1871     cpp_opts->narrow_charset = "IBM1047";
1872
1873     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
1874     callbacks->error = on_error;
1875   }
1876
1877   static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
1878                         int level ATTRIBUTE_UNUSED,
1879                         int reason ATTRIBUTE_UNUSED,
1880                         rich_location *richloc ATTRIBUTE_UNUSED,
1881                         const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
1882     ATTRIBUTE_FPTR_PRINTF(5,0)
1883   {
1884     gcc_assert (s_singleton);
1885     /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
1886        when the local iconv build doesn't support the conversion.  */
1887     if (strstr (msgid, "not supported by iconv"))
1888       {
1889         s_singleton->m_num_iconv_errors++;
1890         return true;
1891       }
1892
1893     /* Otherwise, we have an unexpected error.  */
1894     abort ();
1895   }
1896
1897   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
1898
1899  private:
1900   static ebcdic_execution_charset *s_singleton;
1901   int m_num_iconv_errors;
1902 };
1903
1904 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
1905
1906 /* Constructor.  Override line_table with a new instance based on CASE_,
1907    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
1908    start parsing the tempfile.  */
1909
1910 lexer_test::lexer_test (const line_table_case &case_, const char *content,
1911                         lexer_test_options *options) :
1912   /* Create a tempfile and write the text to it.  */
1913   m_tempfile (SELFTEST_LOCATION, ".c", content),
1914   m_tmp_lt (case_),
1915   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
1916   m_concats ()
1917 {
1918   if (options)
1919     options->apply (*this);
1920
1921   cpp_init_iconv (m_parser);
1922
1923   /* Parse the file.  */
1924   const char *fname = cpp_read_main_file (m_parser,
1925                                           m_tempfile.get_filename ());
1926   ASSERT_NE (fname, NULL);
1927 }
1928
1929 /* Destructor.  Verify that the next token in m_parser is EOF.  */
1930
1931 lexer_test::~lexer_test ()
1932 {
1933   location_t loc;
1934   const cpp_token *tok;
1935
1936   tok = cpp_get_token_with_location (m_parser, &loc);
1937   ASSERT_NE (tok, NULL);
1938   ASSERT_EQ (tok->type, CPP_EOF);
1939
1940   cpp_finish (m_parser, NULL);
1941   cpp_destroy (m_parser);
1942 }
1943
1944 /* Get the next token from m_parser.  */
1945
1946 const cpp_token *
1947 lexer_test::get_token ()
1948 {
1949   location_t loc;
1950   const cpp_token *tok;
1951
1952   tok = cpp_get_token_with_location (m_parser, &loc);
1953   ASSERT_NE (tok, NULL);
1954   return tok;
1955 }
1956
1957 /* Verify that locations within string literals are correctly handled.  */
1958
1959 /* Verify get_source_range_for_substring for token(s) at STRLOC,
1960    using the string concatenation database for TEST.
1961
1962    Assert that the character at index IDX is on EXPECTED_LINE,
1963    and that it begins at column EXPECTED_START_COL and ends at
1964    EXPECTED_FINISH_COL (unless the locations are beyond
1965    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
1966    columns).  */
1967
1968 static void
1969 assert_char_at_range (const location &loc,
1970                       lexer_test& test,
1971                       location_t strloc, enum cpp_ttype type, int idx,
1972                       int expected_line, int expected_start_col,
1973                       int expected_finish_col)
1974 {
1975   cpp_reader *pfile = test.m_parser;
1976   string_concat_db *concats = &test.m_concats;
1977
1978   source_range actual_range;
1979   const char *err
1980     = get_source_range_for_substring (pfile, concats, strloc, type,
1981                                       idx, idx, &actual_range);
1982   if (should_have_column_data_p (strloc))
1983     ASSERT_EQ_AT (loc, NULL, err);
1984   else
1985     {
1986       ASSERT_STREQ_AT (loc,
1987                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
1988                        err);
1989       return;
1990     }
1991
1992   int actual_start_line = LOCATION_LINE (actual_range.m_start);
1993   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
1994   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
1995   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
1996
1997   if (should_have_column_data_p (actual_range.m_start))
1998     {
1999       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2000       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2001     }
2002   if (should_have_column_data_p (actual_range.m_finish))
2003     {
2004       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2005       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2006     }
2007 }
2008
/* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
   the effective location of any errors.

   Checks that the character at index IDX within the string token(s) of
   kind TYPE at STRLOC is on EXPECTED_LINE, spanning columns
   EXPECTED_START_COL to EXPECTED_FINISH_COL, using the reader and
   string concatenation database of LEXER_TEST.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
                             EXPECTED_START_COL, EXPECTED_FINISH_COL)   \
  assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
                        (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
                        (EXPECTED_FINISH_COL))
2017
2018 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2019    using the string concatenation database for TEST.
2020
2021    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
2022
2023 static void
2024 assert_num_substring_ranges (const location &loc,
2025                              lexer_test& test,
2026                              location_t strloc,
2027                              enum cpp_ttype type,
2028                              int expected_num_ranges)
2029 {
2030   cpp_reader *pfile = test.m_parser;
2031   string_concat_db *concats = &test.m_concats;
2032
2033   int actual_num_ranges;
2034   const char *err
2035     = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2036                                            &actual_num_ranges);
2037   if (should_have_column_data_p (strloc))
2038     ASSERT_EQ_AT (loc, NULL, err);
2039   else
2040     {
2041       ASSERT_STREQ_AT (loc,
2042                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2043                        err);
2044       return;
2045     }
2046   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2047 }
2048
/* Macro for calling assert_num_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors.

   Checks that the string token(s) of kind TYPE at STRLOC contain
   EXPECTED_NUM_RANGES ranges, using the reader and string
   concatenation database of LEXER_TEST.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
                                    EXPECTED_NUM_RANGES)                \
  assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
                               (TYPE), (EXPECTED_NUM_RANGES))
2056
2057
2058 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2059    returns an error (using the string concatenation database for TEST).  */
2060
2061 static void
2062 assert_has_no_substring_ranges (const location &loc,
2063                                 lexer_test& test,
2064                                 location_t strloc,
2065                                 enum cpp_ttype type,
2066                                 const char *expected_err)
2067 {
2068   cpp_reader *pfile = test.m_parser;
2069   string_concat_db *concats = &test.m_concats;
2070   cpp_substring_ranges ranges;
2071   const char *actual_err
2072     = get_substring_ranges_for_loc (pfile, concats, strloc,
2073                                     type, ranges);
2074   if (should_have_column_data_p (strloc))
2075     ASSERT_STREQ_AT (loc, expected_err, actual_err);
2076   else
2077     ASSERT_STREQ_AT (loc,
2078                      "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2079                      actual_err);
2080 }
2081
/* Macro for calling assert_has_no_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors.

   Checks that no substring ranges are available for the string
   token(s) of kind TYPE at STRLOC, and that the error reported is ERR,
   using the reader and string concatenation database of LEXER_TEST.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)    \
    assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
                                    (STRLOC), (TYPE), (ERR))
2085
2086 /* Lex a simple string literal.  Verify the substring location data, before
2087    and after running cpp_interpret_string on it.  */
2088
2089 static void
2090 test_lexer_string_locations_simple (const line_table_case &case_)
2091 {
2092   /* Digits 0-9 (with 0 at column 10), the simple way.
2093      ....................000000000.11111111112.2222222223333333333
2094      ....................123456789.01234567890.1234567890123456789
2095      We add a trailing comment to ensure that we correctly locate
2096      the end of the string literal token.  */
2097   const char *content = "        \"0123456789\" /* not a string */\n";
2098   lexer_test test (case_, content, NULL);
2099
2100   /* Verify that we get the expected token back, with the correct
2101      location information.  */
2102   const cpp_token *tok = test.get_token ();
2103   ASSERT_EQ (tok->type, CPP_STRING);
2104   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2105   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2106
2107   /* At this point in lexing, the quote characters are treated as part of
2108      the string (they are stripped off by cpp_interpret_string).  */
2109
2110   ASSERT_EQ (tok->val.str.len, 12);
2111
2112   /* Verify that cpp_interpret_string works.  */
2113   cpp_string dst_string;
2114   const enum cpp_ttype type = CPP_STRING;
2115   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2116                                       &dst_string, type);
2117   ASSERT_TRUE (result);
2118   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2119   free (const_cast <unsigned char *> (dst_string.text));
2120
2121   /* Verify ranges of individual characters.  This no longer includes the
2122      quotes.  */
2123   for (int i = 0; i <= 9; i++)
2124     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2125                           10 + i, 10 + i);
2126
2127   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10);
2128 }
2129
2130 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2131    encoding.  */
2132
2133 static void
2134 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2135 {
2136   /* EBCDIC support requires iconv.  */
2137   if (!HAVE_ICONV)
2138     return;
2139
2140   /* Digits 0-9 (with 0 at column 10), the simple way.
2141      ....................000000000.11111111112.2222222223333333333
2142      ....................123456789.01234567890.1234567890123456789
2143      We add a trailing comment to ensure that we correctly locate
2144      the end of the string literal token.  */
2145   const char *content = "        \"0123456789\" /* not a string */\n";
2146   ebcdic_execution_charset use_ebcdic;
2147   lexer_test test (case_, content, &use_ebcdic);
2148
2149   /* Verify that we get the expected token back, with the correct
2150      location information.  */
2151   const cpp_token *tok = test.get_token ();
2152   ASSERT_EQ (tok->type, CPP_STRING);
2153   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2154   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2155
2156   /* At this point in lexing, the quote characters are treated as part of
2157      the string (they are stripped off by cpp_interpret_string).  */
2158
2159   ASSERT_EQ (tok->val.str.len, 12);
2160
2161   /* The remainder of the test requires an iconv implementation that
2162      can convert from UTF-8 to the EBCDIC encoding requested above.  */
2163   if (use_ebcdic.iconv_errors_occurred_p ())
2164     return;
2165
2166   /* Verify that cpp_interpret_string works.  */
2167   cpp_string dst_string;
2168   const enum cpp_ttype type = CPP_STRING;
2169   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2170                                       &dst_string, type);
2171   ASSERT_TRUE (result);
2172   /* We should now have EBCDIC-encoded text, specifically
2173      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2174      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
2175   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2176                 (const char *)dst_string.text);
2177   free (const_cast <unsigned char *> (dst_string.text));
2178
2179   /* Verify that we don't attempt to record substring location information
2180      for such cases.  */
2181   ASSERT_HAS_NO_SUBSTRING_RANGES
2182     (test, tok->src_loc, type,
2183      "execution character set != source character set");
2184 }
2185
2186 /* Lex a string literal containing a hex-escaped character.
2187    Verify the substring location data, before and after running
2188    cpp_interpret_string on it.  */
2189
2190 static void
2191 test_lexer_string_locations_hex (const line_table_case &case_)
2192 {
2193   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2194      and with a space in place of digit 6, to terminate the escaped
2195      hex code.
2196      ....................000000000.111111.11112222.
2197      ....................123456789.012345.67890123.  */
2198   const char *content = "        \"01234\\x35 789\"\n";
2199   lexer_test test (case_, content, NULL);
2200
2201   /* Verify that we get the expected token back, with the correct
2202      location information.  */
2203   const cpp_token *tok = test.get_token ();
2204   ASSERT_EQ (tok->type, CPP_STRING);
2205   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2206   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2207
2208   /* At this point in lexing, the quote characters are treated as part of
2209      the string (they are stripped off by cpp_interpret_string).  */
2210   ASSERT_EQ (tok->val.str.len, 15);
2211
2212   /* Verify that cpp_interpret_string works.  */
2213   cpp_string dst_string;
2214   const enum cpp_ttype type = CPP_STRING;
2215   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2216                                       &dst_string, type);
2217   ASSERT_TRUE (result);
2218   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2219   free (const_cast <unsigned char *> (dst_string.text));
2220
2221   /* Verify ranges of individual characters.  This no longer includes the
2222      quotes.  */
2223   for (int i = 0; i <= 4; i++)
2224     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2225   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2226   for (int i = 6; i <= 9; i++)
2227     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2228
2229   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10);
2230 }
2231
2232 /* Lex a string literal containing an octal-escaped character.
2233    Verify the substring location data after running cpp_interpret_string
2234    on it.  */
2235
2236 static void
2237 test_lexer_string_locations_oct (const line_table_case &case_)
2238 {
2239   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2240      and with a space in place of digit 6, to terminate the escaped
2241      octal code.
2242      ....................000000000.111111.11112222.2222223333333333444
2243      ....................123456789.012345.67890123.4567890123456789012  */
2244   const char *content = "        \"01234\\065 789\" /* not a string */\n";
2245   lexer_test test (case_, content, NULL);
2246
2247   /* Verify that we get the expected token back, with the correct
2248      location information.  */
2249   const cpp_token *tok = test.get_token ();
2250   ASSERT_EQ (tok->type, CPP_STRING);
2251   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2252
2253   /* Verify that cpp_interpret_string works.  */
2254   cpp_string dst_string;
2255   const enum cpp_ttype type = CPP_STRING;
2256   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2257                                       &dst_string, type);
2258   ASSERT_TRUE (result);
2259   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2260   free (const_cast <unsigned char *> (dst_string.text));
2261
2262   /* Verify ranges of individual characters.  This no longer includes the
2263      quotes.  */
2264   for (int i = 0; i < 5; i++)
2265     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2266   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2267   for (int i = 6; i <= 9; i++)
2268     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2269
2270   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 10);
2271 }
2272
2273 /* Test of string literal containing letter escapes.  */
2274
2275 static void
2276 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2277 {
2278   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2279      .....................000000000.1.11111.1.1.11222.22222223333333
2280      .....................123456789.0.12345.6.7.89012.34567890123456.  */
2281   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2282   lexer_test test (case_, content, NULL);
2283
2284   /* Verify that we get the expected tokens back.  */
2285   const cpp_token *tok = test.get_token ();
2286   ASSERT_EQ (tok->type, CPP_STRING);
2287   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2288
2289   /* Verify ranges of individual characters. */
2290   /* "\t".  */
2291   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2292                         0, 1, 10, 11);
2293   /* "foo". */
2294   for (int i = 1; i <= 3; i++)
2295     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2296                           i, 1, 11 + i, 11 + i);
2297   /* "\\" and "\n".  */
2298   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2299                         4, 1, 15, 16);
2300   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2301                         5, 1, 17, 18);
2302
2303   /* "bar".  */
2304   for (int i = 6; i <= 8; i++)
2305     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2306                           i, 1, 13 + i, 13 + i);
2307
2308   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 9);
2309 }
2310
2311 /* Another test of a string literal containing a letter escape.
2312    Based on string seen in
2313      printf ("%-%\n");
2314    in gcc.dg/format/c90-printf-1.c.  */
2315
2316 static void
2317 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2318 {
2319   /* .....................000000000.1111.11.1111.22222222223.
2320      .....................123456789.0123.45.6789.01234567890.  */
2321   const char *content = ("        \"%-%\\n\" /* non-str */\n");
2322   lexer_test test (case_, content, NULL);
2323
2324   /* Verify that we get the expected tokens back.  */
2325   const cpp_token *tok = test.get_token ();
2326   ASSERT_EQ (tok->type, CPP_STRING);
2327   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2328
2329   /* Verify ranges of individual characters. */
2330   /* "%-%".  */
2331   for (int i = 0; i < 3; i++)
2332     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2333                           i, 1, 10 + i, 10 + i);
2334   /* "\n".  */
2335   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2336                         3, 1, 13, 14);
2337
2338   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 4);
2339 }
2340
2341 /* Lex a string literal containing UCN 4 characters.
2342    Verify the substring location data after running cpp_interpret_string
2343    on it.  */
2344
2345 static void
2346 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2347 {
2348   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2349      as UCN 4.
2350      ....................000000000.111111.111122.222222223.33333333344444
2351      ....................123456789.012345.678901.234567890.12345678901234  */
2352   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
2353   lexer_test test (case_, content, NULL);
2354
2355   /* Verify that we get the expected token back, with the correct
2356      location information.  */
2357   const cpp_token *tok = test.get_token ();
2358   ASSERT_EQ (tok->type, CPP_STRING);
2359   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2360
2361   /* Verify that cpp_interpret_string works.
2362      The string should be encoded in the execution character
2363      set.  Assuming that that is UTF-8, we should have the following:
2364      -----------  ----  -----  -------  ----------------
2365      Byte offset  Byte  Octal  Unicode  Source Column(s)
2366      -----------  ----  -----  -------  ----------------
2367      0            0x30         '0'      10
2368      1            0x31         '1'      11
2369      2            0x32         '2'      12
2370      3            0x33         '3'      13
2371      4            0x34         '4'      14
2372      5            0xE2  \342   U+2174   15-20
2373      6            0x85  \205    (cont)  15-20
2374      7            0xB4  \264    (cont)  15-20
2375      8            0xE2  \342   U+2175   21-26
2376      9            0x85  \205    (cont)  21-26
2377      10           0xB5  \265    (cont)  21-26
2378      11           0x37         '7'      27
2379      12           0x38         '8'      28
2380      13           0x39         '9'      29
2381      -----------  ----  -----  -------  ---------------.  */
2382
2383   cpp_string dst_string;
2384   const enum cpp_ttype type = CPP_STRING;
2385   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2386                                       &dst_string, type);
2387   ASSERT_TRUE (result);
2388   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2389                 (const char *)dst_string.text);
2390   free (const_cast <unsigned char *> (dst_string.text));
2391
2392   /* Verify ranges of individual characters.  This no longer includes the
2393      quotes.
2394      '01234'.  */
2395   for (int i = 0; i <= 4; i++)
2396     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2397   /* U+2174.  */
2398   for (int i = 5; i <= 7; i++)
2399     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2400   /* U+2175.  */
2401   for (int i = 8; i <= 10; i++)
2402     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2403   /* '789'.  */
2404   for (int i = 11; i <= 13; i++)
2405     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2406
2407   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 14);
2408 }
2409
2410 /* Lex a string literal containing UCN 8 characters.
2411    Verify the substring location data after running cpp_interpret_string
2412    on it.  */
2413
2414 static void
2415 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2416 {
2417   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2418      ....................000000000.111111.1111222222.2222333333333.344444
2419      ....................123456789.012345.6789012345.6789012345678.901234  */
2420   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
2421   lexer_test test (case_, content, NULL);
2422
2423   /* Verify that we get the expected token back, with the correct
2424      location information.  */
2425   const cpp_token *tok = test.get_token ();
2426   ASSERT_EQ (tok->type, CPP_STRING);
2427   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2428                            "\"01234\\U00002174\\U00002175789\"");
2429
2430   /* Verify that cpp_interpret_string works.
2431      The UTF-8 encoding of the string is identical to that from
2432      the ucn4 testcase above; the only difference is the column
2433      locations.  */
2434   cpp_string dst_string;
2435   const enum cpp_ttype type = CPP_STRING;
2436   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2437                                       &dst_string, type);
2438   ASSERT_TRUE (result);
2439   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2440                 (const char *)dst_string.text);
2441   free (const_cast <unsigned char *> (dst_string.text));
2442
2443   /* Verify ranges of individual characters.  This no longer includes the
2444      quotes.
2445      '01234'.  */
2446   for (int i = 0; i <= 4; i++)
2447     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2448   /* U+2174.  */
2449   for (int i = 5; i <= 7; i++)
2450     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2451   /* U+2175.  */
2452   for (int i = 8; i <= 10; i++)
2453     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2454   /* '789' at columns 35-37  */
2455   for (int i = 11; i <= 13; i++)
2456     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2457
2458   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 14);
2459 }
2460
/* Read the four octets stored at PTR_BE_VALUE as a big-endian (most
   significant octet first) 32-bit quantity, and return that quantity
   as an ordinary host value.  The octets are read one at a time, so
   the result does not depend on the host's own byte order.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *octets = (const unsigned char *) ptr_be_value;
  uint32_t value = 0;

  /* Shift the octets in from most significant to least significant.  */
  for (int i = 0; i < 4; i++)
    value = (value << 8) | (uint32_t) octets[i];

  return value;
}
2472
2473 /* Lex a wide string literal and verify that attempts to read substring
2474    location data from it fail gracefully.  */
2475
2476 static void
2477 test_lexer_string_locations_wide_string (const line_table_case &case_)
2478 {
2479   /* Digits 0-9.
2480      ....................000000000.11111111112.22222222233333
2481      ....................123456789.01234567890.12345678901234  */
2482   const char *content = "       L\"0123456789\" /* non-str */\n";
2483   lexer_test test (case_, content, NULL);
2484
2485   /* Verify that we get the expected token back, with the correct
2486      location information.  */
2487   const cpp_token *tok = test.get_token ();
2488   ASSERT_EQ (tok->type, CPP_WSTRING);
2489   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2490
2491   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
2492   cpp_string dst_string;
2493   const enum cpp_ttype type = CPP_WSTRING;
2494   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2495                                       &dst_string, type);
2496   ASSERT_TRUE (result);
2497   /* The cpp_reader defaults to big-endian with
2498      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2499      now be encoded as UTF-32BE.  */
2500   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2501   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2502   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2503   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2504   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2505   free (const_cast <unsigned char *> (dst_string.text));
2506
2507   /* We don't yet support generating substring location information
2508      for L"" strings.  */
2509   ASSERT_HAS_NO_SUBSTRING_RANGES
2510     (test, tok->src_loc, type,
2511      "execution character set != source character set");
2512 }
2513
/* Read the two octets stored at PTR_BE_VALUE as a big-endian (most
   significant octet first) 16-bit quantity, and return that quantity
   as an ordinary host value.  The octets are read individually, so
   the result does not depend on the host's own byte order.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *octets = (const unsigned char *) ptr_be_value;
  const uint16_t high = octets[0];
  const uint16_t low = octets[1];

  return (uint16_t) ((high << 8) | low);
}
2522
2523 /* Lex a u"" string literal and verify that attempts to read substring
2524    location data from it fail gracefully.  */
2525
2526 static void
2527 test_lexer_string_locations_string16 (const line_table_case &case_)
2528 {
2529   /* Digits 0-9.
2530      ....................000000000.11111111112.22222222233333
2531      ....................123456789.01234567890.12345678901234  */
2532   const char *content = "       u\"0123456789\" /* non-str */\n";
2533   lexer_test test (case_, content, NULL);
2534
2535   /* Verify that we get the expected token back, with the correct
2536      location information.  */
2537   const cpp_token *tok = test.get_token ();
2538   ASSERT_EQ (tok->type, CPP_STRING16);
2539   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2540
2541   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
2542   cpp_string dst_string;
2543   const enum cpp_ttype type = CPP_STRING16;
2544   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2545                                       &dst_string, type);
2546   ASSERT_TRUE (result);
2547
2548   /* The cpp_reader defaults to big-endian, so dst_string should
2549      now be encoded as UTF-16BE.  */
2550   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2551   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2552   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2553   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2554   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2555   free (const_cast <unsigned char *> (dst_string.text));
2556
2557   /* We don't yet support generating substring location information
2558      for L"" strings.  */
2559   ASSERT_HAS_NO_SUBSTRING_RANGES
2560     (test, tok->src_loc, type,
2561      "execution character set != source character set");
2562 }
2563
2564 /* Lex a U"" string literal and verify that attempts to read substring
2565    location data from it fail gracefully.  */
2566
2567 static void
2568 test_lexer_string_locations_string32 (const line_table_case &case_)
2569 {
2570   /* Digits 0-9.
2571      ....................000000000.11111111112.22222222233333
2572      ....................123456789.01234567890.12345678901234  */
2573   const char *content = "       U\"0123456789\" /* non-str */\n";
2574   lexer_test test (case_, content, NULL);
2575
2576   /* Verify that we get the expected token back, with the correct
2577      location information.  */
2578   const cpp_token *tok = test.get_token ();
2579   ASSERT_EQ (tok->type, CPP_STRING32);
2580   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2581
2582   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
2583   cpp_string dst_string;
2584   const enum cpp_ttype type = CPP_STRING32;
2585   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2586                                       &dst_string, type);
2587   ASSERT_TRUE (result);
2588
2589   /* The cpp_reader defaults to big-endian, so dst_string should
2590      now be encoded as UTF-32BE.  */
2591   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2592   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2593   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2594   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2595   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2596   free (const_cast <unsigned char *> (dst_string.text));
2597
2598   /* We don't yet support generating substring location information
2599      for L"" strings.  */
2600   ASSERT_HAS_NO_SUBSTRING_RANGES
2601     (test, tok->src_loc, type,
2602      "execution character set != source character set");
2603 }
2604
2605 /* Lex a u8-string literal.
2606    Verify the substring location data after running cpp_interpret_string
2607    on it.  */
2608
2609 static void
2610 test_lexer_string_locations_u8 (const line_table_case &case_)
2611 {
2612   /* Digits 0-9.
2613      ....................000000000.11111111112.22222222233333
2614      ....................123456789.01234567890.12345678901234  */
2615   const char *content = "      u8\"0123456789\" /* non-str */\n";
2616   lexer_test test (case_, content, NULL);
2617
2618   /* Verify that we get the expected token back, with the correct
2619      location information.  */
2620   const cpp_token *tok = test.get_token ();
2621   ASSERT_EQ (tok->type, CPP_UTF8STRING);
2622   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2623
2624   /* Verify that cpp_interpret_string works.  */
2625   cpp_string dst_string;
2626   const enum cpp_ttype type = CPP_STRING;
2627   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2628                                       &dst_string, type);
2629   ASSERT_TRUE (result);
2630   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2631   free (const_cast <unsigned char *> (dst_string.text));
2632
2633   /* Verify ranges of individual characters.  This no longer includes the
2634      quotes.  */
2635   for (int i = 0; i <= 9; i++)
2636     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2637 }
2638
2639 /* Lex a string literal containing UTF-8 source characters.
2640    Verify the substring location data after running cpp_interpret_string
2641    on it.  */
2642
2643 static void
2644 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2645 {
2646  /* This string literal is written out to the source file as UTF-8,
2647     and is of the form "before mojibake after", where "mojibake"
2648     is written as the following four unicode code points:
2649        U+6587 CJK UNIFIED IDEOGRAPH-6587
2650        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2651        U+5316 CJK UNIFIED IDEOGRAPH-5316
2652        U+3051 HIRAGANA LETTER KE.
2653      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2654      "before" and "after" are 1 byte per unicode character.
2655
2656      The numbering shown are "columns", which are *byte* numbers within
2657      the line, rather than unicode character numbers.
2658
2659      .................... 000000000.1111111.
2660      .................... 123456789.0123456.  */
2661   const char *content = ("        \"before "
2662                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2663                               UTF-8: 0xE6 0x96 0x87
2664                               C octal escaped UTF-8: \346\226\207
2665                             "column" numbers: 17-19.  */
2666                          "\346\226\207"
2667
2668                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2669                               UTF-8: 0xE5 0xAD 0x97
2670                               C octal escaped UTF-8: \345\255\227
2671                             "column" numbers: 20-22.  */
2672                          "\345\255\227"
2673
2674                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2675                               UTF-8: 0xE5 0x8C 0x96
2676                               C octal escaped UTF-8: \345\214\226
2677                             "column" numbers: 23-25.  */
2678                          "\345\214\226"
2679
2680                          /* U+3051 HIRAGANA LETTER KE
2681                               UTF-8: 0xE3 0x81 0x91
2682                               C octal escaped UTF-8: \343\201\221
2683                             "column" numbers: 26-28.  */
2684                          "\343\201\221"
2685
2686                          /* column numbers 29 onwards
2687                           2333333.33334444444444
2688                           9012345.67890123456789. */
2689                          " after\" /* non-str */\n");
2690   lexer_test test (case_, content, NULL);
2691
2692   /* Verify that we get the expected token back, with the correct
2693      location information.  */
2694   const cpp_token *tok = test.get_token ();
2695   ASSERT_EQ (tok->type, CPP_STRING);
2696   ASSERT_TOKEN_AS_TEXT_EQ
2697     (test.m_parser, tok,
2698      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2699
2700   /* Verify that cpp_interpret_string works.  */
2701   cpp_string dst_string;
2702   const enum cpp_ttype type = CPP_STRING;
2703   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2704                                       &dst_string, type);
2705   ASSERT_TRUE (result);
2706   ASSERT_STREQ
2707     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2708      (const char *)dst_string.text);
2709   free (const_cast <unsigned char *> (dst_string.text));
2710
2711   /* Verify ranges of individual characters.  This no longer includes the
2712      quotes.
2713      Assuming that both source and execution encodings are UTF-8, we have
2714      a run of 25 octets in each.  */
2715   for (int i = 0; i < 25; i++)
2716     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2717
2718   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 25);
2719 }
2720
2721 /* Test of string literal concatenation.  */
2722
2723 static void
2724 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
2725 {
2726   /* Digits 0-9.
2727      .....................000000000.111111.11112222222222
2728      .....................123456789.012345.67890123456789.  */
2729   const char *content = ("        \"01234\" /* non-str */\n"
2730                          "        \"56789\" /* non-str */\n");
2731   lexer_test test (case_, content, NULL);
2732
2733   location_t input_locs[2];
2734
2735   /* Verify that we get the expected tokens back.  */
2736   auto_vec <cpp_string> input_strings;
2737   const cpp_token *tok_a = test.get_token ();
2738   ASSERT_EQ (tok_a->type, CPP_STRING);
2739   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
2740   input_strings.safe_push (tok_a->val.str);
2741   input_locs[0] = tok_a->src_loc;
2742
2743   const cpp_token *tok_b = test.get_token ();
2744   ASSERT_EQ (tok_b->type, CPP_STRING);
2745   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
2746   input_strings.safe_push (tok_b->val.str);
2747   input_locs[1] = tok_b->src_loc;
2748
2749   /* Verify that cpp_interpret_string works.  */
2750   cpp_string dst_string;
2751   const enum cpp_ttype type = CPP_STRING;
2752   bool result = cpp_interpret_string (test.m_parser,
2753                                       input_strings.address (), 2,
2754                                       &dst_string, type);
2755   ASSERT_TRUE (result);
2756   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2757   free (const_cast <unsigned char *> (dst_string.text));
2758
2759   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
2760   test.m_concats.record_string_concatenation (2, input_locs);
2761
2762   location_t initial_loc = input_locs[0];
2763
2764   for (int i = 0; i <= 4; i++)
2765     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
2766   for (int i = 5; i <= 9; i++)
2767     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
2768
2769   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10);
2770 }
2771
2772 /* Another test of string literal concatenation.  */
2773
2774 static void
2775 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
2776 {
2777   /* Digits 0-9.
2778      .....................000000000.111.11111112222222
2779      .....................123456789.012.34567890123456.  */
2780   const char *content = ("        \"01\" /* non-str */\n"
2781                          "        \"23\" /* non-str */\n"
2782                          "        \"45\" /* non-str */\n"
2783                          "        \"67\" /* non-str */\n"
2784                          "        \"89\" /* non-str */\n");
2785   lexer_test test (case_, content, NULL);
2786
2787   auto_vec <cpp_string> input_strings;
2788   location_t input_locs[5];
2789
2790   /* Verify that we get the expected tokens back.  */
2791   for (int i = 0; i < 5; i++)
2792     {
2793       const cpp_token *tok = test.get_token ();
2794       ASSERT_EQ (tok->type, CPP_STRING);
2795       input_strings.safe_push (tok->val.str);
2796       input_locs[i] = tok->src_loc;
2797     }
2798
2799   /* Verify that cpp_interpret_string works.  */
2800   cpp_string dst_string;
2801   const enum cpp_ttype type = CPP_STRING;
2802   bool result = cpp_interpret_string (test.m_parser,
2803                                       input_strings.address (), 5,
2804                                       &dst_string, type);
2805   ASSERT_TRUE (result);
2806   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2807   free (const_cast <unsigned char *> (dst_string.text));
2808
2809   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
2810   test.m_concats.record_string_concatenation (5, input_locs);
2811
2812   location_t initial_loc = input_locs[0];
2813
2814   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
2815      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
2816      and expect get_source_range_for_substring to fail.
2817      However, for a string concatenation test, we can have a case
2818      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
2819      but subsequent strings can be after it.
2820      Attempting to detect this within assert_char_at_range
2821      would overcomplicate the logic for the common test cases, so
2822      we detect it here.  */
2823   if (should_have_column_data_p (input_locs[0])
2824       && !should_have_column_data_p (input_locs[4]))
2825     {
2826       /* Verify that get_source_range_for_substring gracefully rejects
2827          this case.  */
2828       source_range actual_range;
2829       const char *err
2830         = get_source_range_for_substring (test.m_parser, &test.m_concats,
2831                                           initial_loc, type, 0, 0,
2832                                           &actual_range);
2833       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
2834       return;
2835     }
2836
2837   for (int i = 0; i < 5; i++)
2838     for (int j = 0; j < 2; j++)
2839       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
2840                             i + 1, 10 + j, 10 + j);
2841
2842   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10);
2843 }
2844
2845 /* Another test of string literal concatenation, this time combined with
2846    various kinds of escaped characters.  */
2847
2848 static void
2849 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
2850 {
2851   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
2852      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
2853   const char *content
2854     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
2855        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
2856     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
2857   lexer_test test (case_, content, NULL);
2858
2859   auto_vec <cpp_string> input_strings;
2860   location_t input_locs[4];
2861
2862   /* Verify that we get the expected tokens back.  */
2863   for (int i = 0; i < 4; i++)
2864     {
2865       const cpp_token *tok = test.get_token ();
2866       ASSERT_EQ (tok->type, CPP_STRING);
2867       input_strings.safe_push (tok->val.str);
2868       input_locs[i] = tok->src_loc;
2869     }
2870
2871   /* Verify that cpp_interpret_string works.  */
2872   cpp_string dst_string;
2873   const enum cpp_ttype type = CPP_STRING;
2874   bool result = cpp_interpret_string (test.m_parser,
2875                                       input_strings.address (), 4,
2876                                       &dst_string, type);
2877   ASSERT_TRUE (result);
2878   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2879   free (const_cast <unsigned char *> (dst_string.text));
2880
2881   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
2882   test.m_concats.record_string_concatenation (4, input_locs);
2883
2884   location_t initial_loc = input_locs[0];
2885
2886   for (int i = 0; i <= 4; i++)
2887     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
2888   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
2889   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
2890   for (int i = 7; i <= 9; i++)
2891     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
2892
2893   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 10);
2894 }
2895
2896 /* Test of string literal in a macro.  */
2897
2898 static void
2899 test_lexer_string_locations_macro (const line_table_case &case_)
2900 {
2901   /* Digits 0-9.
2902      .....................0000000001111111111.22222222223.
2903      .....................1234567890123456789.01234567890.  */
2904   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
2905                          "  MACRO");
2906   lexer_test test (case_, content, NULL);
2907
2908   /* Verify that we get the expected tokens back.  */
2909   const cpp_token *tok = test.get_token ();
2910   ASSERT_EQ (tok->type, CPP_PADDING);
2911
2912   tok = test.get_token ();
2913   ASSERT_EQ (tok->type, CPP_STRING);
2914   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2915
2916   /* Verify ranges of individual characters.  We ought to
2917      see columns within the macro definition.  */
2918   for (int i = 0; i <= 9; i++)
2919     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2920                           i, 1, 20 + i, 20 + i);
2921
2922   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2923
2924   tok = test.get_token ();
2925   ASSERT_EQ (tok->type, CPP_PADDING);
2926 }
2927
2928 /* Test of stringification of a macro argument.  */
2929
2930 static void
2931 test_lexer_string_locations_stringified_macro_argument
2932   (const line_table_case &case_)
2933 {
2934   /* .....................000000000111111111122222222223.
2935      .....................123456789012345678901234567890.  */
2936   const char *content = ("#define MACRO(X) #X /* non-str */\n"
2937                          "MACRO(foo)\n");
2938   lexer_test test (case_, content, NULL);
2939
2940   /* Verify that we get the expected token back.  */
2941   const cpp_token *tok = test.get_token ();
2942   ASSERT_EQ (tok->type, CPP_PADDING);
2943
2944   tok = test.get_token ();
2945   ASSERT_EQ (tok->type, CPP_STRING);
2946   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
2947
2948   /* We don't support getting the location of a stringified macro
2949      argument.  Verify that it fails gracefully.  */
2950   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
2951                                   "cpp_interpret_string_1 failed");
2952
2953   tok = test.get_token ();
2954   ASSERT_EQ (tok->type, CPP_PADDING);
2955
2956   tok = test.get_token ();
2957   ASSERT_EQ (tok->type, CPP_PADDING);
2958 }
2959
2960 /* Ensure that we are fail gracefully if something attempts to pass
2961    in a location that isn't a string literal token.  Seen on this code:
2962
2963      const char a[] = " %d ";
2964      __builtin_printf (a, 0.5);
2965                        ^
2966
2967    when c-format.c erroneously used the indicated one-character
2968    location as the format string location, leading to a read past the
2969    end of a string buffer in cpp_interpret_string_1.  */
2970
2971 static void
2972 test_lexer_string_locations_non_string (const line_table_case &case_)
2973 {
2974   /* .....................000000000111111111122222222223.
2975      .....................123456789012345678901234567890.  */
2976   const char *content = ("         a\n");
2977   lexer_test test (case_, content, NULL);
2978
2979   /* Verify that we get the expected token back.  */
2980   const cpp_token *tok = test.get_token ();
2981   ASSERT_EQ (tok->type, CPP_NAME);
2982   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
2983
2984   /* At this point, libcpp is attempting to interpret the name as a
2985      string literal, despite it not starting with a quote.  We don't detect
2986      that, but we should at least fail gracefully.  */
2987   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
2988                                   "cpp_interpret_string_1 failed");
2989 }
2990
2991 /* Ensure that we can read substring information for a token which
2992    starts in one linemap and ends in another .  Adapted from
2993    gcc.dg/cpp/pr69985.c.  */
2994
2995 static void
2996 test_lexer_string_locations_long_line (const line_table_case &case_)
2997 {
2998   /* .....................000000.000111111111
2999      .....................123456.789012346789.  */
3000   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
3001                          "     \"0123456789012345678901234567890123456789"
3002                          "0123456789012345678901234567890123456789"
3003                          "0123456789012345678901234567890123456789"
3004                          "0123456789\"\n");
3005
3006   lexer_test test (case_, content, NULL);
3007
3008   /* Verify that we get the expected token back.  */
3009   const cpp_token *tok = test.get_token ();
3010   ASSERT_EQ (tok->type, CPP_STRING);
3011
3012   if (!should_have_column_data_p (line_table->highest_location))
3013     return;
3014
3015   /* Verify ranges of individual characters.  */
3016   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 130);
3017   for (int i = 0; i < 130; i++)
3018     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3019                           i, 2, 7 + i, 7 + i);
3020 }
3021
3022 /* Test of lexing char constants.  */
3023
3024 static void
3025 test_lexer_char_constants (const line_table_case &case_)
3026 {
3027   /* Various char constants.
3028      .....................0000000001111111111.22222222223.
3029      .....................1234567890123456789.01234567890.  */
3030   const char *content = ("         'a'\n"
3031                          "        u'a'\n"
3032                          "        U'a'\n"
3033                          "        L'a'\n"
3034                          "         'abc'\n");
3035   lexer_test test (case_, content, NULL);
3036
3037   /* Verify that we get the expected tokens back.  */
3038   /* 'a'.  */
3039   const cpp_token *tok = test.get_token ();
3040   ASSERT_EQ (tok->type, CPP_CHAR);
3041   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3042
3043   unsigned int chars_seen;
3044   int unsignedp;
3045   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3046                                           &chars_seen, &unsignedp);
3047   ASSERT_EQ (cc, 'a');
3048   ASSERT_EQ (chars_seen, 1);
3049
3050   /* u'a'.  */
3051   tok = test.get_token ();
3052   ASSERT_EQ (tok->type, CPP_CHAR16);
3053   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3054
3055   /* U'a'.  */
3056   tok = test.get_token ();
3057   ASSERT_EQ (tok->type, CPP_CHAR32);
3058   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3059
3060   /* L'a'.  */
3061   tok = test.get_token ();
3062   ASSERT_EQ (tok->type, CPP_WCHAR);
3063   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3064
3065   /* 'abc' (c-char-sequence).  */
3066   tok = test.get_token ();
3067   ASSERT_EQ (tok->type, CPP_CHAR);
3068   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3069 }
3070 /* A table of interesting location_t values, giving one axis of our test
3071    matrix.  */
3072
3073 static const location_t boundary_locations[] = {
3074   /* Zero means "don't override the default values for a new line_table".  */
3075   0,
3076
3077   /* An arbitrary non-zero value that isn't close to one of
3078      the boundary values below.  */
3079   0x10000,
3080
3081   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
3082   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3083   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3084   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3085   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3086   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3087
3088   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
3089   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3090   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3091   LINE_MAP_MAX_LOCATION_WITH_COLS,
3092   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3093   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3094 };
3095
3096 /* Run all of the selftests within this file.  */
3097
3098 void
3099 input_c_tests ()
3100 {
3101   test_should_have_column_data_p ();
3102   test_unknown_location ();
3103   test_builtins ();
3104
3105   /* As noted above in the description of struct line_table_case,
3106      we want to explore a test matrix of interesting line_table
3107      situations, running various selftests for each case within the
3108      matrix.  */
3109
3110   /* Run all tests with:
3111      (a) line_table->default_range_bits == 0, and
3112      (b) line_table->default_range_bits == 5.  */
3113   int num_cases_tested = 0;
3114   for (int default_range_bits = 0; default_range_bits <= 5;
3115        default_range_bits += 5)
3116     {
3117       /* ...and use each of the "interesting" location values as
3118          the starting location within line_table.  */
3119       const int num_boundary_locations
3120         = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3121       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3122         {
3123           line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3124
3125           /* Run all tests for the given case within the test matrix.  */
3126           test_accessing_ordinary_linemaps (c);
3127           test_lexer (c);
3128           test_lexer_string_locations_simple (c);
3129           test_lexer_string_locations_ebcdic (c);
3130           test_lexer_string_locations_hex (c);
3131           test_lexer_string_locations_oct (c);
3132           test_lexer_string_locations_letter_escape_1 (c);
3133           test_lexer_string_locations_letter_escape_2 (c);
3134           test_lexer_string_locations_ucn4 (c);
3135           test_lexer_string_locations_ucn8 (c);
3136           test_lexer_string_locations_wide_string (c);
3137           test_lexer_string_locations_string16 (c);
3138           test_lexer_string_locations_string32 (c);
3139           test_lexer_string_locations_u8 (c);
3140           test_lexer_string_locations_utf8_source (c);
3141           test_lexer_string_locations_concatenation_1 (c);
3142           test_lexer_string_locations_concatenation_2 (c);
3143           test_lexer_string_locations_concatenation_3 (c);
3144           test_lexer_string_locations_macro (c);
3145           test_lexer_string_locations_stringified_macro_argument (c);
3146           test_lexer_string_locations_non_string (c);
3147           test_lexer_string_locations_long_line (c);
3148           test_lexer_char_constants (c);
3149
3150           num_cases_tested++;
3151         }
3152     }
3153
3154   /* Verify that we fully covered the test matrix.  */
3155   ASSERT_EQ (num_cases_tested, 2 * 12);
3156
3157   test_reading_source_line ();
3158 }
3159
3160 } // namespace selftest
3161
3162 #endif /* CHECKING_P */