gas/app.c

   1 /* This is the Assembler Pre-Processor
   2    Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 1996
   3    Free Software Foundation, Inc.
   4
   5    This file is part of GAS, the GNU Assembler.
   6
   7    GAS is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    GAS is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GAS; see the file COPYING.  If not, write to
  19    the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  20
  21 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
  22 /* App, the assembler pre-processor.  This pre-processor strips out excess
  23    spaces, turns single-quoted characters into a decimal constant, and turns
  24    # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
  25    pair.  This needs better error-handling.  */
  26
  27 #include <stdio.h>
  28 #include "as.h"                 /* For BAD_CASE() only */
  29
  30 #if (__STDC__ != 1)
  31 #ifndef const
  32 #define const  /* empty */
  33 #endif
  34 #endif
  35
  36 static char lex[256];
  37 static const char symbol_chars[] =
  38 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
  39
  40 #define LEX_IS_SYMBOL_COMPONENT         1
  41 #define LEX_IS_WHITESPACE               2
  42 #define LEX_IS_LINE_SEPARATOR           3
  43 #define LEX_IS_COMMENT_START            4
  44 #define LEX_IS_LINE_COMMENT_START       5
  45 #define LEX_IS_TWOCHAR_COMMENT_1ST      6
  46 #define LEX_IS_TWOCHAR_COMMENT_2ND      7
  47 #define LEX_IS_STRINGQUOTE              8
  48 #define LEX_IS_COLON                    9
  49 #define LEX_IS_NEWLINE                  10
  50 #define LEX_IS_ONECHAR_QUOTE            11
  51 #define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)
  52 #define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
  53 #define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
  54 #define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
  55 #define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
  56 #define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  57
  58 static int process_escape PARAMS ((int));
  59
  60 /* FIXME-soon: The entire lexer/parser thingy should be
  61    built statically at compile time rather than dynamically
  62    each and every time the assembler is run.  xoxorich. */
  63
  64 void
  65 do_scrub_begin ()
  66 {
  67   const char *p;
  68
  69   lex[' '] = LEX_IS_WHITESPACE;
  70   lex['\t'] = LEX_IS_WHITESPACE;
  71   lex['\n'] = LEX_IS_NEWLINE;
  72   lex[';'] = LEX_IS_LINE_SEPARATOR;
  73   lex[':'] = LEX_IS_COLON;
  74
  75   if (! flag_m68k_mri)
  76     {
  77       lex['"'] = LEX_IS_STRINGQUOTE;
  78
  79 #ifndef TC_HPPA
  80       lex['\''] = LEX_IS_ONECHAR_QUOTE;
  81 #endif
  82
  83 #ifdef SINGLE_QUOTE_STRINGS
  84       lex['\''] = LEX_IS_STRINGQUOTE;
  85 #endif
  86     }
  87
  88   /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
  89      in state 5 of do_scrub_chars must be changed.  */
  90
  91   /* Note that these override the previous defaults, e.g. if ';' is a
  92      comment char, then it isn't a line separator.  */
  93   for (p = symbol_chars; *p; ++p)
  94     {
  95       lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
  96     }                           /* declare symbol characters */
  97
  98   for (p = comment_chars; *p; p++)
  99     {
 100       lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
 101     }                           /* declare comment chars */
 102
 103   for (p = line_comment_chars; *p; p++)
 104     {
 105       lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
 106     }                           /* declare line comment chars */
 107
 108   for (p = line_separator_chars; *p; p++)
 109     {
 110       lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
 111     }                           /* declare line separators */
 112
 113   /* Only allow slash-star comments if slash is not in use */
 114   if (lex['/'] == 0)
 115     {
 116       lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
 117     }
 118   /* FIXME-soon.  This is a bad hack but otherwise, we can't do
 119      c-style comments when '/' is a line comment char. xoxorich. */
 120   if (lex['*'] == 0)
 121     {
 122       lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
 123     }
 124
 125   if (flag_m68k_mri)
 126     {
 127       lex['\''] = LEX_IS_STRINGQUOTE;
 128       lex[';'] = LEX_IS_COMMENT_START;
 129       lex['*'] = LEX_IS_LINE_COMMENT_START;
 130       /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
 131          then it can't be used in an expression.  */
 132       lex['!'] = LEX_IS_LINE_COMMENT_START;
 133     }
 134 }                               /* do_scrub_begin() */
 135
 136 /* Saved state of the scrubber */
 137 static int state;
 138 static int old_state;
 139 static char *out_string;
 140 static char out_buf[20];
 141 static int add_newlines;
 142 static char *saved_input;
 143 static int saved_input_len;
 144
 145 /* Data structure for saving the state of app across #include's.  Note that
 146    app is called asynchronously to the parsing of the .include's, so our
 147    state at the time .include is interpreted is completely unrelated.
 148    That's why we have to save it all.  */
 149
 150 struct app_save
 151   {
 152     int state;
 153     int old_state;
 154     char *out_string;
 155     char out_buf[sizeof (out_buf)];
 156     int add_newlines;
 157     char *saved_input;
 158     int saved_input_len;
 159   };
 160
 161 char *
 162 app_push ()
 163 {
 164   register struct app_save *saved;
 165
 166   saved = (struct app_save *) xmalloc (sizeof (*saved));
 167   saved->state = state;
 168   saved->old_state = old_state;
 169   saved->out_string = out_string;
 170   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
 171   saved->add_newlines = add_newlines;
 172   saved->saved_input = saved_input;
 173   saved->saved_input_len = saved_input_len;
 174
 175   /* do_scrub_begin() is not useful, just wastes time. */
 176
 177   state = 0;
 178   saved_input = NULL;
 179
 180   return (char *) saved;
 181 }
 182
 183 void
 184 app_pop (arg)
 185      char *arg;
 186 {
 187   register struct app_save *saved = (struct app_save *) arg;
 188
 189   /* There is no do_scrub_end (). */
 190   state = saved->state;
 191   old_state = saved->old_state;
 192   out_string = saved->out_string;
 193   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
 194   add_newlines = saved->add_newlines;
 195   saved_input = saved->saved_input;
 196   saved_input_len = saved->saved_input_len;
 197
 198   free (arg);
 199 }                               /* app_pop() */
 200
 201 /* @@ This assumes that \n &c are the same on host and target.  This is not
 202    necessarily true.  */
 203 static int
 204 process_escape (ch)
 205      int ch;
 206 {
 207   switch (ch)
 208     {
 209     case 'b':
 210       return '\b';
 211     case 'f':
 212       return '\f';
 213     case 'n':
 214       return '\n';
 215     case 'r':
 216       return '\r';
 217     case 't':
 218       return '\t';
 219     case '\'':
 220       return '\'';
 221     case '"':
 222       return '\"';
 223     default:
 224       return ch;
 225     }
 226 }
 227
 228 /* This function is called to process input characters.  The GET
 229    parameter is used to retrieve more input characters.  GET should
 230    set its parameter to point to a buffer, and return the length of
 231    the buffer; it should return 0 at end of file.  The scrubbed output
 232    characters are put into the buffer starting at TOSTART; the TOSTART
 233    buffer is TOLEN bytes in length.  The function returns the number
 234    of scrubbed characters put into TOSTART.  This will be TOLEN unless
 235    end of file was seen.  This function is arranged as a state
 236    machine, and saves its state so that it may return at any point.
 237    This is the way the old code used to work.  */
 238
 239 int
 240 do_scrub_chars (get, tostart, tolen)
 241      int (*get) PARAMS ((char **));
 242      char *tostart;
 243      int tolen;
 244 {
 245   char *to = tostart;
 246   char *toend = tostart + tolen;
 247   char *from;
 248   char *fromend;
 249   int fromlen;
 250   register int ch, ch2 = 0;
 251   int not_cpp_line = 0;
 252
 253   /*State 0: beginning of normal line
 254           1: After first whitespace on line (flush more white)
 255           2: After first non-white (opcode) on line (keep 1white)
 256           3: after second white on line (into operands) (flush white)
 257           4: after putting out a .line, put out digits
 258           5: parsing a string, then go to old-state
 259           6: putting out \ escape in a "d string.
 260           7: After putting out a .appfile, put out string.
 261           8: After putting out a .appfile string, flush until newline.
 262           9: After seeing symbol char in state 3 (keep 1white after symchar)
 263          10: After seeing whitespace in state 9 (keep white before symchar)
 264          11: After seeing a symbol character in state 0 (eg a label definition)
 265          -1: output string in out_string and go to the state in old_state
 266          -2: flush text until a '*' '/' is seen, then go to state old_state
 267           */
 268
 269   /* I added states 9 and 10 because the MIPS ECOFF assembler uses
 270      constructs like ``.loc 1 20''.  This was turning into ``.loc
 271      120''.  States 9 and 10 ensure that a space is never dropped in
 272      between characters which could appear in a identifier.  Ian
 273      Taylor, ian@cygnus.com.
 274
 275      I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
 276      correctly on the PA (and any other target where colons are optional).
 277      Jeff Law, law@cs.utah.edu.  */
 278
 279   /* This macro gets the next input character.  */
 280
 281 #define GET()                           \
 282   (from < fromend                       \
 283    ? *from++                            \
 284    : ((saved_input != NULL              \
 285        ? (free (saved_input),           \
 286           saved_input = NULL,           \
 287           0)                            \
 288        : 0),                            \
 289       fromlen = (*get) (&from),         \
 290       fromend = from + fromlen,         \
 291       (fromlen == 0                     \
 292        ? EOF                            \
 293        : *from++)))
 294
 295   /* This macro pushes a character back on the input stream.  */
 296
 297 #define UNGET(uch) (*--from = (uch))
 298
 299   /* This macro puts a character into the output buffer.  If this
 300      character fills the output buffer, this macro jumps to the label
 301      TOFULL.  We use this rather ugly approach because we need to
 302      handle two different termination conditions: EOF on the input
 303      stream, and a full output buffer.  It would be simpler if we
 304      always read in the entire input stream before processing it, but
 305      I don't want to make such a significant change to the assembler's
 306      memory usage.  */
 307
 308 #define PUT(pch)                        \
 309   do                                    \
 310     {                                   \
 311       *to++ = (pch);                    \
 312       if (to >= toend)                  \
 313         goto tofull;                    \
 314     }                                   \
 315   while (0)
 316
 317   if (saved_input != NULL)
 318     {
 319       from = saved_input;
 320       fromend = from + saved_input_len;
 321     }
 322   else
 323     {
 324       fromlen = (*get) (&from);
 325       if (fromlen == 0)
 326         return 0;
 327       fromend = from + fromlen;
 328     }
 329
 330   while (1)
 331     {
 332       /* The cases in this switch end with continue, in order to
 333          branch back to the top of this while loop and generate the
 334          next output character in the appropriate state.  */
 335       switch (state)
 336         {
 337         case -1:
 338           ch = *out_string++;
 339           if (*out_string == '\0')
 340             {
 341               state = old_state;
 342               old_state = 3;
 343             }
 344           PUT (ch);
 345           continue;
 346
 347         case -2:
 348           for (;;)
 349             {
 350               do
 351                 {
 352                   ch = GET ();
 353
 354                   if (ch == EOF)
 355                     {
 356                       as_warn ("end of file in comment");
 357                       goto fromeof;
 358                     }
 359
 360                   if (ch == '\n')
 361                     PUT ('\n');
 362                 }
 363               while (ch != '*');
 364
 365               while ((ch = GET ()) == '*')
 366                 ;
 367
 368               if (ch == EOF)
 369                 {
 370                   as_warn ("end of file in comment");
 371                   goto fromeof;
 372                 }
 373
 374               if (ch == '/')
 375                 break;
 376
 377               UNGET (ch);
 378             }
 379
 380           state = old_state;
 381           PUT (' ');
 382           continue;
 383
 384         case 4:
 385           ch = GET ();
 386           if (ch == EOF)
 387             goto fromeof;
 388           else if (ch >= '0' && ch <= '9')
 389             PUT (ch);
 390           else
 391             {
 392               while (ch != EOF && IS_WHITESPACE (ch))
 393                 ch = GET ();
 394               if (ch == '"')
 395                 {
 396                   UNGET (ch);
 397                   if (flag_m68k_mri)
 398                     out_string = "\n\tappfile ";
 399                   else
 400                     out_string = "\n\t.appfile ";
 401                   old_state = 7;
 402                   state = -1;
 403                   PUT (*out_string++);
 404                 }
 405               else
 406                 {
 407                   while (ch != EOF && ch != '\n')
 408                     ch = GET ();
 409                   state = 0;
 410                   PUT (ch);
 411                 }
 412             }
 413           continue;
 414
 415         case 5:
 416           /* We are going to copy everything up to a quote character,
 417              with special handling for a backslash.  We try to
 418              optimize the copying in the simple case without using the
 419              GET and PUT macros.  */
 420           {
 421             char *s;
 422             int len;
 423
 424             for (s = from; s < fromend; s++)
 425               {
 426                 ch = *s;
 427                 /* This condition must be changed if the type of any
 428                    other character can be LEX_IS_STRINGQUOTE.  */
 429                 if (ch == '\\'
 430                     || ch == '"'
 431                     || ch == '\''
 432                     || ch == '\n')
 433                   break;
 434               }
 435             len = s - from;
 436             if (len > toend - to)
 437               len = toend - to;
 438             if (len > 0)
 439               {
 440                 memcpy (to, from, len);
 441                 to += len;
 442                 from += len;
 443               }
 444           }
 445
 446           ch = GET ();
 447           if (ch == EOF)
 448             {
 449               as_warn ("end of file in string: inserted '\"'");
 450               state = old_state;
 451               UNGET ('\n');
 452               PUT ('"');
 453             }
 454           else if (lex[ch] == LEX_IS_STRINGQUOTE)
 455             {
 456               state = old_state;
 457               PUT (ch);
 458             }
 459 #ifndef NO_STRING_ESCAPES
 460           else if (ch == '\\')
 461             {
 462               state = 6;
 463               PUT (ch);
 464             }
 465 #endif
 466           else if (flag_m68k_mri && ch == '\n')
 467             {
 468               /* Just quietly terminate the string.  This permits lines like
 469                    bne  label   loop if we haven't reach end yet
 470                  */
 471               state = old_state;
 472               UNGET (ch);
 473               PUT ('\'');
 474             }
 475           else
 476             {
 477               PUT (ch);
 478             }
 479           continue;
 480
 481         case 6:
 482           state = 5;
 483           ch = GET ();
 484           switch (ch)
 485             {
 486               /* Handle strings broken across lines, by turning '\n' into
 487                  '\\' and 'n'.  */
 488             case '\n':
 489               UNGET ('n');
 490               add_newlines++;
 491               PUT ('\\');
 492               continue;
 493
 494             case '"':
 495             case '\\':
 496             case 'b':
 497             case 'f':
 498             case 'n':
 499             case 'r':
 500             case 't':
 501             case 'v':
 502             case 'x':
 503             case 'X':
 504             case '0':
 505             case '1':
 506             case '2':
 507             case '3':
 508             case '4':
 509             case '5':
 510             case '6':
 511             case '7':
 512               break;
 513 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
 514             default:
 515               as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
 516               break;
 517 #else  /* ONLY_STANDARD_ESCAPES */
 518             default:
 519               /* Accept \x as x for any x */
 520               break;
 521 #endif /* ONLY_STANDARD_ESCAPES */
 522
 523             case EOF:
 524               as_warn ("End of file in string: '\"' inserted");
 525               PUT ('"');
 526               continue;
 527             }
 528           PUT (ch);
 529           continue;
 530
 531         case 7:
 532           ch = GET ();
 533           state = 5;
 534           old_state = 8;
 535           if (ch == EOF)
 536             goto fromeof;
 537           PUT (ch);
 538           continue;
 539
 540         case 8:
 541           do
 542             ch = GET ();
 543           while (ch != '\n' && ch != EOF);
 544           if (ch == EOF)
 545             goto fromeof;
 546           state = 0;
 547           PUT (ch);
 548           continue;
 549         }
 550
 551       /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
 552
 553       /* flushchar: */
 554       ch = GET ();
 555     recycle:
 556       if (ch == EOF)
 557         {
 558           if (state != 0)
 559             {
 560               as_warn ("end of file not at end of a line; newline inserted");
 561               state = 0;
 562               PUT ('\n');
 563             }
 564           goto fromeof;
 565         }
 566
 567       switch (lex[ch])
 568         {
 569         case LEX_IS_WHITESPACE:
 570           do
 571             {
 572               ch = GET ();
 573             }
 574           while (ch != EOF && IS_WHITESPACE (ch));
 575           if (ch == EOF)
 576             goto fromeof;
 577
 578           if (state == 0)
 579             {
 580               /* Preserve a single whitespace character at the
 581                  beginning of a line.  */
 582               state = 1;
 583               UNGET (ch);
 584               PUT (' ');
 585               break;
 586             }
 587
 588           if (IS_COMMENT (ch)
 589               || ch == '/'
 590               || IS_LINE_SEPARATOR (ch))
 591             {
 592               /* cpp never outputs a leading space before the #, so
 593                  try to avoid being confused.  */
 594               not_cpp_line = 1;
 595               if (flag_m68k_mri)
 596                 {
 597                   /* In MRI mode, we keep these spaces.  */
 598                   UNGET (ch);
 599                   PUT (' ');
 600                   break;
 601                 }
 602               goto recycle;
 603             }
 604
 605           /* If we're in state 2 or 11, we've seen a non-white
 606              character followed by whitespace.  If the next character
 607              is ':', this is whitespace after a label name which we
 608              normally must ignore.  In MRI mode, though, spaces are
 609              not permitted between the label and the colon.  */
 610           if ((state == 2 || state == 11)
 611               && lex[ch] == LEX_IS_COLON
 612               && ! flag_m68k_mri)
 613             {
 614               state = 1;
 615               PUT (ch);
 616               break;
 617             }
 618
 619           switch (state)
 620             {
 621             case 0:
 622               state++;
 623               goto recycle;     /* Punted leading sp */
 624             case 1:
 625               /* We can arrive here if we leave a leading whitespace
 626                  character at the beginning of a line.  */
 627               goto recycle;
 628             case 2:
 629               state = 3;
 630               if (to + 1 < toend)
 631                 {
 632                   /* Optimize common case by skipping UNGET/GET.  */
 633                   PUT (' ');    /* Sp after opco */
 634                   goto recycle;
 635                 }
 636               UNGET (ch);
 637               PUT (' ');
 638               break;
 639             case 3:
 640               if (flag_m68k_mri)
 641                 {
 642                   /* In MRI mode, we keep these spaces.  */
 643                   UNGET (ch);
 644                   PUT (' ');
 645                   break;
 646                 }
 647               goto recycle;     /* Sp in operands */
 648             case 9:
 649             case 10:
 650               if (flag_m68k_mri)
 651                 {
 652                   /* In MRI mode, we keep these spaces.  */
 653                   state = 3;
 654                   UNGET (ch);
 655                   PUT (' ');
 656                   break;
 657                 }
 658               state = 10;       /* Sp after symbol char */
 659               goto recycle;
 660             case 11:
 661               state = 1;
 662               UNGET (ch);
 663               PUT (' ');        /* Sp after label definition.  */
 664               break;
 665             default:
 666               BAD_CASE (state);
 667             }
 668           break;
 669
 670         case LEX_IS_TWOCHAR_COMMENT_1ST:
 671           ch2 = GET ();
 672           if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
 673             {
 674               for (;;)
 675                 {
 676                   do
 677                     {
 678                       ch2 = GET ();
 679                       if (ch2 != EOF && IS_NEWLINE (ch2))
 680                         add_newlines++;
 681                     }
 682                   while (ch2 != EOF &&
 683                          (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
 684
 685                   while (ch2 != EOF &&
 686                          (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
 687                     {
 688                       ch2 = GET ();
 689                     }
 690
 691                   if (ch2 == EOF
 692                       || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
 693                     break;
 694                   UNGET (ch);
 695                 }
 696               if (ch2 == EOF)
 697                 as_warn ("end of file in multiline comment");
 698
 699               ch = ' ';
 700               goto recycle;
 701             }
 702           else
 703             {
 704               if (ch2 != EOF)
 705                 UNGET (ch2);
 706               if (state == 9 || state == 10)
 707                 state = 3;
 708               PUT (ch);
 709             }
 710           break;
 711
 712         case LEX_IS_STRINGQUOTE:
 713           if (state == 10)
 714             {
 715               /* Preserve the whitespace in foo "bar" */
 716               UNGET (ch);
 717               state = 3;
 718               PUT (' ');
 719
 720               /* PUT didn't jump out.  We could just break, but we
 721                  know what will happen, so optimize a bit.  */
 722               ch = GET ();
 723               old_state = 3;
 724             }
 725           else if (state == 9)
 726             old_state = 3;
 727           else
 728             old_state = state;
 729           state = 5;
 730           PUT (ch);
 731           break;
 732
 733 #ifndef IEEE_STYLE
 734         case LEX_IS_ONECHAR_QUOTE:
 735           if (state == 10)
 736             {
 737               /* Preserve the whitespace in foo 'b' */
 738               UNGET (ch);
 739               state = 3;
 740               PUT (' ');
 741               break;
 742             }
 743           ch = GET ();
 744           if (ch == EOF)
 745             {
 746               as_warn ("end of file after a one-character quote; \\0 inserted");
 747               ch = 0;
 748             }
 749           if (ch == '\\')
 750             {
 751               ch = GET ();
 752               if (ch == EOF)
 753                 {
 754                   as_warn ("end of file in escape character");
 755                   ch = '\\';
 756                 }
 757               else
 758                 ch = process_escape (ch);
 759             }
 760           sprintf (out_buf, "%d", (int) (unsigned char) ch);
 761
 762           /* None of these 'x constants for us.  We want 'x'.  */
 763           if ((ch = GET ()) != '\'')
 764             {
 765 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
 766               as_warn ("Missing close quote: (assumed)");
 767 #else
 768               if (ch != EOF)
 769                 UNGET (ch);
 770 #endif
 771             }
 772           if (strlen (out_buf) == 1)
 773             {
 774               PUT (out_buf[0]);
 775               break;
 776             }
 777           if (state == 9)
 778             old_state = 3;
 779           else
 780             old_state = state;
 781           state = -1;
 782           out_string = out_buf;
 783           PUT (*out_string++);
 784           break;
 785 #endif
 786
 787         case LEX_IS_COLON:
 788           if (state == 9 || state == 10)
 789             state = 3;
 790           else if (state != 3)
 791             state = 1;
 792           PUT (ch);
 793           break;
 794
 795         case LEX_IS_NEWLINE:
 796           /* Roll out a bunch of newlines from inside comments, etc.  */
 797           if (add_newlines)
 798             {
 799               --add_newlines;
 800               UNGET (ch);
 801             }
 802           /* fall thru into... */
 803
 804         case LEX_IS_LINE_SEPARATOR:
 805           state = 0;
 806           PUT (ch);
 807           break;
 808
 809         case LEX_IS_LINE_COMMENT_START:
 810           if (state == 0)       /* Only comment at start of line.  */
 811             {
 812               /* FIXME-someday: The two character comment stuff was
 813                  badly thought out.  On i386, we want '/' as line
 814                  comment start AND we want C style comments.  hence
 815                  this hack.  The whole lexical process should be
 816                  reworked.  xoxorich.  */
 817               if (ch == '/')
 818                 {
 819                   ch2 = GET ();
 820                   if (ch2 == '*')
 821                     {
 822                       state = -2;
 823                       break;
 824                     }
 825                   else
 826                     {
 827                       UNGET (ch2);
 828                     }
 829                 } /* bad hack */
 830
 831               if (ch != '#')
 832                 not_cpp_line = 1;
 833
 834               do
 835                 {
 836                   ch = GET ();
 837                 }
 838               while (ch != EOF && IS_WHITESPACE (ch));
 839               if (ch == EOF)
 840                 {
 841                   as_warn ("end of file in comment; newline inserted");
 842                   PUT ('\n');
 843                   break;
 844                 }
 845               if (ch < '0' || ch > '9' || not_cpp_line)
 846                 {
 847                   /* Non-numerics:  Eat whole comment line */
 848                   while (ch != EOF && !IS_NEWLINE (ch))
 849                     ch = GET ();
 850                   if (ch == EOF)
 851                     as_warn ("EOF in Comment: Newline inserted");
 852                   state = 0;
 853                   PUT ('\n');
 854                   break;
 855                 }
 856               /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
 857               UNGET (ch);
 858               old_state = 4;
 859               state = -1;
 860               if (flag_m68k_mri)
 861                 out_string = "\tappline ";
 862               else
 863                 out_string = "\t.appline ";
 864               PUT (*out_string++);
 865               break;
 866             }
 867
 868           /* We have a line comment character which is not at the
 869              start of a line.  If this is also a normal comment
 870              character, fall through.  Otherwise treat it as a default
 871              character.  */
 872           if (strchr (comment_chars, ch) == NULL
 873               && (! flag_m68k_mri
 874                   || (ch != '!' && ch != '*')))
 875             goto de_fault;
 876           if (flag_m68k_mri
 877               && (ch == '!' || ch == '*' || ch == '#')
 878               && state != 1
 879               && state != 10)
 880             goto de_fault;
 881           /* Fall through.  */
 882         case LEX_IS_COMMENT_START:
 883           do
 884             {
 885               ch = GET ();
 886             }
 887           while (ch != EOF && !IS_NEWLINE (ch));
 888           if (ch == EOF)
 889             as_warn ("end of file in comment; newline inserted");
 890           state = 0;
 891           PUT ('\n');
 892           break;
 893
 894         case LEX_IS_SYMBOL_COMPONENT:
 895           if (state == 10)
 896             {
 897               /* This is a symbol character following another symbol
 898                  character, with whitespace in between.  We skipped
 899                  the whitespace earlier, so output it now.  */
 900               UNGET (ch);
 901               state = 3;
 902               PUT (' ');
 903               break;
 904             }
 905
 906           if (state == 3)
 907             state = 9;
 908
 909           /* This is a common case.  Quickly copy CH and all the
 910              following symbol component or normal characters.  */
 911           if (to + 1 < toend)
 912             {
 913               char *s;
 914               int len;
 915
 916               for (s = from; s < fromend; s++)
 917                 {
 918                   int type;
 919
 920                   ch2 = *s;
 921                   type = lex[ch2];
 922                   if (type != 0
 923                       && type != LEX_IS_SYMBOL_COMPONENT)
 924                     break;
 925                 }
 926               if (s > from)
 927                 {
 928                   /* Handle the last character normally, for
 929                      simplicity.  */
 930                   --s;
 931                 }
 932               len = s - from;
 933               if (len > (toend - to) - 1)
 934                 len = (toend - to) - 1;
 935               if (len > 0)
 936                 {
 937                   PUT (ch);
 938                   if (len > 8)
 939                     {
 940                       memcpy (to, from, len);
 941                       to += len;
 942                       from += len;
 943                     }
 944                   else
 945                     {
 946                       switch (len)
 947                         {
 948                         case 8: *to++ = *from++;
 949                         case 7: *to++ = *from++;
 950                         case 6: *to++ = *from++;
 951                         case 5: *to++ = *from++;
 952                         case 4: *to++ = *from++;
 953                         case 3: *to++ = *from++;
 954                         case 2: *to++ = *from++;
 955                         case 1: *to++ = *from++;
 956                         }
 957                     }
 958                   ch = GET ();
 959                 }
 960             }
 961
 962           /* Fall through.  */
 963         default:
 964         de_fault:
 965           /* Some relatively `normal' character.  */
 966           if (state == 0)
 967             {
 968               state = 11;       /* Now seeing label definition */
 969             }
 970           else if (state == 1)
 971             {
 972               state = 2;        /* Ditto */
 973             }
 974           else if (state == 9)
 975             {
 976               if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
 977                 state = 3;
 978             }
 979           else if (state == 10)
 980             {
 981               state = 3;
 982             }
 983           PUT (ch);
 984           break;
 985         }
 986     }
 987
 988   /*NOTREACHED*/
 989
 990  fromeof:
 991   /* We have reached the end of the input.  */
 992   return to - tostart;
 993
 994  tofull:
 995   /* The output buffer is full.  Save any input we have not yet
 996      processed.  */
 997   if (fromend > from)
 998     {
 999       char *save;
1000
1001       save = (char *) xmalloc (fromend - from);
1002       memcpy (save, from, fromend - from);
1003       if (saved_input != NULL)
1004         free (saved_input);
1005       saved_input = save;
1006       saved_input_len = fromend - from;
1007     }
1008   else
1009     {
1010       if (saved_input != NULL)
1011         {
1012           free (saved_input);
1013           saved_input = NULL;
1014         }
1015     }
1016   return to - tostart;
1017 }
1018
1019 /* end of app.c */