]> git.ipfire.org Git - thirdparty/binutils-gdb.git/blob - gas/app.c
Remove IEEE 695 object support
[thirdparty/binutils-gdb.git] / gas / app.c
1 /* This is the Assembler Pre-Processor
2 Copyright (C) 1987-2018 Free Software Foundation, Inc.
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GAS is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14 License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19 02110-1301, USA. */
20
21 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
22 /* App, the assembler pre-processor. This pre-processor strips out
23 excess spaces, turns single-quoted characters into a decimal
24 constant, and turns the # in # <number> <filename> <garbage> into a
25 .linefile. This needs better error-handling. */
26
27 #include "as.h"
28
29 #if (__STDC__ != 1)
30 #ifndef const
31 #define const /* empty */
32 #endif
33 #endif
34
35 #ifdef H_TICK_HEX
36 int enable_h_tick_hex = 0; /* When nonzero, do_scrub_begin classes 'h'/'H' as LEX_IS_H and do_scrub_chars rewrites a leading H' as 0x.  */
37 #endif
38
39 #ifdef TC_M68K
40 /* Whether we are scrubbing in m68k MRI mode. This is different from
41 flag_m68k_mri, because the two flags will be affected by the .mri
42 pseudo-op at different times. */
43 static int scrub_m68k_mri;
44
45 /* The pseudo-op which switches in and out of MRI mode. See the
46 comment in do_scrub_chars. */
47 static const char mri_pseudo[] = ".mri 0";
48 #else
49 #define scrub_m68k_mri 0
50 #endif
51
52 #if defined TC_ARM && defined OBJ_ELF
53 /* The pseudo-op for which we need to special-case `@' characters.
54 See the comment in do_scrub_chars. */
55 static const char symver_pseudo[] = ".symver";
56 static const char * symver_state;
57 #endif
58 #ifdef TC_ARM
59 static char last_char; /* Stands in for the previous output character in the ARM "\@" check when the current output buffer is still empty.  NOTE(review): presumably the last character emitted by the previous do_scrub_chars call -- confirm where it is assigned.  */
60 #endif
61
62 static char lex[256]; /* Lexical class (0 or a LEX_IS_* value) of each byte value; filled in by do_scrub_begin.  */
63 static const char symbol_chars[] = /* Characters do_scrub_begin classes as LEX_IS_SYMBOL_COMPONENT before the later, target-specific assignments.  */
64 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
65
66 #define LEX_IS_SYMBOL_COMPONENT 1 /* In symbol_chars, a byte >= 128, or listed in tc_symbol_chars.  */
67 #define LEX_IS_WHITESPACE 2 /* Space, tab or carriage return.  */
68 #define LEX_IS_LINE_SEPARATOR 3 /* Listed in tc_line_separator_chars.  */
69 #define LEX_IS_COMMENT_START 4 /* Listed in tc_comment_chars (and ';' in m68k MRI mode).  */
70 #define LEX_IS_LINE_COMMENT_START 5 /* Listed in line_comment_chars (and '*', '!' in m68k MRI mode).  */
71 #define LEX_IS_TWOCHAR_COMMENT_1ST 6 /* '/' when it has no other class; may open a slash-star comment.  */
72 #define LEX_IS_STRINGQUOTE 8 /* Opens a string; '"' by default.  Note that 7 is not used.  */
73 #define LEX_IS_COLON 9 /* ':'.  */
74 #define LEX_IS_NEWLINE 10 /* '\n'.  */
75 #define LEX_IS_ONECHAR_QUOTE 11 /* '\'' introducing a one-character constant.  */
76 #ifdef TC_V850
77 #define LEX_IS_DOUBLEDASH_1ST 12 /* '-'; a second '-' makes it a comment to end of line.  */
78 #endif
79 #ifdef TC_M32R
80 #define DOUBLEBAR_PARALLEL /* Enables the "||" handling below and in do_scrub_chars.  */
81 #endif
82 #ifdef DOUBLEBAR_PARALLEL
83 #define LEX_IS_DOUBLEBAR_1ST 13 /* '|'; a second '|' makes it a parallel separator.  */
84 #endif
85 #define LEX_IS_PARALLEL_SEPARATOR 14 /* Listed in tc_parallel_separator_chars.  */
86 #ifdef H_TICK_HEX
87 #define LEX_IS_H 15 /* 'h' or 'H' while enable_h_tick_hex is set.  */
88 #endif
89 #define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT) /* The IS_* tests index lex directly with C, so C must be in 0..255 (never EOF).  */
90 #define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
91 #define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
92 #define IS_PARALLEL_SEPARATOR(c) (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
93 #define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
94 #define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
95 #define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
96
97 static int process_escape (int);
98
99 /* FIXME-soon: The entire lexer/parser thingy should be
100 built statically at compile time rather than dynamically
101 each and every time the assembler is run. xoxorich. */
102
103 void
104 do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
105 {
106 const char *p;
107 int c;
108
109 lex[' '] = LEX_IS_WHITESPACE;
110 lex['\t'] = LEX_IS_WHITESPACE;
111 lex['\r'] = LEX_IS_WHITESPACE;
112 lex['\n'] = LEX_IS_NEWLINE;
113 lex[':'] = LEX_IS_COLON;
114
115 #ifdef TC_M68K
116 scrub_m68k_mri = m68k_mri;
117
118 if (! m68k_mri)
119 #endif
120 {
121 lex['"'] = LEX_IS_STRINGQUOTE;
122
123 #if ! defined (TC_HPPA) && ! defined (TC_I370)
124 /* I370 uses single-quotes to delimit integer, float constants. */
125 lex['\''] = LEX_IS_ONECHAR_QUOTE;
126 #endif
127
128 #ifdef SINGLE_QUOTE_STRINGS
129 lex['\''] = LEX_IS_STRINGQUOTE;
130 #endif
131 }
132
133 /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
134 in state 5 of do_scrub_chars must be changed. */
135
136 /* Note that these override the previous defaults, e.g. if ';' is a
137 comment char, then it isn't a line separator. */
138 for (p = symbol_chars; *p; ++p)
139 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
140
141 for (c = 128; c < 256; ++c)
142 lex[c] = LEX_IS_SYMBOL_COMPONENT;
143
144 #ifdef tc_symbol_chars
145 /* This macro permits the processor to specify all characters which
146 may appears in an operand. This will prevent the scrubber from
147 discarding meaningful whitespace in certain cases. The i386
148 backend uses this to support prefixes, which can confuse the
149 scrubber as to whether it is parsing operands or opcodes. */
150 for (p = tc_symbol_chars; *p; ++p)
151 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
152 #endif
153
154 /* The m68k backend wants to be able to change comment_chars. */
155 #ifndef tc_comment_chars
156 #define tc_comment_chars comment_chars
157 #endif
158 for (p = tc_comment_chars; *p; p++)
159 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
160
161 for (p = line_comment_chars; *p; p++)
162 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
163
164 #ifndef tc_line_separator_chars
165 #define tc_line_separator_chars line_separator_chars
166 #endif
167 for (p = tc_line_separator_chars; *p; p++)
168 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
169
170 #ifdef tc_parallel_separator_chars
171 /* This macro permits the processor to specify all characters which
172 separate parallel insns on the same line. */
173 for (p = tc_parallel_separator_chars; *p; p++)
174 lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
175 #endif
176
177 /* Only allow slash-star comments if slash is not in use.
178 FIXME: This isn't right. We should always permit them. */
179 if (lex['/'] == 0)
180 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
181
182 #ifdef TC_M68K
183 if (m68k_mri)
184 {
185 lex['\''] = LEX_IS_STRINGQUOTE;
186 lex[';'] = LEX_IS_COMMENT_START;
187 lex['*'] = LEX_IS_LINE_COMMENT_START;
188 /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
189 then it can't be used in an expression. */
190 lex['!'] = LEX_IS_LINE_COMMENT_START;
191 }
192 #endif
193
194 #ifdef TC_V850
195 lex['-'] = LEX_IS_DOUBLEDASH_1ST;
196 #endif
197 #ifdef DOUBLEBAR_PARALLEL
198 lex['|'] = LEX_IS_DOUBLEBAR_1ST;
199 #endif
200 #ifdef TC_D30V
201 /* Must do this is we want VLIW instruction with "->" or "<-". */
202 lex['-'] = LEX_IS_SYMBOL_COMPONENT;
203 #endif
204
205 #ifdef H_TICK_HEX
206 if (enable_h_tick_hex)
207 {
208 lex['h'] = LEX_IS_H;
209 lex['H'] = LEX_IS_H;
210 }
211 #endif
212 }
213
214 /* Saved state of the scrubber. */
215 static int state; /* Current state of the do_scrub_chars state machine.  */
216 static int old_state; /* State to resume once state -1, -2 or 5 has finished.  */
217 static const char *out_string; /* Next character to emit while in state -1.  */
218 static char out_buf[20]; /* Decimal text of a one-character quote constant; out_string may point here.  */
219 static int add_newlines; /* Newlines swallowed inside comments or continued strings, still to be emitted.  */
220 static char *saved_input; /* If non-NULL, pending input that do_scrub_chars resumes from before calling GET.  */
221 static size_t saved_input_len; /* Number of bytes at saved_input.  */
222 static char input_buffer[32 * 1024]; /* Buffer handed to the GET callback for raw input.  */
223 static const char *mri_state; /* Position in mri_pseudo while matching a .mri directive; NULL when not matching.  */
224 static char mri_last_ch; /* Last character accepted by the .mri matcher; '1' selects MRI mode.  */
225
226 /* Data structure for saving the state of app across #include's. Note that
227 app is called asynchronously to the parsing of the .include's, so our
228 state at the time .include is interpreted is completely unrelated.
229 That's why we have to save it all. */
230
231 struct app_save /* Snapshot of the file-scope scrubber state, filled by app_push and consumed by app_pop.  */
232 {
233 int state; /* Copy of state.  */
234 int old_state; /* Copy of old_state.  */
235 const char * out_string; /* Copy of the out_string pointer, which may point into the file-scope out_buf.  */
236 char out_buf[sizeof (out_buf)]; /* Copy of the file-scope out_buf contents; sized from that array.  */
237 int add_newlines; /* Copy of add_newlines.  */
238 char * saved_input; /* Heap copy of the pending input made by app_push, or NULL; freed by app_pop.  */
239 size_t saved_input_len; /* Length of saved_input; only set when saved_input is non-NULL.  */
240 #ifdef TC_M68K
241 int scrub_m68k_mri; /* Copy of scrub_m68k_mri.  */
242 #endif
243 const char * mri_state; /* Copy of mri_state.  */
244 char mri_last_ch; /* Copy of mri_last_ch.  */
245 #if defined TC_ARM && defined OBJ_ELF
246 const char * symver_state; /* Copy of symver_state.  */
247 #endif
248 #ifdef TC_ARM
249 char last_char; /* Copy of last_char.  */
250 #endif
251 };
252
253 char *
254 app_push (void)
255 {
256 struct app_save *saved;
257
258 saved = XNEW (struct app_save);
259 saved->state = state;
260 saved->old_state = old_state;
261 saved->out_string = out_string;
262 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
263 saved->add_newlines = add_newlines;
264 if (saved_input == NULL)
265 saved->saved_input = NULL;
266 else
267 {
268 saved->saved_input = XNEWVEC (char, saved_input_len);
269 memcpy (saved->saved_input, saved_input, saved_input_len);
270 saved->saved_input_len = saved_input_len;
271 }
272 #ifdef TC_M68K
273 saved->scrub_m68k_mri = scrub_m68k_mri;
274 #endif
275 saved->mri_state = mri_state;
276 saved->mri_last_ch = mri_last_ch;
277 #if defined TC_ARM && defined OBJ_ELF
278 saved->symver_state = symver_state;
279 #endif
280 #ifdef TC_ARM
281 saved->last_char = last_char;
282 #endif
283
284 /* do_scrub_begin() is not useful, just wastes time. */
285
286 state = 0;
287 saved_input = NULL;
288 add_newlines = 0;
289
290 return (char *) saved;
291 }
292
293 void
294 app_pop (char *arg)
295 {
296 struct app_save *saved = (struct app_save *) arg;
297
298 /* There is no do_scrub_end (). */
299 state = saved->state;
300 old_state = saved->old_state;
301 out_string = saved->out_string;
302 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
303 add_newlines = saved->add_newlines;
304 if (saved->saved_input == NULL)
305 saved_input = NULL;
306 else
307 {
308 gas_assert (saved->saved_input_len <= sizeof (input_buffer));
309 memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
310 saved_input = input_buffer;
311 saved_input_len = saved->saved_input_len;
312 free (saved->saved_input);
313 }
314 #ifdef TC_M68K
315 scrub_m68k_mri = saved->scrub_m68k_mri;
316 #endif
317 mri_state = saved->mri_state;
318 mri_last_ch = saved->mri_last_ch;
319 #if defined TC_ARM && defined OBJ_ELF
320 symver_state = saved->symver_state;
321 #endif
322 #ifdef TC_ARM
323 last_char = saved->last_char;
324 #endif
325
326 free (arg);
327 }
328
329 /* @@ This assumes that \n &c are the same on host and target. This is not
330 necessarily true. */
331
/* Translate CH, the character that followed a backslash in a
   one-character quote, into the character the escape denotes.  The
   letters b, f, n, r and t yield the matching host control character;
   every other value, the quote characters included, is returned
   unchanged.  */

static int
process_escape (int ch)
{
  /* Escape letters paired with the characters they stand for.  */
  static const struct
  {
    int letter;
    int value;
  }
  escapes[] =
  {
    { 'b', '\b' },
    { 'f', '\f' },
    { 'n', '\n' },
    { 'r', '\r' },
    { 't', '\t' }
  };
  unsigned int i;

  for (i = 0; i < sizeof escapes / sizeof escapes[0]; i++)
    if (escapes[i].letter == ch)
      return escapes[i].value;

  /* Not a recognised letter: the character represents itself.  */
  return ch;
}
355
356 /* This function is called to process input characters. The GET
357 parameter is used to retrieve more input characters. GET should
358 set its parameter to point to a buffer, and return the length of
359 the buffer; it should return 0 at end of file. The scrubbed output
360 characters are put into the buffer starting at TOSTART; the TOSTART
361 buffer is TOLEN bytes in length. The function returns the number
362 of scrubbed characters put into TOSTART. This will be TOLEN unless
363 end of file was seen. This function is arranged as a state
364 machine, and saves its state so that it may return at any point.
365 This is the way the old code used to work. */
366
367 size_t
368 do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
369 {
370 char *to = tostart;
371 char *toend = tostart + tolen;
372 char *from;
373 char *fromend;
374 size_t fromlen;
375 int ch, ch2 = 0;
376 /* Character that started the string we're working on. */
377 static char quotechar;
378
379 /*State 0: beginning of normal line
380 1: After first whitespace on line (flush more white)
381 2: After first non-white (opcode) on line (keep 1white)
382 3: after second white on line (into operands) (flush white)
383 4: after putting out a .linefile, put out digits
384 5: parsing a string, then go to old-state
385 6: putting out \ escape in a "d string.
386 7: no longer used
387 8: no longer used
388 9: After seeing symbol char in state 3 (keep 1white after symchar)
389 10: After seeing whitespace in state 9 (keep white before symchar)
390 11: After seeing a symbol character in state 0 (eg a label definition)
391 -1: output string in out_string and go to the state in old_state
392 -2: flush text until a '*' '/' is seen, then go to state old_state
393 #ifdef TC_V850
394 12: After seeing a dash, looking for a second dash as a start
395 of comment.
396 #endif
397 #ifdef DOUBLEBAR_PARALLEL
398 13: After seeing a vertical bar, looking for a second
399 vertical bar as a parallel expression separator.
400 #endif
401 #ifdef TC_PREDICATE_START_CHAR
402 14: After seeing a predicate start character at state 0, looking
403 for a predicate end character as predicate.
404 15: After seeing a predicate start character at state 1, looking
405 for a predicate end character as predicate.
406 #endif
407 #ifdef TC_Z80
408 16: After seeing an 'a' or an 'A' at the start of a symbol
409 17: After seeing an 'f' or an 'F' in state 16
410 #endif
411 */
412
413 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
414 constructs like ``.loc 1 20''. This was turning into ``.loc
415 120''. States 9 and 10 ensure that a space is never dropped in
416 between characters which could appear in an identifier. Ian
417 Taylor, ian@cygnus.com.
418
419 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
420 correctly on the PA (and any other target where colons are optional).
421 Jeff Law, law@cs.utah.edu.
422
423 I added state 13 so that something like "cmp r1, r2 || trap #1" does not
424 get squashed into "cmp r1,r2||trap#1", with the all important space
425 between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
426
427 /* This macro gets the next input character. */
428
429 #define GET() \
430 (from < fromend \
431 ? * (unsigned char *) (from++) \
432 : (saved_input = NULL, \
433 fromlen = (*get) (input_buffer, sizeof input_buffer), \
434 from = input_buffer, \
435 fromend = from + fromlen, \
436 (fromlen == 0 \
437 ? EOF \
438 : * (unsigned char *) (from++))))
439
440 /* This macro pushes a character back on the input stream. */
441
442 #define UNGET(uch) (*--from = (uch))
443
444 /* This macro puts a character into the output buffer. If this
445 character fills the output buffer, this macro jumps to the label
446 TOFULL. We use this rather ugly approach because we need to
447 handle two different termination conditions: EOF on the input
448 stream, and a full output buffer. It would be simpler if we
449 always read in the entire input stream before processing it, but
450 I don't want to make such a significant change to the assembler's
451 memory usage. */
452
453 #define PUT(pch) \
454 do \
455 { \
456 *to++ = (pch); \
457 if (to >= toend) \
458 goto tofull; \
459 } \
460 while (0)
461
462 if (saved_input != NULL)
463 {
464 from = saved_input;
465 fromend = from + saved_input_len;
466 }
467 else
468 {
469 fromlen = (*get) (input_buffer, sizeof input_buffer);
470 if (fromlen == 0)
471 return 0;
472 from = input_buffer;
473 fromend = from + fromlen;
474 }
475
476 while (1)
477 {
478 /* The cases in this switch end with continue, in order to
479 branch back to the top of this while loop and generate the
480 next output character in the appropriate state. */
481 switch (state)
482 {
483 case -1:
484 ch = *out_string++;
485 if (*out_string == '\0')
486 {
487 state = old_state;
488 old_state = 3;
489 }
490 PUT (ch);
491 continue;
492
493 case -2:
494 for (;;)
495 {
496 do
497 {
498 ch = GET ();
499
500 if (ch == EOF)
501 {
502 as_warn (_("end of file in comment"));
503 goto fromeof;
504 }
505
506 if (ch == '\n')
507 PUT ('\n');
508 }
509 while (ch != '*');
510
511 while ((ch = GET ()) == '*')
512 ;
513
514 if (ch == EOF)
515 {
516 as_warn (_("end of file in comment"));
517 goto fromeof;
518 }
519
520 if (ch == '/')
521 break;
522
523 UNGET (ch);
524 }
525
526 state = old_state;
527 UNGET (' ');
528 continue;
529
530 case 4:
531 ch = GET ();
532 if (ch == EOF)
533 goto fromeof;
534 else if (ch >= '0' && ch <= '9')
535 PUT (ch);
536 else
537 {
538 while (ch != EOF && IS_WHITESPACE (ch))
539 ch = GET ();
540 if (ch == '"')
541 {
542 quotechar = ch;
543 state = 5;
544 old_state = 3;
545 PUT (ch);
546 }
547 else
548 {
549 while (ch != EOF && ch != '\n')
550 ch = GET ();
551 state = 0;
552 PUT (ch);
553 }
554 }
555 continue;
556
557 case 5:
558 /* We are going to copy everything up to a quote character,
559 with special handling for a backslash. We try to
560 optimize the copying in the simple case without using the
561 GET and PUT macros. */
562 {
563 char *s;
564 ptrdiff_t len;
565
566 for (s = from; s < fromend; s++)
567 {
568 ch = *s;
569 if (ch == '\\'
570 || ch == quotechar
571 || ch == '\n')
572 break;
573 }
574 len = s - from;
575 if (len > toend - to)
576 len = toend - to;
577 if (len > 0)
578 {
579 memcpy (to, from, len);
580 to += len;
581 from += len;
582 if (to >= toend)
583 goto tofull;
584 }
585 }
586
587 ch = GET ();
588 if (ch == EOF)
589 {
590 /* This buffer is here specifically so
591 that the UNGET below will work. */
592 static char one_char_buf[1];
593
594 as_warn (_("end of file in string; '%c' inserted"), quotechar);
595 state = old_state;
596 from = fromend = one_char_buf + 1;
597 fromlen = 1;
598 UNGET ('\n');
599 PUT (quotechar);
600 }
601 else if (ch == quotechar)
602 {
603 state = old_state;
604 PUT (ch);
605 }
606 #ifndef NO_STRING_ESCAPES
607 else if (ch == '\\')
608 {
609 state = 6;
610 PUT (ch);
611 }
612 #endif
613 else if (scrub_m68k_mri && ch == '\n')
614 {
615 /* Just quietly terminate the string. This permits lines like
616 bne label loop if we haven't reach end yet. */
617 state = old_state;
618 UNGET (ch);
619 PUT ('\'');
620 }
621 else
622 {
623 PUT (ch);
624 }
625 continue;
626
627 case 6:
628 state = 5;
629 ch = GET ();
630 switch (ch)
631 {
632 /* Handle strings broken across lines, by turning '\n' into
633 '\\' and 'n'. */
634 case '\n':
635 UNGET ('n');
636 add_newlines++;
637 PUT ('\\');
638 continue;
639
640 case EOF:
641 as_warn (_("end of file in string; '%c' inserted"), quotechar);
642 PUT (quotechar);
643 continue;
644
645 case '"':
646 case '\\':
647 case 'b':
648 case 'f':
649 case 'n':
650 case 'r':
651 case 't':
652 case 'v':
653 case 'x':
654 case 'X':
655 case '0':
656 case '1':
657 case '2':
658 case '3':
659 case '4':
660 case '5':
661 case '6':
662 case '7':
663 break;
664
665 default:
666 #ifdef ONLY_STANDARD_ESCAPES
667 as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
668 #endif
669 break;
670 }
671 PUT (ch);
672 continue;
673
674 #ifdef DOUBLEBAR_PARALLEL
675 case 13:
676 ch = GET ();
677 if (ch != '|')
678 abort ();
679
680 /* Reset back to state 1 and pretend that we are parsing a
681 line from just after the first white space. */
682 state = 1;
683 PUT ('|');
684 #ifdef TC_TIC6X
685 /* "||^" is used for SPMASKed instructions. */
686 ch = GET ();
687 if (ch == EOF)
688 goto fromeof;
689 else if (ch == '^')
690 PUT ('^');
691 else
692 UNGET (ch);
693 #endif
694 continue;
695 #endif
696 #ifdef TC_Z80
697 case 16:
698 /* We have seen an 'a' at the start of a symbol, look for an 'f'. */
699 ch = GET ();
700 if (ch == 'f' || ch == 'F')
701 {
702 state = 17;
703 PUT (ch);
704 }
705 else
706 {
707 state = 9;
708 break;
709 }
710 /* Fall through. */
711 case 17:
712 /* We have seen "af" at the start of a symbol,
713 a ' here is a part of that symbol. */
714 ch = GET ();
715 state = 9;
716 if (ch == '\'')
717 /* Change to avoid warning about unclosed string. */
718 PUT ('`');
719 else if (ch != EOF)
720 UNGET (ch);
721 break;
722 #endif
723 }
724
725 /* OK, we are somewhere in states 0 through 4 or 9 through 11. */
726
727 /* flushchar: */
728 ch = GET ();
729
730 #ifdef TC_PREDICATE_START_CHAR
731 if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
732 {
733 state += 14;
734 PUT (ch);
735 continue;
736 }
737 else if (state == 14 || state == 15)
738 {
739 if (ch == TC_PREDICATE_END_CHAR)
740 {
741 state -= 14;
742 PUT (ch);
743 ch = GET ();
744 }
745 else
746 {
747 PUT (ch);
748 continue;
749 }
750 }
751 #endif
752
753 recycle:
754
755 #if defined TC_ARM && defined OBJ_ELF
756 /* We need to watch out for .symver directives. See the comment later
757 in this function. */
758 if (symver_state == NULL)
759 {
760 if ((state == 0 || state == 1) && ch == symver_pseudo[0])
761 symver_state = symver_pseudo + 1;
762 }
763 else
764 {
765 /* We advance to the next state if we find the right
766 character. */
767 if (ch != '\0' && (*symver_state == ch))
768 ++symver_state;
769 else if (*symver_state != '\0')
770 /* We did not get the expected character, or we didn't
771 get a valid terminating character after seeing the
772 entire pseudo-op, so we must go back to the beginning. */
773 symver_state = NULL;
774 else
775 {
776 /* We've read the entire pseudo-op. If this is the end
777 of the line, go back to the beginning. */
778 if (IS_NEWLINE (ch))
779 symver_state = NULL;
780 }
781 }
782 #endif /* TC_ARM && OBJ_ELF */
783
784 #ifdef TC_M68K
785 /* We want to have pseudo-ops which control whether we are in
786 MRI mode or not. Unfortunately, since m68k MRI mode affects
787 the scrubber, that means that we need a special purpose
788 recognizer here. */
789 if (mri_state == NULL)
790 {
791 if ((state == 0 || state == 1)
792 && ch == mri_pseudo[0])
793 mri_state = mri_pseudo + 1;
794 }
795 else
796 {
797 /* We advance to the next state if we find the right
798 character, or if we need a space character and we get any
799 whitespace character, or if we need a '0' and we get a
800 '1' (this is so that we only need one state to handle
801 ``.mri 0'' and ``.mri 1''). */
802 if (ch != '\0'
803 && (*mri_state == ch
804 || (*mri_state == ' '
805 && lex[ch] == LEX_IS_WHITESPACE)
806 || (*mri_state == '0'
807 && ch == '1')))
808 {
809 mri_last_ch = ch;
810 ++mri_state;
811 }
812 else if (*mri_state != '\0'
813 || (lex[ch] != LEX_IS_WHITESPACE
814 && lex[ch] != LEX_IS_NEWLINE))
815 {
816 /* We did not get the expected character, or we didn't
817 get a valid terminating character after seeing the
818 entire pseudo-op, so we must go back to the
819 beginning. */
820 mri_state = NULL;
821 }
822 else
823 {
824 /* We've read the entire pseudo-op. mri_last_ch is
825 either '0' or '1' indicating whether to enter or
826 leave MRI mode. */
827 do_scrub_begin (mri_last_ch == '1');
828 mri_state = NULL;
829
830 /* We continue handling the character as usual. The
831 main gas reader must also handle the .mri pseudo-op
832 to control expression parsing and the like. */
833 }
834 }
835 #endif
836
837 if (ch == EOF)
838 {
839 if (state != 0)
840 {
841 as_warn (_("end of file not at end of a line; newline inserted"));
842 state = 0;
843 PUT ('\n');
844 }
845 goto fromeof;
846 }
847
848 switch (lex[ch])
849 {
850 case LEX_IS_WHITESPACE:
851 do
852 {
853 ch = GET ();
854 }
855 while (ch != EOF && IS_WHITESPACE (ch));
856 if (ch == EOF)
857 goto fromeof;
858
859 if (state == 0)
860 {
861 /* Preserve a single whitespace character at the
862 beginning of a line. */
863 state = 1;
864 UNGET (ch);
865 PUT (' ');
866 break;
867 }
868
869 #ifdef KEEP_WHITE_AROUND_COLON
870 if (lex[ch] == LEX_IS_COLON)
871 {
872 /* Only keep this white if there's no white *after* the
873 colon. */
874 ch2 = GET ();
875 if (ch2 != EOF)
876 UNGET (ch2);
877 if (!IS_WHITESPACE (ch2))
878 {
879 state = 9;
880 UNGET (ch);
881 PUT (' ');
882 break;
883 }
884 }
885 #endif
886 if (IS_COMMENT (ch)
887 || ch == '/'
888 || IS_LINE_SEPARATOR (ch)
889 || IS_PARALLEL_SEPARATOR (ch))
890 {
891 if (scrub_m68k_mri)
892 {
893 /* In MRI mode, we keep these spaces. */
894 UNGET (ch);
895 PUT (' ');
896 break;
897 }
898 goto recycle;
899 }
900
901 /* If we're in state 2 or 11, we've seen a non-white
902 character followed by whitespace. If the next character
903 is ':', this is whitespace after a label name which we
904 normally must ignore. In MRI mode, though, spaces are
905 not permitted between the label and the colon. */
906 if ((state == 2 || state == 11)
907 && lex[ch] == LEX_IS_COLON
908 && ! scrub_m68k_mri)
909 {
910 state = 1;
911 PUT (ch);
912 break;
913 }
914
915 switch (state)
916 {
917 case 1:
918 /* We can arrive here if we leave a leading whitespace
919 character at the beginning of a line. */
920 goto recycle;
921 case 2:
922 state = 3;
923 if (to + 1 < toend)
924 {
925 /* Optimize common case by skipping UNGET/GET. */
926 PUT (' '); /* Sp after opco */
927 goto recycle;
928 }
929 UNGET (ch);
930 PUT (' ');
931 break;
932 case 3:
933 #ifndef TC_KEEP_OPERAND_SPACES
934 /* For TI C6X, we keep these spaces as they may separate
935 functional unit specifiers from operands. */
936 if (scrub_m68k_mri)
937 #endif
938 {
939 /* In MRI mode, we keep these spaces. */
940 UNGET (ch);
941 PUT (' ');
942 break;
943 }
944 goto recycle; /* Sp in operands */
945 case 9:
946 case 10:
947 #ifndef TC_KEEP_OPERAND_SPACES
948 if (scrub_m68k_mri)
949 #endif
950 {
951 /* In MRI mode, we keep these spaces. */
952 state = 3;
953 UNGET (ch);
954 PUT (' ');
955 break;
956 }
957 state = 10; /* Sp after symbol char */
958 goto recycle;
959 case 11:
960 if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
961 state = 1;
962 else
963 {
964 /* We know that ch is not ':', since we tested that
965 case above. Therefore this is not a label, so it
966 must be the opcode, and we've just seen the
967 whitespace after it. */
968 state = 3;
969 }
970 UNGET (ch);
971 PUT (' '); /* Sp after label definition. */
972 break;
973 default:
974 BAD_CASE (state);
975 }
976 break;
977
978 case LEX_IS_TWOCHAR_COMMENT_1ST:
979 ch2 = GET ();
980 if (ch2 == '*')
981 {
982 for (;;)
983 {
984 do
985 {
986 ch2 = GET ();
987 if (ch2 != EOF && IS_NEWLINE (ch2))
988 add_newlines++;
989 }
990 while (ch2 != EOF && ch2 != '*');
991
992 while (ch2 == '*')
993 ch2 = GET ();
994
995 if (ch2 == EOF || ch2 == '/')
996 break;
997
998 /* This UNGET will ensure that we count newlines
999 correctly. */
1000 UNGET (ch2);
1001 }
1002
1003 if (ch2 == EOF)
1004 as_warn (_("end of file in multiline comment"));
1005
1006 ch = ' ';
1007 goto recycle;
1008 }
1009 #ifdef DOUBLESLASH_LINE_COMMENTS
1010 else if (ch2 == '/')
1011 {
1012 do
1013 {
1014 ch = GET ();
1015 }
1016 while (ch != EOF && !IS_NEWLINE (ch));
1017 if (ch == EOF)
1018 as_warn ("end of file in comment; newline inserted");
1019 state = 0;
1020 PUT ('\n');
1021 break;
1022 }
1023 #endif
1024 else
1025 {
1026 if (ch2 != EOF)
1027 UNGET (ch2);
1028 if (state == 9 || state == 10)
1029 state = 3;
1030 PUT (ch);
1031 }
1032 break;
1033
1034 case LEX_IS_STRINGQUOTE:
1035 quotechar = ch;
1036 if (state == 10)
1037 {
1038 /* Preserve the whitespace in foo "bar". */
1039 UNGET (ch);
1040 state = 3;
1041 PUT (' ');
1042
1043 /* PUT didn't jump out. We could just break, but we
1044 know what will happen, so optimize a bit. */
1045 ch = GET ();
1046 old_state = 3;
1047 }
1048 else if (state == 9)
1049 old_state = 3;
1050 else
1051 old_state = state;
1052 state = 5;
1053 PUT (ch);
1054 break;
1055
1056 case LEX_IS_ONECHAR_QUOTE:
1057 #ifdef H_TICK_HEX
1058 if (state == 9 && enable_h_tick_hex)
1059 {
1060 char c;
1061
1062 c = GET ();
1063 as_warn ("'%c found after symbol", c);
1064 UNGET (c);
1065 }
1066 #endif
1067 if (state == 10)
1068 {
1069 /* Preserve the whitespace in foo 'b'. */
1070 UNGET (ch);
1071 state = 3;
1072 PUT (' ');
1073 break;
1074 }
1075 ch = GET ();
1076 if (ch == EOF)
1077 {
1078 as_warn (_("end of file after a one-character quote; \\0 inserted"));
1079 ch = 0;
1080 }
1081 if (ch == '\\')
1082 {
1083 ch = GET ();
1084 if (ch == EOF)
1085 {
1086 as_warn (_("end of file in escape character"));
1087 ch = '\\';
1088 }
1089 else
1090 ch = process_escape (ch);
1091 }
1092 sprintf (out_buf, "%d", (int) (unsigned char) ch);
1093
1094 /* None of these 'x constants for us. We want 'x'. */
1095 if ((ch = GET ()) != '\'')
1096 {
1097 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
1098 as_warn (_("missing close quote; (assumed)"));
1099 #else
1100 if (ch != EOF)
1101 UNGET (ch);
1102 #endif
1103 }
1104 if (strlen (out_buf) == 1)
1105 {
1106 PUT (out_buf[0]);
1107 break;
1108 }
1109 if (state == 9)
1110 old_state = 3;
1111 else
1112 old_state = state;
1113 state = -1;
1114 out_string = out_buf;
1115 PUT (*out_string++);
1116 break;
1117
1118 case LEX_IS_COLON:
1119 #ifdef KEEP_WHITE_AROUND_COLON
1120 state = 9;
1121 #else
1122 if (state == 9 || state == 10)
1123 state = 3;
1124 else if (state != 3)
1125 state = 1;
1126 #endif
1127 PUT (ch);
1128 break;
1129
1130 case LEX_IS_NEWLINE:
1131 /* Roll out a bunch of newlines from inside comments, etc. */
1132 if (add_newlines)
1133 {
1134 --add_newlines;
1135 UNGET (ch);
1136 }
1137 /* Fall through. */
1138
1139 case LEX_IS_LINE_SEPARATOR:
1140 state = 0;
1141 PUT (ch);
1142 break;
1143
1144 case LEX_IS_PARALLEL_SEPARATOR:
1145 state = 1;
1146 PUT (ch);
1147 break;
1148
1149 #ifdef TC_V850
1150 case LEX_IS_DOUBLEDASH_1ST:
1151 ch2 = GET ();
1152 if (ch2 != '-')
1153 {
1154 if (ch2 != EOF)
1155 UNGET (ch2);
1156 goto de_fault;
1157 }
1158 /* Read and skip to end of line. */
1159 do
1160 {
1161 ch = GET ();
1162 }
1163 while (ch != EOF && ch != '\n');
1164
1165 if (ch == EOF)
1166 as_warn (_("end of file in comment; newline inserted"));
1167
1168 state = 0;
1169 PUT ('\n');
1170 break;
1171 #endif
1172 #ifdef DOUBLEBAR_PARALLEL
1173 case LEX_IS_DOUBLEBAR_1ST:
1174 ch2 = GET ();
1175 if (ch2 != EOF)
1176 UNGET (ch2);
1177 if (ch2 != '|')
1178 goto de_fault;
1179
1180 /* Handle '||' in two states as invoking PUT twice might
1181 result in the first one jumping out of this loop. We'd
1182 then lose track of the state and one '|' char. */
1183 state = 13;
1184 PUT ('|');
1185 break;
1186 #endif
1187 case LEX_IS_LINE_COMMENT_START:
1188 /* FIXME-someday: The two character comment stuff was badly
1189 thought out. On i386, we want '/' as line comment start
1190 AND we want C style comments. hence this hack. The
1191 whole lexical process should be reworked. xoxorich. */
1192 if (ch == '/')
1193 {
1194 ch2 = GET ();
1195 if (ch2 == '*')
1196 {
1197 old_state = 3;
1198 state = -2;
1199 break;
1200 }
1201 else if (ch2 != EOF)
1202 {
1203 UNGET (ch2);
1204 }
1205 }
1206
1207 if (state == 0 || state == 1) /* Only comment at start of line. */
1208 {
1209 int startch;
1210
1211 startch = ch;
1212
1213 do
1214 {
1215 ch = GET ();
1216 }
1217 while (ch != EOF && IS_WHITESPACE (ch));
1218
1219 if (ch == EOF)
1220 {
1221 as_warn (_("end of file in comment; newline inserted"));
1222 PUT ('\n');
1223 break;
1224 }
1225
1226 if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1227 {
1228 /* Not a cpp line. */
1229 while (ch != EOF && !IS_NEWLINE (ch))
1230 ch = GET ();
1231 if (ch == EOF)
1232 {
1233 as_warn (_("end of file in comment; newline inserted"));
1234 PUT ('\n');
1235 }
1236 else /* IS_NEWLINE (ch) */
1237 {
1238 /* To process non-zero add_newlines. */
1239 UNGET (ch);
1240 }
1241 state = 0;
1242 break;
1243 }
1244 /* Looks like `# 123 "filename"' from cpp. */
1245 UNGET (ch);
1246 old_state = 4;
1247 state = -1;
1248 if (scrub_m68k_mri)
1249 out_string = "\tlinefile ";
1250 else
1251 out_string = "\t.linefile ";
1252 PUT (*out_string++);
1253 break;
1254 }
1255
1256 #ifdef TC_D10V
1257 /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1258 Trap is the only short insn that has a first operand that is
1259 neither register nor label.
1260 We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1261 We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1262 already LEX_IS_LINE_COMMENT_START. However, it is the
1263 only character in line_comment_chars for d10v, hence we
1264 can recognize it as such. */
1265 /* An alternative approach would be to reset the state to 1 when
1266 we see '||', '<'- or '->', but that seems to be overkill. */
1267 if (state == 10)
1268 PUT (' ');
1269 #endif
1270 /* We have a line comment character which is not at the
1271 start of a line. If this is also a normal comment
1272 character, fall through. Otherwise treat it as a default
1273 character. */
1274 if (strchr (tc_comment_chars, ch) == NULL
1275 && (! scrub_m68k_mri
1276 || (ch != '!' && ch != '*')))
1277 goto de_fault;
1278 if (scrub_m68k_mri
1279 && (ch == '!' || ch == '*' || ch == '#')
1280 && state != 1
1281 && state != 10)
1282 goto de_fault;
1283 /* Fall through. */
1284 case LEX_IS_COMMENT_START:
1285 #if defined TC_ARM && defined OBJ_ELF
1286 /* On the ARM, `@' is the comment character.
1287 Unfortunately this is also a special character in ELF .symver
1288 directives (and .type, though we deal with those another way).
1289 So we check if this line is such a directive, and treat
1290 the character as default if so. This is a hack. */
1291 if ((symver_state != NULL) && (*symver_state == 0))
1292 goto de_fault;
1293 #endif
1294
1295 #ifdef TC_ARM
1296 /* For the ARM, care is needed not to damage occurrences of \@
1297 by stripping the @ onwards. Yuck. */
1298 if ((to > tostart ? to[-1] : last_char) == '\\')
1299 /* Do not treat the @ as a start-of-comment. */
1300 goto de_fault;
1301 #endif
1302
1303 #ifdef WARN_COMMENTS
1304 if (!found_comment)
1305 found_comment_file = as_where (&found_comment);
1306 #endif
1307 do
1308 {
1309 ch = GET ();
1310 }
1311 while (ch != EOF && !IS_NEWLINE (ch));
1312 if (ch == EOF)
1313 as_warn (_("end of file in comment; newline inserted"));
1314 state = 0;
1315 PUT ('\n');
1316 break;
1317
1318 #ifdef H_TICK_HEX
1319 case LEX_IS_H:
1320 /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1321 the H' with 0x to make them gas-style hex characters. */
1322 if (enable_h_tick_hex)
1323 {
1324 char quot;
1325
1326 quot = GET ();
1327 if (quot == '\'')
1328 {
1329 UNGET ('x');
1330 ch = '0';
1331 }
1332 else
1333 UNGET (quot);
1334 }
1335 #endif
1336 /* Fall through. */
1337
1338 case LEX_IS_SYMBOL_COMPONENT:
1339 if (state == 10)
1340 {
1341 /* This is a symbol character following another symbol
1342 character, with whitespace in between. We skipped
1343 the whitespace earlier, so output it now. */
1344 UNGET (ch);
1345 state = 3;
1346 PUT (' ');
1347 break;
1348 }
1349
1350 #ifdef TC_Z80
1351 /* "af'" is a symbol containing '\''. */
1352 if (state == 3 && (ch == 'a' || ch == 'A'))
1353 {
1354 state = 16;
1355 PUT (ch);
1356 ch = GET ();
1357 if (ch == 'f' || ch == 'F')
1358 {
1359 state = 17;
1360 PUT (ch);
1361 break;
1362 }
1363 else
1364 {
1365 state = 9;
1366 if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1367 {
1368 if (ch != EOF)
1369 UNGET (ch);
1370 break;
1371 }
1372 }
1373 }
1374 #endif
1375 if (state == 3)
1376 state = 9;
1377
1378 /* This is a common case. Quickly copy CH and all the
1379 following symbol component or normal characters. */
1380 if (to + 1 < toend
1381 && mri_state == NULL
1382 #if defined TC_ARM && defined OBJ_ELF
1383 && symver_state == NULL
1384 #endif
1385 )
1386 {
1387 char *s;
1388 ptrdiff_t len;
1389
1390 for (s = from; s < fromend; s++)
1391 {
1392 int type;
1393
1394 ch2 = *(unsigned char *) s;
1395 type = lex[ch2];
1396 if (type != 0
1397 && type != LEX_IS_SYMBOL_COMPONENT)
1398 break;
1399 }
1400
1401 if (s > from)
1402 /* Handle the last character normally, for
1403 simplicity. */
1404 --s;
1405
1406 len = s - from;
1407
1408 if (len > (toend - to) - 1)
1409 len = (toend - to) - 1;
1410
1411 if (len > 0)
1412 {
1413 PUT (ch);
1414 memcpy (to, from, len);
1415 to += len;
1416 from += len;
1417 if (to >= toend)
1418 goto tofull;
1419 ch = GET ();
1420 }
1421 }
1422
1423 /* Fall through. */
1424 default:
1425 de_fault:
1426 /* Some relatively `normal' character. */
1427 if (state == 0)
1428 {
1429 state = 11; /* Now seeing label definition. */
1430 }
1431 else if (state == 1)
1432 {
1433 state = 2; /* Ditto. */
1434 }
1435 else if (state == 9)
1436 {
1437 if (!IS_SYMBOL_COMPONENT (ch))
1438 state = 3;
1439 }
1440 else if (state == 10)
1441 {
1442 if (ch == '\\')
1443 {
1444 /* Special handling for backslash: a backslash may
1445 be the beginning of a formal parameter (of a
1446 macro) following another symbol character, with
1447 whitespace in between. If that is the case, we
1448 output a space before the parameter. Strictly
1449 speaking, correct handling depends upon what the
1450 macro parameter expands into; if the parameter
1451 expands into something which does not start with
1452 an operand character, then we don't want to keep
1453 the space. We don't have enough information to
1454 make the right choice, so here we are making the
1455 choice which is more likely to be correct. */
1456 if (to + 1 >= toend)
1457 {
1458 /* If we're near the end of the buffer, save the
1459 character for the next time round. Otherwise
1460 we'll lose our state. */
1461 UNGET (ch);
1462 goto tofull;
1463 }
1464 *to++ = ' ';
1465 }
1466
1467 state = 3;
1468 }
1469 PUT (ch);
1470 break;
1471 }
1472 }
1473
1474 /*NOTREACHED*/
1475
1476 fromeof:
1477 /* We have reached the end of the input. */
1478 #ifdef TC_ARM
1479 if (to > tostart)
1480 last_char = to[-1];
1481 #endif
1482 return to - tostart;
1483
1484 tofull:
1485 /* The output buffer is full. Save any input we have not yet
1486 processed. */
1487 if (fromend > from)
1488 {
1489 saved_input = from;
1490 saved_input_len = fromend - from;
1491 }
1492 else
1493 saved_input = NULL;
1494
1495 #ifdef TC_ARM
1496 if (to > tostart)
1497 last_char = to[-1];
1498 #endif
1499 return to - tostart;
1500 }