]> git.ipfire.org Git - thirdparty/binutils-gdb.git/blame - gas/app.c
Update year range in copyright notice of binutils files
[thirdparty/binutils-gdb.git] / gas / app.c
CommitLineData
252b5132 1/* This is the Assembler Pre-Processor
fd67aa11 2 Copyright (C) 1987-2024 Free Software Foundation, Inc.
252b5132
RH
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
ec2655a6 8 the Free Software Foundation; either version 3, or (at your option)
252b5132
RH
9 any later version.
10
ec2655a6
NC
11 GAS is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14 License for more details.
252b5132
RH
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to the Free
4b4da160
NC
18 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19 02110-1301, USA. */
252b5132 20
204cd129 21/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
93e914b2
AO
22/* App, the assembler pre-processor. This pre-processor strips out
23 excess spaces, turns single-quoted characters into a decimal
24 constant, and turns the # in # <number> <filename> <garbage> into a
25 .linefile. This needs better error-handling. */
252b5132 26
ebd1c875 27#include "as.h"
252b5132
RH
28
/* Compilers that do not claim ISO C conformance may lack the `const'
   keyword, so make it expand to nothing for them.  */
#if (__STDC__ != 1)
#ifndef const
#define const  /* empty */
#endif
#endif

#ifdef H_TICK_HEX
/* When nonzero, do_scrub_begin gives 'h' and 'H' their own lex class.  */
int enable_h_tick_hex = 0;
#endif

#ifdef TC_M68K
/* Nonzero while the scrubber itself is in m68k MRI mode.  This is kept
   separately from flag_m68k_mri because the .mri pseudo-op changes the
   two flags at different moments.  */
static int scrub_m68k_mri;

/* Text of the pseudo-op that toggles MRI mode; do_scrub_chars matches
   input against it character by character.  */
static const char mri_pseudo[] = ".mri 0";
#else
/* Targets other than m68k never scrub in MRI mode.  */
#define scrub_m68k_mri 0
#endif

#if defined TC_ARM && defined OBJ_ELF
/* Pseudo-op after which `@' characters need special treatment (see
   do_scrub_chars), and the current position of the matcher within it
   (NULL when no match is in progress).  */
static const char symver_pseudo[] = ".symver";
static const char * symver_state;
#endif

/* Part of the scrubber state that app_push saves and app_pop restores.
   NOTE(review): its readers and writers are in do_scrub_chars; confirm
   the exact meaning there.  */
static char last_char;
252b5132
RH
60
/* Character classification table, indexed by character code.  It is
   filled in by do_scrub_begin; a zero entry means "no special class".  */
static char lex[256];

/* Characters that are symbol components on every target.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* The classes that may be stored in lex[].  The value 7 is not used.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11
#ifdef TC_V850
/* First '-' of a possible "--" comment introducer.  */
#define LEX_IS_DOUBLEDASH_1ST		12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
/* First '|' of a possible "||" parallel-insn separator.  */
#define LEX_IS_DOUBLEBAR_1ST		13
#endif
#define LEX_IS_PARALLEL_SEPARATOR	14
#ifdef H_TICK_HEX
/* 'h' or 'H' while enable_h_tick_hex is set.  */
#define LEX_IS_H			15
#endif

/* Class predicates.  The argument is used directly as an index into
   lex[], so it must be a valid character code (never EOF).  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)	(lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)
95
73ee5e4c 96static int process_escape (int);
252b5132
RH
97
98/* FIXME-soon: The entire lexer/parser thingy should be
99 built statically at compile time rather than dynamically
3ee4defc 100 each and every time the assembler is run. xoxorich. */
252b5132 101
3ee4defc 102void
73ee5e4c 103do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
252b5132
RH
104{
105 const char *p;
106 int c;
107
252b5132
RH
108 lex[' '] = LEX_IS_WHITESPACE;
109 lex['\t'] = LEX_IS_WHITESPACE;
110 lex['\r'] = LEX_IS_WHITESPACE;
111 lex['\n'] = LEX_IS_NEWLINE;
252b5132
RH
112 lex[':'] = LEX_IS_COLON;
113
abd63a32
AM
114#ifdef TC_M68K
115 scrub_m68k_mri = m68k_mri;
116
252b5132 117 if (! m68k_mri)
abd63a32 118#endif
252b5132
RH
119 {
120 lex['"'] = LEX_IS_STRINGQUOTE;
121
6793974d 122#if ! defined (TC_HPPA)
252b5132
RH
123 lex['\''] = LEX_IS_ONECHAR_QUOTE;
124#endif
125
126#ifdef SINGLE_QUOTE_STRINGS
127 lex['\''] = LEX_IS_STRINGQUOTE;
128#endif
129 }
130
131 /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
132 in state 5 of do_scrub_chars must be changed. */
133
134 /* Note that these override the previous defaults, e.g. if ';' is a
135 comment char, then it isn't a line separator. */
136 for (p = symbol_chars; *p; ++p)
204cd129 137 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
252b5132
RH
138
139 for (c = 128; c < 256; ++c)
140 lex[c] = LEX_IS_SYMBOL_COMPONENT;
141
142#ifdef tc_symbol_chars
143 /* This macro permits the processor to specify all characters which
144 may appears in an operand. This will prevent the scrubber from
145 discarding meaningful whitespace in certain cases. The i386
146 backend uses this to support prefixes, which can confuse the
147 scrubber as to whether it is parsing operands or opcodes. */
148 for (p = tc_symbol_chars; *p; ++p)
149 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
150#endif
151
152 /* The m68k backend wants to be able to change comment_chars. */
153#ifndef tc_comment_chars
154#define tc_comment_chars comment_chars
155#endif
156 for (p = tc_comment_chars; *p; p++)
204cd129 157 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
252b5132
RH
158
159 for (p = line_comment_chars; *p; p++)
204cd129 160 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
252b5132 161
2e6976a8
DG
162#ifndef tc_line_separator_chars
163#define tc_line_separator_chars line_separator_chars
164#endif
165 for (p = tc_line_separator_chars; *p; p++)
204cd129 166 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
252b5132 167
62f65a7b
DB
168#ifdef tc_parallel_separator_chars
169 /* This macro permits the processor to specify all characters which
170 separate parallel insns on the same line. */
171 for (p = tc_parallel_separator_chars; *p; p++)
204cd129 172 lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
62f65a7b
DB
173#endif
174
252b5132
RH
175 /* Only allow slash-star comments if slash is not in use.
176 FIXME: This isn't right. We should always permit them. */
177 if (lex['/'] == 0)
204cd129 178 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
252b5132 179
abd63a32 180#ifdef TC_M68K
252b5132
RH
181 if (m68k_mri)
182 {
183 lex['\''] = LEX_IS_STRINGQUOTE;
184 lex[';'] = LEX_IS_COMMENT_START;
185 lex['*'] = LEX_IS_LINE_COMMENT_START;
186 /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
b1ac4c66 187 then it can't be used in an expression. */
252b5132
RH
188 lex['!'] = LEX_IS_LINE_COMMENT_START;
189 }
abd63a32 190#endif
252b5132
RH
191
192#ifdef TC_V850
193 lex['-'] = LEX_IS_DOUBLEDASH_1ST;
194#endif
f28e8eb3 195#ifdef DOUBLEBAR_PARALLEL
252b5132
RH
196 lex['|'] = LEX_IS_DOUBLEBAR_1ST;
197#endif
198#ifdef TC_D30V
204cd129 199 /* Must do this is we want VLIW instruction with "->" or "<-". */
252b5132
RH
200 lex['-'] = LEX_IS_SYMBOL_COMPONENT;
201#endif
c54b5932
DD
202
203#ifdef H_TICK_HEX
204 if (enable_h_tick_hex)
205 {
206 lex['h'] = LEX_IS_H;
207 lex['H'] = LEX_IS_H;
208 }
209#endif
204cd129 210}
252b5132 211
/* State of the scrubber that survives between calls to
   do_scrub_chars.  */

/* Current state of the scrubber's state machine, and the state to
   return to once a string, comment or queued output is finished.  */
static int state;
static int old_state;

/* Text still to be emitted while in state -1, and a small buffer that
   can hold it (a one-character quote is formatted into it as a
   decimal number).  */
static const char *out_string;
static char out_buf[20];

/* Count of newlines swallowed inside comments and strings which are
   still to be put back at the next newline.  */
static int add_newlines;

/* Input that has not been consumed yet (NULL if there is none) and its
   length, plus the buffer that input is read into.  */
static char *saved_input;
static size_t saved_input_len;
static char input_buffer[32 * 1024];

/* Progress of the ".mri" pseudo-op recognizer, and the last character
   it accepted.  */
static const char *mri_state;
static char mri_last_ch;

/* Data structure for saving the state of app across #include's.  Note
   that app is called asynchronously to the parsing of the .include's,
   so our state at the time .include is interpreted is completely
   unrelated.  That's why we have to save it all.

   Every member mirrors the file-scope variable of the same name.  */

struct app_save
{
  int          state;
  int          old_state;
  const char * out_string;
  char         out_buf[sizeof (out_buf)];
  int          add_newlines;
  char *       saved_input;	/* Heap copy made by app_push, or NULL.  */
  size_t       saved_input_len;	/* Only set when saved_input != NULL.  */
#ifdef TC_M68K
  int          scrub_m68k_mri;
#endif
  const char * mri_state;
  char         mri_last_ch;
#if defined TC_ARM && defined OBJ_ELF
  const char * symver_state;
#endif
  char         last_char;
};
252b5132
RH
248
249char *
73ee5e4c 250app_push (void)
252b5132 251{
ed9e98c2 252 struct app_save *saved;
252b5132 253
325801bd 254 saved = XNEW (struct app_save);
252b5132
RH
255 saved->state = state;
256 saved->old_state = old_state;
257 saved->out_string = out_string;
258 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
259 saved->add_newlines = add_newlines;
2b47531b
ILT
260 if (saved_input == NULL)
261 saved->saved_input = NULL;
262 else
263 {
add39d23 264 saved->saved_input = XNEWVEC (char, saved_input_len);
2b47531b
ILT
265 memcpy (saved->saved_input, saved_input, saved_input_len);
266 saved->saved_input_len = saved_input_len;
267 }
abd63a32 268#ifdef TC_M68K
252b5132 269 saved->scrub_m68k_mri = scrub_m68k_mri;
abd63a32 270#endif
252b5132
RH
271 saved->mri_state = mri_state;
272 saved->mri_last_ch = mri_last_ch;
273#if defined TC_ARM && defined OBJ_ELF
274 saved->symver_state = symver_state;
275#endif
ab1fadc6 276 saved->last_char = last_char;
252b5132 277
3ee4defc 278 /* do_scrub_begin() is not useful, just wastes time. */
252b5132
RH
279
280 state = 0;
281 saved_input = NULL;
f8819316 282 add_newlines = 0;
252b5132
RH
283
284 return (char *) saved;
285}
286
3ee4defc 287void
73ee5e4c 288app_pop (char *arg)
252b5132 289{
ed9e98c2 290 struct app_save *saved = (struct app_save *) arg;
252b5132 291
3ee4defc 292 /* There is no do_scrub_end (). */
252b5132
RH
293 state = saved->state;
294 old_state = saved->old_state;
295 out_string = saved->out_string;
296 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
297 add_newlines = saved->add_newlines;
2b47531b
ILT
298 if (saved->saved_input == NULL)
299 saved_input = NULL;
300 else
301 {
39a45edc 302 gas_assert (saved->saved_input_len <= sizeof (input_buffer));
2b47531b
ILT
303 memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
304 saved_input = input_buffer;
305 saved_input_len = saved->saved_input_len;
306 free (saved->saved_input);
307 }
abd63a32 308#ifdef TC_M68K
252b5132 309 scrub_m68k_mri = saved->scrub_m68k_mri;
abd63a32 310#endif
252b5132
RH
311 mri_state = saved->mri_state;
312 mri_last_ch = saved->mri_last_ch;
313#if defined TC_ARM && defined OBJ_ELF
314 symver_state = saved->symver_state;
315#endif
ab1fadc6 316 last_char = saved->last_char;
252b5132
RH
317
318 free (arg);
204cd129 319}
252b5132
RH
320
/* Return the character denoted by a backslash followed by CH.

   The letters b, f, n, r and t map to the corresponding control
   characters.  Anything else, the two quote characters included,
   stands for itself.

   @@ This assumes that \n &c are the same on host and target.  This
   is not necessarily true.  */

static int
process_escape (int ch)
{
  static const struct
  {
    int letter;
    int value;
  }
  escapes[] =
  {
    { 'b', '\b' },
    { 'f', '\f' },
    { 'n', '\n' },
    { 'r', '\r' },
    { 't', '\t' }
  };
  unsigned int i;

  for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++)
    if (escapes[i].letter == ch)
      return escapes[i].value;

  return ch;
}
347
578c64a4
NC
/* At most this many multibyte warnings are issued per run; the last
   one is followed by a note that further ones are suppressed.  */
#define MULTIBYTE_WARN_COUNT_LIMIT 10
static unsigned int multibyte_warn_count = 0;

/* Look for bytes with the top bit set (values above 0x7f) in the
   range [START, END).

   If WARN is false, return true as soon as one such byte is seen and
   false if there is none; nothing is reported.

   If WARN is true, issue a warning for every such byte, mentioning the
   current file and line when as_where knows them, until
   MULTIBYTE_WARN_COUNT_LIMIT warnings have been issued in total.
   Return true if at least one such byte was seen during this call.
   Once the limit has been reached, later calls with WARN set do not
   scan at all and return false.

   An empty or inverted range yields false.  */

bool
scan_for_multibyte_characters (const unsigned char * start,
			       const unsigned char * end,
			       bool warn)
{
  if (end <= start)
    return false;

  /* The counter stops at exactly the limit (see the break below), so
     this has to be ">=": testing ">" would let the next call through
     and, as the counter could then never equal the limit again, that
     call would warn about every remaining byte in its buffer.  */
  if (warn && multibyte_warn_count >= MULTIBYTE_WARN_COUNT_LIMIT)
    return false;

  bool found = false;

  while (start < end)
    {
      unsigned char c;

      if ((c = * start++) <= 0x7f)
	continue;

      /* Callers that only want a yes/no answer are done here.  */
      if (!warn)
	return true;

      found = true;

      const char * filename;
      unsigned int lineno;

      filename = as_where (& lineno);
      if (filename == NULL)
	as_warn (_("multibyte character (%#x) encountered in input"), c);
      else if (lineno == 0)
	as_warn (_("multibyte character (%#x) encountered in %s"), c, filename);
      else
	as_warn (_("multibyte character (%#x) encountered in %s at or near line %u"), c, filename, lineno);

      if (++ multibyte_warn_count == MULTIBYTE_WARN_COUNT_LIMIT)
	{
	  as_warn (_("further multibyte character warnings suppressed"));
	  break;
	}
    }

  return found;
}
396
252b5132
RH
397/* This function is called to process input characters. The GET
398 parameter is used to retrieve more input characters. GET should
399 set its parameter to point to a buffer, and return the length of
400 the buffer; it should return 0 at end of file. The scrubbed output
401 characters are put into the buffer starting at TOSTART; the TOSTART
402 buffer is TOLEN bytes in length. The function returns the number
403 of scrubbed characters put into TOSTART. This will be TOLEN unless
404 end of file was seen. This function is arranged as a state
405 machine, and saves its state so that it may return at any point.
406 This is the way the old code used to work. */
407
39a45edc
AM
408size_t
409do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
252b5132
RH
410{
411 char *to = tostart;
412 char *toend = tostart + tolen;
413 char *from;
414 char *fromend;
39a45edc 415 size_t fromlen;
ed9e98c2 416 int ch, ch2 = 0;
c9c5dcda
AM
417 /* Character that started the string we're working on. */
418 static char quotechar;
252b5132
RH
419
420 /*State 0: beginning of normal line
421 1: After first whitespace on line (flush more white)
422 2: After first non-white (opcode) on line (keep 1white)
423 3: after second white on line (into operands) (flush white)
93e914b2 424 4: after putting out a .linefile, put out digits
252b5132
RH
425 5: parsing a string, then go to old-state
426 6: putting out \ escape in a "d string.
93e914b2 427 7: no longer used
e9fc6c21 428 8: no longer used
252b5132
RH
429 9: After seeing symbol char in state 3 (keep 1white after symchar)
430 10: After seeing whitespace in state 9 (keep white before symchar)
431 11: After seeing a symbol character in state 0 (eg a label definition)
432 -1: output string in out_string and go to the state in old_state
433 -2: flush text until a '*' '/' is seen, then go to state old_state
434#ifdef TC_V850
b1ac4c66
AM
435 12: After seeing a dash, looking for a second dash as a start
436 of comment.
252b5132 437#endif
f28e8eb3 438#ifdef DOUBLEBAR_PARALLEL
b1ac4c66
AM
439 13: After seeing a vertical bar, looking for a second
440 vertical bar as a parallel expression separator.
52628315 441#endif
40b36596
JM
442#ifdef TC_PREDICATE_START_CHAR
443 14: After seeing a predicate start character at state 0, looking
444 for a predicate end character as predicate.
445 15: After seeing a predicate start character at state 1, looking
446 for a predicate end character as predicate.
3c9b82ba
NC
447#endif
448#ifdef TC_Z80
449 16: After seeing an 'a' or an 'A' at the start of a symbol
450 17: After seeing an 'f' or an 'F' in state 16
252b5132
RH
451#endif
452 */
453
454 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
455 constructs like ``.loc 1 20''. This was turning into ``.loc
456 120''. States 9 and 10 ensure that a space is never dropped in
3b37fd66 457 between characters which could appear in an identifier. Ian
252b5132
RH
458 Taylor, ian@cygnus.com.
459
460 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
461 correctly on the PA (and any other target where colons are optional).
462 Jeff Law, law@cs.utah.edu.
463
464 I added state 13 so that something like "cmp r1, r2 || trap #1" does not
465 get squashed into "cmp r1,r2||trap#1", with the all important space
466 between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
467
468 /* This macro gets the next input character. */
469
2b47531b
ILT
470#define GET() \
471 (from < fromend \
472 ? * (unsigned char *) (from++) \
473 : (saved_input = NULL, \
474 fromlen = (*get) (input_buffer, sizeof input_buffer), \
475 from = input_buffer, \
476 fromend = from + fromlen, \
477 (fromlen == 0 \
478 ? EOF \
252b5132
RH
479 : * (unsigned char *) (from++))))
480
481 /* This macro pushes a character back on the input stream. */
482
483#define UNGET(uch) (*--from = (uch))
484
485 /* This macro puts a character into the output buffer. If this
486 character fills the output buffer, this macro jumps to the label
487 TOFULL. We use this rather ugly approach because we need to
488 handle two different termination conditions: EOF on the input
489 stream, and a full output buffer. It would be simpler if we
490 always read in the entire input stream before processing it, but
491 I don't want to make such a significant change to the assembler's
492 memory usage. */
493
411863a4
KH
494#define PUT(pch) \
495 do \
496 { \
497 *to++ = (pch); \
498 if (to >= toend) \
499 goto tofull; \
500 } \
252b5132
RH
501 while (0)
502
503 if (saved_input != NULL)
504 {
505 from = saved_input;
506 fromend = from + saved_input_len;
507 }
508 else
509 {
2b47531b 510 fromlen = (*get) (input_buffer, sizeof input_buffer);
252b5132
RH
511 if (fromlen == 0)
512 return 0;
2b47531b 513 from = input_buffer;
252b5132 514 fromend = from + fromlen;
578c64a4
NC
515
516 if (multibyte_handling == multibyte_warn)
517 (void) scan_for_multibyte_characters ((const unsigned char *) from,
518 (const unsigned char* ) fromend,
519 true /* Generate warnings. */);
252b5132
RH
520 }
521
522 while (1)
523 {
524 /* The cases in this switch end with continue, in order to
b1ac4c66
AM
525 branch back to the top of this while loop and generate the
526 next output character in the appropriate state. */
252b5132
RH
527 switch (state)
528 {
529 case -1:
530 ch = *out_string++;
531 if (*out_string == '\0')
532 {
533 state = old_state;
534 old_state = 3;
535 }
536 PUT (ch);
537 continue;
538
539 case -2:
540 for (;;)
541 {
542 do
543 {
544 ch = GET ();
545
546 if (ch == EOF)
547 {
548 as_warn (_("end of file in comment"));
549 goto fromeof;
550 }
551
552 if (ch == '\n')
553 PUT ('\n');
554 }
555 while (ch != '*');
556
557 while ((ch = GET ()) == '*')
558 ;
559
560 if (ch == EOF)
561 {
562 as_warn (_("end of file in comment"));
563 goto fromeof;
564 }
565
566 if (ch == '/')
567 break;
568
569 UNGET (ch);
570 }
571
572 state = old_state;
573 UNGET (' ');
574 continue;
575
576 case 4:
577 ch = GET ();
578 if (ch == EOF)
579 goto fromeof;
580 else if (ch >= '0' && ch <= '9')
581 PUT (ch);
582 else
583 {
584 while (ch != EOF && IS_WHITESPACE (ch))
585 ch = GET ();
586 if (ch == '"')
587 {
93e914b2
AO
588 quotechar = ch;
589 state = 5;
e9fc6c21 590 old_state = 3;
4061927e 591 PUT (ch);
252b5132
RH
592 }
593 else
594 {
595 while (ch != EOF && ch != '\n')
596 ch = GET ();
597 state = 0;
598 PUT (ch);
599 }
600 }
601 continue;
602
603 case 5:
604 /* We are going to copy everything up to a quote character,
b1ac4c66
AM
605 with special handling for a backslash. We try to
606 optimize the copying in the simple case without using the
607 GET and PUT macros. */
252b5132
RH
608 {
609 char *s;
39a45edc 610 ptrdiff_t len;
252b5132
RH
611
612 for (s = from; s < fromend; s++)
613 {
614 ch = *s;
252b5132 615 if (ch == '\\'
c9c5dcda 616 || ch == quotechar
252b5132
RH
617 || ch == '\n')
618 break;
619 }
620 len = s - from;
621 if (len > toend - to)
622 len = toend - to;
623 if (len > 0)
624 {
625 memcpy (to, from, len);
626 to += len;
627 from += len;
df816087
AM
628 if (to >= toend)
629 goto tofull;
252b5132
RH
630 }
631 }
632
633 ch = GET ();
634 if (ch == EOF)
635 {
fc5910c0
NC
636 /* This buffer is here specifically so
637 that the UNGET below will work. */
638 static char one_char_buf[1];
639
c9c5dcda 640 as_warn (_("end of file in string; '%c' inserted"), quotechar);
252b5132 641 state = old_state;
fc5910c0
NC
642 from = fromend = one_char_buf + 1;
643 fromlen = 1;
252b5132 644 UNGET ('\n');
c9c5dcda 645 PUT (quotechar);
252b5132 646 }
c9c5dcda 647 else if (ch == quotechar)
252b5132
RH
648 {
649 state = old_state;
650 PUT (ch);
651 }
16d87673 652 else if (TC_STRING_ESCAPES && ch == '\\')
252b5132
RH
653 {
654 state = 6;
655 PUT (ch);
656 }
252b5132
RH
657 else if (scrub_m68k_mri && ch == '\n')
658 {
659 /* Just quietly terminate the string. This permits lines like
204cd129 660 bne label loop if we haven't reach end yet. */
252b5132
RH
661 state = old_state;
662 UNGET (ch);
663 PUT ('\'');
664 }
665 else
666 {
667 PUT (ch);
668 }
669 continue;
670
671 case 6:
672 state = 5;
673 ch = GET ();
674 switch (ch)
675 {
676 /* Handle strings broken across lines, by turning '\n' into
677 '\\' and 'n'. */
678 case '\n':
679 UNGET ('n');
680 add_newlines++;
681 PUT ('\\');
682 continue;
683
4252e537 684 case EOF:
c9c5dcda
AM
685 as_warn (_("end of file in string; '%c' inserted"), quotechar);
686 PUT (quotechar);
4252e537
AM
687 continue;
688
252b5132
RH
689 case '"':
690 case '\\':
691 case 'b':
692 case 'f':
693 case 'n':
694 case 'r':
695 case 't':
696 case 'v':
697 case 'x':
698 case 'X':
699 case '0':
700 case '1':
701 case '2':
702 case '3':
703 case '4':
704 case '5':
705 case '6':
706 case '7':
707 break;
4252e537 708
252b5132 709 default:
4252e537 710#ifdef ONLY_STANDARD_ESCAPES
0e389e77 711 as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
4252e537 712#endif
252b5132 713 break;
252b5132
RH
714 }
715 PUT (ch);
716 continue;
717
b1ac4c66
AM
718#ifdef DOUBLEBAR_PARALLEL
719 case 13:
720 ch = GET ();
721 if (ch != '|')
722 abort ();
723
724 /* Reset back to state 1 and pretend that we are parsing a
725 line from just after the first white space. */
726 state = 1;
727 PUT ('|');
40b36596
JM
728#ifdef TC_TIC6X
729 /* "||^" is used for SPMASKed instructions. */
730 ch = GET ();
731 if (ch == EOF)
732 goto fromeof;
733 else if (ch == '^')
734 PUT ('^');
735 else
736 UNGET (ch);
737#endif
b1ac4c66 738 continue;
3c9b82ba
NC
739#endif
740#ifdef TC_Z80
741 case 16:
742 /* We have seen an 'a' at the start of a symbol, look for an 'f'. */
743 ch = GET ();
34bca508 744 if (ch == 'f' || ch == 'F')
3c9b82ba
NC
745 {
746 state = 17;
747 PUT (ch);
748 }
749 else
750 {
1adce770
SB
751 if (ch != EOF)
752 UNGET (ch);
3c9b82ba
NC
753 state = 9;
754 break;
755 }
1a0670f3 756 /* Fall through. */
3c9b82ba
NC
757 case 17:
758 /* We have seen "af" at the start of a symbol,
759 a ' here is a part of that symbol. */
760 ch = GET ();
761 state = 9;
762 if (ch == '\'')
763 /* Change to avoid warning about unclosed string. */
764 PUT ('`');
0146fc9d 765 else if (ch != EOF)
3c9b82ba
NC
766 UNGET (ch);
767 break;
b1ac4c66 768#endif
252b5132
RH
769 }
770
204cd129 771 /* OK, we are somewhere in states 0 through 4 or 9 through 11. */
252b5132
RH
772
773 /* flushchar: */
774 ch = GET ();
775
40b36596
JM
776#ifdef TC_PREDICATE_START_CHAR
777 if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
52628315
L
778 {
779 state += 14;
780 PUT (ch);
781 continue;
782 }
783 else if (state == 14 || state == 15)
784 {
40b36596 785 if (ch == TC_PREDICATE_END_CHAR)
70b911ad
JJ
786 {
787 state -= 14;
788 PUT (ch);
789 ch = GET ();
790 }
52628315
L
791 else
792 {
793 PUT (ch);
794 continue;
795 }
796 }
797#endif
798
252b5132
RH
799 recycle:
800
801#if defined TC_ARM && defined OBJ_ELF
802 /* We need to watch out for .symver directives. See the comment later
803 in this function. */
804 if (symver_state == NULL)
805 {
806 if ((state == 0 || state == 1) && ch == symver_pseudo[0])
807 symver_state = symver_pseudo + 1;
808 }
809 else
810 {
811 /* We advance to the next state if we find the right
812 character. */
813 if (ch != '\0' && (*symver_state == ch))
814 ++symver_state;
815 else if (*symver_state != '\0')
816 /* We did not get the expected character, or we didn't
817 get a valid terminating character after seeing the
818 entire pseudo-op, so we must go back to the beginning. */
819 symver_state = NULL;
820 else
821 {
822 /* We've read the entire pseudo-op. If this is the end
823 of the line, go back to the beginning. */
824 if (IS_NEWLINE (ch))
825 symver_state = NULL;
826 }
827 }
828#endif /* TC_ARM && OBJ_ELF */
829
830#ifdef TC_M68K
831 /* We want to have pseudo-ops which control whether we are in
b1ac4c66
AM
832 MRI mode or not. Unfortunately, since m68k MRI mode affects
833 the scrubber, that means that we need a special purpose
834 recognizer here. */
252b5132
RH
835 if (mri_state == NULL)
836 {
837 if ((state == 0 || state == 1)
838 && ch == mri_pseudo[0])
839 mri_state = mri_pseudo + 1;
840 }
841 else
842 {
843 /* We advance to the next state if we find the right
844 character, or if we need a space character and we get any
845 whitespace character, or if we need a '0' and we get a
846 '1' (this is so that we only need one state to handle
847 ``.mri 0'' and ``.mri 1''). */
848 if (ch != '\0'
849 && (*mri_state == ch
850 || (*mri_state == ' '
851 && lex[ch] == LEX_IS_WHITESPACE)
852 || (*mri_state == '0'
853 && ch == '1')))
854 {
855 mri_last_ch = ch;
856 ++mri_state;
857 }
858 else if (*mri_state != '\0'
859 || (lex[ch] != LEX_IS_WHITESPACE
860 && lex[ch] != LEX_IS_NEWLINE))
861 {
862 /* We did not get the expected character, or we didn't
863 get a valid terminating character after seeing the
864 entire pseudo-op, so we must go back to the
865 beginning. */
866 mri_state = NULL;
867 }
868 else
869 {
870 /* We've read the entire pseudo-op. mri_last_ch is
b1ac4c66
AM
871 either '0' or '1' indicating whether to enter or
872 leave MRI mode. */
252b5132
RH
873 do_scrub_begin (mri_last_ch == '1');
874 mri_state = NULL;
875
876 /* We continue handling the character as usual. The
b1ac4c66
AM
877 main gas reader must also handle the .mri pseudo-op
878 to control expression parsing and the like. */
252b5132
RH
879 }
880 }
881#endif
882
883 if (ch == EOF)
884 {
885 if (state != 0)
886 {
887 as_warn (_("end of file not at end of a line; newline inserted"));
888 state = 0;
889 PUT ('\n');
890 }
891 goto fromeof;
892 }
893
894 switch (lex[ch])
895 {
896 case LEX_IS_WHITESPACE:
897 do
898 {
899 ch = GET ();
900 }
901 while (ch != EOF && IS_WHITESPACE (ch));
902 if (ch == EOF)
903 goto fromeof;
904
905 if (state == 0)
906 {
907 /* Preserve a single whitespace character at the
908 beginning of a line. */
909 state = 1;
910 UNGET (ch);
911 PUT (' ');
912 break;
913 }
914
f28e8eb3 915#ifdef KEEP_WHITE_AROUND_COLON
30a2b4ef
KH
916 if (lex[ch] == LEX_IS_COLON)
917 {
918 /* Only keep this white if there's no white *after* the
b1ac4c66 919 colon. */
30a2b4ef 920 ch2 = GET ();
83bd7402
NC
921 if (ch2 != EOF)
922 UNGET (ch2);
30a2b4ef
KH
923 if (!IS_WHITESPACE (ch2))
924 {
925 state = 9;
926 UNGET (ch);
927 PUT (' ');
928 break;
929 }
930 }
f28e8eb3 931#endif
252b5132 932 if (IS_COMMENT (ch)
62f65a7b
DB
933 || IS_LINE_SEPARATOR (ch)
934 || IS_PARALLEL_SEPARATOR (ch))
252b5132
RH
935 {
936 if (scrub_m68k_mri)
937 {
938 /* In MRI mode, we keep these spaces. */
939 UNGET (ch);
940 PUT (' ');
941 break;
942 }
943 goto recycle;
944 }
945
946 /* If we're in state 2 or 11, we've seen a non-white
947 character followed by whitespace. If the next character
948 is ':', this is whitespace after a label name which we
949 normally must ignore. In MRI mode, though, spaces are
950 not permitted between the label and the colon. */
951 if ((state == 2 || state == 11)
952 && lex[ch] == LEX_IS_COLON
953 && ! scrub_m68k_mri)
954 {
955 state = 1;
956 PUT (ch);
957 break;
958 }
959
960 switch (state)
961 {
252b5132
RH
962 case 1:
963 /* We can arrive here if we leave a leading whitespace
964 character at the beginning of a line. */
965 goto recycle;
966 case 2:
967 state = 3;
968 if (to + 1 < toend)
969 {
970 /* Optimize common case by skipping UNGET/GET. */
971 PUT (' '); /* Sp after opco */
972 goto recycle;
973 }
974 UNGET (ch);
975 PUT (' ');
976 break;
977 case 3:
40b36596
JM
978#ifndef TC_KEEP_OPERAND_SPACES
979 /* For TI C6X, we keep these spaces as they may separate
980 functional unit specifiers from operands. */
252b5132 981 if (scrub_m68k_mri)
40b36596 982#endif
252b5132
RH
983 {
984 /* In MRI mode, we keep these spaces. */
985 UNGET (ch);
986 PUT (' ');
987 break;
988 }
989 goto recycle; /* Sp in operands */
990 case 9:
991 case 10:
40b36596 992#ifndef TC_KEEP_OPERAND_SPACES
252b5132 993 if (scrub_m68k_mri)
40b36596 994#endif
252b5132
RH
995 {
996 /* In MRI mode, we keep these spaces. */
997 state = 3;
998 UNGET (ch);
999 PUT (' ');
1000 break;
1001 }
1002 state = 10; /* Sp after symbol char */
1003 goto recycle;
1004 case 11:
abd63a32 1005 if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
252b5132
RH
1006 state = 1;
1007 else
1008 {
1009 /* We know that ch is not ':', since we tested that
b1ac4c66
AM
1010 case above. Therefore this is not a label, so it
1011 must be the opcode, and we've just seen the
1012 whitespace after it. */
252b5132
RH
1013 state = 3;
1014 }
1015 UNGET (ch);
1016 PUT (' '); /* Sp after label definition. */
1017 break;
1018 default:
1019 BAD_CASE (state);
1020 }
1021 break;
1022
1023 case LEX_IS_TWOCHAR_COMMENT_1ST:
1024 ch2 = GET ();
1025 if (ch2 == '*')
1026 {
1027 for (;;)
1028 {
1029 do
1030 {
1031 ch2 = GET ();
1032 if (ch2 != EOF && IS_NEWLINE (ch2))
1033 add_newlines++;
1034 }
1035 while (ch2 != EOF && ch2 != '*');
1036
1037 while (ch2 == '*')
1038 ch2 = GET ();
1039
1040 if (ch2 == EOF || ch2 == '/')
1041 break;
1042
1043 /* This UNGET will ensure that we count newlines
b1ac4c66 1044 correctly. */
252b5132
RH
1045 UNGET (ch2);
1046 }
1047
1048 if (ch2 == EOF)
1049 as_warn (_("end of file in multiline comment"));
1050
1051 ch = ' ';
1052 goto recycle;
1053 }
800eeca4
JW
1054#ifdef DOUBLESLASH_LINE_COMMENTS
1055 else if (ch2 == '/')
1056 {
1057 do
1058 {
1059 ch = GET ();
1060 }
1061 while (ch != EOF && !IS_NEWLINE (ch));
1062 if (ch == EOF)
1063 as_warn ("end of file in comment; newline inserted");
1064 state = 0;
1065 PUT ('\n');
1066 break;
1067 }
1068#endif
252b5132
RH
1069 else
1070 {
1071 if (ch2 != EOF)
1072 UNGET (ch2);
1073 if (state == 9 || state == 10)
1074 state = 3;
1075 PUT (ch);
1076 }
1077 break;
1078
1079 case LEX_IS_STRINGQUOTE:
c9c5dcda 1080 quotechar = ch;
252b5132
RH
1081 if (state == 10)
1082 {
204cd129 1083 /* Preserve the whitespace in foo "bar". */
252b5132
RH
1084 UNGET (ch);
1085 state = 3;
1086 PUT (' ');
1087
1088 /* PUT didn't jump out. We could just break, but we
b1ac4c66 1089 know what will happen, so optimize a bit. */
252b5132 1090 ch = GET ();
b3446f94 1091 old_state = 9;
252b5132 1092 }
b3446f94
JB
1093 else if (state == 3)
1094 old_state = 9;
252b5132
RH
1095 else
1096 old_state = state;
1097 state = 5;
1098 PUT (ch);
1099 break;
1100
252b5132 1101 case LEX_IS_ONECHAR_QUOTE:
c0a139c7
NC
1102#ifdef H_TICK_HEX
1103 if (state == 9 && enable_h_tick_hex)
c54b5932
DD
1104 {
1105 char c;
1106
1107 c = GET ();
1108 as_warn ("'%c found after symbol", c);
1109 UNGET (c);
1110 }
c0a139c7 1111#endif
252b5132
RH
1112 if (state == 10)
1113 {
204cd129 1114 /* Preserve the whitespace in foo 'b'. */
252b5132
RH
1115 UNGET (ch);
1116 state = 3;
1117 PUT (' ');
1118 break;
1119 }
1120 ch = GET ();
1121 if (ch == EOF)
1122 {
1123 as_warn (_("end of file after a one-character quote; \\0 inserted"));
1124 ch = 0;
1125 }
1126 if (ch == '\\')
1127 {
1128 ch = GET ();
1129 if (ch == EOF)
1130 {
1131 as_warn (_("end of file in escape character"));
1132 ch = '\\';
1133 }
1134 else
1135 ch = process_escape (ch);
1136 }
1137 sprintf (out_buf, "%d", (int) (unsigned char) ch);
1138
1139 /* None of these 'x constants for us. We want 'x'. */
1140 if ((ch = GET ()) != '\'')
1141 {
1142#ifdef REQUIRE_CHAR_CLOSE_QUOTE
0e389e77 1143 as_warn (_("missing close quote; (assumed)"));
252b5132
RH
1144#else
1145 if (ch != EOF)
1146 UNGET (ch);
1147#endif
1148 }
1149 if (strlen (out_buf) == 1)
1150 {
1151 PUT (out_buf[0]);
1152 break;
1153 }
1154 if (state == 9)
1155 old_state = 3;
1156 else
1157 old_state = state;
1158 state = -1;
1159 out_string = out_buf;
1160 PUT (*out_string++);
1161 break;
252b5132
RH
1162
1163 case LEX_IS_COLON:
f28e8eb3 1164#ifdef KEEP_WHITE_AROUND_COLON
30a2b4ef 1165 state = 9;
f28e8eb3 1166#else
252b5132
RH
1167 if (state == 9 || state == 10)
1168 state = 3;
1169 else if (state != 3)
1170 state = 1;
f28e8eb3 1171#endif
252b5132
RH
1172 PUT (ch);
1173 break;
1174
1175 case LEX_IS_NEWLINE:
1176 /* Roll out a bunch of newlines from inside comments, etc. */
1177 if (add_newlines)
1178 {
1179 --add_newlines;
1180 UNGET (ch);
1181 }
3ee4defc 1182 /* Fall through. */
252b5132
RH
1183
1184 case LEX_IS_LINE_SEPARATOR:
1185 state = 0;
1186 PUT (ch);
1187 break;
1188
62f65a7b
DB
1189 case LEX_IS_PARALLEL_SEPARATOR:
1190 state = 1;
1191 PUT (ch);
1192 break;
1193
252b5132
RH
1194#ifdef TC_V850
1195 case LEX_IS_DOUBLEDASH_1ST:
30a2b4ef 1196 ch2 = GET ();
252b5132
RH
1197 if (ch2 != '-')
1198 {
0146fc9d
NC
1199 if (ch2 != EOF)
1200 UNGET (ch2);
252b5132
RH
1201 goto de_fault;
1202 }
3ee4defc 1203 /* Read and skip to end of line. */
252b5132
RH
1204 do
1205 {
1206 ch = GET ();
1207 }
1208 while (ch != EOF && ch != '\n');
204cd129 1209
252b5132 1210 if (ch == EOF)
204cd129
NC
1211 as_warn (_("end of file in comment; newline inserted"));
1212
252b5132
RH
1213 state = 0;
1214 PUT ('\n');
1215 break;
3ee4defc 1216#endif
f28e8eb3 1217#ifdef DOUBLEBAR_PARALLEL
252b5132 1218 case LEX_IS_DOUBLEBAR_1ST:
30a2b4ef 1219 ch2 = GET ();
83bd7402
NC
1220 if (ch2 != EOF)
1221 UNGET (ch2);
252b5132 1222 if (ch2 != '|')
204cd129
NC
1223 goto de_fault;
1224
b1ac4c66
AM
1225 /* Handle '||' in two states as invoking PUT twice might
1226 result in the first one jumping out of this loop. We'd
1227 then lose track of the state and one '|' char. */
1228 state = 13;
252b5132
RH
1229 PUT ('|');
1230 break;
3ee4defc 1231#endif
252b5132
RH
1232 case LEX_IS_LINE_COMMENT_START:
1233 /* FIXME-someday: The two character comment stuff was badly
1234 thought out. On i386, we want '/' as line comment start
1235 AND we want C style comments. hence this hack. The
1236 whole lexical process should be reworked. xoxorich. */
1237 if (ch == '/')
1238 {
1239 ch2 = GET ();
1240 if (ch2 == '*')
1241 {
1242 old_state = 3;
1243 state = -2;
1244 break;
1245 }
69ace220 1246 else if (ch2 != EOF)
252b5132
RH
1247 {
1248 UNGET (ch2);
1249 }
204cd129 1250 }
252b5132
RH
1251
1252 if (state == 0 || state == 1) /* Only comment at start of line. */
1253 {
1254 int startch;
1255
1256 startch = ch;
1257
1258 do
1259 {
1260 ch = GET ();
1261 }
1262 while (ch != EOF && IS_WHITESPACE (ch));
204cd129 1263
252b5132
RH
1264 if (ch == EOF)
1265 {
1266 as_warn (_("end of file in comment; newline inserted"));
1267 PUT ('\n');
1268 break;
1269 }
204cd129 1270
252b5132
RH
1271 if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1272 {
1273 /* Not a cpp line. */
1274 while (ch != EOF && !IS_NEWLINE (ch))
1275 ch = GET ();
1276 if (ch == EOF)
cf3f45fa
AM
1277 {
1278 as_warn (_("end of file in comment; newline inserted"));
1279 PUT ('\n');
1280 }
1281 else /* IS_NEWLINE (ch) */
1282 {
1283 /* To process non-zero add_newlines. */
1284 UNGET (ch);
1285 }
252b5132 1286 state = 0;
252b5132
RH
1287 break;
1288 }
3ee4defc 1289 /* Looks like `# 123 "filename"' from cpp. */
252b5132
RH
1290 UNGET (ch);
1291 old_state = 4;
1292 state = -1;
1293 if (scrub_m68k_mri)
93e914b2 1294 out_string = "\tlinefile ";
252b5132 1295 else
93e914b2 1296 out_string = "\t.linefile ";
252b5132
RH
1297 PUT (*out_string++);
1298 break;
1299 }
1300
1301#ifdef TC_D10V
1302 /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1303 Trap is the only short insn that has a first operand that is
1304 neither register nor label.
1305 We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
30a2b4ef
KH
1306 We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1307 already LEX_IS_LINE_COMMENT_START. However, it is the
1308 only character in line_comment_chars for d10v, hence we
1309 can recognize it as such. */
252b5132
RH
1310 /* An alternative approach would be to reset the state to 1 when
1311 we see '||', '<'- or '->', but that seems to be overkill. */
30a2b4ef
KH
1312 if (state == 10)
1313 PUT (' ');
252b5132
RH
1314#endif
1315 /* We have a line comment character which is not at the
1316 start of a line. If this is also a normal comment
1317 character, fall through. Otherwise treat it as a default
1318 character. */
1319 if (strchr (tc_comment_chars, ch) == NULL
1320 && (! scrub_m68k_mri
1321 || (ch != '!' && ch != '*')))
1322 goto de_fault;
1323 if (scrub_m68k_mri
1324 && (ch == '!' || ch == '*' || ch == '#')
1325 && state != 1
1326 && state != 10)
1327 goto de_fault;
1328 /* Fall through. */
1329 case LEX_IS_COMMENT_START:
1330#if defined TC_ARM && defined OBJ_ELF
1331 /* On the ARM, `@' is the comment character.
1332 Unfortunately this is also a special character in ELF .symver
30a2b4ef
KH
1333 directives (and .type, though we deal with those another way).
1334 So we check if this line is such a directive, and treat
1335 the character as default if so. This is a hack. */
252b5132
RH
1336 if ((symver_state != NULL) && (*symver_state == 0))
1337 goto de_fault;
4c400d5e 1338#endif
2a676888 1339
750e4bf7
JB
1340 /* Care is needed not to damage occurrences of \<comment-char>
1341 by stripping the <comment-char> onwards. Yuck. */
ab1fadc6 1342 if ((to > tostart ? to[-1] : last_char) == '\\')
750e4bf7 1343 /* Do not treat the <comment-char> as a start-of-comment. */
2a676888 1344 goto de_fault;
2a676888 1345
4c400d5e
AM
1346#ifdef WARN_COMMENTS
1347 if (!found_comment)
3b4dbbbf 1348 found_comment_file = as_where (&found_comment);
252b5132
RH
1349#endif
1350 do
1351 {
1352 ch = GET ();
1353 }
1354 while (ch != EOF && !IS_NEWLINE (ch));
1355 if (ch == EOF)
1356 as_warn (_("end of file in comment; newline inserted"));
1357 state = 0;
1358 PUT ('\n');
1359 break;
1360
c54b5932
DD
1361#ifdef H_TICK_HEX
1362 case LEX_IS_H:
1363 /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1364 the H' with 0x to make them gas-style hex characters. */
1365 if (enable_h_tick_hex)
1366 {
1367 char quot;
1368
1369 quot = GET ();
1370 if (quot == '\'')
1371 {
1372 UNGET ('x');
1373 ch = '0';
1374 }
1375 else
1376 UNGET (quot);
1377 }
c54b5932 1378#endif
fcddde94 1379 /* Fall through. */
c54b5932 1380
252b5132
RH
1381 case LEX_IS_SYMBOL_COMPONENT:
1382 if (state == 10)
1383 {
1384 /* This is a symbol character following another symbol
1385 character, with whitespace in between. We skipped
1386 the whitespace earlier, so output it now. */
1387 UNGET (ch);
1388 state = 3;
1389 PUT (' ');
1390 break;
1391 }
1392
3c9b82ba
NC
1393#ifdef TC_Z80
1394 /* "af'" is a symbol containing '\''. */
34bca508 1395 if (state == 3 && (ch == 'a' || ch == 'A'))
3c9b82ba
NC
1396 {
1397 state = 16;
1398 PUT (ch);
1399 ch = GET ();
34bca508 1400 if (ch == 'f' || ch == 'F')
3c9b82ba
NC
1401 {
1402 state = 17;
1403 PUT (ch);
1404 break;
1405 }
1406 else
1407 {
1408 state = 9;
536695d0 1409 if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
3c9b82ba 1410 {
0146fc9d
NC
1411 if (ch != EOF)
1412 UNGET (ch);
3c9b82ba
NC
1413 break;
1414 }
1415 }
1416 }
1417#endif
252b5132
RH
1418 if (state == 3)
1419 state = 9;
1420
1421 /* This is a common case. Quickly copy CH and all the
b1ac4c66 1422 following symbol component or normal characters. */
252b5132
RH
1423 if (to + 1 < toend
1424 && mri_state == NULL
1425#if defined TC_ARM && defined OBJ_ELF
1426 && symver_state == NULL
1427#endif
1428 )
1429 {
1430 char *s;
39a45edc 1431 ptrdiff_t len;
252b5132
RH
1432
1433 for (s = from; s < fromend; s++)
1434 {
1435 int type;
1436
30a2b4ef 1437 ch2 = *(unsigned char *) s;
252b5132
RH
1438 type = lex[ch2];
1439 if (type != 0
1440 && type != LEX_IS_SYMBOL_COMPONENT)
1441 break;
1442 }
204cd129 1443
252b5132 1444 if (s > from)
204cd129
NC
1445 /* Handle the last character normally, for
1446 simplicity. */
1447 --s;
1448
252b5132 1449 len = s - from;
204cd129 1450
252b5132
RH
1451 if (len > (toend - to) - 1)
1452 len = (toend - to) - 1;
204cd129 1453
252b5132
RH
1454 if (len > 0)
1455 {
1456 PUT (ch);
518051dc
BE
1457 memcpy (to, from, len);
1458 to += len;
1459 from += len;
37b75c0c
AM
1460 if (to >= toend)
1461 goto tofull;
252b5132
RH
1462 ch = GET ();
1463 }
1464 }
1465
1466 /* Fall through. */
1467 default:
1468 de_fault:
1469 /* Some relatively `normal' character. */
1470 if (state == 0)
1471 {
9a124774 1472 state = 11; /* Now seeing label definition. */
252b5132
RH
1473 }
1474 else if (state == 1)
1475 {
9a124774 1476 state = 2; /* Ditto. */
252b5132
RH
1477 }
1478 else if (state == 9)
1479 {
2cdb18a7 1480 if (!IS_SYMBOL_COMPONENT (ch))
252b5132
RH
1481 state = 3;
1482 }
1483 else if (state == 10)
1484 {
c5c834aa
AH
1485 if (ch == '\\')
1486 {
1487 /* Special handling for backslash: a backslash may
1488 be the beginning of a formal parameter (of a
1489 macro) following another symbol character, with
1490 whitespace in between. If that is the case, we
1491 output a space before the parameter. Strictly
1492 speaking, correct handling depends upon what the
1493 macro parameter expands into; if the parameter
1494 expands into something which does not start with
1495 an operand character, then we don't want to keep
1496 the space. We don't have enough information to
1497 make the right choice, so here we are making the
1498 choice which is more likely to be correct. */
1740b7b1
NS
1499 if (to + 1 >= toend)
1500 {
1501 /* If we're near the end of the buffer, save the
1502 character for the next time round. Otherwise
1503 we'll lose our state. */
1504 UNGET (ch);
1505 goto tofull;
1506 }
1507 *to++ = ' ';
c5c834aa
AH
1508 }
1509
252b5132
RH
1510 state = 3;
1511 }
1512 PUT (ch);
1513 break;
1514 }
1515 }
1516
1517 /*NOTREACHED*/
1518
1519 fromeof:
1520 /* We have reached the end of the input. */
ab1fadc6
AM
1521 if (to > tostart)
1522 last_char = to[-1];
252b5132
RH
1523 return to - tostart;
1524
1525 tofull:
1526 /* The output buffer is full. Save any input we have not yet
1527 processed. */
1528 if (fromend > from)
1529 {
2b47531b 1530 saved_input = from;
252b5132
RH
1531 saved_input_len = fromend - from;
1532 }
1533 else
2b47531b
ILT
1534 saved_input = NULL;
1535
ab1fadc6
AM
1536 if (to > tostart)
1537 last_char = to[-1];
252b5132
RH
1538 return to - tostart;
1539}
4d74aab7
AM
1540
1541/* Return amount of pending input. */
1542
1543size_t
1544do_scrub_pending (void)
1545{
1546 size_t len = 0;
1547 if (saved_input)
1548 len += saved_input_len;
1549 if (state == -1)
1550 len += strlen (out_string);
1551 return len;
1552}