]> git.ipfire.org Git - thirdparty/binutils-gdb.git/blob - gdb/ada-lex.l
ada-lex.l: Ignore register diagnostic also for g++ defaulting to ISO C++17
[thirdparty/binutils-gdb.git] / gdb / ada-lex.l
1 /* FLEX lexer for Ada expressions, for GDB.
2 Copyright (C) 1994-2020 Free Software Foundation, Inc.
3
4 This file is part of GDB.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18
19 /*----------------------------------------------------------------------*/
20
21 /* The converted version of this file is to be included in ada-exp.y, */
22 /* the Ada parser for gdb. The function yylex obtains characters from */
23 /* the global pointer lexptr. It returns a syntactic category for */
24 /* each successive token and places a semantic value into yylval */
25 /* (ada-lval), defined by the parser. */
26
/* Decimal digits and decimal numerals.  A numeral starts with a digit;
   underscores may follow it.  */
27 DIG [0-9]
28 NUM10 ({DIG}({DIG}|_)*)
/* Digits of based numerals.  Only lower-case letters are listed; the
   case-insensitive option given later in this file covers upper case.  */
29 HEXDIG [0-9a-f]
30 NUM16 ({HEXDIG}({HEXDIG}|_)*)
31 OCTDIG [0-7]
/* Identifiers: a letter or underscore followed by letters, underscores
   or digits; the whole identifier may be wrapped in angle brackets.  */
32 LETTER [a-z_]
33 ID ({LETTER}({LETTER}|{DIG})*|"<"{LETTER}({LETTER}|{DIG})*">")
34 WHITE [ \t\n]
/* A tick followed by optional whitespace.  */
35 TICK ("'"{WHITE}*)
/* Characters accepted by the character and string literal rules.  Note
   that the double quote is not a member of this class.  */
36 GRAPHIC [a-z0-9 #&'()*+,-./:;<>=_|!$%?@\[\]\\^`{}~]
/* Operator symbols that the name rule accepts, between double quotes, as
   a component of a dotted name.  */
37 OPER ([-+*/=<>&]|"<="|">="|"**"|"/="|"and"|"or"|"xor"|"not"|"mod"|"rem"|"abs")
38
/* Exponent parts.  EXP requires an explicit sign after the 'e'; POSEXP
   allows only an optional '+'.  */
39 EXP (e[+-]{NUM10})
40 POSEXP (e"+"?{NUM10})
41
42 %{
43
44 #include "diagnostics.h"
45
46 /* Some old versions of flex generate code that uses the "register" keyword,
47 which clang warns about. This was observed for example with flex 2.5.35,
48 as shipped with macOS 10.12. The same happens with flex 2.5.37 and g++ 11
49 which defaults to ISO C++17, that does not allow register storage class
50 specifiers. */
51 DIAGNOSTIC_PUSH
52 DIAGNOSTIC_IGNORE_DEPRECATED_REGISTER
53
/* Size in bytes of the staging buffer for numeric literals.  */
#define NUMERAL_WIDTH 256

/* A ULONGEST in which only the bit at the sign position of LONGEST is
   set.  */
#define LONGEST_SIGN ((ULONGEST) 1 << (sizeof(LONGEST) * HOST_CHAR_BIT - 1))

/* Temporary staging for numeric literals.  */
static char numbuf[NUMERAL_WIDTH];

/* Helpers defined after the rules section.  */
static void canonicalizeNumeral (char *s1, const char *s2);
static struct stoken processString (const char *text, int len);
static int processInt (struct parser_state *par_state, const char *base0,
		       const char *num0, const char *exp0);
static int processReal (struct parser_state *par_state, const char *num0);
static struct stoken processId (const char *name0, int len);
static int processAttribute (const char *str);
static int find_dot_all (const char *str);
static void rewind_to_char (int ch);

/* The generated scanner is a static function taking no arguments.  */
#undef YY_DECL
#define YY_DECL static int yylex ( void )

/* Flex generates a static function "input" which is not used.
   Defining YY_NO_INPUT comments it out.  */
#define YY_NO_INPUT

/* Feed the scanner one character at a time from pstate->lexptr.  A NUL
   character is reported as end of input and is not consumed.  */
#undef YY_INPUT
#define YY_INPUT(BUF, RESULT, MAX_SIZE) \
    if ( *pstate->lexptr == '\000' ) \
      (RESULT) = YY_NULL; \
    else \
      { \
        *(BUF) = *pstate->lexptr; \
        (RESULT) = 1; \
        pstate->lexptr += 1; \
      }

/* Depth of parentheses.  */
static int paren_depth;
91
92 %}
93
/* Patterns match regardless of letter case; no default rule is
   generated, so the rules section must handle every character itself.  */
94 %option case-insensitive interactive nodefault noyywrap
95
/* Start condition entered by the name rule when a name is directly
   followed by a tick, so that the tick is returned as its own token.  */
96 %s BEFORE_QUAL_QUOTE
97
98 %%
99
  /* Whitespace between tokens is skipped: the action is empty.  */
100 {WHITE} { }
101
  /* "--" starts a comment that runs to the end of the line; scanning
     is terminated there.  */
102 "--".* { yyterminate(); }
103
{NUM10}{POSEXP} {
    /* Decimal integer with an exponent.  numbuf holds
       "<digits>e<exponent>"; cut it at the exponent marker so that
       processInt receives the digits and the exponent as two separate
       strings.  Left in place, the marker is taken by processInt for a
       stray hexadecimal digit and the literal is rejected.  */
    canonicalizeNumeral (numbuf, yytext);
    char *e_ptr = strrchr (numbuf, 'e');
    *e_ptr = '\0';
    return processInt (pstate, NULL, numbuf, e_ptr + 1);
  }

{NUM10} {
    /* Plain decimal integer.  */
    canonicalizeNumeral (numbuf, yytext);
    return processInt (pstate, NULL, numbuf, NULL);
  }

{NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#"{POSEXP} {
    /* Based integer with an exponent: BASE#DIGITS#eEXP.  The exponent
       digits are decimal, so the last 'e' in numbuf is always the
       exponent marker, even when the based digits contain an 'e'.
       The exponent passed on must start after that marker, otherwise
       it is read as zero.  */
    canonicalizeNumeral (numbuf, yytext);
    char *e_ptr = strrchr (numbuf, 'e');
    *e_ptr = '\0';
    return processInt (pstate, numbuf,
		       strchr (numbuf, '#') + 1,
		       e_ptr + 1);
  }

{NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#" {
    /* Based integer without an exponent: BASE#DIGITS#.  */
    canonicalizeNumeral (numbuf, yytext);
    return processInt (pstate, numbuf, strchr (numbuf, '#') + 1,
		       NULL);
  }

"0x"{HEXDIG}+ {
    /* C-style hexadecimal literal, handled as a base-16 literal.  */
    canonicalizeNumeral (numbuf, yytext+2);
    return processInt (pstate, "16#", numbuf, NULL);
  }
132
133
  /* Decimal real literals, with and without a signed exponent.  The text
     is canonicalized into numbuf and converted by processReal.  */
134 {NUM10}"."{NUM10}{EXP} {
135 canonicalizeNumeral (numbuf, yytext);
136 return processReal (pstate, numbuf);
137 }
138
139 {NUM10}"."{NUM10} {
140 canonicalizeNumeral (numbuf, yytext);
141 return processReal (pstate, numbuf);
142 }
143
  /* Based real literals are recognized only so that a specific error
     can be reported for them.  */
144 {NUM10}"#"{NUM16}"."{NUM16}"#"{EXP} {
145 error (_("Based real literals not implemented yet."));
146 }
147
148 {NUM10}"#"{NUM16}"."{NUM16}"#" {
149 error (_("Based real literals not implemented yet."));
150 }
151
  /* Character literal holding one graphic character or a double quote.
     The rule is limited to the INITIAL start condition, so it is not
     tried while BEFORE_QUAL_QUOTE is active.  */
152 <INITIAL>"'"({GRAPHIC}|\")"'" {
153 yylval.typed_val.type = type_char (pstate);
154 yylval.typed_val.val = yytext[1];
155 return CHARLIT;
156 }
157
  /* Character literal written as a tick, a bracketed pair of hexadecimal
     digits between double quotes, and a tick.  The digits start at
     yytext+3 and give the character's value.  */
158 <INITIAL>"'[\""{HEXDIG}{2}"\"]'" {
159 int v;
160 yylval.typed_val.type = type_char (pstate);
161 sscanf (yytext+3, "%2x", &v);
162 yylval.typed_val.val = v;
163 return CHARLIT;
164 }
165
  /* String literal.  The text between the surrounding double quotes is
     handed to processString, which decodes the bracketed notations.  */
166 \"({GRAPHIC}|"[\""({HEXDIG}{2}|\")"\"]")*\" {
167 yylval.sval = processString (yytext+1, yyleng-2);
168 return STRING;
169 }
170
  /* A double quote that did not begin a well-formed string literal.  */
171 \" {
172 error (_("ill-formed or non-terminated string literal"));
173 }
174
175
  /* The words below end the expression: the input pointer is moved back
     to the start of the word and 0 is returned.  */
176 if {
177 rewind_to_char ('i');
178 return 0;
179 }
180
181 task {
182 rewind_to_char ('t');
183 return 0;
184 }
185
186 thread{WHITE}+{DIG} {
187 /* This keyword signals the end of the expression and
188 will be processed separately. */
189 rewind_to_char ('t');
190 return 0;
191 }
192
193 /* ADA KEYWORDS */
  /* Each reserved word below is returned as its own parser token.  */
194
195 abs { return ABS; }
196 and { return _AND_; }
197 else { return ELSE; }
198 in { return IN; }
199 mod { return MOD; }
200 new { return NEW; }
201 not { return NOT; }
202 null { return NULL_PTR; }
203 or { return OR; }
204 others { return OTHERS; }
205 rem { return REM; }
206 then { return THEN; }
207 xor { return XOR; }
208
209 /* BOOLEAN "KEYWORDS" */
210
211 /* True and False are not keywords in Ada, but rather enumeration constants.
212 However, the boolean type is no longer represented as an enum, so True
213 and False are no longer defined in symbol tables. We compromise by
214 making them keywords (when bare). */
215
216 true { return TRUEKEYWORD; }
217 false { return FALSEKEYWORD; }
218
219 /* ATTRIBUTES */
  /* A tick, optional whitespace and a name of at least two characters.
     Everything after the tick is passed to processAttribute, which maps
     a full attribute name or an abbreviation of one to a token.  The
     start condition is reset to INITIAL.  */
220
221 {TICK}[a-zA-Z][a-zA-Z_]+ { BEGIN INITIAL; return processAttribute (yytext+1); }
222
223 /* PUNCTUATION */
224
225 "=>" { return ARROW; }
226 ".." { return DOTDOT; }
227 "**" { return STARSTAR; }
228 ":=" { return ASSIGN; }
229 "/=" { return NOTEQUAL; }
230 "<=" { return LEQ; }
231 ">=" { return GEQ; }
232
  /* The tick pushed back by the name rule is returned here as a token of
     its own, and the start condition goes back to INITIAL.  */
233 <BEFORE_QUAL_QUOTE>"'" { BEGIN INITIAL; return '\''; }
234
  /* Single-character tokens are returned as their character code.  */
235 [-&*+./:<>=|;\[\]] { return yytext[0]; }
236
  /* A comma outside all parentheses ends the expression when the parser
     state has comma_terminates set; otherwise it is an ordinary token.  */
237 "," { if (paren_depth == 0 && pstate->comma_terminates)
238 {
239 rewind_to_char (',');
240 return 0;
241 }
242 else
243 return ',';
244 }
245
  /* Parentheses maintain paren_depth.  A closing parenthesis with no
     matching open one ends the expression.  */
246 "(" { paren_depth += 1; return '('; }
247 ")" { if (paren_depth == 0)
248 {
249 rewind_to_char (')');
250 return 0;
251 }
252 else
253 {
254 paren_depth -= 1;
255 return ')';
256 }
257 }
258
  /* A dot, optional whitespace and the word "all".  */
259 "."{WHITE}*all { return DOT_ALL; }
260
  /* A dot followed by an identifier.  The text after the dot is
     canonicalized by processId.  */
261 "."{WHITE}*{ID} {
262 yylval.sval = processId (yytext+1, yyleng-1);
263 return DOT_ID;
264 }
265
  /* A possibly dotted name, optionally followed by a tick.  If the name
     contains a ".all" component, only the text before that dot is kept
     and the rest is pushed back with yyless.  Otherwise a trailing tick
     is pushed back and BEFORE_QUAL_QUOTE is entered, so that the tick is
     scanned as a separate token.  */
266 {ID}({WHITE}*"."{WHITE}*({ID}|\"{OPER}\"))*(" "*"'")? {
267 int all_posn = find_dot_all (yytext);
268
269 if (all_posn == -1 && yytext[yyleng-1] == '\'')
270 {
271 BEGIN BEFORE_QUAL_QUOTE;
272 yyless (yyleng-1);
273 }
274 else if (all_posn >= 0)
275 yyless (all_posn);
276 yylval.sval = processId (yytext, yyleng);
277 return NAME;
278 }
279
280
281 /* GDB EXPRESSION CONSTRUCTS */
282
  /* A name between ticks followed by "::".  The "::" is pushed back to be
     scanned separately and the rest is returned as a NAME.  */
283 "'"[^']+"'"{WHITE}*:: {
284 yyless (yyleng - 2);
285 yylval.sval = processId (yytext, yyleng);
286 return NAME;
287 }
288
289 "::" { return COLONCOLON; }
290
291 [{}@] { return yytext[0]; }
292
293 /* REGISTERS AND GDB CONVENIENCE VARIABLES */
294
  /* The token value points directly at yytext; no copy is made here.  */
295 "$"({LETTER}|{DIG}|"$")* {
296 yylval.sval.ptr = yytext;
297 yylval.sval.length = yyleng;
298 return DOLLAR_VARIABLE;
299 }
300
301 /* CATCH-ALL ERROR CASE */
302
303 . { error (_("Invalid character '%s' in expression."), yytext); }
304 %%
305
306 #include <ctype.h>
307 /* Initialize the lexer for processing new expression. */
308
309 static void
310 lexer_init (FILE *inp)
311 {
312 BEGIN INITIAL;
313 paren_depth = 0;
314 yyrestart (inp);
315 }
316
317
318 /* Copy S2 to S1, removing all underscores, and downcasing all letters. */
319
320 static void
321 canonicalizeNumeral (char *s1, const char *s2)
322 {
323 for (; *s2 != '\000'; s2 += 1)
324 {
325 if (*s2 != '_')
326 {
327 *s1 = tolower(*s2);
328 s1 += 1;
329 }
330 }
331 s1[0] = '\000';
332 }
333
334 /* Interprets the prefix of NUM that consists of digits of the given BASE
335 as an integer of that BASE, with the string EXP as an exponent.
336 Puts value in yylval, and returns INT, if the string is valid. Causes
337 an error if the number is improperly formated. BASE, if NULL, defaults
338 to "10", and EXP to "1". The EXP does not contain a leading 'e' or 'E'.
339 */
340
341 static int
342 processInt (struct parser_state *par_state, const char *base0,
343 const char *num0, const char *exp0)
344 {
345 ULONGEST result;
346 long exp;
347 int base;
348 const char *trailer;
349
350 if (base0 == NULL)
351 base = 10;
352 else
353 {
354 base = strtol (base0, (char **) NULL, 10);
355 if (base < 2 || base > 16)
356 error (_("Invalid base: %d."), base);
357 }
358
359 if (exp0 == NULL)
360 exp = 0;
361 else
362 exp = strtol(exp0, (char **) NULL, 10);
363
364 errno = 0;
365 result = strtoulst (num0, &trailer, base);
366 if (errno == ERANGE)
367 error (_("Integer literal out of range"));
368 if (isxdigit(*trailer))
369 error (_("Invalid digit `%c' in based literal"), *trailer);
370
371 while (exp > 0)
372 {
373 if (result > (ULONG_MAX / base))
374 error (_("Integer literal out of range"));
375 result *= base;
376 exp -= 1;
377 }
378
379 if ((result >> (gdbarch_int_bit (par_state->gdbarch ())-1)) == 0)
380 yylval.typed_val.type = type_int (par_state);
381 else if ((result >> (gdbarch_long_bit (par_state->gdbarch ())-1)) == 0)
382 yylval.typed_val.type = type_long (par_state);
383 else if (((result >> (gdbarch_long_bit (par_state->gdbarch ())-1)) >> 1) == 0)
384 {
385 /* We have a number representable as an unsigned integer quantity.
386 For consistency with the C treatment, we will treat it as an
387 anonymous modular (unsigned) quantity. Alas, the types are such
388 that we need to store .val as a signed quantity. Sorry
389 for the mess, but C doesn't officially guarantee that a simple
390 assignment does the trick (no, it doesn't; read the reference manual).
391 */
392 yylval.typed_val.type
393 = builtin_type (par_state->gdbarch ())->builtin_unsigned_long;
394 if (result & LONGEST_SIGN)
395 yylval.typed_val.val =
396 (LONGEST) (result & ~LONGEST_SIGN)
397 - (LONGEST_SIGN>>1) - (LONGEST_SIGN>>1);
398 else
399 yylval.typed_val.val = (LONGEST) result;
400 return INT;
401 }
402 else
403 yylval.typed_val.type = type_long_long (par_state);
404
405 yylval.typed_val.val = (LONGEST) result;
406 return INT;
407 }
408
409 static int
410 processReal (struct parser_state *par_state, const char *num0)
411 {
412 yylval.typed_val_float.type = type_long_double (par_state);
413
414 bool parsed = parse_float (num0, strlen (num0),
415 yylval.typed_val_float.type,
416 yylval.typed_val_float.val);
417 gdb_assert (parsed);
418 return FLOAT;
419 }
420
421
422 /* Store a canonicalized version of NAME0[0..LEN-1] in yylval.ssym. The
423 resulting string is valid until the next call to ada_parse. If
424 NAME0 contains the substring "___", it is assumed to be already
425 encoded and the resulting name is equal to it. Similarly, if the name
426 starts with '<', it is copied verbatim. Otherwise, it differs
427 from NAME0 in that:
428 + Characters between '...' are transfered verbatim to yylval.ssym.
429 + Trailing "'" characters in quoted sequences are removed (a leading quote is
430 preserved to indicate that the name is not to be GNAT-encoded).
431 + Unquoted whitespace is removed.
432 + Unquoted alphabetic characters are mapped to lower case.
433 Result is returned as a struct stoken, but for convenience, the string
434 is also null-terminated. Result string valid until the next call of
435 ada_parse.
436 */
437 static struct stoken
438 processId (const char *name0, int len)
439 {
440 char *name = (char *) obstack_alloc (&temp_parse_space, len + 11);
441 int i0, i;
442 struct stoken result;
443
444 result.ptr = name;
445 while (len > 0 && isspace (name0[len-1]))
446 len -= 1;
447
448 if (name0[0] == '<' || strstr (name0, "___") != NULL)
449 {
450 strncpy (name, name0, len);
451 name[len] = '\000';
452 result.length = len;
453 return result;
454 }
455
456 i = i0 = 0;
457 while (i0 < len)
458 {
459 if (isalnum (name0[i0]))
460 {
461 name[i] = tolower (name0[i0]);
462 i += 1; i0 += 1;
463 }
464 else switch (name0[i0])
465 {
466 default:
467 name[i] = name0[i0];
468 i += 1; i0 += 1;
469 break;
470 case ' ': case '\t':
471 i0 += 1;
472 break;
473 case '\'':
474 do
475 {
476 name[i] = name0[i0];
477 i += 1; i0 += 1;
478 }
479 while (i0 < len && name0[i0] != '\'');
480 i0 += 1;
481 break;
482 }
483 }
484 name[i] = '\000';
485
486 result.length = i;
487 return result;
488 }
489
490 /* Return TEXT[0..LEN-1], a string literal without surrounding quotes,
491 with special hex character notations replaced with characters.
492 Result valid until the next call to ada_parse. */
493
494 static struct stoken
495 processString (const char *text, int len)
496 {
497 const char *p;
498 char *q;
499 const char *lim = text + len;
500 struct stoken result;
501
502 q = (char *) obstack_alloc (&temp_parse_space, len);
503 result.ptr = q;
504 p = text;
505 while (p < lim)
506 {
507 if (p[0] == '[' && p[1] == '"' && p+2 < lim)
508 {
509 if (p[2] == '"') /* "...["""]... */
510 {
511 *q = '"';
512 p += 4;
513 }
514 else
515 {
516 int chr;
517 sscanf (p+2, "%2x", &chr);
518 *q = (char) chr;
519 p += 5;
520 }
521 }
522 else
523 *q = *p;
524 q += 1;
525 p += 1;
526 }
527 result.length = q - result.ptr;
528 return result;
529 }
530
/* Return the position within STR of the '.' of the first
   '.{WHITE}*all' component of a dotted name, or -1 if there is none.
   "all" is matched regardless of case and must not be followed by a
   letter, a digit or an underscore.

   Note: this routine is not strictly needed, since 'all' can never be
   an Ada identifier.  Looking up foo.all or foo.all.x as a name must
   fail, and it would eventually be interpreted as (foo).all or
   (foo).all.x.  However, this does avoid an extraneous lookup.  */

static int
find_dot_all (const char *str)
{
  int i = 0;

  while (str[i] != '\000')
    {
      if (str[i] != '.')
	{
	  i += 1;
	  continue;
	}

      int after = i + 1;

      while (isspace ((unsigned char) str[after]))
	after += 1;

      /* str[after + 3] is examined only when three characters matched,
	 so it is at worst the terminating NUL.  */
      if (strncasecmp (str + after, "all", 3) == 0
	  && !isalnum ((unsigned char) str[after + 3])
	  && str[after + 3] != '_')
	return i;

      /* Resume at the first character after the dot and its
	 whitespace.  It has not been examined yet: it may be the
	 terminating NUL or another dot.  */
      i = after;
    }
  return -1;
}
558
/* Return 1 if the string SUBSEQ matches a subsequence of STR, ignoring
   case, and 0 otherwise.  An empty SUBSEQ matches any STR.

   Each character of SUBSEQ is matched against its earliest possible
   occurrence in STR.  That choice leaves the longest remainder of STR
   for the rest of SUBSEQ, so no backtracking is needed and the scan is
   linear in the length of STR.  */

static int
subseqMatch (const char *subseq, const char *str)
{
  for (; *subseq != '\0'; str += 1)
    {
      if (*str == '\0')
	return 0;
      if (tolower ((unsigned char) *subseq) == tolower ((unsigned char) *str))
	subseq += 1;
    }
  return 1;
}
574
575
576 static struct { const char *name; int code; }
577 attributes[] = {
578 { "address", TICK_ADDRESS },
579 { "unchecked_access", TICK_ACCESS },
580 { "unrestricted_access", TICK_ACCESS },
581 { "access", TICK_ACCESS },
582 { "first", TICK_FIRST },
583 { "last", TICK_LAST },
584 { "length", TICK_LENGTH },
585 { "max", TICK_MAX },
586 { "min", TICK_MIN },
587 { "modulus", TICK_MODULUS },
588 { "pos", TICK_POS },
589 { "range", TICK_RANGE },
590 { "size", TICK_SIZE },
591 { "tag", TICK_TAG },
592 { "val", TICK_VAL },
593 { NULL, -1 }
594 };
595
596 /* Return the syntactic code corresponding to the attribute name or
597 abbreviation STR. */
598
599 static int
600 processAttribute (const char *str)
601 {
602 int i, k;
603
604 for (i = 0; attributes[i].code != -1; i += 1)
605 if (strcasecmp (str, attributes[i].name) == 0)
606 return attributes[i].code;
607
608 for (i = 0, k = -1; attributes[i].code != -1; i += 1)
609 if (subseqMatch (str, attributes[i].name))
610 {
611 if (k == -1)
612 k = i;
613 else
614 error (_("ambiguous attribute name: `%s'"), str);
615 }
616 if (k == -1)
617 error (_("unrecognized attribute: `%s'"), str);
618
619 return attributes[k].code;
620 }
621
622 /* Back up lexptr by yyleng and then to the rightmost occurrence of
623 character CH, case-folded (there must be one). WARNING: since
624 lexptr points to the next input character that Flex has not yet
625 transferred to its internal buffer, the use of this function
626 depends on the assumption that Flex calls YY_INPUT only when it is
627 logically necessary to do so (thus, there is no reading ahead
628 farther than needed to identify the next token.) */
629
630 static void
631 rewind_to_char (int ch)
632 {
633 pstate->lexptr -= yyleng;
634 while (toupper (*pstate->lexptr) != toupper (ch))
635 pstate->lexptr -= 1;
636 yyrestart (NULL);
637 }
638
639 /* Dummy definition to suppress warnings about unused static definitions. */
640 typedef void (*dummy_function) ();
641 dummy_function ada_flex_use[] =
642 {
643 (dummy_function) yyunput
644 };
645
646 DIAGNOSTIC_POP