]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/ada/scng.adb
a-convec.adb (Merge): Added assertions to check whether vector params are sorted.
[thirdparty/gcc.git] / gcc / ada / scng.adb
CommitLineData
fbf5a39b
AC
1------------------------------------------------------------------------------
2-- --
3-- GNAT COMPILER COMPONENTS --
4-- --
5-- S C N G --
6-- --
7-- B o d y --
8-- --
82c80734 9-- Copyright (C) 1992-2005 Free Software Foundation, Inc. --
fbf5a39b
AC
10-- --
11-- GNAT is free software; you can redistribute it and/or modify it under --
12-- terms of the GNU General Public License as published by the Free Soft- --
13-- ware Foundation; either version 2, or (at your option) any later ver- --
14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17-- for more details. You should have received a copy of the GNU General --
18-- Public License distributed with GNAT; see file COPYING. If not, write --
cb5fee25
KC
19-- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20-- Boston, MA 02110-1301, USA. --
fbf5a39b
AC
21-- --
22-- GNAT was originally developed by the GNAT team at New York University. --
23-- Extensive contributions were provided by Ada Core Technologies Inc. --
24-- --
25------------------------------------------------------------------------------
26
27with Csets; use Csets;
28with Err_Vars; use Err_Vars;
fbf5a39b
AC
29with Namet; use Namet;
30with Opt; use Opt;
31with Scans; use Scans;
32with Sinput; use Sinput;
33with Snames; use Snames;
34with Stringt; use Stringt;
35with Stylesw; use Stylesw;
36with Uintp; use Uintp;
37with Urealp; use Urealp;
38with Widechar; use Widechar;
39
40with System.CRC32;
41with System.WCh_Con; use System.WCh_Con;
42
c8427bff
RD
43with GNAT.UTF_32; use GNAT.UTF_32;
44
fbf5a39b
AC
45package body Scng is
46
47 use ASCII;
48 -- Make control characters visible
49
50 Special_Characters : array (Character) of Boolean := (others => False);
51 -- For characters that are Special token, the value is True
52
c45b6ae0
AC
53 Comment_Is_Token : Boolean := False;
54 -- True if comments are tokens
55
fbf5a39b
AC
56 End_Of_Line_Is_Token : Boolean := False;
57 -- True if End_Of_Line is a token
58
59 -----------------------
60 -- Local Subprograms --
61 -----------------------
62
6b6fcd3e
AC
63 procedure Accumulate_Token_Checksum;
64 pragma Inline (Accumulate_Token_Checksum);
65
fbf5a39b
AC
66 procedure Accumulate_Checksum (C : Character);
67 pragma Inline (Accumulate_Checksum);
68 -- This routine accumulates the checksum given character C. During the
69 -- scanning of a source file, this routine is called with every character
70 -- in the source, excluding blanks, and all control characters (except
71 -- that ESC is included in the checksum). Upper case letters not in string
72 -- literals are folded by the caller. See Sinput spec for the documentation
73 -- of the checksum algorithm. Note: checksum values are only used if we
74 -- generate code, so it is not necessary to worry about making the right
75 -- sequence of calls in any error situation.
76
77 procedure Accumulate_Checksum (C : Char_Code);
78 pragma Inline (Accumulate_Checksum);
79 -- This version is identical, except that the argument, C, is a character
80 -- code value instead of a character. This is used when wide characters
81 -- are scanned. We use the character code rather than the ASCII characters
82 -- so that the checksum is independent of wide character encoding method.
83
84 procedure Initialize_Checksum;
85 pragma Inline (Initialize_Checksum);
86 -- Initialize checksum value
87
88 -------------------------
89 -- Accumulate_Checksum --
90 -------------------------
91
92 procedure Accumulate_Checksum (C : Character) is
93 begin
94 System.CRC32.Update (System.CRC32.CRC32 (Checksum), C);
95 end Accumulate_Checksum;
96
97 procedure Accumulate_Checksum (C : Char_Code) is
98 begin
82c80734 99 if C > 16#FFFF# then
357ac4df
RD
100 Accumulate_Checksum (Character'Val (C / 2 ** 24));
101 Accumulate_Checksum (Character'Val ((C / 2 ** 16) mod 256));
82c80734
RD
102 Accumulate_Checksum (Character'Val ((C / 256) mod 256));
103 else
104 Accumulate_Checksum (Character'Val (C / 256));
105 end if;
106
fbf5a39b
AC
107 Accumulate_Checksum (Character'Val (C mod 256));
108 end Accumulate_Checksum;
109
6b6fcd3e
AC
110 -------------------------------
111 -- Accumulate_Token_Checksum --
112 -------------------------------
113
114 procedure Accumulate_Token_Checksum is
115 begin
116 System.CRC32.Update
117 (System.CRC32.CRC32 (Checksum),
118 Character'Val (Token_Type'Pos (Token)));
119 end Accumulate_Token_Checksum;
120
fbf5a39b
AC
121 ----------------------------
122 -- Determine_Token_Casing --
123 ----------------------------
124
125 function Determine_Token_Casing return Casing_Type is
126 begin
127 return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
128 end Determine_Token_Casing;
129
130 -------------------------
131 -- Initialize_Checksum --
132 -------------------------
133
134 procedure Initialize_Checksum is
135 begin
136 System.CRC32.Initialize (System.CRC32.CRC32 (Checksum));
137 end Initialize_Checksum;
138
139 ------------------------
140 -- Initialize_Scanner --
141 ------------------------
142
143 procedure Initialize_Scanner
144 (Unit : Unit_Number_Type;
145 Index : Source_File_Index)
146 is
82c80734
RD
147 procedure Set_Reserved (N : Name_Id; T : Token_Type);
148 pragma Inline (Set_Reserved);
149 -- Set given name as a reserved keyword (T is the corresponding token)
150
151 -------------
152 -- Set_NTB --
153 -------------
154
155 procedure Set_Reserved (N : Name_Id; T : Token_Type) is
156 begin
157 -- Set up Token_Type values in Names Table entries for reserved
158 -- keywords We use the Pos value of the Token_Type value. Note we
159 -- rely on the fact that Token_Type'Val (0) is not a reserved word!
160
161 Set_Name_Table_Byte (N, Token_Type'Pos (T));
162 end Set_Reserved;
163
164 -- Start of processing for Initialize_Scanner
165
fbf5a39b 166 begin
82c80734
RD
167 -- Establish reserved words
168
169 Set_Reserved (Name_Abort, Tok_Abort);
170 Set_Reserved (Name_Abs, Tok_Abs);
171 Set_Reserved (Name_Abstract, Tok_Abstract);
172 Set_Reserved (Name_Accept, Tok_Accept);
173 Set_Reserved (Name_Access, Tok_Access);
174 Set_Reserved (Name_And, Tok_And);
175 Set_Reserved (Name_Aliased, Tok_Aliased);
176 Set_Reserved (Name_All, Tok_All);
177 Set_Reserved (Name_Array, Tok_Array);
178 Set_Reserved (Name_At, Tok_At);
179 Set_Reserved (Name_Begin, Tok_Begin);
180 Set_Reserved (Name_Body, Tok_Body);
181 Set_Reserved (Name_Case, Tok_Case);
182 Set_Reserved (Name_Constant, Tok_Constant);
183 Set_Reserved (Name_Declare, Tok_Declare);
184 Set_Reserved (Name_Delay, Tok_Delay);
185 Set_Reserved (Name_Delta, Tok_Delta);
186 Set_Reserved (Name_Digits, Tok_Digits);
187 Set_Reserved (Name_Do, Tok_Do);
188 Set_Reserved (Name_Else, Tok_Else);
189 Set_Reserved (Name_Elsif, Tok_Elsif);
190 Set_Reserved (Name_End, Tok_End);
191 Set_Reserved (Name_Entry, Tok_Entry);
192 Set_Reserved (Name_Exception, Tok_Exception);
193 Set_Reserved (Name_Exit, Tok_Exit);
194 Set_Reserved (Name_For, Tok_For);
195 Set_Reserved (Name_Function, Tok_Function);
196 Set_Reserved (Name_Generic, Tok_Generic);
197 Set_Reserved (Name_Goto, Tok_Goto);
198 Set_Reserved (Name_If, Tok_If);
199 Set_Reserved (Name_In, Tok_In);
200 Set_Reserved (Name_Is, Tok_Is);
201 Set_Reserved (Name_Limited, Tok_Limited);
202 Set_Reserved (Name_Loop, Tok_Loop);
203 Set_Reserved (Name_Mod, Tok_Mod);
204 Set_Reserved (Name_New, Tok_New);
205 Set_Reserved (Name_Not, Tok_Not);
206 Set_Reserved (Name_Null, Tok_Null);
207 Set_Reserved (Name_Of, Tok_Of);
208 Set_Reserved (Name_Or, Tok_Or);
209 Set_Reserved (Name_Others, Tok_Others);
210 Set_Reserved (Name_Out, Tok_Out);
211 Set_Reserved (Name_Package, Tok_Package);
212 Set_Reserved (Name_Pragma, Tok_Pragma);
213 Set_Reserved (Name_Private, Tok_Private);
214 Set_Reserved (Name_Procedure, Tok_Procedure);
215 Set_Reserved (Name_Protected, Tok_Protected);
216 Set_Reserved (Name_Raise, Tok_Raise);
217 Set_Reserved (Name_Range, Tok_Range);
218 Set_Reserved (Name_Record, Tok_Record);
219 Set_Reserved (Name_Rem, Tok_Rem);
220 Set_Reserved (Name_Renames, Tok_Renames);
221 Set_Reserved (Name_Requeue, Tok_Requeue);
222 Set_Reserved (Name_Return, Tok_Return);
223 Set_Reserved (Name_Reverse, Tok_Reverse);
224 Set_Reserved (Name_Select, Tok_Select);
225 Set_Reserved (Name_Separate, Tok_Separate);
226 Set_Reserved (Name_Subtype, Tok_Subtype);
227 Set_Reserved (Name_Tagged, Tok_Tagged);
228 Set_Reserved (Name_Task, Tok_Task);
229 Set_Reserved (Name_Terminate, Tok_Terminate);
230 Set_Reserved (Name_Then, Tok_Then);
231 Set_Reserved (Name_Type, Tok_Type);
232 Set_Reserved (Name_Until, Tok_Until);
233 Set_Reserved (Name_Use, Tok_Use);
234 Set_Reserved (Name_When, Tok_When);
235 Set_Reserved (Name_While, Tok_While);
236 Set_Reserved (Name_With, Tok_With);
237 Set_Reserved (Name_Xor, Tok_Xor);
238
239 -- Ada 2005 reserved words
240
241 Set_Reserved (Name_Interface, Tok_Interface);
242 Set_Reserved (Name_Overriding, Tok_Overriding);
243 Set_Reserved (Name_Synchronized, Tok_Synchronized);
fbf5a39b
AC
244
245 -- Initialize scan control variables
246
247 Current_Source_File := Index;
248 Source := Source_Text (Current_Source_File);
249 Current_Source_Unit := Unit;
250 Scan_Ptr := Source_First (Current_Source_File);
251 Token := No_Token;
252 Token_Ptr := Scan_Ptr;
253 Current_Line_Start := Scan_Ptr;
254 Token_Node := Empty;
255 Token_Name := No_Name;
256 Start_Column := Set_Start_Column;
257 First_Non_Blank_Location := Scan_Ptr;
258
259 Initialize_Checksum;
d52f1094 260 Wide_Char_Byte_Count := 0;
fbf5a39b 261
5f3ab6fb 262 -- Do not call Scan, otherwise the License stuff does not work in Scn
fbf5a39b
AC
263
264 end Initialize_Scanner;
265
266 ------------------------------
267 -- Reset_Special_Characters --
268 ------------------------------
269
270 procedure Reset_Special_Characters is
271 begin
272 Special_Characters := (others => False);
273 end Reset_Special_Characters;
274
275 ----------
276 -- Scan --
277 ----------
278
279 procedure Scan is
280
c45b6ae0 281 Start_Of_Comment : Source_Ptr;
82c80734
RD
282 -- Record start of comment position
283
284 Underline_Found : Boolean;
285 -- During scanning of an identifier, set to True if last character
286 -- scanned was an underline or other punctuation character. This
287 -- is used to flag the error of two underlines/punctuations in a
288 -- row or ending an identifier with a underline/punctuation. Here
289 -- punctuation means any UTF_32 character in the Unicode category
290 -- Punctuation,Connector.
291
292 Wptr : Source_Ptr;
293 -- Used to remember start of last wide character scanned
c45b6ae0 294
fbf5a39b 295 procedure Check_End_Of_Line;
82c80734
RD
296 -- Called when end of line encountered. Checks that line is not too
297 -- long, and that other style checks for the end of line are met.
fbf5a39b
AC
298
299 function Double_Char_Token (C : Character) return Boolean;
300 -- This function is used for double character tokens like := or <>. It
301 -- checks if the character following Source (Scan_Ptr) is C, and if so
302 -- bumps Scan_Ptr past the pair of characters and returns True. A space
303 -- between the two characters is also recognized with an appropriate
304 -- error message being issued. If C is not present, False is returned.
305 -- Note that Double_Char_Token can only be used for tokens defined in
306 -- the Ada syntax (it's use for error cases like && is not appropriate
307 -- since we do not want a junk message for a case like &-space-&).
308
309 procedure Error_Illegal_Character;
82c80734
RD
310 -- Give illegal character error, Scan_Ptr points to character. On
311 -- return, Scan_Ptr is bumped past the illegal character.
fbf5a39b
AC
312
313 procedure Error_Illegal_Wide_Character;
314 -- Give illegal wide character message. On return, Scan_Ptr is bumped
315 -- past the illegal character, which may still leave us pointing to
316 -- junk, not much we can do if the escape sequence is messed up!
317
318 procedure Error_Long_Line;
319 -- Signal error of excessively long line
320
321 procedure Error_No_Double_Underline;
82c80734
RD
322 -- Signal error of two underline or punctuation characters in a row.
323 -- Called with Scan_Ptr pointing to second underline/punctuation char.
fbf5a39b
AC
324
325 procedure Nlit;
326 -- This is the procedure for scanning out numeric literals. On entry,
327 -- Scan_Ptr points to the digit that starts the numeric literal (the
328 -- checksum for this character has not been accumulated yet). On return
329 -- Scan_Ptr points past the last character of the numeric literal, Token
330 -- and Token_Node are set appropriately, and the checksum is updated.
331
332 procedure Slit;
333 -- This is the procedure for scanning out string literals. On entry,
334 -- Scan_Ptr points to the opening string quote (the checksum for this
335 -- character has not been accumulated yet). On return Scan_Ptr points
336 -- past the closing quote of the string literal, Token and Token_Node
337 -- are set appropriately, and the checksum is upated.
338
339 -----------------------
340 -- Check_End_Of_Line --
341 -----------------------
342
343 procedure Check_End_Of_Line is
d52f1094
RD
344 Len : constant Int :=
345 Int (Scan_Ptr) -
346 Int (Current_Line_Start) -
347 Wide_Char_Byte_Count;
fbf5a39b
AC
348
349 begin
debe0ab6 350 if Style_Check then
fbf5a39b 351 Style.Check_Line_Terminator (Len);
debe0ab6
RD
352 end if;
353
354 -- Deal with checking maximum line length
355
356 if Style_Check and Style_Check_Max_Line_Length then
357 Style.Check_Line_Max_Length (Len);
fbf5a39b 358
5950a3ac
AC
359 -- If style checking is inactive, check maximum line length against
360 -- standard value. Note that we take this from Opt.Max_Line_Length
361 -- rather than Hostparm.Max_Line_Length because we do not want to
362 -- impose any limit during scanning of configuration pragma files,
363 -- and Opt.Max_Line_Length (normally set to Hostparm.Max_Line_Length)
364 -- is reset to Column_Number'Max during scanning of such files.
365
366 elsif Len > Opt.Max_Line_Length then
fbf5a39b
AC
367 Error_Long_Line;
368 end if;
d52f1094
RD
369
370 -- Reset wide character byte count for next line
371
372 Wide_Char_Byte_Count := 0;
fbf5a39b
AC
373 end Check_End_Of_Line;
374
375 -----------------------
376 -- Double_Char_Token --
377 -----------------------
378
379 function Double_Char_Token (C : Character) return Boolean is
380 begin
381 if Source (Scan_Ptr + 1) = C then
382 Accumulate_Checksum (C);
383 Scan_Ptr := Scan_Ptr + 2;
384 return True;
385
386 elsif Source (Scan_Ptr + 1) = ' '
387 and then Source (Scan_Ptr + 2) = C
388 then
389 Scan_Ptr := Scan_Ptr + 1;
390 Error_Msg_S ("no space allowed here");
391 Scan_Ptr := Scan_Ptr + 2;
392 return True;
393
394 else
395 return False;
396 end if;
397 end Double_Char_Token;
398
399 -----------------------------
400 -- Error_Illegal_Character --
401 -----------------------------
402
403 procedure Error_Illegal_Character is
404 begin
405 Error_Msg_S ("illegal character");
406 Scan_Ptr := Scan_Ptr + 1;
407 end Error_Illegal_Character;
408
409 ----------------------------------
410 -- Error_Illegal_Wide_Character --
411 ----------------------------------
412
413 procedure Error_Illegal_Wide_Character is
414 begin
82c80734 415 Error_Msg ("illegal wide character", Wptr);
fbf5a39b
AC
416 end Error_Illegal_Wide_Character;
417
418 ---------------------
419 -- Error_Long_Line --
420 ---------------------
421
422 procedure Error_Long_Line is
423 begin
424 Error_Msg
425 ("this line is too long",
5950a3ac 426 Current_Line_Start + Source_Ptr (Opt.Max_Line_Length));
fbf5a39b
AC
427 end Error_Long_Line;
428
429 -------------------------------
430 -- Error_No_Double_Underline --
431 -------------------------------
432
433 procedure Error_No_Double_Underline is
434 begin
82c80734
RD
435 Underline_Found := False;
436
437 -- There are four cases, and we special case the messages
438
439 if Source (Scan_Ptr) = '_' then
440 if Source (Scan_Ptr - 1) = '_' then
441 Error_Msg_S
442 ("two consecutive underlines not permitted");
443 else
444 Error_Msg_S
445 ("underline cannot follow punctuation character");
446 end if;
447
448 else
449 if Source (Scan_Ptr - 1) = '_' then
450 Error_Msg_S
451 ("punctuation character cannot follow underline");
452 else
453 Error_Msg_S
454 ("two consecutive punctuation characters not permitted");
455 end if;
456 end if;
fbf5a39b
AC
457 end Error_No_Double_Underline;
458
459 ----------
460 -- Nlit --
461 ----------
462
463 procedure Nlit is
464
465 C : Character;
466 -- Current source program character
467
468 Base_Char : Character;
469 -- Either # or : (character at start of based number)
470
471 Base : Int;
472 -- Value of base
473
474 UI_Base : Uint;
475 -- Value of base in Uint format
476
477 UI_Int_Value : Uint;
478 -- Value of integer scanned by Scan_Integer in Uint format
479
480 UI_Num_Value : Uint;
481 -- Value of integer in numeric value being scanned
482
483 Scale : Int;
484 -- Scale value for real literal
485
486 UI_Scale : Uint;
487 -- Scale in Uint format
488
489 Exponent_Is_Negative : Boolean;
490 -- Set true for negative exponent
491
492 Extended_Digit_Value : Int;
493 -- Extended digit value
494
495 Point_Scanned : Boolean;
496 -- Flag for decimal point scanned in numeric literal
497
498 -----------------------
499 -- Local Subprograms --
500 -----------------------
501
502 procedure Error_Digit_Expected;
503 -- Signal error of bad digit, Scan_Ptr points to the location at
504 -- which the digit was expected on input, and is unchanged on return.
505
506 procedure Scan_Integer;
82c80734
RD
507 -- Procedure to scan integer literal. On entry, Scan_Ptr points to a
508 -- digit, on exit Scan_Ptr points past the last character of the
509 -- integer.
6b6fcd3e 510 --
82c80734
RD
511 -- For each digit encountered, UI_Int_Value is multiplied by 10, and
512 -- the value of the digit added to the result. In addition, the
513 -- value in Scale is decremented by one for each actual digit
fbf5a39b
AC
514 -- scanned.
515
516 --------------------------
517 -- Error_Digit_Expected --
518 --------------------------
519
520 procedure Error_Digit_Expected is
521 begin
522 Error_Msg_S ("digit expected");
523 end Error_Digit_Expected;
524
15ce9ca2
AC
525 ------------------
526 -- Scan_Integer --
527 ------------------
fbf5a39b
AC
528
529 procedure Scan_Integer is
530 C : Character;
531 -- Next character scanned
532
533 begin
534 C := Source (Scan_Ptr);
535
536 -- Loop through digits (allowing underlines)
537
538 loop
539 Accumulate_Checksum (C);
540 UI_Int_Value :=
541 UI_Int_Value * 10 + (Character'Pos (C) - Character'Pos ('0'));
542 Scan_Ptr := Scan_Ptr + 1;
543 Scale := Scale - 1;
544 C := Source (Scan_Ptr);
545
82c80734
RD
546 -- Case of underline encountered
547
fbf5a39b 548 if C = '_' then
30c20106
AC
549
550 -- We do not accumulate the '_' in the checksum, so that
551 -- 1_234 is equivalent to 1234, and does not trigger
552 -- compilation for "minimal recompilation" (gnatmake -m).
fbf5a39b
AC
553
554 loop
555 Scan_Ptr := Scan_Ptr + 1;
556 C := Source (Scan_Ptr);
557 exit when C /= '_';
558 Error_No_Double_Underline;
559 end loop;
560
561 if C not in '0' .. '9' then
562 Error_Digit_Expected;
563 exit;
564 end if;
565
566 else
567 exit when C not in '0' .. '9';
568 end if;
569 end loop;
fbf5a39b
AC
570 end Scan_Integer;
571
82c80734 572 -- Start of Processing for Nlit
fbf5a39b
AC
573
574 begin
575 Base := 10;
576 UI_Base := Uint_10;
577 UI_Int_Value := Uint_0;
578 Scale := 0;
579 Scan_Integer;
580 Scale := 0;
581 Point_Scanned := False;
582 UI_Num_Value := UI_Int_Value;
583
82c80734
RD
584 -- Various possibilities now for continuing the literal are period,
585 -- E/e (for exponent), or :/# (for based literal).
fbf5a39b
AC
586
587 Scale := 0;
588 C := Source (Scan_Ptr);
589
590 if C = '.' then
591
592 -- Scan out point, but do not scan past .. which is a range
593 -- sequence, and must not be eaten up scanning a numeric literal.
594
595 while C = '.' and then Source (Scan_Ptr + 1) /= '.' loop
596 Accumulate_Checksum ('.');
597
598 if Point_Scanned then
599 Error_Msg_S ("duplicate point ignored");
600 end if;
601
602 Point_Scanned := True;
603 Scan_Ptr := Scan_Ptr + 1;
604 C := Source (Scan_Ptr);
605
606 if C not in '0' .. '9' then
607 Error_Msg
608 ("real literal cannot end with point", Scan_Ptr - 1);
609 else
610 Scan_Integer;
611 UI_Num_Value := UI_Int_Value;
612 end if;
613 end loop;
614
82c80734
RD
615 -- Based literal case. The base is the value we already scanned.
616 -- In the case of colon, we insist that the following character
617 -- is indeed an extended digit or a period. This catches a number
618 -- of common errors, as well as catching the well known tricky
619 -- bug otherwise arising from "x : integer range 1 .. 10:= 6;"
fbf5a39b
AC
620
621 elsif C = '#'
622 or else (C = ':' and then
623 (Source (Scan_Ptr + 1) = '.'
624 or else
625 Source (Scan_Ptr + 1) in '0' .. '9'
626 or else
627 Source (Scan_Ptr + 1) in 'A' .. 'Z'
628 or else
629 Source (Scan_Ptr + 1) in 'a' .. 'z'))
630 then
5f3ab6fb
AC
631 if C = ':' then
632 Obsolescent_Check (Scan_Ptr);
633
634 if Warn_On_Obsolescent_Feature then
635 Error_Msg_S
636 ("use of "":"" is an obsolescent feature ('R'M 'J.2(3))?");
637 Error_Msg_S
638 ("\use ""'#"" instead?");
639 end if;
fbf5a39b
AC
640 end if;
641
642 Accumulate_Checksum (C);
643 Base_Char := C;
644 UI_Base := UI_Int_Value;
645
646 if UI_Base < 2 or else UI_Base > 16 then
647 Error_Msg_SC ("base not 2-16");
648 UI_Base := Uint_16;
649 end if;
650
651 Base := UI_To_Int (UI_Base);
652 Scan_Ptr := Scan_Ptr + 1;
653
654 -- Scan out extended integer [. integer]
655
656 C := Source (Scan_Ptr);
657 UI_Int_Value := Uint_0;
658 Scale := 0;
659
660 loop
661 if C in '0' .. '9' then
662 Accumulate_Checksum (C);
663 Extended_Digit_Value :=
664 Int'(Character'Pos (C)) - Int'(Character'Pos ('0'));
665
666 elsif C in 'A' .. 'F' then
667 Accumulate_Checksum (Character'Val (Character'Pos (C) + 32));
668 Extended_Digit_Value :=
669 Int'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10;
670
671 elsif C in 'a' .. 'f' then
672 Accumulate_Checksum (C);
673 Extended_Digit_Value :=
674 Int'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10;
675
676 else
677 Error_Msg_S ("extended digit expected");
678 exit;
679 end if;
680
681 if Extended_Digit_Value >= Base then
682 Error_Msg_S ("digit '>= base");
683 end if;
684
685 UI_Int_Value := UI_Int_Value * UI_Base + Extended_Digit_Value;
686 Scale := Scale - 1;
687 Scan_Ptr := Scan_Ptr + 1;
688 C := Source (Scan_Ptr);
689
690 if C = '_' then
691 loop
692 Accumulate_Checksum ('_');
693 Scan_Ptr := Scan_Ptr + 1;
694 C := Source (Scan_Ptr);
695 exit when C /= '_';
696 Error_No_Double_Underline;
697 end loop;
698
699 elsif C = '.' then
700 Accumulate_Checksum ('.');
701
702 if Point_Scanned then
703 Error_Msg_S ("duplicate point ignored");
704 end if;
705
706 Scan_Ptr := Scan_Ptr + 1;
707 C := Source (Scan_Ptr);
708 Point_Scanned := True;
709 Scale := 0;
710
711 elsif C = Base_Char then
712 Accumulate_Checksum (C);
713 Scan_Ptr := Scan_Ptr + 1;
714 exit;
715
716 elsif C = '#' or else C = ':' then
717 Error_Msg_S ("based number delimiters must match");
718 Scan_Ptr := Scan_Ptr + 1;
719 exit;
720
721 elsif not Identifier_Char (C) then
722 if Base_Char = '#' then
723 Error_Msg_S ("missing '#");
724 else
725 Error_Msg_S ("missing ':");
726 end if;
727
728 exit;
729 end if;
730
731 end loop;
732
733 UI_Num_Value := UI_Int_Value;
734 end if;
735
736 -- Scan out exponent
737
738 if not Point_Scanned then
739 Scale := 0;
740 UI_Scale := Uint_0;
741 else
742 UI_Scale := UI_From_Int (Scale);
743 end if;
744
745 if Source (Scan_Ptr) = 'e' or else Source (Scan_Ptr) = 'E' then
746 Accumulate_Checksum ('e');
747 Scan_Ptr := Scan_Ptr + 1;
748 Exponent_Is_Negative := False;
749
750 if Source (Scan_Ptr) = '+' then
751 Accumulate_Checksum ('+');
752 Scan_Ptr := Scan_Ptr + 1;
753
754 elsif Source (Scan_Ptr) = '-' then
755 Accumulate_Checksum ('-');
756
757 if not Point_Scanned then
758 Error_Msg_S
759 ("negative exponent not allowed for integer literal");
760 else
761 Exponent_Is_Negative := True;
762 end if;
763
764 Scan_Ptr := Scan_Ptr + 1;
765 end if;
766
767 UI_Int_Value := Uint_0;
768
769 if Source (Scan_Ptr) in '0' .. '9' then
770 Scan_Integer;
771 else
772 Error_Digit_Expected;
773 end if;
774
775 if Exponent_Is_Negative then
776 UI_Scale := UI_Scale - UI_Int_Value;
777 else
778 UI_Scale := UI_Scale + UI_Int_Value;
779 end if;
780 end if;
781
782 -- Case of real literal to be returned
783
784 if Point_Scanned then
785 Token := Tok_Real_Literal;
786 Real_Literal_Value :=
787 UR_From_Components (
788 Num => UI_Num_Value,
789 Den => -UI_Scale,
790 Rbase => Base);
791
82c80734 792 -- Case of integer literal to be returned
fbf5a39b
AC
793
794 else
795 Token := Tok_Integer_Literal;
796
797 if UI_Scale = 0 then
798 Int_Literal_Value := UI_Num_Value;
799
82c80734
RD
800 -- Avoid doing possibly expensive calculations in cases like
801 -- parsing 163E800_000# when semantics will not be done anyway.
802 -- This is especially useful when parsing garbled input.
fbf5a39b
AC
803
804 elsif Operating_Mode /= Check_Syntax
805 and then (Serious_Errors_Detected = 0 or else Try_Semantics)
806 then
807 Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale;
808
809 else
810 Int_Literal_Value := No_Uint;
fbf5a39b 811 end if;
fbf5a39b
AC
812 end if;
813
6b6fcd3e
AC
814 Accumulate_Token_Checksum;
815
fbf5a39b 816 return;
fbf5a39b
AC
817 end Nlit;
818
819 ----------
820 -- Slit --
821 ----------
822
823 procedure Slit is
824
825 Delimiter : Character;
826 -- Delimiter (first character of string)
827
828 C : Character;
829 -- Current source program character
830
831 Code : Char_Code;
832 -- Current character code value
833
834 Err : Boolean;
835 -- Error flag for Scan_Wide call
836
837 procedure Error_Bad_String_Char;
838 -- Signal bad character in string/character literal. On entry
82c80734
RD
839 -- Scan_Ptr points to the improper character encountered during the
840 -- scan. Scan_Ptr is not modified, so it still points to the bad
fbf5a39b
AC
841 -- character on return.
842
843 procedure Error_Unterminated_String;
844 -- Procedure called if a line terminator character is encountered
845 -- during scanning a string, meaning that the string is not properly
846 -- terminated.
847
848 procedure Set_String;
849 -- Procedure used to distinguish between string and operator symbol.
82c80734
RD
850 -- On entry the string has been scanned out, and its characters
851 -- start at Token_Ptr and end one character before Scan_Ptr. On exit
852 -- Token is set to Tok_String_Literal or Tok_Operator_Symbol as
853 -- appropriate, and Token_Node is appropriately initialized. In
854 -- addition, in the operator symbol case, Token_Name is
fbf5a39b
AC
855 -- appropriately set.
856
857 ---------------------------
858 -- Error_Bad_String_Char --
859 ---------------------------
860
861 procedure Error_Bad_String_Char is
862 C : constant Character := Source (Scan_Ptr);
863
864 begin
865 if C = HT then
866 Error_Msg_S ("horizontal tab not allowed in string");
867
868 elsif C = VT or else C = FF then
869 Error_Msg_S ("format effector not allowed in string");
870
871 elsif C in Upper_Half_Character then
872 Error_Msg_S ("(Ada 83) upper half character not allowed");
873
874 else
875 Error_Msg_S ("control character not allowed in string");
876 end if;
877 end Error_Bad_String_Char;
878
879 -------------------------------
880 -- Error_Unterminated_String --
881 -------------------------------
882
883 procedure Error_Unterminated_String is
884 begin
885 -- An interesting little refinement. Consider the following
886 -- examples:
887
888 -- A := "this is an unterminated string;
889 -- A := "this is an unterminated string &
890 -- P(A, "this is a parameter that didn't get terminated);
891
892 -- We fiddle a little to do slightly better placement in these
893 -- cases also if there is white space at the end of the line we
894 -- place the flag at the start of this white space, not at the
895 -- end. Note that we only have to test for blanks, since tabs
896 -- aren't allowed in strings in the first place and would have
897 -- caused an error message.
898
899 -- Two more cases that we treat specially are:
900
901 -- A := "this string uses the wrong terminator'
902 -- A := "this string uses the wrong terminator' &
903
904 -- In these cases we give a different error message as well
905
906 -- We actually reposition the scan pointer to the point where we
907 -- place the flag in these cases, since it seems a better bet on
908 -- the original intention.
909
910 while Source (Scan_Ptr - 1) = ' '
911 or else Source (Scan_Ptr - 1) = '&'
912 loop
913 Scan_Ptr := Scan_Ptr - 1;
914 Unstore_String_Char;
915 end loop;
916
917 -- Check for case of incorrect string terminator, but single quote
918 -- is not considered incorrect if the opening terminator misused
919 -- a single quote (error message already given).
920
921 if Delimiter /= '''
922 and then Source (Scan_Ptr - 1) = '''
923 then
924 Unstore_String_Char;
925 Error_Msg
926 ("incorrect string terminator character", Scan_Ptr - 1);
927 return;
928 end if;
929
930 if Source (Scan_Ptr - 1) = ';' then
931 Scan_Ptr := Scan_Ptr - 1;
932 Unstore_String_Char;
933
934 if Source (Scan_Ptr - 1) = ')' then
935 Scan_Ptr := Scan_Ptr - 1;
936 Unstore_String_Char;
937 end if;
938 end if;
939
940 Error_Msg_S ("missing string quote");
941 end Error_Unterminated_String;
942
943 ----------------
944 -- Set_String --
945 ----------------
946
947 procedure Set_String is
948 Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2);
949 C1 : Character;
950 C2 : Character;
951 C3 : Character;
952
953 begin
954 -- Token_Name is currently set to Error_Name. The following
955 -- section of code resets Token_Name to the proper Name_Op_xx
956 -- value if the string is a valid operator symbol, otherwise it is
957 -- left set to Error_Name.
958
959 if Slen = 1 then
960 C1 := Source (Token_Ptr + 1);
961
962 case C1 is
963 when '=' =>
964 Token_Name := Name_Op_Eq;
965
966 when '>' =>
967 Token_Name := Name_Op_Gt;
968
969 when '<' =>
970 Token_Name := Name_Op_Lt;
971
972 when '+' =>
973 Token_Name := Name_Op_Add;
974
975 when '-' =>
976 Token_Name := Name_Op_Subtract;
977
978 when '&' =>
979 Token_Name := Name_Op_Concat;
980
981 when '*' =>
982 Token_Name := Name_Op_Multiply;
983
984 when '/' =>
985 Token_Name := Name_Op_Divide;
986
987 when others =>
988 null;
989 end case;
990
991 elsif Slen = 2 then
992 C1 := Source (Token_Ptr + 1);
993 C2 := Source (Token_Ptr + 2);
994
995 if C1 = '*' and then C2 = '*' then
996 Token_Name := Name_Op_Expon;
997
998 elsif C2 = '=' then
999
1000 if C1 = '/' then
1001 Token_Name := Name_Op_Ne;
1002 elsif C1 = '<' then
1003 Token_Name := Name_Op_Le;
1004 elsif C1 = '>' then
1005 Token_Name := Name_Op_Ge;
1006 end if;
1007
1008 elsif (C1 = 'O' or else C1 = 'o') and then -- OR
1009 (C2 = 'R' or else C2 = 'r')
1010 then
1011 Token_Name := Name_Op_Or;
1012 end if;
1013
1014 elsif Slen = 3 then
1015 C1 := Source (Token_Ptr + 1);
1016 C2 := Source (Token_Ptr + 2);
1017 C3 := Source (Token_Ptr + 3);
1018
1019 if (C1 = 'A' or else C1 = 'a') and then -- AND
1020 (C2 = 'N' or else C2 = 'n') and then
1021 (C3 = 'D' or else C3 = 'd')
1022 then
1023 Token_Name := Name_Op_And;
1024
1025 elsif (C1 = 'A' or else C1 = 'a') and then -- ABS
1026 (C2 = 'B' or else C2 = 'b') and then
1027 (C3 = 'S' or else C3 = 's')
1028 then
1029 Token_Name := Name_Op_Abs;
1030
1031 elsif (C1 = 'M' or else C1 = 'm') and then -- MOD
1032 (C2 = 'O' or else C2 = 'o') and then
1033 (C3 = 'D' or else C3 = 'd')
1034 then
1035 Token_Name := Name_Op_Mod;
1036
1037 elsif (C1 = 'N' or else C1 = 'n') and then -- NOT
1038 (C2 = 'O' or else C2 = 'o') and then
1039 (C3 = 'T' or else C3 = 't')
1040 then
1041 Token_Name := Name_Op_Not;
1042
1043 elsif (C1 = 'R' or else C1 = 'r') and then -- REM
1044 (C2 = 'E' or else C2 = 'e') and then
1045 (C3 = 'M' or else C3 = 'm')
1046 then
1047 Token_Name := Name_Op_Rem;
1048
1049 elsif (C1 = 'X' or else C1 = 'x') and then -- XOR
1050 (C2 = 'O' or else C2 = 'o') and then
1051 (C3 = 'R' or else C3 = 'r')
1052 then
1053 Token_Name := Name_Op_Xor;
1054 end if;
1055
1056 end if;
1057
82c80734
RD
1058 -- If it is an operator symbol, then Token_Name is set. If it is
1059 -- some other string value, then Token_Name still contains
1060 -- Error_Name.
fbf5a39b
AC
1061
1062 if Token_Name = Error_Name then
1063 Token := Tok_String_Literal;
1064
1065 else
1066 Token := Tok_Operator_Symbol;
1067 end if;
fbf5a39b
AC
1068 end Set_String;
1069
82c80734 1070 -- Start of processing for Slit
fbf5a39b
AC
1071
1072 begin
1073 -- On entry, Scan_Ptr points to the opening character of the string
82c80734
RD
1074 -- which is either a percent, double quote, or apostrophe (single
1075 -- quote). The latter case is an error detected by the character
1076 -- literal circuit.
fbf5a39b
AC
1077
1078 Delimiter := Source (Scan_Ptr);
1079 Accumulate_Checksum (Delimiter);
1080 Start_String;
1081 Scan_Ptr := Scan_Ptr + 1;
1082
1083 -- Loop to scan out characters of string literal
1084
1085 loop
1086 C := Source (Scan_Ptr);
1087
1088 if C = Delimiter then
1089 Accumulate_Checksum (C);
1090 Scan_Ptr := Scan_Ptr + 1;
1091 exit when Source (Scan_Ptr) /= Delimiter;
1092 Code := Get_Char_Code (C);
1093 Accumulate_Checksum (C);
1094 Scan_Ptr := Scan_Ptr + 1;
1095
1096 else
1097 if C = '"' and then Delimiter = '%' then
1098 Error_Msg_S
1099 ("quote not allowed in percent delimited string");
1100 Code := Get_Char_Code (C);
1101 Scan_Ptr := Scan_Ptr + 1;
1102
1103 elsif (C = ESC
82c80734
RD
1104 and then Wide_Character_Encoding_Method
1105 in WC_ESC_Encoding_Method)
1106 or else (C in Upper_Half_Character
1107 and then Upper_Half_Encoding)
1108 or else (C = '['
1109 and then Source (Scan_Ptr + 1) = '"'
1110 and then Identifier_Char (Source (Scan_Ptr + 2)))
fbf5a39b 1111 then
82c80734 1112 Wptr := Scan_Ptr;
fbf5a39b 1113 Scan_Wide (Source, Scan_Ptr, Code, Err);
fbf5a39b
AC
1114
1115 if Err then
1116 Error_Illegal_Wide_Character;
1117 Code := Get_Char_Code (' ');
1118 end if;
1119
82c80734
RD
1120 Accumulate_Checksum (Code);
1121
357ac4df
RD
1122 -- In Ada 95 mode we allow any wide characters in a string
1123 -- but in Ada 2005, the set of characters allowed has been
1124 -- restricted to graphic characters.
1125
82c80734 1126 if Ada_Version >= Ada_05
c8427bff 1127 and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
82c80734
RD
1128 then
1129 Error_Msg
1130 ("(Ada 2005) non-graphic character not permitted " &
1131 "in string literal", Wptr);
1132 end if;
1133
fbf5a39b
AC
1134 else
1135 Accumulate_Checksum (C);
1136
1137 if C not in Graphic_Character then
1138 if C in Line_Terminator then
1139 Error_Unterminated_String;
1140 exit;
1141
1142 elsif C in Upper_Half_Character then
0ab80019 1143 if Ada_Version = Ada_83 then
fbf5a39b
AC
1144 Error_Bad_String_Char;
1145 end if;
1146
1147 else
1148 Error_Bad_String_Char;
1149 end if;
1150 end if;
1151
1152 Code := Get_Char_Code (C);
1153 Scan_Ptr := Scan_Ptr + 1;
1154 end if;
1155 end if;
1156
1157 Store_String_Char (Code);
1158
1159 if not In_Character_Range (Code) then
1160 Wide_Character_Found := True;
1161 end if;
1162 end loop;
1163
1164 String_Literal_Id := End_String;
1165 Set_String;
1166 return;
fbf5a39b
AC
1167 end Slit;
1168
82c80734 1169 -- Start of processing for Scan
fbf5a39b
AC
1170
1171 begin
1172 Prev_Token := Token;
1173 Prev_Token_Ptr := Token_Ptr;
1174 Token_Name := Error_Name;
1175
1176 -- The following loop runs more than once only if a format effector
1177 -- (tab, vertical tab, form feed, line feed, carriage return) is
1178 -- encountered and skipped, or some error situation, such as an
1179 -- illegal character, is encountered.
1180
82c80734
RD
1181 <<Scan_Next_Character>>
1182
fbf5a39b
AC
1183 loop
1184 -- Skip past blanks, loop is opened up for speed
1185
1186 while Source (Scan_Ptr) = ' ' loop
fbf5a39b
AC
1187 if Source (Scan_Ptr + 1) /= ' ' then
1188 Scan_Ptr := Scan_Ptr + 1;
1189 exit;
1190 end if;
1191
1192 if Source (Scan_Ptr + 2) /= ' ' then
1193 Scan_Ptr := Scan_Ptr + 2;
1194 exit;
1195 end if;
1196
1197 if Source (Scan_Ptr + 3) /= ' ' then
1198 Scan_Ptr := Scan_Ptr + 3;
1199 exit;
1200 end if;
1201
1202 if Source (Scan_Ptr + 4) /= ' ' then
1203 Scan_Ptr := Scan_Ptr + 4;
1204 exit;
1205 end if;
1206
1207 if Source (Scan_Ptr + 5) /= ' ' then
1208 Scan_Ptr := Scan_Ptr + 5;
1209 exit;
1210 end if;
1211
1212 if Source (Scan_Ptr + 6) /= ' ' then
1213 Scan_Ptr := Scan_Ptr + 6;
1214 exit;
1215 end if;
1216
1217 if Source (Scan_Ptr + 7) /= ' ' then
1218 Scan_Ptr := Scan_Ptr + 7;
1219 exit;
1220 end if;
1221
1222 Scan_Ptr := Scan_Ptr + 8;
1223 end loop;
1224
1225 -- We are now at a non-blank character, which is the first character
1226 -- of the token we will scan, and hence the value of Token_Ptr.
1227
1228 Token_Ptr := Scan_Ptr;
1229
82c80734
RD
1230 -- Here begins the main case statement which transfers control on the
1231 -- basis of the non-blank character we have encountered.
fbf5a39b
AC
1232
1233 case Source (Scan_Ptr) is
1234
1235 -- Line terminator characters
1236
82c80734
RD
1237 when CR | LF | FF | VT =>
1238 goto Scan_Line_Terminator;
fbf5a39b
AC
1239
1240 -- Horizontal tab, just skip past it
1241
1242 when HT =>
1243 if Style_Check then Style.Check_HT; end if;
1244 Scan_Ptr := Scan_Ptr + 1;
1245
82c80734
RD
1246 -- End of file character, treated as an end of file only if it is
1247 -- the last character in the buffer, otherwise it is ignored.
fbf5a39b
AC
1248
1249 when EOF =>
1250 if Scan_Ptr = Source_Last (Current_Source_File) then
1251 Check_End_Of_Line;
357ac4df 1252 if Style_Check then Style.Check_EOF; end if;
fbf5a39b
AC
1253 Token := Tok_EOF;
1254 return;
fbf5a39b
AC
1255 else
1256 Scan_Ptr := Scan_Ptr + 1;
1257 end if;
1258
1259 -- Ampersand
1260
1261 when '&' =>
1262 Accumulate_Checksum ('&');
1263
1264 if Source (Scan_Ptr + 1) = '&' then
1265 Error_Msg_S ("'&'& should be `AND THEN`");
1266 Scan_Ptr := Scan_Ptr + 2;
1267 Token := Tok_And;
1268 return;
1269
1270 else
1271 Scan_Ptr := Scan_Ptr + 1;
1272 Token := Tok_Ampersand;
1273 return;
1274 end if;
1275
82c80734
RD
1276 -- Asterisk (can be multiplication operator or double asterisk which
1277 -- is the exponentiation compound delimiter).
fbf5a39b
AC
1278
1279 when '*' =>
1280 Accumulate_Checksum ('*');
1281
1282 if Source (Scan_Ptr + 1) = '*' then
1283 Accumulate_Checksum ('*');
1284 Scan_Ptr := Scan_Ptr + 2;
1285 Token := Tok_Double_Asterisk;
1286 return;
1287
1288 else
1289 Scan_Ptr := Scan_Ptr + 1;
1290 Token := Tok_Asterisk;
1291 return;
1292 end if;
1293
1294 -- Colon, which can either be an isolated colon, or part of an
1295 -- assignment compound delimiter.
1296
1297 when ':' =>
1298 Accumulate_Checksum (':');
1299
1300 if Double_Char_Token ('=') then
1301 Token := Tok_Colon_Equal;
1302 if Style_Check then Style.Check_Colon_Equal; end if;
1303 return;
1304
1305 elsif Source (Scan_Ptr + 1) = '-'
1306 and then Source (Scan_Ptr + 2) /= '-'
1307 then
1308 Token := Tok_Colon_Equal;
1309 Error_Msg (":- should be :=", Scan_Ptr);
1310 Scan_Ptr := Scan_Ptr + 2;
1311 return;
1312
1313 else
1314 Scan_Ptr := Scan_Ptr + 1;
1315 Token := Tok_Colon;
1316 if Style_Check then Style.Check_Colon; end if;
1317 return;
1318 end if;
1319
1320 -- Left parenthesis
1321
1322 when '(' =>
1323 Accumulate_Checksum ('(');
1324 Scan_Ptr := Scan_Ptr + 1;
1325 Token := Tok_Left_Paren;
1326 if Style_Check then Style.Check_Left_Paren; end if;
1327 return;
1328
1329 -- Left bracket
1330
1331 when '[' =>
1332 if Source (Scan_Ptr + 1) = '"' then
82c80734 1333 goto Scan_Wide_Character;
fbf5a39b
AC
1334
1335 else
1336 Error_Msg_S ("illegal character, replaced by ""(""");
1337 Scan_Ptr := Scan_Ptr + 1;
1338 Token := Tok_Left_Paren;
1339 return;
1340 end if;
1341
1342 -- Left brace
1343
1344 when '{' =>
1345 Error_Msg_S ("illegal character, replaced by ""(""");
1346 Scan_Ptr := Scan_Ptr + 1;
1347 Token := Tok_Left_Paren;
1348 return;
1349
1350 -- Comma
1351
1352 when ',' =>
1353 Accumulate_Checksum (',');
1354 Scan_Ptr := Scan_Ptr + 1;
1355 Token := Tok_Comma;
1356 if Style_Check then Style.Check_Comma; end if;
1357 return;
1358
82c80734
RD
1359 -- Dot, which is either an isolated period, or part of a double dot
1360 -- compound delimiter sequence. We also check for the case of a
1361 -- digit following the period, to give a better error message.
fbf5a39b
AC
1362
1363 when '.' =>
1364 Accumulate_Checksum ('.');
1365
1366 if Double_Char_Token ('.') then
1367 Token := Tok_Dot_Dot;
1368 if Style_Check then Style.Check_Dot_Dot; end if;
1369 return;
1370
1371 elsif Source (Scan_Ptr + 1) in '0' .. '9' then
1372 Error_Msg_S ("numeric literal cannot start with point");
1373 Scan_Ptr := Scan_Ptr + 1;
1374
1375 else
1376 Scan_Ptr := Scan_Ptr + 1;
1377 Token := Tok_Dot;
1378 return;
1379 end if;
1380
1381 -- Equal, which can either be an equality operator, or part of the
1382 -- arrow (=>) compound delimiter.
1383
1384 when '=' =>
1385 Accumulate_Checksum ('=');
1386
1387 if Double_Char_Token ('>') then
1388 Token := Tok_Arrow;
1389 if Style_Check then Style.Check_Arrow; end if;
1390 return;
1391
1392 elsif Source (Scan_Ptr + 1) = '=' then
1393 Error_Msg_S ("== should be =");
1394 Scan_Ptr := Scan_Ptr + 1;
1395 end if;
1396
1397 Scan_Ptr := Scan_Ptr + 1;
1398 Token := Tok_Equal;
1399 return;
1400
1401 -- Greater than, which can be a greater than operator, greater than
1402 -- or equal operator, or first character of a right label bracket.
1403
1404 when '>' =>
1405 Accumulate_Checksum ('>');
1406
1407 if Double_Char_Token ('=') then
1408 Token := Tok_Greater_Equal;
1409 return;
1410
1411 elsif Double_Char_Token ('>') then
1412 Token := Tok_Greater_Greater;
1413 return;
1414
1415 else
1416 Scan_Ptr := Scan_Ptr + 1;
1417 Token := Tok_Greater;
1418 return;
1419 end if;
1420
1421 -- Less than, which can be a less than operator, less than or equal
1422 -- operator, or the first character of a left label bracket, or the
1423 -- first character of a box (<>) compound delimiter.
1424
1425 when '<' =>
1426 Accumulate_Checksum ('<');
1427
1428 if Double_Char_Token ('=') then
1429 Token := Tok_Less_Equal;
1430 return;
1431
1432 elsif Double_Char_Token ('>') then
1433 Token := Tok_Box;
1434 if Style_Check then Style.Check_Box; end if;
1435 return;
1436
1437 elsif Double_Char_Token ('<') then
1438 Token := Tok_Less_Less;
1439 return;
1440
1441 else
1442 Scan_Ptr := Scan_Ptr + 1;
1443 Token := Tok_Less;
1444 return;
1445 end if;
1446
1447 -- Minus, which is either a subtraction operator, or the first
1448 -- character of double minus starting a comment
1449
1450 when '-' => Minus_Case : begin
1451 if Source (Scan_Ptr + 1) = '>' then
1452 Error_Msg_S ("invalid token");
1453 Scan_Ptr := Scan_Ptr + 2;
1454 Token := Tok_Arrow;
1455 return;
1456
1457 elsif Source (Scan_Ptr + 1) /= '-' then
1458 Accumulate_Checksum ('-');
1459 Scan_Ptr := Scan_Ptr + 1;
1460 Token := Tok_Minus;
1461 return;
1462
1463 -- Comment
1464
1465 else -- Source (Scan_Ptr + 1) = '-' then
1466 if Style_Check then Style.Check_Comment; end if;
1467 Scan_Ptr := Scan_Ptr + 2;
383b2b42
RD
1468
1469 -- If we are in preprocessor mode with Replace_In_Comments set,
1470 -- then we return the "--" as a token on its own.
1471
1472 if Replace_In_Comments then
1473 Token := Tok_Comment;
1474 return;
1475 end if;
1476
1477 -- Otherwise scan out the comment
1478
c45b6ae0 1479 Start_Of_Comment := Scan_Ptr;
fbf5a39b
AC
1480
1481 -- Loop to scan comment (this loop runs more than once only if
1482 -- a horizontal tab or other non-graphic character is scanned)
1483
1484 loop
1485 -- Scan to non graphic character (opened up for speed)
1486
82c80734
RD
1487 -- Note that we just eat left brackets, which means that
1488 -- bracket notation cannot be used for end of line
1489 -- characters in comments. This seems a reasonable choice,
1490 -- since no one would ever use brackets notation in a real
1491 -- program in this situation, and if we allow brackets
1492 -- notation, we forbid some valid comments which contain a
1493 -- brackets sequence that happens to match an end of line
1494 -- character.
1495
fbf5a39b
AC
1496 loop
1497 exit when Source (Scan_Ptr) not in Graphic_Character;
1498 Scan_Ptr := Scan_Ptr + 1;
1499 exit when Source (Scan_Ptr) not in Graphic_Character;
1500 Scan_Ptr := Scan_Ptr + 1;
1501 exit when Source (Scan_Ptr) not in Graphic_Character;
1502 Scan_Ptr := Scan_Ptr + 1;
1503 exit when Source (Scan_Ptr) not in Graphic_Character;
1504 Scan_Ptr := Scan_Ptr + 1;
1505 exit when Source (Scan_Ptr) not in Graphic_Character;
1506 Scan_Ptr := Scan_Ptr + 1;
1507 end loop;
1508
1509 -- Keep going if horizontal tab
1510
1511 if Source (Scan_Ptr) = HT then
1512 if Style_Check then Style.Check_HT; end if;
1513 Scan_Ptr := Scan_Ptr + 1;
1514
1515 -- Terminate scan of comment if line terminator
1516
1517 elsif Source (Scan_Ptr) in Line_Terminator then
1518 exit;
1519
1520 -- Terminate scan of comment if end of file encountered
1521 -- (embedded EOF character or real last character in file)
1522
1523 elsif Source (Scan_Ptr) = EOF then
1524 exit;
1525
82c80734
RD
1526 -- If we have a wide character, we have to scan it out,
1527 -- because it might be a legitimate line terminator
1528
1529 elsif (Source (Scan_Ptr) = ESC
1530 and then Identifier_Char (ESC))
1531 or else
1532 (Source (Scan_Ptr) in Upper_Half_Character
1533 and then Upper_Half_Encoding)
1534 then
1535 declare
1536 Wptr : constant Source_Ptr := Scan_Ptr;
1537 Code : Char_Code;
1538 Err : Boolean;
1539
1540 begin
1541 Scan_Wide (Source, Scan_Ptr, Code, Err);
1542
1543 -- If not well formed wide character, then just skip
1544 -- past it and ignore it.
1545
1546 if Err then
1547 Scan_Ptr := Wptr + 1;
1548
1549 -- If UTF_32 terminator, terminate comment scan
1550
c8427bff 1551 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
82c80734
RD
1552 Scan_Ptr := Wptr;
1553 exit;
1554 end if;
1555 end;
1556
fbf5a39b
AC
1557 -- Keep going if character in 80-FF range, or is ESC. These
1558 -- characters are allowed in comments by RM-2.1(1), 2.7(2).
1559 -- They are allowed even in Ada 83 mode according to the
1560 -- approved AI. ESC was added to the AI in June 93.
1561
1562 elsif Source (Scan_Ptr) in Upper_Half_Character
82c80734 1563 or else Source (Scan_Ptr) = ESC
fbf5a39b
AC
1564 then
1565 Scan_Ptr := Scan_Ptr + 1;
1566
1567 -- Otherwise we have an illegal comment character
1568
1569 else
1570 Error_Illegal_Character;
1571 end if;
fbf5a39b
AC
1572 end loop;
1573
c45b6ae0
AC
1574 -- Note that, except when comments are tokens, we do NOT
1575 -- execute a return here, instead we fall through to reexecute
1576 -- the scan loop to look for a token.
1577
1578 if Comment_Is_Token then
1579 Name_Len := Integer (Scan_Ptr - Start_Of_Comment);
1580 Name_Buffer (1 .. Name_Len) :=
1581 String (Source (Start_Of_Comment .. Scan_Ptr - 1));
1582 Comment_Id := Name_Find;
1583 Token := Tok_Comment;
1584 return;
1585 end if;
fbf5a39b
AC
1586 end if;
1587 end Minus_Case;
1588
1589 -- Double quote starting a string literal
1590
1591 when '"' =>
1592 Slit;
1593 Post_Scan;
1594 return;
1595
1596 -- Percent starting a string literal
1597
1598 when '%' =>
5f3ab6fb
AC
1599 Obsolescent_Check (Token_Ptr);
1600
fbf5a39b
AC
1601 if Warn_On_Obsolescent_Feature then
1602 Error_Msg_S
1603 ("use of ""'%"" is an obsolescent feature ('R'M 'J.2(4))?");
1604 Error_Msg_S
1605 ("\use """""" instead?");
1606 end if;
1607
1608 Slit;
1609 Post_Scan;
1610 return;
1611
1612 -- Apostrophe. This can either be the start of a character literal,
1613 -- or an isolated apostrophe used in a qualified expression or an
1614 -- attribute. We treat it as a character literal if it does not
1615 -- follow a right parenthesis, identifier, the keyword ALL or
1616 -- a literal. This means that we correctly treat constructs like:
1617
1618 -- A := CHARACTER'('A');
1619
1620 -- Note that RM-2.2(7) does not require a separator between
1621 -- "CHARACTER" and "'" in the above.
1622
1623 when ''' => Char_Literal_Case : declare
1624 Code : Char_Code;
1625 Err : Boolean;
1626
1627 begin
1628 Accumulate_Checksum (''');
1629 Scan_Ptr := Scan_Ptr + 1;
1630
1631 -- Here is where we make the test to distinguish the cases. Treat
1632 -- as apostrophe if previous token is an identifier, right paren
1633 -- or the reserved word "all" (latter case as in A.all'Address)
82c80734
RD
1634 -- (or the reserved word "project" in project files). Also treat
1635 -- it as apostrophe after a literal (this catches some legitimate
1636 -- cases, like A."abs"'Address, and also gives better error
1637 -- behavior for impossible cases like 123'xxx).
fbf5a39b
AC
1638
1639 if Prev_Token = Tok_Identifier
1640 or else Prev_Token = Tok_Right_Paren
1641 or else Prev_Token = Tok_All
1642 or else Prev_Token = Tok_Project
1643 or else Prev_Token in Token_Class_Literal
1644 then
1645 Token := Tok_Apostrophe;
1646 if Style_Check then Style.Check_Apostrophe; end if;
1647 return;
1648
1649 -- Otherwise the apostrophe starts a character literal
1650
1651 else
82c80734 1652 -- Case of wide character literal
fbf5a39b
AC
1653
1654 if (Source (Scan_Ptr) = ESC
1655 and then
1656 Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
1657 or else
1658 (Source (Scan_Ptr) in Upper_Half_Character
1659 and then
1660 Upper_Half_Encoding)
1661 or else
1662 (Source (Scan_Ptr) = '['
1663 and then
1664 Source (Scan_Ptr + 1) = '"')
1665 then
82c80734 1666 Wptr := Scan_Ptr;
fbf5a39b
AC
1667 Scan_Wide (Source, Scan_Ptr, Code, Err);
1668 Accumulate_Checksum (Code);
1669
1670 if Err then
1671 Error_Illegal_Wide_Character;
357ac4df
RD
1672 Code := Character'Pos (' ');
1673
1674 -- In Ada 95 mode we allow any wide character in a character
1675 -- literal, but in Ada 2005, the set of characters allowed
1676 -- is restricted to graphic characters.
82c80734
RD
1677
1678 elsif Ada_Version >= Ada_05
c8427bff 1679 and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
82c80734
RD
1680 then
1681 Error_Msg
1682 ("(Ada 2005) non-graphic character not permitted " &
1683 "in character literal", Wptr);
fbf5a39b
AC
1684 end if;
1685
1686 if Source (Scan_Ptr) /= ''' then
1687 Error_Msg_S ("missing apostrophe");
1688 else
1689 Scan_Ptr := Scan_Ptr + 1;
1690 end if;
1691
1692 -- If we do not find a closing quote in the expected place then
1693 -- assume that we have a misguided attempt at a string literal.
1694
1695 -- However, if previous token is RANGE, then we return an
1696 -- apostrophe instead since this gives better error recovery
1697
1698 elsif Source (Scan_Ptr + 1) /= ''' then
fbf5a39b
AC
1699 if Prev_Token = Tok_Range then
1700 Token := Tok_Apostrophe;
1701 return;
1702
1703 else
1704 Scan_Ptr := Scan_Ptr - 1;
1705 Error_Msg_S
1706 ("strings are delimited by double quote character");
1707 Slit;
1708 Post_Scan;
1709 return;
1710 end if;
1711
1712 -- Otherwise we have a (non-wide) character literal
1713
1714 else
1715 Accumulate_Checksum (Source (Scan_Ptr));
1716
1717 if Source (Scan_Ptr) not in Graphic_Character then
1718 if Source (Scan_Ptr) in Upper_Half_Character then
0ab80019 1719 if Ada_Version = Ada_83 then
fbf5a39b
AC
1720 Error_Illegal_Character;
1721 end if;
1722
1723 else
1724 Error_Illegal_Character;
1725 end if;
1726 end if;
1727
1728 Code := Get_Char_Code (Source (Scan_Ptr));
1729 Scan_Ptr := Scan_Ptr + 2;
1730 end if;
1731
1732 -- Fall through here with Scan_Ptr updated past the closing
1733 -- quote, and Code set to the Char_Code value for the literal
1734
1735 Accumulate_Checksum (''');
1736 Token := Tok_Char_Literal;
1737 Set_Character_Literal_Name (Code);
1738 Token_Name := Name_Find;
1739 Character_Code := Code;
1740 Post_Scan;
1741 return;
1742 end if;
1743 end Char_Literal_Case;
1744
1745 -- Right parenthesis
1746
1747 when ')' =>
1748 Accumulate_Checksum (')');
1749 Scan_Ptr := Scan_Ptr + 1;
1750 Token := Tok_Right_Paren;
1751 if Style_Check then Style.Check_Right_Paren; end if;
1752 return;
1753
1754 -- Right bracket or right brace, treated as right paren
1755
1756 when ']' | '}' =>
1757 Error_Msg_S ("illegal character, replaced by "")""");
1758 Scan_Ptr := Scan_Ptr + 1;
1759 Token := Tok_Right_Paren;
1760 return;
1761
1762 -- Slash (can be division operator or first character of not equal)
1763
1764 when '/' =>
1765 Accumulate_Checksum ('/');
1766
1767 if Double_Char_Token ('=') then
1768 Token := Tok_Not_Equal;
1769 return;
1770 else
1771 Scan_Ptr := Scan_Ptr + 1;
1772 Token := Tok_Slash;
1773 return;
1774 end if;
1775
1776 -- Semicolon
1777
1778 when ';' =>
1779 Accumulate_Checksum (';');
1780 Scan_Ptr := Scan_Ptr + 1;
1781 Token := Tok_Semicolon;
1782 if Style_Check then Style.Check_Semicolon; end if;
1783 return;
1784
1785 -- Vertical bar
1786
1787 when '|' => Vertical_Bar_Case : begin
1788 Accumulate_Checksum ('|');
1789
1790 -- Special check for || to give nice message
1791
1792 if Source (Scan_Ptr + 1) = '|' then
1793 Error_Msg_S ("""'|'|"" should be `OR ELSE`");
1794 Scan_Ptr := Scan_Ptr + 2;
1795 Token := Tok_Or;
1796 return;
1797
1798 else
1799 Scan_Ptr := Scan_Ptr + 1;
1800 Token := Tok_Vertical_Bar;
1801 if Style_Check then Style.Check_Vertical_Bar; end if;
1802 return;
1803 end if;
1804 end Vertical_Bar_Case;
1805
1806 -- Exclamation, replacement character for vertical bar
1807
1808 when '!' => Exclamation_Case : begin
1809 Accumulate_Checksum ('!');
5f3ab6fb 1810 Obsolescent_Check (Token_Ptr);
fbf5a39b
AC
1811
1812 if Warn_On_Obsolescent_Feature then
1813 Error_Msg_S
1814 ("use of ""'!"" is an obsolescent feature ('R'M 'J.2(2))?");
1815 Error_Msg_S
1816 ("\use ""'|"" instead?");
1817 end if;
1818
1819 if Source (Scan_Ptr + 1) = '=' then
1820 Error_Msg_S ("'!= should be /=");
1821 Scan_Ptr := Scan_Ptr + 2;
1822 Token := Tok_Not_Equal;
1823 return;
1824
1825 else
1826 Scan_Ptr := Scan_Ptr + 1;
1827 Token := Tok_Vertical_Bar;
1828 return;
1829 end if;
fbf5a39b
AC
1830 end Exclamation_Case;
1831
1832 -- Plus
1833
1834 when '+' => Plus_Case : begin
1835 Accumulate_Checksum ('+');
1836 Scan_Ptr := Scan_Ptr + 1;
1837 Token := Tok_Plus;
1838 return;
1839 end Plus_Case;
1840
1841 -- Digits starting a numeric literal
1842
1843 when '0' .. '9' =>
1844 Nlit;
1845
1846 if Identifier_Char (Source (Scan_Ptr)) then
1847 Error_Msg_S
1848 ("delimiter required between literal and identifier");
1849 end if;
1850 Post_Scan;
1851 return;
1852
1853 -- Lower case letters
1854
1855 when 'a' .. 'z' =>
1856 Name_Len := 1;
82c80734 1857 Underline_Found := False;
fbf5a39b
AC
1858 Name_Buffer (1) := Source (Scan_Ptr);
1859 Accumulate_Checksum (Name_Buffer (1));
1860 Scan_Ptr := Scan_Ptr + 1;
1861 goto Scan_Identifier;
1862
1863 -- Upper case letters
1864
1865 when 'A' .. 'Z' =>
1866 Name_Len := 1;
82c80734 1867 Underline_Found := False;
fbf5a39b
AC
1868 Name_Buffer (1) :=
1869 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1870 Accumulate_Checksum (Name_Buffer (1));
1871 Scan_Ptr := Scan_Ptr + 1;
1872 goto Scan_Identifier;
1873
1874 -- Underline character
1875
1876 when '_' =>
1877 if Special_Characters ('_') then
1878 Token_Ptr := Scan_Ptr;
1879 Scan_Ptr := Scan_Ptr + 1;
1880 Token := Tok_Special;
1881 Special_Character := '_';
1882 return;
1883 end if;
1884
1885 Error_Msg_S ("identifier cannot start with underline");
1886 Name_Len := 1;
1887 Name_Buffer (1) := '_';
1888 Scan_Ptr := Scan_Ptr + 1;
82c80734 1889 Underline_Found := False;
fbf5a39b
AC
1890 goto Scan_Identifier;
1891
1892 -- Space (not possible, because we scanned past blanks)
1893
1894 when ' ' =>
1895 raise Program_Error;
1896
1897 -- Characters in top half of ASCII 8-bit chart
1898
1899 when Upper_Half_Character =>
1900
82c80734 1901 -- Wide character case
fbf5a39b
AC
1902
1903 if Upper_Half_Encoding then
82c80734 1904 goto Scan_Wide_Character;
fbf5a39b
AC
1905
1906 -- Otherwise we have OK Latin-1 character
1907
1908 else
1909 -- Upper half characters may possibly be identifier letters
82c80734
RD
1910 -- but can never be digits, so Identifier_Char can be used to
1911 -- test for a valid start of identifier character.
fbf5a39b
AC
1912
1913 if Identifier_Char (Source (Scan_Ptr)) then
1914 Name_Len := 0;
82c80734 1915 Underline_Found := False;
fbf5a39b
AC
1916 goto Scan_Identifier;
1917 else
1918 Error_Illegal_Character;
1919 end if;
1920 end if;
1921
1922 when ESC =>
1923
1924 -- ESC character, possible start of identifier if wide characters
1925 -- using ESC encoding are allowed in identifiers, which we can
1926 -- tell by looking at the Identifier_Char flag for ESC, which is
82c80734
RD
1927 -- only true if these conditions are met. In Ada 2005 mode, may
1928 -- also be valid UTF_32 space or line terminator character.
fbf5a39b
AC
1929
1930 if Identifier_Char (ESC) then
1931 Name_Len := 0;
82c80734 1932 goto Scan_Wide_Character;
fbf5a39b 1933 else
82c80734 1934 Error_Illegal_Character;
fbf5a39b
AC
1935 end if;
1936
1937 -- Invalid control characters
1938
c8427bff 1939 when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | ASCII.SO |
fbf5a39b
AC
1940 SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1941 EM | FS | GS | RS | US | DEL
1942 =>
1943 Error_Illegal_Character;
1944
1945 -- Invalid graphic characters
1946
1947 when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
82c80734 1948
fbf5a39b
AC
1949 -- If Set_Special_Character has been called for this character,
1950 -- set Scans.Special_Character and return a Special token.
1951
1952 if Special_Characters (Source (Scan_Ptr)) then
1953 Token_Ptr := Scan_Ptr;
1954 Token := Tok_Special;
1955 Special_Character := Source (Scan_Ptr);
1956 Scan_Ptr := Scan_Ptr + 1;
1957 return;
1958
82c80734 1959 -- Otherwise, this is an illegal character
fbf5a39b
AC
1960
1961 else
1962 Error_Illegal_Character;
1963 end if;
1964
1965 -- End switch on non-blank character
1966
1967 end case;
1968
1969 -- End loop past format effectors. The exit from this loop is by
1970 -- executing a return statement following completion of token scan
1971 -- (control never falls out of this loop to the code which follows)
1972
1973 end loop;
1974
82c80734
RD
1975 -- Wide_Character scanning routine. On entry we have encountered the
1976 -- initial character of a wide character sequence.
fbf5a39b 1977
82c80734 1978 <<Scan_Wide_Character>>
fbf5a39b 1979
82c80734
RD
1980 declare
1981 Code : Char_Code;
c8427bff 1982 Cat : Category;
82c80734 1983 Err : Boolean;
fbf5a39b 1984
82c80734
RD
1985 begin
1986 Wptr := Scan_Ptr;
1987 Scan_Wide (Source, Scan_Ptr, Code, Err);
fbf5a39b 1988
82c80734 1989 -- If bad wide character, signal error and continue scan
fbf5a39b 1990
82c80734
RD
1991 if Err then
1992 Error_Illegal_Wide_Character;
1993 goto Scan_Next_Character;
c8427bff
RD
1994 end if;
1995
1996 Cat := Get_Category (UTF_32 (Code));
fbf5a39b 1997
82c80734 1998 -- If OK letter, reset scan ptr and go scan identifier
fbf5a39b 1999
c8427bff 2000 if Is_UTF_32_Letter (Cat) then
82c80734
RD
2001 Scan_Ptr := Wptr;
2002 Name_Len := 0;
2003 Underline_Found := False;
2004 goto Scan_Identifier;
2005
2006 -- If OK wide space, ignore and keep scanning (we do not include
2007 -- any ignored spaces in checksum)
2008
c8427bff 2009 elsif Is_UTF_32_Space (Cat) then
82c80734
RD
2010 goto Scan_Next_Character;
2011
2012 -- If OK wide line terminator, terminate current line
2013
c8427bff 2014 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
82c80734
RD
2015 Scan_Ptr := Wptr;
2016 goto Scan_Line_Terminator;
2017
2018 -- Punctuation is an error (at start of identifier)
2019
c8427bff 2020 elsif Is_UTF_32_Punctuation (Cat) then
82c80734
RD
2021 Error_Msg
2022 ("identifier cannot start with punctuation", Wptr);
2023 Scan_Ptr := Wptr;
2024 Name_Len := 0;
2025 Underline_Found := False;
2026 goto Scan_Identifier;
2027
2028 -- Mark character is an error (at start of identifer)
2029
c8427bff 2030 elsif Is_UTF_32_Mark (Cat) then
82c80734
RD
2031 Error_Msg
2032 ("identifier cannot start with mark character", Wptr);
2033 Scan_Ptr := Wptr;
2034 Name_Len := 0;
2035 Underline_Found := False;
2036 goto Scan_Identifier;
2037
2038 -- Other format character is an error (at start of identifer)
2039
c8427bff 2040 elsif Is_UTF_32_Other (Cat) then
82c80734
RD
2041 Error_Msg
2042 ("identifier cannot start with other format character", Wptr);
2043 Scan_Ptr := Wptr;
2044 Name_Len := 0;
2045 Underline_Found := False;
2046 goto Scan_Identifier;
2047
2048 -- Extended digit character is an error. Could be bad start of
2049 -- identifier or bad literal. Not worth doing too much to try to
2050 -- distinguish these cases, but we will do a little bit.
2051
c8427bff 2052 elsif Is_UTF_32_Digit (Cat) then
82c80734
RD
2053 Error_Msg
2054 ("identifier cannot start with digit character", Wptr);
2055 Scan_Ptr := Wptr;
2056 Name_Len := 0;
2057 Underline_Found := False;
2058 goto Scan_Identifier;
2059
2060 -- All other wide characters are illegal here
fbf5a39b
AC
2061
2062 else
82c80734
RD
2063 Error_Illegal_Wide_Character;
2064 goto Scan_Next_Character;
fbf5a39b 2065 end if;
82c80734 2066 end;
fbf5a39b 2067
82c80734
RD
2068 -- Routine to scan line terminator. On entry Scan_Ptr points to a
2069 -- character which is one of FF,LR,CR,VT, or one of the wide characters
2070 -- that is treated as a line termiantor.
fbf5a39b 2071
82c80734
RD
2072 <<Scan_Line_Terminator>>
2073
2074 -- Check line too long
2075
2076 Check_End_Of_Line;
2077
2078 -- Set Token_Ptr, if End_Of_Line is a token, for the case when it is
2079 -- a physical line.
2080
2081 if End_Of_Line_Is_Token then
2082 Token_Ptr := Scan_Ptr;
2083 end if;
2084
2085 declare
2086 Physical : Boolean;
2087
2088 begin
2089 Skip_Line_Terminators (Scan_Ptr, Physical);
2090
2091 -- If we are at start of physical line, update scan pointers to
2092 -- reflect the start of the new line.
2093
2094 if Physical then
2095 Current_Line_Start := Scan_Ptr;
2096 Start_Column := Set_Start_Column;
2097 First_Non_Blank_Location := Scan_Ptr;
2098
2099 -- If End_Of_Line is a token, we return it as it is a
2100 -- physical line.
2101
2102 if End_Of_Line_Is_Token then
2103 Token := Tok_End_Of_Line;
2104 return;
2105 end if;
fbf5a39b 2106 end if;
82c80734
RD
2107 end;
2108
2109 goto Scan_Next_Character;
2110
2111 -- Identifier scanning routine. On entry, some initial characters of
2112 -- the identifier may have already been stored in Name_Buffer. If so,
2113 -- Name_Len has the number of characters stored. otherwise Name_Len is
2114 -- set to zero on entry. Underline_Found is also set False on entry.
2115
2116 <<Scan_Identifier>>
2117
2118 -- This loop scans as fast as possible past lower half letters and
2119 -- digits, which we expect to be the most common characters.
fbf5a39b 2120
82c80734
RD
2121 loop
2122 if Source (Scan_Ptr) in 'a' .. 'z'
2123 or else Source (Scan_Ptr) in '0' .. '9'
fbf5a39b 2124 then
82c80734
RD
2125 Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
2126 Accumulate_Checksum (Source (Scan_Ptr));
fbf5a39b 2127
82c80734
RD
2128 elsif Source (Scan_Ptr) in 'A' .. 'Z' then
2129 Name_Buffer (Name_Len + 1) :=
2130 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
2131 Accumulate_Checksum (Name_Buffer (Name_Len + 1));
fbf5a39b
AC
2132
2133 else
fbf5a39b
AC
2134 exit;
2135 end if;
2136
82c80734
RD
2137 Underline_Found := False;
2138 Scan_Ptr := Scan_Ptr + 1;
2139 Name_Len := Name_Len + 1;
fbf5a39b
AC
2140 end loop;
2141
2142 -- If we fall through, then we have encountered either an underline
2143 -- character, or an extended identifier character (i.e. one from the
82c80734
RD
2144 -- upper half), or a wide character, or an identifier terminator. The
2145 -- initial test speeds us up in the most common case where we have
2146 -- an identifier terminator. Note that ESC is an identifier character
2147 -- only if a wide character encoding method that uses ESC encoding
2148 -- is active, so if we find an ESC character we know that we have a
2149 -- wide character.
fbf5a39b
AC
2150
2151 if Identifier_Char (Source (Scan_Ptr)) then
2152
2153 -- Case of underline
2154
2155 if Source (Scan_Ptr) = '_' then
2156 Accumulate_Checksum ('_');
2157
82c80734
RD
2158 if Underline_Found then
2159 Error_No_Double_Underline;
fbf5a39b 2160 else
82c80734 2161 Underline_Found := True;
fbf5a39b
AC
2162 Name_Len := Name_Len + 1;
2163 Name_Buffer (Name_Len) := '_';
2164 end if;
2165
2166 Scan_Ptr := Scan_Ptr + 1;
2167 goto Scan_Identifier;
2168
2169 -- Upper half character
2170
2171 elsif Source (Scan_Ptr) in Upper_Half_Character
2172 and then not Upper_Half_Encoding
2173 then
2174 Accumulate_Checksum (Source (Scan_Ptr));
2175 Store_Encoded_Character
2176 (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
2177 Scan_Ptr := Scan_Ptr + 1;
82c80734 2178 Underline_Found := False;
fbf5a39b
AC
2179 goto Scan_Identifier;
2180
2181 -- Left bracket not followed by a quote terminates an identifier.
2182 -- This is an error, but we don't want to give a junk error msg
2183 -- about wide characters in this case!
2184
2185 elsif Source (Scan_Ptr) = '['
2186 and then Source (Scan_Ptr + 1) /= '"'
2187 then
2188 null;
2189
2190 -- We know we have a wide character encoding here (the current
2191 -- character is either ESC, left bracket, or an upper half
2192 -- character depending on the encoding method).
2193
2194 else
2195 -- Scan out the wide character and insert the appropriate
2196 -- encoding into the name table entry for the identifier.
2197
2198 declare
c8427bff
RD
2199 Code : Char_Code;
2200 Err : Boolean;
2201 Chr : Character;
2202 Cat : Category;
fbf5a39b
AC
2203
2204 begin
82c80734 2205 Wptr := Scan_Ptr;
fbf5a39b
AC
2206 Scan_Wide (Source, Scan_Ptr, Code, Err);
2207
2208 -- If error, signal error
2209
2210 if Err then
2211 Error_Illegal_Wide_Character;
2212
2213 -- If the character scanned is a normal identifier
2214 -- character, then we treat it that way.
2215
2216 elsif In_Character_Range (Code)
2217 and then Identifier_Char (Get_Character (Code))
2218 then
2219 Chr := Get_Character (Code);
2220 Accumulate_Checksum (Chr);
2221 Store_Encoded_Character
2222 (Get_Char_Code (Fold_Lower (Chr)));
82c80734 2223 Underline_Found := False;
fbf5a39b 2224
82c80734 2225 -- Here if not a normal identifier character
fbf5a39b
AC
2226
2227 else
fbf5a39b
AC
2228 -- Make sure we are allowing wide characters in
2229 -- identifiers. Note that we allow wide character
82c80734
RD
2230 -- notation for an OK identifier character. This in
2231 -- particular allows bracket or other notation to be
2232 -- used for upper half letters.
fbf5a39b 2233
5f3ab6fb
AC
2234 -- Wide characters are always allowed in Ada 2005
2235
2236 if Identifier_Character_Set /= 'w'
2237 and then Ada_Version < Ada_05
2238 then
fbf5a39b 2239 Error_Msg
82c80734
RD
2240 ("wide character not allowed in identifier", Wptr);
2241 end if;
2242
c8427bff
RD
2243 Cat := Get_Category (UTF_32 (Code));
2244
82c80734
RD
2245 -- If OK letter, store it folding to upper case. Note
2246 -- that we include the folded letter in the checksum.
2247
c8427bff
RD
2248 if Is_UTF_32_Letter (Cat) then
2249 Code :=
2250 Char_Code (UTF_32_To_Upper_Case (UTF_32 (Code)));
82c80734
RD
2251 Accumulate_Checksum (Code);
2252 Store_Encoded_Character (Code);
2253 Underline_Found := False;
2254
2255 -- If OK extended digit or mark, then store it
2256
c8427bff
RD
2257 elsif Is_UTF_32_Digit (Cat)
2258 or else Is_UTF_32_Mark (Cat)
82c80734
RD
2259 then
2260 Accumulate_Checksum (Code);
2261 Store_Encoded_Character (Code);
2262 Underline_Found := False;
2263
2264 -- Wide punctuation is also stored, but counts as an
2265 -- underline character for error checking purposes.
2266
c8427bff 2267 elsif Is_UTF_32_Punctuation (Cat) then
82c80734
RD
2268 Accumulate_Checksum (Code);
2269
2270 if Underline_Found then
2271 declare
2272 Cend : constant Source_Ptr := Scan_Ptr;
2273 begin
2274 Scan_Ptr := Wptr;
2275 Error_No_Double_Underline;
2276 Scan_Ptr := Cend;
2277 end;
2278
2279 else
2280 Store_Encoded_Character (Code);
2281 Underline_Found := True;
2282 end if;
2283
2284 -- Wide character in Unicode cateogory "Other, Format"
2285 -- is accepted in an identifier, but is ignored and not
2286 -- stored. It seems reasonable to exclude it from the
2287 -- checksum.
2288
357ac4df
RD
2289 -- Note that it is correct (see AI-395) to simply strip
2290 -- other format characters, before testing for double
2291 -- underlines, or for reserved words).
2292
c8427bff 2293 elsif Is_UTF_32_Other (Cat) then
82c80734
RD
2294 null;
2295
2296 -- Wide character in category Separator,Space terminates
2297
c8427bff 2298 elsif Is_UTF_32_Space (Cat) then
82c80734
RD
2299 goto Scan_Identifier_Complete;
2300
2301 -- Any other wide character is not acceptable
2302
2303 else
2304 Error_Msg
2305 ("invalid wide character in identifier", Wptr);
fbf5a39b
AC
2306 end if;
2307 end if;
fbf5a39b 2308
82c80734
RD
2309 goto Scan_Identifier;
2310 end;
fbf5a39b
AC
2311 end if;
2312 end if;
2313
82c80734
RD
2314 -- Scan of identifier is complete. The identifier is stored in
2315 -- Name_Buffer, and Scan_Ptr points past the last character.
fbf5a39b 2316
82c80734 2317 <<Scan_Identifier_Complete>>
fbf5a39b
AC
2318 Token_Name := Name_Find;
2319
82c80734
RD
2320 -- Check for identifier ending with underline or punctuation char
2321
2322 if Underline_Found then
2323 Underline_Found := False;
2324
2325 if Source (Scan_Ptr - 1) = '_' then
2326 Error_Msg
2327 ("identifier cannot end with underline", Scan_Ptr - 1);
2328 else
2329 Error_Msg
2330 ("identifier cannot end with punctuation character", Wptr);
2331 end if;
2332 end if;
2333
fbf5a39b
AC
2334 -- Here is where we check if it was a keyword
2335
2336 if Get_Name_Table_Byte (Token_Name) /= 0
0ab80019
AC
2337 and then (Ada_Version >= Ada_95
2338 or else Token_Name not in Ada_95_Reserved_Words)
82c80734
RD
2339 and then (Ada_Version >= Ada_05
2340 or else Token_Name not in Ada_2005_Reserved_Words)
fbf5a39b
AC
2341 then
2342 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
2343
82c80734
RD
2344 -- Deal with possible style check for non-lower case keyword, but
2345 -- we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords for
2346 -- this purpose if they appear as attribute designators. Actually
2347 -- we only check the first character for speed.
2348
2349 -- Ada 2005 (AI-284): Do not apply the style check in case of
2350 -- "pragma Interface"
fbf5a39b 2351
323b59c4
RD
2352 -- Ada 2005 (AI-340): Do not apply the style check in case of
2353 -- MOD attribute.
2354
fbf5a39b
AC
2355 if Style_Check
2356 and then Source (Token_Ptr) <= 'Z'
2357 and then (Prev_Token /= Tok_Apostrophe
2358 or else
323b59c4
RD
2359 (Token /= Tok_Access and then
2360 Token /= Tok_Delta and then
2361 Token /= Tok_Digits and then
2362 Token /= Tok_Mod and then
2363 Token /= Tok_Range))
82c80734
RD
2364 and then (Token /= Tok_Interface
2365 or else
2366 (Token = Tok_Interface
2367 and then Prev_Token /= Tok_Pragma))
fbf5a39b
AC
2368 then
2369 Style.Non_Lower_Case_Keyword;
2370 end if;
2371
82c80734
RD
2372 -- We must reset Token_Name since this is not an identifier and
2373 -- if we leave Token_Name set, the parser gets confused because
2374 -- it thinks it is dealing with an identifier instead of the
2375 -- corresponding keyword.
fbf5a39b
AC
2376
2377 Token_Name := No_Name;
6b6fcd3e 2378 Accumulate_Token_Checksum;
fbf5a39b
AC
2379 return;
2380
2381 -- It is an identifier after all
2382
2383 else
2384 Token := Tok_Identifier;
6b6fcd3e 2385 Accumulate_Token_Checksum;
fbf5a39b
AC
2386 Post_Scan;
2387 return;
2388 end if;
2389 end Scan;
6b6fcd3e 2390
c45b6ae0
AC
2391 --------------------------
2392 -- Set_Comment_As_Token --
2393 --------------------------
2394
2395 procedure Set_Comment_As_Token (Value : Boolean) is
2396 begin
2397 Comment_Is_Token := Value;
2398 end Set_Comment_As_Token;
fbf5a39b
AC
2399
2400 ------------------------------
2401 -- Set_End_Of_Line_As_Token --
2402 ------------------------------
2403
2404 procedure Set_End_Of_Line_As_Token (Value : Boolean) is
2405 begin
2406 End_Of_Line_Is_Token := Value;
2407 end Set_End_Of_Line_As_Token;
2408
2409 ---------------------------
2410 -- Set_Special_Character --
2411 ---------------------------
2412
2413 procedure Set_Special_Character (C : Character) is
2414 begin
2415 case C is
2416 when '#' | '$' | '_' | '?' | '@' | '`' | '\' | '^' | '~' =>
2417 Special_Characters (C) := True;
2418
2419 when others =>
2420 null;
2421 end case;
2422 end Set_Special_Character;
2423
2424 ----------------------
2425 -- Set_Start_Column --
2426 ----------------------
2427
2428 -- Note: it seems at first glance a little expensive to compute this value
2429 -- for every source line (since it is certainly not used for all source
2430 -- lines). On the other hand, it doesn't take much more work to skip past
2431 -- the initial white space on the line counting the columns than it would
2432 -- to scan past the white space using the standard scanning circuits.
2433
2434 function Set_Start_Column return Column_Number is
2435 Start_Column : Column_Number := 0;
2436
2437 begin
2438 -- Outer loop scans past horizontal tab characters
2439
2440 Tabs_Loop : loop
2441
2442 -- Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
2443 -- past the blanks and adjusting Start_Column to account for them.
2444
2445 Blanks_Loop : loop
2446 if Source (Scan_Ptr) = ' ' then
2447 if Source (Scan_Ptr + 1) = ' ' then
2448 if Source (Scan_Ptr + 2) = ' ' then
2449 if Source (Scan_Ptr + 3) = ' ' then
2450 if Source (Scan_Ptr + 4) = ' ' then
2451 if Source (Scan_Ptr + 5) = ' ' then
2452 if Source (Scan_Ptr + 6) = ' ' then
2453 Scan_Ptr := Scan_Ptr + 7;
2454 Start_Column := Start_Column + 7;
2455 else
2456 Scan_Ptr := Scan_Ptr + 6;
2457 Start_Column := Start_Column + 6;
2458 exit Blanks_Loop;
2459 end if;
2460 else
2461 Scan_Ptr := Scan_Ptr + 5;
2462 Start_Column := Start_Column + 5;
2463 exit Blanks_Loop;
2464 end if;
2465 else
2466 Scan_Ptr := Scan_Ptr + 4;
2467 Start_Column := Start_Column + 4;
2468 exit Blanks_Loop;
2469 end if;
2470 else
2471 Scan_Ptr := Scan_Ptr + 3;
2472 Start_Column := Start_Column + 3;
2473 exit Blanks_Loop;
2474 end if;
2475 else
2476 Scan_Ptr := Scan_Ptr + 2;
2477 Start_Column := Start_Column + 2;
2478 exit Blanks_Loop;
2479 end if;
2480 else
2481 Scan_Ptr := Scan_Ptr + 1;
2482 Start_Column := Start_Column + 1;
2483 exit Blanks_Loop;
2484 end if;
2485 else
2486 exit Blanks_Loop;
2487 end if;
2488 end loop Blanks_Loop;
2489
2490 -- Outer loop keeps going only if a horizontal tab follows
2491
2492 if Source (Scan_Ptr) = HT then
2493 if Style_Check then Style.Check_HT; end if;
2494 Scan_Ptr := Scan_Ptr + 1;
2495 Start_Column := (Start_Column / 8) * 8 + 8;
2496 else
2497 exit Tabs_Loop;
2498 end if;
2499
2500 end loop Tabs_Loop;
2501
2502 return Start_Column;
2503 end Set_Start_Column;
2504
2505end Scng;