]>
Commit | Line | Data |
---|---|---|
1eee94d3 GM |
1 | (* M2Lex.mod provides a non tokenised lexical analyser. |
2 | ||
83ffe9cd | 3 | Copyright (C) 2001-2023 Free Software Foundation, Inc. |
1eee94d3 GM |
4 | Contributed by Gaius Mulley <gaius.mulley@southwales.ac.uk>. |
5 | ||
6 | This file is part of GNU Modula-2. | |
7 | ||
8 | GNU Modula-2 is free software; you can redistribute it and/or modify | |
9 | it under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 3, or (at your option) | |
11 | any later version. | |
12 | ||
13 | GNU Modula-2 is distributed in the hope that it will be useful, but | |
14 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with GNU Modula-2; see the file COPYING3. If not see | |
20 | <http://www.gnu.org/licenses/>. *) | |
21 | ||
22 | IMPLEMENTATION MODULE M2Lex ; | |
23 | ||
24 | ||
25 | FROM FIO IMPORT File, OpenToRead, ReadChar, Close, IsNoError ; | |
26 | FROM StrIO IMPORT ReadString, WriteString, WriteLn ; | |
27 | FROM StdIO IMPORT Write ; | |
28 | FROM NumberIO IMPORT WriteCard ; | |
29 | FROM ASCII IMPORT nul, lf, cr, EOL ; | |
30 | FROM StrLib IMPORT StrCopy, StrEqual, StrLen ; | |
31 | ||
32 | ||
33 | CONST | |
34 | LineBuf = 1 ; | |
35 | Wrap = LineBuf+1 ; | |
36 | eof = 032C ; | |
37 | MaxStack= 10 ; | |
38 | ||
39 | VAR | |
40 | f: File ; | |
41 | Opened : BOOLEAN ; | |
42 | CurrentChar : CHAR ; | |
43 | NextChar : CHAR ; | |
44 | FileName : ARRAY [0..MaxLine] OF CHAR ; | |
45 | Lines : ARRAY [0..LineBuf] OF ARRAY [0..255] OF CHAR ; | |
46 | (* Need two lines since the delimiter of the CurrentSymbol *) | |
47 | (* maybe on the next line. *) | |
48 | HighNext : CARDINAL ; (* Length of the NextChar line. *) | |
49 | CurLine : CARDINAL ; (* Line number of the Current Char Line. *) | |
50 | NextLine : CARDINAL ; (* Line number of the Next Char Line. *) | |
51 | IndexCur : CARDINAL ; (* Index to the Lines array for Current Ln *) | |
52 | IndexNext : CARDINAL ; (* Index to the Lines array for NextChar Ln *) | |
53 | CurSym : CARDINAL ; (* Character start of the CurrentSymbol *) | |
54 | CurSymLine : CARDINAL ; (* Line number of the CurrentSymbol *) | |
55 | CurCharIndex : CARDINAL ; (* Character number of CurChar. *) | |
56 | NextCharIndex : CARDINAL ; (* Character number of NextChar. *) | |
57 | Eof : BOOLEAN ; (* End of source file. *) | |
58 | InQuotes : BOOLEAN ; (* If we are in quotes. *) | |
59 | QuoteChar : CHAR ; (* Quote character expected. *) | |
60 | Stack : ARRAY [0..MaxStack] OF ARRAY [0..255] OF CHAR ; | |
61 | StackPtr : CARDINAL ; | |
62 | ||
63 | ||
64 | (* | |
65 | IsSym - returns the result of the comparison between CurrentSymbol | |
66 | and Name. | |
67 | *) | |
68 | ||
69 | PROCEDURE IsSym (Name: ARRAY OF CHAR) : BOOLEAN ; | |
70 | BEGIN | |
71 | RETURN( StrEqual(CurrentSymbol, Name) ) | |
72 | END IsSym ; | |
73 | ||
74 | ||
75 | (* | |
76 | SymIs - if Name is equal to the CurrentSymbol the next Symbol is read | |
77 | and true is returned, otherwise false is returned. | |
78 | *) | |
79 | ||
80 | PROCEDURE SymIs (Name: ARRAY OF CHAR) : BOOLEAN ; | |
81 | BEGIN | |
82 | IF StrEqual(CurrentSymbol, Name) | |
83 | THEN | |
84 | GetSymbol ; | |
85 | RETURN( TRUE ) | |
86 | ELSE | |
87 | RETURN( FALSE ) | |
88 | END | |
89 | END SymIs ; | |
90 | ||
91 | ||
92 | (* | |
93 | WriteError - displays the source line and points to the symbol in error. | |
94 | The message, a, is displayed. | |
95 | *) | |
96 | ||
97 | PROCEDURE WriteError (a: ARRAY OF CHAR) ; | |
98 | VAR | |
99 | i: CARDINAL ; | |
100 | BEGIN | |
101 | WriteString(FileName) ; Write(':') ; WriteCard(CurSymLine, 0) ; Write(':') ; WriteString(a) ; | |
102 | WriteLn ; | |
103 | WriteString( Lines[IndexCur] ) ; WriteLn ; | |
104 | i := CurSym ; | |
105 | WHILE i>0 DO | |
106 | Write(' ') ; | |
107 | DEC(i) | |
108 | END ; | |
109 | i := StrLen(CurrentSymbol) ; | |
110 | WHILE i>0 DO | |
111 | Write('^') ; | |
112 | DEC(i) | |
113 | END ; | |
114 | WriteLn ; | |
115 | WriteString(a) ; WriteLn ; | |
116 | END WriteError ; | |
117 | ||
118 | ||
119 | (* | |
120 | OpenSource - Attempts to open the source file, a. | |
121 | The success of the operation is returned. | |
122 | *) | |
123 | ||
124 | PROCEDURE OpenSource (a: ARRAY OF CHAR) : BOOLEAN ; | |
125 | BEGIN | |
126 | f := OpenToRead(a) ; | |
127 | IF IsNoError(f) | |
128 | THEN | |
129 | StrCopy(a, FileName) ; | |
130 | Opened := TRUE ; | |
131 | Init ; | |
132 | RETURN( TRUE ) | |
133 | ELSE | |
134 | Opened := FALSE ; | |
135 | Eof := TRUE ; | |
136 | RETURN( FALSE ) | |
137 | END | |
138 | END OpenSource ; | |
139 | ||
140 | ||
141 | (* | |
142 | CloseSource - Closes the current open file. | |
143 | *) | |
144 | ||
145 | PROCEDURE CloseSource ; | |
146 | BEGIN | |
147 | IF Opened=TRUE | |
148 | THEN | |
149 | Opened := FALSE ; | |
150 | Close( f ) | |
151 | END | |
152 | END CloseSource ; | |
153 | ||
154 | ||
155 | (* | |
156 | GetSymbol - gets the next Symbol into CurrentSymbol. | |
157 | *) | |
158 | ||
159 | PROCEDURE GetSymbol ; | |
160 | BEGIN | |
161 | StrCopy( CurrentSymbol, LastSymbol ) ; | |
162 | IF StackPtr>0 | |
163 | THEN | |
164 | DEC(StackPtr) ; | |
165 | StrCopy( Stack[StackPtr], CurrentSymbol ) | |
166 | ELSE | |
167 | ReadSymbol( CurrentSymbol ) | |
168 | END | |
169 | END GetSymbol ; | |
170 | ||
171 | ||
172 | (* | |
173 | PutSymbol - pushes a symbol, Name, back onto the input. | |
174 | GetSymbol will set CurrentSymbol to, Name. | |
175 | *) | |
176 | ||
177 | PROCEDURE PutSymbol (Name: ARRAY OF CHAR) ; | |
178 | BEGIN | |
179 | IF StackPtr=MaxStack | |
180 | THEN | |
181 | WriteError('Maximum push back symbol exceeded - Increase CONST MaxStack') | |
182 | ELSE | |
183 | StrCopy(Name, Stack[StackPtr]) ; | |
184 | INC(StackPtr) | |
185 | END | |
186 | END PutSymbol ; | |
187 | ||
188 | ||
189 | PROCEDURE ReadSymbol (VAR a: ARRAY OF CHAR) ; | |
190 | VAR | |
191 | high, | |
192 | i : CARDINAL ; | |
193 | ok : BOOLEAN ; | |
194 | BEGIN | |
195 | high := HIGH(a) ; | |
196 | IF NOT Eof | |
197 | THEN | |
198 | IF InQuotes | |
199 | THEN | |
200 | i := 0 ; | |
201 | IF CurrentChar=QuoteChar | |
202 | THEN | |
203 | InQuotes := FALSE ; | |
204 | a[i] := QuoteChar ; | |
205 | INC(i) ; | |
206 | AdvanceChar | |
207 | ELSE | |
208 | (* Fill in string or character *) | |
209 | i := 0 ; | |
210 | REPEAT | |
211 | a[i] := CurrentChar ; | |
212 | INC(i) ; | |
213 | AdvanceChar | |
214 | UNTIL (CurrentChar=QuoteChar) OR Eof OR (i>high) ; | |
215 | END | |
216 | ELSE | |
217 | (* Get rid of all excess spaces *) | |
218 | ||
219 | REPEAT | |
220 | IF CurrentChar=' ' | |
221 | THEN | |
222 | WHILE (CurrentChar=' ') AND (NOT Eof) DO | |
223 | AdvanceChar | |
224 | END ; | |
225 | ok := FALSE | |
226 | ELSIF (CurrentChar='(') AND (NextChar='*') | |
227 | THEN | |
228 | ConsumeComments ; | |
229 | ok := FALSE | |
230 | ELSE | |
231 | ok := TRUE | |
232 | END | |
233 | UNTIL ok ; | |
234 | i := 0 ; | |
235 | CurSym := CurCharIndex ; | |
236 | CurSymLine := CurLine ; | |
237 | IF (CurrentChar='"') OR (CurrentChar="'") | |
238 | THEN | |
239 | InQuotes := TRUE ; | |
240 | QuoteChar := CurrentChar ; | |
241 | a[i] := CurrentChar ; | |
242 | AdvanceChar ; | |
243 | INC(i) | |
244 | ELSIF DoubleDelimiter() | |
245 | THEN | |
246 | a[i] := CurrentChar ; | |
247 | AdvanceChar ; | |
248 | INC(i) ; | |
249 | a[i] := CurrentChar ; | |
250 | AdvanceChar ; | |
251 | INC(i) | |
252 | ELSIF Delimiter() | |
253 | THEN | |
254 | a[i] := CurrentChar ; | |
255 | AdvanceChar ; | |
256 | INC(i) | |
257 | ELSE | |
258 | REPEAT | |
259 | a[i] := CurrentChar ; | |
260 | AdvanceChar ; | |
261 | INC(i) | |
262 | UNTIL Delimiter() OR (i>high) OR (CurrentChar=' ') OR Eof | |
263 | END | |
264 | END | |
265 | ELSE | |
266 | (* eof *) | |
267 | i := 0 ; | |
268 | a[i] := eof ; | |
269 | INC(i) | |
270 | END ; | |
271 | IF i<=HIGH(a) | |
272 | THEN | |
273 | a[i] := nul | |
274 | END | |
275 | END ReadSymbol ; | |
276 | ||
277 | ||
278 | (* | |
279 | ConsumeComments - consumes Modula-2 comments. | |
280 | *) | |
281 | ||
282 | PROCEDURE ConsumeComments ; | |
283 | VAR | |
284 | Level: CARDINAL ; | |
285 | BEGIN | |
286 | Level := 0 ; | |
287 | REPEAT | |
288 | IF (CurrentChar='(') AND (NextChar='*') | |
289 | THEN | |
290 | INC(Level) | |
291 | ELSIF (CurrentChar='*') AND (NextChar=')') | |
292 | THEN | |
293 | DEC(Level) | |
294 | END ; | |
295 | AdvanceChar ; | |
296 | UNTIL (Level=0) OR Eof ; | |
297 | AdvanceChar | |
298 | END ConsumeComments; | |
299 | ||
300 | ||
301 | (* Delimiter returns true if and only if CurrentChar is a delimiter *) | |
302 | ||
303 | PROCEDURE Delimiter() : BOOLEAN ; | |
304 | BEGIN | |
305 | IF (CurrentChar='-') OR | |
306 | (CurrentChar='+') OR (CurrentChar='*') OR (CurrentChar='\') OR | |
307 | (CurrentChar='|') OR (CurrentChar='(') OR (CurrentChar=')') OR | |
308 | (CurrentChar='"') OR (CurrentChar="'") OR (CurrentChar='{') | |
309 | THEN | |
310 | RETURN( TRUE ) | |
311 | ELSIF | |
312 | (CurrentChar='}') OR (CurrentChar='[') OR (CurrentChar=']') OR | |
313 | (CurrentChar='#') OR (CurrentChar='=') OR (CurrentChar='<') | |
314 | THEN | |
315 | RETURN( TRUE ) | |
316 | ELSIF | |
317 | (CurrentChar='>') OR (CurrentChar='.') OR (CurrentChar=';') OR | |
318 | (CurrentChar=':') OR (CurrentChar='^') OR (CurrentChar=',') | |
319 | THEN | |
320 | RETURN( TRUE ) | |
321 | ELSE | |
322 | RETURN( FALSE ) | |
323 | END | |
324 | END Delimiter ; | |
325 | ||
326 | ||
327 | PROCEDURE DoubleDelimiter () : BOOLEAN ; | |
328 | BEGIN | |
329 | RETURN ( | |
330 | ((CurrentChar='>') AND (NextChar='=')) OR | |
331 | ((CurrentChar='<') AND (NextChar='=')) OR | |
332 | ((CurrentChar='<') AND (NextChar='>')) OR | |
333 | ((CurrentChar=':') AND (NextChar='=')) OR | |
334 | ((CurrentChar='.') AND (NextChar='.')) | |
335 | ) | |
336 | END DoubleDelimiter ; | |
337 | ||
338 | ||
339 | PROCEDURE AdvanceChar ; | |
340 | BEGIN | |
341 | IF NOT Eof | |
342 | THEN | |
343 | CurrentChar := NextChar ; | |
344 | CurCharIndex := NextCharIndex ; | |
345 | IndexCur := IndexNext ; | |
346 | CurLine := NextLine ; | |
347 | IF CurrentChar=eof | |
348 | THEN | |
349 | Eof := TRUE | |
350 | ELSIF NextCharIndex=HighNext | |
351 | THEN | |
352 | IndexNext := (IndexCur+1) MOD Wrap ; | |
353 | HighNext := 0 ; | |
354 | REPEAT | |
355 | NextChar := ReadChar(f) ; | |
356 | IF NOT IsNoError(f) | |
357 | THEN | |
358 | NextChar := eof ; | |
359 | Lines[IndexNext][HighNext] := NextChar ; | |
360 | INC( HighNext ) | |
361 | END ; | |
362 | WHILE (NextChar#eof) AND (NextChar#lf) AND (NextChar#cr) AND (HighNext<MaxLine) DO | |
363 | Lines[IndexNext][HighNext] := NextChar ; | |
364 | INC( HighNext ) ; | |
365 | NextChar := ReadChar(f) ; | |
366 | IF NOT IsNoError(f) | |
367 | THEN | |
368 | NextChar := eof | |
369 | END | |
370 | END ; | |
371 | IF (NextChar=eof) OR (NextChar=lf) OR (NextChar=cr) | |
372 | THEN | |
373 | IF InQuotes | |
374 | THEN | |
375 | Lines[IndexNext][HighNext] := ' ' ; (* Space for delimiter *) | |
376 | Lines[IndexNext][HighNext+1] := nul ; | |
377 | WriteError('missing end of quote on this source line') ; HALT | |
378 | END ; | |
379 | INC( NextLine ) | |
380 | END | |
381 | UNTIL HighNext>0 ; | |
382 | IF HighNext>=MaxLine THEN WriteError('Line too long') ; HALT END ; | |
383 | Lines[IndexNext][HighNext] := ' ' ; (* Space for delimiter *) | |
384 | Lines[IndexNext][HighNext+1] := nul ; | |
385 | NextCharIndex := 0 ; | |
386 | NextChar := Lines[IndexNext][NextCharIndex] | |
387 | ELSE | |
388 | INC(NextCharIndex) ; | |
389 | NextChar := Lines[IndexNext][NextCharIndex] | |
390 | END | |
391 | END | |
392 | END AdvanceChar ; | |
393 | ||
394 | ||
395 | PROCEDURE Init ; | |
396 | BEGIN | |
397 | StackPtr := 0 ; | |
398 | InQuotes := FALSE ; | |
399 | Eof := FALSE ; | |
400 | IndexCur := 1 ; | |
401 | IndexNext := 0 ; | |
402 | CurCharIndex := 0 ; | |
403 | Lines[IndexCur][0] := nul ; | |
404 | HighNext := 0 ; | |
405 | NextCharIndex := 0 ; | |
406 | CurLine := 1 ; | |
407 | NextLine := 1 ; | |
408 | CurrentChar := ' ' ; | |
409 | NextChar := ' ' ; | |
410 | StrCopy("", CurrentSymbol) ; | |
411 | StrCopy("", LastSymbol) ; | |
412 | IndexCur := IndexNext | |
413 | END Init ; | |
414 | ||
415 | ||
416 | BEGIN | |
417 | Init | |
418 | END M2Lex. |