]>
Commit | Line | Data |
---|---|---|
692ed3e7 | 1 | /* mclex.c -- lexer for Windows mc files parser. |
a2c58332 | 2 | Copyright (C) 2007-2022 Free Software Foundation, Inc. |
692ed3e7 NC |
3 | |
4 | Written by Kai Tietz, Onevision. | |
5 | ||
6 | This file is part of GNU Binutils. | |
7 | ||
8 | This program is free software; you can redistribute it and/or modify | |
9 | it under the terms of the GNU General Public License as published by | |
32866df7 | 10 | the Free Software Foundation; either version 3 of the License, or |
692ed3e7 NC |
11 | (at your option) any later version. |
12 | ||
13 | This program is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with this program; if not, write to the Free Software | |
20 | Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA | |
21 | 02110-1301, USA. */ | |
22 | ||
23 | /* This is a lexer used by the Windows mc file parser. | |
24 | It basically just recognizes a bunch of keywords. */ | |
25 | ||
26 | #include "sysdep.h" | |
27 | #include "bfd.h" | |
28 | #include "bucomm.h" | |
29 | #include "libiberty.h" | |
30 | #include "safe-ctype.h" | |
31 | #include "windmc.h" | |
32 | #include "mcparse.h" | |
33 | ||
34 | #include <assert.h> | |
35 | ||
/* Exported globals.  These flags select special lexing modes in yylex.  */

/* When set, yylex returns NL for the next newline it skips and then
   clears the flag.  */
bool mclex_want_nl = false;
/* When set, yylex hands back whole lines as MCLINE.  A line holding only
   a '.' yields MCENDLINE and clears the flag.  */
bool mclex_want_line = false;
/* When set, yylex reads the next token as a (possibly quoted) file name,
   returns MCFILENAME and clears the flag.  */
bool mclex_want_filename = false;

/* Local globals.  */

/* Start of the lexer's private copy of the input (see mc_set_content).  */
static unichar *input_stream = NULL;
/* Current read position inside input_stream.  */
static unichar *input_stream_pos = NULL;
/* Line number reported in diagnostics; bumped for each newline consumed.  */
static int input_line = 1;
/* File name reported in diagnostics (see mc_set_inputfile).  */
static const char *input_filename = NULL;
46 | ||
47 | void | |
48 | mc_set_content (const unichar *src) | |
49 | { | |
50 | if (!src) | |
51 | return; | |
52 | input_stream = input_stream_pos = unichar_dup (src); | |
53 | } | |
54 | ||
55 | void | |
56 | mc_set_inputfile (const char *name) | |
57 | { | |
58 | if (! name || *name == 0) | |
59 | input_filename = "-"; | |
60 | else | |
61 | { | |
62 | const char *s1 = strrchr (name, '/'); | |
63 | const char *s2 = strrchr (name, '\\'); | |
64 | ||
65 | if (! s1) | |
66 | s1 = s2; | |
67 | if (s1 && s2 && s1 < s2) | |
68 | s1 = s2; | |
69 | if (! s1) | |
70 | s1 = name; | |
71 | else | |
72 | s1++; | |
73 | s1 = xstrdup (s1); | |
74 | input_filename = s1; | |
75 | } | |
76 | } | |
77 | ||
78 | static void | |
79 | show_msg (const char *kind, const char *msg, va_list argp) | |
80 | { | |
81 | fprintf (stderr, "In %s at line %d: %s: ", input_filename, input_line, kind); | |
82 | vfprintf (stderr, msg, argp); | |
83 | fprintf (stderr, ".\n"); | |
84 | } | |
85 | ||
/* Report a warning.  S is a printf-style format for the remaining
   arguments; the message is emitted through show_msg with kind
   "warning".  */

void
mc_warn (const char *s, ...)
{
  va_list args;

  va_start (args, s);
  show_msg ("warning", s, args);
  va_end (args);
}
94 | ||
/* Report a fatal problem.  S is a printf-style format for the remaining
   arguments; the message is emitted through show_msg with kind "fatal"
   and then xexit (1) is called.  */

void
mc_fatal (const char *s, ...)
{
  va_list args;

  va_start (args, s);
  show_msg ("fatal", s, args);
  va_end (args);

  xexit (1);
}
104 | ||
105 | ||
/* Report a parse error.  S is a printf-style format for the remaining
   arguments; the message is emitted through show_msg with kind
   "parser".  Unlike mc_fatal this returns to the caller.  */

static void
mc_error (const char *s, ...)
{
  va_list args;

  va_start (args, s);
  show_msg ("parser", s, args);
  va_end (args);
}
114 | ||
/* Error callback for the generated parser.  S is a finished message, not
   a format string, so it is passed as an argument to a fixed "%s" format.
   Passing it directly as the format would interpret any '%' it contains
   and read variadic arguments that do not exist.  */

void
yyerror (const char *s)
{
  mc_error ("%s", s);
}
120 | ||
121 | static unichar * | |
122 | get_diff (unichar *end, unichar *start) | |
123 | { | |
124 | unichar *ret; | |
125 | unichar save = *end; | |
126 | ||
127 | *end = 0; | |
128 | ret = unichar_dup (start); | |
129 | *end = save; | |
130 | return ret; | |
131 | } | |
132 | ||
133 | static rc_uint_type | |
134 | parse_digit (unichar ch) | |
135 | { | |
136 | rc_uint_type base = 10, v = 0, c; | |
137 | ||
138 | if (ch == '0') | |
139 | { | |
140 | base = 8; | |
141 | switch (input_stream_pos[0]) | |
142 | { | |
143 | case 'x': case 'X': base = 16; input_stream_pos++; break; | |
144 | case 'o': case 'O': base = 8; input_stream_pos++; break; | |
145 | case 'b': case 'B': base = 2; input_stream_pos++; break; | |
146 | } | |
147 | } | |
148 | else | |
149 | v = (rc_uint_type) (ch - '0'); | |
150 | ||
151 | while ((ch = input_stream_pos[0]) != 0) | |
152 | { | |
153 | if (ch >= 'A' && ch <= 'F') | |
154 | c = (rc_uint_type) (ch - 'A') + 10; | |
155 | else if (ch >= 'a' && ch <= 'f') | |
156 | c = (rc_uint_type) (ch - 'a') + 10; | |
157 | else if (ch >= '0' && ch <= '9') | |
158 | c = (rc_uint_type) (ch - '0'); | |
159 | else | |
160 | break; | |
161 | v *= base; | |
162 | v += c; | |
163 | ++input_stream_pos; | |
164 | } | |
165 | if (input_stream_pos[0] == 'U' || input_stream_pos[0] == 'u') | |
166 | input_stream_pos++; | |
167 | if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l') | |
168 | input_stream_pos++; | |
169 | if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l') | |
170 | input_stream_pos++; | |
171 | return v; | |
172 | } | |
173 | ||
174 | static mc_keyword *keyword_top = NULL; | |
175 | ||
176 | const mc_keyword * | |
177 | enum_facility (int e) | |
178 | { | |
179 | mc_keyword *h = keyword_top; | |
180 | ||
181 | while (h != NULL) | |
182 | { | |
183 | while (h && strcmp (h->group_name, "facility") != 0) | |
184 | h = h->next; | |
185 | if (e == 0) | |
186 | return h; | |
187 | --e; | |
188 | if (h) | |
189 | h = h->next; | |
190 | } | |
191 | return h; | |
192 | } | |
193 | ||
194 | const mc_keyword * | |
195 | enum_severity (int e) | |
196 | { | |
197 | mc_keyword *h = keyword_top; | |
198 | ||
199 | while (h != NULL) | |
200 | { | |
201 | while (h && strcmp (h->group_name, "severity") != 0) | |
202 | h = h->next; | |
203 | if (e == 0) | |
204 | return h; | |
205 | --e; | |
206 | if (h) | |
207 | h = h->next; | |
208 | } | |
209 | return h; | |
210 | } | |
211 | ||
212 | static void | |
213 | mc_add_keyword_ascii (const char *sz, int rid, const char *grp, rc_uint_type nv, const char *sv) | |
214 | { | |
95086e1e | 215 | unichar *usz = NULL, *usv = NULL; |
692ed3e7 NC |
216 | rc_uint_type usz_len; |
217 | ||
218 | unicode_from_codepage (&usz_len, &usz, sz, CP_ACP); | |
219 | if (sv) | |
220 | unicode_from_codepage (&usz_len, &usv, sv, CP_ACP); | |
221 | mc_add_keyword (usz, rid, grp, nv, usv); | |
222 | } | |
223 | ||
224 | void | |
225 | mc_add_keyword (unichar *usz, int rid, const char *grp, rc_uint_type nv, unichar *sv) | |
226 | { | |
227 | mc_keyword *p, *c, *n; | |
228 | size_t len = unichar_len (usz); | |
229 | ||
230 | c = keyword_top; | |
231 | p = NULL; | |
232 | while (c != NULL) | |
233 | { | |
234 | if (c->len > len) | |
235 | break; | |
236 | if (c->len == len) | |
237 | { | |
238 | int e = memcmp (usz, c->usz, len * sizeof (unichar)); | |
239 | ||
240 | if (e < 0) | |
241 | break; | |
242 | if (! e) | |
243 | { | |
244 | if (! strcmp (grp, "keyword") || strcmp (c->group_name, grp) != 0) | |
245 | fatal (_("Duplicate symbol entered into keyword list.")); | |
246 | c->rid = rid; | |
247 | c->nval = nv; | |
248 | c->sval = (!sv ? NULL : unichar_dup (sv)); | |
249 | if (! strcmp (grp, "language")) | |
250 | { | |
251 | const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv); | |
252 | ||
253 | if (lag == NULL) | |
254 | fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv); | |
255 | memcpy (&c->lang_info, lag, sizeof (*lag)); | |
256 | } | |
257 | return; | |
258 | } | |
259 | } | |
260 | c = (p = c)->next; | |
261 | } | |
262 | n = xmalloc (sizeof (mc_keyword)); | |
263 | n->next = c; | |
264 | n->len = len; | |
265 | n->group_name = grp; | |
266 | n->usz = usz; | |
267 | n->rid = rid; | |
268 | n->nval = nv; | |
269 | n->sval = (!sv ? NULL : unichar_dup (sv)); | |
270 | if (! strcmp (grp, "language")) | |
271 | { | |
272 | const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv); | |
273 | if (lag == NULL) | |
274 | fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv); | |
275 | memcpy (&n->lang_info, lag, sizeof (*lag)); | |
276 | } | |
277 | if (! p) | |
278 | keyword_top = n; | |
279 | else | |
280 | p->next = n; | |
281 | } | |
282 | ||
283 | static int | |
284 | mc_token (const unichar *t, size_t len) | |
285 | { | |
286 | static int was_init = 0; | |
287 | mc_keyword *k; | |
288 | ||
289 | if (! was_init) | |
290 | { | |
291 | was_init = 1; | |
292 | mc_add_keyword_ascii ("OutputBase", MCOUTPUTBASE, "keyword", 0, NULL); | |
293 | mc_add_keyword_ascii ("MessageIdTypedef", MCMESSAGEIDTYPEDEF, "keyword", 0, NULL); | |
294 | mc_add_keyword_ascii ("SeverityNames", MCSEVERITYNAMES, "keyword", 0, NULL); | |
295 | mc_add_keyword_ascii ("FacilityNames", MCFACILITYNAMES, "keyword", 0, NULL); | |
296 | mc_add_keyword_ascii ("LanguageNames", MCLANGUAGENAMES, "keyword", 0, NULL); | |
297 | mc_add_keyword_ascii ("MessageId", MCMESSAGEID, "keyword", 0, NULL); | |
298 | mc_add_keyword_ascii ("Severity", MCSEVERITY, "keyword", 0, NULL); | |
299 | mc_add_keyword_ascii ("Facility", MCFACILITY, "keyword", 0, NULL); | |
300 | mc_add_keyword_ascii ("SymbolicName", MCSYMBOLICNAME, "keyword", 0, NULL); | |
301 | mc_add_keyword_ascii ("Language", MCLANGUAGE, "keyword", 0, NULL); | |
302 | mc_add_keyword_ascii ("Success", MCTOKEN, "severity", 0, NULL); | |
303 | mc_add_keyword_ascii ("Informational", MCTOKEN, "severity", 1, NULL); | |
304 | mc_add_keyword_ascii ("Warning", MCTOKEN, "severity", 2, NULL); | |
305 | mc_add_keyword_ascii ("Error", MCTOKEN, "severity", 3, NULL); | |
306 | mc_add_keyword_ascii ("System", MCTOKEN, "facility", 0xff, NULL); | |
307 | mc_add_keyword_ascii ("Application", MCTOKEN, "facility", 0xfff, NULL); | |
308 | mc_add_keyword_ascii ("English", MCTOKEN, "language", 0x409, "MSG00001"); | |
309 | } | |
310 | k = keyword_top; | |
311 | if (!len || !t || *t == 0) | |
312 | return -1; | |
313 | while (k != NULL) | |
314 | { | |
315 | if (k->len > len) | |
316 | break; | |
317 | if (k->len == len) | |
318 | { | |
319 | if (! memcmp (k->usz, t, len * sizeof (unichar))) | |
320 | { | |
321 | if (k->rid == MCTOKEN) | |
322 | yylval.tok = k; | |
323 | return k->rid; | |
324 | } | |
325 | } | |
326 | k = k->next; | |
327 | } | |
328 | return -1; | |
329 | } | |
330 | ||
25065fcd RH |
331 | /* Skip characters in input_stream_pos up to and including a newline |
332 | character. Returns non-zero if the newline was found, zero otherwise. */ | |
333 | ||
334 | static int | |
335 | skip_until_eol (void) | |
336 | { | |
337 | while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n') | |
338 | ++input_stream_pos; | |
339 | if (input_stream_pos[0] == 0) | |
340 | return 0; | |
341 | if (input_stream_pos[0] == '\n') | |
6b5473c9 RH |
342 | { |
343 | ++input_stream_pos; | |
344 | input_line += 1; | |
345 | } | |
25065fcd RH |
346 | return 1; |
347 | } | |
348 | ||
692ed3e7 NC |
349 | int |
350 | yylex (void) | |
351 | { | |
352 | unichar *start_token; | |
353 | unichar ch; | |
354 | ||
355 | if (! input_stream_pos) | |
356 | { | |
357 | fatal ("Input stream not setuped.\n"); | |
358 | return -1; | |
359 | } | |
25065fcd | 360 | |
692ed3e7 NC |
361 | if (mclex_want_line) |
362 | { | |
363 | start_token = input_stream_pos; | |
8affa48a JA |
364 | if (input_stream_pos[0] == 0) |
365 | return -1; | |
25065fcd RH |
366 | /* PR 26082: Reject a period followed by EOF. */ |
367 | if (input_stream_pos[0] == '.' && input_stream_pos[1] == 0) | |
368 | return -1; | |
692ed3e7 NC |
369 | if (input_stream_pos[0] == '.' |
370 | && (input_stream_pos[1] == '\n' | |
371 | || (input_stream_pos[1] == '\r' && input_stream_pos[2] == '\n'))) | |
8affa48a | 372 | { |
015dc7e1 | 373 | mclex_want_line = false; |
25065fcd | 374 | return skip_until_eol () ? MCENDLINE : -1; |
8affa48a | 375 | } |
25065fcd RH |
376 | if (!skip_until_eol ()) |
377 | return -1; | |
692ed3e7 NC |
378 | yylval.ustr = get_diff (input_stream_pos, start_token); |
379 | return MCLINE; | |
380 | } | |
25065fcd | 381 | |
692ed3e7 NC |
382 | while ((ch = input_stream_pos[0]) <= 0x20) |
383 | { | |
384 | if (ch == 0) | |
385 | return -1; | |
386 | ++input_stream_pos; | |
387 | if (ch == '\n') | |
388 | input_line += 1; | |
389 | if (mclex_want_nl && ch == '\n') | |
390 | { | |
015dc7e1 | 391 | mclex_want_nl = false; |
692ed3e7 NC |
392 | return NL; |
393 | } | |
394 | } | |
395 | start_token = input_stream_pos; | |
396 | ++input_stream_pos; | |
397 | if (mclex_want_filename) | |
398 | { | |
015dc7e1 | 399 | mclex_want_filename = false; |
692ed3e7 NC |
400 | if (ch == '"') |
401 | { | |
402 | start_token++; | |
403 | while ((ch = input_stream_pos[0]) != 0) | |
404 | { | |
405 | if (ch == '"') | |
406 | break; | |
407 | ++input_stream_pos; | |
408 | } | |
409 | yylval.ustr = get_diff (input_stream_pos, start_token); | |
410 | if (ch == '"') | |
411 | ++input_stream_pos; | |
412 | } | |
413 | else | |
414 | { | |
415 | while ((ch = input_stream_pos[0]) != 0) | |
416 | { | |
417 | if (ch <= 0x20 || ch == ')') | |
418 | break; | |
419 | ++input_stream_pos; | |
420 | } | |
421 | yylval.ustr = get_diff (input_stream_pos, start_token); | |
422 | } | |
423 | return MCFILENAME; | |
424 | } | |
425 | switch (ch) | |
426 | { | |
427 | case ';': | |
428 | ++start_token; | |
25065fcd RH |
429 | if (!skip_until_eol ()) |
430 | return -1; | |
692ed3e7 NC |
431 | yylval.ustr = get_diff (input_stream_pos, start_token); |
432 | return MCCOMMENT; | |
433 | case '=': | |
434 | return '='; | |
435 | case '(': | |
436 | return '('; | |
437 | case ')': | |
438 | return ')'; | |
439 | case '+': | |
440 | return '+'; | |
441 | case ':': | |
442 | return ':'; | |
443 | case '0': case '1': case '2': case '3': case '4': | |
444 | case '5': case '6': case '7': case '8': case '9': | |
445 | yylval.ival = parse_digit (ch); | |
446 | return MCNUMBER; | |
447 | default: | |
448 | if (ch >= 0x40) | |
449 | { | |
450 | int ret; | |
451 | while (input_stream_pos[0] >= 0x40 || (input_stream_pos[0] >= '0' && input_stream_pos[0] <= '9')) | |
452 | ++input_stream_pos; | |
453 | ret = mc_token (start_token, (size_t) (input_stream_pos - start_token)); | |
454 | if (ret != -1) | |
455 | return ret; | |
456 | yylval.ustr = get_diff (input_stream_pos, start_token); | |
457 | return MCIDENT; | |
458 | } | |
314ec7ae | 459 | mc_error ("illegal character 0x%x.", ch); |
692ed3e7 NC |
460 | } |
461 | return -1; | |
462 | } |