]> git.ipfire.org Git - thirdparty/glibc.git/blob - locale/programs/linereader.c
Update.
[thirdparty/glibc.git] / locale / programs / linereader.c
1 /* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <ctype.h>
25 #include <errno.h>
26 #include <libintl.h>
27 #include <stdarg.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "error.h"
32 #include "linereader.h"
33 #include "charset.h"
34 #include "stringtrans.h"
35
36
37 void *xmalloc (size_t __n);
38 void *xrealloc (void *__p, size_t __n);
39 char *xstrdup (const char *__str);
40
41
42 static struct token *get_toplvl_escape (struct linereader *lr);
43 static struct token *get_symname (struct linereader *lr);
44 static struct token *get_ident (struct linereader *lr);
45 static struct token *get_string (struct linereader *lr,
46 const struct charset_t *charset);
47
48
49 struct linereader *
50 lr_open (const char *fname, kw_hash_fct_t hf)
51 {
52 FILE *fp;
53 struct linereader *result;
54 int n;
55
56 if (fname == NULL || strcmp (fname, "-") == 0
57 || strcmp (fname, "/dev/stdin") == 0)
58 fp = stdin;
59 else
60 {
61 fp = fopen (fname, "r");
62 if (fp == NULL)
63 return NULL;
64 }
65
66 result = (struct linereader *) xmalloc (sizeof (*result));
67
68 result->fp = fp;
69 result->fname = xstrdup (fname ? : "<stdin>");
70 result->buf = NULL;
71 result->bufsize = 0;
72 result->lineno = 1;
73 result->idx = 0;
74 result->comment_char = '#';
75 result->escape_char = '\\';
76 result->translate_strings = 1;
77
78 n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
79 if (n < 0)
80 {
81 int save = errno;
82 fclose (result->fp);
83 free ((char *) result->fname);
84 free (result);
85 errno = save;
86 return NULL;
87 }
88
89 if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
90 n -= 2;
91
92 result->buf[n] = '\0';
93 result->bufact = n;
94 result->hash_fct = hf;
95
96 return result;
97 }
98
99
100 int
101 lr_eof (struct linereader *lr)
102 {
103 return lr->bufact = 0;
104 }
105
106
107 void
108 lr_close (struct linereader *lr)
109 {
110 fclose (lr->fp);
111 free (lr->buf);
112 free (lr);
113 }
114
115
116 int
117 lr_next (struct linereader *lr)
118 {
119 int n;
120
121 n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
122 if (n < 0)
123 return -1;
124
125 ++lr->lineno;
126
127 if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n')
128 {
129 /* An escaped newline character is substituted with a single <SP>. */
130 --n;
131 lr->buf[n - 1] = ' ';
132 }
133
134 lr->buf[n] = '\0';
135 lr->bufact = n;
136 lr->idx = 0;
137
138 return 0;
139 }
140
141
142 /* Defined in error.c. */
143 /* This variable is incremented each time `error' is called. */
144 extern unsigned int error_message_count;
145
146 /* The calling program should define program_name and set it to the
147 name of the executing program. */
148 extern char *program_name;
149
150
151 struct token *
152 lr_token (struct linereader *lr, const struct charset_t *charset)
153 {
154 int ch;
155
156 while (1)
157 {
158 do
159 {
160 ch = lr_getc (lr);
161
162 if (ch == EOF)
163 {
164 lr->token.tok = tok_eof;
165 return &lr->token;
166 };
167
168 if (ch == '\n')
169 {
170 lr->token.tok = tok_eol;
171 return &lr->token;
172 }
173 }
174 while (isspace (ch));
175
176 if (ch == EOF)
177 {
178 lr->token.tok = tok_eof;
179 return &lr->token;
180 };
181
182 if (ch != lr->comment_char)
183 break;
184
185 /* Ignore rest of line. */
186 lr_ignore_rest (lr, 0);
187 lr->token.tok = tok_eol;
188 return &lr->token;
189 }
190
191 /* Match escape sequences. */
192 if (ch == lr->escape_char)
193 return get_toplvl_escape (lr);
194
195 /* Match ellipsis. */
196 if (ch == '.' && strncmp (&lr->buf[lr->idx], "..", 2) == 0)
197 {
198 lr_getc (lr);
199 lr_getc (lr);
200 lr->token.tok = tok_ellipsis;
201 return &lr->token;
202 }
203
204 switch (ch)
205 {
206 case '<':
207 return get_symname (lr);
208
209 case '0' ... '9':
210 lr->token.tok = tok_number;
211 lr->token.val.num = ch - '0';
212
213 while (isdigit (ch = lr_getc (lr)))
214 {
215 lr->token.val.num *= 10;
216 lr->token.val.num += ch - '0';
217 }
218 if (isalpha (ch))
219 lr_error (lr, _("garbage at end of number"));
220 lr_ungetn (lr, 1);
221
222 return &lr->token;
223
224 case ';':
225 lr->token.tok = tok_semicolon;
226 return &lr->token;
227
228 case ',':
229 lr->token.tok = tok_comma;
230 return &lr->token;
231
232 case '(':
233 lr->token.tok = tok_open_brace;
234 return &lr->token;
235
236 case ')':
237 lr->token.tok = tok_close_brace;
238 return &lr->token;
239
240 case '"':
241 return get_string (lr, charset);
242
243 case '-':
244 ch = lr_getc (lr);
245 if (ch == '1')
246 {
247 lr->token.tok = tok_minus1;
248 return &lr->token;
249 }
250 lr_ungetn (lr, 2);
251 break;
252 }
253
254 return get_ident (lr);
255 }
256
257
258 static struct token *
259 get_toplvl_escape (struct linereader *lr)
260 {
261 /* This is supposed to be a numeric value. We return the
262 numerical value and the number of bytes. */
263 size_t start_idx = lr->idx - 1;
264 unsigned int value = 0;
265 int nbytes = 0;
266 int ch;
267
268 do
269 {
270 unsigned int byte = 0;
271 unsigned int base = 8;
272
273 ch = lr_getc (lr);
274
275 if (ch == 'd')
276 {
277 base = 10;
278 ch = lr_getc (lr);
279 }
280 else if (ch == 'x')
281 {
282 base = 16;
283 ch = lr_getc (lr);
284 }
285
286 if ((base == 16 && !isxdigit (ch))
287 || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
288 {
289 esc_error:
290 lr->token.val.str.start = &lr->buf[start_idx];
291
292 while (ch != EOF && !isspace (ch))
293 ch = lr_getc (lr);
294 lr->token.val.str.len = lr->idx - start_idx;
295
296 lr->token.tok = tok_error;
297 return &lr->token;
298 }
299
300 if (isdigit (ch))
301 byte = ch - '0';
302 else
303 byte = _tolower (ch) - 'a' + 10;
304
305 ch = lr_getc (lr);
306 if ((base == 16 && !isxdigit (ch))
307 || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
308 goto esc_error;
309
310 byte *= base;
311 if (isdigit (ch))
312 byte += ch - '0';
313 else
314 byte += _tolower (ch) - 'a' + 10;
315
316 ch = lr_getc (lr);
317 if (base != 16 && isdigit (ch))
318 {
319 byte *= base;
320 byte += ch - '0';
321
322 ch = lr_getc (lr);
323 }
324
325 value *= 256;
326 value += byte;
327
328 ++nbytes;
329 }
330 while (ch == lr->escape_char && nbytes < 4);
331
332 if (!isspace (ch))
333 lr_error (lr, _("garbage at end of character code specification"));
334
335 lr_ungetn (lr, 1);
336
337 lr->token.tok = tok_charcode;
338 lr->token.val.charcode.val = value;
339 lr->token.val.charcode.nbytes = nbytes;
340
341 return &lr->token;
342 }
343
344
/* Append CH to the growing character buffer of the enclosing function.
   The macro relies on three locals being in scope: `buf' (the storage),
   `bufact' (bytes in use) and `bufmax' (bytes allocated).  When the
   buffer is full its capacity is doubled first.  */
#define ADDC(ch) \
  do									      \
    {									      \
      if (bufact == bufmax)						      \
	buf = xrealloc (buf, bufmax *= 2);				      \
      buf[bufact++] = (ch);						      \
    }									      \
  while (0)
356
357
358 static struct token *
359 get_symname (struct linereader *lr)
360 {
361 /* Symbol in brackets. We must distinguish three kinds:
362 1. reserved words
363 2. ISO 10646 position values
364 3. all other. */
365 char *buf;
366 size_t bufact = 0;
367 size_t bufmax = 56;
368 const struct keyword_t *kw;
369 int ch;
370
371 buf = (char *) xmalloc (bufmax);
372
373 do
374 {
375 ch = lr_getc (lr);
376 if (ch == lr->escape_char)
377 {
378 int c2 = lr_getc (lr);
379 ADDC (c2);
380
381 if (c2 == '\n')
382 ch = '\n';
383 }
384 else
385 ADDC (ch);
386 }
387 while (ch != '>' && ch != '\n');
388
389 if (ch == '\n')
390 lr_error (lr, _("unterminated symbolic name"));
391
392 /* Test for ISO 10646 position value. */
393 if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
394 {
395 char *cp = buf + 1;
396 while (cp < &buf[bufact - 1] && isxdigit (*cp))
397 ++cp;
398
399 if (cp == &buf[bufact - 1])
400 {
401 /* Yes, it is. */
402 lr->token.tok = bufact == 6 ? tok_ucs2 : tok_ucs4;
403 lr->token.val.charcode.val = strtoul (buf, NULL, 16);
404 lr->token.val.charcode.nbytes = lr->token.tok == tok_ucs2 ? 2 : 4;
405
406 return &lr->token;
407 }
408 }
409
410 /* It is a symbolic name. Test for reserved words. */
411 kw = lr->hash_fct (buf, bufact - 1);
412
413 if (kw != NULL && kw->symname_or_ident == 1)
414 {
415 lr->token.tok = kw->token;
416 free (buf);
417 }
418 else
419 {
420 lr->token.tok = tok_bsymbol;
421
422 buf[bufact] = '\0';
423 buf = xrealloc (buf, bufact + 1);
424
425 lr->token.val.str.start = buf;
426 lr->token.val.str.len = bufact - 1;
427 }
428
429 return &lr->token;
430 }
431
432
433 static struct token *
434 get_ident (struct linereader *lr)
435 {
436 char *buf;
437 size_t bufact;
438 size_t bufmax = 56;
439 const struct keyword_t *kw;
440 int ch;
441
442 buf = xmalloc (bufmax);
443 bufact = 0;
444
445 ADDC (lr->buf[lr->idx - 1]);
446
447 while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
448 && ch != '<' && ch != ',')
449 /* XXX Handle escape sequences? */
450 ADDC (ch);
451
452 lr_ungetn (lr, 1);
453
454 kw = lr->hash_fct (buf, bufact);
455
456 if (kw != NULL && kw->symname_or_ident == 0)
457 {
458 lr->token.tok = kw->token;
459 free (buf);
460 }
461 else
462 {
463 lr->token.tok = tok_ident;
464
465 buf[bufact] = '\0';
466 buf = xrealloc (buf, bufact + 1);
467
468 lr->token.val.str.start = buf;
469 lr->token.val.str.len = bufact;
470 }
471
472 return &lr->token;
473 }
474
475
476 static struct token *
477 get_string (struct linereader *lr, const struct charset_t *charset)
478 {
479 int illegal_string = 0;
480 char *buf, *cp;
481 size_t bufact;
482 size_t bufmax = 56;
483 int ch;
484
485 buf = xmalloc (bufmax);
486 bufact = 0;
487
488 while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
489 if (ch != '<' || charset == NULL)
490 {
491 if (ch == lr->escape_char)
492 {
493 ch = lr_getc (lr);
494 if (ch == '\n' || ch == EOF)
495 break;
496 }
497 ADDC (ch);
498 }
499 else
500 {
501 /* We have to get the value of the symbol. */
502 unsigned int value;
503 size_t startidx = bufact;
504
505 if (!lr->translate_strings)
506 ADDC ('<');
507
508 while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
509 {
510 if (ch == lr->escape_char)
511 {
512 ch = lr_getc (lr);
513 if (ch == '\n' || ch == EOF)
514 break;
515 }
516 ADDC (ch);
517 }
518
519 if (ch == '\n' || ch == EOF)
520 lr_error (lr, _("unterminated string"));
521 else
522 if (!lr->translate_strings)
523 ADDC ('>');
524
525 if (lr->translate_strings)
526 {
527 value = charset_find_value (&charset->char_table, &buf[startidx],
528 bufact - startidx);
529 if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
530 illegal_string = 1;
531 bufact = startidx;
532
533 if (bufmax - bufact < 8)
534 {
535 bufmax *= 2;
536 buf = (char *) xrealloc (buf, bufmax);
537 }
538
539 cp = &buf[bufact];
540 if (encode_char (value, &cp))
541 illegal_string = 1;
542
543 bufact = cp - buf;
544 }
545 }
546
547 /* Catch errors with trailing escape character. */
548 if (bufact > 0 && buf[bufact - 1] == lr->escape_char
549 && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
550 {
551 lr_error (lr, _("illegal escape sequence at end of string"));
552 --bufact;
553 }
554 else if (ch == '\n' || ch == EOF)
555 lr_error (lr, _("unterminated string"));
556
557 /* Terminate string if necessary. */
558 if (lr->translate_strings)
559 {
560 cp = &buf[bufact];
561 if (encode_char (0, &cp))
562 illegal_string = 1;
563
564 bufact = cp - buf;
565 }
566 else
567 ADDC ('\0');
568
569 lr->token.tok = tok_string;
570
571 if (illegal_string)
572 {
573 free (buf);
574 lr->token.val.str.start = NULL;
575 lr->token.val.str.len = 0;
576 }
577 else
578 {
579 buf = xrealloc (buf, bufact + 1);
580
581 lr->token.val.str.start = buf;
582 lr->token.val.str.len = bufact;
583 }
584
585 return &lr->token;
586 }