]>
git.ipfire.org Git - thirdparty/glibc.git/blob - locale/programs/linereader.c
1 /* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
32 #include "linereader.h"
34 #include "stringtrans.h"
37 void *xmalloc (size_t __n
);
38 void *xrealloc (void *__p
, size_t __n
);
39 char *xstrdup (const char *__str
);
42 static struct token
*get_toplvl_escape (struct linereader
*lr
);
43 static struct token
*get_symname (struct linereader
*lr
);
44 static struct token
*get_ident (struct linereader
*lr
);
45 static struct token
*get_string (struct linereader
*lr
,
46 const struct charset_t
*charset
);
50 lr_open (const char *fname
, kw_hash_fct_t hf
)
53 struct linereader
*result
;
56 if (fname
== NULL
|| strcmp (fname
, "-") == 0
57 || strcmp (fname
, "/dev/stdin") == 0)
61 fp
= fopen (fname
, "r");
66 result
= (struct linereader
*) xmalloc (sizeof (*result
));
69 result
->fname
= xstrdup (fname
? : "<stdin>");
74 result
->comment_char
= '#';
75 result
->escape_char
= '\\';
76 result
->translate_strings
= 1;
78 n
= getdelim (&result
->buf
, &result
->bufsize
, '\n', result
->fp
);
83 free ((char *) result
->fname
);
89 if (n
> 1 && result
->buf
[n
- 2] == '\\' && result
->buf
[n
- 1] == '\n')
92 result
->buf
[n
] = '\0';
94 result
->hash_fct
= hf
;
101 lr_eof (struct linereader
*lr
)
103 return lr
->bufact
= 0;
108 lr_close (struct linereader
*lr
)
117 lr_next (struct linereader
*lr
)
121 n
= getdelim (&lr
->buf
, &lr
->bufsize
, '\n', lr
->fp
);
127 if (n
> 1 && lr
->buf
[n
- 2] == lr
->escape_char
&& lr
->buf
[n
- 1] == '\n')
129 /* An escaped newline character is substituted with a single <SP>. */
131 lr
->buf
[n
- 1] = ' ';
142 /* Defined in error.c. */
143 /* This variable is incremented each time `error' is called. */
144 extern unsigned int error_message_count
;
146 /* The calling program should define program_name and set it to the
147 name of the executing program. */
148 extern char *program_name
;
152 lr_token (struct linereader
*lr
, const struct charset_t
*charset
)
164 lr
->token
.tok
= tok_eof
;
170 lr
->token
.tok
= tok_eol
;
174 while (isspace (ch
));
178 lr
->token
.tok
= tok_eof
;
182 if (ch
!= lr
->comment_char
)
185 /* Ignore rest of line. */
186 lr_ignore_rest (lr
, 0);
187 lr
->token
.tok
= tok_eol
;
191 /* Match escape sequences. */
192 if (ch
== lr
->escape_char
)
193 return get_toplvl_escape (lr
);
195 /* Match ellipsis. */
196 if (ch
== '.' && strncmp (&lr
->buf
[lr
->idx
], "..", 2) == 0)
200 lr
->token
.tok
= tok_ellipsis
;
207 return get_symname (lr
);
210 lr
->token
.tok
= tok_number
;
211 lr
->token
.val
.num
= ch
- '0';
213 while (isdigit (ch
= lr_getc (lr
)))
215 lr
->token
.val
.num
*= 10;
216 lr
->token
.val
.num
+= ch
- '0';
219 lr_error (lr
, _("garbage at end of number"));
225 lr
->token
.tok
= tok_semicolon
;
229 lr
->token
.tok
= tok_comma
;
233 lr
->token
.tok
= tok_open_brace
;
237 lr
->token
.tok
= tok_close_brace
;
241 return get_string (lr
, charset
);
247 lr
->token
.tok
= tok_minus1
;
254 return get_ident (lr
);
258 static struct token
*
259 get_toplvl_escape (struct linereader
*lr
)
261 /* This is supposed to be a numeric value. We return the
262 numerical value and the number of bytes. */
263 size_t start_idx
= lr
->idx
- 1;
264 unsigned int value
= 0;
270 unsigned int byte
= 0;
271 unsigned int base
= 8;
286 if ((base
== 16 && !isxdigit (ch
))
287 || (base
!= 16 && (ch
< '0' || ch
>= (int) ('0' + base
))))
290 lr
->token
.val
.str
.start
= &lr
->buf
[start_idx
];
292 while (ch
!= EOF
&& !isspace (ch
))
294 lr
->token
.val
.str
.len
= lr
->idx
- start_idx
;
296 lr
->token
.tok
= tok_error
;
303 byte
= _tolower (ch
) - 'a' + 10;
306 if ((base
== 16 && !isxdigit (ch
))
307 || (base
!= 16 && (ch
< '0' || ch
>= (int) ('0' + base
))))
314 byte
+= _tolower (ch
) - 'a' + 10;
317 if (base
!= 16 && isdigit (ch
))
330 while (ch
== lr
->escape_char
&& nbytes
< 4);
333 lr_error (lr
, _("garbage at end of character code specification"));
337 lr
->token
.tok
= tok_charcode
;
338 lr
->token
.val
.charcode
.val
= value
;
339 lr
->token
.val
.charcode
.nbytes
= nbytes
;
348 if (bufact == bufmax) \
351 buf = xrealloc (buf, bufmax); \
353 buf[bufact++] = (ch); \
358 static struct token
*
359 get_symname (struct linereader
*lr
)
361 /* Symbol in brackets. We must distinguish three kinds:
363 2. ISO 10646 position values
368 const struct keyword_t
*kw
;
371 buf
= (char *) xmalloc (bufmax
);
376 if (ch
== lr
->escape_char
)
378 int c2
= lr_getc (lr
);
387 while (ch
!= '>' && ch
!= '\n');
390 lr_error (lr
, _("unterminated symbolic name"));
392 /* Test for ISO 10646 position value. */
393 if (buf
[0] == 'U' && (bufact
== 6 || bufact
== 10))
396 while (cp
< &buf
[bufact
- 1] && isxdigit (*cp
))
399 if (cp
== &buf
[bufact
- 1])
402 lr
->token
.tok
= bufact
== 6 ? tok_ucs2
: tok_ucs4
;
403 lr
->token
.val
.charcode
.val
= strtoul (buf
, NULL
, 16);
404 lr
->token
.val
.charcode
.nbytes
= lr
->token
.tok
== tok_ucs2
? 2 : 4;
410 /* It is a symbolic name. Test for reserved words. */
411 kw
= lr
->hash_fct (buf
, bufact
- 1);
413 if (kw
!= NULL
&& kw
->symname_or_ident
== 1)
415 lr
->token
.tok
= kw
->token
;
420 lr
->token
.tok
= tok_bsymbol
;
423 buf
= xrealloc (buf
, bufact
+ 1);
425 lr
->token
.val
.str
.start
= buf
;
426 lr
->token
.val
.str
.len
= bufact
- 1;
433 static struct token
*
434 get_ident (struct linereader
*lr
)
439 const struct keyword_t
*kw
;
442 buf
= xmalloc (bufmax
);
445 ADDC (lr
->buf
[lr
->idx
- 1]);
447 while (!isspace ((ch
= lr_getc (lr
))) && ch
!= '"' && ch
!= ';'
448 && ch
!= '<' && ch
!= ',')
449 /* XXX Handle escape sequences? */
454 kw
= lr
->hash_fct (buf
, bufact
);
456 if (kw
!= NULL
&& kw
->symname_or_ident
== 0)
458 lr
->token
.tok
= kw
->token
;
463 lr
->token
.tok
= tok_ident
;
466 buf
= xrealloc (buf
, bufact
+ 1);
468 lr
->token
.val
.str
.start
= buf
;
469 lr
->token
.val
.str
.len
= bufact
;
476 static struct token
*
477 get_string (struct linereader
*lr
, const struct charset_t
*charset
)
479 int illegal_string
= 0;
485 buf
= xmalloc (bufmax
);
488 while ((ch
= lr_getc (lr
)) != '"' && ch
!= '\n' && ch
!= EOF
)
489 if (ch
!= '<' || charset
== NULL
)
491 if (ch
== lr
->escape_char
)
494 if (ch
== '\n' || ch
== EOF
)
501 /* We have to get the value of the symbol. */
503 size_t startidx
= bufact
;
505 if (!lr
->translate_strings
)
508 while ((ch
= lr_getc (lr
)) != '>' && ch
!= '\n' && ch
!= EOF
)
510 if (ch
== lr
->escape_char
)
513 if (ch
== '\n' || ch
== EOF
)
519 if (ch
== '\n' || ch
== EOF
)
520 lr_error (lr
, _("unterminated string"));
522 if (!lr
->translate_strings
)
525 if (lr
->translate_strings
)
527 value
= charset_find_value (&charset
->char_table
, &buf
[startidx
],
529 if ((wchar_t) value
== ILLEGAL_CHAR_VALUE
)
533 if (bufmax
- bufact
< 8)
536 buf
= (char *) xrealloc (buf
, bufmax
);
540 if (encode_char (value
, &cp
))
547 /* Catch errors with trailing escape character. */
548 if (bufact
> 0 && buf
[bufact
- 1] == lr
->escape_char
549 && (bufact
== 1 || buf
[bufact
- 2] != lr
->escape_char
))
551 lr_error (lr
, _("illegal escape sequence at end of string"));
554 else if (ch
== '\n' || ch
== EOF
)
555 lr_error (lr
, _("unterminated string"));
557 /* Terminate string if necessary. */
558 if (lr
->translate_strings
)
561 if (encode_char (0, &cp
))
569 lr
->token
.tok
= tok_string
;
574 lr
->token
.val
.str
.start
= NULL
;
575 lr
->token
.val
.str
.len
= 0;
579 buf
= xrealloc (buf
, bufact
+ 1);
581 lr
->token
.val
.str
.start
= buf
;
582 lr
->token
.val
.str
.len
= bufact
;